├── infra ├── storage │ ├── kustomization.yaml │ └── local-path-storage │ │ ├── kustomization.yaml │ │ ├── local-path-storage-patch.yaml │ │ ├── netpol.yaml │ │ └── local-path-storage.yaml ├── sources │ └── kustomization.yaml ├── operations │ ├── sealed-secrets │ │ ├── kustomization.yaml │ │ ├── sealed-secrets.yaml │ │ ├── README.md │ │ └── pub-sealed-secrets.pem │ ├── descheduler │ │ ├── kustomization.yaml │ │ ├── configmap.yaml │ │ ├── rbac.yaml │ │ └── deployment.yaml │ ├── kustomization.yaml │ ├── kubelet-csr-approver.yaml │ ├── reflector.yaml │ ├── secret-generator.yaml │ ├── etcd-defrag.yaml │ ├── eventrouter.yaml │ └── netpol-default.yaml ├── ingress │ ├── namespace.yaml │ ├── cert-manager │ │ ├── kustomization.yaml │ │ ├── ca.yaml │ │ └── cert-manager.yaml │ ├── kustomization.yaml │ └── netpol.yaml ├── security │ ├── namespace.yaml │ ├── kustomization.yaml │ ├── netpol.yaml │ ├── falco.yaml │ └── kyverno.yaml ├── observability │ ├── namespace.yaml │ ├── alerts │ │ ├── README.md │ │ ├── kustomization.yaml │ │ ├── probes.yaml │ │ ├── flux.yaml │ │ ├── minio.yaml │ │ ├── nginx.yaml │ │ ├── mongodb.yaml │ │ ├── loki.yaml │ │ ├── cert-manager.yaml │ │ ├── rabbitmq.yaml │ │ ├── vault.yaml │ │ └── redis.yaml │ ├── kustomization.yaml │ ├── netpol.yaml │ ├── victoriametrics.yaml │ ├── blackbox-exporter.yaml │ └── goldpinger.yaml └── crds │ └── kustomization.yaml ├── clusters ├── demo-000 │ ├── apps │ │ ├── demoapp-data-processor │ │ │ ├── PR │ │ │ │ ├── namespace.yaml │ │ │ │ ├── ingress.yaml │ │ │ │ └── kustomization.yaml │ │ │ ├── dev │ │ │ │ ├── namespace.yaml │ │ │ │ └── kustomization.yaml │ │ │ └── flux-ks.yaml │ │ └── demoapp-auth │ │ │ ├── dev │ │ │ ├── namespace.yaml │ │ │ ├── redis.yaml │ │ │ ├── ingress.yaml │ │ │ └── kustomization.yaml │ │ │ ├── prod │ │ │ ├── namespace.yaml │ │ │ ├── ingress.yaml │ │ │ └── kustomization.yaml │ │ │ ├── stg │ │ │ ├── namespace.yaml │ │ │ ├── ingress.yaml │ │ │ └── kustomization.yaml │ │ │ └── flux-ks.yaml │ ├── infra │ │ ├── crds │ │ │ └── kustomization.yaml │ │ ├── sources │ │ │ └── kustomization.yaml │ │ ├── storage │ │ │ └── kustomization.yaml │ │ ├── operations │ │ │ └── kustomization.yaml │ │ ├── ingress │ │ │ ├── kustomization.yaml │ │ │ ├── README.md │ │ │ └── ingress-nginx.yaml │ │ ├── observability │ │ │ ├── blackbox-exporter.patch.yaml │ │ │ ├── loki.patch.yaml │ │ │ ├── kustomization.yaml │ │ │ ├── victoriametrics.patch.yaml │ │ │ ├── blackbox-probes.yaml │ │ │ └── kube-prometheus-stack.patch.yaml │ │ ├── security │ │ │ └── kustomization.yaml │ │ └── flux-ks.yaml │ ├── rbac │ │ ├── kustomization.yaml │ │ ├── kyverno.yaml │ │ └── ci.yaml │ ├── kustomization.yaml │ ├── flux-system │ │ ├── kustomization.yaml │ │ ├── pod-monitor.yaml │ │ ├── gotk-sync.yaml │ │ ├── README.md │ │ └── netpol.yaml │ ├── README.md │ └── kubernetes-api-ingress.yaml └── demo-001 │ ├── infra │ ├── crds │ │ └── kustomization.yaml │ ├── sources │ │ └── kustomization.yaml │ ├── storage │ │ └── kustomization.yaml │ ├── operations │ │ └── kustomization.yaml │ ├── ingress │ │ ├── kustomization.yaml │ │ ├── README.md │ │ └── ingress-nginx.yaml │ ├── observability │ │ ├── blackbox-exporter.patch.yaml │ │ ├── loki.patch.yaml │ │ ├── kustomization.yaml │ │ ├── victoriametrics.patch.yaml │ │ └── blackbox-probes.yaml │ ├── security │ │ └── kustomization.yaml │ └── flux-ks.yaml │ ├── rbac │ ├── kustomization.yaml │ ├── kyverno.yaml │ └── ci.yaml │ ├── apps │ ├── namespace.yaml │ ├── kustomization.yaml │ ├── flux-ks.yaml │ └── app.yaml │ ├── kustomization.yaml │ ├── flux-system 
│ ├── kustomization.yaml │ ├── pod-monitor.yaml │ ├── gotk-sync.yaml │ ├── README.md │ └── netpol.yaml │ ├── README.md │ └── kubernetes-api-ingress.yaml ├── apps ├── demoapp-auth │ ├── sa.yaml │ ├── base │ │ ├── pdb.yaml │ │ ├── service.yaml │ │ ├── hpa.yaml │ │ ├── kustomization.yaml │ │ ├── servicemonitor.yaml │ │ └── deployment.yaml │ ├── kustomization.yaml │ ├── food │ │ └── kustomization.yaml │ ├── movie │ │ └── kustomization.yaml │ ├── sleep │ │ └── kustomization.yaml │ └── netpol.yaml └── demoapp-data-processor │ ├── sa.yaml │ ├── base │ ├── pdb.yaml │ ├── service.yaml │ ├── kustomization.yaml │ ├── servicemonitor.yaml │ └── deployment.yaml │ ├── kustomization.yaml │ ├── call │ └── kustomization.yaml │ ├── meet │ └── kustomization.yaml │ ├── solve │ └── kustomization.yaml │ └── netpol.yaml ├── charts └── security │ ├── default-namespace-network-policies │ ├── templates │ │ ├── default-allow-all-egress..yaml │ │ ├── default-allow-all-ingress.yaml │ │ ├── allow-egress-from-all-to-cluster.yaml │ │ ├── allow-ingress-from-cluster-to-all.yaml │ │ ├── default-deny-all-egress.yaml │ │ ├── default-deny-all-ingress.yaml │ │ ├── allow-egress-internet.yaml │ │ ├── allow-kube-apiserver.yaml │ │ ├── allow-namespace.yaml │ │ ├── allow-egress-from-all-to-private-network.yaml │ │ ├── allow-ingress-from-private-network-to-all.yaml │ │ ├── allow-monitoring.yaml │ │ ├── allow-egress-from-all-to-kube-dns.yaml │ │ └── _helpers.tpl │ ├── .helmignore │ ├── values.yaml │ ├── Chart.yaml │ └── README.md │ └── default-kyverno-cluster-policies │ ├── .helmignore │ ├── values.yaml │ ├── templates │ ├── disable-automountserviceaccounttoken.yaml │ ├── disallow-latest-tag.yaml │ ├── require-ro-rootfs.yaml │ ├── restart_deployment_on_secret_change.yaml │ ├── disallow-container-sock-mounts.yaml │ └── _helpers.tpl │ ├── Chart.yaml │ └── README.md ├── .github ├── workflows │ ├── renovate.yaml │ ├── flux-validate.yaml │ ├── flux-update.yaml │ ├── pr-demo-env-delete.yaml │ ├── flux-e2e.yaml │ ├── flux-dry-run.yaml │ └── pr-demo-env.yaml └── renovate.json └── README.md /infra/storage/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - local-path-storage 5 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-data-processor/PR/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: demoapp-data-processor-$NAME -------------------------------------------------------------------------------- /infra/sources/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: flux-system 4 | resources: 5 | - sources.yaml -------------------------------------------------------------------------------- /clusters/demo-000/infra/crds/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../../../../infra/crds 5 | -------------------------------------------------------------------------------- /clusters/demo-000/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: 
Kustomization 3 | resources: 4 | - ci.yaml 5 | - kyverno.yaml 6 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/crds/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../../../../infra/crds 5 | -------------------------------------------------------------------------------- /clusters/demo-001/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ci.yaml 5 | - kyverno.yaml 6 | -------------------------------------------------------------------------------- /infra/operations/sealed-secrets/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - sealed-secrets.yaml 5 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/sources/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../../../../infra/sources 5 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/storage/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../../../../infra/storage 5 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/sources/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../../../../infra/sources 5 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/storage/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../../../../infra/storage 5 | -------------------------------------------------------------------------------- /infra/ingress/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: ingress 5 | annotations: 6 | kustomize.toolkit.fluxcd.io/prune: disabled 7 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/operations/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../../../../infra/operations 5 | -------------------------------------------------------------------------------- /clusters/demo-001/apps/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: app 5 | annotations: 6 | kustomize.toolkit.fluxcd.io/prune: disabled 7 | -------------------------------------------------------------------------------- 
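The PR overlay shown near the top of this listing (`clusters/demo-000/apps/demoapp-data-processor/PR`) names its namespace `demoapp-data-processor-$NAME`, so the `$NAME` variable has to be filled in at reconcile time. The Flux object that drives those PR environments is not reproduced here; the sketch below only illustrates how such an object could supply the variable through Flux's post-build substitution. The object name and the `pr-123` value are hypothetical placeholders, presumably generated by the `pr-demo-env.yaml` workflow.

```yaml
# Hypothetical sketch of a per-PR Flux Kustomization; only spec.postBuild.substitute is the point here.
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
  name: demoapp-data-processor-pr-123   # placeholder, one object per open pull request
  namespace: flux-system
spec:
  interval: 10m0s
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./clusters/demo-000/apps/demoapp-data-processor/PR
  prune: true
  postBuild:
    substitute:
      NAME: pr-123   # substituted into $NAME in namespace.yaml and the other PR manifests
```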
/clusters/demo-001/infra/operations/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../../../../infra/operations 5 | -------------------------------------------------------------------------------- /infra/security/namespace.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: security 6 | annotations: 7 | kustomize.toolkit.fluxcd.io/prune: disabled 8 | -------------------------------------------------------------------------------- /clusters/demo-001/apps/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: app 4 | resources: 5 | - app.yaml 6 | - namespace.yaml 7 | -------------------------------------------------------------------------------- /infra/observability/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: observability 5 | annotations: 6 | kustomize.toolkit.fluxcd.io/prune: disabled 7 | -------------------------------------------------------------------------------- /infra/observability/alerts/README.md: -------------------------------------------------------------------------------- 1 | Most of the alerts stored here are obtained from the collection: 2 | 3 | [Awesome Prometheus alerts](https://awesome-prometheus-alerts.grep.to/rules) 4 | -------------------------------------------------------------------------------- /infra/ingress/cert-manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: ingress 4 | resources: 5 | - cert-manager.yaml 6 | - ca.yaml 7 | -------------------------------------------------------------------------------- /apps/demoapp-auth/sa.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: demoapp-auth 6 | imagePullSecrets: 7 | - name: ghcr-pull-secret 8 | automountServiceAccountToken: false -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/dev/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: demoapp-auth-dev 5 | annotations: 6 | kustomize.toolkit.fluxcd.io/prune: disabled 7 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/prod/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: demoapp-auth-prod 5 | annotations: 6 | kustomize.toolkit.fluxcd.io/prune: disabled 7 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/stg/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: demoapp-auth-stg 5 | annotations: 6 | kustomize.toolkit.fluxcd.io/prune: disabled 7 |
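The per-environment overlays that tie these namespaces to the shared application base (for example `clusters/demo-000/apps/demoapp-auth/dev/kustomization.yaml`) are not included in this listing. Based on the files the tree shows in that directory and on the base under `apps/demoapp-auth`, a minimal sketch could look like the following; treat the exact resource list as an assumption.

```yaml
# Assumed shape of clusters/demo-000/apps/demoapp-auth/dev/kustomization.yaml (file not shown above).
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: demoapp-auth-dev
resources:
  - namespace.yaml
  - redis.yaml                          # dev-only Redis HelmRelease, shown later in this listing
  - ingress.yaml
  - ../../../../../apps/demoapp-auth    # shared base plus the food/movie/sleep variants
```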
-------------------------------------------------------------------------------- /infra/ingress/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: ingress 4 | resources: 5 | - namespace.yaml 6 | - netpol.yaml 7 | - cert-manager 8 | -------------------------------------------------------------------------------- /apps/demoapp-data-processor/sa.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: demoapp-data-processor 6 | imagePullSecrets: 7 | - name: ghcr-pull-secret 8 | automountServiceAccountToken: false -------------------------------------------------------------------------------- /clusters/demo-000/infra/ingress/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: ingress 4 | resources: 5 | - ../../../../infra/ingress 6 | - ingress-nginx.yaml 7 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/ingress/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: ingress 4 | resources: 5 | - ../../../../infra/ingress 6 | - ingress-nginx.yaml 7 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-data-processor/dev/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: demoapp-data-processor-dev 5 | annotations: 6 | kustomize.toolkit.fluxcd.io/prune: disabled 7 | -------------------------------------------------------------------------------- /infra/operations/descheduler/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: kube-system 4 | resources: 5 | - configmap.yaml 6 | - rbac.yaml 7 | - deployment.yaml 8 | -------------------------------------------------------------------------------- /infra/security/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: security 4 | resources: 5 | - namespace.yaml 6 | - netpol.yaml 7 | - falco.yaml 8 | - kyverno.yaml 9 | -------------------------------------------------------------------------------- /apps/demoapp-auth/base/pdb.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: policy/v1 2 | kind: PodDisruptionBudget 3 | metadata: 4 | name: demoapp-auth 5 | spec: 6 | minAvailable: 0 7 | selector: 8 | matchLabels: 9 | app: demoapp-auth 10 | -------------------------------------------------------------------------------- /clusters/demo-001/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - kubernetes-api-ingress.yaml 5 | - rbac 6 | - flux-system 7 | - infra/flux-ks.yaml 8 | - apps/flux-ks.yaml 9 | 
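The cluster entry point above delegates to `infra/flux-ks.yaml` and `apps/flux-ks.yaml`. Only the apps-level objects appear in this listing; judging from their `dependsOn: - name: operations` references, `infra/flux-ks.yaml` most likely defines one Flux Kustomization per infra layer (crds, sources, storage, operations, and so on) pointing at the matching `clusters/demo-001/infra/*` directory. The sketch below shows one such entry under that assumption; the dependency on `sources` and the interval are guesses, not taken from the repository.

```yaml
# Assumed shape of one entry in clusters/demo-001/infra/flux-ks.yaml (file not shown above).
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
  name: operations              # the name the app-level Kustomizations depend on
  namespace: flux-system
spec:
  dependsOn:
    - name: sources             # assumption: Helm repositories must be registered first
  interval: 10m0s
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./clusters/demo-001/infra/operations
  prune: true
```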
-------------------------------------------------------------------------------- /apps/demoapp-data-processor/base/pdb.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: policy/v1 2 | kind: PodDisruptionBudget 3 | metadata: 4 | name: demoapp-data-processor 5 | spec: 6 | minAvailable: 0 7 | selector: 8 | matchLabels: 9 | app: demoapp-data-processor 10 | -------------------------------------------------------------------------------- /apps/demoapp-auth/base/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: demoapp-auth 5 | spec: 6 | selector: 7 | group: demoapp-auth 8 | ports: 9 | - protocol: TCP 10 | port: 80 11 | targetPort: 8080 12 | name: http 13 | -------------------------------------------------------------------------------- /infra/storage/local-path-storage/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: local-path-storage 4 | resources: 5 | - local-path-storage.yaml 6 | - netpol.yaml 7 | patchesStrategicMerge: 8 | - local-path-storage-patch.yaml 9 | -------------------------------------------------------------------------------- /apps/demoapp-auth/base/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v1 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: demoapp-auth 5 | spec: 6 | maxReplicas: 3 7 | minReplicas: 1 8 | scaleTargetRef: 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | name: demoapp-auth 12 | -------------------------------------------------------------------------------- /apps/demoapp-data-processor/base/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: demoapp-data-processor 5 | spec: 6 | selector: 7 | group: demoapp-data-processor 8 | ports: 9 | - protocol: TCP 10 | port: 80 11 | targetPort: 8080 12 | name: http 13 | -------------------------------------------------------------------------------- /clusters/demo-000/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - kubernetes-api-ingress.yaml 5 | - rbac 6 | - flux-system 7 | - infra/flux-ks.yaml 8 | - apps/demoapp-auth/flux-ks.yaml 9 | - apps/demoapp-data-processor/flux-ks.yaml 10 | -------------------------------------------------------------------------------- /infra/operations/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - netpol-default.yaml 5 | - descheduler 6 | - sealed-secrets 7 | - kubelet-csr-approver.yaml 8 | - reflector.yaml 9 | - secret-generator.yaml 10 | - etcd-defrag.yaml 11 | - eventrouter.yaml 12 | -------------------------------------------------------------------------------- /infra/observability/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: observability 4 | resources: 5 | - namespace.yaml 6 | - netpol.yaml 7 | - alerts 8 | - goldpinger.yaml 9 | - loki.yaml 10 | - kube-prometheus-stack.yaml 11 | - 
blackbox-exporter.yaml 12 | - victoriametrics.yaml -------------------------------------------------------------------------------- /apps/demoapp-auth/base/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - deployment.yaml 5 | - service.yaml 6 | - pdb.yaml 7 | - hpa.yaml 8 | - servicemonitor.yaml 9 | commonLabels: 10 | group: demoapp-auth 11 | configMapGenerator: 12 | - name: demoapp-auth 13 | literals: 14 | - PORT='8080' -------------------------------------------------------------------------------- /clusters/demo-000/flux-system/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: flux-system 4 | resources: 5 | - netpol.yaml 6 | - gotk-components.yaml 7 | - gotk-sync.yaml 8 | # - pod-monitor.yaml # here we have a chicken-egg problem, pod-monitor should be added only after kube-prometheus-stack deployment 9 | -------------------------------------------------------------------------------- /clusters/demo-001/flux-system/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: flux-system 4 | resources: 5 | - netpol.yaml 6 | - gotk-components.yaml 7 | - gotk-sync.yaml 8 | # - pod-monitor.yaml # here we have a chicken-egg problem, pod-monitor should be added only after kube-prometheus-stack deployment 9 | -------------------------------------------------------------------------------- /apps/demoapp-auth/base/servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: ServiceMonitor 4 | metadata: 5 | labels: 6 | app: demoapp-auth 7 | group: demoapp-auth 8 | name: demoapp-auth 9 | spec: 10 | endpoints: 11 | - port: http 12 | path: /healthz 13 | selector: 14 | matchLabels: 15 | app: demoapp-auth 16 | -------------------------------------------------------------------------------- /apps/demoapp-data-processor/base/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - deployment.yaml 5 | - service.yaml 6 | - pdb.yaml 7 | - servicemonitor.yaml 8 | commonLabels: 9 | group: demoapp-data-processor 10 | configMapGenerator: 11 | - name: demoapp-data-processor 12 | literals: 13 | - PORT='8080' -------------------------------------------------------------------------------- /clusters/demo-001/apps/flux-ks.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 3 | kind: Kustomization 4 | metadata: 5 | name: app 6 | namespace: flux-system 7 | spec: 8 | dependsOn: 9 | - name: operations 10 | interval: 10m0s 11 | sourceRef: 12 | kind: GitRepository 13 | name: flux-system 14 | path: ./clusters/demo-001/apps 15 | prune: true -------------------------------------------------------------------------------- /clusters/demo-000/infra/observability/blackbox-exporter.patch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: prometheus-blackbox-exporter 6 
| spec: 7 | values: 8 | tolerations: 9 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 10 | operator: Exists -------------------------------------------------------------------------------- /clusters/demo-001/infra/observability/blackbox-exporter.patch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: prometheus-blackbox-exporter 6 | spec: 7 | values: 8 | tolerations: 9 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 10 | operator: Exists -------------------------------------------------------------------------------- /apps/demoapp-data-processor/base/servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: ServiceMonitor 4 | metadata: 5 | labels: 6 | app: demoapp-data-processor 7 | group: demoapp-data-processor 8 | name: demoapp-data-processor 9 | spec: 10 | endpoints: 11 | - port: http 12 | path: /healthz 13 | selector: 14 | matchLabels: 15 | app: demoapp-data-processor 16 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/observability/loki.patch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: loki 6 | spec: 7 | values: 8 | extraArgs: 9 | config.expand-env: true 10 | tolerations: 11 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 12 | operator: Exists 13 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/observability/loki.patch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: loki 6 | spec: 7 | values: 8 | extraArgs: 9 | config.expand-env: true 10 | tolerations: 11 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 12 | operator: Exists 13 | -------------------------------------------------------------------------------- /infra/storage/local-path-storage/local-path-storage-patch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: local-path-provisioner 6 | namespace: local-path-storage 7 | spec: 8 | template: 9 | spec: 10 | tolerations: 11 | - key: node-role.kubernetes.io/master 12 | operator: Exists 13 | - key: node-role.kubernetes.io/control-plane 14 | operator: Exists 15 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/default-allow-all-egress..yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_egress_traffic }} 2 | apiVersion: networking.k8s.io/v1 3 | kind: NetworkPolicy 4 | metadata: 5 | name: allow-all-egress 6 | labels: 7 | {{- include "default-namespace-network-policies.labels" . 
| nindent 4 }} 8 | spec: 9 | podSelector: {} 10 | egress: 11 | - {} 12 | policyTypes: 13 | - Egress 14 | {{- end }} 15 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/default-allow-all-ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_ingress_traffic }} 2 | apiVersion: networking.k8s.io/v1 3 | kind: NetworkPolicy 4 | metadata: 5 | name: allow-all-ingress 6 | labels: 7 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 8 | spec: 9 | podSelector: {} 10 | ingress: 11 | - {} 12 | policyTypes: 13 | - Ingress 14 | {{- end }} 15 | -------------------------------------------------------------------------------- /infra/observability/alerts/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: observability 4 | resources: 5 | - flux.yaml 6 | - loki.yaml 7 | - minio.yaml 8 | - mongodb.yaml 9 | - nginx.yaml 10 | - node.yaml 11 | # - redis.yaml # commented as redis cluster got shut down, uncomment when it is back up 12 | - probes.yaml 13 | - rabbitmq.yaml 14 | - cert-manager.yaml 15 | - vault.yaml -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-data-processor/flux-ks.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 3 | kind: Kustomization 4 | metadata: 5 | name: demoapp-data-processor-dev 6 | namespace: flux-system 7 | spec: 8 | dependsOn: 9 | - name: operations 10 | interval: 10m0s 11 | sourceRef: 12 | kind: GitRepository 13 | name: flux-system 14 | path: ./clusters/demo-000/apps/demoapp-data-processor/dev 15 | prune: true 16 | -------------------------------------------------------------------------------- /infra/observability/alerts/probes.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: PrometheusRule 4 | metadata: 5 | name: http-endpoint-alerts 6 | spec: 7 | groups: 8 | - name: http-probe-rules 9 | rules: 10 | - alert: ProbeFailing 11 | expr: probe_success == 0 12 | for: 2m 13 | labels: 14 | severity: high 15 | annotations: 16 | summary: Endpoint Down 17 | description: "Endpoint is Down for 2 minutes: {{ $labels.instance }}" -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-egress-from-all-to-cluster.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_egress_cluster_traffic }} 2 | kind: NetworkPolicy 3 | apiVersion: networking.k8s.io/v1 4 | metadata: 5 | labels: 6 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 7 | name: allow-egress-from-all-to-cluster 8 | spec: 9 | policyTypes: 10 | - Egress 11 | podSelector: {} 12 | egress: 13 | - to: 14 | - namespaceSelector: {} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 
2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-ingress-from-cluster-to-all.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_ingress_cluster_traffic }} 2 | kind: NetworkPolicy 3 | apiVersion: networking.k8s.io/v1 4 | metadata: 5 | labels: 6 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 7 | name: allow-ingress-from-cluster-to-all 8 | spec: 9 | policyTypes: 10 | - Ingress 11 | podSelector: {} 12 | ingress: 13 | - from: 14 | - namespaceSelector: {} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/default-deny-all-egress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_egress_traffic }} 2 | # If any traffic is allowed, we skip creating the DENY rule completely 3 | {{- else }} 4 | --- 5 | kind: NetworkPolicy 6 | apiVersion: networking.k8s.io/v1 7 | metadata: 8 | name: default-deny-all-egress 9 | labels: 10 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 11 | spec: 12 | policyTypes: 13 | - Egress 14 | podSelector: {} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/default-deny-all-ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_ingress_traffic }} 2 | # If any traffic is allowed, we skip creating the DENY rule completely 3 | {{- else }} 4 | --- 5 | kind: NetworkPolicy 6 | apiVersion: networking.k8s.io/v1 7 | metadata: 8 | name: default-deny-all-ingress 9 | labels: 10 | {{- include "default-namespace-network-policies.labels" . 
| nindent 4 }} 11 | spec: 12 | policyTypes: 13 | - Ingress 14 | podSelector: {} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-egress-internet.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_internet }} 2 | kind: NetworkPolicy 3 | apiVersion: networking.k8s.io/v1 4 | metadata: 5 | labels: 6 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 7 | name: allow-egress-internet 8 | spec: 9 | policyTypes: 10 | - Egress 11 | podSelector: {} 12 | egress: 13 | - ports: 14 | - port: 80 15 | protocol: TCP 16 | - port: 443 17 | protocol: TCP 18 | {{- end }} 19 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-kube-apiserver.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_apiserver }} 2 | --- 3 | kind: NetworkPolicy 4 | apiVersion: networking.k8s.io/v1 5 | metadata: 6 | name: allow-egress-from-all-to-kube-apiserver 7 | labels: 8 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 9 | spec: 10 | policyTypes: 11 | - Egress 12 | podSelector: {} 13 | egress: 14 | {{- include "default-namespace-network-policies.APIServerAddress" . | nindent 4 }} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-namespace.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_namespace_traffic }} 2 | --- 3 | kind: NetworkPolicy 4 | apiVersion: networking.k8s.io/v1 5 | metadata: 6 | name: allow-namespace 7 | labels: 8 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 9 | spec: 10 | policyTypes: 11 | - Ingress 12 | - Egress 13 | podSelector: {} 14 | ingress: 15 | - from: 16 | - podSelector: {} 17 | egress: 18 | - to: 19 | - podSelector: {} 20 | {{- end }} 21 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/ingress/README.md: -------------------------------------------------------------------------------- 1 | Here we create the required instances of NGINX ingress controller. 2 | 3 | The controllers are configured in daemonset mode with hostPort service type, therefore each application node of the cluster will also serve traffic for the apps it hosts. 4 | 5 | This way we achieve proper load balancing, traffic separation between environments, and with no external load balancer involved, we convert all application nodes into web servers at the same time. Nodes will expose port 443 to the outside world via public IPs. 6 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/ingress/README.md: -------------------------------------------------------------------------------- 1 | Here we create the required instances of NGINX ingress controller. 2 | 3 | The controllers are configured in daemonset mode with hostPort service type, therefore each application node of the cluster will also serve traffic for the apps it hosts. 4 | 5 | This way we achieve proper load balancing, traffic separation between environments, and with no external load balancer involved, we convert all application nodes into web servers at the same time. 
Nodes will expose port 443 to the outside world via public IPs. 6 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/dev/redis.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: redis 6 | spec: 7 | releaseName: redis 8 | chart: 9 | spec: 10 | chart: redis 11 | sourceRef: 12 | kind: HelmRepository 13 | name: bitnami-oci 14 | namespace: flux-system 15 | version: ">=19.6.4 <20.0.0" 16 | interval: 1h0m0s 17 | timeout: 10m 18 | install: 19 | remediation: 20 | retries: 3 21 | upgrade: 22 | remediation: 23 | retries: 3 24 | values: 25 | commonLabels: 26 | app: redis 27 | architecture: standalone -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-egress-from-all-to-private-network.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_egress_private_traffic }} 2 | kind: NetworkPolicy 3 | apiVersion: networking.k8s.io/v1 4 | metadata: 5 | labels: 6 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 7 | name: allow-egress-from-all-to-private-network 8 | spec: 9 | policyTypes: 10 | - Egress 11 | podSelector: {} 12 | egress: 13 | # RFC 1918 14 | - to: 15 | - ipBlock: 16 | cidr: 10.0.0.0/8 17 | - ipBlock: 18 | cidr: 172.16.0.0/12 19 | - ipBlock: 20 | cidr: 192.168.0.0/16 21 | {{- end }} 22 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-ingress-from-private-network-to-all.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_ingress_private_traffic }} 2 | kind: NetworkPolicy 3 | apiVersion: networking.k8s.io/v1 4 | metadata: 5 | labels: 6 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 7 | name: allow-ingress-from-private-network-to-all 8 | spec: 9 | policyTypes: 10 | - Ingress 11 | podSelector: {} 12 | ingress: 13 | # RFC 1918 14 | - from: 15 | - ipBlock: 16 | cidr: 10.0.0.0/8 17 | - ipBlock: 18 | cidr: 172.16.0.0/12 19 | - ipBlock: 20 | cidr: 192.168.0.0/16 21 | {{- end }} 22 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-monitoring.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.allow_monitoring }} 2 | kind: NetworkPolicy 3 | apiVersion: networking.k8s.io/v1 4 | metadata: 5 | labels: 6 | {{- include "default-namespace-network-policies.labels" . 
| nindent 4 }} 7 | name: allow-ingress-from-prometheus-to-all 8 | spec: 9 | policyTypes: 10 | - Ingress 11 | podSelector: {} 12 | ingress: 13 | - from: 14 | - namespaceSelector: 15 | matchLabels: 16 | kubernetes.io/metadata.name: {{ .Values.monitoring_namespace }} 17 | podSelector: 18 | matchLabels: 19 | app.kubernetes.io/name: prometheus 20 | {{- end }} 21 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/observability/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: observability 4 | resources: 5 | - ../../../../infra/observability 6 | 7 | patches: 8 | - path: victoriametrics.patch.yaml 9 | target: 10 | kind: HelmRelease 11 | name: victoriametrics 12 | - path: kube-prometheus-stack.patch.yaml 13 | target: 14 | kind: HelmRelease 15 | name: kube-prometheus-stack 16 | - path: blackbox-exporter.patch.yaml 17 | target: 18 | kind: HelmRelease 19 | name: blackbox-exporter 20 | - path: loki.patch.yaml 21 | target: 22 | kind: HelmRelease 23 | name: loki 24 | -------------------------------------------------------------------------------- /clusters/demo-000/rbac/kyverno.yaml: -------------------------------------------------------------------------------- 1 | # Additional RBAC permissions given to Kyverno to mutate deployment resources. 2 | --- 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | name: kyverno:restart-deployment 7 | rules: 8 | - apiGroups: ["apps"] 9 | resources: ["deployments"] 10 | verbs: ["get", "patch", "update"] 11 | 12 | --- 13 | apiVersion: rbac.authorization.k8s.io/v1 14 | kind: ClusterRoleBinding 15 | metadata: 16 | name: kyverno:restart-deployment 17 | roleRef: 18 | apiGroup: rbac.authorization.k8s.io 19 | kind: ClusterRole 20 | name: kyverno:restart-deployment 21 | subjects: 22 | - kind: ServiceAccount 23 | name: kyverno 24 | namespace: security -------------------------------------------------------------------------------- /clusters/demo-001/rbac/kyverno.yaml: -------------------------------------------------------------------------------- 1 | # Additional RBAC permissions given to Kyverno to mutate deployment resources. 
2 | --- 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | name: kyverno:restart-deployment 7 | rules: 8 | - apiGroups: ["apps"] 9 | resources: ["deployments"] 10 | verbs: ["get", "patch", "update"] 11 | 12 | --- 13 | apiVersion: rbac.authorization.k8s.io/v1 14 | kind: ClusterRoleBinding 15 | metadata: 16 | name: kyverno:restart-deployment 17 | roleRef: 18 | apiGroup: rbac.authorization.k8s.io 19 | kind: ClusterRole 20 | name: kyverno:restart-deployment 21 | subjects: 22 | - kind: ServiceAccount 23 | name: kyverno 24 | namespace: security -------------------------------------------------------------------------------- /clusters/demo-000/flux-system/pod-monitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PodMonitor 3 | metadata: 4 | name: flux-system 5 | namespace: flux-system 6 | labels: 7 | app.kubernetes.io/part-of: flux 8 | spec: 9 | namespaceSelector: 10 | matchNames: 11 | - flux-system 12 | selector: 13 | matchExpressions: 14 | - key: app 15 | operator: In 16 | values: 17 | - helm-controller 18 | - source-controller 19 | - kustomize-controller 20 | - notification-controller 21 | - image-automation-controller 22 | - image-reflector-controller 23 | podMetricsEndpoints: 24 | - port: http-prom 25 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/observability/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: observability 4 | resources: 5 | - ../../../../infra/observability 6 | 7 | patches: 8 | - path: victoriametrics.patch.yaml 9 | target: 10 | kind: HelmRelease 11 | name: victoriametrics 12 | - path: kube-prometheus-stack.patch.yaml 13 | target: 14 | kind: HelmRelease 15 | name: kube-prometheus-stack 16 | - path: blackbox-exporter.patch.yaml 17 | target: 18 | kind: HelmRelease 19 | name: blackbox-exporter 20 | # - path: loki.patch.yaml 21 | # target: 22 | # kind: HelmRelease 23 | # name: loki 24 | -------------------------------------------------------------------------------- /clusters/demo-001/flux-system/pod-monitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PodMonitor 3 | metadata: 4 | name: flux-system 5 | namespace: flux-system 6 | labels: 7 | app.kubernetes.io/part-of: flux 8 | spec: 9 | namespaceSelector: 10 | matchNames: 11 | - flux-system 12 | selector: 13 | matchExpressions: 14 | - key: app 15 | operator: In 16 | values: 17 | - helm-controller 18 | - source-controller 19 | - kustomize-controller 20 | - notification-controller 21 | - image-automation-controller 22 | - image-reflector-controller 23 | podMetricsEndpoints: 24 | - port: http-prom 25 | -------------------------------------------------------------------------------- /clusters/demo-000/flux-system/gotk-sync.yaml: -------------------------------------------------------------------------------- 1 | # This manifest was generated by flux. DO NOT EDIT. 
2 | --- 3 | apiVersion: source.toolkit.fluxcd.io/v1 4 | kind: GitRepository 5 | metadata: 6 | name: flux-system 7 | namespace: flux-system 8 | spec: 9 | interval: 1m0s 10 | ref: 11 | branch: main 12 | secretRef: 13 | name: flux-system 14 | url: https://github.com/artazar/flux2-general.git 15 | --- 16 | apiVersion: kustomize.toolkit.fluxcd.io/v1 17 | kind: Kustomization 18 | metadata: 19 | name: flux-system 20 | namespace: flux-system 21 | spec: 22 | interval: 10m0s 23 | path: ./clusters/demo-000 24 | prune: true 25 | sourceRef: 26 | kind: GitRepository 27 | name: flux-system 28 | -------------------------------------------------------------------------------- /.github/workflows/renovate.yaml: -------------------------------------------------------------------------------- 1 | name: Renovate 2 | on: 3 | workflow_dispatch: 4 | schedule: 5 | # The "*" (#42, asterisk) character has special semantics in YAML, so this 6 | # string has to be quoted. 7 | - cron: '0 9 * * 0' 8 | jobs: 9 | renovate: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3.3.0 14 | - name: Self-hosted Renovate 15 | uses: renovatebot/github-action@v34.82.0 16 | with: 17 | configurationFile: .github/renovate.json 18 | token: ${{ secrets.RENOVATE_TOKEN }} 19 | env: 20 | RENOVATE_REPOSITORIES: ${{ github.repository }} 21 | # LOG_LEVEL: debug 22 | -------------------------------------------------------------------------------- /clusters/demo-001/flux-system/gotk-sync.yaml: -------------------------------------------------------------------------------- 1 | # This manifest was generated by flux. DO NOT EDIT. 2 | --- 3 | apiVersion: source.toolkit.fluxcd.io/v1beta2 4 | kind: GitRepository 5 | metadata: 6 | name: flux-system 7 | namespace: flux-system 8 | spec: 9 | interval: 1m0s 10 | ref: 11 | branch: main 12 | secretRef: 13 | name: flux-system 14 | url: https://github.com/artazar/flux2-general.git 15 | --- 16 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 17 | kind: Kustomization 18 | metadata: 19 | name: flux-system 20 | namespace: flux-system 21 | spec: 22 | interval: 10m0s 23 | path: ./clusters/demo-001 24 | prune: true 25 | sourceRef: 26 | kind: GitRepository 27 | name: flux-system 28 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/observability/victoriametrics.patch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: victoriametrics 6 | spec: 7 | values: 8 | server: 9 | tolerations: 10 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 11 | operator: Exists 12 | nodeSelector: 13 | kubernetes.io/hostname: minikube # we bind single node installation to particular node due to local path provisioner 14 | persistentVolume: 15 | storageClass: local-path 16 | size: 20Gi # with local-path the size is not actually treated, but we put it here to show how we estimate data usage -------------------------------------------------------------------------------- /clusters/demo-001/infra/observability/victoriametrics.patch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: victoriametrics 6 | spec: 7 | values: 8 | server: 9 | tolerations: 10 | - key: node-role.kubernetes.io/master # we put observability 
stack on controlplane nodes to utilize their resources 11 | operator: Exists 12 | nodeSelector: 13 | kubernetes.io/hostname: minikube # we bind single node installation to particular node due to local path provisioner 14 | persistentVolume: 15 | storageClass: local-path 16 | size: 20Gi # with local-path the size is not actually treated, but we put it here to show how we estimate data usage -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/dev/ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | annotations: 6 | nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" 7 | nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" 8 | name: demoapp-auth-movie 9 | spec: 10 | ingressClassName: nginx 11 | rules: 12 | - host: dev.movie.demoapp-auth.example.com 13 | http: 14 | paths: 15 | - backend: 16 | service: 17 | name: demoapp-auth-movie 18 | port: 19 | number: 80 20 | path: / 21 | pathType: ImplementationSpecific 22 | tls: 23 | - hosts: 24 | - dev.movie.demoapp-auth.example.com 25 | secretName: dev-demoapp-auth-com-tls 26 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/stg/ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | annotations: 6 | nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" 7 | nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" 8 | name: demoapp-auth-movie 9 | spec: 10 | ingressClassName: nginx 11 | rules: 12 | - host: stg.movie.demoapp-auth.example.com 13 | http: 14 | paths: 15 | - backend: 16 | service: 17 | name: demoapp-auth-movie 18 | port: 19 | number: 80 20 | path: / 21 | pathType: ImplementationSpecific 22 | tls: 23 | - hosts: 24 | - stg.movie.demoapp-auth.example.com 25 | secretName: stg-demoapp-auth-com-tls 26 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/prod/ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | annotations: 6 | nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" 7 | nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" 8 | name: demoapp-auth-movie 9 | spec: 10 | ingressClassName: nginx 11 | rules: 12 | - host: prod.movie.demoapp-auth.example.com 13 | http: 14 | paths: 15 | - backend: 16 | service: 17 | name: demoapp-auth-movie 18 | port: 19 | number: 80 20 | path: / 21 | pathType: ImplementationSpecific 22 | tls: 23 | - hosts: 24 | - prod.movie.demoapp-auth.example.com 25 | secretName: prod-demoapp-auth-com-tls 26 | -------------------------------------------------------------------------------- /.github/workflows/flux-validate.yaml: -------------------------------------------------------------------------------- 1 | name: Flux - validate repository manifests 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | 9 | jobs: 10 | validate: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | - name: Setup yq 16 | uses: fluxcd/pkg/actions/yq@main 17 | - name: Setup kubeconform 18 | uses: fluxcd/pkg/actions/kubeconform@main 19 | - name: Setup kustomize 20 | uses: fluxcd/pkg/actions/kustomize@main 21 | 
- name: Validate manifests 22 | run: | 23 | wget https://raw.githubusercontent.com/fluxcd/flux2-kustomize-helm-example/main/scripts/validate.sh 24 | bash validate.sh 25 | -------------------------------------------------------------------------------- /infra/observability/alerts/flux.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-flux 5 | spec: 6 | groups: 7 | - name: Flux 8 | rules: 9 | - alert: FluxReconciliationFailure 10 | expr: max(gotk_reconcile_condition{status="False",type="Ready"}) by (exported_namespace, name, kind) + on(exported_namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"}) by (exported_namespace, name, kind)) * 2 == 1 11 | for: 15m 12 | labels: 13 | severity: warn 14 | annotations: 15 | summary: '{{ $labels.kind }} {{ $labels.exported_namespace }}/{{ $labels.name }} reconciliation has been failing for more than ten minutes.' 16 | -------------------------------------------------------------------------------- /apps/demoapp-auth/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - netpol.yaml 5 | - sa.yaml 6 | - food 7 | - movie 8 | - sleep 9 | 10 | # some vars added to several app components, grouped by label 11 | patches: 12 | - patch: |- 13 | - op: add 14 | path: "/spec/template/spec/containers/0/env/-" 15 | value: 16 | name: PERIOD 17 | value: day 18 | target: 19 | kind: Deployment 20 | labelSelector: period=day 21 | - patch: |- 22 | - op: add 23 | path: "/spec/template/spec/containers/0/env/-" 24 | value: 25 | name: PERIOD 26 | mountPath: night 27 | target: 28 | kind: Deployment 29 | labelSelector: period=night -------------------------------------------------------------------------------- /apps/demoapp-data-processor/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - netpol.yaml 5 | - sa.yaml 6 | - call 7 | - meet 8 | - solve 9 | 10 | # some vars added to several app components, grouped by label 11 | patches: 12 | - patch: |- 13 | - op: add 14 | path: "/spec/template/spec/containers/0/env/-" 15 | value: 16 | name: TYPE 17 | value: group 18 | target: 19 | kind: Deployment 20 | labelSelector: type=group 21 | - patch: |- 22 | - op: add 23 | path: "/spec/template/spec/containers/0/env/-" 24 | value: 25 | name: TYPE 26 | mountPath: solo 27 | target: 28 | kind: Deployment 29 | labelSelector: type=solo -------------------------------------------------------------------------------- /infra/operations/sealed-secrets/sealed-secrets.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: sealed-secrets 6 | namespace: kube-system 7 | spec: 8 | chart: 9 | spec: 10 | chart: sealed-secrets 11 | sourceRef: 12 | kind: HelmRepository 13 | name: sealed-secrets 14 | namespace: flux-system 15 | version: '>=2.1.5 <3.0.0' 16 | install: 17 | crds: Create 18 | interval: 1h0m0s 19 | releaseName: sealed-secrets-controller 20 | targetNamespace: kube-system 21 | upgrade: 22 | crds: CreateReplace 23 | values: 24 | tolerations: 25 | - key: node-role.kubernetes.io/master 26 | operator: Exists 27 | - key: 
node-role.kubernetes.io/control-plane 28 | operator: Exists 29 | -------------------------------------------------------------------------------- /infra/operations/kubelet-csr-approver.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: kubelet-csr-approver 6 | namespace: kube-system 7 | spec: 8 | chart: 9 | spec: 10 | chart: kubelet-csr-approver 11 | sourceRef: 12 | kind: HelmRepository 13 | name: kubelet-csr-approver 14 | namespace: flux-system 15 | version: '>=0.2.3 <1.0.0' 16 | interval: 1h0m0s 17 | releaseName: kubelet-csr-approver 18 | timeout: 60m 19 | install: 20 | remediation: 21 | retries: 3 22 | crds: Create 23 | upgrade: 24 | remediation: 25 | retries: 3 26 | crds: CreateReplace 27 | values: 28 | providerIpPrefixes: '10.30.0.0/16' 29 | bypassDnsResolution: true # our node dns names are not resolved atm -------------------------------------------------------------------------------- /infra/operations/reflector.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: reflector 6 | namespace: kube-system 7 | spec: 8 | chart: 9 | spec: 10 | chart: reflector 11 | sourceRef: 12 | kind: HelmRepository 13 | name: emberstack 14 | namespace: flux-system 15 | version: '>=7.1.256 <8.0.0' 16 | interval: 1h0m0s 17 | releaseName: reflector 18 | timeout: 60m 19 | install: 20 | remediation: 21 | retries: 3 22 | crds: Create 23 | upgrade: 24 | remediation: 25 | retries: 3 26 | crds: CreateReplace 27 | values: 28 | tolerations: 29 | - key: node-role.kubernetes.io/master 30 | operator: Exists 31 | - key: node-role.kubernetes.io/control-plane 32 | operator: Exists -------------------------------------------------------------------------------- /infra/operations/sealed-secrets/README.md: -------------------------------------------------------------------------------- 1 | Main doc: [Sealed Secrets](https://fluxcd.io/docs/guides/sealed-secrets/) 2 | 3 | A brief how-to for encrypting secrets: 4 | 5 | 1. Create a new secret definition in a YAML file: 6 | 7 | ``` 8 | kubectl -n default create secret generic basic-auth \ 9 | --from-literal=user=admin \ 10 | --from-literal=password=change-me \ 11 | --dry-run=client \ 12 | -o yaml > basic-auth.yaml 13 | ``` 14 | 15 | 2. Encrypt the secret to make it sealed: 16 | 17 | ``` 18 | kubeseal --format=yaml --cert=pub-sealed-secrets.pem \ 19 | < basic-auth.yaml > basic-auth-sealed.yaml 20 | ``` 21 | 22 | 3. Remove the original non-encrypted version: 23 | 24 | ``` 25 | rm -f basic-auth.yaml 26 | ``` 27 | 28 | 4. Store the encrypted version in Git. The Sealed Secrets controller in Kubernetes will take care of decrypting the secret. 29 | -------------------------------------------------------------------------------- /clusters/demo-000/flux-system/README.md: -------------------------------------------------------------------------------- 1 | This is an automatically created location for the Flux-managed kustomization; it is not part of the rest of the definitions. 2 | 3 | The Flux CLI should be used to generate these base manifests: 4 | 5 | flux bootstrap github --owner=artazar --repository=flux2-general --branch=main --path=clusters/demo-000 --token-auth --toleration-keys='node-role.kubernetes.io/master' 6 | 7 | It is safe to re-run the command against a live cluster.
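Once the bootstrap finishes, the resulting Flux objects can be inspected with the standard Flux CLI commands (listed here as a suggested verification step, not something the bootstrap generates):

    flux check
    flux get kustomizations -A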
8 | 9 | Note: `--token-auth` is needed for Flux to make its initial commits into the repository to correct the `flux-system` manifests. The token should provide write access to the repo. 10 | 11 | When a cluster is bootstrapped from scratch, pod-monitor.yaml should be excluded (commented out in kustomization.yaml) and then enabled again after the observability unit is fully up (chicken-and-egg problem). 12 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/observability/blackbox-probes.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: Probe 5 | metadata: 6 | name: app-probes 7 | namespace: observability 8 | spec: 9 | module: http_2xx 10 | prober: 11 | url: prometheus-blackbox-exporter:9115 12 | targets: 13 | staticConfig: 14 | labels: 15 | cluster: demo-000 16 | static: 17 | - https://grafana.example.com/healthz 18 | 19 | --- 20 | apiVersion: monitoring.coreos.com/v1 21 | kind: Probe 22 | metadata: 23 | name: k8s-api-probe 24 | namespace: observability 25 | spec: 26 | module: http_2xx_insecure 27 | prober: 28 | url: prometheus-blackbox-exporter:9115 29 | targets: 30 | staticConfig: 31 | labels: 32 | cluster: demo-000 33 | static: 34 | - https://k8s-api.example.com/healthz 35 | -------------------------------------------------------------------------------- /clusters/demo-001/flux-system/README.md: -------------------------------------------------------------------------------- 1 | This is an automatically created location for the Flux-managed kustomization; it is not part of the rest of the definitions. 2 | 3 | The Flux CLI should be used to generate these base manifests: 4 | 5 | flux bootstrap github --owner=artazar --repository=flux2-general --branch=main --path=clusters/demo-001 --token-auth --toleration-keys='node-role.kubernetes.io/master' 6 | 7 | It is safe to re-run the command against a live cluster. 8 | 9 | Note: `--token-auth` is needed for Flux to make its initial commits into the repository to correct the `flux-system` manifests. The token should provide write access to the repo. 10 | 11 | When a cluster is bootstrapped from scratch, pod-monitor.yaml should be excluded (commented out in kustomization.yaml) and then enabled again after the observability unit is fully up (chicken-and-egg problem). 12 | -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for default-kyverno-cluster-policies. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates.
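# The expected item shape for `restart_deployment_on_secret_change` (kept commented out here;
# the deployment and namespace names are illustrative), matching how the cluster overlays
# patch this value:
#
# restart_deployment_on_secret_change:
#   - deployment: example-app
#     namespace: example-namespace
#     secret:
#       name: cluster-ca-root-secret
#       namespace: example-namespace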
4 | 5 | # -- 'enforce' to apply restrictions and 'audit' to store failures in policy reports only 6 | validationFailureAction: 'enforce' 7 | 8 | # -- Do not mount service account token into pods by default 9 | disable_automountserviceaccounttoken: true 10 | 11 | # -- Do not allow container engine socket mounts into pods 12 | disallow_container_sock_mounts: true 13 | 14 | # -- Do not allow to use :latest tag on container images 15 | disallow_latest_tag: true 16 | 17 | # -- Only allow read-only root FS inside pods 18 | require_ro_rootfs: true 19 | 20 | # -- Restart deployments on secret change 21 | restart_deployment_on_secret_change: [] 22 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/observability/blackbox-probes.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: Probe 5 | metadata: 6 | name: app-probes 7 | namespace: observability 8 | spec: 9 | module: http_2xx 10 | prober: 11 | url: prometheus-blackbox-exporter:9115 12 | targets: 13 | staticConfig: 14 | labels: 15 | cluster: demo-001 16 | static: 17 | - https://grafana-001.example.com/healthz 18 | 19 | --- 20 | apiVersion: monitoring.coreos.com/v1 21 | kind: Probe 22 | metadata: 23 | name: k8s-api-probe 24 | namespace: observability 25 | spec: 26 | module: http_2xx_insecure 27 | prober: 28 | url: prometheus-blackbox-exporter:9115 29 | targets: 30 | staticConfig: 31 | labels: 32 | cluster: demo-001 33 | static: 34 | - https://k8s-api-001.example.com/healthz 35 | -------------------------------------------------------------------------------- /apps/demoapp-auth/base/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: demoapp-auth 5 | spec: 6 | minReadySeconds: 30 7 | template: 8 | spec: 9 | containers: 10 | - name: demoapp-auth 11 | image: demoapp-auth:1.0 12 | env: 13 | - name: APP_GROUP 14 | value: demoapp-auth 15 | securityContext: 16 | allowPrivilegeEscalation: false 17 | capabilities: 18 | drop: 19 | - ALL 20 | privileged: false 21 | volumeMounts: 22 | - mountPath: /tmp 23 | name: temp 24 | serviceAccountName: demoapp-auth 25 | securityContext: 26 | fsGroup: 2000 27 | runAsNonRoot: true 28 | seccompProfile: 29 | type: RuntimeDefault 30 | volumes: 31 | - name: temp 32 | emptyDir: {} -------------------------------------------------------------------------------- /clusters/demo-000/README.md: -------------------------------------------------------------------------------- 1 | This directory contains cluster definitions, managed by FluxCD.
2 | 3 | The structure: 4 | 5 | - `apps` - contains envs and overlays relative to higher level base app definitions 6 | - `infra` - contains grouped infra layer resources with overlays over higher level base infra definitions 7 | - `rbac` - cluster-specific rbac entries 8 | - `kubernetes-api-ingress.yaml` - ingress resource for Kubernetes API 9 | - `flux-system` - internal directory managed by Flux itself 10 | 11 | The following units are added as separate Flux kustomization resources, subject to independent reconciliations: 12 | 13 | - apps/demoapp-auth 14 | - dev 15 | - stg 16 | - prod 17 | 18 | - apps/demoapp-data-processor 19 | - dev 20 | 21 | - infra 22 | - sources 23 | - crds 24 | - operations 25 | - ingress 26 | - storage 27 | - observability 28 | - security -------------------------------------------------------------------------------- /clusters/demo-001/README.md: -------------------------------------------------------------------------------- 1 | This directory contains cluster definitions, managed by FluxCD. 2 | 3 | The structure: 4 | 5 | - `apps` - contains envs and overlays relative to higher level base app definitions 6 | - `infra` - contains grouped infra layer resources with overlays over higher level base infra definitions 7 | - `rbac` - cluster-specific rbac entries 8 | - `kubernetes-api-ingress.yaml` - ingress resource for Kubernetes API 9 | - `flux-system` - internal directory managed by Flux itself 10 | 11 | The following units are added as separate Flux kustomization resources, subject to independent reconciliations: 12 | 13 | - apps/demoapp-auth 14 | - dev 15 | - stg 16 | - prod 17 | 18 | - apps/demoapp-data-processor 19 | - dev 20 | 21 | - infra 22 | - sources 23 | - crds 24 | - operations 25 | - ingress 26 | - storage 27 | - observability 28 | - security -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-data-processor/PR/ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | annotations: 6 | cert-manager.io/cluster-issuer: letsencrypt-prod 7 | name: demoapp-data-processor 8 | spec: 9 | ingressClassName: nginx 10 | rules: 11 | - host: demoapp-data-processor-$NAME.example.com 12 | http: 13 | paths: 14 | - backend: 15 | service: 16 | name: demoapp-data-processor-meet 17 | port: 18 | number: 80 19 | path: /meet 20 | pathType: ImplementationSpecific 21 | - backend: 22 | service: 23 | name: demoapp-data-processor-call 24 | port: 25 | number: 80 26 | path: /call 27 | pathType: ImplementationSpecific 28 | tls: 29 | - hosts: 30 | - $NAME.example.com 31 | secretName: $NAME-example-com-tls -------------------------------------------------------------------------------- /apps/demoapp-auth/food/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../base 3 | 4 | nameSuffix: -food 5 | 6 | commonLabels: 7 | app: demoapp-auth-food 8 | 9 | images: 10 | - name: demoapp-auth 11 | newName: nginxinc/nginx-unprivileged 12 | 13 | configMapGenerator: 14 | - name: demoapp-auth 15 | behavior: merge 16 | literals: 17 | - APP_NAME='demoapp-auth-food' 18 | 19 | patches: 20 | - patch: |- 21 | - op: replace 22 | path: /spec/template/spec/containers/0/name 23 | value: demoapp-auth-food 24 | target: 25 | kind: Deployment 26 | - patch: |- 27 | - op: add 28 | path: "/metadata/labels/period" 29 | value: day 30 | target: 31 | kind: Deployment 32 | -
patch: |- 33 | - op: replace 34 | path: /spec/template/spec/containers/0/resources 35 | value: 36 | requests: 37 | cpu: 250m 38 | memory: 512Mi 39 | target: 40 | kind: Deployment -------------------------------------------------------------------------------- /apps/demoapp-auth/movie/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../base 3 | 4 | nameSuffix: -movie 5 | 6 | commonLabels: 7 | app: demoapp-auth-movie 8 | 9 | images: 10 | - name: demoapp-auth 11 | newName: nginxinc/nginx-unprivileged 12 | 13 | configMapGenerator: 14 | - name: demoapp-auth 15 | behavior: merge 16 | literals: 17 | - APP_NAME='demoapp-auth-movie' 18 | 19 | patches: 20 | - patch: |- 21 | - op: replace 22 | path: /spec/template/spec/containers/0/name 23 | value: demoapp-auth-movie 24 | target: 25 | kind: Deployment 26 | - patch: |- 27 | - op: add 28 | path: "/metadata/labels/period" 29 | value: day 30 | target: 31 | kind: Deployment 32 | - patch: |- 33 | - op: replace 34 | path: /spec/template/spec/containers/0/resources 35 | value: 36 | requests: 37 | cpu: 250m 38 | memory: 512Mi 39 | target: 40 | kind: Deployment -------------------------------------------------------------------------------- /apps/demoapp-auth/sleep/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../base 3 | 4 | nameSuffix: -sleep 5 | 6 | commonLabels: 7 | app: demoapp-auth-sleep 8 | 9 | images: 10 | - name: demoapp-auth 11 | newName: nginxinc/nginx-unprivileged 12 | 13 | configMapGenerator: 14 | - name: demoapp-auth 15 | behavior: merge 16 | literals: 17 | - APP_NAME='demoapp-auth-sleep' 18 | 19 | patches: 20 | - patch: |- 21 | - op: replace 22 | path: /spec/template/spec/containers/0/name 23 | value: demoapp-auth-sleep 24 | target: 25 | kind: Deployment 26 | - patch: |- 27 | - op: add 28 | path: "/metadata/labels/period" 29 | value: night 30 | target: 31 | kind: Deployment 32 | - patch: |- 33 | - op: replace 34 | path: /spec/template/spec/containers/0/resources 35 | value: 36 | requests: 37 | cpu: 250m 38 | memory: 512Mi 39 | target: 40 | kind: Deployment -------------------------------------------------------------------------------- /apps/demoapp-data-processor/base/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: demoapp-data-processor 5 | spec: 6 | minReadySeconds: 30 7 | template: 8 | spec: 9 | containers: 10 | - name: demoapp-data-processor 11 | image: demoapp-data-processor:1.0 12 | env: 13 | - name: APP_GROUP 14 | value: demoapp-data-processor 15 | securityContext: 16 | allowPrivilegeEscalation: false 17 | capabilities: 18 | drop: 19 | - ALL 20 | privileged: false 21 | volumeMounts: 22 | - mountPath: /tmp 23 | name: temp 24 | serviceAccountName: demoapp-data-processor 25 | securityContext: 26 | fsGroup: 2000 27 | runAsNonRoot: true 28 | seccompProfile: 29 | type: RuntimeDefault 30 | volumes: 31 | - name: temp 32 | emptyDir: {} -------------------------------------------------------------------------------- /apps/demoapp-data-processor/call/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../base 3 | 4 | nameSuffix: -call 5 | 6 | commonLabels: 7 | app: demoapp-data-processor-call 8 | 9 | images: 10 | - name: demoapp-data-processor 11 | newName: nginxinc/nginx-unprivileged 12 | 13 | configMapGenerator: 14 
| - name: demoapp-data-processor 15 | behavior: merge 16 | literals: 17 | - APP_NAME='demoapp-data-processor-call' 18 | 19 | patches: 20 | - patch: |- 21 | - op: replace 22 | path: /spec/template/spec/containers/0/name 23 | value: demoapp-data-processor-call 24 | target: 25 | kind: Deployment 26 | - patch: |- 27 | - op: add 28 | path: "/metadata/labels/type" 29 | value: group 30 | target: 31 | kind: Deployment 32 | - patch: |- 33 | - op: replace 34 | path: /spec/template/spec/containers/0/resources 35 | value: 36 | requests: 37 | cpu: 250m 38 | memory: 512Mi 39 | target: 40 | kind: Deployment -------------------------------------------------------------------------------- /apps/demoapp-data-processor/meet/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../base 3 | 4 | nameSuffix: -meet 5 | 6 | commonLabels: 7 | app: demoapp-data-processor-meet 8 | 9 | images: 10 | - name: demoapp-data-processor 11 | newName: nginxinc/nginx-unprivileged 12 | 13 | configMapGenerator: 14 | - name: demoapp-data-processor 15 | behavior: merge 16 | literals: 17 | - APP_NAME='demoapp-data-processor-meet' 18 | 19 | patches: 20 | - patch: |- 21 | - op: replace 22 | path: /spec/template/spec/containers/0/name 23 | value: demoapp-data-processor-meet 24 | target: 25 | kind: Deployment 26 | - patch: |- 27 | - op: add 28 | path: "/metadata/labels/type" 29 | value: group 30 | target: 31 | kind: Deployment 32 | - patch: |- 33 | - op: replace 34 | path: /spec/template/spec/containers/0/resources 35 | value: 36 | requests: 37 | cpu: 250m 38 | memory: 512Mi 39 | target: 40 | kind: Deployment -------------------------------------------------------------------------------- /apps/demoapp-data-processor/solve/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../base 3 | 4 | nameSuffix: -solve 5 | 6 | commonLabels: 7 | app: demoapp-data-processor-solve 8 | 9 | images: 10 | - name: demoapp-data-processor 11 | newName: nginxinc/nginx-unprivileged 12 | 13 | configMapGenerator: 14 | - name: demoapp-data-processor 15 | behavior: merge 16 | literals: 17 | - APP_NAME='demoapp-data-processor-solve' 18 | 19 | patches: 20 | - patch: |- 21 | - op: replace 22 | path: /spec/template/spec/containers/0/name 23 | value: demoapp-data-processor-solve 24 | target: 25 | kind: Deployment 26 | - patch: |- 27 | - op: add 28 | path: "/metadata/labels/type" 29 | value: solo 30 | target: 31 | kind: Deployment 32 | - patch: |- 33 | - op: replace 34 | path: /spec/template/spec/containers/0/resources 35 | value: 36 | requests: 37 | cpu: 250m 38 | memory: 512Mi 39 | target: 40 | kind: Deployment -------------------------------------------------------------------------------- /clusters/demo-001/apps/app.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: demo001-app 6 | name: demo001-app 7 | spec: 8 | minReadySeconds: 30 9 | selector: 10 | matchLabels: 11 | app: demo001-app 12 | template: 13 | metadata: 14 | labels: 15 | app: demo001-app 16 | spec: 17 | containers: 18 | - name: demo001-app 19 | image: nginxinc/nginx-unprivileged:1.23 20 | env: 21 | - name: APP_GROUP 22 | value: demo001-app 23 | securityContext: 24 | allowPrivilegeEscalation: false 25 | capabilities: 26 | drop: 27 | - ALL 28 | privileged: false 29 | volumeMounts: 30 | - mountPath: /tmp 31 | name: temp 32 | securityContext: 33 | fsGroup: 
2000 34 | runAsNonRoot: true 35 | seccompProfile: 36 | type: RuntimeDefault 37 | volumes: 38 | - name: temp 39 | emptyDir: {} -------------------------------------------------------------------------------- /clusters/demo-001/infra/security/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: security 4 | resources: 5 | - ../../../../infra/security 6 | 7 | patches: 8 | - path: kyverno.patch.yaml 9 | target: 10 | kind: HelmRelease 11 | name: kyverno-policies 12 | # Secrets are automatically synced upon update across namespaces by Reflector. 13 | # This policy makes sure there is a deployment rollout when the certs are updated. 14 | - patch: |- 15 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 16 | kind: HelmRelease 17 | metadata: 18 | name: default-kyverno-cluster-policies 19 | spec: 20 | values: 21 | restart_deployment_on_secret_change: 22 | - deployment: demo001-app 23 | namespace: app 24 | secret: 25 | name: cluster-ca-root-secret 26 | namespace: app 27 | 28 | target: 29 | kind: HelmRelease 30 | name: default-kyverno-cluster-policies -------------------------------------------------------------------------------- /clusters/demo-000/kubernetes-api-ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | annotations: 6 | nginx.ingress.kubernetes.io/ssl-passthrough: "true" 7 | name: kubernetes 8 | namespace: default 9 | spec: 10 | ingressClassName: nginx 11 | rules: 12 | - host: k8s-api.example.com 13 | http: 14 | paths: 15 | - backend: 16 | service: 17 | name: kubernetes 18 | port: 19 | number: 443 20 | path: / 21 | pathType: ImplementationSpecific 22 | tls: 23 | - hosts: 24 | - k8s-api.example.com 25 | 26 | # Allow cert-manager acme solvers 27 | --- 28 | apiVersion: networking.k8s.io/v1 29 | kind: NetworkPolicy 30 | metadata: 31 | name: allow-ingress-acme-solver 32 | namespace: default 33 | spec: 34 | ingress: 35 | - ports: 36 | - port: 8089 37 | protocol: TCP 38 | podSelector: 39 | matchLabels: 40 | acme.cert-manager.io/http01-solver: "true" 41 | policyTypes: 42 | - Ingress -------------------------------------------------------------------------------- /clusters/demo-001/kubernetes-api-ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | annotations: 6 | nginx.ingress.kubernetes.io/ssl-passthrough: "true" 7 | name: kubernetes 8 | namespace: default 9 | spec: 10 | ingressClassName: nginx 11 | rules: 12 | - host: k8s-api-001.example.com 13 | http: 14 | paths: 15 | - backend: 16 | service: 17 | name: kubernetes 18 | port: 19 | number: 443 20 | path: / 21 | pathType: ImplementationSpecific 22 | tls: 23 | - hosts: 24 | - k8s-api-001.example.com 25 | 26 | # Allow cert-manager acme solvers 27 | --- 28 | apiVersion: networking.k8s.io/v1 29 | kind: NetworkPolicy 30 | metadata: 31 | name: allow-ingress-acme-solver 32 | namespace: default 33 | spec: 34 | ingress: 35 | - ports: 36 | - port: 8089 37 | protocol: TCP 38 | podSelector: 39 | matchLabels: 40 | acme.cert-manager.io/http01-solver: "true" 41 | policyTypes: 42 | - Ingress -------------------------------------------------------------------------------- /clusters/demo-000/infra/security/kustomization.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: security 4 | resources: 5 | - ../../../../infra/security 6 | 7 | patches: 8 | - path: kyverno.patch.yaml 9 | target: 10 | kind: HelmRelease 11 | name: kyverno-policies 12 | # Secrets are automatically synced upon update across namespaces by Reflector. 13 | # This policy makes sure there is a deployment rollout when the certs are updated. 14 | - patch: |- 15 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 16 | kind: HelmRelease 17 | metadata: 18 | name: default-kyverno-cluster-policies 19 | spec: 20 | values: 21 | restart_deployment_on_secret_change: 22 | - deployment: 23 | namespace: demoapp-auth-prod 24 | secret: 25 | name: cluster-ca-root-secret 26 | namespace: demoapp-auth-prod 27 | 28 | target: 29 | kind: HelmRelease 30 | name: default-kyverno-cluster-policies -------------------------------------------------------------------------------- /infra/ingress/netpol.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: default-namespace-network-policies 6 | spec: 7 | chart: 8 | spec: 9 | chart: ./helm/charts/security/default-namespace-network-policies 10 | sourceRef: 11 | kind: GitRepository 12 | name: flux-system 13 | namespace: flux-system 14 | interval: 1h0m0s 15 | releaseName: default-namespace-network-policies 16 | timeout: 10m 17 | install: 18 | remediation: 19 | retries: 3 20 | crds: Create 21 | upgrade: 22 | remediation: 23 | retries: 3 24 | crds: CreateReplace 25 | values: 26 | allow_namespace_traffic: false 27 | allow_ingress_cluster_traffic: true 28 | allow_egress_cluster_traffic: true 29 | allow_ingress_private_traffic: false 30 | allow_egress_private_traffic: false 31 | allow_monitoring: true 32 | allow_apiserver: true 33 | allow_internet: true 34 | allow_ingress_traffic: false 35 | allow_egress_traffic: false 36 | -------------------------------------------------------------------------------- /infra/observability/netpol.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: default-namespace-network-policies 6 | spec: 7 | chart: 8 | spec: 9 | chart: ./helm/charts/security/default-namespace-network-policies 10 | sourceRef: 11 | kind: GitRepository 12 | name: flux-system 13 | namespace: flux-system 14 | interval: 1h0m0s 15 | releaseName: default-namespace-network-policies 16 | timeout: 10m 17 | install: 18 | remediation: 19 | retries: 3 20 | crds: Create 21 | upgrade: 22 | remediation: 23 | retries: 3 24 | crds: CreateReplace 25 | values: 26 | allow_namespace_traffic: true 27 | allow_ingress_cluster_traffic: true 28 | allow_egress_cluster_traffic: true 29 | allow_ingress_private_traffic: true 30 | allow_egress_private_traffic: true 31 | allow_monitoring: true 32 | allow_apiserver: true 33 | allow_internet: false 34 | allow_ingress_traffic: true 35 | allow_egress_traffic: false 36 | -------------------------------------------------------------------------------- /infra/ingress/cert-manager/ca.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: cert-manager.io/v1 3 | kind: ClusterIssuer 4 | metadata: 5 | name: selfsigned-issuer 6 | spec: 7 | selfSigned: {} 8 | 9 | --- 10 | apiVersion: 
cert-manager.io/v1 11 | kind: Certificate 12 | metadata: 13 | name: cluster-ca 14 | spec: 15 | isCA: true 16 | commonName: cluster-ca 17 | secretName: cluster-ca-root-secret 18 | duration: 8760h # 365d 19 | renewBefore: 4380h # 180d 20 | secretTemplate: 21 | annotations: 22 | reflector.v1.k8s.emberstack.com/reflection-allowed: "true" 23 | reflector.v1.k8s.emberstack.com/reflection-allowed-namespaces: "" 24 | reflector.v1.k8s.emberstack.com/reflection-auto-enabled: "true" 25 | privateKey: 26 | algorithm: ECDSA 27 | size: 256 28 | issuerRef: 29 | name: selfsigned-issuer 30 | kind: ClusterIssuer 31 | group: cert-manager.io 32 | --- 33 | apiVersion: cert-manager.io/v1 34 | kind: ClusterIssuer 35 | metadata: 36 | name: cluster-ca-issuer 37 | spec: 38 | ca: 39 | secretName: cluster-ca-root-secret 40 | -------------------------------------------------------------------------------- /clusters/demo-000/flux-system/netpol.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: default-namespace-network-policies 6 | spec: 7 | chart: 8 | spec: 9 | chart: ./charts/security/default-namespace-network-policies 10 | sourceRef: 11 | kind: GitRepository 12 | name: flux-system 13 | namespace: flux-system 14 | interval: 1h0m0s 15 | releaseName: default-namespace-network-policies 16 | timeout: 10m 17 | install: 18 | remediation: 19 | retries: 3 20 | crds: Create 21 | upgrade: 22 | remediation: 23 | retries: 3 24 | crds: CreateReplace 25 | values: 26 | allow_namespace_traffic: false 27 | allow_ingress_cluster_traffic: false 28 | allow_egress_cluster_traffic: false 29 | allow_ingress_private_traffic: false 30 | allow_egress_private_traffic: false 31 | allow_monitoring: true 32 | allow_apiserver: true 33 | allow_internet: false 34 | allow_ingress_traffic: false 35 | allow_egress_traffic: false 36 | -------------------------------------------------------------------------------- /clusters/demo-001/flux-system/netpol.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: default-namespace-network-policies 6 | spec: 7 | chart: 8 | spec: 9 | chart: ./charts/security/default-namespace-network-policies 10 | sourceRef: 11 | kind: GitRepository 12 | name: flux-system 13 | namespace: flux-system 14 | interval: 1h0m0s 15 | releaseName: default-namespace-network-policies 16 | timeout: 10m 17 | install: 18 | remediation: 19 | retries: 3 20 | crds: Create 21 | upgrade: 22 | remediation: 23 | retries: 3 24 | crds: CreateReplace 25 | values: 26 | allow_namespace_traffic: false 27 | allow_ingress_cluster_traffic: false 28 | allow_egress_cluster_traffic: false 29 | allow_ingress_private_traffic: false 30 | allow_egress_private_traffic: false 31 | allow_monitoring: true 32 | allow_apiserver: true 33 | allow_internet: false 34 | allow_ingress_traffic: false 35 | allow_egress_traffic: false 36 | -------------------------------------------------------------------------------- /infra/storage/local-path-storage/netpol.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: default-namespace-network-policies 6 | namespace: local-path-storage 7 | spec: 8 | chart: 9 | spec: 10 | chart: ./helm/charts/security/default-namespace-network-policies 11 | 
sourceRef: 12 | kind: GitRepository 13 | name: flux-system 14 | namespace: flux-system 15 | interval: 1h0m0s 16 | releaseName: default-namespace-network-policies 17 | timeout: 10m 18 | install: 19 | remediation: 20 | retries: 3 21 | crds: Create 22 | upgrade: 23 | remediation: 24 | retries: 3 25 | crds: CreateReplace 26 | values: 27 | allow_namespace_traffic: false 28 | allow_ingress_cluster_traffic: true 29 | allow_egress_cluster_traffic: true 30 | allow_ingress_private_traffic: false 31 | allow_egress_private_traffic: false 32 | allow_monitoring: false 33 | allow_apiserver: true 34 | allow_internet: false 35 | allow_ingress_traffic: false 36 | allow_egress_traffic: false 37 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/allow-egress-from-all-to-kube-dns.yaml: -------------------------------------------------------------------------------- 1 | kind: NetworkPolicy 2 | apiVersion: networking.k8s.io/v1 3 | metadata: 4 | labels: 5 | {{- include "default-namespace-network-policies.labels" . | nindent 4 }} 6 | name: allow-egress-from-all-to-kube-dns 7 | spec: 8 | policyTypes: 9 | - Egress 10 | podSelector: {} 11 | egress: 12 | - ports: 13 | - port: 53 14 | protocol: TCP 15 | - port: 53 16 | protocol: UDP 17 | to: 18 | - namespaceSelector: 19 | matchLabels: 20 | kubernetes.io/metadata.name: kube-system 21 | podSelector: 22 | matchLabels: 23 | k8s-app: kube-dns 24 | # allow nodelocaldns queries: https://kubernetes.io/docs/tasks/administer-cluster/nodelocaldns/#configuration 25 | - ipBlock: 26 | cidr: 169.254.20.0/16 27 | # allow the pods to contact DNS service address: https://cloud.google.com/kubernetes-engine/docs/how-to/nodelocal-dns-cache#network_policy_with 28 | - ipBlock: 29 | cidr: {{ include "default-namespace-network-policies.KubeDNSAddress" . }} 30 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/flux-ks.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 3 | kind: Kustomization 4 | metadata: 5 | name: demoapp-auth-dev 6 | namespace: flux-system 7 | spec: 8 | dependsOn: 9 | - name: operations 10 | interval: 10m0s 11 | sourceRef: 12 | kind: GitRepository 13 | name: flux-system 14 | path: ./clusters/demo-000/apps/demoapp-auth/dev 15 | prune: true 16 | 17 | --- 18 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 19 | kind: Kustomization 20 | metadata: 21 | name: demoapp-auth-stg 22 | namespace: flux-system 23 | spec: 24 | dependsOn: 25 | - name: operations 26 | interval: 10m0s 27 | sourceRef: 28 | kind: GitRepository 29 | name: flux-system 30 | path: ./clusters/demo-000/apps/demoapp-auth/stg 31 | prune: true 32 | 33 | --- 34 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 35 | kind: Kustomization 36 | metadata: 37 | name: demoapp-auth-prod 38 | namespace: flux-system 39 | spec: 40 | dependsOn: 41 | - name: operations 42 | interval: 10m0s 43 | sourceRef: 44 | kind: GitRepository 45 | name: flux-system 46 | path: ./clusters/demo-000/apps/demoapp-auth/prod 47 | prune: true 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # flux2-general 2 | A repo with FluxCD reference structure. 3 | 4 | An existing Kubernetes cluster is required before using this repository. 
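Access to that cluster can be confirmed with a standard kubectl check before bootstrapping, for example:

    kubectl cluster-info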
5 | 6 | For a demo case it is enough to start a minikube node: 7 | 8 | minikube start 9 | 10 | For a quick start consult with bootstrap procedure: 11 | 12 | [FluxCD bootstrap](https://github.com/artazar/flux2-general/tree/main/clusters/demo-000/flux-system) 13 | 14 | The structure of cluster and grouped resources: 15 | 16 | [demo-app cluster structure](https://github.com/artazar/flux2-general/tree/main/clusters/demo-000) 17 | 18 | ## Auto-update notes 19 | 20 | The repository contains three techniques to allow automatic component updates: 21 | 22 | 1. For Flux specifically - [flux-update workflow](https://github.com/artazar/flux2-general/blob/main/.github/workflows/flux-update.yaml) 23 | 24 | 2. For HelmRelease objects, version range can be indicated - [example kyverno manifest](https://github.com/artazar/flux2-general/blob/main/infra/security/kyverno.yaml#L14) 25 | 26 | 3. Global auto-update flow via [renovate addon](https://github.com/renovatebot/renovate) - [workflow](https://github.com/artazar/flux2-general/blob/main/.github/workflows/renovate.yaml) 27 | -------------------------------------------------------------------------------- /infra/operations/descheduler/configmap.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: descheduler-policy-configmap 6 | namespace: kube-system 7 | data: 8 | policy.yaml: | 9 | apiVersion: "descheduler/v1alpha2" 10 | kind: "DeschedulerPolicy" 11 | profiles: 12 | - name: Default 13 | pluginConfig: 14 | - name: "DefaultEvictor" 15 | args: 16 | evictLocalStoragePods: true 17 | - name: "RemovePodsViolatingInterPodAntiAffinity" 18 | - name: "RemovePodsViolatingNodeTaints" 19 | - name: "RemoveDuplicates" 20 | - name: "LowNodeUtilization" 21 | args: 22 | thresholds: 23 | "cpu" : 20 24 | "memory": 20 25 | "pods": 20 26 | targetThresholds: 27 | "cpu" : 75 28 | "memory": 75 29 | "pods": 75 30 | plugins: 31 | balance: 32 | enabled: 33 | - "LowNodeUtilization" 34 | - "RemoveDuplicates" 35 | deschedule: 36 | enabled: 37 | - "RemovePodsViolatingInterPodAntiAffinity" 38 | - "RemovePodsViolatingNodeTaints" 39 | -------------------------------------------------------------------------------- /infra/observability/victoriametrics.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: victoriametrics 6 | namespace: observability 7 | spec: 8 | chart: 9 | spec: 10 | chart: victoria-metrics-single 11 | sourceRef: 12 | kind: HelmRepository 13 | name: victoriametrics 14 | namespace: flux-system 15 | version: '>=0.8.22 <1.0.0' 16 | interval: 1h0m0s 17 | timeout: 10m 18 | install: 19 | remediation: 20 | retries: 3 21 | crds: Create 22 | upgrade: 23 | remediation: 24 | retries: 3 25 | crds: CreateReplace 26 | values: 27 | rbac: 28 | pspEnabled: false # deprecated 29 | server: 30 | fullnameOverride: victoriametrics 31 | retentionPeriod: 1 32 | tolerations: 33 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 34 | operator: Exists 35 | - key: node-role.kubernetes.io/control-plane 36 | operator: Exists 37 | persistentVolume: 38 | storageClass: local-path 39 | size: 200Gi # with local-path the size is not actually treated, but we put it here to show how we estimate data usage 40 | -------------------------------------------------------------------------------- 
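A quick way to confirm that HelmReleases such as the ones above have reconciled is the Flux CLI (a suggested check, not part of the manifests):

    flux get helmreleases -n observability
    flux get helmreleases -n kube-system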
/infra/operations/secret-generator.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: secret-generator 6 | namespace: kube-system 7 | spec: 8 | chart: 9 | spec: 10 | chart: kubernetes-secret-generator 11 | sourceRef: 12 | kind: HelmRepository 13 | name: mittwald 14 | namespace: flux-system 15 | version: '>=3.4.0 <4.0.0' 16 | interval: 1h0m0s 17 | releaseName: secret-generator 18 | timeout: 60m 19 | install: 20 | remediation: 21 | retries: 3 22 | crds: Create 23 | upgrade: 24 | remediation: 25 | retries: 3 26 | crds: CreateReplace 27 | values: 28 | fullnameOverride: secret-generator 29 | tolerations: 30 | - key: node-role.kubernetes.io/master 31 | operator: Exists 32 | - key: node-role.kubernetes.io/control-plane 33 | operator: Exists 34 | containerSecurityContext: 35 | allowPrivilegeEscalation: false 36 | capabilities: 37 | drop: 38 | - ALL 39 | privileged: false 40 | readOnlyRootFilesystem: true 41 | runAsNonRoot: true 42 | podSecurityContext: 43 | fsGroup: 2000 44 | runAsNonRoot: true 45 | seccompProfile: 46 | type: RuntimeDefault -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/templates/disable-automountserviceaccounttoken.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.disable_automountserviceaccounttoken }} 2 | 3 | --- 4 | apiVersion: kyverno.io/v1 5 | kind: ClusterPolicy 6 | metadata: 7 | name: disable-automountserviceaccounttoken 8 | annotations: 9 | policies.kyverno.io/title: Disable automountServiceAccountToken 10 | policies.kyverno.io/category: Other 11 | policies.kyverno.io/severity: medium 12 | policies.kyverno.io/subject: ServiceAccount 13 | kyverno.io/kyverno-version: 1.5.1 14 | kyverno.io/kubernetes-version: "1.21" 15 | policies.kyverno.io/description: >- 16 | A new ServiceAccount called `default` is created whenever a new Namespace is created. 17 | Pods spawned in that Namespace, unless otherwise set, will be assigned this ServiceAccount. 18 | This policy mutates any new `default` ServiceAccounts to disable auto-mounting of the token 19 | into Pods obviating the need to do so individually. 20 | spec: 21 | rules: 22 | - name: disable-automountserviceaccounttoken 23 | match: 24 | resources: 25 | kinds: 26 | - ServiceAccount 27 | names: 28 | - default 29 | mutate: 30 | patchStrategicMerge: 31 | automountServiceAccountToken: false 32 | 33 | {{- end }} 34 | -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: default-kyverno-cluster-policies 3 | description: | 4 | This is a combination of default cluster policies to be installed in any cluster for a standard security baseline. 5 | 6 | # A chart can be either an 'application' or a 'library' chart. 7 | # 8 | # Application charts are a collection of templates that can be packaged into versioned archives 9 | # to be deployed. 10 | # 11 | # Library charts provide useful utilities or functions for the chart developer. They're included as 12 | # a dependency of application charts to inject those utilities and functions into the rendering 13 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 
14 | type: application 15 | 16 | # This is the chart version. This version number should be incremented each time you make changes 17 | # to the chart and its templates, including the app version. 18 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 19 | version: 0.1.2 20 | 21 | # This is the version number of the application being deployed. This version number should be 22 | # incremented each time you make changes to the application. Versions are not expected to 23 | # follow Semantic Versioning. They should reflect the version the application is using. 24 | appVersion: 0.1.0 25 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for default-namespace-network-policies. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 4 | 5 | # -- Supported values for cluster type: "gke" or "self-hosted" 6 | k8s_cluster_type: "self-hosted" 7 | 8 | # -- Namespace where Prometheus instance is running 9 | monitoring_namespace: "observability" 10 | 11 | # -- Allow any traffic between the applications in this namespace 12 | allow_namespace_traffic: false 13 | 14 | # -- Allow incoming traffic from other cluster namespaces 15 | allow_ingress_cluster_traffic: false 16 | 17 | # -- Allow outgoing traffic to other cluster namespaces 18 | allow_egress_cluster_traffic: false 19 | 20 | # -- Allow incoming traffic from internal networks 21 | allow_ingress_private_traffic: false 22 | 23 | # -- Allow outgoing traffic to internal networks 24 | allow_egress_private_traffic: false 25 | 26 | # -- Allow the namespace to be monitored by Prometheus 27 | allow_monitoring: false 28 | 29 | # -- Allow the namespace workloads access kube-apiserver endpoint 30 | allow_apiserver: false 31 | 32 | # -- Allow Internet connectivity 33 | allow_internet: false 34 | 35 | # -- Allow all incoming traffic 36 | allow_ingress_traffic: false 37 | 38 | # -- Allow all outgoing traffic 39 | allow_egress_traffic: false 40 | -------------------------------------------------------------------------------- /infra/observability/alerts/minio.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-minio 5 | spec: 6 | groups: 7 | - name: minio.rules 8 | rules: 9 | - alert: MinioDiskOffline 10 | expr: minio_cluster_disk_offline_total > 0 11 | for: 1m 12 | labels: 13 | severity: critical 14 | annotations: 15 | summary: Minio disk offline (instance {{ $labels.instance }}) 16 | description: "Minio disk is offline\n VALUE = {{ $value }}\n" 17 | - alert: MinioDiskSpaceUsage 18 | expr: minio_cluster_capacity_usable_free_bytes / minio_cluster_capacity_usable_total_bytes * 100 < 10 19 | for: 1m 20 | labels: 21 | severity: warning 22 | annotations: 23 | summary: Minio disk space usage (instance {{ $labels.instance }}) 24 | description: 'Minio available free space is low (< 10%), VALUE = {{ $value | printf "%.2f" }}' 25 | - alert: MinioEventsQueuePressure 26 | expr: max by (target_id, target_name) (minio_notify_target_queue_length) > 0 27 | for: 5m 28 | labels: 29 | severity: warning 30 | annotations: 31 | summary: Minio fails to publish events 32 | description: 'Minio {{ $labels.target_name }} event notification for {{ $labels.target_id }} fails to get 
delivered. Current count of events in queue = {{ $value | printf "%.2f" }}' 33 | -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/README.md: -------------------------------------------------------------------------------- 1 | # default-kyverno-cluster-policies 2 | 3 | ![Version: 0.1.2](https://img.shields.io/badge/Version-0.1.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.0](https://img.shields.io/badge/AppVersion-0.1.0-informational?style=flat-square) 4 | 5 | This is a combination of default cluster policies to be installed in any cluster for a standard security baseline. 6 | 7 | ## Values 8 | 9 | | Key | Type | Default | Description | 10 | |-----|------|---------|-------------| 11 | | disable_automountserviceaccounttoken | bool | `true` | Do not mount service account token into pods by default | 12 | | disallow_container_sock_mounts | bool | `true` | Do not allow container engine socket mounts into pods | 13 | | disallow_latest_tag | bool | `true` | Do not allow to use :latest tag on container images | 14 | | require_ro_rootfs | bool | `true` | Only allow read-only root FS inside pods | 15 | | restart_deployment_on_secret_change | list | `[]` | Restart deployments on secret change | 16 | | validationFailureAction | string | `"enforce"` | 'enforce' to apply restrictions and 'audit' to store failures in policy reports only | 17 | 18 | ---------------------------------------------- 19 | Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0) 20 | -------------------------------------------------------------------------------- /apps/demoapp-auth/netpol.yaml: -------------------------------------------------------------------------------- 1 | # Allow traffic between api-gateway and all demoapp-auth pods 2 | --- 3 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 4 | kind: HelmRelease 5 | metadata: 6 | name: default-namespace-network-policies 7 | spec: 8 | chart: 9 | spec: 10 | chart: ./charts/security/default-namespace-network-policies 11 | sourceRef: 12 | kind: GitRepository 13 | name: flux-system 14 | namespace: flux-system 15 | interval: 1h0m0s 16 | releaseName: default-namespace-network-policies 17 | timeout: 10m 18 | install: 19 | remediation: 20 | retries: 3 21 | crds: Create 22 | upgrade: 23 | remediation: 24 | retries: 3 25 | crds: CreateReplace 26 | values: 27 | allow_namespace_traffic: true 28 | allow_ingress_cluster_traffic: false 29 | allow_egress_cluster_traffic: false 30 | allow_ingress_private_traffic: false 31 | allow_egress_private_traffic: false 32 | allow_monitoring: true 33 | allow_apiserver: false 34 | allow_internet: false 35 | allow_ingress_traffic: false 36 | allow_egress_traffic: false 37 | 38 | --- 39 | apiVersion: networking.k8s.io/v1 40 | kind: NetworkPolicy 41 | metadata: 42 | name: allow-egress-from-movie-to-external 43 | spec: 44 | egress: 45 | - ports: 46 | - port: 443 47 | protocol: TCP 48 | podSelector: 49 | matchLabels: 50 | app: demoapp-auth-movie 51 | policyTypes: 52 | - Egress 53 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: default-namespace-network-policies 3 | description: | 4 | This is a combination of 
default network policies within a namespace. 5 | No network communication is allowed by default, except for DNS resolution. 6 | Set all values to 'true' to remove all restrictions. 7 | 8 | # A chart can be either an 'application' or a 'library' chart. 9 | # 10 | # Application charts are a collection of templates that can be packaged into versioned archives 11 | # to be deployed. 12 | # 13 | # Library charts provide useful utilities or functions for the chart developer. They're included as 14 | # a dependency of application charts to inject those utilities and functions into the rendering 15 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 16 | type: application 17 | 18 | # This is the chart version. This version number should be incremented each time you make changes 19 | # to the chart and its templates, including the app version. 20 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 21 | version: 0.1.0 22 | 23 | # This is the version number of the application being deployed. This version number should be 24 | # incremented each time you make changes to the application. Versions are not expected to 25 | # follow Semantic Versioning. They should reflect the version the application is using. 26 | appVersion: 0.1.0 27 | -------------------------------------------------------------------------------- /infra/operations/descheduler/rbac.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: descheduler-cluster-role 6 | rules: 7 | - apiGroups: [""] 8 | resources: ["events"] 9 | verbs: ["create", "update"] 10 | - apiGroups: [""] 11 | resources: ["nodes"] 12 | verbs: ["get", "watch", "list"] 13 | - apiGroups: [""] 14 | resources: ["namespaces"] 15 | verbs: ["get", "watch", "list"] 16 | - apiGroups: [""] 17 | resources: ["pods"] 18 | verbs: ["get", "watch", "list", "delete"] 19 | - apiGroups: [""] 20 | resources: ["pods/eviction"] 21 | verbs: ["create"] 22 | - apiGroups: ["scheduling.k8s.io"] 23 | resources: ["priorityclasses"] 24 | verbs: ["get", "watch", "list"] 25 | - apiGroups: ["coordination.k8s.io"] 26 | resources: ["leases"] 27 | verbs: ["create"] 28 | - apiGroups: ["coordination.k8s.io"] 29 | resources: ["leases"] 30 | resourceNames: ["descheduler"] 31 | verbs: ["get", "patch", "delete"] 32 | --- 33 | apiVersion: v1 34 | kind: ServiceAccount 35 | metadata: 36 | name: descheduler-sa 37 | namespace: kube-system 38 | --- 39 | apiVersion: rbac.authorization.k8s.io/v1 40 | kind: ClusterRoleBinding 41 | metadata: 42 | name: descheduler-cluster-role-binding 43 | roleRef: 44 | apiGroup: rbac.authorization.k8s.io 45 | kind: ClusterRole 46 | name: descheduler-cluster-role 47 | subjects: 48 | - name: descheduler-sa 49 | kind: ServiceAccount 50 | namespace: kube-system 51 | -------------------------------------------------------------------------------- /.github/workflows/flux-update.yaml: -------------------------------------------------------------------------------- 1 | name: Flux - update version 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "30 4 * * 1" 7 | 8 | jobs: 9 | update: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | steps: 15 | - name: Check out code 16 | uses: actions/checkout@v3 17 | - name: Setup Flux CLI 18 | uses: fluxcd/flux2/action@main 19 | - name: Check for updates 20 | id: update 21 | run: | 22 | for cluster in 
demo-000 demo-001 23 | do 24 | flux install \ 25 | --toleration-keys='node-role.kubernetes.io/master','node-role.kubernetes.io/control-plane' \ 26 | --export > ./clusters/$cluster/flux-system/gotk-components.yaml 27 | done 28 | 29 | VERSION="$(flux -v)" 30 | echo "flux_version=$VERSION" >> $GITHUB_OUTPUT 31 | - name: Create Pull Request 32 | uses: peter-evans/create-pull-request@v4 33 | with: 34 | token: ${{ secrets.GITHUB_TOKEN }} 35 | branch: update-flux 36 | commit-message: "[github automated] flux: update to ${{ steps.update.outputs.flux_version }}" 37 | title: "[github automated] flux: update to ${{ steps.update.outputs.flux_version }}" 38 | body: | 39 | Flux is going to be updated to version ${{ steps.update.outputs.flux_version }} 40 | -------------------------------------------------------------------------------- /apps/demoapp-data-processor/netpol.yaml: -------------------------------------------------------------------------------- 1 | # Allow traffic between api-gateway and all demoapp-data-processor pods 2 | --- 3 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 4 | kind: HelmRelease 5 | metadata: 6 | name: default-namespace-network-policies 7 | spec: 8 | chart: 9 | spec: 10 | chart: ./charts/security/default-namespace-network-policies 11 | sourceRef: 12 | kind: GitRepository 13 | name: flux-system 14 | namespace: flux-system 15 | interval: 1h0m0s 16 | releaseName: default-namespace-network-policies 17 | timeout: 10m 18 | install: 19 | remediation: 20 | retries: 3 21 | crds: Create 22 | upgrade: 23 | remediation: 24 | retries: 3 25 | crds: CreateReplace 26 | values: 27 | allow_namespace_traffic: true 28 | allow_ingress_cluster_traffic: false 29 | allow_egress_cluster_traffic: false 30 | allow_ingress_private_traffic: false 31 | allow_egress_private_traffic: false 32 | allow_monitoring: true 33 | allow_apiserver: false 34 | allow_internet: false 35 | allow_ingress_traffic: false 36 | allow_egress_traffic: false 37 | 38 | --- 39 | apiVersion: networking.k8s.io/v1 40 | kind: NetworkPolicy 41 | metadata: 42 | name: allow-egress-from-movie-to-external 43 | spec: 44 | egress: 45 | - ports: 46 | - port: 443 47 | protocol: TCP 48 | podSelector: 49 | matchLabels: 50 | app: demoapp-data-processor-solve 51 | policyTypes: 52 | - Egress 53 | -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/templates/disallow-latest-tag.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.disallow_latest_tag }} 2 | 3 | --- 4 | apiVersion: kyverno.io/v1 5 | kind: ClusterPolicy 6 | metadata: 7 | name: disallow-latest-tag 8 | annotations: 9 | policies.kyverno.io/title: Disallow Latest Tag 10 | policies.kyverno.io/category: Best Practices 11 | policies.kyverno.io/severity: medium 12 | policies.kyverno.io/subject: Pod 13 | policies.kyverno.io/description: >- 14 | The ':latest' tag is mutable and can lead to unexpected errors if the 15 | image changes. A best practice is to use an immutable tag that maps to 16 | a specific version of an application Pod. This policy validates that the image 17 | specifies a tag and that it is not called `latest`. 18 | spec: 19 | validationFailureAction: {{ .Values.validationFailureAction }} 20 | background: true 21 | rules: 22 | - name: require-image-tag 23 | match: 24 | resources: 25 | kinds: 26 | - Pod 27 | validate: 28 | message: "An image tag is required." 
29 | pattern: 30 | spec: 31 | containers: 32 | - image: "*:*" 33 | - name: validate-image-tag 34 | match: 35 | resources: 36 | kinds: 37 | - Pod 38 | validate: 39 | message: "Using a mutable image tag e.g. 'latest' is not allowed." 40 | pattern: 41 | spec: 42 | containers: 43 | - image: "!*:latest" 44 | 45 | {{- end }} 46 | -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/templates/require-ro-rootfs.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.require_ro_rootfs }} 2 | 3 | --- 4 | apiVersion: kyverno.io/v1 5 | kind: ClusterPolicy 6 | metadata: 7 | name: require-ro-rootfs 8 | annotations: 9 | policies.kyverno.io/title: Require Read-Only Root Filesystem 10 | policies.kyverno.io/category: Best Practices 11 | policies.kyverno.io/severity: medium 12 | policies.kyverno.io/subject: Pod 13 | policies.kyverno.io/description: >- 14 | A read-only root file system helps to enforce an immutable infrastructure strategy; 15 | the container only needs to write on the mounted volume that persists the state. 16 | An immutable root filesystem can also prevent malicious binaries from writing to the 17 | host system. This policy validates that containers define a securityContext 18 | with `readOnlyRootFilesystem: true`. 19 | spec: 20 | validationFailureAction: {{ .Values.validationFailureAction }} 21 | background: true 22 | rules: 23 | - name: validate-readOnlyRootFilesystem 24 | match: 25 | resources: 26 | kinds: 27 | - Pod 28 | validate: 29 | message: "Root filesystem must be read-only." 30 | pattern: 31 | spec: 32 | containers: 33 | - securityContext: 34 | readOnlyRootFilesystem: true 35 | exclude: 36 | resources: 37 | kinds: 38 | - Pod 39 | namespaces: 40 | - ingress 41 | names: 42 | - ingress-nginx-* 43 | 44 | {{- end }} 45 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/stg/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: demoapp-auth-stg 4 | resources: 5 | - namespace.yaml 6 | - ingress.yaml 7 | - ../../../../../apps/demoapp-auth 8 | images: 9 | - name: nginxinc/nginx-unprivileged 10 | newTag: '1.22' 11 | configMapGenerator: 12 | - name: demoapp-auth-movie 13 | behavior: merge 14 | literals: 15 | - BASE_URL="https://stg.movie.demoapp-auth.example.com" 16 | - FOOD_URL="http://demoapp-auth-food" 17 | - SLEEP_URL="http://demoapp-auth-sleep" 18 | - name: demoapp-auth-food 19 | behavior: merge 20 | literals: 21 | - SLEEP_URL="http://demoapp-auth-sleep" 22 | - name: demoapp-auth-sleep 23 | behavior: merge 24 | literals: 25 | - TIMEOUT="480" 26 | patches: 27 | # Add tolerations 28 | - patch: |- 29 | apiVersion: apps/v1 30 | kind: Deployment 31 | metadata: 32 | name: any 33 | spec: 34 | template: 35 | spec: 36 | tolerations: 37 | - key: dedicated 38 | operator: Equal 39 | value: stg 40 | target: 41 | kind: Deployment 42 | labelSelector: group=demoapp-auth 43 | # Add variable indicating environment 44 | - patch: |- 45 | - op: add 46 | path: "/spec/template/spec/containers/0/env/-" 47 | value: 48 | name: ENVIRONMENT 49 | value: stg 50 | target: 51 | kind: Deployment 52 | labelSelector: group=demoapp-auth 53 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/prod/kustomization.yaml:
-------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: demoapp-auth-dev 4 | resources: 5 | - namespace.yaml 6 | - ingress.yaml 7 | - ../../../../../apps/demoapp-auth 8 | images: 9 | - name: nginxinc/nginx-unprivileged 10 | newTag: '1.22' 11 | configMapGenerator: 12 | - name: demoapp-auth-movie 13 | behavior: merge 14 | literals: 15 | - BASE_URL="https://prod.movie.demoapp-auth.example.com" 16 | - FOOD_URL="http://demoapp-auth-food" 17 | - SLEEP_URL="http://demoapp-auth-sleep" 18 | - name: demoapp-auth-food 19 | behavior: merge 20 | literals: 21 | - SLEEP_URL="http://demoapp-auth-sleep" 22 | - name: demoapp-auth-sleep 23 | behavior: merge 24 | literals: 25 | - TIMEOUT="480" 26 | patches: 27 | # Add tolerations 28 | - patch: |- 29 | apiVersion: apps/v1 30 | kind: Deployment 31 | metadata: 32 | name: any 33 | spec: 34 | template: 35 | spec: 36 | tolerations: 37 | - key: dedicated 38 | operator: Equal 39 | value: prod 40 | target: 41 | kind: Deployment 42 | labelSelector: group=demoapp-auth 43 | # Add variable indicating environment 44 | - patch: |- 45 | - op: add 46 | path: "/spec/template/spec/containers/0/env/-" 47 | value: 48 | name: ENVIRONMENT 49 | value: prod 50 | target: 51 | kind: Deployment 52 | labelSelector: group=demoapp-auth 53 | -------------------------------------------------------------------------------- /infra/observability/alerts/nginx.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-nginx 5 | spec: 6 | groups: 7 | - name: nginx.rules 8 | rules: 9 | - alert: NginxHighHttp4xxErrorRate 10 | expr: sum(rate(nginx_ingress_controller_requests{status=~"^4.."}[1m])) by (host, node) / sum(rate(nginx_ingress_controller_requests[1m])) by (host, node) * 100 > 30 11 | for: 15m 12 | labels: 13 | severity: high 14 | annotations: 15 | summary: Nginx high HTTP 4xx error rate (instance {{ $labels.host }}) 16 | description: 'Too many HTTP requests with status 4xx (> 5%)\n VALUE = {{ $value | printf "%.2f" }}' 17 | - alert: NginxHighHttp5xxErrorRate 18 | expr: sum(rate(nginx_ingress_controller_requests{status=~"^5.."}[1m])) by (host, node) / sum(rate(nginx_ingress_controller_requests[1m])) by (host, node) * 100 > 20 19 | for: 15m 20 | labels: 21 | severity: high 22 | annotations: 23 | summary: Nginx high HTTP 5xx error rate (instance {{ $labels.host }}) 24 | description: 'Too many HTTP requests with status 5xx (> 5%)\n VALUE = {{ $value | printf "%.2f" }}' 25 | - alert: NginxLatencyHigh 26 | expr: histogram_quantile(0.99, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket[2m])) by (host, node)) > 3 27 | for: 2m 28 | labels: 29 | severity: warning 30 | annotations: 31 | summary: Nginx latency high (instance {{ $labels.host }}) 32 | description: 'Nginx p99 latency is higher than 3 seconds\n VALUE = {{ $value | printf "%.2f" }}' 33 | -------------------------------------------------------------------------------- /infra/operations/sealed-secrets/pub-sealed-secrets.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIEzTCCArWgAwIBAgIRAJlQLtaHy4s7Y18o5XLJSukwDQYJKoZIhvcNAQELBQAw 3 | ADAeFw0yMjA0MDQxMTI0MTdaFw0zMjA0MDExMTI0MTdaMAAwggIiMA0GCSqGSIb3 4 | DQEBAQUAA4ICDwAwggIKAoICAQCk6hE33jpc9ikvbrN9rDiqr9bYyy5OmGxa9Ud3 5 | 
g0HP479Uv72djlitFcUI2WAMg4kAStwHitYxTdThGoDQDD0C6kJyuuIDv/1iXGWr 6 | iLYni1pp1aYCKFXRe6b2F4Tl+k7rgBdIsAwOlGes2HbR6ehpYKQqIMxYk7o4Ge1A 7 | EKnlo6G9dweN47UHPxYVVpuCWEkOKZUeWZ5ygXNDl5hatHdFoSsmAkKw8+hZSAha 8 | 44d/A2B/swh3LfA7Tqma8zUt/4Ec2EoaHkwxxU9SbQNtTMj8nDKCbSN6Y2nI9Hsr 9 | PUIgwOu1QZiyPTBQLVF+Mr+vNee6ClVYzgKcNhna+br9XsSDefY1A75l+J9yAaXJ 10 | dvdZzNk+Bh7iYm+9PynP3/JPEq2UKgyX9KG4KXkdmV0Yd/6TFqMLI/ntL8T1akGG 11 | guqqYFHCn4ZLOZl1Uwa24h5vkG83OQtEM6Ghc6D3LvfT1RhZO/XMIETmIB7KBuu3 12 | DOHdAtV9DSVr/m1dSzfRchdKnM4joGPvKtKaSOH4xlDvEPtbKHFXhWSLOk6TeETZ 13 | Is8HAETFRIylIOYIYS2wzx5TijtdW1EPhhHm43mKZMfy//I4R/QFN3qkllVtmSny 14 | Y9cnGStLNlYizJ7dBHB4GhfbjpRhFiYp/k2TinlfzUrDxOFIzMYU/iKcAhk7R4vM 15 | wL9YewIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAAEwDwYDVR0TAQH/BAUwAwEB/zAd 16 | BgNVHQ4EFgQUgdX0qx3u+3i8CS117aaU27HhDh4wDQYJKoZIhvcNAQELBQADggIB 17 | ABxBlnF61DLZ1pP0zvqeL7B7hHYTKbxzweyeK5Reu1Y7P+wIVNZ6TAy58noIweEz 18 | Xo+w5LUnmwivN3Xpcur9Ox50BpPbCco2DnTZRO5CsC0tC85jEoraLdmfHtAHBnj4 19 | CH2ilDclI+Fl7xqNX884Bx1twI+GnMgb5Q+hJlvbBAp24JP2ls9XdZtkIROw6LZ8 20 | 7zjpN1s9ZwI9QSBaAccUT+xUo+JkpAeFJpOm7OKSZ7zuo2PNLycCFyWH3Osfslk4 21 | pA7NIQZUyptx2Vb+JFw877NmdntrxdK4gRaJ5KHpjz35JVqV3ckG8IKXPovqegrj 22 | 0IYjgExAhuEnthEM9m+YrqsnnwkD1b/RNRLT3cZWpSnutNITkApam4I5XSppT1hf 23 | H3YoEUK5HrsZulWwroFiaqpAtXu1qP2nTqe99yLa/uBtWoNgOXWh39juf/NM1mjL 24 | R4xLJKUuRCyhOUY+t+9u1o+6fIwd6GaKwMYn6mao9aV8oGZNirJyZiki/EdoRP/A 25 | Qo453A6l+NJszIfognLwiYAL4nwU/MCaYU8Gb02jOs/GJSo9rVevraaZfOFf1Wjx 26 | MyLpTFuJmCpkrBau3P2UzjJ63ndl0NWb8mjDMYu0WrT64WaWAcSIt6qQ5cOphBeD 27 | PuSm36mw5y8CfH3OKGH6F4j1vQ1LBbVNp1gM84UutX6O 28 | -----END CERTIFICATE----- 29 | -------------------------------------------------------------------------------- /infra/observability/blackbox-exporter.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: prometheus-blackbox-exporter 6 | namespace: observability 7 | spec: 8 | chart: 9 | spec: 10 | chart: prometheus-blackbox-exporter 11 | sourceRef: 12 | kind: HelmRepository 13 | name: prometheus-community 14 | namespace: flux-system 15 | version: '>=8.12.0 <9.0.0' 16 | interval: 1h0m0s 17 | timeout: 10m 18 | install: 19 | remediation: 20 | retries: 3 21 | crds: Create 22 | upgrade: 23 | remediation: 24 | retries: 3 25 | crds: CreateReplace 26 | values: 27 | extraArgs: 28 | - --log.level=debug 29 | tolerations: 30 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 31 | operator: Exists 32 | - key: node-role.kubernetes.io/control-plane 33 | operator: Exists 34 | pspEnabled: false 35 | config: 36 | modules: 37 | http_2xx: 38 | prober: http 39 | timeout: 5s 40 | http: 41 | valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] 42 | follow_redirects: true 43 | preferred_ip_protocol: "ip4" 44 | http_2xx_insecure: 45 | prober: http 46 | timeout: 5s 47 | http: 48 | valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] 49 | follow_redirects: true 50 | preferred_ip_protocol: "ip4" 51 | tls_config: 52 | insecure_skip_verify: true 53 | dns_external: 54 | prober: dns 55 | timeout: 5s 56 | dns: 57 | transport_protocol: "udp" 58 | preferred_ip_protocol: "ip4" 59 | query_name: "www.google.com" 60 | query_type: "A" 61 | valid_rcodes: 62 | - NOERROR 63 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-data-processor/PR/kustomization.yaml: 
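A note on the prometheus-blackbox-exporter release above: the HelmRelease only deploys the prober and its modules; the endpoints to probe are declared separately (this repository keeps them in the per-cluster blackbox-probes.yaml files, which are not reproduced in this dump). Purely as a hedged sketch of how the http_2xx module defined above is typically consumed via a Probe resource — the name, in-cluster service address and target URL are illustrative assumptions, not values taken from this repository:

apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: example-https-probe                 # illustrative name only
  namespace: observability
spec:
  jobName: blackbox
  module: http_2xx                          # module defined in the HelmRelease values above
  prober:
    url: prometheus-blackbox-exporter.observability.svc:9115   # assumed service address
  targets:
    staticConfig:
      static:
        - https://example.com               # placeholder target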
-------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: demoapp-data-processor-$NAME 4 | resources: 5 | - namespace.yaml 6 | - ../../../../../apps/demoapp-data-processor 7 | images: 8 | - name: nginxinc/nginx-unprivileged 9 | newTag: '1.22' 10 | configMapGenerator: 11 | - name: demoapp-data-processor-call 12 | behavior: merge 13 | literals: 14 | - BASE_URL="https://demoapp-data-processor-$NAME.example.com" 15 | - MEET_URL="http://demoapp-data-processor-meet" 16 | - SOLVE_URL="http://demoapp-data-processor-solve" 17 | - name: demoapp-data-processor-meet 18 | behavior: merge 19 | literals: 20 | - SOLVE_URL="http://demoapp-data-processor-solve" 21 | - name: demoapp-data-processor-solve 22 | behavior: merge 23 | literals: 24 | - MEET_URL="http://demoapp-data-processor-meet" 25 | - TIMEOUT="60" 26 | patches: 27 | # No need for resource restrictions on dev 28 | - patch: |- 29 | - op: replace 30 | path: /spec/template/spec/containers/0/resources 31 | value: {} 32 | target: 33 | kind: Deployment 34 | labelSelector: group=demoapp-data-processor 35 | # Add tolerations 36 | - patch: |- 37 | apiVersion: apps/v1 38 | kind: Deployment 39 | metadata: 40 | name: any 41 | spec: 42 | template: 43 | spec: 44 | tolerations: 45 | - key: dedicated 46 | operator: Equal 47 | value: dev 48 | target: 49 | kind: Deployment 50 | labelSelector: group=demoapp-data-processor 51 | # Add variable indicating environment 52 | - patch: |- 53 | - op: add 54 | path: "/spec/template/spec/containers/0/env/-" 55 | value: 56 | name: ENVIRONMENT 57 | value: dev 58 | target: 59 | kind: Deployment 60 | labelSelector: group=demoapp-data-processor 61 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/ingress/ingress-nginx.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 2 | kind: HelmRelease 3 | metadata: 4 | name: ingress-nginx 5 | spec: 6 | releaseName: ingress-nginx 7 | chart: 8 | spec: 9 | chart: ingress-nginx 10 | sourceRef: 11 | kind: HelmRepository 12 | name: ingress-nginx 13 | namespace: flux-system 14 | version: ">=4.0.0 <5.0.0" 15 | interval: 1h0m0s 16 | timeout: 10m 17 | install: 18 | remediation: 19 | retries: 3 20 | upgrade: 21 | remediation: 22 | retries: 3 23 | # Default values 24 | # https://github.com/kubernetes/ingress-nginx/blob/main/charts/ingress-nginx/values.yaml 25 | values: 26 | controller: 27 | kind: DaemonSet 28 | image: 29 | chroot: true 30 | hostPort: 31 | enabled: true 32 | service: 33 | type: ClusterIP 34 | ingressClassResource: 35 | name: nginx 36 | default: true 37 | ingressClassByName: true 38 | watchIngressWithoutClass: true 39 | ingressClass: nginx # backwards compatibility 40 | config: 41 | enable-real-ip: "true" # to reveal source ip properly 42 | forwarded-for-header: proxy_protocol # to reveal source ip properly 43 | extraArgs: 44 | enable-ssl-passthrough: true 45 | # Tuning performance 46 | # https://kubernetes.github.io/ingress-nginx/examples/customization/sysctl/ 47 | extraInitContainers: 48 | - name: sysctl 49 | image: alpine:3 50 | securityContext: 51 | privileged: true 52 | command: ["sh", "-c", "sysctl -w net.core.somaxconn=32768; sysctl -w net.ipv4.ip_local_port_range='1024 65000'"] 53 | allowSnippetAnnotations: false # CVE-2021-25742 54 | admissionWebhooks: 55 | enabled: false 56 | metrics: 57 | enabled: true 58 | serviceMonitor: 59 | 
enabled: true 60 | defaultBackend: 61 | enabled: true -------------------------------------------------------------------------------- /infra/security/netpol.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: default-namespace-network-policies 6 | spec: 7 | chart: 8 | spec: 9 | chart: ./helm/charts/security/default-namespace-network-policies 10 | sourceRef: 11 | kind: GitRepository 12 | name: flux-system 13 | namespace: flux-system 14 | interval: 1h0m0s 15 | releaseName: default-namespace-network-policies 16 | timeout: 10m 17 | install: 18 | remediation: 19 | retries: 3 20 | crds: Create 21 | upgrade: 22 | remediation: 23 | retries: 3 24 | crds: CreateReplace 25 | values: 26 | allow_namespace_traffic: false 27 | allow_ingress_cluster_traffic: true 28 | allow_egress_cluster_traffic: true 29 | allow_ingress_private_traffic: false 30 | allow_egress_private_traffic: false 31 | allow_monitoring: true 32 | allow_apiserver: true 33 | allow_internet: false 34 | allow_ingress_traffic: true 35 | allow_egress_traffic: false 36 | 37 | # Allow Falco to contact external servers (fetching kernel falco modules) 38 | --- 39 | apiVersion: networking.k8s.io/v1 40 | kind: NetworkPolicy 41 | metadata: 42 | name: allow-egress-falco 43 | spec: 44 | egress: 45 | - ports: 46 | - port: 443 47 | protocol: TCP 48 | - port: 45000 # falco k8s meta collector 49 | protocol: TCP 50 | podSelector: 51 | matchLabels: 52 | app.kubernetes.io/name: falco 53 | policyTypes: 54 | - Egress 55 | 56 | # Allow Kyverno to contact external servers (attesting ghcr images) 57 | --- 58 | apiVersion: networking.k8s.io/v1 59 | kind: NetworkPolicy 60 | metadata: 61 | name: allow-egress-kyverno 62 | spec: 63 | egress: 64 | - ports: 65 | - port: 443 66 | protocol: TCP 67 | podSelector: 68 | matchLabels: 69 | app.kubernetes.io/component: admission-controller 70 | app.kubernetes.io/instance: kyverno 71 | policyTypes: 72 | - Egress 73 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/README.md: -------------------------------------------------------------------------------- 1 | # default-namespace-network-policies 2 | 3 | ![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.0](https://img.shields.io/badge/AppVersion-0.1.0-informational?style=flat-square) 4 | 5 | This is a combination of default network policies within a namespace. 6 | No network communication is allowed by default, except for DNS resolution. 7 | Set all values to 'true' to remove all restrictions. 
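As an illustrative example (a sketch, not a recommendation), a namespace that keeps the restrictive defaults but needs intra-namespace traffic, Prometheus scraping and outbound Internet access could set values along these lines from its HelmRelease; every key is documented in the table below:

values:
  allow_namespace_traffic: true
  allow_monitoring: true
  allow_internet: true
  # everything else stays at the restrictive default (false)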
8 | 9 | ## Values 10 | 11 | | Key | Type | Default | Description | 12 | |-----|------|---------|-------------| 13 | | allow_apiserver | bool | `false` | Allow the namespace workloads access kube-apiserver endpoint | 14 | | allow_egress_cluster_traffic | bool | `false` | Allow outgoing traffic to other cluster namespaces | 15 | | allow_egress_private_traffic | bool | `false` | Allow outgoing traffic to internal networks | 16 | | allow_egress_traffic | bool | `false` | Allow all outgoing traffic | 17 | | allow_ingress_cluster_traffic | bool | `false` | Allow incoming traffic from other cluster namespaces | 18 | | allow_ingress_private_traffic | bool | `false` | Allow incoming traffic from internal networks | 19 | | allow_ingress_traffic | bool | `false` | Allow all incoming traffic | 20 | | allow_internet | bool | `false` | Allow Internet connectivity | 21 | | allow_monitoring | bool | `false` | Allow the namespace to be monitored by Prometheus | 22 | | allow_namespace_traffic | bool | `false` | Allow any traffic between the applications in this namespace | 23 | | k8s_cluster_type | string | `"self-hosted"` | Supported values for cluster type: "gke" or "self-hosted" | 24 | | monitoring_namespace | string | `"observability"` | Namespace where Prometheus instance is running | 25 | 26 | ---------------------------------------------- 27 | Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0) 28 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-auth/dev/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: demoapp-auth-dev 4 | resources: 5 | - namespace.yaml 6 | - ingress.yaml 7 | - redis.yaml 8 | - ../../../../../apps/demoapp-auth 9 | images: 10 | - name: nginxinc/nginx-unprivileged 11 | newTag: '1.23' 12 | configMapGenerator: 13 | - name: demoapp-auth-movie 14 | behavior: merge 15 | literals: 16 | - BASE_URL="https://dev.movie.demoapp-auth.example.com" 17 | - FOOD_URL="http://demoapp-auth-food" 18 | - SLEEP_URL="http://demoapp-auth-sleep" 19 | - name: demoapp-auth-food 20 | behavior: merge 21 | literals: 22 | - SLEEP_URL="http://demoapp-auth-sleep" 23 | - name: demoapp-auth-sleep 24 | behavior: merge 25 | literals: 26 | - TIMEOUT="480" 27 | patches: 28 | # No need for autoscaling on dev 29 | - patch: |- 30 | $patch: delete 31 | apiVersion: autoscaling/v1 32 | kind: HorizontalPodAutoscaler 33 | metadata: 34 | name: any 35 | target: 36 | kind: HorizontalPodAutoscaler 37 | labelSelector: group=demoapp-auth 38 | # No need for resource restrictions on dev 39 | - patch: |- 40 | - op: replace 41 | path: /spec/template/spec/containers/0/resources 42 | value: {} 43 | target: 44 | kind: Deployment 45 | labelSelector: group=demoapp-auth 46 | # Add tolerations 47 | - patch: |- 48 | apiVersion: apps/v1 49 | kind: Deployment 50 | metadata: 51 | name: any 52 | spec: 53 | template: 54 | spec: 55 | tolerations: 56 | - key: dedicated 57 | operator: Equal 58 | value: dev 59 | target: 60 | kind: Deployment 61 | labelSelector: group=demoapp-auth 62 | # Add variable indicating environment 63 | - patch: |- 64 | - op: add 65 | path: "/spec/template/spec/containers/0/env/-" 66 | value: 67 | name: ENVIRONMENT 68 | value: dev 69 | target: 70 | kind: Deployment 71 | labelSelector: group=demoapp-auth 72 | 
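Each application overlay like the one above is reconciled by the Flux Kustomization in the app's flux-ks.yaml, which this dump does not reproduce. As a minimal sketch of how the dev overlay would typically be wired in, following the same pattern as the infra flux-ks.yaml files shown later in this document (the object name and interval are assumptions):

apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
  name: demoapp-auth-dev             # assumed name
  namespace: flux-system
spec:
  interval: 10m0s
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./clusters/demo-000/apps/demoapp-auth/dev
  prune: true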
-------------------------------------------------------------------------------- /infra/operations/etcd-defrag.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: batch/v1 3 | kind: CronJob 4 | metadata: 5 | labels: 6 | app: etcd-defrag 7 | name: etcd-defrag 8 | namespace: kube-system 9 | spec: 10 | failedJobsHistoryLimit: 1 11 | schedule: 30 01,11 * * * 12 | jobTemplate: 13 | metadata: 14 | labels: 15 | app: etcd-defrag 16 | name: etcd-defrag 17 | spec: 18 | backoffLimit: 2 19 | template: 20 | metadata: 21 | labels: 22 | app: etcd-defrag 23 | spec: 24 | containers: 25 | - command: 26 | - /bin/sh 27 | - -c 28 | - | 29 | etcdctl \ 30 | --cacert=/etc/ssl/etcd/ssl/ca.pem \ 31 | --cert=/etc/ssl/etcd/ssl/admin-${NODE_NAME}.pem \ 32 | --key=/etc/ssl/etcd/ssl/admin-${NODE_NAME}-key.pem \ 33 | defrag --cluster 34 | env: 35 | - name: ETCDCTL_API 36 | value: "3" 37 | - name: NODE_NAME 38 | valueFrom: 39 | fieldRef: 40 | fieldPath: spec.nodeName 41 | image: quay.io/coreos/etcd:v3.5.6 42 | name: etcd-defrag 43 | resources: {} 44 | securityContext: 45 | allowPrivilegeEscalation: true 46 | capabilities: 47 | drop: 48 | - ALL 49 | privileged: true 50 | runAsUser: 0 51 | volumeMounts: 52 | - mountPath: /etc/ssl/etcd/ssl 53 | name: etcd-certs 54 | readOnly: true 55 | hostNetwork: true 56 | restartPolicy: OnFailure 57 | securityContext: 58 | fsGroup: 2000 59 | runAsNonRoot: false 60 | seccompProfile: 61 | type: RuntimeDefault 62 | tolerations: 63 | - key: node-role.kubernetes.io/control-plane 64 | operator: Exists 65 | volumes: 66 | - name: etcd-certs 67 | hostPath: 68 | path: /etc/ssl/etcd/ssl 69 | type: DirectoryOrCreate -------------------------------------------------------------------------------- /infra/operations/descheduler/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: descheduler 5 | namespace: kube-system 6 | labels: 7 | app: descheduler 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: descheduler 13 | template: 14 | metadata: 15 | labels: 16 | app: descheduler 17 | spec: 18 | priorityClassName: system-cluster-critical 19 | serviceAccountName: descheduler-sa 20 | containers: 21 | - name: descheduler 22 | image: registry.k8s.io/descheduler/descheduler:v0.29.0 23 | imagePullPolicy: IfNotPresent 24 | command: 25 | - "/bin/descheduler" 26 | args: 27 | - "--policy-config-file" 28 | - "/policy-dir/policy.yaml" 29 | - "--descheduling-interval" 30 | - "5m" 31 | - "--v" 32 | - "3" 33 | ports: 34 | - containerPort: 10258 35 | protocol: TCP 36 | livenessProbe: 37 | failureThreshold: 3 38 | httpGet: 39 | path: /healthz 40 | port: 10258 41 | scheme: HTTPS 42 | initialDelaySeconds: 3 43 | periodSeconds: 10 44 | resources: 45 | requests: 46 | cpu: 500m 47 | memory: 256Mi 48 | securityContext: 49 | allowPrivilegeEscalation: false 50 | capabilities: 51 | drop: 52 | - ALL 53 | privileged: false 54 | readOnlyRootFilesystem: true 55 | runAsNonRoot: true 56 | volumeMounts: 57 | - mountPath: /policy-dir 58 | name: policy-volume 59 | tolerations: 60 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 61 | operator: Exists 62 | - key: node-role.kubernetes.io/control-plane 63 | operator: Exists 64 | volumes: 65 | - name: policy-volume 66 | configMap: 67 | name: descheduler-policy-configmap 68 | 
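The descheduler Deployment above reads its policy from the descheduler-policy-configmap ConfigMap (maintained in the adjacent configmap.yaml, not shown in this dump). For orientation only, a minimal sketch of what such a ConfigMap can look like with the descheduler v1alpha2 policy API — the enabled plugin here is an assumption, not necessarily what this repository configures:

apiVersion: v1
kind: ConfigMap
metadata:
  name: descheduler-policy-configmap
  namespace: kube-system
data:
  policy.yaml: |
    apiVersion: "descheduler/v1alpha2"
    kind: "DeschedulerPolicy"
    profiles:
      - name: default
        pluginConfig:
          - name: RemovePodsViolatingNodeTaints   # evict pods stranded by taint changes
        plugins:
          deschedule:
            enabled:
              - RemovePodsViolatingNodeTaints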
-------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/templates/restart_deployment_on_secret_change.yaml: -------------------------------------------------------------------------------- 1 | {{- if gt (len .Values.restart_deployment_on_secret_change) 0 -}} 2 | --- 3 | apiVersion: kyverno.io/v1 4 | kind: ClusterPolicy 5 | metadata: 6 | name: restart-deployment-on-secret-change 7 | annotations: 8 | policies.kyverno.io/title: Restart Deployment On Secret Change 9 | policies.kyverno.io/category: other 10 | policies.kyverno.io/severity: medium 11 | policies.kyverno.io/subject: Deployment 12 | kyverno.io/kyverno-version: 1.7.0 13 | policies.kyverno.io/minversion: 1.7.0 14 | kyverno.io/kubernetes-version: "1.23" 15 | policies.kyverno.io/description: >- 16 | If Secrets are mounted in ways which do not naturally allow updates to 17 | be live refreshed it may be necessary to modify a Deployment. This policy 18 | watches a Secret and if it changes will write an annotation 19 | to one or more target Deployments thus triggering a new rollout and thereby 20 | refreshing the referred Secret. It may be necessary to grant additional privileges 21 | to the Kyverno ServiceAccount, via one of the existing ClusterRoleBindings or a new 22 | one, so it can modify Deployments. 23 | spec: 24 | mutateExistingOnPolicyUpdate: false 25 | rules: 26 | {{- range $k, $v := .Values.restart_deployment_on_secret_change }} 27 | - name: update-secret-{{ $k }} 28 | match: 29 | any: 30 | - resources: 31 | kinds: 32 | - Secret 33 | names: 34 | - {{ $v.secret.name }} 35 | namespaces: 36 | - {{ $v.secret.namespace }} 37 | preconditions: 38 | all: 39 | - key: {{ printf "\"{{request.operation}}\"" }} 40 | operator: Equals 41 | value: UPDATE 42 | mutate: 43 | targets: 44 | - apiVersion: apps/v1 45 | kind: Deployment 46 | namespace: {{ $v.deployment.namespace }} 47 | patchStrategicMerge: 48 | spec: 49 | template: 50 | metadata: 51 | annotations: 52 | cluster.policy/triggerrestart: {{ printf "\"{{request.object.metadata.resourceVersion}}\"" }} 53 | {{- end }} 54 | {{- end }} -------------------------------------------------------------------------------- /clusters/demo-000/rbac/ci.yaml: -------------------------------------------------------------------------------- 1 | # Dedicated service account for github actions 2 | # (1) Need to modify flux native resources to reconcile states 3 | # (2) Need to watch the rollout status of deployments 4 | # (3) Need to have view cluster role for readonly access to all resources 5 | # (4) Need to list bitnami and monitoring objects additionally for dry-run jobs 6 | --- 7 | apiVersion: v1 8 | kind: ServiceAccount 9 | metadata: 10 | creationTimestamp: null 11 | name: github 12 | namespace: kube-system 13 | 14 | --- 15 | apiVersion: rbac.authorization.k8s.io/v1 16 | kind: ClusterRole 17 | metadata: 18 | name: github 19 | rules: 20 | - apiGroups: 21 | - source.toolkit.fluxcd.io 22 | resources: 23 | - '*' 24 | verbs: ["get", "list", "watch", "update", "patch"] 25 | - apiGroups: 26 | - kustomize.toolkit.fluxcd.io 27 | resources: 28 | - '*' 29 | verbs: ["get", "list", "watch", "update", "patch"] 30 | - apiGroups: 31 | - helm.toolkit.fluxcd.io 32 | resources: 33 | - helmreleases 34 | verbs: ["get", "list", "watch"] 35 | - apiGroups: 36 | - bitnami.com 37 | resources: 38 | - sealedsecrets 39 | verbs: ["get", "list", "watch"] 40 | - apiGroups: 41 | - monitoring.coreos.com 42 | resources: 43 | - '*' 44 | verbs: ["get", "list", "watch"] 
45 | - apiGroups: 46 | - cert-manager.io 47 | resources: 48 | - '*' 49 | verbs: ["get", "list", "watch"] 50 | - apiGroups: 51 | - rbac.authorization.k8s.io 52 | resources: 53 | - clusterrolebindings 54 | - clusterroles 55 | verbs: ["get", "list", "watch"] 56 | 57 | # Bind service account to role 58 | --- 59 | apiVersion: rbac.authorization.k8s.io/v1 60 | kind: ClusterRoleBinding 61 | metadata: 62 | name: github 63 | roleRef: 64 | apiGroup: rbac.authorization.k8s.io 65 | kind: ClusterRole 66 | name: github 67 | subjects: 68 | - kind: ServiceAccount 69 | name: github 70 | namespace: kube-system 71 | 72 | # Bind service account to view role 73 | --- 74 | apiVersion: rbac.authorization.k8s.io/v1 75 | kind: ClusterRoleBinding 76 | metadata: 77 | name: github-view 78 | roleRef: 79 | apiGroup: rbac.authorization.k8s.io 80 | kind: ClusterRole 81 | name: view 82 | subjects: 83 | - kind: ServiceAccount 84 | name: github 85 | namespace: kube-system 86 | -------------------------------------------------------------------------------- /clusters/demo-001/rbac/ci.yaml: -------------------------------------------------------------------------------- 1 | # Dedicated service account for github actions 2 | # (1) Need to modify flux native resources to reconcile states 3 | # (2) Need to watch the rollout status of deployments 4 | # (3) Need to have view cluster role for readonly access to all resources 5 | # (4) Need to list bitnami and monitoring objects additionally for dry-run jobs 6 | --- 7 | apiVersion: v1 8 | kind: ServiceAccount 9 | metadata: 10 | creationTimestamp: null 11 | name: github 12 | namespace: kube-system 13 | 14 | --- 15 | apiVersion: rbac.authorization.k8s.io/v1 16 | kind: ClusterRole 17 | metadata: 18 | name: github 19 | rules: 20 | - apiGroups: 21 | - source.toolkit.fluxcd.io 22 | resources: 23 | - '*' 24 | verbs: ["get", "list", "watch", "update", "patch"] 25 | - apiGroups: 26 | - kustomize.toolkit.fluxcd.io 27 | resources: 28 | - '*' 29 | verbs: ["get", "list", "watch", "update", "patch"] 30 | - apiGroups: 31 | - helm.toolkit.fluxcd.io 32 | resources: 33 | - helmreleases 34 | verbs: ["get", "list", "watch"] 35 | - apiGroups: 36 | - bitnami.com 37 | resources: 38 | - sealedsecrets 39 | verbs: ["get", "list", "watch"] 40 | - apiGroups: 41 | - monitoring.coreos.com 42 | resources: 43 | - '*' 44 | verbs: ["get", "list", "watch"] 45 | - apiGroups: 46 | - cert-manager.io 47 | resources: 48 | - '*' 49 | verbs: ["get", "list", "watch"] 50 | - apiGroups: 51 | - rbac.authorization.k8s.io 52 | resources: 53 | - clusterrolebindings 54 | - clusterroles 55 | verbs: ["get", "list", "watch"] 56 | 57 | # Bind service account to role 58 | --- 59 | apiVersion: rbac.authorization.k8s.io/v1 60 | kind: ClusterRoleBinding 61 | metadata: 62 | name: github 63 | roleRef: 64 | apiGroup: rbac.authorization.k8s.io 65 | kind: ClusterRole 66 | name: github 67 | subjects: 68 | - kind: ServiceAccount 69 | name: github 70 | namespace: kube-system 71 | 72 | # Bind service account to view role 73 | --- 74 | apiVersion: rbac.authorization.k8s.io/v1 75 | kind: ClusterRoleBinding 76 | metadata: 77 | name: github-view 78 | roleRef: 79 | apiGroup: rbac.authorization.k8s.io 80 | kind: ClusterRole 81 | name: view 82 | subjects: 83 | - kind: ServiceAccount 84 | name: github 85 | namespace: kube-system 86 | -------------------------------------------------------------------------------- /clusters/demo-000/apps/demoapp-data-processor/dev/kustomization.yaml: 
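A practical note on the github ServiceAccount defined in the ci.yaml manifests above: since Kubernetes 1.24 a ServiceAccount no longer gets a long-lived token Secret automatically, so a static kubeconfig for CI has to be minted explicitly (or, preferably, replaced with short-lived TokenRequest tokens). A hedged sketch with an arbitrary Secret name:

apiVersion: v1
kind: Secret
metadata:
  name: github-ci-token              # illustrative name, not from this repository
  namespace: kube-system
  annotations:
    kubernetes.io/service-account.name: github
type: kubernetes.io/service-account-token

The token and CA bundle from such a Secret are what typically end up in the base64-encoded kubeconfig secrets consumed by the dry-run workflow later in this document.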
-------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: demoapp-data-processor-dev 4 | resources: 5 | - namespace.yaml 6 | - ../../../../../apps/demoapp-data-processor 7 | images: 8 | - name: nginxinc/nginx-unprivileged 9 | newTag: '1.22' 10 | configMapGenerator: 11 | - name: demoapp-data-processor-call 12 | behavior: merge 13 | literals: 14 | - BASE_URL="https://dev.movie.demoapp-data-processor.example.com" 15 | - MEET_URL="http://demoapp-data-processor-meet" 16 | - SOLVE_URL="http://demoapp-data-processor-solve" 17 | - name: demoapp-data-processor-meet 18 | behavior: merge 19 | literals: 20 | - SOLVE_URL="http://demoapp-data-processor-solve" 21 | - name: demoapp-data-processor-solve 22 | behavior: merge 23 | literals: 24 | - MEET_URL="http://demoapp-data-processor-meet" 25 | - TIMEOUT="60" 26 | patches: 27 | # No need for autoscaling on dev 28 | - patch: |- 29 | $patch: delete 30 | apiVersion: autoscaling/v1 31 | kind: HorizontalPodAutoscaler 32 | metadata: 33 | name: any 34 | target: 35 | kind: HorizontalPodAutoscaler 36 | labelSelector: group=demoapp-data-processor 37 | # No need for resource restrictions on dev 38 | - patch: |- 39 | - op: replace 40 | path: /spec/template/spec/containers/0/resources 41 | value: {} 42 | target: 43 | kind: Deployment 44 | labelSelector: group=demoapp-data-processor 45 | # Add tolerations 46 | - patch: |- 47 | apiVersion: apps/v1 48 | kind: Deployment 49 | metadata: 50 | name: any 51 | spec: 52 | template: 53 | spec: 54 | tolerations: 55 | - key: dedicated 56 | operator: Equal 57 | value: dev 58 | target: 59 | kind: Deployment 60 | labelSelector: group=demoapp-data-processor 61 | # Add variable indicating environment 62 | - patch: |- 63 | - op: add 64 | path: "/spec/template/spec/containers/0/env/-" 65 | value: 66 | name: ENVIRONMENT 67 | value: dev 68 | target: 69 | kind: Deployment 70 | labelSelector: group=demoapp-data-processor 71 | -------------------------------------------------------------------------------- /infra/operations/eventrouter.yaml: -------------------------------------------------------------------------------- 1 | ## This is a small Kubernetes add-on for forwarding cluster events to log aggregators for troubleshooting and audit purposes 2 | ## https://github.com/heptiolabs/eventrouter 3 | --- 4 | apiVersion: v1 5 | kind: ServiceAccount 6 | metadata: 7 | name: eventrouter 8 | namespace: kube-system 9 | --- 10 | apiVersion: rbac.authorization.k8s.io/v1 11 | kind: ClusterRole 12 | metadata: 13 | name: eventrouter 14 | rules: 15 | - apiGroups: [""] 16 | resources: ["events"] 17 | verbs: ["get", "watch", "list"] 18 | --- 19 | apiVersion: rbac.authorization.k8s.io/v1 20 | kind: ClusterRoleBinding 21 | metadata: 22 | name: eventrouter 23 | roleRef: 24 | apiGroup: rbac.authorization.k8s.io 25 | kind: ClusterRole 26 | name: eventrouter 27 | subjects: 28 | - kind: ServiceAccount 29 | name: eventrouter 30 | namespace: kube-system 31 | --- 32 | apiVersion: rbac.authorization.k8s.io/v1 33 | kind: RoleBinding 34 | metadata: 35 | name: evenrouter-psp 36 | namespace: kube-system 37 | roleRef: 38 | apiGroup: rbac.authorization.k8s.io 39 | kind: ClusterRole 40 | name: baseline-psp 41 | subjects: 42 | - kind: ServiceAccount 43 | name: eventrouter 44 | namespace: kube-system 45 | --- 46 | apiVersion: v1 47 | data: 48 | config.json: |- 49 | { 50 | "sink": "glog" 51 | } 52 | kind: ConfigMap 53 | metadata: 54 | name: eventrouter-cm 55 | 
namespace: kube-system 56 | --- 57 | apiVersion: apps/v1 58 | kind: Deployment 59 | metadata: 60 | name: eventrouter 61 | namespace: kube-system 62 | labels: 63 | app: eventrouter 64 | spec: 65 | replicas: 1 66 | selector: 67 | matchLabels: 68 | app: eventrouter 69 | template: 70 | metadata: 71 | labels: 72 | app: eventrouter 73 | tier: control-plane-addons 74 | spec: 75 | containers: 76 | - name: kube-eventrouter 77 | image: gcr.io/heptio-images/eventrouter:v0.3 78 | imagePullPolicy: IfNotPresent 79 | volumeMounts: 80 | - name: config-volume 81 | mountPath: /etc/eventrouter 82 | serviceAccount: eventrouter 83 | tolerations: 84 | - key: node-role.kubernetes.io/control-plane 85 | operator: Exists 86 | volumes: 87 | - name: config-volume 88 | configMap: 89 | name: eventrouter-cm -------------------------------------------------------------------------------- /.github/workflows/pr-demo-env-delete.yaml: -------------------------------------------------------------------------------- 1 | name: Bring down demo env 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | types: 8 | - closed 9 | workflow_dispatch: 10 | 11 | jobs: 12 | demo-env-delete: 13 | if: startsWith(github.event.label.*.name, 'deploy-') 14 | runs-on: ubuntu-latest 15 | permissions: 16 | contents: write 17 | issues: write 18 | pull-requests: write 19 | steps: 20 | - name: Checkout Flux repo 21 | uses: actions/checkout@master 22 | with: 23 | ref: main 24 | fetch-depth: 0 25 | repository: ${{ github.repository_owner }}/flux2-general 26 | token: ${{ secrets.TOKEN_RW }} 27 | 28 | - name: Get PR labels 29 | id: pr-labels 30 | uses: joerick/pr-labels-action@v1.0.6 31 | 32 | - name: Dump github context 33 | env: 34 | GITHUB_CONTEXT: ${{ toJson(github) }} 35 | run: echo "$GITHUB_CONTEXT" 36 | 37 | - name: Extract feature name 38 | id: feature_name 39 | env: 40 | LABELS: ${{ steps.pr-labels.outputs.labels }} 41 | run: | 42 | echo $LABELS 43 | LABEL1=$(echo $LABELS | grep -o -E "deploy-[a-zA-Z0-9]*" | sed "s|deploy-||") 44 | printenv | grep GITHUB_PR_LABEL_DEPLOY 45 | LABEL2=$(printenv | grep GITHUB_PR_LABEL_DEPLOY | sed "s|DEPLOY_||") 46 | echo "LABEL1 is $LABEL1 and LABEL2 is $LABEL2" 47 | echo "feature_name=$LABEL1" >> $GITHUB_OUTPUT 48 | 49 | - name: Clean up 50 | env: 51 | NAME: ${{ steps.feature_name.outputs.feature_name }} 52 | run: | 53 | TAG="deploy-${NAME}" 54 | git config user.name "GitHub Actions Bot" 55 | git config user.email "<>" 56 | git revert $(git rev-list -n1 ${TAG}) --no-edit 57 | git status 58 | git push origin main 59 | git push --delete origin ${TAG} 60 | 61 | - uses: actions/github-script@v3 62 | env: 63 | NAME: ${{ steps.feature_name.outputs.feature_name }} 64 | with: 65 | github-token: ${{ secrets.GITHUB_TOKEN }} 66 | script: | 67 | github.issues.createComment({ 68 | issue_number: context.issue.number, 69 | owner: context.repo.owner, 70 | repo: context.repo.repo, 71 | body: `${{env.NAME}} namespace has been deleted` 72 | }) -------------------------------------------------------------------------------- /.github/workflows/flux-e2e.yaml: -------------------------------------------------------------------------------- 1 | name: Flux - test cluster bootstrap 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | e2e: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout 11 | uses: actions/checkout@v2 12 | - name: Setup Flux CLI 13 | uses: fluxcd/flux2/action@main 14 | - name: Setup Kubernetes Kind 15 | uses: engineerd/setup-kind@v0.5.0 16 | with: 17 | version: "v0.11.1" 18 | - name: Install Flux in 
Kubernetes Kind 19 | run: flux install 20 | - name: Setup cluster reconciliation 21 | run: | 22 | flux create source git flux-system \ 23 | --url=${{ github.event.repository.html_url }} \ 24 | --username=${GIT_USERNAME} \ 25 | --password=${GIT_PASSWORD} \ 26 | --branch=${GITHUB_REF#refs/heads/} 27 | 28 | flux create kustomization flux-system \ 29 | --source=flux-system \ 30 | --path=./clusters/demo-000 31 | env: 32 | GIT_USERNAME: github 33 | GIT_PASSWORD: ${{ secrets.TOKEN_RW }} 34 | - name: Verify cluster reconciliation 35 | run: | 36 | flux get ks --no-header | \ 37 | cut -f1 | \ 38 | while read ks 39 | do 40 | kubectl -n flux-system wait kustomization $ks --for=condition=ready --timeout=2m 41 | done 42 | - name: Verify helm reconciliation 43 | run: | 44 | flux get hr --all-namespaces --no-header | \ 45 | cut -f1,2 | \ 46 | while read ns hr 47 | do 48 | kubectl -n $ns wait helmrelease $hr --for=condition=ready --timeout=2m 49 | done 50 | - name: Debug failure 51 | if: always() 52 | run: | 53 | kubectl get pods --all-namespaces 54 | kubectl describe pods --all-namespaces 55 | 56 | flux get ks --no-header | \ 57 | cut -f1 | \ 58 | while read ks 59 | do 60 | kubectl -n flux-system describe kustomization $ks 61 | done 62 | 63 | flux get hr --all-namespaces --no-header | \ 64 | cut -f1,2 | \ 65 | while read ns hr 66 | do 67 | kubectl -n $ns describe helmrelease $hr 68 | done 69 | 70 | flux get all --all-namespaces 71 | flux logs --all-namespaces 72 | - name: Tear down cluster 73 | if: always() 74 | run: kind delete cluster 75 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/ingress/ingress-nginx.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 2 | kind: HelmRelease 3 | metadata: 4 | name: ingress-nginx 5 | spec: 6 | releaseName: ingress-nginx 7 | chart: 8 | spec: 9 | chart: ingress-nginx 10 | sourceRef: 11 | kind: HelmRepository 12 | name: ingress-nginx 13 | namespace: flux-system 14 | version: ">=4.0.0 <5.0.0" 15 | interval: 1h0m0s 16 | timeout: 10m 17 | install: 18 | remediation: 19 | retries: 3 20 | upgrade: 21 | remediation: 22 | retries: 3 23 | # Default values 24 | # https://github.com/kubernetes/ingress-nginx/blob/main/charts/ingress-nginx/values.yaml 25 | values: 26 | controller: 27 | kind: DaemonSet 28 | hostPort: 29 | enabled: true 30 | service: 31 | type: ClusterIP 32 | ingressClassResource: 33 | name: nginx-dev 34 | default: false 35 | controllerValue: "k8s.io/ingress-nginx-dev" 36 | ingressClassByName: true 37 | ingressClass: nginx-dev # backwards compatibility 38 | config: 39 | enable-real-ip: "true" # to reveal source ip properly 40 | forwarded-for-header: proxy_protocol # to reveal source ip properly 41 | # modsecurity 42 | enable-modsecurity: "false" 43 | enable-owasp-modsecurity-crs: "false" 44 | http-snippet: | 45 | proxy_cache_path /tmp/nginx_cache levels=1:2 keys_zone=static-cache:10m max_size=30g inactive=24h use_temp_path=off; 46 | modsecurity-snippet: | 47 | # Increment this to force nginx to reload the rules when you change the configmap: 1.0.1 48 | Include /etc/nginx/owasp-modsecurity-crs/custom/custom-modsecurity.conf 49 | extraArgs: 50 | enable-ssl-passthrough: true 51 | allowSnippetAnnotations: true 52 | admissionWebhooks: 53 | enabled: false 54 | # this block is for modsecurity 55 | extraVolumeMounts: 56 | - name: modsecurity-config 57 | mountPath: /etc/nginx/owasp-modsecurity-crs/custom/ 58 | extraVolumes: 59 | - name: 
modsecurity-config 60 | configMap: 61 | name: modsecurity-config 62 | nodeSelector: 63 | dedicated: prod 64 | tolerations: 65 | - key: dedicated 66 | operator: Equal 67 | value: prod 68 | defaultBackend: 69 | enabled: true 70 | tolerations: 71 | - key: dedicated 72 | operator: Equal 73 | value: prod -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base", 4 | ":ignoreUnstable", 5 | ":dependencyDashboard" 6 | ], 7 | "platform": "github", 8 | "onboarding": true, 9 | "requireConfig": "optional", 10 | "prHourlyLimit": 10, 11 | "dependencyDashboardTitle": "Renovate Dashboard 🤖", 12 | "suppressNotifications": [ 13 | "prIgnoreNotification" 14 | ], 15 | "rebaseWhen": "conflicted", 16 | "commitBodyTable": true, 17 | "commitMessagePrefix": "[skip ci]", 18 | "ignorePaths": [ 19 | "kubernetes/clusters/**/flux-system" 20 | ], 21 | "flux": { 22 | "fileMatch": [ 23 | "infra/.*", 24 | "clusters/.*/infra/.*" 25 | ] 26 | }, 27 | "helm-values": { 28 | "fileMatch": [ 29 | "infra/.*", 30 | "clusters/.*/infra/.*" 31 | ] 32 | }, 33 | "kubernetes": { 34 | "fileMatch": [ 35 | "infra/.*", 36 | "clusters/.*/infra/.*" 37 | ] 38 | }, 39 | "regexManagers": [ 40 | { 41 | "description": "Process custom dependencies", 42 | "fileMatch": [ 43 | "kubernetes/.*.yaml" 44 | ], 45 | "matchStrings": [ 46 | "datasource=(?\\S+) depName=(?\\S+)( versioning=(?\\S+))?\n.*?\"(?.*)\"\n" 47 | ], 48 | "datasourceTemplate": "{{#if datasource}}{{{datasource}}}{{else}}github-releases{{/if}}", 49 | "versioningTemplate": "{{#if versioning}}{{{versioning}}}{{else}}semver{{/if}}" 50 | } 51 | ], 52 | "packageRules": [ 53 | { 54 | "description": "lockFileMaintenance", 55 | "matchUpdateTypes": [ 56 | "pin", 57 | "digest", 58 | "patch", 59 | "minor", 60 | "major", 61 | "lockFileMaintenance" 62 | ], 63 | "dependencyDashboardApproval": false, 64 | "stabilityDays": 0 65 | }, 66 | { 67 | "groupName": "kubernetes", 68 | "matchPaths": [ 69 | "infra/**", 70 | "clusters/**/infra/**" 71 | ] 72 | }, 73 | { 74 | "groupName": "github-actions", 75 | "matchPaths": [ 76 | "\.github/**" 77 | ], 78 | "separateMajorMinor": false 79 | } 80 | ] 81 | } -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/templates/disallow-container-sock-mounts.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.disallow_container_sock_mounts }} 2 | 3 | --- 4 | apiVersion: kyverno.io/v1 5 | kind: ClusterPolicy 6 | metadata: 7 | name: disallow-container-sock-mounts 8 | annotations: 9 | policies.kyverno.io/title: Disallow CRI socket mounts 10 | policies.kyverno.io/category: Best Practices 11 | policies.kyverno.io/severity: medium 12 | policies.kyverno.io/subject: Pod 13 | policies.kyverno.io/description: >- 14 | Container daemon socket bind mounts allows access to the container engine on the 15 | node. This access can be used for privilege escalation and to manage containers 16 | outside of Kubernetes, and hence should not be allowed. This policy validates that 17 | the sockets used for CRI engines Docker, Containerd, and CRI-O are not used. 
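# Illustration (hypothetical, not part of this chart): a Pod carrying a hostPath
# volume like the one below is exactly what the first rule in the spec that
# follows rejects; the falco* exclusions exist because Falco needs host-level
# access to the container runtime.
#
#   volumes:
#     - name: docker-sock
#       hostPath:
#         path: /var/run/docker.sock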
18 | spec: 19 | validationFailureAction: {{ .Values.validationFailureAction }} 20 | background: true 21 | rules: 22 | - name: validate-docker-sock-mount 23 | match: 24 | all: 25 | - resources: 26 | kinds: 27 | - Pod 28 | exclude: 29 | any: 30 | - resources: 31 | kinds: 32 | - Pod 33 | names: 34 | - falco* 35 | validate: 36 | message: "Use of the Docker Unix socket is not allowed." 37 | pattern: 38 | spec: 39 | =(volumes): 40 | - =(hostPath): 41 | path: "!/var/run/docker.sock" 42 | - name: validate-containerd-sock-mount 43 | match: 44 | all: 45 | - resources: 46 | kinds: 47 | - Pod 48 | exclude: 49 | any: 50 | - resources: 51 | kinds: 52 | - Pod 53 | names: 54 | - falco* 55 | validate: 56 | message: "Use of the Containerd Unix socket is not allowed." 57 | pattern: 58 | spec: 59 | =(volumes): 60 | - =(hostPath): 61 | path: "!/var/run/containerd.sock" 62 | - name: validate-crio-sock-mount 63 | match: 64 | all: 65 | - resources: 66 | kinds: 67 | - Pod 68 | exclude: 69 | any: 70 | - resources: 71 | kinds: 72 | - Pod 73 | names: 74 | - falco* 75 | validate: 76 | message: "Use of the CRI-O Unix socket is not allowed." 77 | pattern: 78 | spec: 79 | =(volumes): 80 | - =(hostPath): 81 | path: "!/var/run/crio.sock" 82 | 83 | {{- end }} 84 | -------------------------------------------------------------------------------- /clusters/demo-001/infra/flux-ks.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 3 | kind: Kustomization 4 | metadata: 5 | name: sources 6 | namespace: flux-system 7 | spec: 8 | interval: 10m0s 9 | sourceRef: 10 | kind: GitRepository 11 | name: flux-system 12 | path: ./clusters/demo-000/infra/sources 13 | prune: true 14 | 15 | --- 16 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 17 | kind: Kustomization 18 | metadata: 19 | name: crds 20 | namespace: flux-system 21 | spec: 22 | interval: 10m0s 23 | sourceRef: 24 | kind: GitRepository 25 | name: flux-system 26 | path: ./clusters/demo-000/infra/crds 27 | prune: true 28 | 29 | --- 30 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 31 | kind: Kustomization 32 | metadata: 33 | name: operations 34 | namespace: flux-system 35 | spec: 36 | interval: 10m0s 37 | sourceRef: 38 | kind: GitRepository 39 | name: flux-system 40 | path: ./clusters/demo-000/infra/operations 41 | prune: true 42 | 43 | --- 44 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 45 | kind: Kustomization 46 | metadata: 47 | name: ingress 48 | namespace: flux-system 49 | spec: 50 | dependsOn: 51 | - name: crds 52 | interval: 10m0s 53 | sourceRef: 54 | kind: GitRepository 55 | name: flux-system 56 | path: ./clusters/demo-000/infra/ingress 57 | prune: true 58 | 59 | --- 60 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 61 | kind: Kustomization 62 | metadata: 63 | name: observability 64 | namespace: flux-system 65 | spec: 66 | dependsOn: 67 | - name: crds 68 | interval: 10m0s 69 | sourceRef: 70 | kind: GitRepository 71 | name: flux-system 72 | path: ./clusters/demo-000/infra/observability 73 | prune: true 74 | 75 | --- 76 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 77 | kind: Kustomization 78 | metadata: 79 | name: security 80 | namespace: flux-system 81 | spec: 82 | dependsOn: 83 | - name: crds 84 | interval: 10m0s 85 | sourceRef: 86 | kind: GitRepository 87 | name: flux-system 88 | path: ./clusters/demo-000/infra/security 89 | prune: true 90 | 91 | --- 92 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 93 | kind: Kustomization 94 | metadata: 95 | name: storage 96 
| namespace: flux-system 97 | spec: 98 | dependsOn: 99 | - name: crds 100 | interval: 10m0s 101 | sourceRef: 102 | kind: GitRepository 103 | name: flux-system 104 | path: ./clusters/demo-000/infra/storage 105 | prune: true 106 | -------------------------------------------------------------------------------- /infra/observability/alerts/mongodb.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-mongodb 5 | spec: 6 | groups: 7 | - name: mongodb.rules 8 | rules: 9 | - alert: MongodbDown 10 | expr: mongodb_up == 0 11 | for: 0m 12 | labels: 13 | severity: critical 14 | annotations: 15 | summary: MongoDB Down (instance {{ $labels.instance }}) 16 | description: "MongoDB instance is down\n LABELS = {{ $labels }}" 17 | - alert: MongodbReplicaSetUnhealthy 18 | expr: mongodb_rs_ok == 0 19 | for: 0m 20 | labels: 21 | severity: critical 22 | annotations: 23 | summary: MongoDB Down (RS {{ $labels.rs_nm }}) 24 | description: "MongoDB replica set is unhealthy" 25 | - alert: MongodbReplicationLag 26 | expr: mongodb_rs_members_optimeDate{member_state="PRIMARY", rs_state="1"} - ON (set) mongodb_rs_members_optimeDate{member_state="SECONDARY", rs_state="2"} > 60 * 1000 27 | for: 1m 28 | labels: 29 | severity: high 30 | annotations: 31 | summary: MongoDB replication lag (instance {{ $labels.instance }}) 32 | description: "Mongodb replication lag is more than 10s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 33 | - alert: MongodbNumberCursorsOpen 34 | expr: mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000 35 | for: 2m 36 | labels: 37 | severity: warning 38 | annotations: 39 | summary: MongoDB number cursors open (instance {{ $labels.instance }}) 40 | description: "Too many cursors opened by MongoDB for clients (> 10k)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 41 | - alert: MongodbCursorsTimeouts 42 | expr: increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100 43 | for: 2m 44 | labels: 45 | severity: warning 46 | annotations: 47 | summary: MongoDB cursors timeouts (instance {{ $labels.instance }}) 48 | description: "Too many cursors are timing out\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 49 | - alert: MongodbTooManyConnections 50 | expr: avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections{conn_type="available"}) by (instance)) * 100 > 80 51 | for: 2m 52 | labels: 53 | severity: warning 54 | annotations: 55 | summary: MongoDB too many connections (instance {{ $labels.instance }}) 56 | description: "Too many connections (> 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 57 | -------------------------------------------------------------------------------- /infra/ingress/cert-manager/cert-manager.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: cert-manager 6 | namespace: ingress 7 | spec: 8 | chart: 9 | spec: 10 | chart: cert-manager 11 | sourceRef: 12 | kind: HelmRepository 13 | name: jetstack 14 | namespace: flux-system 15 | version: '>=1.8.0 <2.0.0' 16 | interval: 1h0m0s 17 | releaseName: cert-manager 18 | timeout: 10m 19 | install: 20 | remediation: 21 | retries: 3 22 | crds: Create 23 | upgrade: 24 | remediation: 25 | retries: 3 26 | crds: CreateReplace 27 | values: 28 | installCRDs: true 29 | securityContext: 30 | 
runAsNonRoot: true 31 | seccompProfile: 32 | type: RuntimeDefault 33 | containerSecurityContext: 34 | allowPrivilegeEscalation: false 35 | capabilities: 36 | drop: 37 | - ALL 38 | tolerations: 39 | - key: node-role.kubernetes.io/master 40 | operator: Exists 41 | - key: node-role.kubernetes.io/control-plane 42 | operator: Exists 43 | webhook: 44 | securityContext: 45 | runAsNonRoot: true 46 | seccompProfile: 47 | type: RuntimeDefault 48 | containerSecurityContext: 49 | allowPrivilegeEscalation: false 50 | capabilities: 51 | drop: 52 | - ALL 53 | tolerations: 54 | - key: node-role.kubernetes.io/master 55 | operator: Exists 56 | - key: node-role.kubernetes.io/control-plane 57 | operator: Exists 58 | cainjector: 59 | securityContext: 60 | runAsNonRoot: true 61 | seccompProfile: 62 | type: RuntimeDefault 63 | containerSecurityContext: 64 | allowPrivilegeEscalation: false 65 | capabilities: 66 | drop: 67 | - ALL 68 | tolerations: 69 | - key: node-role.kubernetes.io/master 70 | operator: Exists 71 | - key: node-role.kubernetes.io/control-plane 72 | operator: Exists 73 | startupapicheck: 74 | securityContext: 75 | runAsNonRoot: true 76 | seccompProfile: 77 | type: RuntimeDefault 78 | containerSecurityContext: 79 | allowPrivilegeEscalation: false 80 | capabilities: 81 | drop: 82 | - ALL 83 | tolerations: 84 | - key: node-role.kubernetes.io/master 85 | operator: Exists 86 | - key: node-role.kubernetes.io/control-plane 87 | operator: Exists 88 | prometheus: 89 | enabled: true 90 | servicemonitor: 91 | enabled: true -------------------------------------------------------------------------------- /clusters/demo-000/infra/flux-ks.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 3 | kind: Kustomization 4 | metadata: 5 | name: sources 6 | namespace: flux-system 7 | spec: 8 | interval: 10m0s 9 | sourceRef: 10 | kind: GitRepository 11 | name: flux-system 12 | path: ./clusters/demo-000/infra/sources 13 | prune: true 14 | 15 | --- 16 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 17 | kind: Kustomization 18 | metadata: 19 | name: crds 20 | namespace: flux-system 21 | spec: 22 | interval: 10m0s 23 | sourceRef: 24 | kind: GitRepository 25 | name: flux-system 26 | path: ./clusters/demo-000/infra/crds 27 | prune: true 28 | 29 | --- 30 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 31 | kind: Kustomization 32 | metadata: 33 | name: operations 34 | namespace: flux-system 35 | spec: 36 | interval: 10m0s 37 | sourceRef: 38 | kind: GitRepository 39 | name: flux-system 40 | path: ./clusters/demo-000/infra/operations 41 | prune: true 42 | 43 | # --- 44 | # apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 45 | # kind: Kustomization 46 | # metadata: 47 | # name: ingress 48 | # namespace: flux-system 49 | # spec: 50 | # dependsOn: 51 | # - name: crds 52 | # interval: 10m0s 53 | # sourceRef: 54 | # kind: GitRepository 55 | # name: flux-system 56 | # path: ./clusters/demo-000/infra/ingress 57 | # prune: true 58 | 59 | # --- 60 | # apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 61 | # kind: Kustomization 62 | # metadata: 63 | # name: observability 64 | # namespace: flux-system 65 | # spec: 66 | # dependsOn: 67 | # - name: crds 68 | # interval: 10m0s 69 | # sourceRef: 70 | # kind: GitRepository 71 | # name: flux-system 72 | # path: ./clusters/demo-000/infra/observability 73 | # prune: true 74 | 75 | # --- 76 | # apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 77 | # kind: Kustomization 78 | # metadata: 79 | # name: security 
80 | # namespace: flux-system 81 | # spec: 82 | # dependsOn: 83 | # - name: crds 84 | # interval: 10m0s 85 | # sourceRef: 86 | # kind: GitRepository 87 | # name: flux-system 88 | # path: ./clusters/demo-000/infra/security 89 | # prune: true 90 | 91 | --- 92 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 93 | kind: Kustomization 94 | metadata: 95 | name: storage 96 | namespace: flux-system 97 | spec: 98 | dependsOn: 99 | - name: crds 100 | interval: 10m0s 101 | sourceRef: 102 | kind: GitRepository 103 | name: flux-system 104 | path: ./clusters/demo-000/infra/storage 105 | prune: true 106 | -------------------------------------------------------------------------------- /.github/workflows/flux-dry-run.yaml: -------------------------------------------------------------------------------- 1 | name: Flux - dry-run repository definitions 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | 9 | jobs: 10 | dry-run: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | 16 | - name: Setup kubeconfig demo-000 17 | run: | 18 | echo ${B64_KUBECONFIG} | base64 -d > ${KUBECONFIG} 19 | 20 | if kubectl config view 21 | then 22 | echo 'Successfully connected to the target cluster.' 23 | else 24 | echo 'Failed to connect to the target cluster.' 25 | exit 1 26 | fi 27 | env: 28 | B64_KUBECONFIG: ${{ secrets.K8S_KUBECONFIG_DEMO000_B64 }} 29 | KUBECONFIG: /tmp/demo-000.kubeconfig 30 | 31 | - name: Setup kubeconfig demo-001 32 | run: | 33 | echo ${B64_KUBECONFIG} | base64 -d > ${KUBECONFIG} 34 | 35 | if kubectl config view 36 | then 37 | echo 'Successfully connected to the target cluster.' 38 | else 39 | echo 'Failed to connect to the target cluster.' 40 | exit 1 41 | fi 42 | env: 43 | B64_KUBECONFIG: ${{ secrets.K8S_KUBECONFIG_DEMO001_B64 }} 44 | KUBECONFIG: /tmp/demo-001.kubeconfig 45 | 46 | - uses: dorny/paths-filter@v2 47 | id: changes 48 | with: 49 | filters: | 50 | apps: 51 | - 'kubernetes/clusters/demo-000/apps/**' 52 | - 'kubernetes/clusters/demo-001/apps/**' 53 | - 'kubernetes/apps/**' 54 | infra: 55 | - 'kubernetes/clusters/demo-000/infra/**' 56 | - 'kubernetes/clusters/demo-001/infra/**' 57 | - 'kubernetes/infra/**' 58 | 59 | - name: Dry-run apps 60 | if: steps.changes.outputs.apps == 'true' 61 | run: | 62 | for cluster in demo-000 demo-001 63 | do 64 | for env in dev stg prod 65 | do 66 | for app in demoapp-auth demoapp-data-processor 67 | do 68 | kustomize build kubernetes/clusters/$cluster/apps/$app/$env | kubectl --kubeconfig /tmp/${cluster}.kubeconfig apply --dry-run -f - 69 | done 70 | done 71 | done 72 | 73 | - name: Dry-run infra 74 | if: steps.changes.outputs.infra == 'true' 75 | run: | 76 | for cluster in demo-000 demo-001 77 | do 78 | for group in observability security operations ingress 79 | do 80 | kustomize build kubernetes/clusters/$cluster/infra/$group | kubectl --kubeconfig /tmp/${cluster}.kubeconfig apply --dry-run -f - 81 | done 82 | done 83 | -------------------------------------------------------------------------------- /infra/observability/alerts/loki.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-loki 5 | spec: 6 | groups: 7 | - name: loki.rules 8 | rules: 9 | - alert: LokiRequestErrors 10 | expr: 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) / 
sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route) > 10 11 | for: 15m 12 | labels: 13 | severity: critical 14 | annotations: 15 | summary: Loki request errors (instance {{ $labels.instance }}) 16 | description: "The {{ $labels.job }} and {{ $labels.route }} are experiencing errors\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 17 | - alert: LokiRequestPanic 18 | expr: sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 19 | for: 5m 20 | labels: 21 | severity: critical 22 | annotations: 23 | summary: Loki request panic (instance {{ $labels.instance }}) 24 | description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% increase of panics\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 25 | - alert: LokiRequestLatency 26 | expr: (histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[5m])) by (le))) > 1 27 | for: 5m 28 | labels: 29 | severity: critical 30 | annotations: 31 | summary: Loki request latency (instance {{ $labels.instance }}) 32 | description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 33 | - alert: PromtailRequestErrors 34 | expr: 100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m])) by (namespace, job, route, instance) / sum(rate(promtail_request_duration_seconds_count[1m])) by (namespace, job, route, instance) > 10 35 | for: 5m 36 | labels: 37 | severity: critical 38 | annotations: 39 | summary: Promtail request errors (instance {{ $labels.instance }}) 40 | description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}% errors.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 41 | - alert: PromtailRequestLatency 42 | expr: histogram_quantile(0.99, sum(rate(promtail_request_duration_seconds_bucket[5m])) by (le)) > 1 43 | for: 5m 44 | labels: 45 | severity: critical 46 | annotations: 47 | summary: Promtail request latency (instance {{ $labels.instance }}) 48 | description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 49 | -------------------------------------------------------------------------------- /infra/observability/alerts/cert-manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-cert-manager 5 | spec: 6 | groups: 7 | - name: "cert-manager" 8 | rules: 9 | - alert: "CertManagerAbsent" 10 | annotations: 11 | description: "New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back." 12 | summary: "Cert Manager has dissapeared from Prometheus service discovery." 13 | expr: "absent(up{job=\"cert-manager\"})" 14 | for: "15m" 15 | labels: 16 | severity: "critical" 17 | - name: "certificates" 18 | rules: 19 | - alert: "CertManagerClusterCARenewal" 20 | annotations: 21 | summary: "The cert `{{ $labels.name }}` will be renewed in 2 days." 22 | expr: | 23 | (certmanager_certificate_renewal_timestamp_seconds{name="cluster-ca"} - time()) / 60 / 60 == 2 * 24 24 | labels: 25 | severity: "warning" 26 | - alert: "CertManagerCertExpirySoon" 27 | annotations: 28 | description: "The domain that this cert covers will be unavailable after {{ $value | humanizeDuration }}. 
Clients using endpoints that this cert protects will start to fail in {{ $value | humanizeDuration }}." 29 | summary: "The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry; it should have renewed over a week ago." 30 | expr: | 31 | avg by (exported_namespace, namespace, name) ( 32 | certmanager_certificate_expiration_timestamp_seconds - time() 33 | ) / 60 / 60 < (14 * 24) 34 | for: "1h" 35 | labels: 36 | severity: "warning" 37 | - alert: "CertManagerCertNotReady" 38 | annotations: 39 | description: "This certificate has not been ready to serve traffic for at least 10m. If the cert is being renewed or there is another valid cert, the ingress controller _may_ be able to serve that instead." 40 | summary: "The cert `{{ $labels.name }}` in `{{ $labels.namespace }}` is not ready to serve traffic." 41 | expr: | 42 | max by (name, exported_namespace, namespace, condition) ( 43 | certmanager_certificate_ready_status{condition!="True"} == 1 44 | ) 45 | for: "1h" 46 | labels: 47 | severity: "critical" 48 | - alert: "CertManagerHittingRateLimits" 49 | annotations: 50 | dashboard_url: "https://grafana.pulemusic.com/d/TvuRo2iMk/cert-manager" 51 | description: "Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week." 52 | summary: "Cert manager hitting LetsEncrypt rate limits." 53 | expr: | 54 | sum by (host) ( 55 | rate(certmanager_http_acme_client_request_count{status="429"}[5m]) 56 | ) > 0 57 | for: "1h" 58 | labels: 59 | severity: "critical" 60 | -------------------------------------------------------------------------------- /infra/observability/alerts/rabbitmq.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-rabbitmq 5 | spec: 6 | groups: 7 | - name: rabbitmq.rules 8 | rules: 9 | - alert: RabbitMQMemoryHigh 10 | expr: rabbitmq_alarms_memory_used_watermark > 0 11 | for: 15m 12 | labels: 13 | severity: warning 14 | annotations: 15 | summary: RabbitMQ high memory usage in {{ $labels.namespace }} namespace for {{ $labels.service }} instance 16 | description: "Used high memory watermark is exceeded\n LABELS = {{ $labels }}" 17 | - alert: RabbitMQDiskSpaceLow 18 | expr: rabbitmq_alarms_free_disk_space_watermark > 0 19 | for: 15m 20 | labels: 21 | severity: warning 22 | annotations: 23 | summary: RabbitMQ low disk space in {{ $labels.namespace }} namespace for {{ $labels.service }} instance 24 | description: "Low disk space watermark is exceeded\n LABELS = {{ $labels }}" 25 | - alert: RabbitMQTooManyFileDescriptors 26 | expr: rabbitmq_alarms_file_descriptor_limit > 0 27 | for: 15m 28 | labels: 29 | severity: warning 30 | annotations: 31 | summary: RabbitMQ too many file descriptors in {{ $labels.namespace }} namespace for {{ $labels.service }} instance 32 | description: "File descriptor limit is exceeded\n LABELS = {{ $labels }}" 33 | - alert: RabbitMQTooManyUnackMessages 34 | expr: sum(rabbitmq_detailed_queue_messages_unacked) BY (namespace, service, queue) > 10 35 | for: 1m 36 | labels: 37 | severity: warning 38 | annotations: 39 | summary: Too many unacked messages in {{ $labels.namespace }} namespace for {{ $labels.service }} instance and {{ $labels.queue }} queue 40 | description: "Amount of messages:\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 41 | - alert: RabbitMQTooManyConnections 42 | expr: rabbitmq_connections > 1000 43 | for: 15m 44 | labels: 45 | severity: warning 46 | annotations: 47 | 
summary: RabbitMQ too many connections in {{ $labels.namespace }} namespace for {{ $labels.service }} instance 48 | description: "The total connections of a node is too high\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 49 | - alert: RabbitMQUnroutableMessages 50 | expr: increase(rabbitmq_channel_messages_unroutable_returned_total[1m]) > 0 or increase(rabbitmq_channel_messages_unroutable_dropped_total[1m]) > 0 or increase(rabbitmq_global_messages_unroutable_returned_total[1m]) > 0 or increase(rabbitmq_global_messages_unroutable_dropped_total[1m]) > 0 51 | for: 5m 52 | labels: 53 | severity: warning 54 | annotations: 55 | summary: RabbitMQ unroutable messages in {{ $labels.namespace }} namespace for {{ $labels.service }} instance 56 | description: "A queue has unroutable messages\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" -------------------------------------------------------------------------------- /infra/crds/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | ### source: https://github.com/prometheus-community/helm-charts/tree/kube-prometheus-stack-57.0.1/charts/kube-prometheus-stack/charts/crds 5 | - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml 6 | - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml 7 | - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml 8 | - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml 9 | - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml 10 | - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml 11 | - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml 12 | - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml 13 | ### source: https://cert-manager.io/docs/installation/helm/ 14 | - https://github.com/cert-manager/cert-manager/releases/download/v1.15.3/cert-manager.crds.yaml 15 | ### source: https://github.com/bitnami-labs/sealed-secrets/tree/helm-v2.1.5/helm/sealed-secrets/crds 16 | - https://raw.githubusercontent.com/bitnami-labs/sealed-secrets/refs/tags/v0.27.1/helm/sealed-secrets/crds/bitnami.com_sealedsecrets.yaml 17 | ### source: https://github.com/bank-vaults/vault-operator/tree/v1.21.2/deploy/charts/vault-operator/crds 18 | - https://raw.githubusercontent.com/bank-vaults/vault-operator/v1.22.3/deploy/charts/vault-operator/crds/crd.yaml 19 | 20 | patches: 21 | ### cert-manager provides raw crds with templated annotations and labels that lead to errors, 22 | ### so we replace them with the values that helm release expects 23 | - patch: |- 24 | - op: replace 25 | path: "/metadata/labels" 26 | value: 27 | app.kubernetes.io/instance: "cert-manager" 28 | app.kubernetes.io/managed-by: 
"Helm" 29 | target: 30 | kind: CustomResourceDefinition 31 | name: .*.cert-manager.io 32 | - patch: |- 33 | - op: replace 34 | path: "/metadata/annotations" 35 | value: 36 | meta.helm.sh/release-name: "cert-manager" 37 | meta.helm.sh/release-namespace: "ingress" 38 | target: 39 | kind: CustomResourceDefinition 40 | name: .*.cert-manager.io 41 | -------------------------------------------------------------------------------- /infra/observability/alerts/vault.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-vault 5 | spec: 6 | groups: 7 | - name: vault.rules 8 | rules: 9 | - alert: VaultSealed 10 | expr: vault_core_unsealed == 0 11 | for: 5m 12 | labels: 13 | severity: critical 14 | annotations: 15 | summary: Vault sealed (instance {{ $labels.instance }}) 16 | description: "Vault instance is sealed on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 17 | - alert: VaultTooManyPendingTokens 18 | expr: avg by (namespace, pod, container) (vault_token_create_count - vault_token_store_count) > 50 19 | for: 5m 20 | labels: 21 | severity: warning 22 | annotations: 23 | summary: Vault too many pending tokens (instance {{ $labels.instance }}) 24 | description: "Too many pending tokens {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 25 | - alert: VaultTooManyInfinityTokens 26 | expr: vault_token_count_by_ttl{creation_ttl="+Inf"} > 50 27 | for: 5m 28 | labels: 29 | severity: warning 30 | annotations: 31 | summary: Vault too many infinity tokens (instance {{ $labels.instance }}) 32 | description: "Too many infinity tokens {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 33 | - alert: VaultClusterHealth 34 | expr: vault_core_active{cluster=~"vault-.*"} < 1 35 | for: 0m 36 | labels: 37 | severity: critical 38 | annotations: 39 | summary: Vault cluster is unstable 40 | description: "Vault cluster is not healthy {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 41 | - alert: VaultAutopilotNodeHealthy 42 | # Set to 1 if Autopilot considers all nodes healthy 43 | # https://www.vaultproject.io/docs/internals/telemetry#integrated-storage-raft-autopilot 44 | expr: vault_autopilot_healthy < 1 45 | for: 1m 46 | labels: 47 | severity: critical 48 | annotations: 49 | summary: Autopilot Vault Raft node unhealthy 50 | description: At least one of the Autopilot Vault Raft nodes is unhealthy 51 | - alert: VaultLeadershipLoss 52 | expr: sum(increase(vault_core_leadership_lost_count[1h])) > 5 53 | for: 1m 54 | labels: 55 | severity: critical 56 | annotations: 57 | summary: High frequency of Vault leadership losses 58 | description: There have been more than 5 Vault leadership losses in the past 1h 59 | - alert: VaultRequestFailures 60 | expr: increase(vault_audit_log_request_failure[5m]) > 0 61 | for: 15m 62 | labels: 63 | severity: critical 64 | annotations: 65 | summary: High frequency of failed Vault requests 66 | description: There has been an increased number of failed Vault requests in the last 15 minutes 67 | - alert: VaultResponseFailures 68 | expr: increase(vault_audit_log_response_failure[5m]) > 0 69 | for: 15m 70 | labels: 71 | severity: critical 72 | annotations: 73 | summary: High frequency of failed Vault responses 74 | description: There has been an increased number of 
failed Vault responses in the last 15 minutes 75 | -------------------------------------------------------------------------------- /infra/observability/alerts/redis.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: awesome-prometheus-rules-redis 5 | spec: 6 | groups: 7 | - name: redis.rules 8 | rules: 9 | - alert: RedisDown 10 | expr: redis_up == 0 11 | for: 0m 12 | labels: 13 | severity: critical 14 | annotations: 15 | summary: Redis down (instance {{ $labels.instance }}) 16 | description: "Redis instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 17 | - alert: RedisMissingMaster 18 | expr: (count(redis_instance_info{role="master"}) or vector(0)) < 1 19 | for: 0m 20 | labels: 21 | severity: critical 22 | annotations: 23 | summary: Redis missing master (instance {{ $labels.instance }}) 24 | description: "Redis cluster has no node marked as master.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 25 | - alert: RedisTooManyMasters 26 | expr: count(redis_instance_info{role="master"}) > 1 27 | for: 0m 28 | labels: 29 | severity: critical 30 | annotations: 31 | summary: Redis too many masters (instance {{ $labels.instance }}) 32 | description: "Redis cluster has too many nodes marked as master.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 33 | - alert: RedisDisconnectedSlaves 34 | expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1 35 | for: 0m 36 | labels: 37 | severity: critical 38 | annotations: 39 | summary: Redis disconnected slaves (instance {{ $labels.instance }}) 40 | description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 41 | - alert: RedisReplicationBroken 42 | expr: delta(redis_connected_slaves[1m]) < 0 43 | for: 0m 44 | labels: 45 | severity: critical 46 | annotations: 47 | summary: Redis replication broken (instance {{ $labels.instance }}) 48 | description: "Redis instance lost a slave\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 49 | - alert: RedisClusterFlapping 50 | expr: changes(redis_connected_slaves[1m]) > 1 51 | for: 2m 52 | labels: 53 | severity: critical 54 | annotations: 55 | summary: Redis cluster flapping (instance {{ $labels.instance }}) 56 | description: "Changes have been detected in Redis replica connection. 
This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 57 | - alert: RedisOutOfConfiguredMaxmemory 58 | expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90 59 | for: 2m 60 | labels: 61 | severity: warning 62 | annotations: 63 | summary: Redis out of configured maxmemory (instance {{ $labels.instance }}) 64 | description: "Redis is running out of configured maxmemory (> 90%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 65 | - alert: RedisTooManyConnections 66 | expr: redis_connected_clients > 100 67 | for: 2m 68 | labels: 69 | severity: warning 70 | annotations: 71 | summary: Redis too many connections (instance {{ $labels.instance }}) 72 | description: "Redis instance has too many connections\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 73 | - alert: RedisRejectedConnections 74 | expr: increase(redis_rejected_connections_total[1m]) > 0 75 | for: 0m 76 | labels: 77 | severity: critical 78 | annotations: 79 | summary: Redis rejected connections (instance {{ $labels.instance }}) 80 | description: "Some connections to Redis has been rejected\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 81 | -------------------------------------------------------------------------------- /charts/security/default-kyverno-cluster-policies/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "default-kyverno-cluster-policies.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 7 | {{- end }} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "default-kyverno-cluster-policies.fullname" -}} 15 | {{- if .Values.fullnameOverride }} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 17 | {{- else }} 18 | {{- $name := default .Chart.Name .Values.nameOverride }} 19 | {{- if contains $name .Release.Name }} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 21 | {{- else }} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 23 | {{- end }} 24 | {{- end }} 25 | {{- end }} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "default-kyverno-cluster-policies.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 32 | {{- end }} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "default-kyverno-cluster-policies.labels" -}} 38 | helm.sh/chart: {{ include "default-kyverno-cluster-policies.chart" . }} 39 | {{ include "default-kyverno-cluster-policies.selectorLabels" . }} 40 | {{- if .Chart.AppVersion }} 41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 42 | {{- end }} 43 | app.kubernetes.io/managed-by: {{ .Release.Service }} 44 | {{- end }} 45 | 46 | {{/* 47 | Selector labels 48 | */}} 49 | {{- define "default-kyverno-cluster-policies.selectorLabels" -}} 50 | app.kubernetes.io/name: {{ include "default-kyverno-cluster-policies.name" . 
}} 51 | app.kubernetes.io/instance: {{ .Release.Name }} 52 | {{- end }} 53 | 54 | {{/* 55 | Create the name of the service account to use 56 | */}} 57 | {{- define "default-kyverno-cluster-policies.serviceAccountName" -}} 58 | {{- if .Values.serviceAccount.create }} 59 | {{- default (include "default-kyverno-cluster-policies.fullname" .) .Values.serviceAccount.name }} 60 | {{- else }} 61 | {{- default "default" .Values.serviceAccount.name }} 62 | {{- end }} 63 | {{- end }} 64 | 65 | {{/* 66 | Kubernetes API server address block for network policies 67 | */}} 68 | {{- define "default-kyverno-cluster-policies.APIServerAddress" -}} 69 | {{- $APIServerEndpoints := (index (lookup "v1" "Endpoints" "default" "kubernetes").subsets 0) }} 70 | {{- $APIServerServiceAddress := (print (lookup "v1" "Service" "default" "kubernetes").spec.clusterIP) }} 71 | {{- $APIServerServicePort := (print (index (lookup "v1" "Service" "default" "kubernetes").spec.ports 0).port) }} 72 | - to: 73 | {{- range $k, $v := $APIServerEndpoints.addresses }} 74 | - ipBlock: 75 | cidr: {{ $v.ip }}/32 76 | {{- end }} 77 | ports: 78 | {{- range $k, $v := $APIServerEndpoints.ports }} 79 | {{- if eq $v.name "https" }} 80 | - protocol: TCP 81 | port: {{ $v.port }} 82 | {{- end }} 83 | {{- end }} 84 | - to: 85 | - ipBlock: 86 | cidr: {{ $APIServerServiceAddress }}/32 87 | ports: 88 | - protocol: TCP 89 | port: {{ $APIServerServicePort }} 90 | {{- end }} 91 | 92 | {{/* 93 | Kubernetes DNS service address block for network policies 94 | */}} 95 | {{- define "default-kyverno-cluster-policies.KubeDNSAddress" -}} 96 | {{- if (lookup "v1" "Service" "kube-system" "kube-dns") }} 97 | {{- print (lookup "v1" "Service" "kube-system" "kube-dns").spec.clusterIP "/32" }} 98 | {{- end }} 99 | {{- if (lookup "v1" "Service" "kube-system" "coredns") }} 100 | {{- print (lookup "v1" "Service" "kube-system" "coredns").spec.clusterIP "/32" }} 101 | {{- end }} 102 | {{- end }} 103 | -------------------------------------------------------------------------------- /charts/security/default-namespace-network-policies/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "default-namespace-network-policies.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 7 | {{- end }} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "default-namespace-network-policies.fullname" -}} 15 | {{- if .Values.fullnameOverride }} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 17 | {{- else }} 18 | {{- $name := default .Chart.Name .Values.nameOverride }} 19 | {{- if contains $name .Release.Name }} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 21 | {{- else }} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 23 | {{- end }} 24 | {{- end }} 25 | {{- end }} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 
29 | */}} 30 | {{- define "default-namespace-network-policies.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 32 | {{- end }} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "default-namespace-network-policies.labels" -}} 38 | helm.sh/chart: {{ include "default-namespace-network-policies.chart" . }} 39 | {{ include "default-namespace-network-policies.selectorLabels" . }} 40 | {{- if .Chart.AppVersion }} 41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 42 | {{- end }} 43 | app.kubernetes.io/managed-by: {{ .Release.Service }} 44 | {{- end }} 45 | 46 | {{/* 47 | Selector labels 48 | */}} 49 | {{- define "default-namespace-network-policies.selectorLabels" -}} 50 | app.kubernetes.io/name: {{ include "default-namespace-network-policies.name" . }} 51 | app.kubernetes.io/instance: {{ .Release.Name }} 52 | {{- end }} 53 | 54 | {{/* 55 | Create the name of the service account to use 56 | */}} 57 | {{- define "default-namespace-network-policies.serviceAccountName" -}} 58 | {{- if .Values.serviceAccount.create }} 59 | {{- default (include "default-namespace-network-policies.fullname" .) .Values.serviceAccount.name }} 60 | {{- else }} 61 | {{- default "default" .Values.serviceAccount.name }} 62 | {{- end }} 63 | {{- end }} 64 | 65 | {{/* 66 | Kubernetes API server address block for network policies 67 | */}} 68 | {{- define "default-namespace-network-policies.APIServerAddress" -}} 69 | {{- $APIServerEndpoints := (index (lookup "v1" "Endpoints" "default" "kubernetes").subsets 0) }} 70 | {{- $APIServerServiceAddress := (print (lookup "v1" "Service" "default" "kubernetes").spec.clusterIP) }} 71 | {{- $APIServerServicePort := (print (index (lookup "v1" "Service" "default" "kubernetes").spec.ports 0).port) }} 72 | - to: 73 | {{- range $k, $v := $APIServerEndpoints.addresses }} 74 | - ipBlock: 75 | cidr: {{ $v.ip }}/32 76 | {{- end }} 77 | ports: 78 | {{- range $k, $v := $APIServerEndpoints.ports }} 79 | {{- if eq $v.name "https" }} 80 | - protocol: TCP 81 | port: {{ $v.port }} 82 | {{- end }} 83 | {{- end }} 84 | - to: 85 | - ipBlock: 86 | cidr: {{ $APIServerServiceAddress }}/32 87 | ports: 88 | - protocol: TCP 89 | port: {{ $APIServerServicePort }} 90 | {{- end }} 91 | 92 | {{/* 93 | Kubernetes DNS service address block for network policies 94 | */}} 95 | {{- define "default-namespace-network-policies.KubeDNSAddress" -}} 96 | {{- if (lookup "v1" "Service" "kube-system" "kube-dns") }} 97 | {{- print (lookup "v1" "Service" "kube-system" "kube-dns").spec.clusterIP "/32" }} 98 | {{- end }} 99 | {{- if (lookup "v1" "Service" "kube-system" "coredns") }} 100 | {{- print (lookup "v1" "Service" "kube-system" "coredns").spec.clusterIP "/32" }} 101 | {{- end }} 102 | {{- end }} 103 | -------------------------------------------------------------------------------- /infra/security/falco.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: falco 6 | spec: 7 | chart: 8 | spec: 9 | chart: falco 10 | sourceRef: 11 | kind: HelmRepository 12 | name: falco 13 | namespace: flux-system 14 | version: '>=4.2.2 <5.0.0' 15 | interval: 1h0m0s 16 | releaseName: falco 17 | timeout: 60m 18 | install: 19 | remediation: 20 | retries: 3 21 | crds: Create 22 | upgrade: 23 | remediation: 24 | retries: 3 25 | crds: CreateReplace 26 | values: 27 | podPriorityClassName: system-cluster-critical 28 | # 
https://github.com/falcosecurity/charts/tree/falco-2.0.2/falco#daemonset 29 | controller: 30 | kind: daemonset 31 | driver: 32 | enabled: true 33 | kind: modern_ebpf 34 | # https://github.com/falcosecurity/charts/tree/master/falco#enabling-grpc 35 | collectors: 36 | kubernetes: 37 | enabled: true 38 | tolerations: 39 | - key: node-role.kubernetes.io/control-plane 40 | operator: Exists 41 | falco: 42 | grpc: 43 | enabled: true 44 | unixSocketPath: "unix:///var/run/falco/falco.sock" 45 | threadiness: 8 46 | grpc_output: 47 | enabled: true 48 | rules_file: # order matters 49 | - /etc/falco/falco_rules.yaml 50 | - /etc/falco/falco_rules.local.yaml 51 | - /etc/falco/falco-incubating_rules.yaml 52 | - /etc/falco/falco-sandbox_rules.yaml 53 | - /etc/falco/k8s_audit_rules.yaml 54 | - /etc/falco/rules.d 55 | plugins: 56 | - name: k8saudit 57 | library_path: libk8saudit.so 58 | init_config: 59 | maxEventBytes: 1048576 60 | useAsync: false # https://github.com/falcosecurity/falco/issues/2475#issuecomment-1502903061 61 | # sslCertificate: /etc/falco/falco.pem 62 | open_params: "http://:9765/k8s-audit" 63 | - name: json 64 | library_path: libjson.so 65 | init_config: "" 66 | load_plugins: [k8saudit, json] 67 | falcoctl: 68 | artifact: 69 | install: 70 | # -- Enable the init container. 71 | enabled: true 72 | follow: 73 | # -- Enable the sidecar container. 74 | enabled: true 75 | config: 76 | artifact: 77 | install: 78 | refs: 79 | - falco-rules:3 80 | - falco-incubating-rules:3 81 | - falco-sandbox-rules:3 82 | - k8saudit-rules:0.7 83 | follow: 84 | refs: 85 | - falco-rules:3 86 | - falco-incubating-rules:3 87 | - falco-sandbox-rules:3 88 | - k8saudit-rules:0.7 89 | services: 90 | - name: k8saudit-webhook 91 | type: NodePort 92 | ports: 93 | - port: 9765 # See plugin open_params 94 | nodePort: 32765 # This port is used by k8s apiserver audit webhook 95 | protocol: TCP 96 | tolerations: 97 | - operator: Exists # run on all nodes in cluster 98 | 99 | --- 100 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 101 | kind: HelmRelease 102 | metadata: 103 | name: falco-exporter 104 | spec: 105 | chart: 106 | spec: 107 | chart: falco-exporter 108 | sourceRef: 109 | kind: HelmRepository 110 | name: falco 111 | namespace: flux-system 112 | version: '>=0.8.0 <1.0.0' 113 | interval: 1h0m0s 114 | releaseName: falco-exporter 115 | timeout: 60m 116 | install: 117 | remediation: 118 | retries: 3 119 | crds: Create 120 | upgrade: 121 | remediation: 122 | retries: 3 123 | crds: CreateReplace 124 | values: 125 | serviceMonitor: 126 | enabled: true 127 | grafanaDashboard: 128 | enabled: true 129 | namespace: observability 130 | prometheusRules: 131 | enabled: true 132 | tolerations: 133 | - operator: Exists # run on all nodes in cluster 134 | -------------------------------------------------------------------------------- /infra/observability/goldpinger.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: goldpinger 6 | --- 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | kind: ClusterRoleBinding 9 | metadata: 10 | name: goldpinger 11 | roleRef: 12 | apiGroup: rbac.authorization.k8s.io 13 | kind: ClusterRole 14 | name: view 15 | subjects: 16 | - kind: ServiceAccount 17 | name: goldpinger 18 | namespace: observability 19 | --- 20 | apiVersion: apps/v1 21 | kind: DaemonSet 22 | metadata: 23 | name: goldpinger 24 | labels: 25 | app: goldpinger 26 | spec: 27 | updateStrategy: 28 | type: RollingUpdate 29 | selector: 30 | 
matchLabels: 31 | app: goldpinger 32 | template: 33 | metadata: 34 | annotations: 35 | prometheus.io/scrape: 'true' 36 | prometheus.io/port: '8080' 37 | labels: 38 | app: goldpinger 39 | spec: 40 | serviceAccount: goldpinger 41 | tolerations: 42 | - operator: Exists # run on all nodes in cluster 43 | securityContext: 44 | runAsNonRoot: true 45 | runAsUser: 1000 46 | fsGroup: 2000 47 | seccompProfile: 48 | type: RuntimeDefault 49 | containers: 50 | - name: goldpinger 51 | env: 52 | - name: HOST 53 | value: "0.0.0.0" 54 | - name: PORT 55 | value: "8080" 56 | # injecting real hostname will make for easier to understand graphs/metrics 57 | - name: HOSTNAME 58 | valueFrom: 59 | fieldRef: 60 | fieldPath: spec.nodeName 61 | # podIP is used to select a randomized subset of nodes to ping. 62 | - name: POD_IP 63 | valueFrom: 64 | fieldRef: 65 | fieldPath: status.podIP 66 | - name: HOSTS_TO_RESOLVE 67 | value: "www.google.com play.pulsemusic.com" 68 | - name: TCP_TARGETS 69 | value: 10.30.7.110:27017 10.30.7.111:27017 70 | image: "docker.io/bloomberg/goldpinger:3.9.0" 71 | imagePullPolicy: Always 72 | securityContext: 73 | allowPrivilegeEscalation: false 74 | readOnlyRootFilesystem: true 75 | capabilities: 76 | drop: 77 | - ALL 78 | resources: 79 | limits: 80 | memory: 80Mi 81 | requests: 82 | cpu: 1m 83 | memory: 40Mi 84 | ports: 85 | - containerPort: 8080 86 | name: http 87 | readinessProbe: 88 | httpGet: 89 | path: /healthz 90 | port: 8080 91 | initialDelaySeconds: 20 92 | periodSeconds: 5 93 | livenessProbe: 94 | httpGet: 95 | path: /healthz 96 | port: 8080 97 | initialDelaySeconds: 20 98 | periodSeconds: 5 99 | --- 100 | apiVersion: v1 101 | kind: Service 102 | metadata: 103 | name: goldpinger 104 | labels: 105 | app: goldpinger 106 | spec: 107 | type: ClusterIP 108 | ports: 109 | - port: 8080 110 | name: http 111 | selector: 112 | app: goldpinger 113 | 114 | --- 115 | apiVersion: monitoring.coreos.com/v1 116 | kind: ServiceMonitor 117 | metadata: 118 | name: goldpinger 119 | spec: 120 | endpoints: 121 | - path: /metrics 122 | port: http 123 | selector: 124 | matchLabels: 125 | app: goldpinger 126 | 127 | --- 128 | apiVersion: monitoring.coreos.com/v1 129 | kind: PrometheusRule 130 | metadata: 131 | name: goldpinger 132 | spec: 133 | groups: 134 | - name: goldpinger 135 | rules: 136 | - alert: goldpinger_nodes_unhealthy 137 | expr: sum(goldpinger_nodes_health_total{status="unhealthy"}) 138 | BY (instance, goldpinger_instance) > 0 139 | for: 5m 140 | annotations: 141 | description: | 142 | Goldpinger instance {{ $labels.goldpinger_instance }} has been reporting unhealthy nodes for at least 5 minutes. 
143 | summary: Instance {{ $labels.instance }} down -------------------------------------------------------------------------------- /infra/security/kyverno.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: kyverno 6 | spec: 7 | chart: 8 | spec: 9 | chart: kyverno 10 | sourceRef: 11 | kind: HelmRepository 12 | name: kyverno 13 | namespace: flux-system 14 | version: "3.1.4" 15 | interval: 1h0m0s 16 | releaseName: kyverno 17 | timeout: 60m 18 | install: 19 | remediation: 20 | retries: 3 21 | crds: Create 22 | upgrade: 23 | remediation: 24 | retries: 3 25 | crds: CreateReplace 26 | values: 27 | config: 28 | excludeKyvernoNamespace: false 29 | features: 30 | policyExceptions: 31 | enabled: true 32 | admissionController: 33 | replicas: 3 34 | networkPolicy: 35 | enabled: true 36 | serviceMonitor: 37 | enabled: true 38 | resources: 39 | limits: 40 | memory: 512Mi 41 | requests: 42 | memory: 512Mi 43 | priorityClassName: system-cluster-critical 44 | tolerations: 45 | - key: node-role.kubernetes.io/control-plane 46 | operator: Exists 47 | backgroundController: 48 | resources: 49 | limits: 50 | memory: 512Mi 51 | requests: 52 | memory: 512Mi 53 | serviceMonitor: 54 | enabled: true 55 | tolerations: 56 | - key: node-role.kubernetes.io/control-plane 57 | operator: Exists 58 | cleanupController: 59 | enabled: false 60 | policyReportsCleanup: 61 | enabled: false 62 | reportsController: 63 | resources: 64 | limits: 65 | memory: 512Mi 66 | requests: 67 | memory: 512Mi 68 | serviceMonitor: 69 | enabled: true 70 | tolerations: 71 | - key: node-role.kubernetes.io/control-plane 72 | operator: Exists 73 | webhooksCleanup: 74 | tolerations: 75 | - key: node-role.kubernetes.io/control-plane 76 | operator: Exists 77 | cleanupJobs: 78 | admissionReports: 79 | tolerations: 80 | - key: node-role.kubernetes.io/control-plane 81 | operator: Exists 82 | clusterAdmissionReports: 83 | tolerations: 84 | - key: node-role.kubernetes.io/control-plane 85 | operator: Exists 86 | 87 | # The chart below adds policies to conform to Pod Security Standards 88 | # https://kubernetes.io/docs/concepts/security/pod-security-standards/ 89 | # with the chart's default level = "baseline" (overridden to "restricted" below) 90 | --- 91 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 92 | kind: HelmRelease 93 | metadata: 94 | name: kyverno-policies 95 | spec: 96 | chart: 97 | spec: 98 | chart: kyverno-policies 99 | sourceRef: 100 | kind: HelmRepository 101 | name: kyverno 102 | namespace: flux-system 103 | version: "3.1.4" 104 | interval: 1h0m0s 105 | releaseName: kyverno-policies 106 | timeout: 60m 107 | install: 108 | remediation: 109 | retries: 3 110 | crds: Create 111 | upgrade: 112 | remediation: 113 | retries: 3 114 | crds: CreateReplace 115 | values: 116 | podSecurityStandard: restricted # set 'restricted' for full hardening 117 | podSecuritySeverity: high 118 | validationFailureAction: enforce # set 'enforce' for full hardening 119 | 120 | --- 121 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 122 | kind: HelmRelease 123 | metadata: 124 | name: default-kyverno-cluster-policies 125 | spec: 126 | chart: 127 | spec: 128 | chart: ./helm/charts/security/default-kyverno-cluster-policies 129 | sourceRef: 130 | kind: GitRepository 131 | name: flux-system 132 | namespace: flux-system 133 | interval: 1h0m0s 134 | releaseName: default-kyverno-cluster-policies 135 | dependsOn: 136 | - name: kyverno 137 | timeout: 10m 138 | install: 139 | remediation: 140 | 
retries: 3 141 | crds: Create 142 | upgrade: 143 | remediation: 144 | retries: 3 145 | crds: CreateReplace 146 | values: 147 | require_ro_rootfs: false # TODO: need to complete the work to make this 'true' 148 | -------------------------------------------------------------------------------- /infra/operations/netpol-default.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 3 | kind: HelmRelease 4 | metadata: 5 | name: default-namespace-network-policies 6 | namespace: kube-system 7 | spec: 8 | chart: 9 | spec: 10 | chart: ./helm/charts/security/default-namespace-network-policies 11 | sourceRef: 12 | kind: GitRepository 13 | name: flux-system 14 | namespace: flux-system 15 | interval: 1h0m0s 16 | releaseName: default-namespace-network-policies 17 | timeout: 10m 18 | install: 19 | remediation: 20 | retries: 3 21 | crds: Create 22 | upgrade: 23 | remediation: 24 | retries: 3 25 | crds: CreateReplace 26 | values: 27 | allow_namespace_traffic: true 28 | allow_ingress_cluster_traffic: true 29 | allow_egress_cluster_traffic: true 30 | allow_ingress_private_traffic: false 31 | allow_egress_private_traffic: false 32 | allow_monitoring: true 33 | allow_apiserver: true 34 | allow_internet: false 35 | allow_ingress_traffic: true 36 | allow_egress_traffic: true 37 | 38 | --- 39 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 40 | kind: HelmRelease 41 | metadata: 42 | name: default-namespace-network-policies 43 | namespace: kube-node-lease 44 | spec: 45 | chart: 46 | spec: 47 | chart: ./helm/charts/security/default-namespace-network-policies 48 | sourceRef: 49 | kind: GitRepository 50 | name: flux-system 51 | namespace: flux-system 52 | interval: 1h0m0s 53 | releaseName: default-namespace-network-policies 54 | timeout: 10m 55 | install: 56 | remediation: 57 | retries: 3 58 | crds: Create 59 | upgrade: 60 | remediation: 61 | retries: 3 62 | crds: CreateReplace 63 | values: 64 | allow_namespace_traffic: false 65 | allow_ingress_cluster_traffic: false 66 | allow_egress_cluster_traffic: false 67 | allow_ingress_private_traffic: false 68 | allow_egress_private_traffic: false 69 | allow_monitoring: false 70 | allow_apiserver: false 71 | allow_internet: false 72 | allow_ingress_traffic: false 73 | allow_egress_traffic: false 74 | 75 | --- 76 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 77 | kind: HelmRelease 78 | metadata: 79 | name: default-namespace-network-policies 80 | namespace: kube-public 81 | spec: 82 | chart: 83 | spec: 84 | chart: ./helm/charts/security/default-namespace-network-policies 85 | sourceRef: 86 | kind: GitRepository 87 | name: flux-system 88 | namespace: flux-system 89 | interval: 1h0m0s 90 | releaseName: default-namespace-network-policies 91 | timeout: 10m 92 | install: 93 | remediation: 94 | retries: 3 95 | crds: Create 96 | upgrade: 97 | remediation: 98 | retries: 3 99 | crds: CreateReplace 100 | values: 101 | allow_namespace_traffic: false 102 | allow_ingress_cluster_traffic: false 103 | allow_egress_cluster_traffic: false 104 | allow_ingress_private_traffic: false 105 | allow_egress_private_traffic: false 106 | allow_monitoring: false 107 | allow_apiserver: false 108 | allow_internet: false 109 | allow_ingress_traffic: false 110 | allow_egress_traffic: false 111 | 112 | 113 | --- 114 | apiVersion: helm.toolkit.fluxcd.io/v2beta2 115 | kind: HelmRelease 116 | metadata: 117 | name: default-namespace-network-policies 118 | namespace: default 119 | spec: 120 | chart: 121 | spec: 122 | chart: 
./helm/charts/security/default-namespace-network-policies 123 | sourceRef: 124 | kind: GitRepository 125 | name: flux-system 126 | namespace: flux-system 127 | interval: 1h0m0s 128 | releaseName: default-namespace-network-policies 129 | timeout: 10m 130 | install: 131 | remediation: 132 | retries: 3 133 | crds: Create 134 | upgrade: 135 | remediation: 136 | retries: 3 137 | crds: CreateReplace 138 | values: 139 | allow_namespace_traffic: false 140 | allow_ingress_cluster_traffic: false 141 | allow_egress_cluster_traffic: false 142 | allow_ingress_private_traffic: false 143 | allow_egress_private_traffic: false 144 | allow_monitoring: false 145 | allow_apiserver: false 146 | allow_internet: false 147 | allow_ingress_traffic: false 148 | allow_egress_traffic: false 149 | -------------------------------------------------------------------------------- /infra/storage/local-path-storage/local-path-storage.yaml: -------------------------------------------------------------------------------- 1 | # src: https://raw.githubusercontent.com/rancher/local-path-provisioner/master/deploy/local-path-storage.yaml 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: local-path-storage 6 | 7 | --- 8 | apiVersion: v1 9 | kind: ServiceAccount 10 | metadata: 11 | name: local-path-provisioner-service-account 12 | namespace: local-path-storage 13 | 14 | --- 15 | apiVersion: rbac.authorization.k8s.io/v1 16 | kind: ClusterRole 17 | metadata: 18 | name: local-path-provisioner-role 19 | rules: 20 | - apiGroups: [ "" ] 21 | resources: [ "nodes", "persistentvolumeclaims", "configmaps" ] 22 | verbs: [ "get", "list", "watch" ] 23 | - apiGroups: [ "" ] 24 | resources: [ "endpoints", "persistentvolumes", "pods" ] 25 | verbs: [ "*" ] 26 | - apiGroups: [ "" ] 27 | resources: [ "events" ] 28 | verbs: [ "create", "patch" ] 29 | - apiGroups: [ "storage.k8s.io" ] 30 | resources: [ "storageclasses" ] 31 | verbs: [ "get", "list", "watch" ] 32 | 33 | --- 34 | apiVersion: rbac.authorization.k8s.io/v1 35 | kind: ClusterRoleBinding 36 | metadata: 37 | name: local-path-provisioner-bind 38 | roleRef: 39 | apiGroup: rbac.authorization.k8s.io 40 | kind: ClusterRole 41 | name: local-path-provisioner-role 42 | subjects: 43 | - kind: ServiceAccount 44 | name: local-path-provisioner-service-account 45 | namespace: local-path-storage 46 | 47 | --- 48 | apiVersion: apps/v1 49 | kind: Deployment 50 | metadata: 51 | name: local-path-provisioner 52 | namespace: local-path-storage 53 | spec: 54 | replicas: 1 55 | selector: 56 | matchLabels: 57 | app: local-path-provisioner 58 | template: 59 | metadata: 60 | labels: 61 | app: local-path-provisioner 62 | spec: 63 | serviceAccountName: local-path-provisioner-service-account 64 | containers: 65 | - name: local-path-provisioner 66 | image: rancher/local-path-provisioner:v0.0.26 67 | imagePullPolicy: IfNotPresent 68 | command: 69 | - local-path-provisioner 70 | - --debug 71 | - start 72 | - --config 73 | - /etc/config/config.json 74 | volumeMounts: 75 | - name: config-volume 76 | mountPath: /etc/config/ 77 | env: 78 | - name: POD_NAMESPACE 79 | valueFrom: 80 | fieldRef: 81 | fieldPath: metadata.namespace 82 | volumes: 83 | - name: config-volume 84 | configMap: 85 | name: local-path-config 86 | 87 | --- 88 | apiVersion: storage.k8s.io/v1 89 | kind: StorageClass 90 | metadata: 91 | name: local-path 92 | provisioner: rancher.io/local-path 93 | volumeBindingMode: WaitForFirstConsumer 94 | reclaimPolicy: Delete 95 | 96 | 97 | --- 98 | kind: ConfigMap 99 | apiVersion: v1 100 | metadata: 101 | name: 
local-path-config 102 | namespace: local-path-storage 103 | data: 104 | config.json: |- 105 | { 106 | "nodePathMap":[ 107 | { 108 | "node":"DEFAULT_PATH_FOR_NON_LISTED_NODES", 109 | "paths":["/opt/local-path-provisioner"] 110 | } 111 | ] 112 | } 113 | setup: |- 114 | #!/bin/sh 115 | while getopts "m:s:p:" opt 116 | do 117 | case $opt in 118 | p) 119 | absolutePath=$OPTARG 120 | ;; 121 | s) 122 | sizeInBytes=$OPTARG 123 | ;; 124 | m) 125 | volMode=$OPTARG 126 | ;; 127 | esac 128 | done 129 | 130 | mkdir -m 0777 -p ${absolutePath} 131 | teardown: |- 132 | #!/bin/sh 133 | while getopts "m:s:p:" opt 134 | do 135 | case $opt in 136 | p) 137 | absolutePath=$OPTARG 138 | ;; 139 | s) 140 | sizeInBytes=$OPTARG 141 | ;; 142 | m) 143 | volMode=$OPTARG 144 | ;; 145 | esac 146 | done 147 | 148 | rm -rf ${absolutePath} 149 | helperPod.yaml: |- 150 | apiVersion: v1 151 | kind: Pod 152 | metadata: 153 | name: helper-pod 154 | spec: 155 | containers: 156 | - name: helper-pod 157 | image: busybox:1.35 158 | imagePullPolicy: IfNotPresent 159 | -------------------------------------------------------------------------------- /.github/workflows/pr-demo-env.yaml: -------------------------------------------------------------------------------- 1 | name: Bring up demo env 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | types: 8 | - opened 9 | - edited 10 | - synchronize 11 | issue_comment: 12 | types: 13 | - created 14 | workflow_dispatch: 15 | 16 | jobs: 17 | check-pr: 18 | runs-on: ubuntu-latest 19 | outputs: 20 | check: ${{ steps.check.outputs.triggered }} 21 | feature_name: ${{ steps.feature_name.outputs.feature_name }} 22 | steps: 23 | - uses: khan/pull-request-comment-trigger@v1.1.0 24 | id: check 25 | with: 26 | trigger: '@deploy' 27 | reaction: rocket 28 | env: 29 | GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}' 30 | 31 | - name: Dump github context 32 | env: 33 | GITHUB_CONTEXT: ${{ toJson(github) }} 34 | run: echo "$GITHUB_CONTEXT" 35 | 36 | - name: Dump comment body 37 | id: feature_name 38 | if: steps.check.outputs.triggered == 'true' 39 | env: 40 | BODY: ${{ steps.check.outputs.comment_body }} 41 | run: | 42 | NAME=$(echo $BODY | sed "s|@deploy ||" | sed "s| |_|g" | cut -c 1-16) 43 | echo "feature_name=$NAME" > $GITHUB_OUTPUT 44 | 45 | demo-env: 46 | needs: check-pr 47 | if: needs.check-pr.outputs.check == 'true' 48 | runs-on: ubuntu-latest 49 | permissions: 50 | contents: write 51 | issues: write 52 | pull-requests: write 53 | steps: 54 | - name: Checkout Flux repo 55 | uses: actions/checkout@master 56 | with: 57 | ref: main 58 | repository: ${{ github.repository_owner }}/flux2-general 59 | token: ${{ secrets.TOKEN_RW }} 60 | 61 | - name: Dump feature name 62 | env: 63 | NAME: ${{ needs.check-pr.outputs.feature_name }} 64 | run: echo $NAME 65 | 66 | - name: Create PR manifests 67 | env: 68 | NAME: ${{ needs.check-pr.outputs.feature_name }} 69 | run: | 70 | export KS_PATH="apps/demoapp-work/flux-ks-$NAME.yaml" 71 | 72 | yq -i '.resources += [ env(KS_PATH) ]' clusters/demo-000/kustomization.yaml 73 | 74 | echo '--- 75 | apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 76 | kind: Kustomization 77 | metadata: 78 | name: demoapp-work-$NAME 79 | namespace: flux-system 80 | spec: 81 | interval: 60m0s 82 | sourceRef: 83 | kind: GitRepository 84 | name: flux-system 85 | path: ./clusters/demo-000/apps/demoapp-work/$NAME 86 | prune: true 87 | ' | envsubst > ./clusters/demo-000/${KS_PATH} 88 | 89 | cp -rf ./clusters/demo-000/apps/demoapp-work/PR ./clusters/demo-000/apps/demoapp-work/$NAME 90 | find 
./clusters/demo-000/apps/demoapp-work/$NAME -type f -exec /bin/sh -c 'envsubst < $1 > $1.tmp && mv $1.tmp $1' -- {} \; 91 | 92 | - name: Update application version 93 | id: commit_version 94 | env: 95 | NAME: ${{ needs.check-pr.outputs.feature_name }} 96 | run: | 97 | # Make a git commit 98 | git config user.name "GitHub Actions Bot" 99 | git config user.email "<>" 100 | git status 101 | git add . 102 | git commit -m "Add $NAME namespace for demoapp-work at demo-000 cluster" 103 | git status 104 | git push origin main 105 | git tag "deploy-$NAME" 106 | git push origin main --tags 107 | 108 | - uses: actions-ecosystem/action-add-labels@v1 109 | with: 110 | labels: ${{ format('{0}-{1}', 'deploy', needs.check-pr.outputs.feature_name) }} 111 | 112 | - uses: actions/github-script@v3 113 | env: 114 | NAME: ${{ needs.check-pr.outputs.feature_name }} 115 | with: 116 | github-token: ${{ secrets.GITHUB_TOKEN }} 117 | script: | 118 | github.issues.createComment({ 119 | issue_number: context.issue.number, 120 | owner: context.repo.owner, 121 | repo: context.repo.repo, 122 | body: `${{env.NAME}} namespace has been deployed, access the app at https://demoapp-work-${{env.NAME}}.example.com and merge/close the PR to remove it` 123 | }) 124 | -------------------------------------------------------------------------------- /clusters/demo-000/infra/observability/kube-prometheus-stack.patch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.toolkit.fluxcd.io/v2beta1 3 | kind: HelmRelease 4 | metadata: 5 | name: kube-prometheus-stack 6 | spec: 7 | values: 8 | alertmanager: 9 | alertmanagerSpec: 10 | storage: 11 | volumeClaimTemplate: 12 | spec: 13 | storageClassName: local-path 14 | resources: 15 | requests: 16 | storage: 10Gi # with local-path the size is not actually treated, but we put it here to show how we estimate data usage 17 | tolerations: 18 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 19 | operator: Exists 20 | nodeSelector: 21 | kubernetes.io/hostname: minikube # we bind single node installation to particular node due to local path provisioner 22 | ingress: 23 | enabled: true 24 | annotations: 25 | nginx.ingress.kubernetes.io/auth-type: basic 26 | nginx.ingress.kubernetes.io/auth-secret: basic-auth 27 | nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required - supply user & password' 28 | hosts: 29 | - alerts.example.com 30 | paths: 31 | - / 32 | pathType: ImplementationSpecific 33 | tls: 34 | - secretName: alertmanager-tls 35 | hosts: 36 | - alerts.example.com 37 | 38 | grafana: 39 | tolerations: 40 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 41 | operator: Exists 42 | nodeSelector: 43 | kubernetes.io/hostname: minikube # we bind single node installation to particular node due to local path provisioner 44 | persistence: 45 | enabled: true 46 | storageClassName: local-path 47 | size: 10Gi 48 | ingress: 49 | enabled: true 50 | hosts: 51 | - grafana.example.com 52 | paths: 53 | - / 54 | pathType: ImplementationSpecific 55 | tls: 56 | - secretName: grafana-tls 57 | hosts: 58 | - grafana.example.com 59 | 60 | prometheus: 61 | prometheusSpec: 62 | tolerations: 63 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 64 | operator: Exists 65 | nodeSelector: 66 | kubernetes.io/hostname: minikube # we bind single node 
installation to particular node due to local path provisioner 67 | remoteWrite: 68 | - url: http://victoriametrics:8428/api/v1/write 69 | remoteWriteDashboards: true 70 | storageSpec: 71 | volumeClaimTemplate: 72 | spec: 73 | storageClassName: local-path 74 | resources: 75 | requests: 76 | storage: 20Gi # with local-path the size is not actually treated, but we put it here to show how we estimate data usage 77 | 78 | ingress: 79 | enabled: true 80 | annotations: 81 | nginx.ingress.kubernetes.io/auth-type: basic 82 | nginx.ingress.kubernetes.io/auth-secret: basic-auth 83 | nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required - supply user & password' 84 | hosts: 85 | - prometheus.example.com 86 | paths: 87 | - / 88 | pathType: ImplementationSpecific 89 | tls: 90 | - secretName: prometheus-tls 91 | hosts: 92 | - prometheus.example.com 93 | 94 | prometheusOperator: 95 | tolerations: 96 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 97 | operator: Exists 98 | admissionWebhooks: 99 | patch: 100 | tolerations: 101 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 102 | operator: Exists 103 | 104 | kube-state-metrics: 105 | tolerations: 106 | - key: node-role.kubernetes.io/master # we put observability stack on controlplane nodes to utilize their resources 107 | operator: Exists 108 | --------------------------------------------------------------------------------