├── .gitignore
├── kind
    ├── overlays
    │   ├── dev
    │   │   ├── namespace.yaml
    │   │   └── kustomization.yaml
    │   └── dev-persistence
    │   │   ├── namespace.yaml
    │   │   ├── kustomization.yaml
    │   │   └── deployment.yaml
    ├── base
    │   ├── kustomization.yaml
    │   ├── service.yaml
    │   ├── deployment_rbac.yaml
    │   ├── configMap.yaml
    │   └── deployment.yaml
    ├── kind-cluster
    │   └── kind-cluster.yaml
    └── README.md
├── gke
    ├── namespace.yaml
    ├── headless-service.yaml
    ├── client-service.yaml
    ├── configmap.yaml
    ├── rbac.yaml
    ├── statefulset.yaml
    └── README.md
├── minikube
    ├── namespace.yaml
    ├── services.yaml
    ├── rbac.yaml
    ├── configmap.yaml
    ├── Makefile
    ├── statefulset.yaml
    └── README.md
├── README.md
└── LICENSE


/.gitignore:
--------------------------------------------------------------------------------
1 | gke/user
2 | gke/pass
3 | gke/cookie
4 | 


--------------------------------------------------------------------------------
/kind/overlays/dev/namespace.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 |   name: rabbitmq-dev
5 | 


--------------------------------------------------------------------------------
/kind/overlays/dev-persistence/namespace.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 |   name: rabbitmq-dev-persistence
5 | 


--------------------------------------------------------------------------------
/kind/overlays/dev/kustomization.yaml:
--------------------------------------------------------------------------------
 1 | commonLabels:
 2 |   app: rabbitmq
 3 |   
 4 | bases:
 5 |  - ../../base
 6 | 
 7 | resources:
 8 |  - namespace.yaml 
 9 | 
10 | namespace: rabbitmq-dev


--------------------------------------------------------------------------------
/gke/namespace.yaml:
--------------------------------------------------------------------------------
1 | ## All resources will be created in this namespace
2 | ## To delete all resources created by this example, simply delete this namespace: kubectl delete namespace test-rabbitmq
3 | apiVersion: v1
4 | kind: Namespace
5 | metadata:
6 |   name: test-rabbitmq
7 | 


--------------------------------------------------------------------------------
/kind/base/kustomization.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: kustomize.config.k8s.io/v1beta1
 2 | kind: Kustomization
 3 | commonLabels:
 4 |   app: rabbitmq
 5 | 
 6 | resources:
 7 | - configMap.yaml
 8 | - deployment.yaml
 9 | - deployment_rbac.yaml
10 | - service.yaml
11 | 


--------------------------------------------------------------------------------
/kind/overlays/dev-persistence/kustomization.yaml:
--------------------------------------------------------------------------------
 1 | commonLabels:
 2 |   app: rabbitmq
 3 |   
 4 | bases:
 5 |  - ../../base
 6 | 
 7 | resources:
 8 |  - namespace.yaml
 9 | 
10 | patchesStrategicMerge:
11 |  - deployment.yaml
12 | 
13 | 
14 | namespace: rabbitmq-dev-persistence


--------------------------------------------------------------------------------
/minikube/namespace.yaml:
--------------------------------------------------------------------------------
1 | ## All resources will be created in this namespace
2 | ## To delete all resources created by this example, simply delete this namespace:
3 | ## kubectl delete -f rabbitmq_statefulsets
4 | apiVersion: v1
5 | kind: Namespace
6 | metadata:
7 |   name: test-rabbitmq
8 | 


--------------------------------------------------------------------------------
/kind/base/service.yaml:
--------------------------------------------------------------------------------
 1 | kind: Service
 2 | apiVersion: v1
 3 | metadata:
 4 |   name: rabbitmq
 5 | spec:
 6 |   type: NodePort
 7 |   ports:
 8 |    - name: http
 9 |      protocol: TCP
10 |      port: 15672
11 |      targetPort: 15672
12 |      nodePort: 31672
13 |    - name: amqp
14 |      protocol: TCP
15 |      port: 5672
16 |      targetPort: 5672
17 |      nodePort: 30672
18 |   selector:
19 |     app: rabbitmq
20 | 


--------------------------------------------------------------------------------
/gke/headless-service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: rabbitmq-headless
 5 |   namespace: test-rabbitmq
 6 | spec:
 7 |   clusterIP: None
 8 |   ports:
 9 |   - name: epmd
10 |     port: 4369
11 |     protocol: TCP
12 |     targetPort: 4369
13 |   - name: cluster-rpc
14 |     port: 25672
15 |     protocol: TCP
16 |     targetPort: 25672
17 |   selector:
18 |     app: rabbitmq
19 |   type: ClusterIP
20 |   sessionAffinity: None
21 | 


--------------------------------------------------------------------------------
/gke/client-service.yaml:
--------------------------------------------------------------------------------
 1 | kind: Service
 2 | apiVersion: v1
 3 | metadata:
 4 |   namespace: test-rabbitmq
 5 |   name: rabbitmq-client
 6 |   labels:
 7 |     app: rabbitmq
 8 |     type: LoadBalancer
 9 | spec:
10 |   type: LoadBalancer
11 |   ports:
12 |    - name: http
13 |      protocol: TCP
14 |      port: 15672
15 |    - name: prometheus
16 |      protocol: TCP
17 |      port: 15692
18 |    - name: amqp
19 |      protocol: TCP
20 |      port: 5672
21 |   selector:
22 |     app: rabbitmq
23 | 


--------------------------------------------------------------------------------
/kind/overlays/dev-persistence/deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | # Strategic-merge patch applied to the base StatefulSet by this overlay's kustomization.yaml.
 3 | # See the Prerequisites section of https://www.rabbitmq.com/cluster-formation.html#peer-discovery-k8s.
 4 | kind: StatefulSet
 5 | metadata:
 6 |   name: rabbitmq
 7 | spec:
 8 |   template:
 9 |     spec:
10 |       containers:
11 |       # Matched against the base container by name; only the mount is added.
12 |       - name: rabbitmq-k8s
13 |         volumeMounts:
14 |           # Mount the claim declared below. Without a mount the PVC is
15 |           # provisioned but the node's data never lands on it.
16 |           - name: rabbitmq-data
17 |             mountPath: /var/lib/rabbitmq/mnesia
18 |   volumeClaimTemplates:
19 |   - metadata:
20 |       name: rabbitmq-data
21 |     spec:
22 |       accessModes: [ "ReadWriteOnce" ]
23 |       storageClassName: "standard"
24 |       resources:
25 |         requests:
26 |           storage: 1Gi
27 | 


--------------------------------------------------------------------------------
/minikube/services.yaml:
--------------------------------------------------------------------------------
 1 | kind: Service
 2 | apiVersion: v1
 3 | metadata:
 4 |   namespace: test-rabbitmq
 5 |   name: rabbitmq  # same name as serviceName in minikube/statefulset.yaml
 6 |   labels:
 7 |     app: rabbitmq
 8 |     type: LoadBalancer  # NOTE(review): spec.type below is NodePort; this label looks stale -- confirm
 9 | spec:
10 |   type: NodePort
11 |   ports:
12 |    - name: http
13 |      protocol: TCP
14 |      port: 15672
15 |      targetPort: 15672
16 |      nodePort: 31672  # pinned rather than auto-assigned
17 |    - name: amqp
18 |      protocol: TCP
19 |      port: 5672
20 |      targetPort: 5672
21 |      nodePort: 30672  # pinned rather than auto-assigned
22 |   selector:
23 |     app: rabbitmq  # matches the pod template label in minikube/statefulset.yaml
24 | 


--------------------------------------------------------------------------------
/kind/kind-cluster/kind-cluster.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: kind.x-k8s.io/v1alpha4
 2 | kind: Cluster
 3 | nodes:
 4 | - role: control-plane
 5 |   extraPortMappings:
 6 |   - hostPort: 15672  # HTTP API and management UI on localhost
 7 |     containerPort: 31672  # nodePort of the "http" port in kind/base/service.yaml
 8 |   - hostPort: 5672  # AMQP client connections on localhost
 9 |     containerPort: 30672  # nodePort of the "amqp" port in kind/base/service.yaml
10 |   kubeadmConfigPatches:
11 |   - |
12 |     apiVersion: kubeadm.k8s.io/v1beta2
13 |     kind: InitConfiguration
14 |     nodeRegistration:
15 |       kubeletExtraArgs:
16 |         node-labels: "ingress-ready=true"
17 |         authorization-mode: "AlwaysAllow"
18 | 


--------------------------------------------------------------------------------
/gke/configmap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap
 3 | metadata:
 4 |   name: rabbitmq-config
 5 |   namespace: test-rabbitmq
 6 | data:
 7 |   enabled_plugins: |
 8 |     [rabbitmq_peer_discovery_k8s, rabbitmq_management, rabbitmq_prometheus].
 9 |   rabbitmq.conf: |
10 |     cluster_formation.peer_discovery_backend = k8s
11 |     cluster_formation.k8s.host = kubernetes.default.svc.cluster.local
12 |     cluster_formation.k8s.address_type = hostname
13 |     cluster_formation.k8s.service_name = rabbitmq-headless
14 | 
15 |     queue_master_locator=min-masters
16 | 


--------------------------------------------------------------------------------
/kind/base/deployment_rbac.yaml:
--------------------------------------------------------------------------------
 1 | kind: Role
 2 | apiVersion: rbac.authorization.k8s.io/v1
 3 | metadata:
 4 |   name: rabbitmq-peer-discovery-rbac
 5 |  
 6 | rules:
 7 | - apiGroups: 
 8 |     - ""
 9 |   resources: 
10 |     - endpoints
11 |   verbs: 
12 |     - get
13 |     - list
14 |     - watch
15 | - apiGroups:
16 |     - ""
17 |   resources:
18 |     - events
19 |   verbs:
20 |     - create
21 | ---
22 | kind: RoleBinding
23 | apiVersion: rbac.authorization.k8s.io/v1
24 | metadata:
25 |   name: rabbitmq-peer-discovery-rbac
26 | subjects:
27 | - kind: ServiceAccount
28 |   name: default
29 | roleRef:
30 |   apiGroup: rbac.authorization.k8s.io
31 |   kind: Role
32 |   name: rabbitmq-peer-discovery-rbac
33 | 


--------------------------------------------------------------------------------
/gke/rbac.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | apiVersion: v1
 3 | kind: ServiceAccount  # subject of the RoleBinding at the end of this file
 4 | metadata:
 5 |   name: rabbitmq
 6 |   namespace: test-rabbitmq
 7 | ---
 8 | kind: Role
 9 | apiVersion: rbac.authorization.k8s.io/v1  # rbac v1beta1 is no longer served as of Kubernetes 1.22
10 | metadata:
11 |   name: rabbitmq
12 |   namespace: test-rabbitmq
13 | rules:
14 | - apiGroups: [""]  # "" is the core API group
15 |   resources: ["endpoints"]
16 |   verbs: ["get"]
17 | - apiGroups: [""]
18 |   resources: ["events"]
19 |   verbs: ["create"]
20 | ---
21 | kind: RoleBinding
22 | apiVersion: rbac.authorization.k8s.io/v1  # same API version as the Role it references
23 | metadata:
24 |   name: rabbitmq
25 |   namespace: test-rabbitmq
26 | subjects:
27 | - kind: ServiceAccount
28 |   name: rabbitmq
29 | roleRef:
30 |   apiGroup: rbac.authorization.k8s.io
31 |   kind: Role
32 |   name: rabbitmq
33 | 


--------------------------------------------------------------------------------
/minikube/rbac.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | apiVersion: v1
 3 | kind: ServiceAccount  # subject of the RoleBinding at the end of this file
 4 | metadata:
 5 |   name: rabbitmq
 6 |   namespace: test-rabbitmq
 7 | ---
 8 | kind: Role
 9 | apiVersion: rbac.authorization.k8s.io/v1  # rbac v1beta1 is no longer served as of Kubernetes 1.22
10 | metadata:
11 |   name: rabbitmq-peer-discovery-rbac
12 |   namespace: test-rabbitmq
13 | rules:
14 | - apiGroups: [""]  # "" is the core API group
15 |   resources: ["endpoints"]
16 |   verbs: ["get"]
17 | - apiGroups: [""]
18 |   resources: ["events"]
19 |   verbs: ["create"]
20 | ---
21 | kind: RoleBinding
22 | apiVersion: rbac.authorization.k8s.io/v1  # same API version as the Role it references
23 | metadata:
24 |   name: rabbitmq-peer-discovery-rbac
25 |   namespace: test-rabbitmq
26 | subjects:
27 | - kind: ServiceAccount
28 |   name: rabbitmq
29 | roleRef:
30 |   apiGroup: rbac.authorization.k8s.io
31 |   kind: Role
32 |   name: rabbitmq-peer-discovery-rbac
33 | 


--------------------------------------------------------------------------------
/minikube/configmap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap
 3 | metadata:
 4 |   name: rabbitmq-config
 5 |   namespace: test-rabbitmq
 6 | data:
 7 |   enabled_plugins: |
 8 |       [rabbitmq_management,rabbitmq_peer_discovery_k8s].
 9 | 
10 |   rabbitmq.conf: |
11 |       ## Cluster formation. See https://www.rabbitmq.com/cluster-formation.html to learn more.
12 |       cluster_formation.peer_discovery_backend  = rabbit_peer_discovery_k8s
13 |       cluster_formation.k8s.host = kubernetes.default.svc.cluster.local
14 |       ## Should RabbitMQ node name be computed from the pod's hostname or IP address?
15 |       ## IP addresses are not stable, so using [stable] hostnames is recommended when possible.
16 |       ## Set to "hostname" to use pod hostnames.
17 |       ## When this value is changed, so should the variable used to set the RABBITMQ_NODENAME
18 |       ## environment variable.
19 |       cluster_formation.k8s.address_type = hostname
20 |       ## How often should node cleanup checks run?
21 |       cluster_formation.node_cleanup.interval = 30
22 |       ## Set to false if automatic removal of unknown/absent nodes
23 |       ## is desired. This can be dangerous, see
24 |       ##  * https://www.rabbitmq.com/cluster-formation.html#node-health-checks-and-cleanup
25 |       ##  * https://groups.google.com/forum/#!msg/rabbitmq-users/wuOfzEywHXo/k8z_HWIkBgAJ
26 |       cluster_formation.node_cleanup.only_log_warning = true
27 |       cluster_partition_handling = autoheal
28 |       ## See https://www.rabbitmq.com/ha.html#master-migration-data-locality
29 |       queue_master_locator=min-masters
30 |       ## This is just an example.
31 |       ## This enables remote access for the default user with well known credentials.
32 |       ## Consider deleting the default user and creating a separate user with a set of generated
33 |       ## credentials instead.
34 |       ## Learn more at https://www.rabbitmq.com/access-control.html#loopback-users
35 |       loopback_users.guest = false
36 | 


--------------------------------------------------------------------------------
/kind/base/configMap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap
 3 | metadata:
 4 |   name: rabbitmq-config
 5 | data:
 6 |   enabled_plugins: |
 7 |       [rabbitmq_management,rabbitmq_peer_discovery_k8s].
 8 | 
 9 |   rabbitmq.conf: |
10 |       ## Cluster formation. See https://www.rabbitmq.com/cluster-formation.html to learn more.
11 |       cluster_formation.peer_discovery_backend  = rabbit_peer_discovery_k8s
12 |       cluster_formation.k8s.host = kubernetes.default.svc.cluster.local
13 |       ## Should RabbitMQ node name be computed from the pod's hostname or IP address?
14 |       ## IP addresses are not stable, so using [stable] hostnames is recommended when possible.
15 |       ## Set to "hostname" to use pod hostnames.
16 |       ## When this value is changed, so should the variable used to set the RABBITMQ_NODENAME
17 |       ## environment variable.
18 |       log.file.level = debug
19 |       log.console = true
20 |       log.console.level = debug
21 |       cluster_formation.k8s.address_type = hostname
22 |       ## How often should node cleanup checks run?
23 |       cluster_formation.node_cleanup.interval = 30
24 |       ## Set to false if automatic removal of unknown/absent nodes
25 |       ## is desired. This can be dangerous, see
26 |       ##  * https://www.rabbitmq.com/cluster-formation.html#node-health-checks-and-cleanup
27 |       ##  * https://groups.google.com/forum/#!msg/rabbitmq-users/wuOfzEywHXo/k8z_HWIkBgAJ
28 |       cluster_formation.node_cleanup.only_log_warning = true
29 |       cluster_partition_handling = autoheal
30 |       ## See https://www.rabbitmq.com/ha.html#master-migration-data-locality
31 |       queue_master_locator=min-masters
32 |       ## This is just an example.
33 |       ## This enables remote access for the default user with well known credentials.
34 |       ## Consider deleting the default user and creating a separate user with a set of generated
35 |       ## credentials instead.
36 |       ## Learn more at https://www.rabbitmq.com/access-control.html#loopback-users
37 |       loopback_users.guest = false


--------------------------------------------------------------------------------
/minikube/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL := bash# we want bash behaviour in all shell invocations
 2 | PLATFORM := $(shell uname)
 3 | 
 4 | # https://stackoverflow.com/questions/4842424/list-of-ansi-color-escape-sequences
 5 | RED := \033[1;31m
 6 | GREEN := \033[1;32m
 7 | YELLOW := \033[1;33m
 8 | WHITE := \033[1;37m
 9 | BOLD := \033[1m
10 | NORMAL := \033[0m
11 | 
12 | OK := $(GREEN)OK$(NORMAL)\n
13 | 
14 | K8S_NAMESPACE := test-rabbitmq
15 | 
16 | ### Tested on OS X 10.14.6 & 10.15.1
17 | ifeq ($(PLATFORM),Darwin)
18 | 
19 | ### DEPS ###
20 | #
21 | VIRTUALBOX := /usr/local/bin/VBoxManage
22 | $(VIRTUALBOX):
23 | 	@brew cask install virtualbox \
24 | 	|| ( echo "Remember to read & follow the Caveats if installation fails" ; exit 1 )
25 | 
26 | MINIKUBE := /usr/local/bin/minikube
27 | $(MINIKUBE): $(VIRTUALBOX)
28 | 	@brew install minikube
29 | 
30 | KUBECTL := /usr/local/bin/kubectl
31 | $(KUBECTL):
32 | 	@brew install kubectl
33 | 
34 | ### TARGETS ###
35 | #
36 | .DEFAULT_GOAL := wait-for-rabbitmq
37 | 
38 | .PHONY: start-minikube
39 | start-minikube: $(MINIKUBE)
40 | 	@( $(MINIKUBE) status | grep Running ) \
41 | 	|| $(MINIKUBE) start --vm-driver=virtualbox --disk-size "10 GB"
42 | 
43 | .PHONY: run-in-minikube
44 | run-in-minikube: start-minikube $(KUBECTL)
45 | 	@( $(KUBECTL) get namespace $(K8S_NAMESPACE) \
46 | 	   || $(KUBECTL) create namespace $(K8S_NAMESPACE) ) \
47 | 	&& $(KUBECTL) apply -f .
48 | 
49 | CHECK_EVERY := 5
50 | 
51 | define RABBITMQ_STATEFULSET_READY_REPLICAS
52 | $(KUBECTL) --namespace=$(K8S_NAMESPACE) get statefulset.apps/rabbitmq --output=jsonpath='{.status.readyReplicas}'
53 | endef
54 | 
55 | define RABBITMQ_STATEFULSET_REPLICAS
56 | $(KUBECTL) --namespace=$(K8S_NAMESPACE) get statefulset.apps/rabbitmq --output=jsonpath='{.status.replicas}'
57 | endef
58 | 
59 | define RABBITMQ_STATEFULSET_READY
60 | [ $$($(RABBITMQ_STATEFULSET_REPLICAS)) = $$($(RABBITMQ_STATEFULSET_READY_REPLICAS)) ]
61 | endef
62 | 
63 | .PHONY: wait-for-rabbitmq # default goal: deploy, wait for readiness, print cluster status
64 | wait-for-rabbitmq: run-in-minikube # polls every $(CHECK_EVERY)s until ready replicas equal replicas
65 | 	@printf "$(YELLOW)Waiting for RabbitMQ StatefulSet to be ready..." \
66 | 	; while ! $(RABBITMQ_STATEFULSET_READY); do printf "."; sleep $(CHECK_EVERY); done \
67 | 	&& printf "$(OK)\n" \
68 | 	&& printf "$(YELLOW)Checking RabbitMQ cluster status using $(NORMAL)$(BOLD)rabbitmq-diagnostics cluster_status$(NORMAL) ...\n\n" \
69 | 	&& $(KUBECTL) exec --namespace=$(K8S_NAMESPACE) rabbitmq-0 -- rabbitmq-diagnostics cluster_status \
70 | 	&& printf "\n$(YELLOW)For connection information see README.md$(NORMAL)\n\n"
71 | 
72 | endif
73 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # DIY RabbitMQ on Kubernetes
 2 | 
 3 | ### ⚠️⚠️⚠️ Stop! There is a Better Way!
 4 | 
 5 | These examples were put together to accompany [a blog post](https://www.rabbitmq.com/blog/2020/08/10/deploying-rabbitmq-to-kubernetes-whats-involved/).
 6 | They **should not** be used as the primary example of RabbitMQ deployments on Kubernetes.
 7 | This code **is unlikely to receive timely updates**. For most intents and purposes,
 8 | it should be considered frozen in time 🥶 and **effectively unmaintained**.
 9 | 
10 | The recommended way to deploy RabbitMQ on Kubernetes is the [RabbitMQ Cluster Operator for Kubernetes](https://www.rabbitmq.com/kubernetes/operator/operator-overview.html).
11 | The Operator is developed [on GitHub](https://github.com/rabbitmq/cluster-operator/) and contains its own [set of examples](https://github.com/rabbitmq/cluster-operator/tree/master/docs/examples).
12 | 
13 | ## What is This?
14 | 
15 | This directory contains **examples** that demonstrate DIY, minimalistic [RabbitMQ cluster](https://www.rabbitmq.com/clustering.html) deployments
16 | on Kubernetes with [Kubernetes peer discovery](https://www.rabbitmq.com/cluster-formation.html).
17 | There are several examples:
18 | 
19 |  * An [extensive one that targets the Google Kubernetes Engine (GKE)](./gke), originally contributed by Feroz Jilla
20 |  * A [basic one that targets Minikube](./minikube)
21 |  * Another [basic one that targets Kind](./kind), originally contributed by Gabriele Santomaggio
22 | 
23 | ## Production (Non-)Suitability
24 | 
25 | Some values in these example files **may or may not be optimal for your deployment**. There are many aspects to
26 | deploying and running a production-grade cluster on Kubernetes that this example cannot know or make too many assumptions about.
27 | Persistent volume configuration is one obvious example. You are welcome and encouraged to expand
28 | the example by adding more files under the `examples/{environment}` directory.
29 | 
30 | We assume that the users of this plugin familiarize themselves with the [RabbitMQ Peer Discovery guide](https://www.rabbitmq.com/cluster-formation.html),
31 | [RabbitMQ Production Checklist](https://www.rabbitmq.com/production-checklist.html),
32 | and the rest of [RabbitMQ documentation](https://www.rabbitmq.com/documentation.html) before going into production.
33 | 
34 | Having [metrics](https://www.rabbitmq.com/monitoring.html), both of RabbitMQ and applications that use it,
35 | is critically important when making informed decisions about production systems.
36 | 
37 | 
38 | ## Copyright and License
39 | 
40 | Released under the [Mozilla Public License 2.0](https://www.mozilla.org/en-US/MPL/2.0/).
41 | 
42 | (c) VMware, Inc. or its affiliates, 2020-2021.
43 | 


--------------------------------------------------------------------------------
/kind/base/deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | # See the Prerequisites section of https://www.rabbitmq.com/cluster-formation.html#peer-discovery-k8s.
 3 | kind: StatefulSet
 4 | metadata:
 5 |   name: rabbitmq
 6 | spec:
 7 |   serviceName: rabbitmq
 8 |   # Three nodes is the recommended minimum. Some features may require a majority of nodes
 9 |   # to be available.
10 |   replicas: 3
11 |   volumeClaimTemplates: []
12 |   selector:
13 |     matchLabels:
14 |       app: rabbitmq
15 |   template:
16 |     spec:
17 |     #  serviceAccountName: rabbitmq
18 |       terminationGracePeriodSeconds: 10
19 |       nodeSelector:
20 |         # Use Linux nodes in a mixed OS kubernetes cluster.
21 |         # Learn more at https://kubernetes.io/docs/reference/kubernetes-api/labels-annotations-taints/#kubernetes-io-os
22 |         kubernetes.io/os: linux
23 |       containers:
24 |       - name: rabbitmq-k8s
25 |         image: rabbitmq:3.8
26 |         volumeMounts:
27 |           - name: config-volume
28 |             mountPath: /etc/rabbitmq
29 |         # Learn more about what ports various protocols use
30 |         # at https://www.rabbitmq.com/networking.html#ports
31 |         ports:
32 |           - name: http
33 |             protocol: TCP
34 |             containerPort: 15672
35 |           - name: amqp
36 |             protocol: TCP
37 |             containerPort: 5672
38 |         livenessProbe:
39 |           exec:
40 |             # This is just an example. There is no "one true health check" but rather
41 |             # several rabbitmq-diagnostics commands that can be combined to form increasingly comprehensive
42 |             # and intrusive health checks.
43 |             # Learn more at https://www.rabbitmq.com/monitoring.html#health-checks.
44 |             #
45 |             # Stage 2 check:
46 |             command: ["rabbitmq-diagnostics", "status"]
47 |           initialDelaySeconds: 60
48 |           # See https://www.rabbitmq.com/monitoring.html for monitoring frequency recommendations.
49 |           periodSeconds: 60
50 |           timeoutSeconds: 15
51 |         readinessProbe:
52 |           exec:
53 |             # This is just an example. There is no "one true health check" but rather
54 |             # several rabbitmq-diagnostics commands that can be combined to form increasingly comprehensive
55 |             # and intrusive health checks.
56 |             # Learn more at https://www.rabbitmq.com/monitoring.html#health-checks.
57 |             #
58 |             # Stage 1 check:
59 |             command: ["rabbitmq-diagnostics", "ping"]
60 |           initialDelaySeconds: 20
61 |           periodSeconds: 60
62 |           timeoutSeconds: 10
63 |         imagePullPolicy: Always
64 |         env:
65 |           - name: MY_POD_NAME
66 |             valueFrom:
67 |               fieldRef:
68 |                 apiVersion: v1
69 |                 fieldPath: metadata.name
70 |           - name: MY_POD_NAMESPACE
71 |             valueFrom:
72 |               fieldRef:
73 |                 fieldPath: metadata.namespace
74 |           - name: RABBITMQ_USE_LONGNAME
75 |             value: "true"
76 |           # See a note on cluster_formation.k8s.address_type in the config file section
77 |           - name: K8S_SERVICE_NAME
78 |             value: rabbitmq
79 |           - name: RABBITMQ_NODENAME
80 |             value: rabbit@$(MY_POD_NAME).$(K8S_SERVICE_NAME).$(MY_POD_NAMESPACE).svc.cluster.local
81 |           - name: K8S_HOSTNAME_SUFFIX
82 |             value: .$(K8S_SERVICE_NAME).$(MY_POD_NAMESPACE).svc.cluster.local
83 |           - name: RABBITMQ_ERLANG_COOKIE
84 |             value: "mycookie"
85 |       volumes:
86 |         - name: config-volume
87 |           configMap:
88 |             name: rabbitmq-config
89 |             items:
90 |             - key: rabbitmq.conf
91 |               path: rabbitmq.conf
92 |             - key: enabled_plugins
93 |               path: enabled_plugins
94 | 


--------------------------------------------------------------------------------
/minikube/statefulset.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | # See the Prerequisites section of https://www.rabbitmq.com/cluster-formation.html#peer-discovery-k8s.
 3 | kind: StatefulSet
 4 | metadata:
 5 |   name: rabbitmq
 6 |   namespace: test-rabbitmq
 7 | spec:
 8 |   serviceName: rabbitmq
 9 |   # Three nodes is the recommended minimum. Some features may require a majority of nodes
10 |   # to be available.
11 |   replicas: 3
12 |   selector:
13 |     matchLabels:
14 |       app: rabbitmq
15 |   template:
16 |     metadata:
17 |       labels:
18 |         app: rabbitmq
19 |     spec:
20 |       serviceAccountName: rabbitmq
21 |       terminationGracePeriodSeconds: 10
22 |       nodeSelector:
23 |         # Use Linux nodes in a mixed OS kubernetes cluster.
24 |         # Learn more at https://kubernetes.io/docs/reference/kubernetes-api/labels-annotations-taints/#kubernetes-io-os
25 |         kubernetes.io/os: linux
26 |       containers:
27 |       - name: rabbitmq-k8s
28 |         image: rabbitmq:3.8
29 |         volumeMounts:
30 |           - name: config-volume
31 |             mountPath: /etc/rabbitmq
32 |         # Learn more about what ports various protocols use
33 |         # at https://www.rabbitmq.com/networking.html#ports
34 |         ports:
35 |           - name: http
36 |             protocol: TCP
37 |             containerPort: 15672
38 |           - name: amqp
39 |             protocol: TCP
40 |             containerPort: 5672
41 |         livenessProbe:
42 |           exec:
43 |             # This is just an example. There is no "one true health check" but rather
44 |             # several rabbitmq-diagnostics commands that can be combined to form increasingly comprehensive
45 |             # and intrusive health checks.
46 |             # Learn more at https://www.rabbitmq.com/monitoring.html#health-checks.
47 |             #
48 |             # Stage 2 check:
49 |             command: ["rabbitmq-diagnostics", "status"]
50 |           initialDelaySeconds: 60
51 |           # See https://www.rabbitmq.com/monitoring.html for monitoring frequency recommendations.
52 |           periodSeconds: 60
53 |           timeoutSeconds: 15
54 |         readinessProbe:
55 |           exec:
56 |             # This is just an example. There is no "one true health check" but rather
57 |             # several rabbitmq-diagnostics commands that can be combined to form increasingly comprehensive
58 |             # and intrusive health checks.
59 |             # Learn more at https://www.rabbitmq.com/monitoring.html#health-checks.
60 |             #
61 |             # Stage 1 check:
62 |             command: ["rabbitmq-diagnostics", "ping"]
63 |           initialDelaySeconds: 20
64 |           periodSeconds: 60
65 |           timeoutSeconds: 10
66 |         imagePullPolicy: Always
67 |         env:
68 |           - name: MY_POD_NAME
69 |             valueFrom:
70 |               fieldRef:
71 |                 apiVersion: v1
72 |                 fieldPath: metadata.name
73 |           - name: MY_POD_NAMESPACE
74 |             valueFrom:
75 |               fieldRef:
76 |                 fieldPath: metadata.namespace
77 |           - name: RABBITMQ_USE_LONGNAME
78 |             value: "true"
79 |           # See a note on cluster_formation.k8s.address_type in the config file section
80 |           - name: K8S_SERVICE_NAME
81 |             value: rabbitmq
82 |           - name: RABBITMQ_NODENAME
83 |             value: rabbit@$(MY_POD_NAME).$(K8S_SERVICE_NAME).$(MY_POD_NAMESPACE).svc.cluster.local
84 |           - name: K8S_HOSTNAME_SUFFIX
85 |             value: .$(K8S_SERVICE_NAME).$(MY_POD_NAMESPACE).svc.cluster.local
86 |           - name: RABBITMQ_ERLANG_COOKIE
87 |             value: "mycookie"
88 |       volumes:
89 |         - name: config-volume
90 |           configMap:
91 |             name: rabbitmq-config
92 |             items:
93 |             - key: rabbitmq.conf
94 |               path: rabbitmq.conf
95 |             - key: enabled_plugins
96 |               path: enabled_plugins
97 | 


--------------------------------------------------------------------------------
/kind/README.md:
--------------------------------------------------------------------------------
  1 | # Deploy RabbitMQ on Kubernetes with the Kubernetes Peer Discovery Plugin to Kind
  2 | 
  3 | This is an **example** that demonstrates a RabbitMQ deployment on Kubernetes with peer discovery
  4 | via `rabbitmq-peer-discovery-k8s` plugin.
  5 | 
  6 | ## Production (Non-)Suitability
  7 | 
  8 | Some values in this example **may or may not be optimal for your deployment**. We encourage users
  9 | to get familiar with the [RabbitMQ Peer Discovery guide](https://www.rabbitmq.com/cluster-formation.html), [RabbitMQ Production Checklist](https://www.rabbitmq.com/production-checklist.html)
 10 | and the rest of [RabbitMQ documentation](https://www.rabbitmq.com/documentation.html) before going into production.
 11 | 
 12 | Having [metrics](https://www.rabbitmq.com/monitoring.html), both of RabbitMQ and applications that use it,
 13 | is critically important when making informed decisions about production systems.
 14 | 
 15 | 
 16 | ## Pre-requisites
 17 | 
 18 | The example uses, targets or assumes:
 19 | 
 20 |  * [Kind](https://github.com/kubernetes-sigs/kind) 
 21 |  * [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) 
 22 |  
 23 | ## Quick Start
 24 | 
 25 |  * Create Kind cluster 
 26 | ```
 27 | kind create cluster --config kind/kind-cluster/kind-cluster.yaml
 28 | ```
 29 | 
 30 | Deploy RabbitMQ with or without [persistent volumes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/).
 31 | 
 32 | **Note:** You can use only **one** of these deployments at a time
 33 | 
 34 | * Deploy RabbitMQ without PV:
 35 | ```
 36 | kubectl apply  -k overlays/dev
 37 | ```
 38 | 
 39 | * Deploy RabbitMQ with PV using storage class:
 40 | ```
 41 | kubectl apply  -k overlays/dev-persistence/
 42 | ```
 43 | 
 44 | ## Use Localhost Address to Connect
 45 | 
 46 | The ports used by this example are:
 47 | 
 48 | * `amqp://guest:guest@localhost`: AMQP 0-9-1 and AMQP 1.0 client connections
 49 | * http://localhost:15672: HTTP API and management UI
 50 | 
 51 | 
 52 | ## Details
 53 | 
 54 | _kind is a tool for running local Kubernetes clusters using Docker container "nodes"._
 55 | Kind should be used only for development and/or CI integration.
 56 | 
 57 | ### Port Mapping
 58 | The `kind-cluster.yaml` configuration binds localhost ports:
 59 | 
 60 | ```yaml
 61 |  extraPortMappings:
 62 |   - containerPort: 31672
 63 |     hostPort: 15672
 64 |   - containerPort: 30672
 65 |     hostPort: 5672
 66 | ```
 67 | 
 68 | The `NodePort` service exposes the ports: 
 69 | ```yaml
 70 | spec:
 71 |   type: NodePort
 72 |   ports:
 73 |    - name: http
 74 |      protocol: TCP
 75 |      port: 15672
 76 |      targetPort: 15672
 77 |      nodePort: 31672  # <---- binds the extraPortMappings.containerPort 31672
 78 |    - name: amqp
 79 |      protocol: TCP
 80 |      port: 5672
 81 |      targetPort: 5672
 82 |      nodePort: 30672  # <---- binds the extraPortMappings.containerPort 30672
 83 | ```
 84 | 
 85 | This way you can easily use the localhost ports.
 86 | 
 87 | ### Persistent Volumes
 88 | 
 89 | `Kind` by default creates a [storage class](https://kubernetes.io/docs/concepts/storage/storage-classes/) called `standard`
 90 | ```
 91 | kubectl get storageclass
 92 | NAME                 PROVISIONER               AGE
 93 | standard (default)   kubernetes.io/host-path   5h
 94 | ```
 95 | 
 96 | used by:
 97 | ```yaml
 98 | cat overlays/dev-persistence/deployment.yaml
 99 | apiVersion: apps/v1
100 | # See the Prerequisites section of https://www.rabbitmq.com/cluster-formation.html#peer-discovery-k8s.
101 | kind: StatefulSet
102 | metadata:
103 |   name: rabbitmq
104 | spec:
105 |   volumeClaimTemplates:
106 |   - metadata:
107 |       name: rabbitmq-data
108 |     spec:
109 |       accessModes: [ "ReadWriteOnce" ]
110 |       storageClassName: "standard" # <----- standard storage class
111 |       resources:
112 |         requests:
113 |           storage: 1Gi
114 | ```
115 | 
116 | Check the persistent volume claims:
117 | ```
118 | kubectl get pvc -n rabbitmq-dev-persistence
119 | NAME                       STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
120 | rabbitmq-data-rabbitmq-0   Bound    pvc-0db837ec-a856-4a6b-9acf-2c9110bb9f12   1Gi        RWO            standard       2m38s
121 | rabbitmq-data-rabbitmq-1   Bound    pvc-24514f31-5b40-4bd6-a721-84a16afaa697   1Gi        RWO            standard       78s
122 | rabbitmq-data-rabbitmq-2   Bound    pvc-2b7162f5-c596-404d-be85-475600b9f82a   1Gi        RWO            standard       2s
123 | ```
124 | 
125 | Check the persistent volumes:
126 | ```
127 | kubectl get pv -n rabbitmq-dev-persistence
128 | NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                                               STORAGECLASS   REASON   AGE
129 | pvc-0db837ec-a856-4a6b-9acf-2c9110bb9f12   1Gi        RWO            Delete           Bound    rabbitmq-dev-persistence/rabbitmq-data-rabbitmq-0   standard                2m41s
130 | pvc-24514f31-5b40-4bd6-a721-84a16afaa697   1Gi        RWO            Delete           Bound    rabbitmq-dev-persistence/rabbitmq-data-rabbitmq-1   standard                81s
131 | pvc-2b7162f5-c596-404d-be85-475600b9f82a   1Gi        RWO            Delete           Bound    rabbitmq-dev-persistence/rabbitmq-data-rabbitmq-2   standard                5s
132 | ```
133 | 
134 | ## Clean up
135 | 
136 | Clean up RabbitMQ without PV:
137 | ```
138 | kubectl delete  -k overlays/dev
139 | ```
140 | 
141 | Clean up RabbitMQ with PV:
142 | ```
143 | kubectl delete  -k overlays/dev-persistence/
144 | ```
145 | 
146 | 


--------------------------------------------------------------------------------
/gke/statefulset.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: apps/v1
  2 | kind: StatefulSet
  3 | metadata:
  4 |   name: rabbitmq
  5 |   namespace: test-rabbitmq
  6 | spec:
  7 |   selector:
  8 |     matchLabels:
  9 |       app: "rabbitmq"
 10 |   # headless service that gives network identity to the RMQ nodes, and enables them to cluster
 11 |   serviceName: rabbitmq-headless # serviceName is the name of the service that governs this StatefulSet. This service must exist before the StatefulSet, and is responsible for the network identity of the set. Pods get DNS/hostnames that follow the pattern: pod-specific-string.serviceName.default.svc.cluster.local where "pod-specific-string" is managed by the StatefulSet controller.
 12 |   volumeClaimTemplates:
 13 |   - metadata:
 14 |       name: rabbitmq-data
 15 |       namespace: test-rabbitmq
 16 |     spec:
 17 |       storageClassName: standard
 18 |       accessModes:
 19 |       - ReadWriteOnce
 20 |       resources:
 21 |         requests:
 22 |           storage: "3Gi"
 23 |   template:
 24 |     metadata:
 25 |       name: rabbitmq
 26 |       namespace: test-rabbitmq
 27 |       labels:
 28 |         app: rabbitmq
 29 |     spec:
 30 |       initContainers:
 31 |       # Since k8s 1.9.4, config maps mount read-only volumes. Since the Docker image also writes to the config file,
 32 |       # the file must be mounted as read-write. We use init containers to copy from the config map read-only
 33 |       # path, to a read-write path
 34 |       - name: "rabbitmq-config"
 35 |         image: busybox:1.32.0
 36 |         volumeMounts:
 37 |         - name: rabbitmq-config
 38 |           mountPath: /tmp/rabbitmq
 39 |         - name: rabbitmq-config-rw
 40 |           mountPath: /etc/rabbitmq
 41 |         command:
 42 |         - sh
 43 |         - -c
 44 |         # the newline is needed since the Docker image entrypoint script appends to the config file
 45 |         - cp /tmp/rabbitmq/rabbitmq.conf /etc/rabbitmq/rabbitmq.conf && echo '' >> /etc/rabbitmq/rabbitmq.conf;
 46 |           cp /tmp/rabbitmq/enabled_plugins /etc/rabbitmq/enabled_plugins
 47 |       volumes:
 48 |       - name: rabbitmq-config
 49 |         configMap:
 50 |           name: rabbitmq-config
 51 |           optional: false
 52 |           items:
 53 |           - key: enabled_plugins
 54 |             path: "enabled_plugins"
 55 |           - key: rabbitmq.conf
 56 |             path: "rabbitmq.conf"
 57 |       # read-write volume into which to copy the rabbitmq.conf and enabled_plugins files
 58 |       # this is needed since the docker image writes to the rabbitmq.conf file
 59 |       # and Kubernetes Config Maps are mounted as read-only since Kubernetes 1.9.4
 60 |       - name: rabbitmq-config-rw
 61 |         emptyDir: {}
 62 |       - name: rabbitmq-data
 63 |         persistentVolumeClaim:
 64 |           claimName: rabbitmq-data
 65 |       serviceAccountName: rabbitmq
 66 |       # The Docker image runs as the `rabbitmq` user with uid 999
 67 |       # and writes to the `rabbitmq.conf` file
 68 |       # The security context is needed since the image needs
 69 |       # permission to write to this file. Without the security
 70 |       # context, `rabbitmq.conf` is owned by root and inaccessible
 71 |       # by the `rabbitmq` user
 72 |       securityContext:
 73 |         fsGroup: 999
 74 |         runAsUser: 999
 75 |         runAsGroup: 999
 76 |       containers:
 77 |       - name: rabbitmq
 78 |         # Community Docker Image
 79 |         image: rabbitmq:latest
 80 |         volumeMounts:
 81 |         # mounting rabbitmq.conf and enabled_plugins
 82 |         # this should have writeable access, this might be a problem
 83 |         - name: rabbitmq-config-rw
 84 |           mountPath: "/etc/rabbitmq"
 85 |           # mountPath: "/etc/rabbitmq/conf.d/"
 86 |         # rabbitmq data directory
 87 |         - name: rabbitmq-data
 88 |           mountPath: "/var/lib/rabbitmq/mnesia"
 89 |         env:
 90 |         - name: RABBITMQ_DEFAULT_PASS
 91 |           valueFrom:
 92 |             secretKeyRef:
 93 |               name: rabbitmq-admin
 94 |               key: pass
 95 |         - name: RABBITMQ_DEFAULT_USER
 96 |           valueFrom:
 97 |             secretKeyRef:
 98 |               name: rabbitmq-admin
 99 |               key: user
100 |         - name: RABBITMQ_ERLANG_COOKIE
101 |           valueFrom:
102 |             secretKeyRef:
103 |               name: erlang-cookie
104 |               key: cookie
105 |         ports:
106 |         - name: amqp
107 |           containerPort: 5672
108 |           protocol: TCP
109 |         - name: management
110 |           containerPort: 15672
111 |           protocol: TCP
112 |         - name: prometheus
113 |           containerPort: 15692
114 |           protocol: TCP
115 |         - name: epmd
116 |           containerPort: 4369
117 |           protocol: TCP
118 |         livenessProbe:
119 |           exec:
120 |             # This is just an example. There is no "one true health check" but rather
121 |             # several rabbitmq-diagnostics commands that can be combined to form increasingly comprehensive
122 |             # and intrusive health checks.
123 |             # Learn more at https://www.rabbitmq.com/monitoring.html#health-checks.
124 |             #
125 |             # Stage 2 check:
126 |             command: ["rabbitmq-diagnostics", "status"]
127 |           initialDelaySeconds: 60
128 |           # See https://www.rabbitmq.com/monitoring.html for monitoring frequency recommendations.
129 |           periodSeconds: 60
130 |           timeoutSeconds: 15
131 |         readinessProbe: # probe to know when RMQ is ready to accept traffic
132 |           exec:
133 |             # This is just an example. There is no "one true health check" but rather
134 |             # several rabbitmq-diagnostics commands that can be combined to form increasingly comprehensive
135 |             # and intrusive health checks.
136 |             # Learn more at https://www.rabbitmq.com/monitoring.html#health-checks.
137 |             #
138 |             # Stage 1 check:
139 |             command: ["rabbitmq-diagnostics", "ping"]
140 |           initialDelaySeconds: 20
141 |           periodSeconds: 60
142 |           timeoutSeconds: 10
143 | 


--------------------------------------------------------------------------------
/minikube/README.md:
--------------------------------------------------------------------------------
  1 | # Deploy RabbitMQ on Kubernetes with the Kubernetes Peer Discovery Plugin to Minikube
  2 | 
  3 | This is an **example** that demonstrates a RabbitMQ deployment on Kubernetes with peer discovery
  4 | via `rabbitmq-peer-discovery-k8s` plugin.
  5 | 
  6 | ## Production (Non-)Suitability
  7 | 
  8 | Some values in this example **may or may not be optimal for your deployment**. We encourage users
  9 | to get familiar with the [RabbitMQ Peer Discovery guide](https://www.rabbitmq.com/cluster-formation.html), [RabbitMQ Production Checklist](https://www.rabbitmq.com/production-checklist.html)
 10 | and the rest of [RabbitMQ documentation](https://www.rabbitmq.com/documentation.html) before going into production.
 11 | 
 12 | Having [metrics](https://www.rabbitmq.com/monitoring.html), both of RabbitMQ and applications that use it,
 13 | is critically important when making informed decisions about production systems.
 14 | 
 15 | 
 16 | ## Pre-requisites
 17 | 
 18 | The example uses, targets or assumes:
 19 | 
 20 |  * [Minikube](https://kubernetes.io/docs/setup/learning-environment/minikube/) with the [VirtualBox](https://www.virtualbox.org/) driver (other drivers can be used, too)
 21 |  * Kubernetes 1.6
 22 |  * RabbitMQ [Docker image](https://hub.docker.com/_/rabbitmq/) (maintained [by Docker, Inc](https://hub.docker.com/_/rabbitmq/))
 23 |  * A [StatefulSets controller](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/)
 24 | 
 25 | 
 26 | ## Quick Start with Make
 27 | 
 28 | This example comes with a Make target that sets up VirtualBox, Minikube and an example cluster
 29 | in a single command. It can be found under this directory. [Homebrew](https://brew.sh/) will be used to install
 30 | packages and on macOS, VirtualBox [will need OS permissions to install its kernel module](https://developer.apple.com/library/archive/technotes/tn2459/_index.html).
 31 | 
 32 | The Homebrew cask installer will ask for your password at some point with a prompt that looks like this:
 33 | 
 34 | ```
 35 | Changing ownership of paths required by virtualbox; your password may be necessary
 36 | ```
 37 | 
 38 | Please inspect the Make file to be extra sure that you understand and agree to what it does.
 39 | After enabling 3rd party kernel extensions in OS settings, run the default Make target in this directory:
 40 | 
 41 | ```
 42 | make
 43 | ```
 44 | 
 45 | which is equivalent to first running
 46 | 
 47 | ```
 48 | make start-minikube
 49 | ```
 50 | 
 51 | to install VirtualBox and Minikube using Homebrew, then
 52 | 
 53 | ```
 54 | make run-in-minikube
 55 | ```
 56 | 
 57 | to start Minikube and `kubectl apply` the example, and finally
 58 | 
 59 | ```
 60 | make wait-for-rabbitmq
 61 | ```
 62 | 
 63 | to wait for cluster formation.
 64 | 
 65 | Once the changes are applied, follow the steps in the Check Cluster Status section below.
 66 | 
 67 | In case you would prefer to install and run Minikube manually, see the following few sections.
 68 | 
 69 | 
 70 | ## Running the Example Manually with Minikube
 71 | 
 72 | ### Prerequisites
 73 | 
 74 |  * Make sure that VirtualBox is installed
 75 |  * Install [`minikube`](https://kubernetes.io/docs/tasks/tools/install-minikube/) and start it with `--vm-driver=virtualbox`
 76 |  * Install [`kubectl`](https://kubernetes.io/docs/tasks/tools/install-kubectl/)
 77 | 
 78 | ### Start Minikube
 79 | 
 80 | Start a `minikube` virtual machine:
 81 | 
 82 | ``` sh
 83 | minikube start --cpus=2 --memory=2040 --disk-size "10 GB" --vm-driver=virtualbox
 84 | ```
 85 | 
 86 | ### Create a Namespace
 87 | 
 88 | Create a Kubernetes namespace for RabbitMQ tests:
 89 | 
 90 | ``` sh
 91 | kubectl create namespace test-rabbitmq
 92 | ```
 93 | 
 94 | ### Set Up Kubernetes Permissions
 95 | 
 96 | In Kubernetes 1.6 or above, RBAC authorization is enabled by default.
 97 | This example configures RBAC related bits so that the peer discovery plugin is allowed to access
 98 | the nodes information it needs. The `ServiceAccount` and `Role` resources will be created
 99 | in the following step.
100 | 
101 | ### kubectl Apply Things
102 | 
103 | Deploy the config map, services, a stateful set and so on:
104 | 
105 | ``` sh
106 | # will apply all files under this directory
107 | kubectl create -f minikube
108 | ```
109 | 
110 | ### Check Cluster Status
111 | 
112 | Wait a few minutes for pods to start. Since this example uses a stateful set with ordered
113 | startup, the pods will be started one by one. To monitor pod startup process, use
114 | 
115 | ``` sh
116 | kubectl --namespace="test-rabbitmq" get pods
117 | ```
118 | 
119 | To run `rabbitmq-diagnostics cluster_status`:
120 | 
121 | ``` sh
122 | FIRST_POD=$(kubectl get pods --namespace test-rabbitmq -l 'app=rabbitmq' -o jsonpath='{.items[0].metadata.name }')
123 | kubectl exec --namespace=test-rabbitmq $FIRST_POD -- rabbitmq-diagnostics cluster_status
124 | ```
125 | 
126 | to check cluster status. Note that nodes can take some time to start and discover each other.
127 | 
128 | The output should look something like this:
129 | 
130 | ```
131 | Cluster status of node rabbit@rabbitmq-0.rabbitmq.test-rabbitmq.svc.cluster.local ...
132 | Basics
133 | 
134 | Cluster name: rabbit@rabbitmq-0.rabbitmq.test-rabbitmq.svc.cluster.local
135 | 
136 | Disk Nodes
137 | 
138 | rabbit@rabbitmq-0.rabbitmq.test-rabbitmq.svc.cluster.local
139 | rabbit@rabbitmq-1.rabbitmq.test-rabbitmq.svc.cluster.local
140 | rabbit@rabbitmq-2.rabbitmq.test-rabbitmq.svc.cluster.local
141 | 
142 | Running Nodes
143 | 
144 | rabbit@rabbitmq-0.rabbitmq.test-rabbitmq.svc.cluster.local
145 | rabbit@rabbitmq-1.rabbitmq.test-rabbitmq.svc.cluster.local
146 | rabbit@rabbitmq-2.rabbitmq.test-rabbitmq.svc.cluster.local
147 | 
148 | Versions
149 | 
150 | rabbit@rabbitmq-0.rabbitmq.test-rabbitmq.svc.cluster.local: RabbitMQ 3.8.1 on Erlang 22.1.8
151 | rabbit@rabbitmq-1.rabbitmq.test-rabbitmq.svc.cluster.local: RabbitMQ 3.8.1 on Erlang 22.1.8
152 | rabbit@rabbitmq-2.rabbitmq.test-rabbitmq.svc.cluster.local: RabbitMQ 3.8.1 on Erlang 22.1.8
153 | 
154 | Alarms
155 | 
156 | (none)
157 | 
158 | Network Partitions
159 | 
160 | (none)
161 | 
162 | Listeners
163 | 
164 | Node: rabbit@rabbitmq-0.rabbitmq.test-rabbitmq.svc.cluster.local, interface: [::], port: 25672, protocol: clustering, purpose: inter-node and CLI tool communication
165 | Node: rabbit@rabbitmq-0.rabbitmq.test-rabbitmq.svc.cluster.local, interface: [::], port: 5672, protocol: amqp, purpose: AMQP 0-9-1 and AMQP 1.0
166 | Node: rabbit@rabbitmq-0.rabbitmq.test-rabbitmq.svc.cluster.local, interface: [::], port: 15672, protocol: http, purpose: HTTP API
167 | Node: rabbit@rabbitmq-1.rabbitmq.test-rabbitmq.svc.cluster.local, interface: [::], port: 25672, protocol: clustering, purpose: inter-node and CLI tool communication
168 | Node: rabbit@rabbitmq-1.rabbitmq.test-rabbitmq.svc.cluster.local, interface: [::], port: 5672, protocol: amqp, purpose: AMQP 0-9-1 and AMQP 1.0
169 | Node: rabbit@rabbitmq-1.rabbitmq.test-rabbitmq.svc.cluster.local, interface: [::], port: 15672, protocol: http, purpose: HTTP API
170 | 
171 | Feature flags
172 | 
173 | Flag: drop_unroutable_metric, state: enabled
174 | Flag: empty_basic_get_metric, state: enabled
175 | Flag: implicit_default_bindings, state: enabled
176 | Flag: quorum_queue, state: enabled
177 | Flag: virtual_host_metadata, state: enabled
178 | ```
179 | 
180 | ### Use Public Minikube IP Address to Connect
181 | 
182 | Get the public `minikube` VM IP address:
183 | 
184 | ``` sh
185 | minikube ip
186 | # => 192.168.99.104
187 | ```
188 | 
189 | The [ports used](https://www.rabbitmq.com/networking.html#ports) by this example are:
190 | 
191 |  * `amqp://guest:guest@{minikube_ip}:30672`: [AMQP 0-9-1 and AMQP 1.0](https://www.rabbitmq.com/networking.html#ports) client connections
192 |  * `http://{minikube_ip}:31672`: [HTTP API and management UI](https://www.rabbitmq.com/management.html)
193 | 
194 | 
195 | ### Scaling the Number of RabbitMQ Cluster Nodes (Kubernetes Pod Replicas)
196 | 
197 | ``` sh
198 | # Odd numbers of nodes are necessary for a clear quorum: 3, 5, 7 and so on
199 | kubectl scale statefulset/rabbitmq --namespace=test-rabbitmq --replicas=5
200 | ```
201 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Mozilla Public License Version 2.0
  2 | ==================================
  3 | 
  4 | 1. Definitions
  5 | --------------
  6 | 
  7 | 1.1. "Contributor"
  8 |     means each individual or legal entity that creates, contributes to
  9 |     the creation of, or owns Covered Software.
 10 | 
 11 | 1.2. "Contributor Version"
 12 |     means the combination of the Contributions of others (if any) used
 13 |     by a Contributor and that particular Contributor's Contribution.
 14 | 
 15 | 1.3. "Contribution"
 16 |     means Covered Software of a particular Contributor.
 17 | 
 18 | 1.4. "Covered Software"
 19 |     means Source Code Form to which the initial Contributor has attached
 20 |     the notice in Exhibit A, the Executable Form of such Source Code
 21 |     Form, and Modifications of such Source Code Form, in each case
 22 |     including portions thereof.
 23 | 
 24 | 1.5. "Incompatible With Secondary Licenses"
 25 |     means
 26 | 
 27 |     (a) that the initial Contributor has attached the notice described
 28 |         in Exhibit B to the Covered Software; or
 29 | 
 30 |     (b) that the Covered Software was made available under the terms of
 31 |         version 1.1 or earlier of the License, but not also under the
 32 |         terms of a Secondary License.
 33 | 
 34 | 1.6. "Executable Form"
 35 |     means any form of the work other than Source Code Form.
 36 | 
 37 | 1.7. "Larger Work"
 38 |     means a work that combines Covered Software with other material, in
 39 |     a separate file or files, that is not Covered Software.
 40 | 
 41 | 1.8. "License"
 42 |     means this document.
 43 | 
 44 | 1.9. "Licensable"
 45 |     means having the right to grant, to the maximum extent possible,
 46 |     whether at the time of the initial grant or subsequently, any and
 47 |     all of the rights conveyed by this License.
 48 | 
 49 | 1.10. "Modifications"
 50 |     means any of the following:
 51 | 
 52 |     (a) any file in Source Code Form that results from an addition to,
 53 |         deletion from, or modification of the contents of Covered
 54 |         Software; or
 55 | 
 56 |     (b) any new file in Source Code Form that contains any Covered
 57 |         Software.
 58 | 
 59 | 1.11. "Patent Claims" of a Contributor
 60 |     means any patent claim(s), including without limitation, method,
 61 |     process, and apparatus claims, in any patent Licensable by such
 62 |     Contributor that would be infringed, but for the grant of the
 63 |     License, by the making, using, selling, offering for sale, having
 64 |     made, import, or transfer of either its Contributions or its
 65 |     Contributor Version.
 66 | 
 67 | 1.12. "Secondary License"
 68 |     means either the GNU General Public License, Version 2.0, the GNU
 69 |     Lesser General Public License, Version 2.1, the GNU Affero General
 70 |     Public License, Version 3.0, or any later versions of those
 71 |     licenses.
 72 | 
 73 | 1.13. "Source Code Form"
 74 |     means the form of the work preferred for making modifications.
 75 | 
 76 | 1.14. "You" (or "Your")
 77 |     means an individual or a legal entity exercising rights under this
 78 |     License. For legal entities, "You" includes any entity that
 79 |     controls, is controlled by, or is under common control with You. For
 80 |     purposes of this definition, "control" means (a) the power, direct
 81 |     or indirect, to cause the direction or management of such entity,
 82 |     whether by contract or otherwise, or (b) ownership of more than
 83 |     fifty percent (50%) of the outstanding shares or beneficial
 84 |     ownership of such entity.
 85 | 
 86 | 2. License Grants and Conditions
 87 | --------------------------------
 88 | 
 89 | 2.1. Grants
 90 | 
 91 | Each Contributor hereby grants You a world-wide, royalty-free,
 92 | non-exclusive license:
 93 | 
 94 | (a) under intellectual property rights (other than patent or trademark)
 95 |     Licensable by such Contributor to use, reproduce, make available,
 96 |     modify, display, perform, distribute, and otherwise exploit its
 97 |     Contributions, either on an unmodified basis, with Modifications, or
 98 |     as part of a Larger Work; and
 99 | 
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 |     for sale, have made, import, and otherwise transfer either its
102 |     Contributions or its Contributor Version.
103 | 
104 | 2.2. Effective Date
105 | 
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 | 
110 | 2.3. Limitations on Grant Scope
111 | 
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 | 
118 | (a) for any code that a Contributor has removed from Covered Software;
119 |     or
120 | 
121 | (b) for infringements caused by: (i) Your and any other third party's
122 |     modifications of Covered Software, or (ii) the combination of its
123 |     Contributions with other software (except as part of its Contributor
124 |     Version); or
125 | 
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 |     its Contributions.
128 | 
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 | 
133 | 2.4. Subsequent Licenses
134 | 
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 | 
140 | 2.5. Representation
141 | 
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 | 
146 | 2.6. Fair Use
147 | 
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 | 
152 | 2.7. Conditions
153 | 
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 | 
157 | 3. Responsibilities
158 | -------------------
159 | 
160 | 3.1. Distribution of Source Form
161 | 
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 | 
170 | 3.2. Distribution of Executable Form
171 | 
172 | If You distribute Covered Software in Executable Form then:
173 | 
174 | (a) such Covered Software must also be made available in Source Code
175 |     Form, as described in Section 3.1, and You must inform recipients of
176 |     the Executable Form how they can obtain a copy of such Source Code
177 |     Form by reasonable means in a timely manner, at a charge no more
178 |     than the cost of distribution to the recipient; and
179 | 
180 | (b) You may distribute such Executable Form under the terms of this
181 |     License, or sublicense it under different terms, provided that the
182 |     license for the Executable Form does not attempt to limit or alter
183 |     the recipients' rights in the Source Code Form under this License.
184 | 
185 | 3.3. Distribution of a Larger Work
186 | 
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 | 
198 | 3.4. Notices
199 | 
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 | 
206 | 3.5. Application of Additional Terms
207 | 
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 | 
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 | 
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 | 
232 | 5. Termination
233 | --------------
234 | 
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 | 
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 | 
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 | 
261 | ************************************************************************
262 | *                                                                      *
263 | *  6. Disclaimer of Warranty                                           *
264 | *  -------------------------                                           *
265 | *                                                                      *
266 | *  Covered Software is provided under this License on an "as is"       *
267 | *  basis, without warranty of any kind, either expressed, implied, or  *
268 | *  statutory, including, without limitation, warranties that the       *
269 | *  Covered Software is free of defects, merchantable, fit for a        *
270 | *  particular purpose or non-infringing. The entire risk as to the     *
271 | *  quality and performance of the Covered Software is with You.        *
272 | *  Should any Covered Software prove defective in any respect, You     *
273 | *  (not any Contributor) assume the cost of any necessary servicing,   *
274 | *  repair, or correction. This disclaimer of warranty constitutes an   *
275 | *  essential part of this License. No use of any Covered Software is   *
276 | *  authorized under this License except under this disclaimer.         *
277 | *                                                                      *
278 | ************************************************************************
279 | 
280 | ************************************************************************
281 | *                                                                      *
282 | *  7. Limitation of Liability                                          *
283 | *  --------------------------                                          *
284 | *                                                                      *
285 | *  Under no circumstances and under no legal theory, whether tort      *
286 | *  (including negligence), contract, or otherwise, shall any           *
287 | *  Contributor, or anyone who distributes Covered Software as          *
288 | *  permitted above, be liable to You for any direct, indirect,         *
289 | *  special, incidental, or consequential damages of any character      *
290 | *  including, without limitation, damages for lost profits, loss of    *
291 | *  goodwill, work stoppage, computer failure or malfunction, or any    *
292 | *  and all other commercial damages or losses, even if such party      *
293 | *  shall have been informed of the possibility of such damages. This   *
294 | *  limitation of liability shall not apply to liability for death or   *
295 | *  personal injury resulting from such party's negligence to the       *
296 | *  extent applicable law prohibits such limitation. Some               *
297 | *  jurisdictions do not allow the exclusion or limitation of           *
298 | *  incidental or consequential damages, so this exclusion and          *
299 | *  limitation may not apply to You.                                    *
300 | *                                                                      *
301 | ************************************************************************
302 | 
303 | 8. Litigation
304 | -------------
305 | 
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 | 
313 | 9. Miscellaneous
314 | ----------------
315 | 
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 | 
323 | 10. Versions of the License
324 | ---------------------------
325 | 
326 | 10.1. New Versions
327 | 
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 | 
333 | 10.2. Effect of New Versions
334 | 
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 | 
340 | 10.3. Modified Versions
341 | 
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 | 
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 | 
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 | 
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 | 
358 |   This Source Code Form is subject to the terms of the Mozilla Public
359 |   License, v. 2.0. If a copy of the MPL was not distributed with this
360 |   file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 | 
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 | 
367 | You may add additional accurate notices of copyright ownership.
368 | 
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 | 
372 |   This Source Code Form is "Incompatible With Secondary Licenses", as
373 |   defined by the Mozilla Public License, v. 2.0.
374 | 


--------------------------------------------------------------------------------
/gke/README.md:
--------------------------------------------------------------------------------
  1 | # Deploy RabbitMQ on Kubernetes with the Kubernetes Peer Discovery Plugin to GKE
  2 | 
  3 | This is an **example** that demonstrates a RabbitMQ deployment on the Google Kubernetes Engine (GKE) with peer discovery
  4 | via `rabbitmq-peer-discovery-k8s` plugin. This example is meant to be more detailed compared to its Minikube and Kind
  5 | counterparts. We cover several key aspects of a manual RabbitMQ deployment on Kubernetes, such as
  6 | 
  7 |  * A [Kubernetes namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/)
  8 |  * A [stateful set](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) for RabbitMQ cluster nodes
  9 |  * Ensuring durable storage is used by [node data directories](https://www.rabbitmq.com/relocate.html)
 10 |  * A Kubernetes Secret for [initial RabbitMQ user credentials](https://www.rabbitmq.com/access-control.html#default-state)
 11 |  * A Kubernetes Secret for [inter-node and CLI tool authentication](https://www.rabbitmq.com/clustering.html#erlang-cookie)
 12 |  * A [headless service](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services) for inter-node communication
 13 |  * Permissions for RabbitMQ node data directory and configuration file(s)
 14 |  * Node [configuration files](https://www.rabbitmq.com/configure.html#configuration-files)
 15 |  * [Pre-enabled plugin file](https://www.rabbitmq.com/plugins.html#enabled-plugins-file)
 16 |  * [Peer discovery](https://www.rabbitmq.com/cluster-formation.html) settings
 17 |  * Kubernetes [access control (RBAC)](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) rules
 18 |  * [Liveness and readiness](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes) probes
 19 |  * A [load balancer service](https://kubernetes.io/docs/concepts/services-networking/service/#loadbalancer) for external client connections
 20 |  * Resource limits (CPU, memory, disk, network bandwidth)
 21 | 
 22 | In this example, we will try to cover the key parts as well as mention a couple
 23 | more steps that are not technically required to run RabbitMQ on Kubernetes, but every
 24 | production system operator will have to worry about sooner rather than later:
 25 | 
 26 |  * How to set up cluster monitoring with Prometheus and Grafana
 27 |  * How to deploy a PerfTest instance to do basic functional and load testing of the cluster
 28 | 
 29 | This example by no means covers every aspect that may be relevant when deploying
 30 | RabbitMQ to Kubernetes; our goal is to highlight the most important parts.
 31 | Deployment- and workload-specific decisions such as what [resource limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) to apply
 32 | to RabbitMQ node pod (containers), [what kind of durable storage](https://www.rabbitmq.com/blog/2020/06/18/cluster-sizing-case-study-quorum-queues-part-1/) to use,
 33 | how to approach TLS certificate/key pair rotation, log aggregation, and upgrades are out of scope of this example.
 34 | 
 35 | 
 36 | ## Production (Non-)Suitability
 37 | 
 38 | Some values in this example **may or may not be optimal for your deployment**. We encourage users
 39 | to get familiar with the [RabbitMQ Peer Discovery guide](https://www.rabbitmq.com/cluster-formation.html), [RabbitMQ Production Checklist](https://www.rabbitmq.com/production-checklist.html)
 40 | and the rest of [RabbitMQ documentation](https://www.rabbitmq.com/documentation.html) before going into production.
 41 | 
 42 | Having [metrics](https://www.rabbitmq.com/monitoring.html), both of RabbitMQ and applications that use it,
 43 | is critically important when making informed decisions about production systems.
 44 | 
 45 | These examples have been created as part of a blog post, and thus read like a blog post.
 46 | 
 47 | ## Pre-requisites
 48 | 
 49 | The example uses, targets or assumes:
 50 | 
 51 |  * A [GKE cluster](https://cloud.google.com/kubernetes-engine), version `v1.16.13-gke.1` was used at the time of writing.
 52 |  * The `kubectl` CLI tool. Version `v1.18.0` was used at the time of writing.
 53 |  * RabbitMQ [community Docker image](https://hub.docker.com/_/rabbitmq/)
 54 | 
 55 | ## Kubernetes Namespace and Permissions (RBAC)
 56 | 
 57 | Every set of Kubernetes objects belongs to a [Kubernetes Namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/).
 58 | RabbitMQ cluster resources are no exception.
 59 | 
 60 | We recommend using a dedicated Namespace to keep the RabbitMQ cluster separate from other services that may be deployed
 61 | in the Kubernetes cluster.
 62 | Having a dedicated namespace makes logical sense but also allows the specification of
 63 | fine-grained [role-based access rules](https://kubernetes.io/docs/reference/access-authn-authz/rbac/).
 64 | 
 65 | RabbitMQ's Kubernetes peer discovery plugin relies on the Kubernetes API as a data source. On first boot, every node
 66 | will try to discover its peer pods using the API and attempt to join them. When a node comes online, it also
 67 | emits a [Kubernetes event](https://kubernetes.io/docs/tasks/debug-application-cluster/debug-application-introspection/).
 68 | 
 69 | The plugin requires the following access to Kubernetes resources:
 70 | 
 71 |  * `get` access to the `endpoints` resource
 72 |  * `create` access to the `events` resource
 73 | 
 74 | Specify a [Role, Role Binding and a Service Account](https://kubernetes.io/docs/reference/access-authn-authz/rbac/)
 75 | to configure this access.
 76 | 
 77 | An example namespace, along with RBAC rules can be seen in the [rbac.yaml example file](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/minikube/rbac.yaml).
 78 | 
 79 | If following from the example, use the following command to create a namespace and the required RBAC rules.
 80 | Note that this creates a namespace called `test-rabbitmq`.
 81 | 
 82 | ```shell
 83 | kubectl apply -f namespace.yaml
 84 | kubectl apply -f rbac.yaml
 85 | ```
 86 | 
 87 | The `kubectl`  examples below will use the `test-rabbitmq` namespace. This namespace can be set to be the default
 88 | one for convenience:
 89 | 
 90 | ```shell
 91 | # set the namespace to be the current (default) one
 92 | kubectl config set-context --current --namespace=test-rabbitmq
 93 | # verify
 94 | kubectl config view --minify | grep namespace:
 95 | ```
 96 | 
 97 | Alternatively, `--namespace="test-rabbitmq"` can be appended to all `kubectl` commands
 98 | demonstrated below.
 99 | 
100 | 
101 | ## Use a Stateful Set
102 | 
103 | RabbitMQ *requires* using a [Stateful Set](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) to deploy a RabbitMQ cluster to Kubernetes.
104 | The Stateful Set ensures that the RabbitMQ nodes are deployed one at a time, which avoids running into a potential [peer discovery race condition](https://www.rabbitmq.com/cluster-formation.html#initial-formation-race-condition) when deploying a multi-node RabbitMQ cluster.
105 | 
106 | The Stateful Set definition file is packed with detail such as mounting configuration, mounting credentials, opening ports, etc,
107 | which is explained topic-wise in the following sections.
108 | 
109 | The final Stateful Set file can be found [under `./gke`](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml).
110 | 
111 | Do not deploy the Stateful Set yet. We will need to create a few other Kubernetes resources
112 | before we can tie everything together in the Stateful Set.
113 | 
114 | ## Create a Service For Clustering and CLI Tools
115 | 
116 | The Stateful Set definition can reference a Service which gives the Pods of the Stateful Set their network identity. Here, we are referring to the [`v1.StatefulSet.Spec.serviceName` property](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#statefulsetspec-v1-apps).
117 | 
118 | This is required by RabbitMQ for clustering, and as mentioned in the Kubernetes documentation, has to be created before the Stateful Set.
119 | 
120 | RabbitMQ uses port 4369 for inter-node communication, and since this Service is used internally and does not need to be exposed, we create a [Headless Service](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services).
121 | 
122 | [Example Headless Service](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/headless-service.yaml).
123 | 
124 | If following from the example, run the following to create a Headless Service:
125 | 
126 | ```
127 | kubectl apply -f headless-service.yaml
128 | ```
129 | 
130 | The service now can be observed in the `test-rabbitmq` namespace:
131 | 
132 | ```shell
133 | kubectl get all
134 | # => NAME                        TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)    AGE
135 | # => service/rabbitmq-headless   ClusterIP   None         <none>        4369/TCP   7s
136 | ```
137 | 
138 | ## Use a Persistent Volume for Node Data
139 | 
140 | In order for RabbitMQ nodes to retain data between Pod restarts, node's data directory must use durable storage.
141 | A [Persistent Volume](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) can be attached to each RabbitMQ Pod.
142 | Although some data can be synced from other nodes, transient volumes defy most benefits of clustering.
143 | 
144 | In our [statefulset.yaml example](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L12-L22),
145 | we create a Persistent Volume Claim to provision a Persistent Volume.
146 | 
147 | The Persistent Volume is mounted at `/var/lib/rabbitmq/mnesia`. This path is used for a [`RABBITMQ_MNESIA_BASE` location](https://www.rabbitmq.com/relocate.html): the base directory
148 | for all persistent data of a node.
149 | 
150 | A description of [default file paths for RabbitMQ](https://www.rabbitmq.com/relocate.html) can be found in the RabbitMQ documentation.
151 | 
152 | Node's data directory base can be changed using the `RABBITMQ_MNESIA_BASE` variable if needed. Make sure
153 | to mount a Persistent Volume at the updated path.
154 | 
155 | 
156 | ## Node Authentication Secret: the Erlang Cookie
157 | 
158 | RabbitMQ nodes and CLI tools use a shared secret known as [the Erlang Cookie](https://www.rabbitmq.com/clustering.html#erlang-cookie), to authenticate to each other.
159 | The cookie value is a string of alphanumeric characters up to 255 characters in size. The value must be generated before creating
160 | a RabbitMQ cluster since it is needed by the nodes to [form a cluster](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L72-L75).
161 | 
162 | With the community Docker image, RabbitMQ nodes will expect the cookie to be at `/var/lib/rabbitmq/.erlang.cookie`.
163 | We recommend creating a Secret and mounting it as a Volume on the Pods at this path.
164 | 
165 | This is demonstrated in the [statefulset.yaml example](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L101-L105) file.
166 | 
167 | The secret is expected to have the following key/value pair:
168 | 
169 | ```
170 | cookie: {value}
171 | ```
172 | 
173 | To create a cookie Secret, run
174 | 
175 | ```shell
176 | echo -n "this secret value is JUST AN EXAMPLE. Replace it!" > cookie
177 | kubectl create secret generic erlang-cookie --from-file=./cookie
178 | ```
179 | 
180 | This will create a Secret with a single key, `cookie`, taken from the file name,
181 | and the file contents as its value.
182 | 
183 | 
184 | ## Administrator Credentials
185 | 
186 | RabbitMQ will seed a [default user](https://www.rabbitmq.com/access-control.html#default-state) with well-known credentials on first boot.
187 | The username and password of this user are both `guest`.
188 | 
189 | This default user can [only connect from localhost](https://www.rabbitmq.com/access-control.html#loopback-users) by default.
190 | It is possible to lift this restriction by opting in. This may be useful for testing but **very insecure**.
191 | Instead, an administrative user must be created using generated credentials.
192 | 
193 | The administrative user credentials should be stored in a [Kubernetes Secret](https://kubernetes.io/docs/concepts/configuration/secret/),
194 | and mounted onto the RabbitMQ Pods.
195 | The `RABBITMQ_DEFAULT_USER` and `RABBITMQ_DEFAULT_PASS` environment variables then can be set to the Secret values.
196 | The community Docker image will use them to [override default user credentials](https://www.rabbitmq.com/access-control.html#seeding).
197 | 
198 | [Example for reference](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L91-L100).
199 | 
200 | The secret is expected to have the following key/value pairs:
201 | 
202 | ```
203 | user: {username}
204 | pass: {password}
205 | ```
206 | 
207 | To create an administrative user Secret, use
208 | 
209 | ```shell
210 | # this is merely an example, you are welcome to use a different username
211 | echo -n "administrator" > user
212 | # this is merely an example, you MUST use a different, generated password value!
213 | echo -n 'g3N3rAtED-Pa$$w0rd' > pass
214 | kubectl create secret generic rabbitmq-admin --from-file=./user --from-file=./pass
215 | ```
216 | 
217 | This will create a Secret with two keys, `user` and `pass`, taken from the file names,
218 | and file contents as their respective values.
219 | 
220 | Users can be created explicitly using CLI tools as well.
221 | See [RabbitMQ doc section on user management](https://www.rabbitmq.com/access-control.html#seeding) to learn more.
222 | 
223 | 
224 | 
225 | ## Node Configuration
226 | 
227 | There are [several ways](https://www.rabbitmq.com/configure.html) to configure a RabbitMQ node. The recommended way is to use configuration files.
228 | 
229 | Configuration files can be expressed as [Config Maps](https://kubernetes.io/docs/concepts/configuration/configmap/),
230 | and mounted as a Volume onto the RabbitMQ pods.
231 | 
232 | To create a Config Map with RabbitMQ configuration, apply our [minimal configmap.yaml example](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/configmap.yaml):
233 | 
234 | ```shell
235 | kubectl apply -f configmap.yaml
236 | ```
237 | 
238 | ### Use an Init Container
239 | 
240 | Since Kubernetes 1.9.4, Config Maps are mounted as read-only volumes onto Pods. This is problematic for the RabbitMQ community Docker image:
241 | the image can try to update the config file on container start up.
242 | 
243 | Thus, the path at which the RabbitMQ config is mounted must be read-write. If a read-only file is detected by the Docker image,
244 | you'll see the following warning:
245 | 
246 | ```
247 | touch: cannot touch '/etc/rabbitmq/rabbitmq.conf': Permission denied
248 | 
249 | WARNING: '/etc/rabbitmq/rabbitmq.conf' is not writable, but environment variables have been provided which request that we write to it
250 |   We have copied it to '/tmp/rabbitmq.conf' so it can be amended to work around the problem, but it is recommended that the read-only
251 |   source file should be modified and the environment variables removed instead.
252 | ```
253 | 
254 | While the Docker image does work around the issue, it is not ideal to store the configuration file in `/tmp` and we recommend instead
255 | making the mount path read-write.
256 | 
257 | Like a few other projects in the Kubernetes community, we use an [init container](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) to overcome this.
258 | 
259 | Examples:
260 | 
261 | * [The Config Map](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/minikube/configmap.yaml)
262 | * [Using an Init Container to mount the Config Map](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L30-L64)
263 | 
264 | ### Run The Pod As the `rabbitmq` User
265 | 
266 | The Docker image [runs as the `rabbitmq` user with uid 999](https://github.com/docker-library/rabbitmq/blob/38bc089c287d05d22b03a4d619f7ad9d9a4501bc/3.8/ubuntu/Dockerfile#L186-L187) and writes to the `rabbitmq.conf` file.
267 | Thus, the file permissions on `rabbitmq.conf` must allow this. A [Pod Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) can be
268 | added to the Stateful Set definition to achieve this.
269 | Set the [`runAsUser`, `runAsGroup` and the `fsGroup`](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L66-L75) to 999 in the Security Context.
270 | 
271 | See [Security Context](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L72-L75)
272 | in the Stateful Set definition file.
273 | 
274 | ### Importing Definitions
275 | 
276 | RabbitMQ nodes can [import definitions](https://www.rabbitmq.com/definitions.html) exported from another RabbitMQ cluster.
277 | This may also be done at [node boot time](https://www.rabbitmq.com/definitions.html#import-on-boot).
278 | 
279 | Following from the RabbitMQ documentation, this can be done using the following steps:
280 | 
281 | 1. Export definitions from the RabbitMQ cluster you wish to replicate and save the file
282 | 1. Create a Config Map with the key being the file name, and the value being the contents of the file (See the `rabbitmq.conf` Config Map [example](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/configmap.yaml))
283 | 1. Mount the Config Map as a Volume on the RabbitMQ Pod in the Stateful Set definition
284 | 1. Update the `rabbitmq.conf` Config Map with `load_definitions = /path/to/definitions/file`
285 | 
286 | 
287 | ## Readiness Probe
288 | 
289 | Kubernetes uses a check known as the [readiness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) to determine if a pod is ready to serve client traffic.
290 | This is effectively a specialized [health check](https://www.rabbitmq.com/monitoring.html#health-checks) defined
291 | by the system operator.
292 | 
293 | When an [ordered pod deployment policy](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies) is used — and this is the recommended option for RabbitMQ clusters —
294 | the probe controls when the Kubernetes controller will consider the currently deployed pod to be ready
295 | and proceed to deploy the next one. This check, if not chosen appropriately, can deadlock a rolling
296 | cluster node restart.
297 | 
298 | RabbitMQ nodes that belong to a cluster will [attempt to sync schema from their peers on startup](https://www.rabbitmq.com/clustering.html#restarting-schema-sync). If no peer comes online within a configurable time window (five minutes by default),
299 | the node will give up and voluntarily stop. Before the sync is complete, the node won't mark itself as fully booted.
300 | 
301 | Therefore, if a readiness probe assumes that a node is fully booted and running,
302 | **a rolling restart of RabbitMQ node pods using such probe will deadlock**: the probe will never succeed,
303 | and will never proceed to deploy the next pod, which must come online for the original pod to be considered
304 | ready by the deployment.
305 | 
306 | It is therefore recommended to use a very basic RabbitMQ health check for readiness probe:
307 | 
308 | ``` shell
309 | rabbitmq-diagnostics ping
310 | ```
311 | 
312 | While this check is not thorough, it allows all pods to be started and re-join the cluster within a certain time period,
313 | even when pods are restarted one by one, in order.
314 | 
315 | This is covered in a dedicated section of the RabbitMQ clustering guide: [Restarts and Health Checks (Readiness Probes)](https://www.rabbitmq.com/clustering.html#restarting-readiness-probes).
316 | 
317 | The [readiness probe section](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L132-L143)
318 | in the Stateful Set definition file demonstrates how to configure a readiness probe.
319 | 
320 | 
321 | ## Liveness Probe
322 | 
323 | Similarly to the readiness probe described above, Kubernetes allows for pod health checks using a different health check
324 | called the [liveness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/).
325 | The check determines if a pod must be restarted.
326 | 
327 | As with all [health checks](https://www.rabbitmq.com/monitoring.html#health-checks), there is no single solution that can be
328 | recommended for all deployments. Health checks can produce false positives, which means reasonably healthy, operational nodes
329 | will be restarted or even destroyed and re-created for no reason, reducing system availability.
330 | 
331 | Moreover, a RabbitMQ node restart won't necessarily address the issue. For example, restarting a node
332 | that is in an [alarmed state](https://www.rabbitmq.com/alarms.html) because it is low on available disk space won't help.
333 | 
334 | All this is to say that **liveness probes must be chosen wisely** and with false positives and "recoverability by a restart"
335 | taken into account. Liveness probes also must [use node-local health checks instead of cluster-wide ones](https://www.rabbitmq.com/monitoring.html#health-checks).
336 | 
337 | RabbitMQ CLI tools provide a number of [pre-defined health checks](https://www.rabbitmq.com/monitoring.html#health-checks) that
338 | vary in how thorough they are, how intrusive they are and how likely they are to produce false positives in different
339 | scenarios, e.g. when the system is under load. The checks are composable and can be combined.
340 | The right liveness probe choice is a system-specific decision. When in doubt, start with a simpler, less intrusive
341 | and less thorough option such as
342 | 
343 | ``` shell
344 | rabbitmq-diagnostics -q ping
345 | ```
346 | 
347 | The following checks can be reasonable liveness probe candidates:
348 | 
349 | ``` shell
350 | rabbitmq-diagnostics -q check_port_connectivity
351 | ```
352 | 
353 | ``` shell
354 | rabbitmq-diagnostics -q check_local_alarms
355 | ```
356 | 
357 | Note, however, that they will fail for the nodes [paused by the "pause minority" partition handling strategy](https://www.rabbitmq.com/partitions.html).
358 | 
359 | The [liveness probe section](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L119-L131)
360 | in the Stateful Set definition file demonstrates how to configure a liveness probe.
361 | 
362 | 
363 | ## Plugins
364 | 
365 | RabbitMQ [supports plugins](https://www.rabbitmq.com/plugins.html). Some plugins are essential when running RabbitMQ on Kubernetes,
366 | e.g. the Kubernetes-specific peer discovery implementation.
367 | 
368 | The [`rabbitmq_peer_discovery_k8s` plugin](https://github.com/rabbitmq/diy-kubernetes-examples) is required
369 | to deploy RabbitMQ on Kubernetes.
370 | It is quite common to also enable [`rabbitmq_management` plugin](https://www.rabbitmq.com/management.html) in order to get a browser-based management UI
371 | and an HTTP API, and [`rabbitmq_prometheus`](https://www.rabbitmq.com/prometheus.html) for monitoring.
372 | 
373 | Plugins can be enabled in [different ways](https://www.rabbitmq.com/plugins.html#ways-to-enable-plugins).
374 | We recommend mounting the plugins file, `enabled_plugins`, to the node configuration directory, `/etc/rabbitmq`.
375 | A Config Map can be used to express the value of the `enabled_plugins` file. It can then be mounted
376 | as a Volume onto each RabbitMQ container in the Stateful Set definition.
377 | 
378 | In our [configmap.yaml example](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/configmap.yaml) file,
379 | we demonstrate how to populate the `enabled_plugins` file and mount it under the `/etc/rabbitmq` directory.
380 | 
381 | 
382 | ## Ports
383 | 
384 | The final consideration for the Stateful Set is the ports to open on the RabbitMQ Pods.
385 | Protocols supported by RabbitMQ are all TCP-based and require the [protocol ports](https://www.rabbitmq.com/networking.html#ports) to be opened on the RabbitMQ nodes.
386 | Depending on the plugins that are enabled on a node, the list of required ports can vary.
387 | 
388 | The example `enabled_plugins` file mentioned above enables a few plugins: `rabbitmq_peer_discovery_k8s` (mandatory), `rabbitmq_management`
389 | and `rabbitmq_prometheus`.
390 | Therefore, the service must [open several ports](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml#L106-L118) relevant for the core server and the enabled plugins:
391 | 
392 |  * `5672`: used by AMQP 0-9-1 and AMQP 1.0 clients
393 |  * `15672`: management UI and HTTP API
394 |  * `15692`: Prometheus scraping endpoint
395 | 
396 | 
397 | ## Deploy the Stateful Set
398 | 
399 | These are the key components in the Stateful Set file. Please have a look [at the file](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/statefulset.yaml),
400 | and if following from the example, deploy the Stateful Set:
401 | 
402 | ```shell
403 | kubectl apply -f statefulset.yaml
404 | ```
405 | 
406 | This will start spinning up a RabbitMQ cluster. To watch the progress:
407 | 
408 | ```shell
409 | watch kubectl get all
410 | # => NAME             READY   STATUS    RESTARTS   AGE
411 | # => pod/rabbitmq-0   0/1     Pending   0          8s
412 | # =>
413 | # => NAME                        TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)    AGE
414 | # => service/rabbitmq-headless   ClusterIP   None         <none>        4369/TCP   61m
415 | # =>
416 | # => NAME                        READY   AGE
417 | # => statefulset.apps/rabbitmq   0/1     8s
418 | ```
419 | 
420 | ## Create a Service for Client Connections
421 | 
422 | If all the steps above succeeded, you should have a functioning RabbitMQ cluster deployed on Kubernetes! 🥳
423 | However, having a RabbitMQ cluster on Kubernetes is only useful if clients can [connect](https://www.rabbitmq.com/connections.html) to it.
424 | 
425 | Time to create a Service to make the cluster accessible to [client connections](https://www.rabbitmq.com/connections.html).
426 | 
427 | The type of the Service depends on your use case. The [Kubernetes API reference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#servicespec-v1-core)
428 | gives a good overview of the types of Services available.
429 | 
430 | In our [client-service.yaml example](https://github.com/rabbitmq/diy-kubernetes-examples/blob/master/gke/client-service.yaml), we have gone with a `LoadBalancer` Service.
431 | This gives us an external IP that can be used to access the RabbitMQ cluster.
432 | 
433 | For example, this should make it possible to visit the RabbitMQ management UI by visiting `{external-ip}:15672`, and signing in.
434 | Client applications can connect to endpoints such as `{external-ip}:5672` (AMQP 0-9-1, AMQP 1.0) or `{external-ip}:1883` (MQTT).
435 | Please refer to the [get started guide](https://www.rabbitmq.com/getstarted.html) to learn how to use RabbitMQ.
436 | 
437 | If following from the example, run
438 | 
439 | ``` shell
440 | kubectl apply -f client-service.yaml
441 | ```
442 | 
443 | to create a Service of type LoadBalancer with an external IP address. To find out what the external IP address is,
444 | use `kubectl get svc`:
445 | 
446 | ``` shell
447 | kubectl get svc
448 | # => NAME                        TYPE           CLUSTER-IP     EXTERNAL-IP      PORT(S)                                          AGE
449 | # => service/rabbitmq-client     LoadBalancer   10.59.243.60   34.105.135.116   15672:30902/TCP,15692:30605/TCP,5672:31210/TCP   2m19s
450 | ```
451 | 
452 | ## Resource Usage and Limits
453 | 
454 | [Container resource management](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) is a topic that deserves
455 | its own post. [Capacity planning](https://www.rabbitmq.com/blog/tag/capacity-planning/) recommendations are entirely workload-,
456 | environment- and system-specific. Optimal values are usually found via extensive [monitoring](https://www.rabbitmq.com/monitoring.html) of the system, trial, and error.
457 | However, when picking the limits and resource allocation settings, consider a few RabbitMQ-specific things.
458 | 
459 | ### Use the Latest Major Erlang Release
460 | 
461 | RabbitMQ runs on the Erlang runtime. Recent Erlang/OTP releases have introduced a number of improvements highly relevant to
462 | the users who run RabbitMQ on Kubernetes:
463 | 
464 |  * In Erlang 22, inter-node communication [latency and head-of-line blocking](http://blog.erlang.org/OTP-22-Highlights/) have been
465 |    significantly reduced. In earlier versions, link congestion was known to make [cluster node heartbeat](https://www.rabbitmq.com/nettick.html) false
466 |    positives likely.
467 |  * In Erlang 23, the runtime will [respect the container CPU quotas](http://blog.erlang.org/OTP-23-Highlights/) when computing the default
468 |    number of schedulers to start. This means that nodes will respect the Kubernetes-managed CPU resource limits.
469 | 
470 | The Docker community image for RabbitMQ ships with Erlang 23 at the time of writing. Users of custom Docker images are highly recommended
471 | to provision Erlang 23 as well.
472 | 
473 | ### CPU Resource Usage
474 | 
475 | RabbitMQ was designed for workloads that involve [multiple queues](https://www.rabbitmq.com/queues.html#runtime-characteristics) and where
476 | a node serves multiple clients at the same time. Nodes will generally use all the [CPU cores allowed](https://www.rabbitmq.com/runtime.html)
477 | without any explicit configuration. As the number of cores grows, some tuning may be necessary to reduce [CPU context switching](https://www.rabbitmq.com/runtime.html#scheduling).
478 | 
479 | How CPU time is spent can be monitored via the [runtime thread activity metrics](https://www.rabbitmq.com/runtime.html#thread-stats) which
480 | are also exposed via the [RabbitMQ Prometheus plugin](https://www.rabbitmq.com/prometheus.html).
481 | 
482 | If RabbitMQ pods hover around their CPU resource allowance and experience throttling in environments with a large number of
483 | relatively idle clients, the load likely can be [reduced with a modest amount of configuration](https://www.rabbitmq.com/runtime.html#cpu-reduce-idle-usage).
484 | 
485 | ### Memory Limits
486 | 
487 | RabbitMQ uses the concept of a [runtime memory high watermark](https://www.rabbitmq.com/memory.html). By default a node will use 40% of detected
488 | (available) memory as the watermark. When the watermark is crossed, publishers across the entire cluster will be blocked
489 | and more aggressive paging out to disk initiated. The watermark value may seem like a memory quota on Kubernetes at first
490 | but there is an important difference: RabbitMQ resource alarms assume a node can typically recover from this state. For example,
491 | a large backlog of messages will eventually be consumed.
492 | 
493 | Kubernetes memory limits are [enforced by the OOM killer](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits):
494 | no recovery is expected. This means that a RabbitMQ node's high memory watermark **must be lower** than the memory limit
495 | imposed on the node container. Kubernetes deployments should use the relative watermark values in the [recommended range](https://www.rabbitmq.com/production-checklist.html#resource-limits-ram).
496 | 
497 | [Memory usage breakdown data](https://www.rabbitmq.com/memory-use.html) should be used to determine what consumes most memory on the node.
498 | 
499 | ### Disk Usage
500 | 
501 | We highly recommend overprovisioning the [disk space available to RabbitMQ containers](https://www.rabbitmq.com/production-checklist.html#resource-limits-disk-space).
502 | A node that has run out of disk space won't always be able to recover from such an event. Such nodes must be
503 | decommissioned and replaced.
504 | 
505 | ### Consider Available Network Link Bandwidth
506 | 
507 | Finally, consider what kind of links and Kubernetes networking options are used for inter-node communication. Network link congestion
508 | can be a significant limiting factor to system throughput and affect its availability.
509 | 
510 | Below is a very simplistic formula to calculate the amount of bandwidth needed by a workload, in bits per second:
511 | 
512 | ```
513 | # peak message rate * bits per message * 110% to account for metadata and protocol framing
514 | PeakMessageRate * AverageMessagePayloadSizeInBytes * 8 * 1.1
515 | ```
516 | 
517 | Therefore a workload with average message size of 3 kB and expected peak message rate
518 | of 20K messages a second can consume up to
519 | 
520 | ```
521 | 3 kB * 20000/second * 8 * 1.1 = 528 megabits/second
522 | ```
523 | 
524 | of bandwidth.
525 | 
526 | Team RabbitMQ maintains a [Grafana dashboard](https://www.rabbitmq.com/prometheus.html#other-dashboards) for inter-node communication
527 | link metrics.
528 | 
529 | 
530 | ## Using `rabbitmq-perf-test` to Run a Functional and Load Test of the Cluster
531 | 
532 | RabbitMQ comes with a load simulation tool, [PerfTest](https://rabbitmq.github.io/rabbitmq-perf-test/stable/htmlsingle/), which can be executed from outside of a cluster or
533 | deployed to Kubernetes using the `perf-test` public [docker image](https://hub.docker.com/r/pivotalrabbitmq/perf-test/). Here's an example of how
534 | the image can be deployed to a Kubernetes cluster:
535 | 
536 | ```shell
537 | kubectl run perf-test --image=pivotalrabbitmq/perf-test -- --uri amqp://{username}:{password}@{service}
538 | ```
539 | 
540 | Here the `{username}` and `{password}` are the user credentials, e.g. those set up in the `rabbitmq-admin` Secret.
541 | The `{service}` is the hostname to connect to. We use the name of the client service that will resolve as a hostname when deployed.
542 | 
543 | The above `kubectl run` command will start a PerfTest pod which can be observed in
544 | 
545 | ``` shell
546 | kubectl get pods
547 | ```
548 | 
549 | For a functioning RabbitMQ cluster, running `kubectl logs -f {perf-test-pod-name}`, where `{perf-test-pod-name}`
550 | is the name of the pod as reported by `kubectl get pods`, will produce output similar to this:
551 | 
552 | ```
553 | id: test-110102-976, time: 263.100s, sent: 21098 msg/s, received: 21425 msg/s, min/median/75th/95th/99th consumer latency: 1481452/1600817/1636996/1674410/1682972 μs
554 | id: test-110102-976, time: 264.100s, sent: 17314 msg/s, received: 17856 msg/s, min/median/75th/95th/99th consumer latency: 1509657/1600942/1636253/1695525/1718537 μs
555 | id: test-110102-976, time: 265.100s, sent: 18597 msg/s, received: 17707 msg/s, min/median/75th/95th/99th consumer latency: 1573151/1716519/1756060/1813985/1846490 μs
556 | ```
557 | 
558 | To learn more about PerfTest, its settings, capabilities and output, see the [PerfTest doc guide](https://rabbitmq.github.io/rabbitmq-perf-test/stable/htmlsingle/).
559 | 
560 | PerfTest is not meant to be running permanently. To tear down the `perf-test` pod, use
561 | 
562 | ```shell
563 | kubectl delete pod perf-test
564 | ```
565 | 
566 | ## Monitoring the Cluster
567 | 
568 | [Monitoring](https://www.rabbitmq.com/monitoring.html) is a critically important part of any production deployment.
569 | 
570 | RabbitMQ comes with [in-built support for Prometheus](https://www.rabbitmq.com/prometheus.html). To enable it, enable the `rabbitmq_prometheus` plugin.
571 | This in turn can be done by adding `rabbitmq_prometheus` to the `enabled_plugins` Config Map as explained above.
572 | 
573 | The Prometheus scraping port, 15692, must be open on both the Pod and the client Service.
574 | 
575 | Node and cluster metrics can be [visualised with Grafana](https://www.rabbitmq.com/prometheus.html).
576 | 
577 | 
578 | ## Alternative Option: the Kubernetes Cluster Operator for RabbitMQ
579 | 
580 | As this post demonstrates, there are quite a few parts involved in hosting a stateful data service
581 | such as RabbitMQ on Kubernetes. It may seem like a daunting task.
582 | There are several alternatives to this kind of DIY deployment demonstrated in this post.
583 | 
584 | Team RabbitMQ at VMware has open sourced a [Kubernetes Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/)
585 | implementation for RabbitMQ. As of August 2020, this is a young project under active development.
586 | While it currently has limitations, it is our recommended option over the manual DIY setup
587 | demonstrated in this post.
588 | 
589 | See [RabbitMQ Cluster Operator for Kubernetes](https://www.rabbitmq.com/kubernetes/operator/operator-overview.html) to learn more.
590 | The project is developed in the open at [rabbitmq/cluster-operator on GitHub](https://github.com/rabbitmq/cluster-operator). Give it a try and let us know how it goes.
591 | Besides GitHub, two great venues for providing feedback to the team behind the Operator are the [RabbitMQ mailing list](https://groups.google.com/forum/#!forum/rabbitmq-users)
592 | and the [`#kubernetes` channel in RabbitMQ community Slack](https://rabbitmq-slack.herokuapp.com/).
593 | 


--------------------------------------------------------------------------------