├── .github ├── FUNDING.yml └── workflows │ ├── ci.yml │ └── stale.yml ├── .gitignore ├── .yamllint ├── LICENSE ├── README.md ├── ansible.cfg ├── benchmarks ├── README.md ├── disk-benchmark.sh ├── drupal-benchmark.sh └── stress.yml ├── ceph ├── README.md └── main.yml ├── example.config.yml ├── example.hosts.ini ├── images ├── deskpi-super6c-running.jpg └── turing-pi-2-hero.jpg ├── main.yml ├── networking.yml ├── requirements.yml ├── tasks ├── kubernetes │ ├── drupal.yml │ ├── helm.yml │ ├── nfs.yml │ └── prometheus.yml ├── networking │ ├── reverse-tunnel.yml │ ├── router.yml │ ├── static-networking.yml │ └── ubuntu-prep.yml └── storage │ ├── filesystem.yml │ └── zfs.yml ├── templates ├── drupal.yml ├── exports.j2 └── mariadb.yml └── upgrade.yml /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | --- 3 | github: geerlingguy 4 | patreon: geerlingguy 5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 'on': 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | jobs: 10 | 11 | lint: 12 | name: Lint 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Check out the codebase. 17 | uses: actions/checkout@v2 18 | 19 | - name: Set up Python 3. 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: '3.x' 23 | 24 | - name: Install test dependencies. 25 | run: pip3 install yamllint 26 | 27 | - name: Lint all the YAMLs. 28 | run: yamllint . 29 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Close inactive issues 3 | 'on': 4 | schedule: 5 | - cron: "55 15 * * 4" # semi-random time 6 | 7 | jobs: 8 | close-issues: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: write 12 | pull-requests: write 13 | steps: 14 | - uses: actions/stale@v8 15 | with: 16 | days-before-stale: 120 17 | days-before-close: 60 18 | exempt-issue-labels: bug,pinned,security,planned 19 | exempt-pr-labels: bug,pinned,security,planned 20 | stale-issue-label: "stale" 21 | stale-pr-label: "stale" 22 | stale-issue-message: | 23 | This issue has been marked 'stale' due to lack of recent activity. If there is no further activity, the issue will be closed in another 30 days. Thank you for your contribution! 24 | 25 | Please read [this blog post](https://www.jeffgeerling.com/blog/2020/enabling-stale-issue-bot-on-my-github-repositories) to see the reasons why I mark issues as stale. 26 | close-issue-message: | 27 | This issue has been closed due to inactivity. If you feel this is in error, please reopen the issue or file a new issue with the relevant details. 28 | stale-pr-message: | 29 | This pr has been marked 'stale' due to lack of recent activity. If there is no further activity, the issue will be closed in another 30 days. Thank you for your contribution! 30 | 31 | Please read [this blog post](https://www.jeffgeerling.com/blog/2020/enabling-stale-issue-bot-on-my-github-repositories) to see the reasons why I mark issues as stale. 32 | close-pr-message: | 33 | This pr has been closed due to inactivity. If you feel this is in error, please reopen the issue or file a new issue with the relevant details. 
34 | repo-token: ${{ secrets.GITHUB_TOKEN }} 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | hosts.ini 2 | hosts-* 3 | config.yml 4 | config-* 5 | ansible_collections 6 | roles/geerlingguy.* 7 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | extends: default 3 | rules: 4 | line-length: 5 | max: 140 6 | level: warning 7 | truthy: false 8 | 9 | ignore: | 10 | **/.github/workflows/ci.yml 11 | **/stale.yml 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jeff Geerling 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Raspberry Pi Cluster 2 | 3 | [![CI](https://github.com/geerlingguy/pi-cluster/actions/workflows/ci.yml/badge.svg)](https://github.com/geerlingguy/pi-cluster/actions/workflows/ci.yml) 4 | 5 |

![Turing Pi 2 - Raspberry Pi Compute Module Cluster](images/turing-pi-2-hero.jpg)

6 | 7 | This repository contains examples and automation used in various Raspberry Pi clustering scenarios, as seen on [Jeff Geerling's YouTube channel](https://www.youtube.com/c/JeffGeerling). 8 | 9 |

![DeskPi Super6c Mini ITX Raspberry Pi Compute Module Cluster](images/deskpi-super6c-running.jpg)

10 | 11 | The inspiration for this project was my first Pi cluster, the [Raspberry Pi Dramble](https://www.pidramble.com), which is still running in my basement to this day! 12 | 13 | ## Usage 14 | 15 | 1. Make sure you have [Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) installed. 16 | 2. Copy the `example.hosts.ini` inventory file to `hosts.ini`. Make sure it has the `control_plane` and `nodes` groups configured correctly (for my examples I named my nodes `node[1-4].local`). 17 | 3. Copy the `example.config.yml` file to `config.yml`, and modify the variables to your liking. 18 | 19 | ### Raspberry Pi Setup 20 | 21 | I am running Raspberry Pi OS on various Pi clusters. You can run this on any Pi cluster, but I tend to use Compute Modules without eMMC ('Lite' versions) and I often run them using [32 GB SanDisk Extreme microSD cards](https://amzn.to/3G35QbY) to boot each node. For some setups (like when I run the [Compute Blade](https://computeblade.com) or [DeskPi Super6c](https://deskpi.com/collections/deskpi-super6c)), I boot off NVMe SSDs instead. 22 | 23 | In every case, I flashed Raspberry Pi OS (64-bit, lite) to the storage devices using Raspberry Pi Imager. 24 | 25 | To make network discovery and integration easier, I edit the advanced configuration in Imager, and set the following options: 26 | 27 | - Set hostname: `node1.local` (set to `2` for node 2, `3` for node 3, etc.) 28 | - Enable SSH: 'Allow public-key', and paste in my public SSH key(s) 29 | - Configure wifi: (ONLY on node 1, if desired) enter SSID and password for local WiFi network 30 | 31 | After setting all those options, making sure only node 1 has WiFi configured, and the hostname is unique to each node (and matches what is in `hosts.ini`), I inserted the microSD cards into the respective Pis, or installed the NVMe SSDs into the correct slots, and booted the cluster. 32 | 33 | ### SSH connection test 34 | 35 | To test the SSH connection from my Ansible controller (my main workstation, where I'm running all the playbooks), I connected to each server individually, and accepted the hostkey: 36 | 37 | ``` 38 | ssh pi@node1.local 39 | ``` 40 | 41 | This ensures Ansible will also be able to connect via SSH in the following steps. You can test Ansible's connection with: 42 | 43 | ``` 44 | ansible all -m ping 45 | ``` 46 | 47 | It should respond with a 'SUCCESS' message for each node. 48 | 49 | ### Storage Configuration 50 | 51 | This playbook will create a storage location on node 3 by default. You can use one of the storage configurations by switching the `storage_type` variable from `filesystem` to `zfs` in your `config.yml` file. 52 | 53 | #### Filesystem Storage 54 | 55 | If using filesystem (`storage_type: filesystem`), make sure to use the appropriate `storage_nfs_dir` variable in `config.yml`. 56 | 57 | #### ZFS Storage 58 | 59 | If using ZFS (`storage_type: zfs`), you should have two volumes available on node 3, `/dev/sda` and `/dev/sdb`, able to be pooled into a mirror. 
Make sure your two SATA drives are wiped: 60 | 61 | ``` 62 | pi@node3:~ $ sudo wipefs --all --force /dev/sda?; sudo wipefs --all --force /dev/sda 63 | pi@node3:~ $ sudo wipefs --all --force /dev/sdb?; sudo wipefs --all --force /dev/sdb 64 | ``` 65 | 66 | If you run `lsblk`, you should see `sda` and `sdb` have no partitions, and are ready to use: 67 | 68 | ``` 69 | pi@node3:~ $ lsblk 70 | NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT 71 | sda 8:0 0 1.8T 0 disk 72 | sdb 8:16 0 1.8T 0 disk 73 | ``` 74 | 75 | You should also make sure the `storage_nfs_dir` variable is set appropriately for ZFS in your `config.yml`. 76 | 77 | This ZFS layout was configured originally for the Turing Pi 2 board, which has two built-in SATA ports connected directly to node 3. In the future, the configuration may be genericized a bit better. 78 | 79 | #### Ceph Storage Configuration 80 | 81 | You could also run Ceph on a Pi cluster—see the storage configuration playbook inside the `ceph` directory. 82 | 83 | This configuration is not yet integrated into the general K3s setup. 84 | 85 | ### Cluster configuration and K3s installation 86 | 87 | First, make sure Ansible requirements are installed: 88 | 89 | ``` 90 | ansible-galaxy install -r requirements.yml --force 91 | ``` 92 | 93 | Configure static networking, if your cluster nodes don't already have static IP addresses—see later section in this README. 94 | 95 | Then, run the playbook: 96 | 97 | ``` 98 | ansible-playbook main.yml 99 | ``` 100 | 101 | At the end of the playbook, there should be an instance of Drupal running on the cluster. If you log into node 1, you should be able to access it with `curl localhost`. 102 | 103 | > If the playbook stalls while installing K3s, [you might need to configure static IP addresses](https://github.com/geerlingguy/pi-cluster/issues/11#issuecomment-1983874999) for the nodes, especially if using mDNS (like with `.local` names for the nodes). Follow the guide in "Static network configuration" then run the `main.yml` playbook again afterwards, and it should get things in order. 104 | 105 | If you have SSH tunnelling configured (see later section), you could access `http://[your-vps-ip-or-hostname]:8080/` and you'd see the site. 106 | 107 | You can also log into node 1, switch to the root user account (`sudo su`), then use `kubectl` to manage the cluster (e.g. view Drupal pods with `kubectl get pods -n drupal`). 108 | 109 | The Kubernetes Ingress object for Drupal (how HTTP requests from outside the cluster make it to Drupal) can be found by running `kubectl get ingress -n drupal`. Take the IP address or hostname there and enter it in your browser on a computer on the same network, and voila! You should see Drupal's installer. 110 | 111 | K3s' `kubeconfig` file is located at `/etc/rancher/k3s/k3s.yaml`. If you'd like to manage the cluster from other hosts (or using a tool like Lens), copy the contents of that file, replacing `localhost` with the IP address or hostname of the control plane node, and paste the contents into a file `~/.kube/config`. 
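For example, copying the kubeconfig down to the Ansible controller might look like this. This is a minimal sketch, assuming the control plane is reachable as `node1.local` and the `pi` user can run `sudo` without a password prompt (adjust the hostname and user to match your inventory):

```
mkdir -p ~/.kube
ssh pi@node1.local "sudo cat /etc/rancher/k3s/k3s.yaml" > ~/.kube/config
# The server address in the file points at the control plane's own loopback
# address; change it so kubectl talks to node 1 over the network.
sed -i 's/127.0.0.1/node1.local/; s/localhost/node1.local/' ~/.kube/config
kubectl get nodes
```

(On macOS, BSD `sed` requires a backup suffix argument, e.g. `sed -i '' 's/127.0.0.1/node1.local/' ~/.kube/config`.)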
112 | 113 | Alternatively, if you'd like to use [k9s](https://k9scli.io) on the main Pi itself, symlink the rancher kubeconfig file into a location where k9s expects to see it: 114 | 115 | ``` 116 | # (perform all commands as root user) 117 | # Download and install K9s 118 | wget https://github.com/derailed/k9s/releases/latest/download/k9s_linux_arm64.deb && apt install ./k9s_linux_arm64.deb && rm k9s_linux_arm64.deb 119 | 120 | # Symlink K3s kubeconfig into root user's home directory 121 | ln -s /etc/rancher/k3s/k3s.yaml ~/.kube/config 122 | 123 | # Launch k9s 124 | k9s 125 | ``` 126 | 127 | ### Upgrading the cluster 128 | 129 | Run the upgrade playbook: 130 | 131 | ``` 132 | ansible-playbook upgrade.yml 133 | ``` 134 | 135 | ### Monitoring the cluster 136 | 137 | Prometheus and Grafana are used for monitoring. Grafana can be accessed via port forwarding (or you could choose to expose it another way). 138 | 139 | To access Grafana: 140 | 141 | 1. Make sure you set up a valid `~/.kube/config` file (see 'K3s installation' above). 142 | 1. Run `kubectl port-forward service/cluster-monitoring-grafana :80` 143 | 1. Grab the port that's output, and browse to `localhost:[port]`, and bingo! Grafana. 144 | 145 | The default login is `admin` / `prom-operator`, but you can also get the secret with `kubectl get secret cluster-monitoring-grafana -o jsonpath="{.data.admin-password}" | base64 -D`. 146 | 147 | You can then browse to all the Kubernetes and Pi-related dashboards by browsing the Dashboards in the 'General' folder. 148 | 149 | ### Benchmarking the cluster 150 | 151 | See the README file within the `benchmarks` folder. 152 | 153 | ### Shutting down the cluster 154 | 155 | The safest way to shut down the cluster is to run the following command: 156 | 157 | ``` 158 | ansible all -m community.general.shutdown -b 159 | ``` 160 | 161 | > Note: If using the SSH tunnel, you might want to run the command _first_ on nodes 2-4, _then_ on node 1. So first run `ansible 'all:!control_plane' [...]`, then run it again just for `control_plane`. 162 | 163 | Then after you confirm the nodes are shut down (with K3s running, it can take a few minutes), press the cluster's power button (or yank the Ethernet cables if using PoE) to power down all Pis physically. Then you can switch off or disconnect your power supply. 164 | 165 | ### Static network configuration (highly recommended) 166 | 167 | Kubernetes generally likes static network routes, especially when using DNS to connect to other nodes in a cluster. 168 | 169 | There is a playbook which configures static networking so your nodes maintain the same IP address after a reboot, even under different networking scenarios. 170 | 171 | If using your cluster both on-premise and remote (e.g. using 4G LTE connected to the first Pi), you can set it up on its _own_ subnet (e.g. `10.1.1.x`). Otherwise, you can set it to the same subnet as your local network. 172 | 173 | Configure the subnet via the `ipv4_subnet_prefix` variable in `config.yml`, then run the playbook: 174 | 175 | ``` 176 | ansible-playbook networking.yml 177 | ``` 178 | 179 | After running the playbook, until a reboot, the Pis will still be accessible over their former DHCP-assigned IP address. After rebooting, the nodes will be accessible on their new IP addresses. 
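Once the nodes come back up from the reboot described below, you can confirm each one picked up its static address with a quick ad-hoc check. This is just a sketch, assuming the default `eth0` interface used by the networking playbook:

```
ansible all -a "ip -4 addr show eth0"
```

Each node should report the `{{ ipv4_subnet_prefix }}.{{ ip_host_octet }}` address assigned to it in `hosts.ini`.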
180 | 181 | You can reboot all the nodes with: 182 | 183 | ``` 184 | ansible all -m reboot -b 185 | ``` 186 | 187 | > If you are running Ubuntu, and you get an error like `"Failed to find required executable "nmcli"`, run the `ubuntu-prep.yml` playbook: `ansible-playbook tasks/networking/ubuntu-prep.yml` 188 | 189 | #### If using a different subnet 190 | 191 | If you chose a different subnet than your LAN, make sure your workstation is connected to an interface on the same subnet as the cluster (e.g. `10.1.1.x`). 192 | 193 | After the networking changes are made, since this playbook uses DNS names (e.g. `node1.local`) instead of IP addresses, your computer will still be able to connect to the nodes directly—assuming your network has IPv6 support. Pinging the nodes on their new IP addresses will _not_ work, however. For better network compatibility, it's recommended you set up a separate network interface on the Ansible controller that's on the same subnet as the Pis in the cluster: 194 | 195 | On my Mac, I connected a second network interface and manually configured its IP address as `10.1.1.10`, with subnet mask `255.255.255.0`, and that way I could still access all the nodes via IP address or their hostnames (e.g. `node2.local`). 196 | 197 | Because the cluster subnet needs its own router, node 1 is configured as a router, using `wlan0` as the primary interface for Internet traffic by default. The other nodes get their Internet access through node 1. 198 | 199 | #### Switch between 4G LTE and WiFi (optional) 200 | 201 | The network configuration defaults to an `active_internet_interface` of `wlan0`, meaning node 1 will route all Internet traffic for the cluster through its WiFi interface. 202 | 203 | Assuming you have a [working 4G card in slot 1](https://www.jeffgeerling.com/blog/2022/using-4g-lte-wireless-modems-on-raspberry-pi), you can switch node 1 to route through an alternate interface (e.g. `usb0`): 204 | 205 | 1. Set `active_internet_interface: "usb0"` in your `config.yml` 206 | 2. Run the networking playbook again: `ansible-playbook networking.yml` 207 | 208 | You can switch back and forth between interfaces using the steps above. 209 | 210 | #### Reverse SSH and HTTP tunnel configuration (optional) 211 | 212 | For my own experimentation, I ran my Pi cluster 'off-grid', using a 4G LTE modem, as mentioned above. 213 | 214 | Because my mobile network provider uses CG-NAT, there is no way to remotely access the cluster, or serve web traffic to the public internet from it, at least not out of the box. 215 | 216 | I am using a reverse SSH tunnel to enable direct remote SSH and HTTP access. To set that up, I configured a VPS I run to use TCP Forwarding (see [this blog post for details](https://www.jeffgeerling.com/blog/2022/ssh-and-http-raspberry-pi-behind-cg-nat)), and I configured an SSH key so node 1 could connect to my VPS (e.g. `ssh my-vps-username@my-vps-hostname-or-ip`). 217 | 218 | Then I set the `reverse_tunnel_enable` variable to `true` in my `config.yml`, and configured the VPS username and hostname options. 219 | 220 | Doing that and running the `main.yml` playbook configures `autossh` on node 1, and will try to get a connection through to the VPS on ports 2222 (to node 1's port 22) and 8080 (to node 1's port 80). 
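To check that the tunnel actually came up, you can inspect both ends. A rough sketch, using the default ports mentioned above:

```
# On node 1: the autossh service should be active and running.
sudo systemctl status autossh

# On the VPS: the forwarded ports should be listening.
ss -tlnp | grep -E ':2222|:8080'
```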
221 | 222 | After that's done, you should be able to log into the cluster _through_ your VPS with a command like: 223 | 224 | ``` 225 | $ ssh -p 2222 pi@[my-vps-hostname] 226 | ``` 227 | 228 | > Note: If autossh isn't working, it could be that it didn't exit cleanly, and a tunnel is still reserving the port on the remote VPS. That's often the case if you run `sudo systemctl status autossh` and see messages like `Warning: remote port forwarding failed for listen port 2222`. 229 | > 230 | > In that case, log into the remote VPS and run `pgrep ssh | xargs kill` to kill off all active SSH sessions, then `autossh` should pick back up again. 231 | 232 | > **Warning**: Use this feature at your own risk. Security is your own responsibility, and for better protection, you should probably avoid directly exposing your cluster (e.g. by disabling the `GatewayPorts` option) so you can only access the cluster while already logged into your VPS). 233 | 234 | ## Caveats 235 | 236 | These playbooks are used in both production and test clusters, but security is _always_ your responsibility. If you want to use any of this configuration in production, take ownership of it and understand how it works so you don't wake up to a hacked Pi cluster one day! 237 | 238 | ## Author 239 | 240 | The repository was created in 2023 by [Jeff Geerling](https://www.jeffgeerling.com), author of [Ansible for DevOps](https://www.ansiblefordevops.com), [Ansible for Kubernetes](https://www.ansibleforkubernetes.com), and [Kubernetes 101](https://www.kubernetes101book.com). 241 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | nocows = true 3 | inventory = hosts.ini 4 | roles_path = roles 5 | collections_path = ./ 6 | interpreter_python = /usr/bin/python3 7 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Pi Cluster Benchmarks 2 | 3 | I test a variety of use-cases using my Pi clusters. 4 | 5 | This folder contains some playbooks and guides for different types of benchmarking I do. 6 | 7 | More benchmarks will be added over time. 8 | 9 | ## Top500 High Performance Linpack (HPL) 10 | 11 | I like to run the HPL benchmark on my clusters to see where they fall in the historic [Top500 supercomputing list](https://top500.org). 12 | 13 | My automated Top500 HPL benchmark code is located in a separate repository: [Top500 Benchmark - HPL Linpack](https://github.com/geerlingguy/top500-benchmark). 14 | 15 | ## `disk-benchmark.sh` 16 | 17 | The `disk-benchmark` script is what I use to test various storage media with the Raspberry Pi. 18 | 19 | As a rule of thumb, NVMe devices will max out the Pi's PCIe bus (around 400 MB/sec), while microSD and eMMC storage on the Pi tops out under 100 MB/sec, at least as of the Pi 4 generation. 20 | 21 | See the `disk-benchmark.sh` comments for usage examples. 22 | 23 | ## `drupal-benchmark.sh` 24 | 25 | The `drupal-benchmark` script runs two types of load tests on the Drupal instance running on the cluster: 26 | 27 | - `wrk` anonymous load test: Tests the performance of completely cacheable page loads as an anonymous user. 28 | - `ab` authenticated load test: Tests the performance of partially-cacheable page loads as an authenticated user. 
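The invocations inside the script look roughly like the following (the target URL and the authenticated session cookie are variables you set at the top of `drupal-benchmark.sh`):

```
# Anonymous, fully-cacheable page loads.
wrk -t4 -c100 -d30 --timeout 10s "$DRUPAL_URL"

# Authenticated, partially-cacheable page loads.
ab -n 700 -c 10 -C "$AUTHENTICATED_SESSION_COOKIE" "$DRUPAL_URL"
```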
29 | 30 | Drupal 10 and later have fairly robust caching in place to make both of these scenarios fairly fast even on a single modern SBC. But it is useful as an end-to-end performance test, from ingress and cluster networking all the way down to Drupal's separate database and persistent volume storage performance. 31 | 32 | See the `drupal-benchmark.sh` comments for usage examples. 33 | 34 | ## `stress-ng` 35 | 36 | The `stress.yml` playbook hammers all CPU cores on all nodes simultaneously. This can be useful to measure the maximum power draw under CPU load, and to test whether the Pis in the cluster are getting enough power to run stably (especially when overclocked). 37 | 38 | To run it, run the following command within the main `pi-cluster` directory (up one level): 39 | 40 | ``` 41 | ansible-playbook benchmarks/stress.yml 42 | ``` 43 | 44 | Run it with a longer `stress_time` if you really want to test thermals and make sure your cluster doesn't overheat. 45 | -------------------------------------------------------------------------------- /benchmarks/disk-benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Disk benchmark script. 5 | # 6 | # A script I use to automate the running and reporting of benchmarks I compile 7 | # for my YouTube channel. 8 | # 9 | # Usage: 10 | # # Run it locally (overriding mount path and test size). 11 | # $ sudo MOUNT_PATH=/mnt/sda1 TEST_SIZE=1g ./disk-benchmark.sh 12 | # 13 | # # Run it straight from GitHub (with default options). 14 | # $ curl https://raw.githubusercontent.com/geerlingguy/pi-cluster/master/benchmarks/disk-benchmark.sh | sudo bash 15 | # 16 | # Author: Jeff Geerling, 2024 17 | 18 | printf "\n" 19 | printf "Disk benchmarks\n" 20 | 21 | # Fail if $SUDO_USER is empty. 22 | if [ -z "$SUDO_USER" ]; then 23 | printf "This script must be run with sudo.\n" 24 | exit 1; 25 | fi 26 | 27 | # Variables. 28 | MOUNT_PATH=${MOUNT_PATH:-"/"} 29 | USER_HOME_PATH=$(getent passwd $SUDO_USER | cut -d: -f6) 30 | TEST_SIZE=${TEST_SIZE:-"1g"} 31 | IOZONE_INSTALL_PATH=$USER_HOME_PATH 32 | IOZONE_VERSION=iozone3_506 33 | 34 | cd $IOZONE_INSTALL_PATH 35 | 36 | # Install dependencies. 37 | if [ ! `which curl` ]; then 38 | printf "Installing curl...\n" 39 | apt-get install -y curl 40 | printf "Install complete!\n\n" 41 | fi 42 | if [ ! `which make` ]; then 43 | printf "Installing build tools...\n" 44 | apt-get install -y build-essential 45 | printf "Install complete!\n\n" 46 | fi 47 | 48 | # Download and build iozone. 49 | if [ ! 
-f $IOZONE_INSTALL_PATH/$IOZONE_VERSION/src/current/iozone ]; then 50 | printf "Installing iozone...\n" 51 | curl "http://www.iozone.org/src/current/$IOZONE_VERSION.tar" | tar -x 52 | cd $IOZONE_VERSION/src/current 53 | case $(uname -m) in 54 | arm64|aarch64) 55 | make --quiet linux-arm 56 | ;; 57 | *) 58 | make --quiet linux-AMD64 59 | esac 60 | printf "Install complete!\n\n" 61 | else 62 | cd $IOZONE_VERSION/src/current 63 | fi 64 | 65 | printf "Running iozone 4K / 1024K read and write tests...\n" 66 | iozone_result=$(./iozone -e -I -a -s $TEST_SIZE -r 4k -r 1024k -i 0 -i 1 -i 2 -f $MOUNT_PATH/iozone | cut -c7-100 | tail -n6 | head -n4) 67 | echo -e "$iozone_result" 68 | printf "\n" 69 | 70 | random_read_4k=$(echo -e "$iozone_result" | awk 'FNR == 3 {printf "%.2f", $7/(1024)}') 71 | random_write_4k=$(echo -e "$iozone_result" | awk 'FNR == 3 {printf "%.2f", $8/(1024)}') 72 | random_read_1024k=$(echo -e "$iozone_result" | awk 'FNR == 4 {printf "%.2f", $7/(1024)}') 73 | random_write_1024k=$(echo -e "$iozone_result" | awk 'FNR == 4 {printf "%.2f", $8/(1024)}') 74 | sequential_read_1024k=$(echo -e "$iozone_result" | awk 'FNR == 4 {printf "%.2f", $6/(1024)}') 75 | sequential_write_1024k=$(echo -e "$iozone_result" | awk 'FNR == 4 {printf "%.2f", $4/(1024)}') 76 | cat << EOF 77 | # --- Copy and paste the result below --- 78 | 79 | | Benchmark | Result | 80 | | -------------------------- | ------ | 81 | | iozone 4K random read | $random_read_4k MB/s | 82 | | iozone 4K random write | $random_write_4k MB/s | 83 | | iozone 1M random read | $random_read_1024k MB/s | 84 | | iozone 1M random write | $random_write_1024k MB/s | 85 | | iozone 1M sequential read | $sequential_read_1024k MB/s | 86 | | iozone 1M sequential write | $sequential_write_1024k MB/s | 87 | 88 | # --- End result --- 89 | EOF 90 | printf "\n" 91 | 92 | printf "Disk benchmark complete!\n\n" 93 | -------------------------------------------------------------------------------- /benchmarks/drupal-benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # wrk and ab load test script 5 | # 6 | # Usage: 7 | # # Make sure wrk and ab are installed. 8 | # $ brew install wrk 9 | # 10 | # # Grab the authenticated session cookie from a logged-in window: 11 | # 1. Open browser's dev console. 12 | # 2. Navigate to storage/cookies. 13 | # 3. Right-click on the 'SESSxxxx' cookie and copy it. 14 | # 4. Paste the cookie in `AUTHENTICATED_SESSION_COOKIE` before running script. 15 | # 16 | # # Run the load tests. 17 | # $ ./drupal-benchmark.sh 18 | # 19 | # Author: Jeff Geerling, 2023 20 | 21 | printf "\n" 22 | printf "Drupal benchmarks.\n" 23 | 24 | # Variables. Best to use IP address to prevent `ab` errors. 25 | DRUPAL_URL="http://10.0.2.61/" 26 | AUTHENTICATED_SESSION_COOKIE="SESS3747f176b3220dbe6938dbbc37681fd0=VsCYFTA3-5A16oYGR%2Cer%2C7-wm53P3wLnN8ZKIlVmnyHqfR2D" 27 | # Install dependencies. 28 | if [ ! `which ab` ]; then 29 | printf "Please install apachebench (ab) and try again.\n\n" 30 | fi 31 | if [ ! `which wrk` ]; then 32 | printf "Please install wrk (wrk) and try again.\n\n" 33 | fi 34 | 35 | # Run benchmarks. 36 | printf "Running wrk anonymous page load benchmark...\n" 37 | curl -s -o /dev/null $DRUPAL_URL # Load once to fill caches. 38 | sleep 2 39 | wrk -t4 -c100 -d30 --timeout 10s $DRUPAL_URL 40 | printf "\n" 41 | 42 | printf "Running ab authenticated page load benchmark...\n" 43 | ab -n 1 -c 1 -C "SESSxyz=XYZ" $DRUPAL_URL >/dev/null # Load once to fill caches. 
44 | sleep 2 45 | ab -n 700 -c 10 -C "$AUTHENTICATED_SESSION_COOKIE" $DRUPAL_URL 46 | printf "\n" 47 | 48 | printf "Drupal benchmark complete!\n\n" 49 | -------------------------------------------------------------------------------- /benchmarks/stress.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Stress the CPUs for temperature and power testing. 3 | hosts: all 4 | gather_facts: true 5 | become: true 6 | 7 | vars: 8 | stress_time: 5m 9 | 10 | vars_files: 11 | - ../config.yml 12 | 13 | tasks: 14 | - name: Ensure stress-ng is installed. 15 | ansible.builtin.package: 16 | name: stress-ng 17 | state: present 18 | 19 | - name: Run stress-ng. 20 | ansible.builtin.command: >- 21 | stress-ng -c {{ ansible_processor_vcpus }} -t {{ stress_time }} 22 | -------------------------------------------------------------------------------- /ceph/README.md: -------------------------------------------------------------------------------- 1 | # Ceph Storage Cluster Setup 2 | 3 | This directory contains a playbook that configures Ceph storage on a Pi cluster. I initially set this up as part of my [6-node DeskPi Super6c video](https://www.youtube.com/watch?v=UT5UbSJOyog). 4 | 5 | ### Cluster configuration 6 | 7 | Run the playbook: 8 | 9 | ``` 10 | ansible-playbook main.yml 11 | ``` 12 | 13 | TODO. 14 | 15 | ### Upgrading the cluster 16 | 17 | Run the upgrade playbook: 18 | 19 | ``` 20 | ansible-playbook upgrade.yml 21 | ``` 22 | 23 | ### Monitoring the cluster 24 | 25 | TODO. 26 | -------------------------------------------------------------------------------- /ceph/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up cluster-wide configuration. 3 | hosts: cluster 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: reboot-pi 9 | reboot: 10 | 11 | vars_files: 12 | - config.yml 13 | 14 | tasks: [] 15 | 16 | 17 | - name: Configure the control plane (node 1). 18 | hosts: control_plane 19 | gather_facts: false 20 | become: true 21 | 22 | vars_files: 23 | - config.yml 24 | 25 | tasks: 26 | # See: https://forums.raspberrypi.com/viewtopic.php?t=274486 27 | - name: Set up the Debian unstable repo (TODO). 28 | meta: noop 29 | 30 | # See: https://ceph.com/en/news/blog/2022/install-ceph-in-a-raspberrypi-4-cluster/ 31 | - name: Install cephadm (TODO). 32 | meta: noop 33 | 34 | - name: Create the ceph cluster (TODO). 35 | meta: noop 36 | 37 | - name: Retrieve the ceph pubkey. 38 | ansible.builtin.fetch: 39 | src: /etc/ceph/ceph.pub 40 | dest: files/ceph.pub 41 | flat: yes 42 | 43 | - name: Ensure NFS dependencies are installed. 44 | ansible.builtin.package: 45 | name: 46 | - libcephfs2 47 | - nfs-ganesha 48 | - nfs-ganesha-ceph 49 | state: present 50 | 51 | - name: Configure the nodes (nodes 2-6). 52 | hosts: nodes 53 | gather_facts: false 54 | become: true 55 | 56 | vars_files: 57 | - config.yml 58 | 59 | tasks: 60 | - name: Ensure Ceph dependencies are installed. 61 | ansible.builtin.package: 62 | name: 63 | - podman 64 | - lvm2 65 | state: present 66 | 67 | - name: Copy the ceph pubkey to each node. 68 | ansible.posix.authorized_key: 69 | user: root 70 | state: present 71 | key: "{{ lookup('file', 'files/ceph.pub') }}" 72 | -------------------------------------------------------------------------------- /example.config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Cluster storage options. 
3 | storage_type: filesystem # options: 'filesystem', 'zfs' 4 | storage_zfs_pool_name: zfsdata 5 | # storage_nfs_dir: '{{ storage_zfs_pool_name }}' # Use this for 'zfs' 6 | storage_nfs_dir: "srv" # Use this for 'filesystem' 7 | storage_nfs_share_name: nfsshare 8 | 9 | # Drupal installation options. 10 | drupal_image: drupal:10.2-apache 11 | drupal_hash_salt: OTk4MTYzYWI4N2E2MGIxNjlmYmQ2MTA4 12 | drupal_trusted_host_patterns: '^.+$' 13 | drupal_database_password: 'drupal' 14 | drupal_base_web_path: '/var/www/html/sites/default/' 15 | drupal_config_sync_directory: 'sites/default/files/config_OTk4MTYzY' 16 | drupal_extra_settings_php: '' 17 | 18 | # These networking variables are only necessary if using optional static and 19 | # remote networking features in the `tasks/networking` playbooks. 20 | ipv4_subnet_prefix: "10.1.1" 21 | ipv4_gateway: "10.1.1.1" 22 | dns4_servers: "{{ ipv4_gateway }}" 23 | active_internet_interface: "wlan0" 24 | reverse_tunnel_enable: false 25 | reverse_tunnel_vps_username: my-vps-username 26 | reverse_tunnel_vps_hostname: my-vps-hostname 27 | control_plane_router_setup: false 28 | -------------------------------------------------------------------------------- /example.hosts.ini: -------------------------------------------------------------------------------- 1 | # The 'ip_host_octet' is used only when configuring static networking using the 2 | # playbooks inside 'tasks/networking'. 3 | [control_plane] 4 | node1.local ip_host_octet=61 5 | 6 | [nodes] 7 | node2.local ip_host_octet=62 8 | node3.local ip_host_octet=63 9 | node4.local ip_host_octet=64 10 | 11 | # The node to be used for shared cluster storage. 12 | [storage] 13 | node3.local 14 | 15 | [cluster:children] 16 | control_plane 17 | nodes 18 | 19 | [cluster:vars] 20 | ansible_user='pi' 21 | 22 | # Uncomment below when working on cluster through VPS tunnel host. 23 | #[control_plane:vars] 24 | #ansible_port='2222' 25 | #ansible_user='pi' 26 | #ansible_host='my-vps-host-or-ip' 27 | 28 | #[nodes:vars] 29 | #ansible_ssh_common_args='-o ProxyCommand="ssh -p 2222 -W %h:%p -q pi@my-vps-host-or-ip"' 30 | -------------------------------------------------------------------------------- /images/deskpi-super6c-running.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geerlingguy/pi-cluster/6e443778bc16d489c6f8049ae5e1e2624e939289/images/deskpi-super6c-running.jpg -------------------------------------------------------------------------------- /images/turing-pi-2-hero.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geerlingguy/pi-cluster/6e443778bc16d489c6f8049ae5e1e2624e939289/images/turing-pi-2-hero.jpg -------------------------------------------------------------------------------- /main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up cluster-wide configuration. 3 | hosts: cluster 4 | gather_facts: true 5 | become: true 6 | 7 | handlers: 8 | - name: reboot-pi 9 | ansible.builtin.reboot: 10 | 11 | vars_files: 12 | - config.yml 13 | 14 | tasks: 15 | - name: Ensure cgroups are configured correctly in cmdline.txt. 
16 | ansible.builtin.replace: 17 | path: /boot/firmware/cmdline.txt 18 | regexp: '^([\w](?!.*\b{{ item }}\b).*)$' 19 | replace: '\1 {{ item }}' 20 | with_items: 21 | - "cgroup_memory=1" 22 | - "cgroup_enable=memory" 23 | notify: reboot-pi 24 | when: ansible_distribution == 'Debian' 25 | 26 | - name: Ensure cgroups are configured correctly in ubuntuEnv.txt. 27 | ansible.builtin.replace: 28 | path: /boot/firmware/ubuntuEnv.txt 29 | regexp: '^(bootargs=[\w](?!.*\b{{ item }}\b).*)$' 30 | replace: '\1 {{ item }}' 31 | with_items: 32 | - "cgroup_memory=1" 33 | - "cgroup_enable=memory" 34 | notify: reboot-pi 35 | when: ansible_distribution == 'Ubuntu' 36 | 37 | - name: Download K3s install script. 38 | ansible.builtin.get_url: 39 | url: https://get.k3s.io 40 | dest: "~/k3s_install.sh" 41 | mode: a+x 42 | 43 | - name: Install required dependencies 44 | ansible.builtin.apt: 45 | name: nfs-common 46 | state: present 47 | 48 | 49 | - name: Configure storage node. 50 | hosts: storage 51 | gather_facts: false 52 | become: true 53 | 54 | handlers: 55 | - name: restart nfs 56 | ansible.builtin.service: 57 | name: nfs-server 58 | state: restarted 59 | 60 | vars_files: 61 | - config.yml 62 | 63 | tasks: 64 | - name: Set up storage. 65 | include_tasks: tasks/storage/{{ storage_type }}.yml 66 | 67 | 68 | - name: Configure the control plane. 69 | hosts: control_plane 70 | gather_facts: false 71 | become: true 72 | 73 | vars_files: 74 | - config.yml 75 | 76 | tasks: 77 | - name: Install K3s on control plane (takes a while). 78 | ansible.builtin.shell: >- 79 | ~/k3s_install.sh >> ~/k3s_install_log.txt 80 | args: 81 | chdir: "~" 82 | creates: /var/lib/rancher/k3s/server/node-token 83 | 84 | - name: Get node token. 85 | ansible.builtin.command: cat /var/lib/rancher/k3s/server/node-token 86 | changed_when: false 87 | register: node_token_output 88 | 89 | - name: Set node_token fact. 90 | ansible.builtin.set_fact: 91 | node_token: "{{ node_token_output.stdout_lines[0] }}" 92 | 93 | - name: Ensure required dependencies are installed. 94 | ansible.builtin.package: 95 | name: 96 | - python3-pip 97 | - python3-setuptools 98 | - python3-openshift 99 | - python3-yaml 100 | - build-essential 101 | - golang 102 | - git 103 | state: present 104 | become: true 105 | 106 | - name: Ignore PEP 668 because it's silly. 107 | ansible.builtin.file: 108 | path: /usr/lib/python3.11/EXTERNALLY-MANAGED 109 | state: absent 110 | become: true 111 | 112 | 113 | - name: Configure the worker nodes. 114 | hosts: nodes 115 | gather_facts: false 116 | become: true 117 | 118 | vars_files: 119 | - config.yml 120 | 121 | tasks: 122 | - name: Install K3s on nodes (takes a while). 123 | ansible.builtin.shell: >- 124 | K3S_URL="https://{{ groups['control_plane'][0] }}:6443" 125 | K3S_TOKEN="{{ hostvars[groups['control_plane'][0]]['node_token'] }}" 126 | ~/k3s_install.sh >> ~/k3s_install_log.txt 127 | args: 128 | chdir: "~" 129 | creates: /var/lib/rancher/k3s/agent/kubelet.kubeconfig 130 | 131 | - name: Set up Helm. 132 | import_playbook: tasks/kubernetes/helm.yml 133 | tags: ['helm'] 134 | 135 | - name: Set up NFS PVCs. 136 | import_playbook: tasks/kubernetes/nfs.yml 137 | tags: ['nfs'] 138 | 139 | - name: Set up Prometheus. 140 | import_playbook: tasks/kubernetes/prometheus.yml 141 | tags: ['prometheus'] 142 | 143 | - name: Set up Drupal. 
144 | import_playbook: tasks/kubernetes/drupal.yml 145 | tags: ['drupal'] 146 | -------------------------------------------------------------------------------- /networking.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure reverse SSH tunnels for SSH and HTTP on control plane. 3 | import_playbook: tasks/networking/reverse-tunnel.yml 4 | when: reverse_tunnel_enable 5 | 6 | - name: Set up static networking configuration. 7 | import_playbook: tasks/networking/static-networking.yml 8 | 9 | - name: Configure control plane as a router. 10 | import_playbook: tasks/networking/router.yml 11 | when: control_plane_router_setup 12 | -------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | --- 2 | collections: 3 | - name: community.general 4 | -------------------------------------------------------------------------------- /tasks/kubernetes/drupal.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure Drupal. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Create drupal namespace. 17 | k8s: 18 | name: drupal 19 | api_version: v1 20 | kind: Namespace 21 | state: present 22 | 23 | - name: Apply drupal manifests. 24 | k8s: 25 | definition: "{{ lookup('template', '../../templates/' + item ) }}" 26 | state: present 27 | loop: 28 | - mariadb.yml 29 | - drupal.yml 30 | -------------------------------------------------------------------------------- /tasks/kubernetes/helm.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up Helm. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars: 8 | # See available releases: https://github.com/helm/helm/releases/ 9 | helm_version: 'v3.16.2' 10 | helm_platform: linux 11 | helm_arch: arm64 12 | helm_bin_path: /usr/local/bin/helm 13 | 14 | tasks: 15 | - name: Check if Helm binary exists. 16 | stat: 17 | path: "{{ helm_bin_path }}" 18 | register: helm_check 19 | 20 | - name: Check Helm version. 21 | command: "{{ helm_bin_path }} version" 22 | failed_when: false 23 | changed_when: false 24 | register: helm_existing_version 25 | 26 | - name: Download helm. 27 | unarchive: 28 | src: https://get.helm.sh/helm-{{ helm_version }}-{{ helm_platform }}-{{ helm_arch }}.tar.gz 29 | dest: /tmp 30 | remote_src: true 31 | register: helm_download 32 | when: > 33 | not helm_check.stat.exists 34 | or helm_version not in helm_existing_version.stdout 35 | 36 | - name: Copy helm binary into place. 37 | copy: 38 | src: "/tmp/{{ helm_platform }}-{{ helm_arch }}/helm" 39 | dest: "{{ helm_bin_path }}" 40 | mode: 0755 41 | remote_src: true 42 | become: true 43 | when: helm_download is changed 44 | -------------------------------------------------------------------------------- /tasks/kubernetes/nfs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure NFS Subdir External Provisioner. 
3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Add nfs-subdir-external-provisioner chart repo. 17 | kubernetes.core.helm_repository: 18 | name: nfs-subdir-external-provisioner 19 | repo_url: "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/" 20 | 21 | - name: Deploy NFS Subdir External Provisioner Helm chart. 22 | kubernetes.core.helm: 23 | name: nfs-subdir-external-provisioner 24 | chart_ref: nfs-subdir-external-provisioner/nfs-subdir-external-provisioner 25 | release_namespace: default 26 | state: present 27 | values: 28 | nfs: 29 | server: "{{ groups['storage'][0] }}" 30 | path: "/{{ storage_nfs_dir }}/{{ storage_nfs_share_name }}" 31 | -------------------------------------------------------------------------------- /tasks/kubernetes/prometheus.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure Prometheus + Grafana monitoring stack. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Add prometheus-community chart repo. 17 | kubernetes.core.helm_repository: 18 | name: prometheus-community 19 | repo_url: "https://prometheus-community.github.io/helm-charts" 20 | 21 | - name: Deploy Prometheus + Grafana Helm chart. 22 | kubernetes.core.helm: 23 | name: cluster-monitoring 24 | chart_ref: prometheus-community/kube-prometheus-stack 25 | release_namespace: default 26 | state: present 27 | values: 28 | alertmanager: 29 | enabled: false 30 | -------------------------------------------------------------------------------- /tasks/networking/reverse-tunnel.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Note: This playbook makes the following assumptions: 3 | # 4 | # 1. The configured VPS server already has the proper sshd_config. 5 | # 2. Node 1 already has an SSH key generated that's on the VPS server. 6 | # 3. You've confirmed Node 1 can SSH into the VPS server. 7 | # 8 | # See: https://www.jeffgeerling.com/blog/2022/ssh-and-http-raspberry-pi-behind-cg-nat 9 | - name: Configure control plane as a reverse tunnel for SSH and HTTP. 10 | hosts: control_plane 11 | gather_facts: false 12 | become: true 13 | 14 | handlers: 15 | - name: restart dhcpcd 16 | ansible.builtin.service: 17 | name: dhcpcd 18 | state: restarted 19 | 20 | - name: restart autossh 21 | ansible.builtin.systemd: 22 | name: autossh 23 | state: restarted 24 | when: reverse_tunnel_enable 25 | 26 | vars_files: 27 | - ../../config.yml 28 | 29 | tasks: 30 | - name: Install autossh. 31 | ansible.builtin.apt: 32 | name: autossh 33 | state: present 34 | 35 | - name: Configure autossh defaults. 36 | ansible.builtin.copy: 37 | dest: /etc/default/autossh 38 | content: | 39 | AUTOSSH_POLL=60 40 | AUTOSSH_FIRST_POLL=30 41 | AUTOSSH_GATETIME=0 42 | AUTOSSH_PORT=22000 43 | SSH_OPTIONS="-N -R 2222:localhost:22 -R 8080:localhost:80 {{ reverse_tunnel_vps_username }}@{{ reverse_tunnel_vps_hostname }}" 44 | 45 | - name: Create autossh unit file. 
46 | ansible.builtin.copy: 47 | dest: /lib/systemd/system/autossh.service 48 | content: | 49 | [Unit] 50 | Description=autossh 51 | Wants=network-online.target 52 | After=network-online.target 53 | 54 | [Service] 55 | Type=simple 56 | User=pi 57 | EnvironmentFile=/etc/default/autossh 58 | ExecStart=/usr/bin/autossh $SSH_OPTIONS 59 | Restart=always 60 | RestartSec=60 61 | 62 | [Install] 63 | WantedBy=multi-user.target 64 | register: autossh_unit 65 | 66 | - name: Reload systemd daemon if unit file changed. 67 | ansible.builtin.systemd: 68 | daemon_reload: true 69 | when: autossh_unit is changed 70 | 71 | - name: Ensure autossh service is running. 72 | ansible.builtin.systemd: 73 | name: autossh 74 | state: started 75 | enabled: true 76 | 77 | - name: Set active Internet gateway interface on control plane. 78 | ansible.builtin.blockinfile: 79 | path: /etc/dhcpcd.conf 80 | marker: "# ANSIBLE MANAGED - Internet routing metric {mark}" 81 | block: | 82 | interface {{ active_internet_interface }} 83 | metric 100 84 | delegate_to: "{{ groups['control_plane'][0] }}" 85 | run_once: true 86 | notify: 87 | - restart dhcpcd 88 | - restart autossh 89 | -------------------------------------------------------------------------------- /tasks/networking/router.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure node 1 as a router. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: restart dnsmasq 9 | ansible.builtin.service: 10 | name: dnsmasq 11 | state: restarted 12 | 13 | - name: persist iptables rules 14 | ansible.builtin.command: netfilter-persistent save 15 | 16 | vars_files: 17 | - ../../config.yml 18 | 19 | tasks: 20 | - name: Install routing prerequisites. 21 | ansible.builtin.apt: 22 | name: 23 | - dnsmasq 24 | - netfilter-persistent 25 | - iptables-persistent 26 | state: present 27 | 28 | - name: Ensure netfilter-persistent is enabled. 29 | ansible.builtin.service: 30 | name: netfilter-persistent 31 | enabled: true 32 | 33 | - name: Ensure dnsmasq is running and enabled. 34 | ansible.builtin.service: 35 | name: dnsmasq 36 | state: started 37 | enabled: true 38 | 39 | - name: "Configure iptables for {{ active_internet_interface }} masquerade." 40 | ansible.builtin.iptables: 41 | table: nat 42 | chain: POSTROUTING 43 | out_interface: "{{ active_internet_interface }}" 44 | jump: MASQUERADE 45 | notify: persist iptables rules 46 | 47 | - name: Enable IPv4 forwarding. 48 | ansible.posix.sysctl: 49 | name: net.ipv4.ip_forward 50 | value: '1' 51 | sysctl_set: yes 52 | 53 | - name: Configure dnsmasq for bridged DNS. 54 | ansible.builtin.copy: 55 | dest: /etc/dnsmasq.d/bridge.conf 56 | content: | 57 | interface=eth0 58 | bind-interfaces 59 | server=1.1.1.1 60 | server=1.0.0.1 61 | domain-needed 62 | bogus-priv 63 | notify: restart dnsmasq 64 | 65 | # See: https://github.com/geerlingguy/turing-pi-2-cluster/issues/9 66 | - name: Add crontab task to restart dnsmasq. 67 | ansible.builtin.cron: 68 | name: "restart dnsmasq if not running" 69 | minute: "*" 70 | job: "/usr/bin/systemctl status dnsmasq || /usr/bin/systemctl restart dnsmasq" 71 | -------------------------------------------------------------------------------- /tasks/networking/static-networking.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up static networking configuration. 
3 | hosts: cluster 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: restart dhcpcd 9 | ansible.builtin.service: 10 | name: dhcpcd 11 | state: restarted 12 | 13 | - name: restart networkmanager 14 | ansible.builtin.service: 15 | name: NetworkManager 16 | state: restarted 17 | 18 | vars_files: 19 | - ../../config.yml 20 | 21 | tasks: 22 | - name: Check if using dhcpcd for networking. 23 | ansible.builtin.stat: 24 | path: /etc/dhcpcd.conf 25 | register: dhcpcd_file_result 26 | 27 | - name: Configure static IP address (dhcpcd). 28 | ansible.builtin.blockinfile: 29 | path: /etc/dhcpcd.conf 30 | marker: "# ANSIBLE MANAGED - static ip {mark}" 31 | block: | 32 | interface eth0 33 | static ip_address={{ ipv4_subnet_prefix }}.{{ ip_host_octet }}/24 34 | static routers={{ ipv4_subnet_prefix }}.1 35 | static domain_name_servers={{ ipv4_subnet_prefix }}.1 36 | notify: restart dhcpcd 37 | when: dhcpcd_file_result.stat.exists 38 | 39 | - name: Configure static IP address (Network Manager). 40 | community.general.nmcli: 41 | conn_name: "Wired connection 1" 42 | ifname: eth0 43 | type: ethernet 44 | ip4: "{{ ipv4_subnet_prefix }}.{{ ip_host_octet }}/24" 45 | gw4: "{{ ipv4_gateway }}" 46 | dns4: "{{ dns4_servers }}" 47 | state: present 48 | notify: restart networkmanager 49 | when: not dhcpcd_file_result.stat.exists 50 | 51 | - name: Configure hosts file so nodes can see each other by hostname. 52 | ansible.builtin.blockinfile: 53 | path: /etc/hosts 54 | marker: "# ANSIBLE MANAGED - static ip config {mark}" 55 | block: | 56 | {% for host in groups['cluster'] %} 57 | {{ ipv4_subnet_prefix }}.{{ hostvars[host].ip_host_octet }} {{ host }} {{ host | regex_replace('\.local', '') }} 58 | {% endfor %} 59 | -------------------------------------------------------------------------------- /tasks/networking/ubuntu-prep.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Prepare Ubuntu for static networking. 3 | hosts: cluster 4 | gather_facts: false 5 | become: true 6 | 7 | vars: 8 | netplan_file: /etc/netplan/50-cloud-init.yaml 9 | 10 | tasks: 11 | - name: Ensure NetworkManager is installed. 12 | ansible.builtin.apt: 13 | name: network-manager 14 | state: present 15 | update_cache: true 16 | 17 | - name: Configure netplan file for NetworkManager. 18 | ansible.builtin.copy: 19 | dest: "{{ netplan_file }}" 20 | mode: 0600 21 | content: | 22 | # ANSIBLE MANAGED - netplan configuration 23 | network: 24 | version: 2 25 | renderer: NetworkManager 26 | 27 | - name: Regenerate netplan config. 28 | ansible.builtin.command: "{{ item }}" 29 | with_items: 30 | - sudo netplan generate 31 | - sudo netplan apply 32 | 33 | - name: Reboot. 34 | ansible.builtin.reboot: 35 | -------------------------------------------------------------------------------- /tasks/storage/filesystem.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Ensure NFS share directory exists. 3 | ansible.builtin.file: 4 | dest: "/{{ storage_nfs_dir }}/{{ storage_nfs_share_name }}" 5 | owner: root 6 | group: "{{ ansible_user }}" 7 | state: directory 8 | mode: 0777 9 | 10 | - name: Ensure NFS is installed. 11 | ansible.builtin.apt: 12 | name: nfs-kernel-server 13 | state: present 14 | 15 | - name: Configure NFS exports. 
16 | ansible.builtin.lineinfile: 17 | dest: /etc/exports 18 | line: "/{{ storage_nfs_dir }}/{{ storage_nfs_share_name }} *(rw,sync,no_root_squash)" 19 | regexp: ".*" 20 | create: true 21 | notify: restart nfs 22 | 23 | - name: Ensure NFS is started and enabled at boot. 24 | ansible.builtin.service: 25 | name: nfs-server 26 | state: started 27 | enabled: true 28 | -------------------------------------------------------------------------------- /tasks/storage/zfs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Ensure ZFS prerequisites are installed. 3 | ansible.builtin.apt: 4 | name: raspberrypi-kernel-headers 5 | state: present 6 | 7 | - name: Ensure ZFS is installed. 8 | ansible.builtin.apt: 9 | name: 10 | - raspberrypi-kernel-headers 11 | - zfs-dkms 12 | - zfsutils-linux 13 | state: present 14 | 15 | - name: Configure ZFS mirror pool. 16 | ansible.builtin.command: "zpool create {{ storage_zfs_pool_name }} mirror /dev/sda /dev/sdb" 17 | args: 18 | creates: "/{{ storage_zfs_pool_name }}" 19 | 20 | - name: Ensure NFS filesystem is present in ZFS. 21 | community.general.zfs: 22 | name: "{{ storage_zfs_pool_name }}/{{ storage_nfs_share_name }}" 23 | state: present 24 | 25 | - name: Configure permissions for ZFS share. 26 | ansible.builtin.file: 27 | dest: "/{{ storage_zfs_pool_name }}/{{ storage_nfs_share_name }}" 28 | owner: root 29 | group: pi 30 | mode: 0777 31 | 32 | - name: Check if sharenfs is enabled on ZFS NFS share. 33 | ansible.builtin.command: "zfs get sharenfs {{ storage_zfs_pool_name }}/{{ storage_nfs_share_name }}" 34 | register: sharenfs_status 35 | changed_when: false 36 | 37 | # Note: no_root_squash can be dangerous. Use at your own peril. 38 | - name: Ensure NFS filesystem is allowed to be shared via NFS. 39 | ansible.builtin.command: "zfs set sharenfs='no_root_squash,rw=*' {{ storage_zfs_pool_name }}/{{ storage_nfs_share_name }}" 40 | when: "'rw' not in sharenfs_status.stdout" 41 | 42 | - name: Ensure NFS is installed. 43 | ansible.builtin.apt: 44 | name: nfs-kernel-server 45 | state: present 46 | 47 | - name: Ensure NFS is started and enabled at boot. 
48 | ansible.builtin.service: 49 | name: nfs-server 50 | state: started 51 | enabled: true 52 | -------------------------------------------------------------------------------- /templates/drupal.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ConfigMap 3 | apiVersion: v1 4 | metadata: 5 | name: drupal-config 6 | namespace: drupal 7 | data: 8 | settings.php: |- 9 | <?php 10 | $databases['default']['default'] = [ 11 | 'database' => 'drupal', 12 | 'username' => 'drupal', 13 | 'password' => '{{ drupal_database_password }}', 14 | 'prefix' => '', 15 | 'host' => 'mariadb', 16 | 'port' => '3306', 17 | 'namespace' => 'Drupal\\Core\\Database\\Driver\\mysql', 18 | 'driver' => 'mysql', 19 | ]; 20 | $settings['hash_salt'] = '{{ drupal_hash_salt }}'; 21 | $settings['trusted_host_patterns'] = ['{{ drupal_trusted_host_patterns }}']; 22 | $settings['config_sync_directory'] = '{{ drupal_config_sync_directory }}'; 23 | {{ drupal_extra_settings_php }} 24 | 25 | --- 26 | kind: PersistentVolumeClaim 27 | apiVersion: v1 28 | metadata: 29 | name: drupal-files-pvc 30 | namespace: drupal 31 | spec: 32 | storageClassName: nfs-client 33 | accessModes: 34 | - ReadWriteMany 35 | resources: 36 | requests: 37 | storage: 20Gi 38 | 39 | --- 40 | kind: Deployment 41 | apiVersion: apps/v1 42 | metadata: 43 | name: drupal 44 | namespace: drupal 45 | spec: 46 | replicas: 1 47 | selector: 48 | matchLabels: 49 | app: drupal 50 | template: 51 | metadata: 52 | labels: 53 | app: drupal 54 | spec: 55 | containers: 56 | - name: drupal 57 | image: '{{ drupal_image }}' 58 | ports: 59 | - containerPort: 80 60 | livenessProbe: 61 | tcpSocket: 62 | port: 80 63 | initialDelaySeconds: 60 64 | readinessProbe: 65 | tcpSocket: 66 | port: 80 67 | initialDelaySeconds: 30 68 | volumeMounts: 69 | - mountPath: '{{ drupal_base_web_path }}' 70 | name: drupal-settings 71 | - mountPath: '{{ drupal_base_web_path }}files/' 72 | name: drupal-files 73 | resources: 74 | limits: 75 | cpu: '2' 76 | memory: '2048Mi' 77 | requests: 78 | cpu: '1' 79 | memory: '1024Mi' 80 | volumes: 81 | - name: drupal-settings 82 | configMap: 83 | name: drupal-config 84 | - name: drupal-files 85 | persistentVolumeClaim: 86 | claimName: drupal-files-pvc 87 | 88 | --- 89 | kind: Service 90 | apiVersion: v1 91 | metadata: 92 | name: drupal 93 | namespace: drupal 94 | spec: 95 | ports: 96 | - port: 80 97 | protocol: TCP 98 | selector: 99 | app: drupal 100 | 101 | --- 102 | apiVersion: networking.k8s.io/v1 103 | kind: Ingress 104 | metadata: 105 | name: drupal 106 | namespace: drupal 107 | annotations: 108 | kubernetes.io/ingress.class: "traefik" 109 | spec: 110 | rules: 111 | - http: 112 | paths: 113 | - path: / 114 | pathType: Prefix 115 | backend: 116 | service: 117 | name: drupal 118 | port: 119 | number: 80 120 | -------------------------------------------------------------------------------- /templates/exports.j2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geerlingguy/pi-cluster/6e443778bc16d489c6f8049ae5e1e2624e939289/templates/exports.j2 -------------------------------------------------------------------------------- /templates/mariadb.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: PersistentVolumeClaim 3 | apiVersion: v1 4 | metadata: 5 | name: mariadb-pvc 6 | namespace: drupal 7 | spec: 8 | storageClassName: nfs-client 9 | accessModes: 10 | - ReadWriteMany 11 | resources: 12 | requests: 13 | storage: 20Gi 14 | 15 | --- 16 | kind: Deployment 17 | apiVersion: 
apps/v1 18 | metadata: 19 | name: mariadb 20 | namespace: drupal 21 | spec: 22 | replicas: 1 23 | selector: 24 | matchLabels: 25 | app: mariadb 26 | template: 27 | metadata: 28 | labels: 29 | app: mariadb 30 | spec: 31 | containers: 32 | - name: mariadb 33 | image: mariadb:10.6 34 | ports: 35 | - containerPort: 3306 36 | env: 37 | - name: MARIADB_DATABASE 38 | value: drupal 39 | - name: MARIADB_USER 40 | value: drupal 41 | - name: MARIADB_PASSWORD 42 | value: '{{ drupal_database_password }}' 43 | - name: MARIADB_RANDOM_ROOT_PASSWORD 44 | value: 'yes' 45 | volumeMounts: 46 | - mountPath: /var/lib/mysql 47 | name: database 48 | resources: 49 | limits: 50 | cpu: '2' 51 | memory: '4096Mi' 52 | requests: 53 | cpu: '1' 54 | memory: '2048Mi' 55 | volumes: 56 | - name: database 57 | persistentVolumeClaim: 58 | claimName: mariadb-pvc 59 | 60 | --- 61 | kind: Service 62 | apiVersion: v1 63 | metadata: 64 | name: mariadb 65 | namespace: drupal 66 | spec: 67 | ports: 68 | - port: 3306 69 | targetPort: 3306 70 | selector: 71 | app: mariadb 72 | -------------------------------------------------------------------------------- /upgrade.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Upgrade everything on the cluster. 3 | hosts: cluster 4 | become: true 5 | 6 | tasks: 7 | - name: Upgrade all software. 8 | ansible.builtin.apt: 9 | update_cache: true 10 | upgrade: dist 11 | 12 | - name: Check if a reboot is required. 13 | stat: 14 | path: /var/run/reboot-required 15 | get_checksum: false 16 | register: reboot_required_file 17 | 18 | - name: Reboot the server (if required). 19 | reboot: 20 | when: reboot_required_file.stat.exists == true 21 | 22 | - name: Remove dependencies that are no longer required. 23 | apt: 24 | autoremove: true 25 | --------------------------------------------------------------------------------