├── .github ├── FUNDING.yml └── workflows │ ├── ci.yml │ └── stale.yml ├── .gitignore ├── .yamllint ├── LICENSE ├── README.md ├── ansible.cfg ├── benchmarks ├── README.md ├── disk-benchmark.sh ├── drupal-benchmark.sh └── stress.yml ├── ceph ├── README.md └── main.yml ├── example.config.yml ├── example.hosts.ini ├── images ├── deskpi-super6c-running.jpg └── turing-pi-2-hero.jpg ├── main.yml ├── networking.yml ├── requirements.yml ├── tasks ├── kubernetes │ ├── drupal.yml │ ├── helm.yml │ ├── nfs.yml │ └── prometheus.yml ├── networking │ ├── reverse-tunnel.yml │ ├── router.yml │ ├── static-networking.yml │ └── ubuntu-prep.yml └── storage │ ├── filesystem.yml │ └── zfs.yml ├── templates ├── drupal.yml ├── exports.j2 └── mariadb.yml └── upgrade.yml /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | --- 3 | github: geerlingguy 4 | patreon: geerlingguy 5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 'on': 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | jobs: 10 | 11 | lint: 12 | name: Lint 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Check out the codebase. 17 | uses: actions/checkout@v2 18 | 19 | - name: Set up Python 3. 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: '3.x' 23 | 24 | - name: Install test dependencies. 25 | run: pip3 install yamllint 26 | 27 | - name: Lint all the YAMLs. 28 | run: yamllint . 29 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Close inactive issues 3 | 'on': 4 | schedule: 5 | - cron: "55 15 * * 4" # semi-random time 6 | 7 | jobs: 8 | close-issues: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: write 12 | pull-requests: write 13 | steps: 14 | - uses: actions/stale@v8 15 | with: 16 | days-before-stale: 120 17 | days-before-close: 60 18 | exempt-issue-labels: bug,pinned,security,planned 19 | exempt-pr-labels: bug,pinned,security,planned 20 | stale-issue-label: "stale" 21 | stale-pr-label: "stale" 22 | stale-issue-message: | 23 | This issue has been marked 'stale' due to lack of recent activity. If there is no further activity, the issue will be closed in another 30 days. Thank you for your contribution! 24 | 25 | Please read [this blog post](https://www.jeffgeerling.com/blog/2020/enabling-stale-issue-bot-on-my-github-repositories) to see the reasons why I mark issues as stale. 26 | close-issue-message: | 27 | This issue has been closed due to inactivity. If you feel this is in error, please reopen the issue or file a new issue with the relevant details. 28 | stale-pr-message: | 29 | This pr has been marked 'stale' due to lack of recent activity. If there is no further activity, the issue will be closed in another 30 days. Thank you for your contribution! 30 | 31 | Please read [this blog post](https://www.jeffgeerling.com/blog/2020/enabling-stale-issue-bot-on-my-github-repositories) to see the reasons why I mark issues as stale. 32 | close-pr-message: | 33 | This pr has been closed due to inactivity. If you feel this is in error, please reopen the issue or file a new issue with the relevant details. 
34 | repo-token: ${{ secrets.GITHUB_TOKEN }} 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | hosts.ini 2 | hosts-* 3 | config.yml 4 | config-* 5 | ansible_collections 6 | roles/geerlingguy.* 7 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | extends: default 3 | rules: 4 | line-length: 5 | max: 140 6 | level: warning 7 | truthy: false 8 | 9 | ignore: | 10 | **/.github/workflows/ci.yml 11 | **/stale.yml 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jeff Geerling 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Raspberry Pi Cluster 2 | 3 | [![CI](https://github.com/geerlingguy/pi-cluster/actions/workflows/ci.yml/badge.svg)](https://github.com/geerlingguy/pi-cluster/actions/workflows/ci.yml) 4 | 5 |

![Turing Pi 2 - Raspberry Pi Compute Module Cluster](images/turing-pi-2-hero.jpg)

6 | 7 | This repository contains examples and automation used in various Raspberry Pi clustering scenarios, as seen on [Jeff Geerling's YouTube channel](https://www.youtube.com/c/JeffGeerling). 8 | 9 |

![DeskPi Super6c Mini ITX Raspberry Pi Compute Module Cluster](images/deskpi-super6c-running.jpg)

10 | 11 | The inspiration for this project was my first Pi cluster, the [Raspberry Pi Dramble](https://www.pidramble.com), which is still running in my basement to this day! 12 | 13 | ## Usage 14 | 15 | 1. Make sure you have [Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) installed. 16 | 2. Copy the `example.hosts.ini` inventory file to `hosts.ini`. Make sure it has the `control_plane` and `nodes` groups configured correctly (for my examples I named my nodes `node[1-4].local`). 17 | 3. Copy the `example.config.yml` file to `config.yml`, and modify the variables to your liking. 18 | 19 | ### Raspberry Pi Setup 20 | 21 | I am running Raspberry Pi OS on various Pi clusters. You can run this on any Pi cluster, but I tend to use Compute Modules without eMMC ('Lite' versions) and I often run them using [32 GB SanDisk Extreme microSD cards](https://amzn.to/3G35QbY) to boot each node. For some setups (like when I run the [Compute Blade](https://computeblade.com) or [DeskPi Super6c](https://deskpi.com/collections/deskpi-super6c)), I boot off NVMe SSDs instead. 22 | 23 | In every case, I flashed Raspberry Pi OS (64-bit, lite) to the storage devices using Raspberry Pi Imager. 24 | 25 | To make network discovery and integration easier, I edit the advanced configuration in Imager, and set the following options: 26 | 27 | - Set hostname: `node1.local` (set to `2` for node 2, `3` for node 3, etc.) 28 | - Enable SSH: 'Allow public-key', and paste in my public SSH key(s) 29 | - Configure wifi: (ONLY on node 1, if desired) enter SSID and password for local WiFi network 30 | 31 | After setting all those options, making sure only node 1 has WiFi configured, and the hostname is unique to each node (and matches what is in `hosts.ini`), I inserted the microSD cards into the respective Pis, or installed the NVMe SSDs into the correct slots, and booted the cluster. 32 | 33 | ### SSH connection test 34 | 35 | To test the SSH connection from my Ansible controller (my main workstation, where I'm running all the playbooks), I connected to each server individually, and accepted the hostkey: 36 | 37 | ``` 38 | ssh pi@node1.local 39 | ``` 40 | 41 | This ensures Ansible will also be able to connect via SSH in the following steps. You can test Ansible's connection with: 42 | 43 | ``` 44 | ansible all -m ping 45 | ``` 46 | 47 | It should respond with a 'SUCCESS' message for each node. 48 | 49 | ### Storage Configuration 50 | 51 | This playbook will create a storage location on node 3 by default. You can use one of the storage configurations by switching the `storage_type` variable from `filesystem` to `zfs` in your `config.yml` file. 52 | 53 | #### Filesystem Storage 54 | 55 | If using filesystem (`storage_type: filesystem`), make sure to use the appropriate `storage_nfs_dir` variable in `config.yml`. 56 | 57 | #### ZFS Storage 58 | 59 | If using ZFS (`storage_type: zfs`), you should have two volumes available on node 3, `/dev/sda` and `/dev/sdb`, able to be pooled into a mirror. 
Make sure your two SATA drives are wiped: 60 | 61 | ``` 62 | pi@node3:~ $ sudo wipefs --all --force /dev/sda?; sudo wipefs --all --force /dev/sda 63 | pi@node3:~ $ sudo wipefs --all --force /dev/sdb?; sudo wipefs --all --force /dev/sdb 64 | ``` 65 | 66 | If you run `lsblk`, you should see `sda` and `sdb` have no partitions, and are ready to use: 67 | 68 | ``` 69 | pi@node3:~ $ lsblk 70 | NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT 71 | sda 8:0 0 1.8T 0 disk 72 | sdb 8:16 0 1.8T 0 disk 73 | ``` 74 | 75 | You should also make sure the `storage_nfs_dir` variable is set appropriately for ZFS in your `config.yml`. 76 | 77 | This ZFS layout was configured originally for the Turing Pi 2 board, which has two built-in SATA ports connected directly to node 3. In the future, the configuration may be genericized a bit better. 78 | 79 | #### Ceph Storage Configuration 80 | 81 | You could also run Ceph on a Pi cluster—see the storage configuration playbook inside the `ceph` directory. 82 | 83 | This configuration is not yet integrated into the general K3s setup. 84 | 85 | ### Cluster configuration and K3s installation 86 | 87 | First, make sure Ansible requirements are installed: 88 | 89 | ``` 90 | ansible-galaxy install -r requirements.yml --force 91 | ``` 92 | 93 | Configure static networking, if your cluster nodes don't already have static IP addresses—see later section in this README. 94 | 95 | Then, run the playbook: 96 | 97 | ``` 98 | ansible-playbook main.yml 99 | ``` 100 | 101 | At the end of the playbook, there should be an instance of Drupal running on the cluster. If you log into node 1, you should be able to access it with `curl localhost`. 102 | 103 | > If the playbook stalls while installing K3s, [you might need to configure static IP addresses](https://github.com/geerlingguy/pi-cluster/issues/11#issuecomment-1983874999) for the nodes, especially if using mDNS (like with `.local` names for the nodes). Follow the guide in "Static network configuration" then run the `main.yml` playbook again afterwards, and it should get things in order. 104 | 105 | If you have SSH tunnelling configured (see later section), you could access `http://[your-vps-ip-or-hostname]:8080/` and you'd see the site. 106 | 107 | You can also log into node 1, switch to the root user account (`sudo su`), then use `kubectl` to manage the cluster (e.g. view Drupal pods with `kubectl get pods -n drupal`). 108 | 109 | The Kubernetes Ingress object for Drupal (how HTTP requests from outside the cluster make it to Drupal) can be found by running `kubectl get ingress -n drupal`. Take the IP address or hostname there and enter it in your browser on a computer on the same network, and voila! You should see Drupal's installer. 110 | 111 | K3s' `kubeconfig` file is located at `/etc/rancher/k3s/k3s.yaml`. If you'd like to manage the cluster from other hosts (or using a tool like Lens), copy the contents of that file, replacing `localhost` with the IP address or hostname of the control plane node, and paste the contents into a file `~/.kube/config`. 
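For example, copying the kubeconfig down to the Ansible controller might look like this. This is a minimal sketch, assuming the control plane is reachable as `node1.local` and the `pi` user can run `sudo` without a password prompt (adjust the hostname and user to match your inventory):

```
mkdir -p ~/.kube
ssh pi@node1.local "sudo cat /etc/rancher/k3s/k3s.yaml" > ~/.kube/config
# The server address in the file points at the control plane's own loopback
# address; change it so kubectl talks to node 1 over the network.
sed -i 's/127.0.0.1/node1.local/; s/localhost/node1.local/' ~/.kube/config
kubectl get nodes
```

(On macOS, BSD `sed` requires a backup suffix argument, e.g. `sed -i '' 's/127.0.0.1/node1.local/' ~/.kube/config`.)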
112 | 113 | Alternatively, if you'd like to use [k9s](https://k9scli.io) on the main Pi itself, symlink the rancher kubeconfig file into a location where k9s expects to see it: 114 | 115 | ``` 116 | # (perform all commands as root user) 117 | # Download and install K9s 118 | wget https://github.com/derailed/k9s/releases/latest/download/k9s_linux_arm64.deb && apt install ./k9s_linux_arm64.deb && rm k9s_linux_arm64.deb 119 | 120 | # Symlink K3s kubeconfig into root user's home directory 121 | ln -s /etc/rancher/k3s/k3s.yaml ~/.kube/config 122 | 123 | # Launch k9s 124 | k9s 125 | ``` 126 | 127 | ### Upgrading the cluster 128 | 129 | Run the upgrade playbook: 130 | 131 | ``` 132 | ansible-playbook upgrade.yml 133 | ``` 134 | 135 | ### Monitoring the cluster 136 | 137 | Prometheus and Grafana are used for monitoring. Grafana can be accessed via port forwarding (or you could choose to expose it another way). 138 | 139 | To access Grafana: 140 | 141 | 1. Make sure you set up a valid `~/.kube/config` file (see 'K3s installation' above). 142 | 1. Run `kubectl port-forward service/cluster-monitoring-grafana :80` 143 | 1. Grab the port that's output, and browse to `localhost:[port]`, and bingo! Grafana. 144 | 145 | The default login is `admin` / `prom-operator`, but you can also get the secret with `kubectl get secret cluster-monitoring-grafana -o jsonpath="{.data.admin-password}" | base64 -D`. 146 | 147 | You can then browse to all the Kubernetes and Pi-related dashboards by browsing the Dashboards in the 'General' folder. 148 | 149 | ### Benchmarking the cluster 150 | 151 | See the README file within the `benchmarks` folder. 152 | 153 | ### Shutting down the cluster 154 | 155 | The safest way to shut down the cluster is to run the following command: 156 | 157 | ``` 158 | ansible all -m community.general.shutdown -b 159 | ``` 160 | 161 | > Note: If using the SSH tunnel, you might want to run the command _first_ on nodes 2-4, _then_ on node 1. So first run `ansible 'all:!control_plane' [...]`, then run it again just for `control_plane`. 162 | 163 | Then after you confirm the nodes are shut down (with K3s running, it can take a few minutes), press the cluster's power button (or yank the Ethernet cables if using PoE) to power down all Pis physically. Then you can switch off or disconnect your power supply. 164 | 165 | ### Static network configuration (highly recommended) 166 | 167 | Kubernetes generally likes static network routes, especially when using DNS to connect to other nodes in a cluster. 168 | 169 | There is a playbook which configures static networking so your nodes maintain the same IP address after a reboot, even under different networking scenarios. 170 | 171 | If using your cluster both on-premise and remote (e.g. using 4G LTE connected to the first Pi), you can set it up on its _own_ subnet (e.g. `10.1.1.x`). Otherwise, you can set it to the same subnet as your local network. 172 | 173 | Configure the subnet via the `ipv4_subnet_prefix` variable in `config.yml`, then run the playbook: 174 | 175 | ``` 176 | ansible-playbook networking.yml 177 | ``` 178 | 179 | After running the playbook, until a reboot, the Pis will still be accessible over their former DHCP-assigned IP address. After rebooting, the nodes will be accessible on their new IP addresses. 
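Once the nodes come back up from the reboot described below, you can confirm each one picked up its static address with a quick ad-hoc check. This is just a sketch, assuming the default `eth0` interface used by the networking playbook:

```
ansible all -a "ip -4 addr show eth0"
```

Each node should report the `{{ ipv4_subnet_prefix }}.{{ ip_host_octet }}` address assigned to it in `hosts.ini`.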
180 | 181 | You can reboot all the nodes with: 182 | 183 | ``` 184 | ansible all -m reboot -b 185 | ``` 186 | 187 | > If you are running Ubuntu, and you get an error like `"Failed to find required executable "nmcli"`, run the `ubuntu-prep.yml` playbook: `ansible-playbook tasks/networking/ubuntu-prep.yml` 188 | 189 | #### If using a different subnet 190 | 191 | If you chose a different subnet than your LAN, make sure your workstation is connected to an interface on the same subnet as the cluster (e.g. `10.1.1.x`). 192 | 193 | After the networking changes are made, since this playbook uses DNS names (e.g. `node1.local`) instead of IP addresses, your computer will still be able to connect to the nodes directly—assuming your network has IPv6 support. Pinging the nodes on their new IP addresses will _not_ work, however. For better network compatibility, it's recommended you set up a separate network interface on the Ansible controller that's on the same subnet as the Pis in the cluster: 194 | 195 | On my Mac, I connected a second network interface and manually configured its IP address as `10.1.1.10`, with subnet mask `255.255.255.0`, and that way I could still access all the nodes via IP address or their hostnames (e.g. `node2.local`). 196 | 197 | Because the cluster subnet needs its own router, node 1 is configured as a router, using `wlan0` as the primary interface for Internet traffic by default. The other nodes get their Internet access through node 1. 198 | 199 | #### Switch between 4G LTE and WiFi (optional) 200 | 201 | The network configuration defaults to an `active_internet_interface` of `wlan0`, meaning node 1 will route all Internet traffic for the cluster through its WiFi interface. 202 | 203 | Assuming you have a [working 4G card in slot 1](https://www.jeffgeerling.com/blog/2022/using-4g-lte-wireless-modems-on-raspberry-pi), you can switch node 1 to route through an alternate interface (e.g. `usb0`): 204 | 205 | 1. Set `active_internet_interface: "usb0"` in your `config.yml` 206 | 2. Run the networking playbook again: `ansible-playbook networking.yml` 207 | 208 | You can switch back and forth between interfaces using the steps above. 209 | 210 | #### Reverse SSH and HTTP tunnel configuration (optional) 211 | 212 | For my own experimentation, I ran my Pi cluster 'off-grid', using a 4G LTE modem, as mentioned above. 213 | 214 | Because my mobile network provider uses CG-NAT, there is no way to remotely access the cluster, or serve web traffic to the public internet from it, at least not out of the box. 215 | 216 | I am using a reverse SSH tunnel to enable direct remote SSH and HTTP access. To set that up, I configured a VPS I run to use TCP Forwarding (see [this blog post for details](https://www.jeffgeerling.com/blog/2022/ssh-and-http-raspberry-pi-behind-cg-nat)), and I configured an SSH key so node 1 could connect to my VPS (e.g. `ssh my-vps-username@my-vps-hostname-or-ip`). 217 | 218 | Then I set the `reverse_tunnel_enable` variable to `true` in my `config.yml`, and configured the VPS username and hostname options. 219 | 220 | Doing that and running the `main.yml` playbook configures `autossh` on node 1, and will try to get a connection through to the VPS on ports 2222 (to node 1's port 22) and 8080 (to node 1's port 80). 
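To check that the tunnel actually came up, you can inspect both ends. A rough sketch, using the default ports mentioned above:

```
# On node 1: the autossh service should be active and running.
sudo systemctl status autossh

# On the VPS: the forwarded ports should be listening.
ss -tlnp | grep -E ':2222|:8080'
```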
221 | 222 | After that's done, you should be able to log into the cluster _through_ your VPS with a command like: 223 | 224 | ``` 225 | $ ssh -p 2222 pi@[my-vps-hostname] 226 | ``` 227 | 228 | > Note: If autossh isn't working, it could be that it didn't exit cleanly, and a tunnel is still reserving the port on the remote VPS. That's often the case if you run `sudo systemctl status autossh` and see messages like `Warning: remote port forwarding failed for listen port 2222`. 229 | > 230 | > In that case, log into the remote VPS and run `pgrep ssh | xargs kill` to kill off all active SSH sessions, then `autossh` should pick back up again. 231 | 232 | > **Warning**: Use this feature at your own risk. Security is your own responsibility, and for better protection, you should probably avoid directly exposing your cluster (e.g. by disabling the `GatewayPorts` option) so you can only access the cluster while already logged into your VPS). 233 | 234 | ## Caveats 235 | 236 | These playbooks are used in both production and test clusters, but security is _always_ your responsibility. If you want to use any of this configuration in production, take ownership of it and understand how it works so you don't wake up to a hacked Pi cluster one day! 237 | 238 | ## Author 239 | 240 | The repository was created in 2023 by [Jeff Geerling](https://www.jeffgeerling.com), author of [Ansible for DevOps](https://www.ansiblefordevops.com), [Ansible for Kubernetes](https://www.ansibleforkubernetes.com), and [Kubernetes 101](https://www.kubernetes101book.com). 241 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | nocows = true 3 | inventory = hosts.ini 4 | roles_path = roles 5 | collections_path = ./ 6 | interpreter_python = /usr/bin/python3 7 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Pi Cluster Benchmarks 2 | 3 | I test a variety of use-cases using my Pi clusters. 4 | 5 | This folder contains some playbooks and guides for different types of benchmarking I do. 6 | 7 | More benchmarks will be added over time. 8 | 9 | ## Top500 High Performance Linpack (HPL) 10 | 11 | I like to run the HPL benchmark on my clusters to see where they fall in the historic [Top500 supercomputing list](https://top500.org). 12 | 13 | My automated Top500 HPL benchmark code is located in a separate repository: [Top500 Benchmark - HPL Linpack](https://github.com/geerlingguy/top500-benchmark). 14 | 15 | ## `disk-benchmark.sh` 16 | 17 | The `disk-benchmark` script is what I use to test various storage media with the Raspberry Pi. 18 | 19 | As a rule of thumb, NVMe devices will max out the Pi's PCIe bus (around 400 MB/sec), while microSD and eMMC storage on the Pi tops out under 100 MB/sec, at least as of the Pi 4 generation. 20 | 21 | See the `disk-benchmark.sh` comments for usage examples. 22 | 23 | ## `drupal-benchmark.sh` 24 | 25 | The `drupal-benchmark` script runs two types of load tests on the Drupal instance running on the cluster: 26 | 27 | - `wrk` anonymous load test: Tests the performance of completely cacheable page loads as an anonymous user. 28 | - `ab` authenticated load test: Tests the performance of partially-cacheable page loads as an authenticated user. 
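The invocations inside the script look roughly like the following (the target URL and the authenticated session cookie are variables you set at the top of `drupal-benchmark.sh`):

```
# Anonymous, fully-cacheable page loads.
wrk -t4 -c100 -d30 --timeout 10s "$DRUPAL_URL"

# Authenticated, partially-cacheable page loads.
ab -n 700 -c 10 -C "$AUTHENTICATED_SESSION_COOKIE" "$DRUPAL_URL"
```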
29 | 30 | Drupal 10 and later have fairly robust caching in place to make both of these scenarios fairly fast even on a single modern SBC. But it is useful as an end-to-end performance test, from ingress and cluster networking all the way down to Drupal's separate database and persistent volume storage performance. 31 | 32 | See the `drupal-benchmark.sh` comments for usage examples. 33 | 34 | ## `stress-ng` 35 | 36 | The `stress.yml` playbook hammers all CPU cores on all nodes simultaneously. This can be useful to measure the maximum power draw under CPU load, and to test whether the Pis in the cluster are getting enough power to run stably (especially when overclocked). 37 | 38 | To run it, run the following command within the main `pi-cluster` directory (up one level): 39 | 40 | ``` 41 | ansible-playbook benchmarks/stress.yml 42 | ``` 43 | 44 | Run it with a longer `stress_time` if you really want to test thermals and make sure your cluster doesn't overheat. 45 | -------------------------------------------------------------------------------- /benchmarks/disk-benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Disk benchmark script. 5 | # 6 | # A script I use to automate the running and reporting of benchmarks I compile 7 | # for my YouTube channel. 8 | # 9 | # Usage: 10 | # # Run it locally (overriding mount path and test size). 11 | # $ sudo MOUNT_PATH=/mnt/sda1 TEST_SIZE=1g ./disk-benchmark.sh 12 | # 13 | # # Run it straight from GitHub (with default options). 14 | # $ curl https://raw.githubusercontent.com/geerlingguy/pi-cluster/master/benchmarks/disk-benchmark.sh | sudo bash 15 | # 16 | # Author: Jeff Geerling, 2024 17 | 18 | printf "\n" 19 | printf "Disk benchmarks\n" 20 | 21 | # Fail if $SUDO_USER is empty. 22 | if [ -z "$SUDO_USER" ]; then 23 | printf "This script must be run with sudo.\n" 24 | exit 1; 25 | fi 26 | 27 | # Variables. 28 | MOUNT_PATH=${MOUNT_PATH:-"/"} 29 | USER_HOME_PATH=$(getent passwd $SUDO_USER | cut -d: -f6) 30 | TEST_SIZE=${TEST_SIZE:-"1g"} 31 | IOZONE_INSTALL_PATH=$USER_HOME_PATH 32 | IOZONE_VERSION=iozone3_506 33 | 34 | cd $IOZONE_INSTALL_PATH 35 | 36 | # Install dependencies. 37 | if [ ! `which curl` ]; then 38 | printf "Installing curl...\n" 39 | apt-get install -y curl 40 | printf "Install complete!\n\n" 41 | fi 42 | if [ ! `which make` ]; then 43 | printf "Installing build tools...\n" 44 | apt-get install -y build-essential 45 | printf "Install complete!\n\n" 46 | fi 47 | 48 | # Download and build iozone. 49 | if [ ! 
-f $IOZONE_INSTALL_PATH/$IOZONE_VERSION/src/current/iozone ]; then 50 | printf "Installing iozone...\n" 51 | curl "http://www.iozone.org/src/current/$IOZONE_VERSION.tar" | tar -x 52 | cd $IOZONE_VERSION/src/current 53 | case $(uname -m) in 54 | arm64|aarch64) 55 | make --quiet linux-arm 56 | ;; 57 | *) 58 | make --quiet linux-AMD64 59 | esac 60 | printf "Install complete!\n\n" 61 | else 62 | cd $IOZONE_VERSION/src/current 63 | fi 64 | 65 | printf "Running iozone 4K / 1024K read and write tests...\n" 66 | iozone_result=$(./iozone -e -I -a -s $TEST_SIZE -r 4k -r 1024k -i 0 -i 1 -i 2 -f $MOUNT_PATH/iozone | cut -c7-100 | tail -n6 | head -n4) 67 | echo -e "$iozone_result" 68 | printf "\n" 69 | 70 | random_read_4k=$(echo -e "$iozone_result" | awk 'FNR == 3 {printf "%.2f", $7/(1024)}') 71 | random_write_4k=$(echo -e "$iozone_result" | awk 'FNR == 3 {printf "%.2f", $8/(1024)}') 72 | random_read_1024k=$(echo -e "$iozone_result" | awk 'FNR == 4 {printf "%.2f", $7/(1024)}') 73 | random_write_1024k=$(echo -e "$iozone_result" | awk 'FNR == 4 {printf "%.2f", $8/(1024)}') 74 | sequential_read_1024k=$(echo -e "$iozone_result" | awk 'FNR == 4 {printf "%.2f", $6/(1024)}') 75 | sequential_write_1024k=$(echo -e "$iozone_result" | awk 'FNR == 4 {printf "%.2f", $4/(1024)}') 76 | cat << EOF 77 | # --- Copy and paste the result below --- 78 | 79 | | Benchmark | Result | 80 | | -------------------------- | ------ | 81 | | iozone 4K random read | $random_read_4k MB/s | 82 | | iozone 4K random write | $random_write_4k MB/s | 83 | | iozone 1M random read | $random_read_1024k MB/s | 84 | | iozone 1M random write | $random_write_1024k MB/s | 85 | | iozone 1M sequential read | $sequential_read_1024k MB/s | 86 | | iozone 1M sequential write | $sequential_write_1024k MB/s | 87 | 88 | # --- End result --- 89 | EOF 90 | printf "\n" 91 | 92 | printf "Disk benchmark complete!\n\n" 93 | -------------------------------------------------------------------------------- /benchmarks/drupal-benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # wrk and ab load test script 5 | # 6 | # Usage: 7 | # # Make sure wrk and ab are installed. 8 | # $ brew install wrk 9 | # 10 | # # Grab the authenticated session cookie from a logged-in window: 11 | # 1. Open browser's dev console. 12 | # 2. Navigate to storage/cookies. 13 | # 3. Right-click on the 'SESSxxxx' cookie and copy it. 14 | # 4. Paste the cookie in `AUTHENTICATED_SESSION_COOKIE` before running script. 15 | # 16 | # # Run the load tests. 17 | # $ ./drupal-benchmark.sh 18 | # 19 | # Author: Jeff Geerling, 2023 20 | 21 | printf "\n" 22 | printf "Drupal benchmarks.\n" 23 | 24 | # Variables. Best to use IP address to prevent `ab` errors. 25 | DRUPAL_URL="http://10.0.2.61/" 26 | AUTHENTICATED_SESSION_COOKIE="SESS3747f176b3220dbe6938dbbc37681fd0=VsCYFTA3-5A16oYGR%2Cer%2C7-wm53P3wLnN8ZKIlVmnyHqfR2D" 27 | # Install dependencies. 28 | if [ ! `which ab` ]; then 29 | printf "Please install apachebench (ab) and try again.\n\n" 30 | fi 31 | if [ ! `which wrk` ]; then 32 | printf "Please install wrk (wrk) and try again.\n\n" 33 | fi 34 | 35 | # Run benchmarks. 36 | printf "Running wrk anonymous page load benchmark...\n" 37 | curl -s -o /dev/null $DRUPAL_URL # Load once to fill caches. 38 | sleep 2 39 | wrk -t4 -c100 -d30 --timeout 10s $DRUPAL_URL 40 | printf "\n" 41 | 42 | printf "Running ab authenticated page load benchmark...\n" 43 | ab -n 1 -c 1 -C "SESSxyz=XYZ" $DRUPAL_URL >/dev/null # Load once to fill caches. 
44 | sleep 2 45 | ab -n 700 -c 10 -C "$AUTHENTICATED_SESSION_COOKIE" $DRUPAL_URL 46 | printf "\n" 47 | 48 | printf "Drupal benchmark complete!\n\n" 49 | -------------------------------------------------------------------------------- /benchmarks/stress.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Stress the CPUs for temperature and power testing. 3 | hosts: all 4 | gather_facts: true 5 | become: true 6 | 7 | vars: 8 | stress_time: 5m 9 | 10 | vars_files: 11 | - ../config.yml 12 | 13 | tasks: 14 | - name: Ensure stress-ng is installed. 15 | ansible.builtin.package: 16 | name: stress-ng 17 | state: present 18 | 19 | - name: Run stress-ng. 20 | ansible.builtin.command: >- 21 | stress-ng -c {{ ansible_processor_vcpus }} -t {{ stress_time }} 22 | -------------------------------------------------------------------------------- /ceph/README.md: -------------------------------------------------------------------------------- 1 | # Ceph Storage Cluster Setup 2 | 3 | This directory contains a playbook that configures Ceph storage on a Pi cluster. I initially set this up as part of my [6-node DeskPi Super6c video](https://www.youtube.com/watch?v=UT5UbSJOyog). 4 | 5 | ### Cluster configuration 6 | 7 | Run the playbook: 8 | 9 | ``` 10 | ansible-playbook main.yml 11 | ``` 12 | 13 | TODO. 14 | 15 | ### Upgrading the cluster 16 | 17 | Run the upgrade playbook: 18 | 19 | ``` 20 | ansible-playbook upgrade.yml 21 | ``` 22 | 23 | ### Monitoring the cluster 24 | 25 | TODO. 26 | -------------------------------------------------------------------------------- /ceph/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up cluster-wide configuration. 3 | hosts: cluster 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: reboot-pi 9 | reboot: 10 | 11 | vars_files: 12 | - config.yml 13 | 14 | tasks: [] 15 | 16 | 17 | - name: Configure the control plane (node 1). 18 | hosts: control_plane 19 | gather_facts: false 20 | become: true 21 | 22 | vars_files: 23 | - config.yml 24 | 25 | tasks: 26 | # See: https://forums.raspberrypi.com/viewtopic.php?t=274486 27 | - name: Set up the Debian unstable repo (TODO). 28 | meta: noop 29 | 30 | # See: https://ceph.com/en/news/blog/2022/install-ceph-in-a-raspberrypi-4-cluster/ 31 | - name: Install cephadm (TODO). 32 | meta: noop 33 | 34 | - name: Create the ceph cluster (TODO). 35 | meta: noop 36 | 37 | - name: Retrieve the ceph pubkey. 38 | ansible.builtin.fetch: 39 | src: /etc/ceph/ceph.pub 40 | dest: files/ceph.pub 41 | flat: yes 42 | 43 | - name: Ensure NFS dependencies are installed. 44 | ansible.builtin.package: 45 | name: 46 | - libcephfs2 47 | - nfs-ganesha 48 | - nfs-ganesha-ceph 49 | state: present 50 | 51 | - name: Configure the nodes (nodes 2-6). 52 | hosts: nodes 53 | gather_facts: false 54 | become: true 55 | 56 | vars_files: 57 | - config.yml 58 | 59 | tasks: 60 | - name: Ensure Ceph dependencies are installed. 61 | ansible.builtin.package: 62 | name: 63 | - podman 64 | - lvm2 65 | state: present 66 | 67 | - name: Copy the ceph pubkey to each node. 68 | ansible.posix.authorized_key: 69 | user: root 70 | state: present 71 | key: "{{ lookup('file', 'files/ceph.pub') }}" 72 | -------------------------------------------------------------------------------- /example.config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Cluster storage options. 
3 | storage_type: filesystem # options: 'filesystem', 'zfs' 4 | storage_zfs_pool_name: zfsdata 5 | # storage_nfs_dir: '{{ storage_zfs_pool_name }}' # Use this for 'zfs' 6 | storage_nfs_dir: "srv" # Use this for 'filesystem' 7 | storage_nfs_share_name: nfsshare 8 | 9 | # Drupal installation options. 10 | drupal_image: drupal:10.2-apache 11 | drupal_hash_salt: OTk4MTYzYWI4N2E2MGIxNjlmYmQ2MTA4 12 | drupal_trusted_host_patterns: '^.+$' 13 | drupal_database_password: 'drupal' 14 | drupal_base_web_path: '/var/www/html/sites/default/' 15 | drupal_config_sync_directory: 'sites/default/files/config_OTk4MTYzY' 16 | drupal_extra_settings_php: '' 17 | 18 | # These networking variables are only necessary if using optional static and 19 | # remote networking features in the `tasks/networking` playbooks. 20 | ipv4_subnet_prefix: "10.1.1" 21 | ipv4_gateway: "10.1.1.1" 22 | dns4_servers: "{{ ipv4_gateway }}" 23 | active_internet_interface: "wlan0" 24 | reverse_tunnel_enable: false 25 | reverse_tunnel_vps_username: my-vps-username 26 | reverse_tunnel_vps_hostname: my-vps-hostname 27 | control_plane_router_setup: false 28 | -------------------------------------------------------------------------------- /example.hosts.ini: -------------------------------------------------------------------------------- 1 | # The 'ip_host_octet' is used only when configuring static networking using the 2 | # playbooks inside 'tasks/networking'. 3 | [control_plane] 4 | node1.local ip_host_octet=61 5 | 6 | [nodes] 7 | node2.local ip_host_octet=62 8 | node3.local ip_host_octet=63 9 | node4.local ip_host_octet=64 10 | 11 | # The node to be used for shared cluster storage. 12 | [storage] 13 | node3.local 14 | 15 | [cluster:children] 16 | control_plane 17 | nodes 18 | 19 | [cluster:vars] 20 | ansible_user='pi' 21 | 22 | # Uncomment below when working on cluster through VPS tunnel host. 23 | #[control_plane:vars] 24 | #ansible_port='2222' 25 | #ansible_user='pi' 26 | #ansible_host='my-vps-host-or-ip' 27 | 28 | #[nodes:vars] 29 | #ansible_ssh_common_args='-o ProxyCommand="ssh -p 2222 -W %h:%p -q pi@my-vps-host-or-ip"' 30 | -------------------------------------------------------------------------------- /images/deskpi-super6c-running.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geerlingguy/pi-cluster/6e443778bc16d489c6f8049ae5e1e2624e939289/images/deskpi-super6c-running.jpg -------------------------------------------------------------------------------- /images/turing-pi-2-hero.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geerlingguy/pi-cluster/6e443778bc16d489c6f8049ae5e1e2624e939289/images/turing-pi-2-hero.jpg -------------------------------------------------------------------------------- /main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up cluster-wide configuration. 3 | hosts: cluster 4 | gather_facts: true 5 | become: true 6 | 7 | handlers: 8 | - name: reboot-pi 9 | ansible.builtin.reboot: 10 | 11 | vars_files: 12 | - config.yml 13 | 14 | tasks: 15 | - name: Ensure cgroups are configured correctly in cmdline.txt. 
16 | ansible.builtin.replace: 17 | path: /boot/firmware/cmdline.txt 18 | regexp: '^([\w](?!.*\b{{ item }}\b).*)$' 19 | replace: '\1 {{ item }}' 20 | with_items: 21 | - "cgroup_memory=1" 22 | - "cgroup_enable=memory" 23 | notify: reboot-pi 24 | when: ansible_distribution == 'Debian' 25 | 26 | - name: Ensure cgroups are configured correctly in ubuntuEnv.txt. 27 | ansible.builtin.replace: 28 | path: /boot/firmware/ubuntuEnv.txt 29 | regexp: '^(bootargs=[\w](?!.*\b{{ item }}\b).*)$' 30 | replace: '\1 {{ item }}' 31 | with_items: 32 | - "cgroup_memory=1" 33 | - "cgroup_enable=memory" 34 | notify: reboot-pi 35 | when: ansible_distribution == 'Ubuntu' 36 | 37 | - name: Download K3s install script. 38 | ansible.builtin.get_url: 39 | url: https://get.k3s.io 40 | dest: "~/k3s_install.sh" 41 | mode: a+x 42 | 43 | - name: Install required dependencies 44 | ansible.builtin.apt: 45 | name: nfs-common 46 | state: present 47 | 48 | 49 | - name: Configure storage node. 50 | hosts: storage 51 | gather_facts: false 52 | become: true 53 | 54 | handlers: 55 | - name: restart nfs 56 | ansible.builtin.service: 57 | name: nfs-server 58 | state: restarted 59 | 60 | vars_files: 61 | - config.yml 62 | 63 | tasks: 64 | - name: Set up storage. 65 | include_tasks: tasks/storage/{{ storage_type }}.yml 66 | 67 | 68 | - name: Configure the control plane. 69 | hosts: control_plane 70 | gather_facts: false 71 | become: true 72 | 73 | vars_files: 74 | - config.yml 75 | 76 | tasks: 77 | - name: Install K3s on control plane (takes a while). 78 | ansible.builtin.shell: >- 79 | ~/k3s_install.sh >> ~/k3s_install_log.txt 80 | args: 81 | chdir: "~" 82 | creates: /var/lib/rancher/k3s/server/node-token 83 | 84 | - name: Get node token. 85 | ansible.builtin.command: cat /var/lib/rancher/k3s/server/node-token 86 | changed_when: false 87 | register: node_token_output 88 | 89 | - name: Set node_token fact. 90 | ansible.builtin.set_fact: 91 | node_token: "{{ node_token_output.stdout_lines[0] }}" 92 | 93 | - name: Ensure required dependencies are installed. 94 | ansible.builtin.package: 95 | name: 96 | - python3-pip 97 | - python3-setuptools 98 | - python3-openshift 99 | - python3-yaml 100 | - build-essential 101 | - golang 102 | - git 103 | state: present 104 | become: true 105 | 106 | - name: Ignore PEP 668 because it's silly. 107 | ansible.builtin.file: 108 | path: /usr/lib/python3.11/EXTERNALLY-MANAGED 109 | state: absent 110 | become: true 111 | 112 | 113 | - name: Configure the worker nodes. 114 | hosts: nodes 115 | gather_facts: false 116 | become: true 117 | 118 | vars_files: 119 | - config.yml 120 | 121 | tasks: 122 | - name: Install K3s on nodes (takes a while). 123 | ansible.builtin.shell: >- 124 | K3S_URL="https://{{ groups['control_plane'][0] }}:6443" 125 | K3S_TOKEN="{{ hostvars[groups['control_plane'][0]]['node_token'] }}" 126 | ~/k3s_install.sh >> ~/k3s_install_log.txt 127 | args: 128 | chdir: "~" 129 | creates: /var/lib/rancher/k3s/agent/kubelet.kubeconfig 130 | 131 | - name: Set up Helm. 132 | import_playbook: tasks/kubernetes/helm.yml 133 | tags: ['helm'] 134 | 135 | - name: Set up NFS PVCs. 136 | import_playbook: tasks/kubernetes/nfs.yml 137 | tags: ['nfs'] 138 | 139 | - name: Set up Prometheus. 140 | import_playbook: tasks/kubernetes/prometheus.yml 141 | tags: ['prometheus'] 142 | 143 | - name: Set up Drupal. 
144 | import_playbook: tasks/kubernetes/drupal.yml 145 | tags: ['drupal'] 146 | -------------------------------------------------------------------------------- /networking.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure reverse SSH tunnels for SSH and HTTP on control plane. 3 | import_playbook: tasks/networking/reverse-tunnel.yml 4 | when: reverse_tunnel_enable 5 | 6 | - name: Set up static networking configuration. 7 | import_playbook: tasks/networking/static-networking.yml 8 | 9 | - name: Configure control plane as a router. 10 | import_playbook: tasks/networking/router.yml 11 | when: control_plane_router_setup 12 | -------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | --- 2 | collections: 3 | - name: community.general 4 | -------------------------------------------------------------------------------- /tasks/kubernetes/drupal.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure Drupal. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Create drupal namespace. 17 | k8s: 18 | name: drupal 19 | api_version: v1 20 | kind: Namespace 21 | state: present 22 | 23 | - name: Apply drupal manifests. 24 | k8s: 25 | definition: "{{ lookup('template', '../../templates/' + item ) }}" 26 | state: present 27 | loop: 28 | - mariadb.yml 29 | - drupal.yml 30 | -------------------------------------------------------------------------------- /tasks/kubernetes/helm.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up Helm. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars: 8 | # See available releases: https://github.com/helm/helm/releases/ 9 | helm_version: 'v3.16.2' 10 | helm_platform: linux 11 | helm_arch: arm64 12 | helm_bin_path: /usr/local/bin/helm 13 | 14 | tasks: 15 | - name: Check if Helm binary exists. 16 | stat: 17 | path: "{{ helm_bin_path }}" 18 | register: helm_check 19 | 20 | - name: Check Helm version. 21 | command: "{{ helm_bin_path }} version" 22 | failed_when: false 23 | changed_when: false 24 | register: helm_existing_version 25 | 26 | - name: Download helm. 27 | unarchive: 28 | src: https://get.helm.sh/helm-{{ helm_version }}-{{ helm_platform }}-{{ helm_arch }}.tar.gz 29 | dest: /tmp 30 | remote_src: true 31 | register: helm_download 32 | when: > 33 | not helm_check.stat.exists 34 | or helm_version not in helm_existing_version.stdout 35 | 36 | - name: Copy helm binary into place. 37 | copy: 38 | src: "/tmp/{{ helm_platform }}-{{ helm_arch }}/helm" 39 | dest: "{{ helm_bin_path }}" 40 | mode: 0755 41 | remote_src: true 42 | become: true 43 | when: helm_download is changed 44 | -------------------------------------------------------------------------------- /tasks/kubernetes/nfs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure NFS Subdir External Provisioner. 
3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Add nfs-subdir-external-provisioner chart repo. 17 | kubernetes.core.helm_repository: 18 | name: nfs-subdir-external-provisioner 19 | repo_url: "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/" 20 | 21 | - name: Deploy NFS Subdir External Provisioner Helm chart. 22 | kubernetes.core.helm: 23 | name: nfs-subdir-external-provisioner 24 | chart_ref: nfs-subdir-external-provisioner/nfs-subdir-external-provisioner 25 | release_namespace: default 26 | state: present 27 | values: 28 | nfs: 29 | server: "{{ groups['storage'][0] }}" 30 | path: "/{{ storage_nfs_dir }}/{{ storage_nfs_share_name }}" 31 | -------------------------------------------------------------------------------- /tasks/kubernetes/prometheus.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure Prometheus + Grafana monitoring stack. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Add prometheus-community chart repo. 17 | kubernetes.core.helm_repository: 18 | name: prometheus-community 19 | repo_url: "https://prometheus-community.github.io/helm-charts" 20 | 21 | - name: Deploy Prometheus + Grafana Helm chart. 22 | kubernetes.core.helm: 23 | name: cluster-monitoring 24 | chart_ref: prometheus-community/kube-prometheus-stack 25 | release_namespace: default 26 | state: present 27 | values: 28 | alertmanager: 29 | enabled: false 30 | -------------------------------------------------------------------------------- /tasks/networking/reverse-tunnel.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Note: This playbook makes the following assumptions: 3 | # 4 | # 1. The configured VPS server already has the proper sshd_config. 5 | # 2. Node 1 already has an SSH key generated that's on the VPS server. 6 | # 3. You've confirmed Node 1 can SSH into the VPS server. 7 | # 8 | # See: https://www.jeffgeerling.com/blog/2022/ssh-and-http-raspberry-pi-behind-cg-nat 9 | - name: Configure control plane as a reverse tunnel for SSH and HTTP. 10 | hosts: control_plane 11 | gather_facts: false 12 | become: true 13 | 14 | handlers: 15 | - name: restart dhcpcd 16 | ansible.builtin.service: 17 | name: dhcpcd 18 | state: restarted 19 | 20 | - name: restart autossh 21 | ansible.builtin.systemd: 22 | name: autossh 23 | state: restarted 24 | when: reverse_tunnel_enable 25 | 26 | vars_files: 27 | - ../../config.yml 28 | 29 | tasks: 30 | - name: Install autossh. 31 | ansible.builtin.apt: 32 | name: autossh 33 | state: present 34 | 35 | - name: Configure autossh defaults. 36 | ansible.builtin.copy: 37 | dest: /etc/default/autossh 38 | content: | 39 | AUTOSSH_POLL=60 40 | AUTOSSH_FIRST_POLL=30 41 | AUTOSSH_GATETIME=0 42 | AUTOSSH_PORT=22000 43 | SSH_OPTIONS="-N -R 2222:localhost:22 -R 8080:localhost:80 {{ reverse_tunnel_vps_username }}@{{ reverse_tunnel_vps_hostname }}" 44 | 45 | - name: Create autossh unit file. 
46 | ansible.builtin.copy: 47 | dest: /lib/systemd/system/autossh.service 48 | content: | 49 | [Unit] 50 | Description=autossh 51 | Wants=network-online.target 52 | After=network-online.target 53 | 54 | [Service] 55 | Type=simple 56 | User=pi 57 | EnvironmentFile=/etc/default/autossh 58 | ExecStart=/usr/bin/autossh $SSH_OPTIONS 59 | Restart=always 60 | RestartSec=60 61 | 62 | [Install] 63 | WantedBy=multi-user.target 64 | register: autossh_unit 65 | 66 | - name: Reload systemd daemon if unit file changed. 67 | ansible.builtin.systemd: 68 | daemon_reload: true 69 | when: autossh_unit is changed 70 | 71 | - name: Ensure autossh service is running. 72 | ansible.builtin.systemd: 73 | name: autossh 74 | state: started 75 | enabled: true 76 | 77 | - name: Set active Internet gateway interface on control plane. 78 | ansible.builtin.blockinfile: 79 | path: /etc/dhcpcd.conf 80 | marker: "# ANSIBLE MANAGED - Internet routing metric {mark}" 81 | block: | 82 | interface {{ active_internet_interface }} 83 | metric 100 84 | delegate_to: "{{ groups['control_plane'][0] }}" 85 | run_once: true 86 | notify: 87 | - restart dhcpcd 88 | - restart autossh 89 | -------------------------------------------------------------------------------- /tasks/networking/router.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure node 1 as a router. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: restart dnsmasq 9 | ansible.builtin.service: 10 | name: dnsmasq 11 | state: restarted 12 | 13 | - name: persist iptables rules 14 | ansible.builtin.command: netfilter-persistent save 15 | 16 | vars_files: 17 | - ../../config.yml 18 | 19 | tasks: 20 | - name: Install routing prerequisites. 21 | ansible.builtin.apt: 22 | name: 23 | - dnsmasq 24 | - netfilter-persistent 25 | - iptables-persistent 26 | state: present 27 | 28 | - name: Ensure netfilter-persistent is enabled. 29 | ansible.builtin.service: 30 | name: netfilter-persistent 31 | enabled: true 32 | 33 | - name: Ensure dnsmasq is running and enabled. 34 | ansible.builtin.service: 35 | name: dnsmasq 36 | state: started 37 | enabled: true 38 | 39 | - name: "Configure iptables for {{ active_internet_interface }} masquerade." 40 | ansible.builtin.iptables: 41 | table: nat 42 | chain: POSTROUTING 43 | out_interface: "{{ active_internet_interface }}" 44 | jump: MASQUERADE 45 | notify: persist iptables rules 46 | 47 | - name: Enable IPv4 forwarding. 48 | ansible.posix.sysctl: 49 | name: net.ipv4.ip_forward 50 | value: '1' 51 | sysctl_set: yes 52 | 53 | - name: Configure dnsmasq for bridged DNS. 54 | ansible.builtin.copy: 55 | dest: /etc/dnsmasq.d/bridge.conf 56 | content: | 57 | interface=eth0 58 | bind-interfaces 59 | server=1.1.1.1 60 | server=1.0.0.1 61 | domain-needed 62 | bogus-priv 63 | notify: restart dnsmasq 64 | 65 | # See: https://github.com/geerlingguy/turing-pi-2-cluster/issues/9 66 | - name: Add crontab task to restart dnsmasq. 67 | ansible.builtin.cron: 68 | name: "restart dnsmasq if not running" 69 | minute: "*" 70 | job: "/usr/bin/systemctl status dnsmasq || /usr/bin/systemctl restart dnsmasq" 71 | -------------------------------------------------------------------------------- /tasks/networking/static-networking.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up static networking configuration. 
3 | hosts: cluster 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: restart dhcpcd 9 | ansible.builtin.service: 10 | name: dhcpcd 11 | state: restarted 12 | 13 | - name: restart networkmanager 14 | ansible.builtin.service: 15 | name: NetworkManager 16 | state: restarted 17 | 18 | vars_files: 19 | - ../../config.yml 20 | 21 | tasks: 22 | - name: Check if using dhcpcd for networking. 23 | ansible.builtin.stat: 24 | path: /etc/dhcpcd.conf 25 | register: dhcpcd_file_result 26 | 27 | - name: Configure static IP address (dhcpcd). 28 | ansible.builtin.blockinfile: 29 | path: /etc/dhcpcd.conf 30 | marker: "# ANSIBLE MANAGED - static ip {mark}" 31 | block: | 32 | interface eth0 33 | static ip_address={{ ipv4_subnet_prefix }}.{{ ip_host_octet }}/24 34 | static routers={{ ipv4_subnet_prefix }}.1 35 | static domain_name_servers={{ ipv4_subnet_prefix }}.1 36 | notify: restart dhcpcd 37 | when: dhcpcd_file_result.stat.exists 38 | 39 | - name: Configure static IP address (Network Manager). 40 | community.general.nmcli: 41 | conn_name: "Wired connection 1" 42 | ifname: eth0 43 | type: ethernet 44 | ip4: "{{ ipv4_subnet_prefix }}.{{ ip_host_octet }}/24" 45 | gw4: "{{ ipv4_gateway }}" 46 | dns4: "{{ dns4_servers }}" 47 | state: present 48 | notify: restart networkmanager 49 | when: not dhcpcd_file_result.stat.exists 50 | 51 | - name: Configure hosts file so nodes can see each other by hostname. 52 | ansible.builtin.blockinfile: 53 | path: /etc/hosts 54 | marker: "# ANSIBLE MANAGED - static ip config {mark}" 55 | block: | 56 | {% for host in groups['cluster'] %} 57 | {{ ipv4_subnet_prefix }}.{{ hostvars[host].ip_host_octet }} {{ host }} {{ host | regex_replace('\.local', '') }} 58 | {% endfor %} 59 | -------------------------------------------------------------------------------- /tasks/networking/ubuntu-prep.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Prepare Ubuntu for static networking. 3 | hosts: cluster 4 | gather_facts: false 5 | become: true 6 | 7 | vars: 8 | netplan_file: /etc/netplan/50-cloud-init.yaml 9 | 10 | tasks: 11 | - name: Ensure NetworkManager is installed. 12 | ansible.builtin.apt: 13 | name: network-manager 14 | state: present 15 | update_cache: true 16 | 17 | - name: Configure netplan file for NetworkManager. 18 | ansible.builtin.copy: 19 | dest: "{{ netplan_file }}" 20 | mode: 0600 21 | content: | 22 | # ANSIBLE MANAGED - netplan configuration 23 | network: 24 | version: 2 25 | renderer: NetworkManager 26 | 27 | - name: Regenerate netplan config. 28 | ansible.builtin.command: "{{ item }}" 29 | with_items: 30 | - sudo netplan generate 31 | - sudo netplan apply 32 | 33 | - name: Reboot. 34 | ansible.builtin.reboot: 35 | -------------------------------------------------------------------------------- /tasks/storage/filesystem.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Ensure NFS share directory exists. 3 | ansible.builtin.file: 4 | dest: "/{{ storage_nfs_dir }}/{{ storage_nfs_share_name }}" 5 | owner: root 6 | group: "{{ ansible_user }}" 7 | state: directory 8 | mode: 0777 9 | 10 | - name: Ensure NFS is installed. 11 | ansible.builtin.apt: 12 | name: nfs-kernel-server 13 | state: present 14 | 15 | - name: Configure NFS exports. 
16 | ansible.builtin.lineinfile: 17 | dest: /etc/exports 18 | line: "/{{ storage_nfs_dir }}/{{ storage_nfs_share_name }} *(rw,sync,no_root_squash)" 19 | regexp: ".*" 20 | create: true 21 | notify: restart nfs 22 | 23 | - name: Ensure NFS is started and enabled at boot. 24 | ansible.builtin.service: 25 | name: nfs-server 26 | state: started 27 | enabled: true 28 | -------------------------------------------------------------------------------- /tasks/storage/zfs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Ensure ZFS prerequisites are installed. 3 | ansible.builtin.apt: 4 | name: raspberrypi-kernel-headers 5 | state: present 6 | 7 | - name: Ensure ZFS is installed. 8 | ansible.builtin.apt: 9 | name: 10 | - raspberrypi-kernel-headers 11 | - zfs-dkms 12 | - zfsutils-linux 13 | state: present 14 | 15 | - name: Configure ZFS mirror pool. 16 | ansible.builtin.command: "zpool create {{ storage_zfs_pool_name }} mirror /dev/sda /dev/sdb" 17 | args: 18 | creates: "/{{ storage_zfs_pool_name }}" 19 | 20 | - name: Ensure NFS filesystem is present in ZFS. 21 | community.general.zfs: 22 | name: "{{ storage_zfs_pool_name }}/{{ storage_nfs_share_name }}" 23 | state: present 24 | 25 | - name: Configure permissions for ZFS share. 26 | ansible.builtin.file: 27 | dest: "/{{ storage_zfs_pool_name }}/{{ storage_nfs_share_name }}" 28 | owner: root 29 | group: pi 30 | mode: 0777 31 | 32 | - name: Check if sharenfs is enabled on ZFS NFS share. 33 | ansible.builtin.command: "zfs get sharenfs {{ storage_zfs_pool_name }}/{{ storage_nfs_share_name }}" 34 | register: sharenfs_status 35 | changed_when: false 36 | 37 | # Note: no_root_squash can be dangerous. Use at your own peril. 38 | - name: Ensure NFS filesystem is allowed to be shared via NFS. 39 | ansible.builtin.command: "zfs set sharenfs='no_root_squash,rw=*' {{ storage_zfs_pool_name }}/{{ storage_nfs_share_name }}" 40 | when: "'rw' not in sharenfs_status.stdout" 41 | 42 | - name: Ensure NFS is installed. 43 | ansible.builtin.apt: 44 | name: nfs-kernel-server 45 | state: present 46 | 47 | - name: Ensure NFS is started and enabled at boot. 
48 | ansible.builtin.service: 49 | name: nfs-server 50 | state: started 51 | enabled: true 52 | -------------------------------------------------------------------------------- /templates/drupal.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ConfigMap 3 | apiVersion: v1 4 | metadata: 5 | name: drupal-config 6 | namespace: drupal 7 | data: 8 | settings.php: |- 9 | <?php 10 | $databases['default']['default'] = [ 11 | 'database' => 'drupal', 12 | 'username' => 'drupal', 13 | 'password' => '{{ drupal_database_password }}', 14 | 'prefix' => '', 15 | 'host' => 'mariadb', 16 | 'port' => '3306', 17 | 'namespace' => 'Drupal\\Core\\Database\\Driver\\mysql', 18 | 'driver' => 'mysql', 19 | ]; 20 | $settings['hash_salt'] = '{{ drupal_hash_salt }}'; 21 | $settings['trusted_host_patterns'] = ['{{ drupal_trusted_host_patterns }}']; 22 | $settings['config_sync_directory'] = '{{ drupal_config_sync_directory }}'; 23 | {{ drupal_extra_settings_php }} 24 | 25 | --- 26 | kind: PersistentVolumeClaim 27 | apiVersion: v1 28 | metadata: 29 | name: drupal-files-pvc 30 | namespace: drupal 31 | spec: 32 | storageClassName: nfs-client 33 | accessModes: 34 | - ReadWriteMany 35 | resources: 36 | requests: 37 | storage: 20Gi 38 | 39 | --- 40 | kind: Deployment 41 | apiVersion: apps/v1 42 | metadata: 43 | name: drupal 44 | namespace: drupal 45 | spec: 46 | replicas: 1 47 | selector: 48 | matchLabels: 49 | app: drupal 50 | template: 51 | metadata: 52 | labels: 53 | app: drupal 54 | spec: 55 | containers: 56 | - name: drupal 57 | image: '{{ drupal_image }}' 58 | ports: 59 | - containerPort: 80 60 | livenessProbe: 61 | tcpSocket: 62 | port: 80 63 | initialDelaySeconds: 60 64 | readinessProbe: 65 | tcpSocket: 66 | port: 80 67 | initialDelaySeconds: 30 68 | volumeMounts: 69 | - mountPath: '{{ drupal_base_web_path }}' 70 | name: drupal-settings 71 | - mountPath: '{{ drupal_base_web_path }}files/' 72 | name: drupal-files 73 | resources: 74 | limits: 75 | cpu: '2' 76 | memory: '2048Mi' 77 | requests: 78 | cpu: '1' 79 | memory: '1024Mi' 80 | volumes: 81 | - name: drupal-settings 82 | configMap: 83 | name: drupal-config 84 | - name: drupal-files 85 | persistentVolumeClaim: 86 | claimName: drupal-files-pvc 87 | 88 | --- 89 | kind: Service 90 | apiVersion: v1 91 | metadata: 92 | name: drupal 93 | namespace: drupal 94 | spec: 95 | ports: 96 | - port: 80 97 | protocol: TCP 98 | selector: 99 | app: drupal 100 | 101 | --- 102 | apiVersion: networking.k8s.io/v1 103 | kind: Ingress 104 | metadata: 105 | name: drupal 106 | namespace: drupal 107 | annotations: 108 | kubernetes.io/ingress.class: "traefik" 109 | spec: 110 | rules: 111 | - http: 112 | paths: 113 | - path: / 114 | pathType: Prefix 115 | backend: 116 | service: 117 | name: drupal 118 | port: 119 | number: 80 120 | -------------------------------------------------------------------------------- /templates/exports.j2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geerlingguy/pi-cluster/6e443778bc16d489c6f8049ae5e1e2624e939289/templates/exports.j2 -------------------------------------------------------------------------------- /templates/mariadb.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: PersistentVolumeClaim 3 | apiVersion: v1 4 | metadata: 5 | name: mariadb-pvc 6 | namespace: drupal 7 | spec: 8 | storageClassName: nfs-client 9 | accessModes: 10 | - ReadWriteMany 11 | resources: 12 | requests: 13 | storage: 20Gi 14 | 15 | --- 16 | kind: Deployment 17 | apiVersion: 
apps/v1 18 | metadata: 19 | name: mariadb 20 | namespace: drupal 21 | spec: 22 | replicas: 1 23 | selector: 24 | matchLabels: 25 | app: mariadb 26 | template: 27 | metadata: 28 | labels: 29 | app: mariadb 30 | spec: 31 | containers: 32 | - name: mariadb 33 | image: mariadb:10.6 34 | ports: 35 | - containerPort: 3306 36 | env: 37 | - name: MARIADB_DATABASE 38 | value: drupal 39 | - name: MARIADB_USER 40 | value: drupal 41 | - name: MARIADB_PASSWORD 42 | value: '{{ drupal_database_password }}' 43 | - name: MARIADB_RANDOM_ROOT_PASSWORD 44 | value: 'yes' 45 | volumeMounts: 46 | - mountPath: /var/lib/mysql 47 | name: database 48 | resources: 49 | limits: 50 | cpu: '2' 51 | memory: '4096Mi' 52 | requests: 53 | cpu: '1' 54 | memory: '2048Mi' 55 | volumes: 56 | - name: database 57 | persistentVolumeClaim: 58 | claimName: mariadb-pvc 59 | 60 | --- 61 | kind: Service 62 | apiVersion: v1 63 | metadata: 64 | name: mariadb 65 | namespace: drupal 66 | spec: 67 | ports: 68 | - port: 3306 69 | targetPort: 3306 70 | selector: 71 | app: mariadb 72 | -------------------------------------------------------------------------------- /upgrade.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Upgrade everything on the cluster. 3 | hosts: cluster 4 | become: true 5 | 6 | tasks: 7 | - name: Upgrade all software. 8 | ansible.builtin.apt: 9 | update_cache: true 10 | upgrade: dist 11 | 12 | - name: Check if a reboot is required. 13 | stat: 14 | path: /var/run/reboot-required 15 | get_checksum: false 16 | register: reboot_required_file 17 | 18 | - name: Reboot the server (if required). 19 | reboot: 20 | when: reboot_required_file.stat.exists == true 21 | 22 | - name: Remove dependencies that are no longer required. 23 | apt: 24 | autoremove: true 25 | --------------------------------------------------------------------------------