├── .github ├── FUNDING.yml ├── stale.yml └── workflows │ └── ci.yml ├── .gitignore ├── .yamllint ├── LICENSE ├── README.md ├── ansible.cfg ├── benchmark ├── README.md ├── ansible.cfg ├── files │ └── benchmark-Make.rpi ├── main.yml └── templates │ ├── HPL.dat.j2 │ └── mpi-node-config.j2 ├── example.config.yml ├── example.hosts.ini ├── group_vars └── all.yml ├── images └── turing-pi-2-hero.jpg ├── main.yml ├── networking.yml ├── tasks ├── kubernetes │ ├── drupal.yml │ ├── helm.yml │ ├── nfs.yml │ └── prometheus.yml ├── networking │ ├── reverse-tunnel.yml │ ├── router.yml │ └── static-networking.yml └── storage.yml ├── templates ├── drupal.yml └── mariadb.yml └── upgrade.yml /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | --- 3 | github: geerlingguy 4 | patreon: geerlingguy 5 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an Issue or Pull Request becomes stale 4 | daysUntilStale: 90 5 | 6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. 7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. 8 | daysUntilClose: 30 9 | 10 | # Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) 11 | onlyLabels: [] 12 | 13 | # Issues or Pull Requests with these labels will never be considered stale. 
Set to `[]` to disable 14 | exemptLabels: 15 | - pinned 16 | - security 17 | - planned 18 | 19 | # Set to true to ignore issues in a project (defaults to false) 20 | exemptProjects: false 21 | 22 | # Set to true to ignore issues in a milestone (defaults to false) 23 | exemptMilestones: false 24 | 25 | # Set to true to ignore issues with an assignee (defaults to false) 26 | exemptAssignees: false 27 | 28 | # Label to use when marking as stale 29 | staleLabel: stale 30 | 31 | # Limit the number of actions per hour, from 1-30. Default is 30 32 | limitPerRun: 30 33 | 34 | pulls: 35 | markComment: |- 36 | This pull request has been marked 'stale' due to lack of recent activity. If there is no further activity, the PR will be closed in another 30 days. Thank you for your contribution! 37 | 38 | Please read [this blog post](https://www.jeffgeerling.com/blog/2020/enabling-stale-issue-bot-on-my-github-repositories) to see the reasons why I mark pull requests as stale. 39 | 40 | unmarkComment: >- 41 | This pull request is no longer marked for closure. 42 | 43 | closeComment: >- 44 | This pull request has been closed due to inactivity. If you feel this is in error, please reopen the pull request or file a new PR with the relevant details. 45 | 46 | issues: 47 | markComment: |- 48 | This issue has been marked 'stale' due to lack of recent activity. If there is no further activity, the issue will be closed in another 30 days. Thank you for your contribution! 49 | 50 | Please read [this blog post](https://www.jeffgeerling.com/blog/2020/enabling-stale-issue-bot-on-my-github-repositories) to see the reasons why I mark issues as stale. 51 | 52 | unmarkComment: >- 53 | This issue is no longer marked for closure. 54 | 55 | closeComment: >- 56 | This issue has been closed due to inactivity. If you feel this is in error, please reopen the issue or file a new issue with the relevant details. 
57 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 'on': 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | jobs: 10 | 11 | lint: 12 | name: Lint 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Check out the codebase. 17 | uses: actions/checkout@v2 18 | 19 | - name: Set up Python 3. 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: '3.x' 23 | 24 | - name: Install test dependencies. 25 | run: pip3 install yamllint 26 | 27 | - name: Lint all the YAMLs. 28 | run: yamllint . 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | hosts.ini 2 | config.yml 3 | ansible_collections 4 | roles/geerlingguy.* 5 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | extends: default 3 | rules: 4 | line-length: 5 | max: 140 6 | level: warning 7 | truthy: false 8 | 9 | ignore: | 10 | **/.github/workflows/ci.yml 11 | **/stale.yml 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jeff Geerling 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | 
The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Turing Pi 2 Cluster 2 | 3 | DEPRECATED: Please see my [pi-cluster](https://github.com/geerlingguy/pi-cluster) project for active development. 4 | 5 | [![CI](https://github.com/geerlingguy/turing-pi-2-cluster/workflows/CI/badge.svg?branch=master&event=push)](https://github.com/geerlingguy/turing-pi-2-cluster/actions?query=workflow%3ACI) 6 | 7 |

Turing Pi 2 - Raspberry Pi Compute Module Cluster

8 | 9 | This repository will contain examples and automation used in Turing Pi 2-related videos on [Jeff Geerling's YouTube channel](https://www.youtube.com/c/JeffGeerling). 10 | 11 | You might also be interested in another Raspberry-Pi cluster I've maintained for years, the [Raspberry Pi Dramble](https://www.pidramble.com), which is a Kubernetes Pi cluster in my basement that hosts [www.pidramble.com](https://www.pidramble.com). 12 | 13 | ## Usage 14 | 15 | 1. Make sure you have [Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) installed. 16 | 2. Copy the `example.hosts.ini` inventory file to `hosts.ini`. Make sure it has a single `control_plane` node and the rest of the `node`s configured correctly (for my examples I named my nodes `turing-node[1-4].local`). 17 | 3. Copy the `example.config.yml` file to `config.yml`, and modify the variables to your liking. 18 | 19 | ### Raspberry Pi Setup 20 | 21 | I am running Raspberry Pi OS (64-bit, lite) on a set of four Raspberry Pi Compute Module 4s with 8GB of RAM and no built-in eMMC. I am using [32 GB SanDisk Extreme microSD cards](https://amzn.to/3G35QbY) to boot each node. 22 | 23 | I flashed Raspberry Pi OS to the Pis using Raspberry Pi Imager. 24 | 25 | To make network discovery and integration easier, I edited the advanced configuration in Imager (press Shift + Ctrl + X), and set the following options: 26 | 27 | - Set hostname: `turing-node-1.local` (set to `2` for node 2, `3` for node 3, etc.) 28 | - Enable SSH: 'Allow public-key', and paste in my public SSH key(s) 29 | - Configure wifi: (ONLY on node 1) enter SSID and password for local WiFi network 30 | 31 | After setting all those options, making sure only node 1 has WiFi configured, and the hostname is unique to each node (and matches what is in `hosts.ini`), I inserted the microSD cards into the respective Pis, and booted the cluster. 
32 | 33 | ### SSH connection test 34 | 35 | To test the SSH connection from my Ansible controller (my main workstation, where I'm running all the playbooks), I connected to each server individually, and accepted the hostkey: 36 | 37 | ``` 38 | ssh pi@turing-node-1.local 39 | ``` 40 | 41 | This ensures Ansible will also be able to connect via SSH in the following steps. You can test Ansible's connection with: 42 | 43 | ``` 44 | ansible all -m ping 45 | ``` 46 | 47 | It should respond with a 'SUCCESS' message for each node. 48 | 49 | ### Storage Configuration 50 | 51 | > **Warning**: This playbook is configured to set up a ZFS mirror volume on node 3, with two drives connected to the built-in SATA ports on the Turing Pi 2. 52 | 53 | To disable this behavior, you can set `storage_configure: false` in `config.yml`. 54 | 55 | To make sure the ZFS mirror volume is able to be created, log into node 3, and make sure your two SATA drives are wiped: 56 | 57 | ``` 58 | pi@turing-node-3:~ $ sudo wipefs --all --force /dev/sda?; sudo wipefs --all --force /dev/sda 59 | pi@turing-node-3:~ $ sudo wipefs --all --force /dev/sdb?; sudo wipefs --all --force /dev/sdb 60 | ``` 61 | 62 | If you run `lsblk`, you should see `sda` and `sdb` have no partitions, and are ready to use: 63 | 64 | ``` 65 | pi@turing-node-3:~ $ lsblk 66 | NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT 67 | sda 8:0 0 1.8T 0 disk 68 | sdb 8:16 0 1.8T 0 disk 69 | ``` 70 | 71 | ### Static network configuration (optional, but recommended) 72 | 73 | Because I am using my Turing Pi cluster both on-premise and remote (using a 4G LTE modem connected to the Pi in slot 1), I set it up on its own subnet (10.1.1.x). You can change the subnet that's used via the `ipv4_subnet_prefix` variable in `config.yml`. 
74 | 75 | To configure the local network for the Turing Pi cluster (this is optional—you can still use the rest of the configuration without a custom local network), run the playbook: 76 | 77 | ``` 78 | ansible-playbook networking.yml 79 | ``` 80 | 81 | After running the playbook, until a reboot, the Pis will still be accessible over their former DHCP-assigned IP address. After the nodes are rebooted, you will need to make sure your workstation is connected to an interface using the same subnet as the cluster (e.g. 10.1.1.x). 82 | 83 | > Note: After the networking changes are made, since this playbook uses DNS names (e.g. `turing-node-1.local`) instead of IP addresses, your computer will still be able to connect to the nodes directly—assuming your network has IPv6 support. Pinging the nodes on their new IP addresses will _not_ work, however. For better network compatibility, it's recommended you set up a separate network interface on the Ansible controller that's on the same subnet as the Pis in the cluster: 84 | > 85 | > On my Mac, I connected a second network interface and manually configured its IP address as `10.1.1.10`, with subnet mask `255.255.255.0`, and that way I could still access all the nodes via IP address or their hostnames (e.g. `turing-node-2.local`). 86 | 87 | Because the cluster subnet needs its own router, node 1 is configured as a router, using `wlan0` as the primary interface for Internet traffic by default. The other nodes get their Internet access through node 1. 88 | 89 | #### Switch between 4G LTE and WiFi (optional) 90 | 91 | The network configuration defaults to an `active_internet_interface` of `wlan0`, meaning node 1 will route all Internet traffic for the cluster through its WiFi interface. 92 | 93 | Assuming you have a [working 4G card in slot 1](https://www.jeffgeerling.com/blog/2022/using-4g-lte-wireless-modems-on-raspberry-pi), you can switch node 1 to route through an alternate interface (e.g. `usb0`): 94 | 95 | 1. 
Set `active_internet_interface: "usb0"` in your `config.yml` 96 | 2. Run the networking playbook again: `ansible-playbook networking.yml` 97 | 98 | You can switch back and forth between interfaces using the steps above. 99 | 100 | #### Reverse SSH and HTTP tunnel configuration (optional) 101 | 102 | For my own experimentation, I decided to run my Pi cluster 'off-grid', using a 4G LTE modem, as mentioned above. 103 | 104 | Because my mobile network provider uses CG-NAT, there is no way to remotely access the cluster, or serve web traffic to the public internet from it, at least not out of the box. 105 | 106 | I am using a reverse SSH tunnel to enable direct remote SSH and HTTP access. To set that up, I configured a VPS I run to use TCP Forwarding (see [this blog post for details](https://www.jeffgeerling.com/blog/2022/ssh-and-http-raspberry-pi-behind-cg-nat)), and I configured an SSH key so node 1 could connect to my VPS (e.g. `ssh my-vps-username@my-vps-hostname-or-ip`). 107 | 108 | Then I set the `reverse_tunnel_enable` variable to `true` in my `config.yml`, and configured the VPS username and hostname options. 109 | 110 | Doing that and running the `main.yml` playbook configures `autossh` on node 1, and will try to get a connection through to the VPS on ports 2222 (to node 1's port 22) and 8080 (to node 1's port 80). 111 | 112 | After that's done, you should be able to log into the cluster _through_ your VPS with a command like: 113 | 114 | ``` 115 | $ ssh -p 2222 pi@[my-vps-hostname] 116 | ``` 117 | 118 | > Note: If autossh isn't working, it could be that it didn't exit cleanly, and a tunnel is still reserving the port on the remote VPS. That's often the case if you run `sudo systemctl status autossh` and see messages like `Warning: remote port forwarding failed for listen port 2222`. 119 | > 120 | > In that case, log into the remote VPS and run `pgrep ssh | xargs kill` to kill off all active SSH sessions, then `autossh` should pick back up again. 
121 | 122 | > **Warning**: Use this feature at your own risk. Security is your own responsibility, and for better protection, you should probably avoid directly exposing your cluster (e.g. by disabling the `GatewayPorts` option) so you can only access the cluster while already logged into your VPS. 123 | 124 | ### Cluster configuration and K3s installation 125 | 126 | Run the playbook: 127 | 128 | ``` 129 | ansible-playbook main.yml 130 | ``` 131 | 132 | At the end of the playbook, there should be an instance of Drupal running on the cluster. If you log into node 1, you should be able to access it with `curl localhost`. Alternatively, if you have SSH tunnelling configured, you could access `http://[your-vps-ip-or-hostname]:8080/` and you'd see the site. 133 | 134 | You can also log into node 1, switch to the root user account (`sudo su`), then use `kubectl` to manage the cluster (e.g. view Drupal pods with `kubectl get pods -n drupal`). 135 | 136 | K3s' `kubeconfig` file is located at `/etc/rancher/k3s/k3s.yaml`. If you'd like to manage the cluster from other hosts (or using a tool like Lens), copy the contents of that file, replacing `localhost` with the IP address or hostname of the control plane node, and paste the contents into a file `~/.kube/config`. 137 | 138 | ### Upgrading the cluster 139 | 140 | Run the upgrade playbook: 141 | 142 | ``` 143 | ansible-playbook upgrade.yml 144 | ``` 145 | 146 | ### Monitoring the cluster 147 | 148 | Prometheus and Grafana are used for monitoring. Grafana can be accessed via port forwarding (or you could choose to expose it another way). 149 | 150 | To access Grafana: 151 | 152 | 1. Make sure you set up a valid `~/.kube/config` file (see 'K3s installation' above). 153 | 1. Run `kubectl port-forward service/cluster-monitoring-grafana :80` 154 | 1. Grab the port that's output, and browse to `localhost:[port]`, and bingo! Grafana. 
155 | 156 | The default login is `admin` / `prom-operator`, but you can also get the secret with `kubectl get secret cluster-monitoring-grafana -o jsonpath="{.data.admin-password}" | base64 -D`. 157 | 158 | ### Benchmarking the cluster 159 | 160 | See the README file within the `benchmark` folder. 161 | 162 | ### Shutting down the cluster 163 | 164 | The safest way to shut down the cluster is to run the following command: 165 | 166 | ``` 167 | ansible all -B 500 -P 0 -a "shutdown now" -b 168 | ``` 169 | 170 | > Note: If using the SSH tunnel, you might want to run the command _first_ on nodes 2-4, _then_ on node 1. So first run `ansible 'all:!control_plane' [...]`, then run it again just for `control_plane`. 171 | 172 | Then after you confirm the nodes are shut down (with K3s running, it can take a few minutes), press the 'STM32_POWER' button or a power button attached to the front panel connector to power down all the slots physically. Then you can switch off or disconnect your ATX power supply. 173 | 174 | ## Caveats 175 | 176 | The Turing Pi 2 I'm using is a prototype, pre-production version of the board. If you have a production board, YMMV. You've been warned! 177 | 178 | ## Author 179 | 180 | The repository was created in 2021 by [Jeff Geerling](https://www.jeffgeerling.com), author of [Ansible for DevOps](https://www.ansiblefordevops.com), [Ansible for Kubernetes](https://www.ansibleforkubernetes.com), and [Kubernetes 101](https://www.kubernetes101book.com). 
181 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | nocows = true 3 | inventory = hosts.ini 4 | roles_path = roles 5 | collections_paths = ./ 6 | interpreter_python = /usr/bin/python3 7 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Cluster Benchmark 2 | 3 | A common generic benchmark for clusters is Linpack, or HPL (High Performance Linpack), which is famous for its use in rankings in the TOP500 supercomputer list over the past few decades. 4 | 5 | I wanted to see where my various Pi clusters would rank, historically, so I built this playbook which installs all the necessary tooling for HPL to run, connects all the nodes together via SSH, then runs the benchmark and outputs the result. 6 | 7 | ## Usage 8 | 9 | Make sure you have Ansible installed, and make sure you've at _least_ run the `networking.yml` playbook in the main directory. Then run the benchmarking playbook inside this directory: 10 | 11 | ``` 12 | ansible-playbook main.yml 13 | ``` 14 | 15 | You should be able to log directly into any of the nodes (I did my tests on node 1), and run the following commands to kick off a benchmarking run: 16 | 17 | ``` 18 | cd ~/tmp/hpl-2.3/bin/rpi 19 | mpirun -f cluster-hosts ./xhpl 20 | ``` 21 | 22 | > The configuration here is optimized for a 4-node Pi CM4 cluster with 8 GB of RAM on each module. Some settings like those in the `HPL.dat` file may need changes for different cluster layouts! 
23 | 24 | ## Results 25 | 26 | In my testing on Raspberry Pi OS Bullseye, in November 2021, I got the following results: 27 | 28 | | Benchmark | Result | Wattage | Gflops/W | 29 | | --- | --- | --- | --- | 30 | | HPL (1.5 GHz default clock) | 44.942 Gflops | 24.5W | 1.83 Gflops/W | 31 | | HPL (2.0 GHz overclock) | 51.327 Gflops | 33W | 1.54 Gflops/W | 32 | -------------------------------------------------------------------------------- /benchmark/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | nocows = true 3 | inventory = ../hosts.ini 4 | roles_path = ../roles 5 | collections_paths = ../ 6 | interpreter_python = /usr/bin/python3 7 | -------------------------------------------------------------------------------- /benchmark/files/benchmark-Make.rpi: -------------------------------------------------------------------------------- 1 | # 2 | # -- High Performance Computing Linpack Benchmark (HPL) 3 | # HPL - 2.3 - December 2, 2018 4 | # Antoine P. Petitet 5 | # University of Tennessee, Knoxville 6 | # Innovative Computing Laboratory 7 | # (C) Copyright 2000-2008 All Rights Reserved 8 | # 9 | # -- Copyright notice and Licensing terms: 10 | # 11 | # Redistribution and use in source and binary forms, with or without 12 | # modification, are permitted provided that the following conditions 13 | # are met: 14 | # 15 | # 1. Redistributions of source code must retain the above copyright 16 | # notice, this list of conditions and the following disclaimer. 17 | # 18 | # 2. Redistributions in binary form must reproduce the above copyright 19 | # notice, this list of conditions, and the following disclaimer in the 20 | # documentation and/or other materials provided with the distribution. 21 | # 22 | # 3. 
All advertising materials mentioning features or use of this 23 | # software must display the following acknowledgement: 24 | # This product includes software developed at the University of 25 | # Tennessee, Knoxville, Innovative Computing Laboratory. 26 | # 27 | # 4. The name of the University, the name of the Laboratory, or the 28 | # names of its contributors may not be used to endorse or promote 29 | # products derived from this software without specific written 30 | # permission. 31 | # 32 | # -- Disclaimer: 33 | # 34 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 | # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY 38 | # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 | # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
45 | # ###################################################################### 46 | # 47 | # ---------------------------------------------------------------------- 48 | # - shell -------------------------------------------------------------- 49 | # ---------------------------------------------------------------------- 50 | # 51 | SHELL = /bin/sh 52 | # 53 | CD = cd 54 | CP = cp 55 | LN_S = ln -s 56 | MKDIR = mkdir 57 | RM = /bin/rm -f 58 | TOUCH = touch 59 | # 60 | # ---------------------------------------------------------------------- 61 | # - Platform identifier ------------------------------------------------ 62 | # ---------------------------------------------------------------------- 63 | # 64 | ARCH = rpi 65 | # 66 | # ---------------------------------------------------------------------- 67 | # - HPL Directory Structure / HPL library ------------------------------ 68 | # ---------------------------------------------------------------------- 69 | # 70 | TOPdir = $(HOME)/tmp/hpl-2.3 71 | INCdir = $(TOPdir)/include 72 | BINdir = $(TOPdir)/bin/$(ARCH) 73 | LIBdir = $(TOPdir)/lib/$(ARCH) 74 | # 75 | HPLlib = $(LIBdir)/libhpl.a 76 | # 77 | # ---------------------------------------------------------------------- 78 | # - Message Passing library (MPI) -------------------------------------- 79 | # ---------------------------------------------------------------------- 80 | # MPinc tells the C compiler where to find the Message Passing library 81 | # header files, MPlib is defined to be the name of the library to be 82 | # used. The variable MPdir is only used for defining MPinc and MPlib. 
83 | # 84 | MPdir = /usr/local 85 | MPinc = -I /usr/local/include 86 | MPlib = /usr/local/lib/libmpich.so 87 | # 88 | # ---------------------------------------------------------------------- 89 | # - Linear Algebra library (BLAS or VSIPL) ----------------------------- 90 | # ---------------------------------------------------------------------- 91 | # LAinc tells the C compiler where to find the Linear Algebra library 92 | # header files, LAlib is defined to be the name of the library to be 93 | # used. The variable LAdir is only used for defining LAinc and LAlib. 94 | # 95 | LAdir = /home/pi/tmp/atlas-build 96 | LAinc = 97 | LAlib = $(LAdir)/lib/libf77blas.a $(LAdir)/lib/libatlas.a 98 | # 99 | # ---------------------------------------------------------------------- 100 | # - F77 / C interface -------------------------------------------------- 101 | # ---------------------------------------------------------------------- 102 | # You can skip this section if and only if you are not planning to use 103 | # a BLAS library featuring a Fortran 77 interface. Otherwise, it is 104 | # necessary to fill out the F2CDEFS variable with the appropriate 105 | # options. **One and only one** option should be chosen in **each** of 106 | # the 3 following categories: 107 | # 108 | # 1) name space (How C calls a Fortran 77 routine) 109 | # 110 | # -DAdd_ : all lower case and a suffixed underscore (Suns, 111 | # Intel, ...), [default] 112 | # -DNoChange : all lower case (IBM RS6000), 113 | # -DUpCase : all upper case (Cray), 114 | # -DAdd__ : the FORTRAN compiler in use is f2c. 115 | # 116 | # 2) C and Fortran 77 integer mapping 117 | # 118 | # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] 119 | # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, 120 | # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
121 | # 122 | # 3) Fortran 77 string handling 123 | # 124 | # -DStringSunStyle : The string address is passed at the string loca- 125 | # tion on the stack, and the string length is then 126 | # passed as an F77_INTEGER after all explicit 127 | # stack arguments, [default] 128 | # -DStringStructPtr : The address of a structure is passed by a 129 | # Fortran 77 string, and the structure is of the 130 | # form: struct {char *cp; F77_INTEGER len;}, 131 | # -DStringStructVal : A structure is passed by value for each Fortran 132 | # 77 string, and the structure is of the form: 133 | # struct {char *cp; F77_INTEGER len;}, 134 | # -DStringCrayStyle : Special option for Cray machines, which uses 135 | # Cray fcd (fortran character descriptor) for 136 | # interoperation. 137 | # 138 | F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle 139 | # 140 | # ---------------------------------------------------------------------- 141 | # - HPL includes / libraries / specifics ------------------------------- 142 | # ---------------------------------------------------------------------- 143 | # 144 | HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) 145 | HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) 146 | # 147 | # - Compile time options ----------------------------------------------- 148 | # 149 | # -DHPL_COPY_L force the copy of the panel L before bcast; 150 | # -DHPL_CALL_CBLAS call the cblas interface; 151 | # -DHPL_CALL_VSIPL call the vsip library; 152 | # -DHPL_DETAILED_TIMING enable detailed timers; 153 | # 154 | # By default HPL will: 155 | # *) not copy L before broadcast, 156 | # *) call the BLAS Fortran 77 interface, 157 | # *) not display detailed timing information. 
158 | # 159 | HPL_OPTS = 160 | # 161 | # ---------------------------------------------------------------------- 162 | # 163 | HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) 164 | # 165 | # ---------------------------------------------------------------------- 166 | # - Compilers / linkers - Optimization flags --------------------------- 167 | # ---------------------------------------------------------------------- 168 | # 169 | CC = mpicc 170 | CCNOOPT = $(HPL_DEFS) 171 | CCFLAGS = $(HPL_DEFS) 172 | # 173 | LINKER = mpif77 174 | LINKFLAGS = 175 | # 176 | ARCHIVER = ar 177 | ARFLAGS = r 178 | RANLIB = echo 179 | # 180 | # ---------------------------------------------------------------------- -------------------------------------------------------------------------------- /benchmark/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Automated setup of distributed Linpack benchmark on a Raspberry Pi cluster. 3 | # 4 | # Inspired by: https://mikejmcfarlane.github.io/blog/2020/09/17/High-Performance-Linpack-for-raspberry-pi-supercomputer 5 | # See also: https://www.sci-pi.org.uk/bench/linpack.html 6 | 7 | - name: Install linpack benchmark. 8 | hosts: cluster 9 | become: false 10 | tags: ['setup'] 11 | 12 | tasks: 13 | - name: Install dependencies. 14 | ansible.builtin.apt: 15 | name: 16 | - gfortran 17 | - automake 18 | state: present 19 | become: true 20 | 21 | - name: Create required temporary directories. 22 | ansible.builtin.file: 23 | path: "{{ item }}" 24 | state: directory 25 | mode: 0755 26 | loop: 27 | - /home/pi/tmp 28 | - /home/pi/tmp/atlas-build 29 | 30 | - name: Download MPI (Message Passing Interface). 31 | ansible.builtin.unarchive: 32 | src: https://www.mpich.org/static/downloads/3.4.2/mpich-3.4.2.tar.gz 33 | dest: /home/pi/tmp 34 | remote_src: true 35 | creates: /home/pi/tmp/mpich-3.4.2/README 36 | 37 | - name: Build MPI (takes a while). 
38 | ansible.builtin.command: "{{ item }}" 39 | args: 40 | chdir: /home/pi/tmp/mpich-3.4.2 41 | creates: /home/pi/tmp/COMPILE_MPI_COMPLETE 42 | loop: 43 | - ./configure --with-device=ch3:sock FFLAGS=-fallow-argument-mismatch 44 | - make -j4 45 | 46 | - name: Install MPI. 47 | ansible.builtin.command: make install 48 | args: 49 | chdir: /home/pi/tmp/mpich-3.4.2 50 | creates: /home/pi/tmp/COMPILE_MPI_COMPLETE 51 | become: true 52 | 53 | - name: Create 'COMPILE_MPI_COMPLETE' file. 54 | file: 55 | path: /home/pi/tmp/COMPILE_MPI_COMPLETE 56 | state: touch 57 | mode: 0644 58 | 59 | # Note: There was no simpler way to do this besides `shell`. 60 | - name: Ensure CPU scaling is set to 'performance'. 61 | ansible.builtin.shell: >- 62 | echo performance | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 63 | 64 | - name: Download ATLAS linear algebra library. 65 | ansible.builtin.unarchive: 66 | src: https://sourceforge.net/projects/math-atlas/files/Stable/3.10.3/atlas3.10.3.tar.bz2 67 | dest: /home/pi/tmp 68 | remote_src: true 69 | creates: /home/pi/tmp/ATLAS/README 70 | 71 | - name: Install ATLAS (takes a LONG time). 72 | ansible.builtin.command: "{{ item }}" 73 | args: 74 | chdir: /home/pi/tmp/atlas-build 75 | creates: /home/pi/tmp/COMPILE_ATLAS_COMPLETE 76 | loop: 77 | - ../ATLAS/configure 78 | - make 79 | 80 | - name: Create 'COMPILE_ATLAS_COMPLETE' file. 81 | file: 82 | path: /home/pi/tmp/COMPILE_ATLAS_COMPLETE 83 | state: touch 84 | mode: 0644 85 | 86 | - name: Download HPL (High Performance Linpack). 87 | ansible.builtin.unarchive: 88 | src: http://www.netlib.org/benchmark/hpl/hpl-2.3.tar.gz 89 | dest: /home/pi/tmp 90 | remote_src: true 91 | creates: /home/pi/tmp/hpl-2.3/README 92 | 93 | - name: Set up HPL makefile. 94 | ansible.builtin.shell: sh make_generic 95 | args: 96 | chdir: /home/pi/tmp/hpl-2.3/setup 97 | creates: /home/pi/tmp/COMPILE_HPL_COMPLETE 98 | 99 | - name: Copy rpi makefile into place. 
100 | ansible.builtin.copy: 101 | src: files/benchmark-Make.rpi 102 | dest: /home/pi/tmp/hpl-2.3/Make.rpi 103 | mode: 0644 104 | 105 | - name: Install HPL. 106 | ansible.builtin.command: >- 107 | make arch=rpi 108 | args: 109 | chdir: /home/pi/tmp/hpl-2.3 110 | creates: /home/pi/tmp/COMPILE_HPL_COMPLETE 111 | 112 | - name: Create COMPILE_HPL_COMPLETE file. 113 | ansible.builtin.file: 114 | path: /home/pi/tmp/COMPILE_HPL_COMPLETE 115 | state: touch 116 | mode: 0644 117 | 118 | - name: Configure SSH connections between nodes. 119 | hosts: cluster 120 | become: false 121 | tags: ['ssh'] 122 | 123 | vars: 124 | host_ips: [] 125 | 126 | tasks: 127 | - name: Generate an OpenSSH keypair. 128 | community.crypto.openssh_keypair: 129 | path: /home/pi/.ssh/id_rsa 130 | size: 2048 131 | 132 | - name: Read out ssh pubkey from each host. 133 | ansible.builtin.command: cat /home/pi/.ssh/id_rsa.pub 134 | changed_when: false 135 | register: ssh_pubkey 136 | 137 | - name: Combine pubkeys into single list. 138 | ansible.builtin.set_fact: 139 | combined_ssh_pubkeys: "{{ ansible_play_hosts | map('extract', hostvars, 'ssh_pubkey') | map(attribute='stdout') | list }}" 140 | run_once: yes 141 | 142 | - name: Write all pubkeys to each host. 143 | ansible.posix.authorized_key: 144 | user: pi 145 | state: present 146 | key: "{{ item }}" 147 | loop: "{{ combined_ssh_pubkeys }}" 148 | 149 | - name: Generate list of host IP addresses. 150 | ansible.builtin.set_fact: 151 | host_ips: "{{ host_ips + [ hostvars[item].ansible_default_ipv4.address ] }}" 152 | loop: "{{ groups['cluster'] }}" 153 | 154 | - name: Accept hostkeys for each host on each host. 155 | ansible.builtin.command: >- 156 | ssh pi@{{ item }} -o StrictHostKeyChecking=accept-new date 157 | loop: "{{ host_ips }}" 158 | 159 | - name: Run linpack benchmark. 160 | hosts: cluster 161 | become: false 162 | tags: ['benchmark'] 163 | 164 | tasks: 165 | - name: Create a file describing nodes for MPI execution. 
166 | ansible.builtin.template: 167 | src: templates/mpi-node-config.j2 168 | dest: /home/pi/tmp/hpl-2.3/bin/rpi/cluster-hosts 169 | mode: 0644 170 | 171 | # Generate the template using this website: 172 | # https://www.advancedclustering.com/act_kb/tune-hpl-dat-file/ 173 | - name: Create HPL.dat file. 174 | ansible.builtin.template: 175 | src: templates/HPL.dat.j2 176 | dest: /home/pi/tmp/hpl-2.3/bin/rpi/HPL.dat 177 | mode: 0644 178 | 179 | - name: Run the benchmark on the cluster. 180 | ansible.builtin.command: mpirun -f cluster-hosts ./xhpl 181 | args: 182 | chdir: /home/pi/tmp/hpl-2.3/bin/rpi 183 | register: mpirun_output 184 | run_once: true 185 | 186 | - name: Output the results. 187 | debug: var=mpirun_output.stdout 188 | -------------------------------------------------------------------------------- /benchmark/templates/HPL.dat.j2: -------------------------------------------------------------------------------- 1 | HPLinpack benchmark input file 2 | Innovative Computing Laboratory, University of Tennessee 3 | HPL.out output file name (if any) 4 | 6 device out (6=stdout,7=stderr,file) 5 | 1 # of problems sizes (N) 6 | 57600 Ns 7 | 1 # of NBs 8 | 192 NBs 9 | 0 PMAP process mapping (0=Row-,1=Column-major) 10 | 1 # of process grids (P x Q) 11 | 4 Ps 12 | 4 Qs 13 | 16.0 threshold 14 | 1 # of panel fact 15 | 2 PFACTs (0=left, 1=Crout, 2=Right) 16 | 1 # of recursive stopping criterium 17 | 4 NBMINs (>= 1) 18 | 1 # of panels in recursion 19 | 2 NDIVs 20 | 1 # of recursive panel fact. 21 | 1 RFACTs (0=left, 1=Crout, 2=Right) 22 | 1 # of broadcast 23 | 1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 24 | 1 # of lookahead depth 25 | 1 DEPTHs (>=0) 26 | 2 SWAP (0=bin-exch,1=long,2=mix) 27 | 64 swapping threshold 28 | 0 L1 in (0=transposed,1=no-transposed) form 29 | 0 U in (0=transposed,1=no-transposed) form 30 | 1 Equilibration (0=no,1=yes) 31 | 8 memory alignment in double (> 0) 32 | ##### This line (no. 32) is ignored (it serves as a separator). 
###### 33 | 0 Number of additional problem sizes for PTRANS 34 | 1200 10000 30000 values of N 35 | 0 number of additional blocking sizes for PTRANS 36 | 40 9 8 13 13 20 16 32 64 values of NB -------------------------------------------------------------------------------- /benchmark/templates/mpi-node-config.j2: -------------------------------------------------------------------------------- 1 | {% for host in groups['cluster'] %} 2 | {{ hostvars[host].ansible_default_ipv4.address }}:4 3 | {% endfor %} -------------------------------------------------------------------------------- /example.config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ipv4_subnet_prefix: "10.1.1" 3 | active_internet_interface: "wlan0" 4 | reverse_tunnel_enable: false 5 | reverse_tunnel_vps_username: my-vps-username 6 | reverse_tunnel_vps_hostname: my-vps-hostname 7 | 8 | storage_configure: true 9 | storage_pool_name: zfsdata 10 | storage_nfs_share_name: nfsshare 11 | 12 | drupal_image: drupal:9.3-apache 13 | drupal_hash_salt: OTk4MTYzYWI4N2E2MGIxNjlmYmQ2MTA4 14 | drupal_trusted_host_patterns: '^.+$' 15 | drupal_database_password: 'drupal' 16 | drupal_base_web_path: '/var/www/html/sites/default/' 17 | drupal_config_sync_directory: 'sites/default/files/config_OTk4MTYzY' 18 | drupal_extra_settings_php: '' 19 | -------------------------------------------------------------------------------- /example.hosts.ini: -------------------------------------------------------------------------------- 1 | # This playbook currently supports only one single control_plane. 2 | [control_plane] 3 | turing-node-1.local ip_host_octet=1 4 | 5 | # Uncomment below when working on cluster through VPS tunnel host. 
6 | #[control_plane:vars] 7 | #ansible_port='2222' 8 | #ansible_user='pi' 9 | #ansible_host='my-vps-host-or-ip' 10 | 11 | [nodes] 12 | turing-node-2.local ip_host_octet=2 13 | turing-node-3.local ip_host_octet=3 14 | turing-node-4.local ip_host_octet=4 15 | 16 | # Uncomment below when working on cluster through VPS tunnel host. 17 | #[nodes:vars] 18 | #ansible_ssh_common_args='-o ProxyCommand="ssh -p 2222 -W %h:%p -q pi@my-vps-host-or-ip"' 19 | 20 | [storage] 21 | turing-node-3.local 22 | 23 | [cluster:children] 24 | control_plane 25 | nodes 26 | -------------------------------------------------------------------------------- /group_vars/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Set to 'pi' for Raspberry Pi OS. 3 | ansible_user: pi 4 | -------------------------------------------------------------------------------- /images/turing-pi-2-hero.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geerlingguy/turing-pi-2-cluster/5cc1bef733e91984621085e0dc627d645fef8ec5/images/turing-pi-2-hero.jpg -------------------------------------------------------------------------------- /main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up cluster-wide configuration. 3 | hosts: cluster 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: reboot-pi 9 | reboot: 10 | 11 | vars_files: 12 | - config.yml 13 | 14 | tasks: 15 | - name: Ensure cgroups are configured correctly in cmdline.txt. 16 | ansible.builtin.replace: 17 | path: /boot/cmdline.txt 18 | regexp: '^([\w](?!.*\b{{ item }}\b).*)$' 19 | replace: '\1 {{ item }}' 20 | with_items: 21 | - "cgroup_memory=1" 22 | - "cgroup_enable=memory" 23 | notify: reboot-pi 24 | 25 | - name: Download K3s install script. 
26 | ansible.builtin.get_url: 27 | url: https://get.k3s.io 28 | dest: "~/k3s_install.sh" 29 | mode: a+x 30 | 31 | 32 | - name: Configure storage node (node 3). 33 | hosts: storage 34 | gather_facts: false 35 | become: true 36 | 37 | vars_files: 38 | - config.yml 39 | 40 | tasks: 41 | - name: Set up storage. 42 | include_tasks: tasks/storage.yml 43 | when: storage_configure 44 | 45 | 46 | - name: Configure the control plane (node 1). 47 | hosts: control_plane 48 | gather_facts: false 49 | become: true 50 | 51 | vars_files: 52 | - config.yml 53 | 54 | tasks: 55 | - name: Install K3s on control plane (takes a while). 56 | ansible.builtin.shell: >- 57 | ~/k3s_install.sh >> ~/k3s_install_log.txt 58 | args: 59 | chdir: "~" 60 | creates: /var/lib/rancher/k3s/server/node-token 61 | 62 | - name: Get node token. 63 | ansible.builtin.command: cat /var/lib/rancher/k3s/server/node-token 64 | changed_when: false 65 | register: node_token_output 66 | 67 | - name: Set node_token fact. 68 | ansible.builtin.set_fact: 69 | node_token: "{{ node_token_output.stdout_lines[0] }}" 70 | 71 | - name: Ensure required dependencies are installed. 72 | package: 73 | name: 74 | - python3-pip 75 | - python3-setuptools 76 | - build-essential 77 | - golang 78 | - git 79 | state: present 80 | become: true 81 | 82 | - name: Ensure required Python libraries are installed. 83 | pip: 84 | name: 85 | - openshift 86 | - pyyaml 87 | state: present 88 | become: true 89 | 90 | 91 | - name: Configure the nodes (nodes 2-4). 92 | hosts: nodes 93 | gather_facts: false 94 | become: true 95 | 96 | vars_files: 97 | - config.yml 98 | 99 | tasks: 100 | - name: Install K3s on nodes (takes a while). 
101 | ansible.builtin.shell: >- 102 | K3S_URL=https://"{{ groups['control_plane'][0] }}":6443 103 | K3S_TOKEN="{{ hostvars[groups['control_plane'][0]]['node_token'] }}" 104 | ~/k3s_install.sh >> ~/k3s_install_log.txt 105 | args: 106 | chdir: "~" 107 | creates: /var/lib/rancher/k3s/agent/kubelet.kubeconfig 108 | 109 | - name: Set up Helm. 110 | import_playbook: tasks/kubernetes/helm.yml 111 | tags: ['helm'] 112 | 113 | - name: Set up NFS PVCs. 114 | import_playbook: tasks/kubernetes/nfs.yml 115 | tags: ['nfs'] 116 | 117 | - name: Set up Prometheus. 118 | import_playbook: tasks/kubernetes/prometheus.yml 119 | tags: ['prometheus'] 120 | 121 | - name: Set up Drupal. 122 | import_playbook: tasks/kubernetes/drupal.yml 123 | tags: ['drupal'] 124 | -------------------------------------------------------------------------------- /networking.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure reverse SSH tunnels for SSH and HTTP on node 1. 3 | import_playbook: tasks/networking/reverse-tunnel.yml 4 | when: reverse_tunnel_enable 5 | 6 | - name: Set up static networking configuration. 7 | import_playbook: tasks/networking/static-networking.yml 8 | 9 | - name: Configure node 1 as a router. 10 | import_playbook: tasks/networking/router.yml 11 | -------------------------------------------------------------------------------- /tasks/kubernetes/drupal.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure Drupal. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Create drupal namespace. 
17 | k8s: 18 | name: drupal 19 | api_version: v1 20 | kind: Namespace 21 | state: present 22 | 23 | - name: Apply drupal manifests. 24 | k8s: 25 | definition: "{{ lookup('template', '../../templates/' + item ) }}" 26 | state: present 27 | loop: 28 | - mariadb.yml 29 | - drupal.yml 30 | -------------------------------------------------------------------------------- /tasks/kubernetes/helm.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up Helm. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars: 8 | # See available releases: https://github.com/helm/helm/releases/ 9 | helm_version: 'v3.8.0' 10 | helm_platform: linux 11 | helm_arch: arm64 12 | helm_bin_path: /usr/local/bin/helm 13 | 14 | tasks: 15 | - name: Check if Helm binary exists. 16 | stat: 17 | path: "{{ helm_bin_path }}" 18 | register: helm_check 19 | 20 | - name: Check Helm version. 21 | command: "{{ helm_bin_path }} version" 22 | failed_when: false 23 | changed_when: false 24 | register: helm_existing_version 25 | 26 | - name: Download helm. 27 | unarchive: 28 | src: https://get.helm.sh/helm-{{ helm_version }}-{{ helm_platform }}-{{ helm_arch }}.tar.gz 29 | dest: /tmp 30 | remote_src: true 31 | register: helm_download 32 | when: > 33 | not helm_check.stat.exists 34 | or helm_version not in helm_existing_version.stdout 35 | 36 | - name: Copy helm binary into place. 37 | copy: 38 | src: "/tmp/{{ helm_platform }}-{{ helm_arch }}/helm" 39 | dest: "{{ helm_bin_path }}" 40 | mode: 0755 41 | remote_src: true 42 | become: true 43 | when: helm_download is changed 44 | -------------------------------------------------------------------------------- /tasks/kubernetes/nfs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure NFS Subdir External Provisioner. 
3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Add nfs-subdir-external-provisioner chart repo. 17 | kubernetes.core.helm_repository: 18 | name: nfs-subdir-external-provisioner 19 | repo_url: "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/" 20 | 21 | - name: Deploy NFS Subdir External Provisioner Helm chart. 22 | kubernetes.core.helm: 23 | name: nfs-subdir-external-provisioner 24 | chart_ref: nfs-subdir-external-provisioner/nfs-subdir-external-provisioner 25 | release_namespace: default 26 | state: present 27 | values: 28 | nfs: 29 | server: "{{ groups['storage'][0] }}" 30 | path: "/{{ storage_pool_name }}/{{ storage_nfs_share_name }}" 31 | -------------------------------------------------------------------------------- /tasks/kubernetes/prometheus.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure Prometheus + Grafana monitoring stack. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | vars_files: 8 | - ../../config.yml 9 | 10 | environment: 11 | # The location of the kubeconfig file on the master. 12 | K8S_AUTH_KUBECONFIG: /etc/rancher/k3s/k3s.yaml 13 | PATH: "~/go/bin:{{ ansible_env.PATH }}" 14 | 15 | tasks: 16 | - name: Add prometheus-community chart repo. 17 | kubernetes.core.helm_repository: 18 | name: prometheus-community 19 | repo_url: "https://prometheus-community.github.io/helm-charts" 20 | 21 | - name: Deploy Prometheus + Grafana Helm chart. 
22 | kubernetes.core.helm: 23 | name: cluster-monitoring 24 | chart_ref: prometheus-community/kube-prometheus-stack 25 | release_namespace: default 26 | state: present 27 | values: 28 | alertmanager: 29 | enabled: false 30 | -------------------------------------------------------------------------------- /tasks/networking/reverse-tunnel.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Note: This playbook makes the following assumptions: 3 | # 4 | # 1. The configured VPS server already has the proper sshd_config. 5 | # 2. Node 1 already has an SSH key generated that's on the VPS server. 6 | # 3. You've confirmed Node 1 can SSH into the VPS server. 7 | # 8 | # See: https://www.jeffgeerling.com/blog/2022/ssh-and-http-raspberry-pi-behind-cg-nat 9 | - name: Configure node 1 as a reverse tunnel for SSH and HTTP. 10 | hosts: control_plane 11 | gather_facts: false 12 | become: true 13 | 14 | vars_files: 15 | - ../../config.yml 16 | 17 | tasks: 18 | - name: Install autossh. 19 | ansible.builtin.apt: 20 | name: autossh 21 | state: present 22 | 23 | - name: Configure autossh defaults. 24 | ansible.builtin.copy: 25 | dest: /etc/default/autossh 26 | content: | 27 | AUTOSSH_POLL=60 28 | AUTOSSH_FIRST_POLL=30 29 | AUTOSSH_GATETIME=0 30 | AUTOSSH_PORT=22000 31 | SSH_OPTIONS="-N -R 2222:localhost:22 -R 8080:localhost:80 {{ reverse_tunnel_vps_username }}@{{ reverse_tunnel_vps_hostname }}" 32 | 33 | - name: Create autossh unit file. 34 | ansible.builtin.copy: 35 | dest: /lib/systemd/system/autossh.service 36 | content: | 37 | [Unit] 38 | Description=autossh 39 | Wants=network-online.target 40 | After=network-online.target 41 | 42 | [Service] 43 | Type=simple 44 | User=pi 45 | EnvironmentFile=/etc/default/autossh 46 | ExecStart=/usr/bin/autossh $SSH_OPTIONS 47 | Restart=always 48 | RestartSec=60 49 | 50 | [Install] 51 | WantedBy=multi-user.target 52 | register: autossh_unit 53 | 54 | - name: Reload systemd daemon if unit file changed. 
55 | ansible.builtin.systemd: 56 | daemon_reload: true 57 | when: autossh_unit is changed 58 | 59 | - name: Ensure autossh service is running. 60 | ansible.builtin.systemd: 61 | name: autossh 62 | state: started 63 | enabled: true 64 | -------------------------------------------------------------------------------- /tasks/networking/router.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure node 1 as a router. 3 | hosts: control_plane 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: restart dnsmasq 9 | ansible.builtin.service: 10 | name: dnsmasq 11 | state: restarted 12 | 13 | - name: persist iptables rules 14 | ansible.builtin.command: netfilter-persistent save 15 | 16 | vars_files: 17 | - ../../config.yml 18 | 19 | tasks: 20 | - name: Install routing prerequisites. 21 | ansible.builtin.apt: 22 | name: 23 | - dnsmasq 24 | - netfilter-persistent 25 | - iptables-persistent 26 | state: present 27 | 28 | - name: Ensure netfilter-persistent is enabled. 29 | ansible.builtin.service: 30 | name: netfilter-persistent 31 | enabled: true 32 | 33 | - name: Ensure dnsmasq is running and enabled. 34 | ansible.builtin.service: 35 | name: dnsmasq 36 | state: started 37 | enabled: true 38 | 39 | - name: "Configure iptables for {{ active_internet_interface }} masquerade." 40 | ansible.builtin.iptables: 41 | table: nat 42 | chain: POSTROUTING 43 | out_interface: "{{ active_internet_interface }}" 44 | jump: MASQUERADE 45 | notify: persist iptables rules 46 | 47 | - name: Enable IPv4 forwarding. 48 | ansible.posix.sysctl: 49 | name: net.ipv4.ip_forward 50 | value: '1' 51 | sysctl_set: yes 52 | 53 | - name: Configure dnsmasq for bridged DNS. 
54 | ansible.builtin.copy: 55 | dest: /etc/dnsmasq.d/bridge.conf 56 | content: | 57 | interface=eth0 58 | bind-interfaces 59 | server=1.1.1.1 60 | server=1.0.0.1 61 | domain-needed 62 | bogus-priv 63 | notify: restart dnsmasq 64 | 65 | # See: https://github.com/geerlingguy/turing-pi-2-cluster/issues/9 66 | - name: Add crontab task to restart dnsmasq. 67 | ansible.builtin.cron: 68 | name: "restart dnsmasq if not running" 69 | minute: "*" 70 | job: "/usr/bin/systemctl status dnsmasq || /usr/bin/systemctl restart dnsmasq" 71 | -------------------------------------------------------------------------------- /tasks/networking/static-networking.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up static networking configuration. 3 | hosts: cluster 4 | gather_facts: false 5 | become: true 6 | 7 | handlers: 8 | - name: restart dhcpcd 9 | service: 10 | name: dhcpcd 11 | state: restarted 12 | 13 | - name: restart autossh 14 | ansible.builtin.systemd: 15 | name: autossh 16 | state: restarted 17 | when: reverse_tunnel_enable 18 | 19 | vars_files: 20 | - ../../config.yml 21 | 22 | tasks: 23 | - name: Configure static IP address on each node. 24 | ansible.builtin.blockinfile: 25 | path: /etc/dhcpcd.conf 26 | marker: "# ANSIBLE MANAGED - static ip {mark}" 27 | block: | 28 | interface eth0 29 | static ip_address={{ ipv4_subnet_prefix }}.{{ ip_host_octet }}/24 30 | static routers={{ ipv4_subnet_prefix }}.1 31 | static domain_name_servers={{ ipv4_subnet_prefix }}.1 32 | notify: restart dhcpcd 33 | 34 | # TODO: This doesn't use hosts as defined in inventory. Convert to template? 35 | - name: Configure hosts file so nodes can see each other by hostname. 
36 | ansible.builtin.blockinfile: 37 | path: /etc/hosts 38 | marker: "# ANSIBLE MANAGED - static ip config {mark}" 39 | block: | 40 | {{ ipv4_subnet_prefix }}.1 turing-node-1.local turing-node-1 41 | {{ ipv4_subnet_prefix }}.2 turing-node-2.local turing-node-2 42 | {{ ipv4_subnet_prefix }}.3 turing-node-3.local turing-node-3 43 | {{ ipv4_subnet_prefix }}.4 turing-node-4.local turing-node-4 44 | 45 | - name: Set active Internet gateway interface on node 1. 46 | ansible.builtin.blockinfile: 47 | path: /etc/dhcpcd.conf 48 | marker: "# ANSIBLE MANAGED - Internet routing metric {mark}" 49 | block: | 50 | interface {{ active_internet_interface }} 51 | metric 100 52 | delegate_to: "{{ groups['control_plane'][0] }}" 53 | run_once: true 54 | notify: 55 | - restart dhcpcd 56 | - restart autossh 57 | -------------------------------------------------------------------------------- /tasks/storage.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Ensure ZFS prerequisites are installed. 3 | ansible.builtin.apt: 4 | name: raspberrypi-kernel-headers 5 | state: present 6 | 7 | - name: Ensure ZFS is installed. 8 | ansible.builtin.apt: 9 | name: 10 | - raspberrypi-kernel-headers 11 | - zfs-dkms 12 | - zfsutils-linux 13 | state: present 14 | 15 | - name: Configure ZFS mirror pool. 16 | ansible.builtin.command: "zpool create {{ storage_pool_name }} mirror /dev/sda /dev/sdb" 17 | args: 18 | creates: "/{{ storage_pool_name }}" 19 | 20 | - name: Ensure NFS filesystem is present in ZFS. 21 | community.general.zfs: 22 | name: "{{ storage_pool_name }}/{{ storage_nfs_share_name }}" 23 | state: present 24 | 25 | - name: Configure permissions for ZFS share. 26 | ansible.builtin.file: 27 | dest: "/{{ storage_pool_name }}/{{ storage_nfs_share_name }}" 28 | owner: root 29 | group: pi 30 | mode: 0777 31 | 32 | - name: Check if sharenfs is enabled on ZFS NFS share. 
33 | ansible.builtin.command: "zfs get sharenfs {{ storage_pool_name }}/{{ storage_nfs_share_name }}" 34 | register: sharenfs_status 35 | changed_when: false 36 | 37 | # Note: no_root_squash can be dangerous. Use at your own peril. 38 | - name: Ensure NFS filesystem is allowed to be shared via NFS. 39 | ansible.builtin.command: "zfs set sharenfs='no_root_squash,rw=*' {{ storage_pool_name }}/{{ storage_nfs_share_name }}" 40 | when: "'rw' not in sharenfs_status.stdout" 41 | 42 | - name: Ensure NFS is installed. 43 | ansible.builtin.apt: 44 | name: nfs-kernel-server 45 | state: present 46 | 47 | - name: Ensure NFS is started and enabled at boot. 48 | ansible.builtin.service: 49 | name: nfs-server 50 | state: started 51 | enabled: true 52 | -------------------------------------------------------------------------------- /templates/drupal.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ConfigMap 3 | apiVersion: v1 4 | metadata: 5 | name: drupal-config 6 | namespace: drupal 7 | data: 8 | settings.php: |- 9 | 'drupal', 12 | 'username' => 'drupal', 13 | 'password' => '{{ drupal_database_password }}', 14 | 'prefix' => '', 15 | 'host' => 'mariadb', 16 | 'port' => '3306', 17 | 'namespace' => 'Drupal\\Core\\Database\\Driver\\mysql', 18 | 'driver' => 'mysql', 19 | ]; 20 | $settings['hash_salt'] = '{{ drupal_hash_salt }}'; 21 | $settings['trusted_host_patterns'] = ['{{ drupal_trusted_host_patterns }}']; 22 | $settings['config_sync_directory'] = '{{ drupal_config_sync_directory }}'; 23 | {{ drupal_extra_settings_php }} 24 | 25 | --- 26 | kind: PersistentVolumeClaim 27 | apiVersion: v1 28 | metadata: 29 | name: drupal-files-pvc 30 | namespace: drupal 31 | spec: 32 | storageClassName: nfs-client 33 | accessModes: 34 | - ReadWriteMany 35 | resources: 36 | requests: 37 | storage: 20Gi 38 | 39 | --- 40 | kind: Deployment 41 | apiVersion: apps/v1 42 | metadata: 43 | name: drupal 44 | namespace: drupal 45 | spec: 46 | replicas: 1 
47 | selector: 48 | matchLabels: 49 | app: drupal 50 | template: 51 | metadata: 52 | labels: 53 | app: drupal 54 | spec: 55 | containers: 56 | - name: drupal 57 | image: '{{ drupal_image }}' 58 | ports: 59 | - containerPort: 80 60 | livenessProbe: 61 | tcpSocket: 62 | port: 80 63 | initialDelaySeconds: 60 64 | readinessProbe: 65 | tcpSocket: 66 | port: 80 67 | initialDelaySeconds: 30 68 | volumeMounts: 69 | - mountPath: '{{ drupal_base_web_path }}' 70 | name: drupal-settings 71 | - mountPath: '{{ drupal_base_web_path }}files/' 72 | name: drupal-files 73 | resources: 74 | limits: 75 | cpu: '2' 76 | memory: '2048Mi' 77 | requests: 78 | cpu: '1' 79 | memory: '1024Mi' 80 | volumes: 81 | - name: drupal-settings 82 | configMap: 83 | name: drupal-config 84 | - name: drupal-files 85 | persistentVolumeClaim: 86 | claimName: drupal-files-pvc 87 | 88 | --- 89 | kind: Service 90 | apiVersion: v1 91 | metadata: 92 | name: drupal 93 | namespace: drupal 94 | spec: 95 | ports: 96 | - port: 80 97 | protocol: TCP 98 | selector: 99 | app: drupal 100 | 101 | --- 102 | apiVersion: networking.k8s.io/v1 103 | kind: Ingress 104 | metadata: 105 | name: drupal 106 | namespace: drupal 107 | annotations: 108 | kubernetes.io/ingress.class: "traefik" 109 | spec: 110 | rules: 111 | - http: 112 | paths: 113 | - path: / 114 | pathType: Prefix 115 | backend: 116 | service: 117 | name: drupal 118 | port: 119 | number: 80 120 | -------------------------------------------------------------------------------- /templates/mariadb.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: PersistentVolumeClaim 3 | apiVersion: v1 4 | metadata: 5 | name: mariadb-pvc 6 | namespace: drupal 7 | spec: 8 | storageClassName: nfs-client 9 | accessModes: 10 | - ReadWriteMany 11 | resources: 12 | requests: 13 | storage: 20Gi 14 | 15 | --- 16 | kind: Deployment 17 | apiVersion: apps/v1 18 | metadata: 19 | name: mariadb 20 | namespace: drupal 21 | spec: 22 | replicas: 1 23 
| selector: 24 | matchLabels: 25 | app: mariadb 26 | template: 27 | metadata: 28 | labels: 29 | app: mariadb 30 | spec: 31 | containers: 32 | - name: mariadb 33 | image: mariadb:10.6 34 | ports: 35 | - containerPort: 3306 36 | env: 37 | - name: MARIADB_DATABASE 38 | value: drupal 39 | - name: MARIADB_USER 40 | value: drupal 41 | - name: MARIADB_PASSWORD 42 | value: '{{ drupal_database_password }}' 43 | - name: MARIADB_RANDOM_ROOT_PASSWORD 44 | value: 'yes' 45 | volumeMounts: 46 | - mountPath: /var/lib/mysql 47 | name: database 48 | resources: 49 | limits: 50 | cpu: '2' 51 | memory: '4096Mi' 52 | requests: 53 | cpu: '1' 54 | memory: '2048Mi' 55 | volumes: 56 | - name: database 57 | persistentVolumeClaim: 58 | claimName: mariadb-pvc 59 | 60 | --- 61 | kind: Service 62 | apiVersion: v1 63 | metadata: 64 | name: mariadb 65 | namespace: drupal 66 | spec: 67 | ports: 68 | - port: 3306 69 | targetPort: 3306 70 | selector: 71 | app: mariadb 72 | -------------------------------------------------------------------------------- /upgrade.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Upgrade everything on the cluster. 3 | hosts: cluster 4 | become: true 5 | 6 | tasks: 7 | - name: Upgrade all software. 8 | ansible.builtin.apt: 9 | update_cache: true 10 | upgrade: dist 11 | 12 | - name: Check if a reboot is required. 13 | stat: 14 | path: /var/run/reboot-required 15 | get_md5: no 16 | register: reboot_required_file 17 | 18 | - name: Reboot the server (if required). 19 | reboot: 20 | when: reboot_required_file.stat.exists == true 21 | 22 | - name: Remove dependencies that are no longer required. 23 | apt: 24 | autoremove: yes 25 | --------------------------------------------------------------------------------