├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── ci.yml │ └── stale.yml ├── .gitignore ├── .yamllint ├── LICENSE ├── README.md ├── ansible.cfg ├── dependencies ├── arch-based.yml ├── debian-based.yml └── rhel-based.yml ├── example.config.yml ├── example.hosts.ini ├── main.yml ├── overclock-pi.yml ├── tasks ├── algebra_atlas.yml ├── algebra_blis.yml ├── algebra_openblas.yml ├── firewall-configure.yml └── firewall-reset.yml └── templates ├── HPL.dat.j2 ├── benchmark-Make.top500.j2 └── mpi-node-config.j2 /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | --- 3 | github: geerlingguy 4 | patreon: geerlingguy 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | updates: 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | groups: 9 | updates: 10 | applies-to: version-updates 11 | patterns: 12 | - "*" 13 | security-updates: 14 | applies-to: security-updates 15 | patterns: 16 | - "*" 17 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 'on': 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | jobs: 10 | 11 | lint: 12 | name: Lint 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Check out the codebase. 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Python 3. 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: '3.x' 23 | 24 | - name: Install test dependencies. 25 | run: pip3 install yamllint 26 | 27 | - name: Lint all the YAMLs. 28 | run: yamllint . 29 | 30 | benchmark: 31 | name: Benchmark 32 | runs-on: ubuntu-latest 33 | 34 | container: 35 | image: geerlingguy/docker-ubuntu2204-ansible:latest 36 | env: 37 | ANSIBLE_FORCE_COLOR: "true" 38 | 39 | steps: 40 | - name: Check out the codebase. 41 | uses: actions/checkout@v4 42 | 43 | - name: Set up files for the test. 44 | run: | 45 | cp example.hosts.ini hosts.ini 46 | cp example.config.yml config.yml 47 | 48 | - name: Check the syntax of the HPL benchmark playbook. 49 | run: ansible-playbook main.yml --syntax-check 50 | 51 | - name: Run the playbook but not the HPL benchmark. 52 | run: ansible-playbook main.yml --tags "setup" 53 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Close inactive issues 3 | 'on': 4 | schedule: 5 | - cron: "55 6 * * 4" # semi-random time 6 | 7 | jobs: 8 | close-issues: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: write 12 | pull-requests: write 13 | steps: 14 | - uses: actions/stale@v9 15 | with: 16 | days-before-stale: 120 17 | days-before-close: 60 18 | exempt-issue-labels: bug,pinned,security,planned 19 | exempt-pr-labels: bug,pinned,security,planned 20 | stale-issue-label: "stale" 21 | stale-pr-label: "stale" 22 | stale-issue-message: | 23 | This issue has been marked 'stale' due to lack of recent activity. If there is no further activity, the issue will be closed in another 30 days. Thank you for your contribution! 
24 | 25 | Please read [this blog post](https://www.jeffgeerling.com/blog/2020/enabling-stale-issue-bot-on-my-github-repositories) to see the reasons why I mark issues as stale. 26 | close-issue-message: | 27 | This issue has been closed due to inactivity. If you feel this is in error, please reopen the issue or file a new issue with the relevant details. 28 | stale-pr-message: | 29 | This PR has been marked 'stale' due to lack of recent activity. If there is no further activity, the PR will be closed in another 30 days. Thank you for your contribution! 30 | 31 | Please read [this blog post](https://www.jeffgeerling.com/blog/2020/enabling-stale-issue-bot-on-my-github-repositories) to see the reasons why I mark issues as stale. 32 | close-pr-message: | 33 | This PR has been closed due to inactivity. If you feel this is in error, please reopen the PR or file a new issue with the relevant details. 34 | repo-token: ${{ secrets.GITHUB_TOKEN }} 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | config.yml 2 | hosts.ini 3 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | extends: default 3 | rules: 4 | line-length: 5 | max: 140 6 | level: warning 7 | truthy: false 8 | 9 | ignore: | 10 | **/.github/workflows/ci.yml 11 | **/stale.yml 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Jeff Geerling 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Top500 Benchmark - HPL Linpack 2 | 3 | [![CI](https://github.com/geerlingguy/top500-benchmark/actions/workflows/ci.yml/badge.svg)](https://github.com/geerlingguy/top500-benchmark/actions/workflows/ci.yml) 4 | 5 | A common generic benchmark for clusters (or extremely powerful single-node workstations) is Linpack, or HPL (High Performance Linpack), which is famous for its use in rankings in the [Top500 supercomputer list](https://top500.org) over the past few decades. 
6 | 7 | The benchmark solves a random dense linear system in double-precision (64 bits / FP64) arithmetic ([source](https://netlib.org/benchmark/hpl/)). 8 | 9 | I wanted to see where my various clusters and workstations would rank, historically ([you can compare to past lists here](https://hpl-calculator.sourceforge.net/hpl-calculations.php)), so I built this Ansible playbook which installs all the necessary tooling for HPL to run, connects all the nodes together via SSH, then runs the benchmark and outputs the result. 10 | 11 | ## Why not PTS? 12 | 13 | Phoronix Test Suite (PTS) includes [HPL Linpack](https://openbenchmarking.org/test/pts/hpl) and [HPCC](https://openbenchmarking.org/test/pts/hpcc) test suites. I may see how they compare in the future. 14 | 15 | When I initially started down this journey, the PTS versions didn't play nicely with the Pi, especially when clustered. And they still don't seem to support clustered usage at all! 16 | 17 | ## Supported OSes 18 | 19 | Currently supported OSes: 20 | 21 | - Ubuntu (20.04+) 22 | - Raspberry Pi OS (11+) 23 | - Debian (11+) 24 | - Rocky Linux (9+) 25 | - AlmaLinux (9+) 26 | - CentOS Stream (9+) 27 | - RHEL (9+) 28 | - Fedora (38+) 29 | - Arch Linux 30 | - Manjaro 31 | 32 | Other OSes may need a few tweaks to work correctly. You can also run the playbook inside Docker (see the note under 'Benchmarking - Single Node'), but performance will be artificially limited. 33 | 34 | ## Benchmarking - Cluster 35 | 36 | Make sure you have Ansible installed (`pip3 install ansible`), then copy the following files: 37 | 38 | - `cp example.hosts.ini hosts.ini`: This is an inventory of all the hosts in your cluster (or just a single computer). 39 | - `cp example.config.yml config.yml`: This has some configuration options you may need to override, especially the `ssh_*` and `ram_in_gb` options (depending on your cluster layout). 40 | 41 | Each host should be reachable via SSH using the username set in `ansible_user`. Other Ansible options can be set under `[cluster:vars]` to connect in more exotic clustering scenarios (e.g. via bastion/jump-host). 42 | 43 | Tweak other settings inside `config.yml` as desired (the most important being `hpl_root`—this is where the compiled MPI, ATLAS/OpenBLAS/Blis, and HPL benchmarking code will live). 44 | 45 | > **Note**: The names of the nodes inside `hosts.ini` must match the hostname of their corresponding node; otherwise, the benchmark will hang when you try to run it in a cluster. 46 | > 47 | > For example, if you have `node-01.local` in your `hosts.ini`, your host's hostname should be `node-01` and not something else like `raspberry-pi`. 48 | > 49 | > If you're testing with `.local` domains on Ubuntu, and local mDNS resolution isn't working, consider installing the `avahi-daemon` package: 50 | > 51 | > `sudo apt-get install avahi-daemon` 52 | 53 | Then run the benchmarking playbook inside this directory: 54 | 55 | ``` 56 | ansible-playbook main.yml 57 | ``` 58 | 59 | This will run three separate plays: 60 | 61 | 1. Setup: downloads and compiles all the code required to run HPL. (This play takes a long time—up to many hours on a slower Raspberry Pi!) 62 | 2. SSH: configures the nodes to be able to communicate with each other. 63 | 3. Benchmark: creates an `HPL.dat` file and runs the benchmark, outputting the results in your console (see the sizing example below). 
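As a rough, worked example of how the Benchmark play sizes the problem: for a hypothetical 4-node cluster of quad-core machines with 8 GB of RAM each (about 6 GB usable per node after the default 0.75 safety factor in `example.config.yml`), you could pin the `HPL.dat` values in `config.yml` like this. The node count, RAM, and resulting numbers are assumptions for illustration only; by default the playbook computes `Ns` for you from the formula in `example.config.yml`.

```
# Hypothetical sizing: 4 nodes x 8 GB RAM (~6 GB usable each), 16 cores total.
# Ns ~= sqrt((6 * 1024^3 bytes * 4 nodes) / 8) * 0.90 ~= 51,000
hpl_dat_opts:
  Ns: 51000
  NBs: 256
  Ps: 2    # P x Q should roughly equal the total core count, with Qs >= Ps
  Qs: 8
```

Smaller `Ns` values finish much faster (handy for a smoke test), while values close to the memory limit generally give the best Gflops result.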
64 | 65 | After the entire playbook is complete, you can also log directly into any of the nodes (though I generally do things on node 1) and run the following commands to kick off a benchmarking run (adjust the path if you changed `hpl_root`; the default is `/opt/top500`): 66 | 67 | ``` 68 | cd /opt/top500/tmp/hpl-2.3/bin/top500 69 | mpirun -f cluster-hosts ./xhpl 70 | ``` 71 | 72 | > The configuration here was tested on smaller 1, 4, and 6-node clusters with 6-64 GB of RAM. Some settings in the `config.yml` file that affect the generated `HPL.dat` file may need different tuning for different cluster layouts! 73 | 74 | ### Benchmarking - Single Node 75 | 76 | To run locally on a single node, clone or download this repository to the node where you want to run HPL. Make sure the `hosts.ini` is set up with the default options (with just one node, `127.0.0.1`). 77 | 78 | All the default configuration from `example.config.yml` should be copied to a `config.yml` file, and all the variables should scale dynamically for your node. 79 | 80 | Run the following command so the cluster networking portion of the playbook is skipped: 81 | 82 | ``` 83 | ansible-playbook main.yml --tags "setup,benchmark" 84 | ``` 85 | 86 | > For testing, you can start an Ubuntu Docker container: 87 | > 88 | > ``` 89 | > docker run --name top500 -it -v $PWD:/code geerlingguy/docker-ubuntu2404-ansible:latest bash 90 | > ``` 91 | > 92 | > Then go into the code directory (`cd /code`) and run the playbook using the command above. 93 | 94 | #### Setting `performance` CPU frequency 95 | 96 | If you get an error like `CPU Throttling apparently enabled!`, you may need to set the CPU frequency governor to `performance` (and disable any throttling or frequency scaling). 97 | 98 | The exact steps vary by OS and CPU type, and so far the automated `performance` setting in the `main.yml` playbook has only been tested on Raspberry Pi OS, so you may need to look up how to disable throttling on your own system. Do that, then run the `main.yml` playbook again. 99 | 100 | ### Overclocking 101 | 102 | Since I originally built this project for a Raspberry Pi cluster, I include a playbook that sets an overclock for all the Raspberry Pis in a given cluster. 103 | 104 | You can set a clock speed by changing the `pi_arm_freq` in the `overclock-pi.yml` playbook, then run it with: 105 | 106 | ``` 107 | ansible-playbook overclock-pi.yml 108 | ``` 109 | 110 | Higher clock speeds require more power and thus more cooling, so if you are running a Pi cluster with just heatsinks, you may also need a fan blowing over them when running overclocked. 
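If you want to try different overclock values without editing the playbook, you can also override the play variables at runtime. The values below are only an example; pick numbers appropriate for your board, power supply, and cooling:

```
ansible-playbook overclock-pi.yml -e "pi_arm_freq=2200 pi_over_voltage=8"
```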
111 | 112 | ## Results 113 | 114 | Here are a few of the results I've acquired in my testing (sorted by efficiency, highest to lowest): 115 | 116 | | Configuration | Architecture | Result | Wattage | Gflops/W | 117 | |--- |--- |--- |--- |--- | 118 | | [M4 Mac mini (1x M4 @ 4.4 GHz, in Docker)](https://github.com/geerlingguy/top500-benchmark/issues/47) | Arm | 299.93 Gflops | 39.6W | 7.57 Gflops/W | 119 | | [M4 Max Mac Studio (1x M4 Max @ 4.51 GHz, in Docker)](https://github.com/geerlingguy/top500-benchmark/issues/57) | Arm | 685.00 Gflops | 120W | 5.71 Gflops/W | 120 | | [Radxa CM5 (RK3588S2 8-core)](https://github.com/geerlingguy/top500-benchmark/issues/31) | Arm | 48.619 Gflops | 10W | 4.86 Gflops/W | 121 | | [Supermicro AmpereOne (A192-26X @ 2.6 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/43) | Arm | 2,745.1 Gflops | 570W | 4.82 Gflops/W | 122 | | [Ampere Altra Dev Kit (Q64-22 @ 2.2 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/19) | Arm | 655.90 Gflops | 140W | 4.69 Gflops/W | 123 | | [Orange Pi 5 (RK3588S 8-core)](https://github.com/geerlingguy/top500-benchmark/issues/14) | Arm | 53.333 Gflops | 11.5W | 4.64 Gflops/W | 124 | | [Supermicro AmpereOne (A192-32X @ 3.2 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/43) | Arm | 3,026.9 Gflops | 692W | 4.37 Gflops/W | 125 | | [Radxa ROCK 5B (RK3588 8-core)](https://github.com/geerlingguy/top500-benchmark/issues/8) | Arm | 51.382 Gflops | 12W | 4.32 Gflops/W | 126 | | [Ampere Altra Developer Platform (M128-28 @ 2.8 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/17) | Arm | 1,265.5 Gflops | 296W | 4.27 Gflops/W | 127 | | [Orange Pi 5 Max (RK3588 8-core)](https://github.com/geerlingguy/top500-benchmark/issues/39) | Arm | 52.924 Gflops | 12.8W | 4.13 Gflops/W | 128 | | [Radxa ROCK 5C (RK3588S2 8-core)](https://github.com/geerlingguy/top500-benchmark/issues/32) | Arm | 49.285 Gflops | 12W | 4.11 Gflops/W | 129 | | [Ampere Altra Developer Platform (M96-28 @ 2.8 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/10) | Arm | 1,188.3 Gflops | 295W | 4.01 Gflops/W | 130 | | [M1 Max Mac Studio (1x M1 Max @ 3.2 GHz, in Docker)](https://github.com/geerlingguy/top500-benchmark/issues/4) | Arm | 264.32 Gflops | 66W | 4.00 Gflops/W | 131 | | [System76 Thelio Astra (Ampere Altra Max M128-30 @ 3.0 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/44) | Arm | 1,652.4 Gflops | 440W | 3.76 Gflops/W | 132 | | [Raspberry Pi CM5 (BCM2712 @ 2.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/48) | Arm | 32.152 Gflops | 9.2W | 3.49 Gflops/W | 133 | | [45Drives HL15 (Ampere Altra Q32-17 @ 1.7 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/25) | Arm | 332.07 Gflops | 100W | 3.32 Gflops/W | 134 | | [Turing Machines RK1 (RK3588 8-core)](https://github.com/geerlingguy/top500-benchmark/issues/22) | Arm | 59.810 Gflops | 18.1W | 3.30 Gflops/W | 135 | | [Raspberry Pi 500 (BCM2712 @ 2.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/50) | Arm | 35.586 Gflops | 11W | 3.24 Gflops/W | 136 | | [Turing Pi 2 (4x RK1 @ 2.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/27) | Arm | 224.60 Gflops | 73W | 3.08 Gflops/W | 137 | | [Raspberry Pi 5 16 GB (BCM2712 @ 2.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/51) | Arm | 36.737 Gflops | 12.0W | 3.06 Gflops/W | 138 | | [Raspberry Pi 5 8 GB (BCM2712 @ 2.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/18) | Arm | 35.169 Gflops | 12.7W | 2.77 Gflops/W | 139 | | [LattePanda Mu 
(1x N100 @ 3.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/30) | x86 | 62.851 Gflops | 25W | 2.51 Gflops/W | 140 | | [Radxa X4 (1x N100 @ 3.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/38) | x86 | 37.224 Gflops | 16W | 2.33 Gflops/W | 141 | | [Raspberry Pi CM4 (BCM2711 @ 1.5 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/33) | Arm | 11.433 Gflops | 5.2W | 2.20 Gflops/W | 142 | | [GMKtec NucBox G3 Plus (1x N150 @ 3.6 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/52) | x86 | 62.067 Gflops | 28.5W | 2.18 Gflops/W | 143 | | [Supermicro Ampere Altra (M128-30 @ 3.0 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/3) | Arm | 953.47 Gflops | 500W | 1.91 Gflops/W | 144 | | [Turing Pi 2 (4x CM4 @ 1.5 GHz)](https://www.jeffgeerling.com/blog/2021/turing-pi-2-4-raspberry-pi-nodes-on-mini-itx-board) | Arm | 44.942 Gflops | 24.5W | 1.83 Gflops/W | 145 | | [Sipeed NanoCluster (4x CM5 @ 2.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/63) | Arm | 112.25 Gflops | 62W | 1.81 Gflops/W | 146 | | [Lenovo M710q Tiny (1x i5-7400T @ 2.4 GHz)](https://www.jeffgeerling.com/blog/2023/rock-5-b-not-raspberry-pi-killer-yet) | x86 | 72.472 Gflops | 41W | 1.76 Gflops/W | 147 | | [Raspberry Pi 400 (BCM2711 @ 1.8 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/49) | Arm | 11.077 Gflops | 6.4W | 1.73 Gflops/W | 148 | | [Raspberry Pi 4 (BCM2711 @ 1.8 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/13) | Arm | 11.889 Gflops | 7.2W | 1.65 Gflops/W | 149 | | [Turing Pi 2 (4x CM4 @ 2.0 GHz)](https://www.jeffgeerling.com/blog/2021/turing-pi-2-4-raspberry-pi-nodes-on-mini-itx-board) | Arm | 51.327 Gflops | 33W | 1.54 Gflops/W | 150 | | [DeskPi Super6c (6x CM4 @ 1.5 GHz)](https://www.jeffgeerling.com/blog/2022/pi-cluster-vs-ampere-altra-max-128-core-arm-cpu) | Arm | 60.293 Gflops | 40W | 1.50 Gflops/W | 151 | | [Orange Pi CM4 (RK3566 4-core)](https://github.com/geerlingguy/top500-benchmark/issues/23) | Arm | 5.604 Gflops | 4.0W | 1.40 Gflops/W | 152 | | [DeskPi Super6c (6x CM4 @ 2.0 GHz)](https://www.jeffgeerling.com/blog/2022/pi-cluster-vs-ampere-altra-max-128-core-arm-cpu) | Arm | 70.338 Gflops | 51W | 1.38 Gflops/W | 153 | | Custom PC (AMD 5600x @ 3.7 GHz) | x86 | 229 Gflops | 196W | 1.16 Gflops/W | 154 | | [Radxa Orion O6 (CIX P1 CD8180 @ 2.6 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/54) | Arm | 36.014 Gflops | 35.7W | 1.01 Gflops/W | 155 | | [HiFive Premier P550 (ESWIN EIC7700X @ 1.4 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/56) | RISC-V | 7.181 Gflops | 8.9W | 0.81 Gflops/W | 156 | | [Milk-V Mars CM (JH7110 4-core)](https://github.com/geerlingguy/top500-benchmark/issues/20) | RISC-V | 1.99 Gflops | 3.6W | 0.55 Gflops/W | 157 | | [Lichee Console 4A (TH1520 4-core)](https://github.com/geerlingguy/top500-benchmark/issues/20) | RISC-V | 1.99 Gflops | 3.6W | 0.55 Gflops/W | 158 | | [Milk-V Jupiter (SpacemiT X60 8-core)](https://github.com/geerlingguy/top500-benchmark/issues/37) | RISC-V | 5.66 Gflops | 10.6W | 0.55 Gflops/W | 159 | | [Sipeed Lichee Pi 3A (SpacemiT K1 8-core)](https://github.com/geerlingguy/top500-benchmark/issues/42) | RISC-V | 4.95 Gflops | 9.1W | 0.54 Gflops/W | 160 | | [Milk-V Mars (JH7110 4-core)](https://github.com/geerlingguy/top500-benchmark/issues/35) | RISC-V | 2.06 Gflops | 4.7W | 0.44 Gflops/W | 161 | | [Raspberry Pi Zero 2 W (RP3A0-AU @ 1.0 GHz)](https://github.com/geerlingguy/top500-benchmark/issues/26) | Arm | 0.370 Gflops | 2.1W | 0.18 Gflops/W | 
162 | | [M2 Pro MacBook Pro (1x M2 Pro, in Asahi Linux)](https://github.com/geerlingguy/top500-benchmark/issues/21#issuecomment-1792425949) | Arm | 296.93 Gflops | N/A | N/A | 163 | | M2 MacBook Air (1x M2 @ 3.5 GHz, in Docker) | Arm | 104.68 Gflops | N/A | N/A | 164 | 165 | You can [enter the Gflops in this tool](https://hpl-calculator.sourceforge.net/hpl-calculations.php) to see how it compares to historical top500 lists. 166 | 167 | > **Note**: My current calculation for efficiency is based on average power draw over the course of the benchmark, based on either a Kill-A-Watt (pre-2024 tests) or a ThirdReality Smart Outlet monitor. The efficiency calculations may vary depending on the specific system under test. 168 | 169 | ### Other Listings 170 | 171 | Over the years, as I find other people's listings of HPL results—especially those with power usage ratings—I will add them here: 172 | 173 | - [VMW Research Group GFLOPS/W listing](https://web.eece.maine.edu/~vweaver/group/green_machines.html) 174 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | nocows = true 3 | forks = 25 4 | inventory = hosts.ini 5 | interpreter_python = /usr/bin/python3 6 | stdout_callback = yaml 7 | -------------------------------------------------------------------------------- /dependencies/arch-based.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update pacman cache. 3 | community.general.pacman: 4 | update_cache: true 5 | become: true 6 | 7 | - name: Install dependencies. 8 | ansible.builtin.package: 9 | name: 10 | - git 11 | - base-devel 12 | - gcc-fortran 13 | state: present 14 | become: true 15 | -------------------------------------------------------------------------------- /dependencies/debian-based.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update apt cache. 3 | ansible.builtin.apt: 4 | update_cache: true 5 | cache_valid_time: 600 6 | become: true 7 | 8 | - name: Install dependencies. 9 | ansible.builtin.package: 10 | name: 11 | - git 12 | - build-essential 13 | - gfortran 14 | - automake 15 | state: present 16 | become: true 17 | -------------------------------------------------------------------------------- /dependencies/rhel-based.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update dnf cache. 3 | ansible.builtin.dnf: 4 | update_cache: true 5 | become: true 6 | 7 | - name: Install dependencies. 8 | ansible.builtin.package: 9 | name: 10 | - '@Development Tools' 11 | - gcc-gfortran 12 | # Fedora doesn't install this with the Development Tools group. 13 | - gcc-g++ 14 | state: present 15 | become: true 16 | -------------------------------------------------------------------------------- /example.config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Working directory where HPL and associated applications will be compiled. 3 | hpl_root: /opt/top500 4 | 5 | mpich_version: "4.2.3" 6 | 7 | # Linear algebra library options. 8 | linear_algebra_library: blis # 'atlas', 'openblas', or 'blis' 9 | linear_algebra_blis_version: master # only used for blis 10 | linear_algebra_openblas_version: develop # only used for openblas 11 | blis_configure_options: "" 12 | 13 | # Home directory of the user for whom SSH keys will be configured. 
14 | ssh_user: pi 15 | ssh_user_home: /home/pi 16 | 17 | # Specify manually if needed for mixed-RAM-capacity clusters. 18 | ram_in_gb: "{{ ( ansible_memtotal_mb / 1024 * 0.75 ) | int | abs }}" 19 | 20 | # Count the nodes for accurate HPL.dat calculations. 21 | nodecount: "{{ ansible_play_hosts | length | int }}" 22 | 23 | # HPL.dat configuration options. 24 | # See: https://www.advancedclustering.com/act_kb/tune-hpl-dat-file/ 25 | # See also: https://hpl-calculator.sourceforge.net/HPL-HowTo.pdf 26 | hpl_dat_opts: 27 | # sqrt((Memory in GB * 1024 * 1024 * 1024 * Node count) / 8) * 0.9 28 | Ns: "{{ (((((ram_in_gb | int) * 1024 * 1024 * 1024 * (nodecount | int)) / 8) | root) * 0.90) | int }}" 29 | NBs: 256 30 | # (P * Q) should be roughly equivalent to total core count, with Qs higher. 31 | # If running on a single system, Ps should be 1 and Qs should be core count. 32 | Ps: 1 33 | Qs: 4 34 | -------------------------------------------------------------------------------- /example.hosts.ini: -------------------------------------------------------------------------------- 1 | # For single node benchmarking (default), use this: 2 | [cluster] 3 | 127.0.0.1 ansible_connection=local 4 | 5 | # For cluster benchmarking, delete everything above this line and uncomment: 6 | # [cluster] 7 | # node-01.local 8 | # node-02.local 9 | # node-03.local 10 | # node-04.local 11 | # node-05.local 12 | # 13 | # [cluster:vars] 14 | # ansible_user=username 15 | -------------------------------------------------------------------------------- /main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Automated setup of distributed Linpack benchmark. 3 | # 4 | # Inspired by: https://mikejmcfarlane.github.io/blog/2020/09/17/High-Performance-Linpack-for-raspberry-pi-supercomputer 5 | # See also: https://www.sci-pi.org.uk/bench/linpack.html 6 | 7 | - name: Install linpack benchmark. 8 | hosts: cluster 9 | become: false 10 | tags: ['setup'] 11 | 12 | vars_files: ['config.yml'] 13 | 14 | tasks: 15 | - ansible.builtin.include_tasks: dependencies/rhel-based.yml 16 | when: ansible_os_family == 'RedHat' 17 | 18 | - ansible.builtin.include_tasks: dependencies/debian-based.yml 19 | when: ansible_os_family == 'Debian' 20 | 21 | - ansible.builtin.include_tasks: dependencies/arch-based.yml 22 | when: ansible_os_family == 'Archlinux' 23 | 24 | - name: Create required temporary directories. 25 | ansible.builtin.file: 26 | path: "{{ item }}" 27 | state: directory 28 | owner: "{{ ansible_user | default(ansible_env.USER, true) | default(ansible_user_id, true) }}" 29 | group: "{{ ansible_user | default(ansible_env.USER, true) | default(ansible_user_id, true) }}" 30 | mode: 0755 31 | loop: 32 | - "{{ hpl_root }}/tmp" 33 | - "{{ hpl_root }}/tmp/{{ linear_algebra_library }}-build" 34 | become: true 35 | 36 | - name: Download MPI (Message Passing Interface). 37 | ansible.builtin.unarchive: 38 | src: https://www.mpich.org/static/downloads/{{ mpich_version }}/mpich-{{ mpich_version }}.tar.gz 39 | dest: "{{ hpl_root }}/tmp" 40 | remote_src: true 41 | creates: "{{ hpl_root }}/tmp/mpich-{{ mpich_version }}/README" 42 | 43 | - name: Build MPI (takes a while). 44 | ansible.builtin.command: "{{ item }}" 45 | args: 46 | chdir: "{{ hpl_root }}/tmp/mpich-{{ mpich_version }}" 47 | creates: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE" 48 | loop: 49 | - ./configure --with-device=ch3:sock FFLAGS=-fallow-argument-mismatch 50 | - "make -j{{ ansible_processor_nproc }}" 51 | 52 | - name: Install MPI. 
53 | ansible.builtin.command: make install 54 | args: 55 | chdir: "{{ hpl_root }}/tmp/mpich-{{ mpich_version }}" 56 | creates: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE" 57 | become: true 58 | 59 | - name: Create 'COMPILE_MPI_COMPLETE' file. 60 | file: 61 | path: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE" 62 | state: touch 63 | mode: 0644 64 | 65 | - name: Test if we can set CPU scaling parameters. 66 | ansible.builtin.command: >- 67 | ls /sys/devices/system/cpu/cpu0/cpufreq 68 | failed_when: false 69 | changed_when: false 70 | register: cpufreq_exists 71 | 72 | # Note: There was no simpler way to do this besides `shell`. 73 | - name: Ensure CPU scaling is set to 'performance'. 74 | ansible.builtin.shell: >- 75 | echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor 76 | when: cpufreq_exists.rc == 0 77 | become: true 78 | 79 | - ansible.builtin.include_tasks: tasks/algebra_atlas.yml 80 | when: linear_algebra_library == 'atlas' 81 | 82 | - ansible.builtin.include_tasks: tasks/algebra_blis.yml 83 | when: linear_algebra_library == 'blis' 84 | 85 | - ansible.builtin.include_tasks: tasks/algebra_openblas.yml 86 | when: linear_algebra_library == 'openblas' 87 | 88 | - name: Download HPL (High Performance Linpack). 89 | ansible.builtin.unarchive: 90 | src: http://www.netlib.org/benchmark/hpl/hpl-2.3.tar.gz 91 | dest: "{{ hpl_root }}/tmp" 92 | remote_src: true 93 | creates: "{{ hpl_root }}/tmp/hpl-2.3/README" 94 | 95 | - name: Set up HPL makefile. 96 | ansible.builtin.shell: sh make_generic 97 | args: 98 | chdir: "{{ hpl_root }}/tmp/hpl-2.3/setup" 99 | creates: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE" 100 | 101 | - name: Copy HPL makefile into place. 102 | ansible.builtin.template: 103 | src: templates/benchmark-Make.top500.j2 104 | dest: "{{ hpl_root }}/tmp/hpl-2.3/Make.top500" 105 | mode: 0644 106 | 107 | - name: Install HPL. 108 | ansible.builtin.command: >- 109 | make arch=top500 110 | args: 111 | chdir: "{{ hpl_root }}/tmp/hpl-2.3" 112 | creates: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE" 113 | 114 | - name: Create COMPILE_HPL_COMPLETE file. 115 | ansible.builtin.file: 116 | path: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE" 117 | state: touch 118 | mode: 0644 119 | 120 | # See: https://github.com/geerlingguy/top500-benchmark/issues/1 121 | - name: Configure SSH connections between nodes. 122 | hosts: cluster 123 | become: false 124 | tags: ['ssh'] 125 | 126 | vars_files: ['config.yml'] 127 | vars: 128 | host_ips: [] 129 | 130 | tasks: 131 | - name: Ensure .ssh directory exists. 132 | ansible.builtin.file: 133 | path: "{{ ssh_user_home }}/.ssh" 134 | state: directory 135 | owner: "{{ ssh_user }}" 136 | group: "{{ ssh_user }}" 137 | mode: 0700 138 | 139 | - name: Generate an OpenSSH keypair. 140 | community.crypto.openssh_keypair: 141 | path: "{{ ssh_user_home }}/.ssh/id_rsa" 142 | size: 2048 143 | 144 | - name: Read out ssh pubkey from each host. 145 | ansible.builtin.command: cat "{{ ssh_user_home }}/.ssh/id_rsa.pub" 146 | changed_when: false 147 | register: ssh_pubkey 148 | 149 | - name: Combine pubkeys into single list. 150 | ansible.builtin.set_fact: 151 | combined_ssh_pubkeys: "{{ ansible_play_hosts | map('extract', hostvars, 'ssh_pubkey') | map(attribute='stdout') | list }}" 152 | run_once: true 153 | 154 | - name: Write all pubkeys to each host. 155 | ansible.posix.authorized_key: 156 | user: "{{ ssh_user }}" 157 | state: present 158 | key: "{{ item }}" 159 | loop: "{{ combined_ssh_pubkeys }}" 160 | 161 | - name: Generate list of host IP addresses. 
162 | ansible.builtin.set_fact: 163 | host_ips: "{{ host_ips + [ hostvars[item].ansible_default_ipv4.address ] }}" 164 | loop: "{{ groups['cluster'] }}" 165 | 166 | - name: Accept hostkeys for each host on each host. 167 | ansible.builtin.command: >- 168 | ssh {{ ssh_user }}@{{ item }} -o StrictHostKeyChecking=accept-new date 169 | loop: "{{ host_ips }}" 170 | 171 | - name: Write chunk of hosts information to the hosts file. 172 | ansible.builtin.blockinfile: 173 | path: /etc/hosts 174 | marker: "# {mark} Ansible MPI host {{ item }}" 175 | block: | 176 | {{ hostvars[item].ansible_default_ipv4.address }} {{ item }} {{ item | replace('.local', '') }} 177 | loop: "{{ groups['cluster'] }}" 178 | become: true 179 | 180 | - name: Run linpack benchmark. 181 | hosts: cluster 182 | become: false 183 | tags: ['benchmark'] 184 | 185 | vars_files: ['config.yml'] 186 | vars: 187 | host_ips: [] 188 | 189 | tasks: 190 | - name: Create a file describing nodes for MPI execution. 191 | ansible.builtin.template: 192 | src: templates/mpi-node-config.j2 193 | dest: "{{ hpl_root }}/tmp/hpl-2.3/bin/top500/cluster-hosts" 194 | mode: 0644 195 | 196 | # Template originally generated using this website: 197 | # https://www.advancedclustering.com/act_kb/tune-hpl-dat-file/ 198 | - name: Create HPL.dat file. 199 | ansible.builtin.template: 200 | src: templates/HPL.dat.j2 201 | dest: "{{ hpl_root }}/tmp/hpl-2.3/bin/top500/HPL.dat" 202 | mode: 0644 203 | 204 | - name: Generate list of host IP addresses. 205 | ansible.builtin.set_fact: 206 | host_ips: "{{ host_ips + [ hostvars[item].ansible_default_ipv4.address ] }}" 207 | loop: "{{ groups['cluster'] }}" 208 | 209 | # If this is not done, the nodes will fail to connect to each other 210 | # causing the playbook to hang at 'Run the benchmark.' 211 | - include_tasks: tasks/firewall-configure.yml 212 | when: ansible_os_family == "RedHat" 213 | 214 | - name: Run the benchmark. 215 | ansible.builtin.command: mpirun -f cluster-hosts ./xhpl 216 | args: 217 | chdir: "{{ hpl_root }}/tmp/hpl-2.3/bin/top500" 218 | register: mpirun_output 219 | run_once: true 220 | 221 | - include_tasks: tasks/firewall-reset.yml 222 | when: ansible_os_family == "RedHat" 223 | 224 | - name: Output the results. 225 | debug: var=mpirun_output.stdout 226 | run_once: true 227 | -------------------------------------------------------------------------------- /overclock-pi.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up overclock on Raspberry Pis. 3 | hosts: cluster 4 | become: true 5 | 6 | vars_files: ['config.yml'] 7 | vars: 8 | pi_over_voltage: '6' 9 | pi_arm_freq: '2000' 10 | 11 | handlers: 12 | - name: reboot pi 13 | ansible.builtin.reboot: 14 | 15 | tasks: 16 | - name: Configure options in /boot/config.txt. 17 | community.general.ini_file: 18 | path: /boot/config.txt 19 | section: '{{ item.section }}' 20 | option: "{{ item.option }}" 21 | value: "{{ item.value }}" 22 | no_extra_spaces: true 23 | state: present 24 | with_items: 25 | - section: all 26 | option: gpu_mem 27 | value: '16' 28 | - section: '' 29 | option: over_voltage 30 | value: "{{ pi_over_voltage }}" 31 | - section: '' 32 | option: arm_freq 33 | value: "{{ pi_arm_freq }}" 34 | notify: reboot pi 35 | -------------------------------------------------------------------------------- /tasks/algebra_atlas.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Download ATLAS linear algebra library. 
3 | ansible.builtin.unarchive: 4 | src: https://sourceforge.net/projects/math-atlas/files/Stable/3.10.3/atlas3.10.3.tar.bz2 5 | dest: "{{ hpl_root }}/tmp" 6 | remote_src: true 7 | creates: "{{ hpl_root }}/tmp/ATLAS/README" 8 | 9 | - name: Install ATLAS (takes a LONG time). 10 | ansible.builtin.command: "{{ item }}" 11 | args: 12 | chdir: "{{ hpl_root }}/tmp/atlas-build" 13 | creates: "{{ hpl_root }}/tmp/COMPILE_ATLAS_COMPLETE" 14 | loop: 15 | - ../ATLAS/configure 16 | - make 17 | 18 | - name: Create 'COMPILE_ATLAS_COMPLETE' file. 19 | ansible.builtin.file: 20 | path: "{{ hpl_root }}/tmp/COMPILE_ATLAS_COMPLETE" 21 | state: touch 22 | mode: 0644 23 | -------------------------------------------------------------------------------- /tasks/algebra_blis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Download Blis linear algebra library. 3 | ansible.builtin.git: 4 | repo: 'https://github.com/flame/blis.git' 5 | version: "{{ linear_algebra_blis_version }}" 6 | dest: "{{ hpl_root }}/tmp/blis-build" 7 | 8 | - name: Install Blis. 9 | ansible.builtin.command: "{{ item }}" 10 | args: 11 | chdir: "{{ hpl_root }}/tmp/blis-build" 12 | creates: "{{ hpl_root }}/tmp/COMPILE_BLIS_COMPLETE" 13 | loop: 14 | - ./configure --prefix=/opt/blis {{ blis_configure_options | default('auto', true) }} 15 | - make -j{{ ansible_processor_vcpus }} 16 | - make install 17 | become: true 18 | 19 | - name: Create 'COMPILE_BLIS_COMPLETE' file. 20 | ansible.builtin.file: 21 | path: "{{ hpl_root }}/tmp/COMPILE_BLIS_COMPLETE" 22 | state: touch 23 | mode: 0644 24 | -------------------------------------------------------------------------------- /tasks/algebra_openblas.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Download OpenBLAS linear algebra library. 3 | ansible.builtin.git: 4 | repo: 'https://github.com/OpenMathLib/OpenBLAS.git' 5 | version: "{{ linear_algebra_openblas_version }}" 6 | dest: "{{ hpl_root }}/tmp/openblas-build" 7 | 8 | - name: Install OpenBLAS. 9 | ansible.builtin.command: "{{ item }}" 10 | args: 11 | chdir: "{{ hpl_root }}/tmp/openblas-build" 12 | creates: "{{ hpl_root }}/tmp/COMPILE_OPENBLAS_COMPLETE" 13 | loop: 14 | - make -j{{ ansible_processor_vcpus }} 15 | - make PREFIX=/opt/openblas install 16 | become: true 17 | 18 | - name: Create 'COMPILE_OPENBLAS_COMPLETE' file. 19 | ansible.builtin.file: 20 | path: "{{ hpl_root }}/tmp/COMPILE_OPENBLAS_COMPLETE" 21 | state: touch 22 | mode: 0644 23 | -------------------------------------------------------------------------------- /tasks/firewall-configure.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Creating new custom firewall zone. 3 | ansible.posix.firewalld: 4 | zone: top500 5 | permanent: true 6 | state: present 7 | become: true 8 | 9 | - name: Setting custom firewall zone to accept connections. 10 | ansible.posix.firewalld: 11 | zone: top500 12 | target: ACCEPT 13 | state: enabled 14 | permanent: true 15 | become: true 16 | 17 | - name: Adding nodes as trusted sources in the firewall. 18 | ansible.posix.firewalld: 19 | source: "{{ item }}" 20 | zone: top500 21 | state: enabled 22 | permanent: true 23 | loop: "{{ host_ips }}" 24 | when: item != ansible_default_ipv4.address 25 | become: true 26 | 27 | - name: Restarting firewall for changes to take effect. 
28 | ansible.builtin.service: 29 | name: firewalld 30 | state: restarted 31 | become: true 32 | -------------------------------------------------------------------------------- /tasks/firewall-reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Remove our custom firewall zone since we don't need it anymore 3 | - name: Reverting firewall back to its original state. 4 | ansible.posix.firewalld: 5 | zone: top500 6 | state: absent 7 | permanent: true 8 | become: true 9 | 10 | - name: Restarting firewall for changes to take effect. 11 | ansible.builtin.service: 12 | name: firewalld 13 | state: restarted 14 | become: true 15 | 16 | # When removing a custom firewall zone, a .xml.old file will sometimes remain 17 | - name: Cleaning up firewall rules. 18 | ansible.builtin.file: 19 | path: /etc/firewalld/zones/top500.xml.old 20 | state: absent 21 | become: true 22 | -------------------------------------------------------------------------------- /templates/HPL.dat.j2: -------------------------------------------------------------------------------- 1 | {# https://www.advancedclustering.com/act_kb/tune-hpl-dat-file/ #} 2 | HPLinpack benchmark input file 3 | Innovative Computing Laboratory, University of Tennessee 4 | HPL.out output file name (if any) 5 | 6 device out (6=stdout,7=stderr,file) 6 | 1 # of problems sizes (N) 7 | {{ hpl_dat_opts.Ns }} Ns 8 | 1 # of NBs 9 | {{ hpl_dat_opts.NBs }} NBs 10 | 0 PMAP process mapping (0=Row-,1=Column-major) 11 | 1 # of process grids (P x Q) 12 | {{ hpl_dat_opts.Ps }} Ps 13 | {{ hpl_dat_opts.Qs }} Qs 14 | 16.0 threshold 15 | 1 # of panel fact 16 | 2 PFACTs (0=left, 1=Crout, 2=Right) 17 | 1 # of recursive stopping criterium 18 | 4 NBMINs (>= 1) 19 | 1 # of panels in recursion 20 | 2 NDIVs 21 | 1 # of recursive panel fact. 22 | 1 RFACTs (0=left, 1=Crout, 2=Right) 23 | 1 # of broadcast 24 | 1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 25 | 1 # of lookahead depth 26 | 1 DEPTHs (>=0) 27 | 2 SWAP (0=bin-exch,1=long,2=mix) 28 | 64 swapping threshold 29 | 0 L1 in (0=transposed,1=no-transposed) form 30 | 0 U in (0=transposed,1=no-transposed) form 31 | 1 Equilibration (0=no,1=yes) 32 | 8 memory alignment in double (> 0) 33 | ##### This line (no. 32) is ignored (it serves as a separator). ###### 34 | 0 Number of additional problem sizes for PTRANS 35 | 1200 10000 30000 values of N 36 | 0 number of additional blocking sizes for PTRANS 37 | 40 9 8 13 13 20 16 32 64 values of NB 38 | -------------------------------------------------------------------------------- /templates/benchmark-Make.top500.j2: -------------------------------------------------------------------------------- 1 | # 2 | # -- High Performance Computing Linpack Benchmark (HPL) 3 | # HPL - 2.3 - December 2, 2018 4 | # Antoine P. Petitet 5 | # University of Tennessee, Knoxville 6 | # Innovative Computing Laboratory 7 | # (C) Copyright 2000-2008 All Rights Reserved 8 | # 9 | # -- Copyright notice and Licensing terms: 10 | # 11 | # Redistribution and use in source and binary forms, with or without 12 | # modification, are permitted provided that the following conditions 13 | # are met: 14 | # 15 | # 1. Redistributions of source code must retain the above copyright 16 | # notice, this list of conditions and the following disclaimer. 17 | # 18 | # 2. 
Redistributions in binary form must reproduce the above copyright 19 | # notice, this list of conditions, and the following disclaimer in the 20 | # documentation and/or other materials provided with the distribution. 21 | # 22 | # 3. All advertising materials mentioning features or use of this 23 | # software must display the following acknowledgement: 24 | # This product includes software developed at the University of 25 | # Tennessee, Knoxville, Innovative Computing Laboratory. 26 | # 27 | # 4. The name of the University, the name of the Laboratory, or the 28 | # names of its contributors may not be used to endorse or promote 29 | # products derived from this software without specific written 30 | # permission. 31 | # 32 | # -- Disclaimer: 33 | # 34 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 | # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY 38 | # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 | # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 | # ###################################################################### 46 | # 47 | # ---------------------------------------------------------------------- 48 | # - shell -------------------------------------------------------------- 49 | # ---------------------------------------------------------------------- 50 | # 51 | SHELL = /bin/sh 52 | # 53 | CD = cd 54 | CP = cp 55 | LN_S = ln -s 56 | MKDIR = mkdir 57 | RM = /bin/rm -f 58 | TOUCH = touch 59 | # 60 | # ---------------------------------------------------------------------- 61 | # - Platform identifier ------------------------------------------------ 62 | # ---------------------------------------------------------------------- 63 | # 64 | ARCH = top500 65 | # 66 | # ---------------------------------------------------------------------- 67 | # - HPL Directory Structure / HPL library ------------------------------ 68 | # ---------------------------------------------------------------------- 69 | # 70 | TOPdir = {{ hpl_root }}/tmp/hpl-2.3 71 | INCdir = $(TOPdir)/include 72 | BINdir = $(TOPdir)/bin/$(ARCH) 73 | LIBdir = $(TOPdir)/lib/$(ARCH) 74 | # 75 | HPLlib = $(LIBdir)/libhpl.a 76 | # 77 | # ---------------------------------------------------------------------- 78 | # - Message Passing library (MPI) -------------------------------------- 79 | # ---------------------------------------------------------------------- 80 | # MPinc tells the C compiler where to find the Message Passing library 81 | # header files, MPlib is defined to be the name of the library to be 82 | # used. The variable MPdir is only used for defining MPinc and MPlib. 
83 | # 84 | MPdir = /usr/local 85 | MPinc = -I /usr/local/include 86 | MPlib = /usr/local/lib/libmpich.so 87 | # 88 | # ---------------------------------------------------------------------- 89 | # - Linear Algebra library (BLAS or VSIPL) ----------------------------- 90 | # ---------------------------------------------------------------------- 91 | # LAinc tells the C compiler where to find the Linear Algebra library 92 | # header files, LAlib is defined to be the name of the library to be 93 | # used. The variable LAdir is only used for defining LAinc and LAlib. 94 | # 95 | {% if linear_algebra_library == 'atlas' %} 96 | LAdir = {{ hpl_root }}/tmp/atlas-build 97 | LAinc = 98 | LAlib = $(LAdir)/lib/libf77blas.a $(LAdir)/lib/libatlas.a 99 | {% elif linear_algebra_library == 'blis' %} 100 | LAdir = /opt/blis 101 | LAinc = 102 | LAlib = $(LAdir)/lib/libblis.a -lpthread 103 | {% elif linear_algebra_library == 'openblas' %} 104 | LAdir = /opt/openblas 105 | LAinc = 106 | LAlib = $(LAdir)/lib/libopenblas.a -lpthread 107 | {% endif %} 108 | # 109 | # ---------------------------------------------------------------------- 110 | # - F77 / C interface -------------------------------------------------- 111 | # ---------------------------------------------------------------------- 112 | # You can skip this section if and only if you are not planning to use 113 | # a BLAS library featuring a Fortran 77 interface. Otherwise, it is 114 | # necessary to fill out the F2CDEFS variable with the appropriate 115 | # options. **One and only one** option should be chosen in **each** of 116 | # the 3 following categories: 117 | # 118 | # 1) name space (How C calls a Fortran 77 routine) 119 | # 120 | # -DAdd_ : all lower case and a suffixed underscore (Suns, 121 | # Intel, ...), [default] 122 | # -DNoChange : all lower case (IBM RS6000), 123 | # -DUpCase : all upper case (Cray), 124 | # -DAdd__ : the FORTRAN compiler in use is f2c. 125 | # 126 | # 2) C and Fortran 77 integer mapping 127 | # 128 | # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] 129 | # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, 130 | # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 131 | # 132 | # 3) Fortran 77 string handling 133 | # 134 | # -DStringSunStyle : The string address is passed at the string loca- 135 | # tion on the stack, and the string length is then 136 | # passed as an F77_INTEGER after all explicit 137 | # stack arguments, [default] 138 | # -DStringStructPtr : The address of a structure is passed by a 139 | # Fortran 77 string, and the structure is of the 140 | # form: struct {char *cp; F77_INTEGER len;}, 141 | # -DStringStructVal : A structure is passed by value for each Fortran 142 | # 77 string, and the structure is of the form: 143 | # struct {char *cp; F77_INTEGER len;}, 144 | # -DStringCrayStyle : Special option for Cray machines, which uses 145 | # Cray fcd (fortran character descriptor) for 146 | # interoperation. 
147 | # 148 | F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle 149 | # 150 | # ---------------------------------------------------------------------- 151 | # - HPL includes / libraries / specifics ------------------------------- 152 | # ---------------------------------------------------------------------- 153 | # 154 | HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) 155 | HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) 156 | # 157 | # - Compile time options ----------------------------------------------- 158 | # 159 | # -DHPL_COPY_L force the copy of the panel L before bcast; 160 | # -DHPL_CALL_CBLAS call the cblas interface; 161 | # -DHPL_CALL_VSIPL call the vsip library; 162 | # -DHPL_DETAILED_TIMING enable detailed timers; 163 | # 164 | # By default HPL will: 165 | # *) not copy L before broadcast, 166 | # *) call the BLAS Fortran 77 interface, 167 | # *) not display detailed timing information. 168 | # 169 | HPL_OPTS = 170 | # 171 | # ---------------------------------------------------------------------- 172 | # 173 | HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) 174 | # 175 | # ---------------------------------------------------------------------- 176 | # - Compilers / linkers - Optimization flags --------------------------- 177 | # ---------------------------------------------------------------------- 178 | # 179 | CC = mpicc 180 | CCNOOPT = $(HPL_DEFS) 181 | CCFLAGS = $(HPL_DEFS) 182 | # 183 | LINKER = mpif77 184 | LINKFLAGS = 185 | # 186 | ARCHIVER = ar 187 | ARFLAGS = r 188 | RANLIB = echo 189 | # 190 | # ---------------------------------------------------------------------- 191 | -------------------------------------------------------------------------------- /templates/mpi-node-config.j2: -------------------------------------------------------------------------------- 1 | {% for host in groups['cluster'] %} 2 | {{ hostvars[host].ansible_default_ipv4.address }}:{{ hostvars[host].ansible_processor_vcpus }} 3 | {% endfor %} 4 | --------------------------------------------------------------------------------
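For reference, on a hypothetical four-node cluster of quad-core machines, the `mpi-node-config.j2` template above would render a `cluster-hosts` file along these lines (the IP addresses are placeholders):

```
10.0.0.11:4
10.0.0.12:4
10.0.0.13:4
10.0.0.14:4
```

Each line is `address:process-count`, the host file format that `mpirun -f cluster-hosts` consumes when launching `xhpl` across the cluster.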