├── roles
│   ├── fireball
│   │   ├── templates
│   │   │   └── resolv.conf.j2
│   │   └── tasks
│   │       └── main.yml
│   ├── headnode
│   │   ├── templates
│   │   │   ├── exports.j2
│   │   │   └── dnsmasq.conf.j2
│   │   ├── files
│   │   │   └── iptables-nat.txt
│   │   └── tasks
│   │       └── main.yml
│   ├── gmetad
│   │   ├── handlers
│   │   │   └── main.yml
│   │   ├── templates
│   │   │   └── ganglia.conf.j2
│   │   └── tasks
│   │       └── main.yml
│   ├── slurm
│   │   ├── files
│   │   │   └── munge.key
│   │   ├── handlers
│   │   │   └── main.yml
│   │   ├── tasks
│   │   │   └── main.yml
│   │   └── templates
│   │       └── slurm.conf.j2
│   ├── network_client
│   │   ├── handlers
│   │   │   └── main.yml
│   │   ├── tasks
│   │   │   └── main.yml
│   │   └── templates
│   │       └── ntp.conf.j2
│   ├── gmond
│   │   └── tasks
│   │       └── main.yml
│   ├── common
│   │   └── tasks
│   │       └── main.yml
│   └── ansible-deploy
│       └── tasks
│           └── main.yml
├── hosts.pi
├── computes.yml
├── headnode.yml
├── LICENSE
└── README.md
/roles/fireball/templates/resolv.conf.j2:
--------------------------------------------------------------------------------
1 | nameserver 8.8.8.8
2 |
--------------------------------------------------------------------------------
/hosts.pi:
--------------------------------------------------------------------------------
1 | [headnode]
2 | 10.0.1.2
3 | [computes]
4 | pi01
5 | pi02
6 | pi03
7 | pi04
8 |
--------------------------------------------------------------------------------
/roles/headnode/templates/exports.j2:
--------------------------------------------------------------------------------
1 | /home 192.168.42.0/255.255.255.0(rw,no_root_squash)
2 |
--------------------------------------------------------------------------------
/roles/gmetad/handlers/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: restart apache2
3 | action: service name=apache2 state=restarted
4 |
--------------------------------------------------------------------------------
/roles/slurm/files/munge.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ajdecon/ansible-pi-cluster/HEAD/roles/slurm/files/munge.key
--------------------------------------------------------------------------------
/roles/network_client/handlers/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: restart ntp
3 | action: service name=ntp state=restarted
4 |
--------------------------------------------------------------------------------
/roles/headnode/templates/dnsmasq.conf.j2:
--------------------------------------------------------------------------------
1 | expand-hosts
2 | domain=picluster
3 | no-dhcp-interface=eth0
4 | no-dhcp-interface=eth0:1
5 | interface=eth0:1
6 |
7 |
--------------------------------------------------------------------------------
/roles/slurm/handlers/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: restart munge
3 | action: service name=munge state=restarted
4 | - name: restart slurm
5 | action: service name=slurm-llnl state=restarted
6 |
--------------------------------------------------------------------------------
/roles/gmond/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: install ganglia
3 | action: apt pkg=ganglia-monitor state=installed
4 | - name: start gmond
5 | action: service name=ganglia-monitor state=started
6 | ignore_errors: yes
7 |
--------------------------------------------------------------------------------
/computes.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - hosts: computes
3 | user: pi
4 | sudo: yes
5 | roles:
6 | - fireball
7 |
8 | - hosts: computes
9 | connection: fireball
10 | roles:
11 | - common
12 | - network_client
13 | - slurm
14 | - gmond
15 |
--------------------------------------------------------------------------------
/roles/gmetad/templates/ganglia.conf.j2:
--------------------------------------------------------------------------------
1 | Alias /ganglia /usr/share/ganglia-webfrontend
2 |
3 | <Directory /usr/share/ganglia-webfrontend>
4 | AllowOverride All
5 | Order allow,deny
6 | Allow from all
7 | Deny from none
8 | </Directory>
9 |
10 |
--------------------------------------------------------------------------------
/headnode.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - hosts: headnode
3 | user: pi
4 | sudo: yes
5 | roles:
6 | - fireball
7 |
8 | - hosts: headnode
9 | connection: fireball
10 | roles:
11 | - common
12 | - headnode
13 | - slurm
14 | - gmetad
15 | - gmond
16 | - ansible-deploy
17 |
--------------------------------------------------------------------------------
/roles/common/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: install common packages
3 | action: apt pkg={{item}} state=installed
4 | with_items:
5 | - openmpi-bin
6 | - openmpi-doc
7 | - libopenmpi-dev
8 | - hpcc
9 | - liblapack-dev
10 | - vim-gtk
11 | - pdsh
12 |
13 |
--------------------------------------------------------------------------------
/roles/ansible-deploy/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: destination location exists
3 | action: file path=/home/pi/ansible-pi-cluster state=directory owner=pi group=pi mode=0755
4 | - name: download this repo
5 | action: "git repo=https://github.com/ajdecon/ansible-pi-cluster dest=/home/pi/ansible-pi-cluster"
6 | - name: git pull
7 | action: "shell cd /home/pi/ansible-pi-cluster && git pull"
8 | - name: repo owned by pi
9 | action: file path=/home/pi/ansible-pi-cluster state=directory owner=pi group=pi recurse=yes
10 |
11 |
--------------------------------------------------------------------------------
/roles/network_client/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: copy hosts
3 | action: copy src=/etc/hosts dest=/etc/hosts
4 |
5 | - name: ntp is running
6 | action: service name=ntp state=started
7 | - name: configure ntp
8 | action: template src=ntp.conf.j2 dest=/etc/ntp.conf
9 | notify:
10 | - restart ntp
11 |
12 | - name: /home is in fstab
13 | action: "mount name=/home src=pihead:/home fstype=nfs state=present"
14 | - name: /home is mounted
15 | action: "mount name=/home src=pihead:/home fstype=nfs state=mounted"
16 |
17 |
--------------------------------------------------------------------------------
/roles/headnode/files/iptables-nat.txt:
--------------------------------------------------------------------------------
1 | # Generated by iptables-save v1.4.14 on Mon May 27 22:08:57 2013
2 | *nat
3 | :PREROUTING ACCEPT [10:1551]
4 | :INPUT ACCEPT [10:1551]
5 | :OUTPUT ACCEPT [7:572]
6 | :POSTROUTING ACCEPT [0:0]
7 | -A POSTROUTING -o eth0 -j MASQUERADE
8 | COMMIT
9 | # Completed on Mon May 27 22:08:57 2013
10 | # Generated by iptables-save v1.4.14 on Mon May 27 22:08:57 2013
11 | *filter
12 | :INPUT ACCEPT [145:10964]
13 | :FORWARD ACCEPT [0:0]
14 | :OUTPUT ACCEPT [77:8260]
15 | -A FORWARD -i eth0 -o eth0:1 -m state --state RELATED,ESTABLISHED -j ACCEPT
16 | -A FORWARD -i eth0:1 -o eth0 -j ACCEPT
17 | COMMIT
18 | # Completed on Mon May 27 22:08:57 2013
19 |
--------------------------------------------------------------------------------
/roles/gmetad/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: install packages for gmetad
3 | action: apt pkg={{item}} state=installed
4 | with_items:
5 | - ganglia-monitor
6 | - ganglia-webfrontend
7 | - apache2
8 |
9 | - name: start apache
10 | action: service name=apache2 state=started
11 | - name: configure apache
12 | action: template src=ganglia.conf.j2 dest=/etc/apache2/conf.d/ganglia.conf
13 | notify:
14 | - restart apache2
15 |
16 | - name: start gmond
17 | action: service name=ganglia-monitor state=started
18 | ignore_errors: yes
19 |
20 | - name: start gmetad
21 | action: service name=gmetad state=started
22 | ignore_errors: yes
23 |
--------------------------------------------------------------------------------
/roles/fireball/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: point dns at google
3 | action: template src=resolv.conf.j2 dest=/etc/resolv.conf
4 |
5 | - name: update apt-cache
6 | action: command apt-get -y update
7 | - name: install python-apt
8 | action: command apt-get -y install python-apt
9 |
10 | - name: install apt packages
11 | action: apt pkg={{item}} state=installed
12 | with_items:
13 | - python-dev
14 | - python-setuptools
15 |
16 |
17 | - name: install pip
18 | action: easy_install name=pip
19 |
20 | - name: install pip packages
21 | action: pip name={{item}} state=present
22 | with_items:
23 | - pyzmq
24 | - pyasn1
25 | - PyCrypto
26 | - python-keyczar
27 |
28 |
29 | - name: start fireball
30 | action: fireball
31 |
32 |
--------------------------------------------------------------------------------
/roles/slurm/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: install munge key
3 | action: copy src=munge.key dest=/etc/munge/munge.key owner=munge group=munge mode=0600
4 | notify:
5 | - restart munge
6 |
7 | - name: munge is started
8 | action: service name=munge state=started
9 |
10 | - name: install slurm packages
11 | action: apt pkg={{item}} state=installed
12 | with_items:
13 | - slurm-llnl
14 | - slurm-llnl-doc
15 | - slurm-llnl-torque
16 |
17 |
18 | - name: touch slurm log file
19 | action: command touch /var/log/slurm_jobacct.log
20 | - name: slurm log file ownership
21 | action: file dest=/var/log/slurm_jobacct.log owner=slurm state=file
22 |
23 | - name: slurm is configured
24 | action: template src=slurm.conf.j2 dest=/etc/slurm-llnl/slurm.conf owner=slurm
25 | notify:
26 | - restart slurm
27 |
28 | - name: slurm is started
29 | action: service name=slurm-llnl state=started
30 |
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013 Adam DeConinck
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/roles/headnode/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: install headnode packages from apt
3 | action: apt pkg={{item}} state=installed
4 | with_items:
5 | - git
6 | - screen
7 | - environment-modules
8 | - gfortran
9 | - dnsmasq
10 | - dnsutils
11 | - nfs-kernel-server
12 | - nfs-common
13 |
14 | - name: install ansible
15 | action: pip name=ansible state=latest
16 |
17 | - name: configure nfs
18 | action: template src=exports.j2 dest=/etc/exports
19 | - name: rpcbind is running
20 | action: service name=rpcbind state=started enabled=yes
21 | - name: nfs is running
22 | action: service name=nfs-kernel-server state=started enabled=yes
23 | - name: exportfs
24 | action: command exportfs -a
25 |
26 | - name: copy iptables config
27 | action: copy src=iptables-nat.txt dest=/etc/iptables-nat.txt
28 | - name: restore iptables config
29 | action: shell iptables-restore < /etc/iptables-nat.txt
30 | - name: rc.local restores iptables
31 | action: lineinfile dest=/etc/rc.local regexp=^iptables-restore line="iptables-restore < /etc/iptables-nat.txt"
32 | - name: configure on forwarding
33 | action: lineinfile dest=/etc/sysctl.conf regexp=^net.ipv4.ip_forward line=net.ipv4.ip_forward=1
34 | - name: turn on forwarding
35 | action: shell echo 1 > /proc/sys/net/ipv4/ip_forward
36 |
--------------------------------------------------------------------------------
/roles/network_client/templates/ntp.conf.j2:
--------------------------------------------------------------------------------
1 | # /etc/ntp.conf, configuration for ntpd; see ntp.conf(5) for help
2 |
3 | driftfile /var/lib/ntp/ntp.drift
4 |
5 |
6 | # Enable this if you want statistics to be logged.
7 | #statsdir /var/log/ntpstats/
8 |
9 | statistics loopstats peerstats clockstats
10 | filegen loopstats file loopstats type day enable
11 | filegen peerstats file peerstats type day enable
12 | filegen clockstats file clockstats type day enable
13 |
14 |
15 | # You do need to talk to an NTP server or two (or three).
16 | #server ntp.your-provider.example
17 |
18 | # pool.ntp.org maps to about 1000 low-stratum NTP servers. Your server will
19 | # pick a different set every time it starts up. Please consider joining the
20 | # pool:
21 | server pihead
22 |
23 |
24 | # Access control configuration; see /usr/share/doc/ntp-doc/html/accopt.html for
25 | # details. The web page <http://support.ntp.org/bin/view/Support/AccessRestrictions>
26 | # might also be helpful.
27 | #
28 | # Note that "restrict" applies to both servers and clients, so a configuration
29 | # that might be intended to block requests from certain clients could also end
30 | # up blocking replies from your own upstream servers.
31 |
32 | # By default, exchange time with everybody, but don't allow configuration.
33 | restrict -4 default kod notrap nomodify nopeer noquery
34 | restrict -6 default kod notrap nomodify nopeer noquery
35 |
36 | # Local users may interrogate the ntp server more closely.
37 | restrict 127.0.0.1
38 | restrict ::1
39 |
40 | # Clients from this (example!) subnet have unlimited access, but only if
41 | # cryptographically authenticated.
42 | #restrict 192.168.123.0 mask 255.255.255.0 notrust
43 |
44 |
45 | # If you want to provide time to your local subnet, change the next line.
46 | # (Again, the address is an example only.)
47 | #broadcast 192.168.123.255
48 |
49 | # If you want to listen to time broadcasts on your local subnet, de-comment the
50 | # next lines. Please do this only if you trust everybody on the network!
51 | #disable auth
52 | #broadcastclient
53 |
--------------------------------------------------------------------------------
/roles/slurm/templates/slurm.conf.j2:
--------------------------------------------------------------------------------
1 | # slurm.conf file generated by configurator.html.
2 | # Put this file on all nodes of your cluster.
3 | # See the slurm.conf man page for more information.
4 | #
5 | ControlMachine=pihead
6 | #ControlAddr=
7 | #BackupController=
8 | #BackupAddr=
9 | #
10 | AuthType=auth/munge
11 | CacheGroups=0
12 | #CheckpointType=checkpoint/none
13 | CryptoType=crypto/munge
14 | #DisableRootJobs=NO
15 | #EnforcePartLimits=NO
16 | #Epilog=
17 | #EpilogSlurmctld=
18 | #FirstJobId=1
19 | #MaxJobId=999999
20 | #GresTypes=
21 | #GroupUpdateForce=0
22 | #GroupUpdateTime=600
23 | #JobCheckpointDir=/var/slurm/checkpoint
24 | #JobCredentialPrivateKey=
25 | #JobCredentialPublicCertificate=
26 | #JobFileAppend=0
27 | #JobRequeue=1
28 | #JobSubmitPlugins=1
29 | #KillOnBadExit=0
30 | #Licenses=foo*4,bar
31 | #MailProg=/bin/mail
32 | #MaxJobCount=5000
33 | #MaxStepCount=40000
34 | #MaxTasksPerNode=128
35 | MpiDefault=none
36 | #MpiParams=ports=#-#
37 | #PluginDir=
38 | #PlugStackConfig=
39 | #PrivateData=jobs
40 | ProctrackType=proctrack/pgid
41 | #Prolog=
42 | #PrologSlurmctld=
43 | #PropagatePrioProcess=0
44 | #PropagateResourceLimits=
45 | #PropagateResourceLimitsExcept=
46 | ReturnToService=1
47 | #SallocDefaultCommand=
48 | SlurmctldPidFile=/var/run/slurmctld.pid
49 | SlurmctldPort=6817
50 | SlurmdPidFile=/var/run/slurmd.pid
51 | SlurmdPort=6818
52 | SlurmdSpoolDir=/tmp/slurmd
53 | SlurmUser=slurm
54 | #SlurmdUser=root
55 | #SrunEpilog=
56 | #SrunProlog=
57 | StateSaveLocation=/tmp
58 | SwitchType=switch/none
59 | #TaskEpilog=
60 | TaskPlugin=task/none
61 | #TaskPluginParam=
62 | #TaskProlog=
63 | #TopologyPlugin=topology/tree
64 | #TmpFs=/tmp
65 | #TrackWCKey=no
66 | #TreeWidth=
67 | #UnkillableStepProgram=
68 | #UsePAM=0
69 | #
70 | #
71 | # TIMERS
72 | #BatchStartTimeout=10
73 | #CompleteWait=0
74 | #EpilogMsgTime=2000
75 | #GetEnvTimeout=2
76 | #HealthCheckInterval=0
77 | #HealthCheckProgram=
78 | InactiveLimit=0
79 | KillWait=30
80 | #MessageTimeout=10
81 | #ResvOverRun=0
82 | MinJobAge=300
83 | #OverTimeLimit=0
84 | SlurmctldTimeout=120
85 | SlurmdTimeout=300
86 | #UnkillableStepTimeout=60
87 | #VSizeFactor=0
88 | Waittime=0
89 | #
90 | #
91 | # SCHEDULING
92 | #DefMemPerCPU=0
93 | FastSchedule=1
94 | #MaxMemPerCPU=0
95 | #SchedulerRootFilter=1
96 | #SchedulerTimeSlice=30
97 | SchedulerType=sched/backfill
98 | SchedulerPort=7321
99 | SelectType=select/linear
100 | #SelectTypeParameters=
101 | #
102 | #
103 | # JOB PRIORITY
104 | #PriorityType=priority/basic
105 | #PriorityDecayHalfLife=
106 | #PriorityCalcPeriod=
107 | #PriorityFavorSmall=
108 | #PriorityMaxAge=
109 | #PriorityUsageResetPeriod=
110 | #PriorityWeightAge=
111 | #PriorityWeightFairshare=
112 | #PriorityWeightJobSize=
113 | #PriorityWeightPartition=
114 | #PriorityWeightQOS=
115 | #
116 | #
117 | # LOGGING AND ACCOUNTING
118 | #AccountingStorageEnforce=0
119 | #AccountingStorageHost=
120 | #AccountingStorageLoc=
121 | #AccountingStoragePass=
122 | #AccountingStoragePort=
123 | AccountingStorageType=accounting_storage/filetxt
124 | #AccountingStorageUser=
125 | AccountingStoreJobComment=YES
126 | ClusterName=cluster
127 | #DebugFlags=
128 | #JobCompHost=
129 | #JobCompLoc=
130 | #JobCompPass=
131 | #JobCompPort=
132 | JobCompType=jobcomp/none
133 | #JobCompUser=
134 | JobAcctGatherFrequency=30
135 | JobAcctGatherType=jobacct_gather/none
136 | SlurmctldDebug=3
137 | #SlurmctldLogFile=
138 | SlurmdDebug=3
139 | #SlurmdLogFile=
140 | #SlurmSchedLogFile=
141 | #SlurmSchedLogLevel=
142 | #
143 | #
144 | # POWER SAVE SUPPORT FOR IDLE NODES (optional)
145 | #SuspendProgram=
146 | #ResumeProgram=
147 | #SuspendTimeout=
148 | #ResumeTimeout=
149 | #ResumeRate=
150 | #SuspendExcNodes=
151 | #SuspendExcParts=
152 | #SuspendRate=
153 | #SuspendTime=
154 | #
155 | #
156 | # COMPUTE NODES
157 | NodeName=pi[01-04] CPUs=1 State=UNKNOWN
158 | PartitionName=raspberry Nodes=pi[01-04] Default=YES MaxTime=INFINITE State=UP
159 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Ansible scripts for my Raspberry Pi Cluster
2 | ===========================================
3 |
4 | **Note: this repository is no longer being developed. My active development
5 | of Ansible playbooks for HPC can be found as part of the
6 | [ansible-simple-slurm-cluster](https://github.com/ajdecon/ansible-simple-slurm-cluster)
7 | repo.**
8 |
9 | --------------------
10 |
11 | This is a small collection of
12 | [Ansible](http://ansible.cc) playbooks for my Raspberry Pi cluster.
13 |
14 | **These really aren't set up to be general at the moment** -- i.e., there
15 | are still hard-coded IPs and names in config files, and other crap -- but
16 | if you want to try to use this to build your own cluster, feel free to give it
17 | a shot. :) It should at least be pretty straightforward to read and understand
18 | the playbooks.
19 |
20 | Ansible is a really cool tool for automating system configuration and deployment tasks.
21 | This set of playbooks will set up an HPC cluster with some of the following
22 | libraries and services:
23 |
24 | * Cluster Scheduler: [SLURM](http://slurm.schedmd.com)
25 | * Message-passing library: [OpenMPI](http://www.open-mpi.org/)
26 | * Benchmark: HPCC
27 | * Shared filesystem: NFS
28 | * Time server: NTP
29 | * DNS: dnsmasq
30 | * Performance monitoring: Ganglia
31 |
32 | **Important**: the notes below describe roughly what I did, but there was a lot of
33 | trial-and-error and I didn't note all the blind alleys. I haven't reproduced the
34 | setup from these instructions yet, so you might have to troubleshoot a bit.
35 |
36 | Physical setup and OS
37 | ---------------------
38 |
39 | 1. Get 2 or more Raspberry Pi Model B's. One will be the head node, the others
40 | will be compute nodes.
41 | 1. Download the most recent [Raspbian Linux](http://www.raspbian.org/) and copy
42 | the raw image onto an SD card. (I used the "dd" command on a Linux laptop to do
43 | this, as shown in the example after this list, but [this page](http://www.raspbian.org/RaspbianInstaller)
44 | has additional information on how to do that.)
45 | 1. I booted up the SD card on one of the Raspberry Pi's. I changed the password,
46 | made sure SSH was turned on and X Windows was turned off, then shut it back down.
47 | 1. Copy the contents of the Raspbian SD card to N identical SD cards for the
48 | other Raspberry Pi's.
49 |
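For example, writing the image from a Linux machine looks roughly like this. The
image filename and `/dev/sdX` are placeholders; double-check the device with
`lsblk` before running anything, because `dd` will overwrite whatever you point it at:

    # Replace the image name and /dev/sdX with your actual image and SD card device
    sudo dd if=raspbian.img of=/dev/sdX bs=4M
    sync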
50 |
51 | Networking setup
52 | ----------------
53 |
54 | I set up all my Raspberry Pi's with static addresses in the 192.168.42.0/24 subnet,
55 | and gave the head node a second interface that gets its address via DHCP.
56 |
57 | Mostly I did this so that I could run the cluster "stand-alone", i.e. without an
58 | external DHCP server, while still being able to plug it into a larger network
59 | and have the head node reachable from the rest of the network.
60 |
61 | Also because HPC clusters usually keep all their compute nodes on a "private" network,
62 | and I was trying to make it look like a "real" cluster. :)
63 |
64 | I probably could have figured out how to do this with Ansible, but I did this
65 | by hand at the time.
66 |
67 | 1. Get an ethernet switch and connect all the Raspberry Pi's. Also temporarily
68 | connect this switch to your home router, or other DHCP server.
69 | 1. Turn on all the Raspberry Pi's and let them boot.
70 | 1. Use nmap or look at your router config to figure out what their temporary IPs are.
71 | 1. SSH into the head node and set up two interfaces: eth0 plus the alias eth0:1. I configured
72 | eth0 to have the static address 192.168.42.1 and eth0:1 to use DHCP, so the head node could NAT
73 | to the outside world when connected to my home router. I also changed the head node
74 | hostname to "pihead" at this point.
75 | 1. SSH into each "compute" Raspberry Pi and configure a static IP on eth0 (see the example
76 | stanza after this list). Change the hostname to something like "pi01", "pi02", etc. Then reboot.
77 | 1. Create an /etc/hosts file on the head node to list the compute node names and IPs.
78 |
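A static stanza in `/etc/network/interfaces` along these lines should do the trick.
The address 192.168.42.11 is just an example (give every node its own address in
192.168.42.0/24), and the gateway is the head node so the compute nodes can reach
the outside world through its NAT:

    # Example static config for one compute node; pick a unique address per node.
    auto eth0
    iface eth0 inet static
        address 192.168.42.11
        netmask 255.255.255.0
        gateway 192.168.42.1
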
79 | At this point it's also a good idea to set up passwordless SSH with keys from your laptop
80 | into the head node, and from the head node to all the compute nodes. I used the "pi"
81 | user for this and gave "pi" sudo rights on all nodes.
82 |
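Something along these lines should work, using the example hostnames from above
(the first two commands run on your laptop, the rest on the head node):

    # On your laptop: create a key if you don't have one, then copy it to the head node
    ssh-keygen -t rsa
    ssh-copy-id pi@pihead

    # On the head node: copy its key to each compute node (repeat for pi02, pi03, pi04)
    ssh-keygen -t rsa
    ssh-copy-id pi@pi01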
83 |
84 | Ansible configuration of head node
85 | ----------------------------------
86 |
87 | If you haven't installed [Ansible](http://ansible.cc) yet, you should do
88 | so! Run "pip install ansible" or "easy_install ansible" to do this.
89 |
90 | Edit the "hosts.pi" file and enter the IP address of the head node, and the
91 | names or IPs of the compute nodes.
92 |
93 | You should (hopefully) be able to set up all the cluster services using the following
94 | command:
95 |
96 | ansible-playbook -i hosts.pi headnode.yml
97 |
98 | Ansible configuration of compute nodes
99 | --------------------------------------
100 |
101 | The compute nodes are on a private network, so you have to configure them from the
102 | head node.
103 |
104 | SSH into the head node as "pi" and confirm that this repo is present as "ansible-pi-cluster".
105 | Then you should be able to just run
106 |
107 | ansible-playbook -i hosts.pi computes.yml
108 |
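If that playbook can't reach the compute nodes, an ad-hoc ping from the head node is
a quick way to check that the inventory and SSH keys are right before digging deeper:

    ansible computes -i hosts.pi -m ping -u pi
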
109 | And with luck, you should then have your SLURM cluster!
110 |
111 | For information about how to use SLURM, see the
112 | [quickstart guide](http://slurm.schedmd.com/quickstart.html).
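
As a quick smoke test, assuming all four compute nodes registered (they're defined
as pi[01-04] in slurm.conf), run something like this on the head node; it should
print one hostname per node:

    sinfo
    srun -N 4 hostname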
113 |
--------------------------------------------------------------------------------