├── .gitignore
├── roles
│   └── slurm
│       ├── tests
│       │   ├── inventory
│       │   └── test.yml
│       ├── defaults
│       │   └── main.yml
│       ├── vars
│       │   └── main.yml
│       ├── handlers
│       │   └── main.yml
│       ├── README.md
│       ├── tasks
│       │   ├── podman-compose.yml
│       │   └── main.yml
│       └── meta
│           └── main.yml
├── local-setup
│   ├── requirements.txt
│   ├── collections.yml
│   ├── env.sh
│   ├── venv.sh
│   └── install.sh
├── .codespellrc
├── .github
│   └── workflows
│       ├── codespell.yml
│       └── shellcheck.yml
├── dartmouth-discovery.yml
├── Makefile
├── sample.slurm
└── images
    └── login-node
        └── Containerfile

/.gitignore:
--------------------------------------------------------------------------------
.phpc

--------------------------------------------------------------------------------
/roles/slurm/tests/inventory:
--------------------------------------------------------------------------------
localhost

--------------------------------------------------------------------------------
/roles/slurm/defaults/main.yml:
--------------------------------------------------------------------------------
---
state: started

--------------------------------------------------------------------------------
/local-setup/requirements.txt:
--------------------------------------------------------------------------------
ansible
podman-compose

--------------------------------------------------------------------------------
/roles/slurm/vars/main.yml:
--------------------------------------------------------------------------------
---
# vars file for slurm

--------------------------------------------------------------------------------
/roles/slurm/handlers/main.yml:
--------------------------------------------------------------------------------
---
# handlers file for slurm

--------------------------------------------------------------------------------
/.codespellrc:
--------------------------------------------------------------------------------
[codespell]
skip = .git,*.pdf,*.svg
# ignore-words-list =

--------------------------------------------------------------------------------
/local-setup/collections.yml:
--------------------------------------------------------------------------------
---
collections:
  - containers.podman
  # - community.kubernetes

--------------------------------------------------------------------------------
/roles/slurm/tests/test.yml:
--------------------------------------------------------------------------------
---
- hosts: localhost
  remote_user: root
  roles:
    - slurm

--------------------------------------------------------------------------------
/local-setup/env.sh:
--------------------------------------------------------------------------------
# Meant to be sourced, not executed; uses POSIX `.` so it also works under sh.
PHPC_VENV="${PHPC_VENV:-.phpc}"
python3 -m venv "$PHPC_VENV"
. "$PHPC_VENV/bin/activate"
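# Illustrative usage (an assumed invocation, not part of the original script):
# run from the repository root so the relative venv path resolves, and source
# the file so activation takes effect in your current shell.
#
#   source local-setup/env.sh                      # venv at ./.phpc (default)
#   PHPC_VENV=/tmp/phpc source local-setup/env.sh  # venv at a custom path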
--------------------------------------------------------------------------------
/local-setup/venv.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Manual setup; run from within local-setup/ (the requirements paths below are
# relative to this directory).
python3 -m venv .phpc
. .phpc/bin/activate
which pip3
pip3 install -r requirements.txt
ansible-galaxy collection install -r collections.yml

--------------------------------------------------------------------------------
/local-setup/install.sh:
--------------------------------------------------------------------------------
#!/bin/bash
source local-setup/env.sh
pip3 install --upgrade pip
pip3 install -r local-setup/requirements.txt
ansible-galaxy collection install -r local-setup/collections.yml

--------------------------------------------------------------------------------
/.github/workflows/codespell.yml:
--------------------------------------------------------------------------------
---
name: Codespell

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Codespell
        uses: codespell-project/actions-codespell@v1

--------------------------------------------------------------------------------
/dartmouth-discovery.yml:
--------------------------------------------------------------------------------
---
# Simulates an environment like Dartmouth's SLURM cluster (Discovery)
- name: Set up a discovery-like cluster
  hosts: localhost
  # vars:
  #   custom image tags
  #   custom images
  #   custom ports
  #   custom node names
  #   custom environments [LONG TERM, provisioning will be annoying; probably
  #   best to build custom images]
  roles:
    - slurm
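# A hedged sketch of what that vars block could eventually look like. Only
# `state` is consumed by the slurm role today (see roles/slurm/defaults/main.yml);
# the other names are hypothetical placeholders for the ideas listed above.
#
# vars:
#   state: started
#   slurm_image_tag: 19.05.5-1                              # hypothetical
#   slurm_node_names: [slurmnode1, slurmnode2, slurmnode3]  # hypothetical
#   slurm_master_ports: [6817, 6818, 6819]                  # hypothetical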
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Recipes use `.` (not `source`) and `-p slurm` so they work under /bin/sh and
# keep the compose network named slurm_default, which the role's tasks expect.
.PHONY: install
install:
	./local-setup/install.sh

.PHONY: slurm-up
slurm-up:
	. local-setup/env.sh; podman-compose -p slurm -f roles/slurm/tasks/podman-compose.yml up

.PHONY: slurm-down
slurm-down:
	. local-setup/env.sh; podman-compose -p slurm -f roles/slurm/tasks/podman-compose.yml down

.PHONY: discovery-up
discovery-up:
	ansible-playbook dartmouth-discovery.yml -e state=started

.PHONY: discovery-down
discovery-down:
	ansible-playbook dartmouth-discovery.yml -e state=absent

.PHONY: again
again: discovery-down discovery-up
	echo "AGAIN!"

--------------------------------------------------------------------------------
/sample.slurm:
--------------------------------------------------------------------------------
#!/bin/bash
#SBATCH --job-name=serial_job_test   # Job name
#SBATCH --mail-type=NONE             # Mail events (NONE, BEGIN, END, FAIL, ALL)
#SBATCH --mail-user=email@ufl.edu    # Where to send mail
#SBATCH --ntasks=1                   # Run on a single CPU
#SBATCH --mem=100mb                  # Job memory request
#SBATCH --time=00:05:00              # Time limit hrs:min:sec
#SBATCH --output=serial_test_%j.log  # Standard output and error log
pwd; hostname; date

echo "Running plot script on a single CPU core"

date
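# Illustrative usage from inside the cluster (assumes the containers from
# roles/slurm are running and the standard SLURM client tools are on PATH):
#
#   sbatch sample.slurm           # submit the job
#   squeue                        # watch it in the queue
#   cat serial_test_<jobid>.log   # inspect the combined stdout/stderr log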
--------------------------------------------------------------------------------
/.github/workflows/shellcheck.yml:
--------------------------------------------------------------------------------
---
name: Shellcheck

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  shellcheck:
    name: Check shell scripts
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Install shellcheck
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y shellcheck

      - name: Run shellcheck
        run: git grep -m 1 -l '^#!/bin/.*sh' | xargs shellcheck

--------------------------------------------------------------------------------
/images/login-node/Containerfile:
--------------------------------------------------------------------------------
FROM ubuntu:22.04

RUN apt-get update && apt-get install -y openssh-server

RUN mkdir /var/run/sshd

# --disabled-password keeps adduser non-interactive during the build; the
# password is set explicitly on the next line.
RUN adduser --disabled-password --gecos "" testuser
RUN echo 'testuser:pass' | chpasswd

RUN sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
# The listening port is set via the `-p 7788` flag in CMD below rather than in
# sshd_config.

RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd

EXPOSE 7788

CMD ["/usr/sbin/sshd", "-D", "-p", "7788"]

--------------------------------------------------------------------------------
/roles/slurm/README.md:
--------------------------------------------------------------------------------
slurm
=====

Runs a small SLURM cluster (a scheduler node plus compute nodes) in Podman
containers, for local testing against a Discovery-like environment.

Requirements
------------

Podman and the `containers.podman` Ansible collection (installed via
`local-setup/collections.yml`) are required on the target host. The tasks
attach containers to a `slurm_default` network; that network is created when
the compose stack in `tasks/podman-compose.yml` is brought up under the
project name `slurm` (e.g. via the Makefile's `slurm-up` target), so it must
already exist.

Role Variables
--------------

* `state` (default: `started`, see `defaults/main.yml`): desired state of the
  cluster containers. Set to `absent` to tear the cluster down, as the
  Makefile's `discovery-down` target does.

Dependencies
------------

None.

Example Playbook
----------------

    - hosts: localhost
      roles:
        - { role: slurm, state: started }

License
-------

BSD

Author Information
------------------

An optional section for the role authors to include contact information, or a website (HTML is not allowed).

--------------------------------------------------------------------------------
/roles/slurm/tasks/podman-compose.yml:
--------------------------------------------------------------------------------
services:
  slurmjupyter:
    image: rancavil/slurm-jupyter:19.05.5-1
    hostname: slurmjupyter
    user: admin
    volumes:
      - shared-vol:/home/admin
    ports:
      - 8888:8888
  slurmmaster:
    image: rancavil/slurm-master:19.05.5-1
    hostname: slurmmaster
    user: admin
    volumes:
      - shared-vol:/home/admin
    ports:
      - 6817:6817
      - 6818:6818
      - 6819:6819
  slurmnode1:
    image: rancavil/slurm-node:19.05.5-1
    hostname: slurmnode1
    user: admin
    volumes:
      - shared-vol:/home/admin
    environment:
      - SLURM_NODENAME=slurmnode1
    links:
      - slurmmaster
  slurmnode2:
    image: rancavil/slurm-node:19.05.5-1
    hostname: slurmnode2
    user: admin
    volumes:
      - shared-vol:/home/admin
    environment:
      - SLURM_NODENAME=slurmnode2
    links:
      - slurmmaster
  slurmnode3:
    image: rancavil/slurm-node:19.05.5-1
    hostname: slurmnode3
    user: admin
    volumes:
      - shared-vol:/home/admin
    environment:
      - SLURM_NODENAME=slurmnode3
    links:
      - slurmmaster
volumes:
  shared-vol:
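# Illustrative usage (matching the Makefile's slurm-up/slurm-down targets; the
# `-p slurm` project name keeps the stack's default network named
# slurm_default, which roles/slurm/tasks/main.yml expects):
#
#   podman-compose -p slurm -f roles/slurm/tasks/podman-compose.yml up
#   podman-compose -p slurm -f roles/slurm/tasks/podman-compose.yml down
#
# Jupyter is then reachable at http://localhost:8888.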
--------------------------------------------------------------------------------
/roles/slurm/meta/main.yml:
--------------------------------------------------------------------------------
galaxy_info:
  author: your name
  description: your role description
  company: your company (optional)

  # If the issue tracker for your role is not on github, uncomment the
  # next line and provide a value
  # issue_tracker_url: http://example.com/issue/tracker

  # Choose a valid license ID from https://spdx.org - some suggested licenses:
  # - BSD-3-Clause (default)
  # - MIT
  # - GPL-2.0-or-later
  # - GPL-3.0-only
  # - Apache-2.0
  # - CC-BY-4.0
  license: license (GPL-2.0-or-later, MIT, etc)

  min_ansible_version: 2.1

  # If this a Container Enabled role, provide the minimum Ansible Container version.
  # min_ansible_container_version:

  #
  # Provide a list of supported platforms, and for each platform a list of versions.
  # If you don't wish to enumerate all versions for a particular platform, use 'all'.
  # To view available platforms and versions (or releases), visit:
  # https://galaxy.ansible.com/api/v1/platforms/
  #
  # platforms:
  # - name: Fedora
  #   versions:
  #   - all
  #   - 25
  # - name: SomePlatform
  #   versions:
  #   - all
  #   - 1.0
  #   - 7
  #   - 99.99

  galaxy_tags: []
  # List tags for your role here, one per line. A tag is a keyword that describes
  # and categorizes the role. Users find roles by searching for tags. Be sure to
  # remove the '[]' above, if you add tags to this list.
  #
  # NOTE: A tag is limited to a single word comprised of alphanumeric characters.
  # Maximum 20 tags per role.

dependencies: []
# List your role dependencies here, one per line. Be sure to remove the '[]' above,
# if you add dependencies to this list.

--------------------------------------------------------------------------------
/roles/slurm/tasks/main.yml:
--------------------------------------------------------------------------------
---
# - name: run jupyter
#   containers.podman.podman_container:
#     name: jupyter-lab
#     image: registry.hub.docker.com/rancavil/slurm-jupyter:19.05.5-1
#     state: "{{ state }}"
#     ports:
#       - 8888:8888

# docker network create -d bridge test
# docker run -p 22 --rm -ti --name u1 --network test ubuntu:v2 bash
# docker run -p 22 --rm -ti --name u2 --network test ubuntu:v2 bash
# - name: Create a podman network
#   containers.podman.podman_network:
#     name: podman_network

# - name: run login-node
#   containers.podman.podman_container:
#     name: login-node
#     image: localhost/login-node:0.0.0
#     hostname: loginnode
#     state: "{{ state }}"
#     etc_hosts:
#       other: "127.0.0.1"
#     ports:
#       - "7788:7788"
#     # network: podman_network
#     network: slurm_default

# TODO(asmacdo) need to make ports configurable. do they need to be in containerfile at all?
# - name: run login-node2
#   containers.podman.podman_container:
#     name: login-node2
#     image: localhost/login-node:0.0.0
#     hostname: loginnode
#     state: "{{ state }}"
#     etc_hosts:
#       other: "127.0.0.1"
#     ports:
#       - "7799:7799"
#     # network: podman_network

# - name: run jupyternode
#     name: slurmjupyter
#     hostname: slurmjupyter
#     user: admin
#     volumes:
#       - shared-vol:/home/admin
#     ports:
#       - 8888:8888

- name: Create scheduler node
  containers.podman.podman_container:
    state: "{{ state }}"
    name: slurmmaster
    image: rancavil/slurm-master:19.05.5-1
    hostname: slurmmaster
    user: admin
    volumes:
      - shared-vol:/home/admin
    ports:
      - 6817:6817
      - 6818:6818
      - 6819:6819
    network: slurm_default

- name: Create compute node
  containers.podman.podman_container:
    state: "{{ state }}"
    name: slurmnode1
    image: rancavil/slurm-node:19.05.5-1
    hostname: slurmnode1
    user: admin
    volumes:
      - shared-vol:/home/admin
    env:
      SLURM_NODENAME: slurmnode1
    network: slurm_default
    # links:
    #   - slurmmaster

# Remaining compute nodes carried over from the compose file, not yet
# converted to tasks:
# slurmnode2:
#   image: rancavil/slurm-node:19.05.5-1
#   hostname: slurmnode2
#   user: admin
#   volumes:
#     - shared-vol:/home/admin
#   environment:
#     - SLURM_NODENAME=slurmnode2
#   links:
#     - slurmmaster
# slurmnode3:
#   image: rancavil/slurm-node:19.05.5-1
#   hostname: slurmnode3
#   user: admin
#   volumes:
#     - shared-vol:/home/admin
#   environment:
#     - SLURM_NODENAME=slurmnode3
#   links:
#     - slurmmaster
# volumes:
#   shared-vol:
--------------------------------------------------------------------------------