├── Dockerfile.centos
├── Dockerfile.ubuntu
├── Makefile
├── README.md
├── generate_dependencies.sh
└── slurm.pc

/Dockerfile.centos:
--------------------------------------------------------------------------------
# NOTE: no FROM line here; the Makefile prepends "FROM <base image>" when it
# generates .Dockerfile
MAINTAINER Jonathan Lefman jlefman@nvidia.com

RUN yum groupinstall -y "Development Tools"
RUN yum install -y bzip2 wget ruby-devel libmunge-devel pam-devel perl-devel
RUN wget https://repo.mysql.com/mysql80-community-release-el7-1.noarch.rpm && \
    rpm -i mysql80-community-release-el7-1.noarch.rpm && \
    yum install -y mysql-community-devel && \
    rm mysql80-community-release-el7-1.noarch.rpm
RUN gem install fpm

ARG SLURM_VERSION
ARG PKG_VERSION

RUN wget https://www.schedmd.com/downloads/latest/slurm-$SLURM_VERSION.tar.bz2 \
    && tar xvjf slurm-$SLURM_VERSION.tar.bz2 -C / \
    && rm -f slurm-$SLURM_VERSION.tar.bz2
RUN cd /slurm-$SLURM_VERSION \
    && ./configure --prefix=/tmp/slurm-build --sysconfdir=/etc/slurm

RUN cd /slurm-$SLURM_VERSION \
    && make -j
RUN cd /slurm-$SLURM_VERSION \
    && make -j contrib
RUN cd /slurm-$SLURM_VERSION \
    && make -j install

RUN cp /slurm-$SLURM_VERSION/contribs/pam/.libs/pam_slurm.so /tmp/slurm-build/lib

# Create the pkgconfig directories before copying slurm.pc into them
RUN mkdir -p /tmp/slurm-build/share/pkgconfig \
    && mkdir -p /tmp/slurm-build/lib/pkgconfig

COPY slurm.pc /tmp/slurm-build/share/pkgconfig/slurm.pc
COPY slurm.pc /tmp/slurm-build/lib/pkgconfig/slurm.pc

RUN cd /tmp/slurm-build && fpm -s dir -t rpm -n slurm -v ${SLURM_VERSION} --iteration=${PKG_VERSION} --prefix=/usr -C /tmp/slurm-build .
--------------------------------------------------------------------------------
/Dockerfile.ubuntu:
--------------------------------------------------------------------------------
# NOTE: no FROM line here; the Makefile prepends "FROM <base image>" when it
# generates .Dockerfile
MAINTAINER Ryan Olson rolson@nvidia.com

RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential ruby-dev libpam0g-dev libmysqlclient-dev \
    libmunge-dev libmysqld-dev wget python-minimal && \
    rm -rf /var/lib/apt/lists/*

RUN gem install fpm

ARG SLURM_VERSION
ARG PKG_VERSION

RUN wget https://www.schedmd.com/downloads/latest/slurm-$SLURM_VERSION.tar.bz2 \
    && tar xvjf slurm-$SLURM_VERSION.tar.bz2 -C / \
    && rm -f slurm-$SLURM_VERSION.tar.bz2 \
    && cd /slurm-$SLURM_VERSION \
    && ./configure --prefix=/tmp/slurm-build --sysconfdir=/etc/slurm \
    && make -j \
    && make -j contrib \
    && make -j install

RUN cp /slurm-$SLURM_VERSION/contribs/pam/.libs/pam_slurm.so /tmp/slurm-build/lib

# Build the Depends list: for each binary and library in the build tree,
# collect the NEEDED entries via objdump and map them to the Debian packages
# that provide them (results land in /tmp/slurm-packages; see
# generate_dependencies.sh).
COPY ./generate_dependencies.sh /tmp/
RUN /tmp/generate_dependencies.sh 2>/dev/null

RUN mkdir -p /tmp/slurm-build/share/doc/slurm/ \
    && cp /slurm-$SLURM_VERSION/COPYING /tmp/slurm-build/share/doc/slurm/copyright

RUN mkdir -p /tmp/slurm-build/share/pkgconfig \
    && mkdir -p /tmp/slurm-build/lib/pkgconfig

COPY slurm.pc /tmp/slurm-build/share/pkgconfig/slurm.pc
COPY slurm.pc /tmp/slurm-build/lib/pkgconfig/slurm.pc

# Build the .deb inside /tmp/slurm-build so the Makefile "copy" target can
# find it there
RUN cd /tmp/slurm-build \
    && fpm -s dir -t deb -n slurm -v ${SLURM_VERSION} --iteration ${PKG_VERSION} --prefix=/usr -C /tmp/slurm-build \
       $(for pkg in $(cat /tmp/slurm-packages); do echo --depends $pkg; done) .
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
SLURM_VERSION ?= 17.11.12
PKG_VERSION ?= 2

ifdef DOCKER_APT_PROXY
CACHES = --build-arg APT_PROXY_PORT=${DOCKER_APT_PROXY}
else
CACHES =
endif

.PHONY: default build copy dev tag push release clean distclean

default: clean copy

BUILD_DISTRO ?= ubuntu
ifeq ($(BUILD_DISTRO), ubuntu)
BASE_IMAGE := ubuntu:16.04
IMAGE_NAME := build-slurm:ubuntu-16.04
FILE_EXT := _amd64.deb
FILE_PRE := _
endif
ifeq ($(BUILD_DISTRO), centos)
BASE_IMAGE := centos:7
IMAGE_NAME := build-slurm:centos-7
FILE_EXT := .x86_64.rpm
FILE_PRE := -
endif

RELEASE_IMAGE ?= nvcr.io/nvidian_sas/${IMAGE_NAME}

.Dockerfile:
	echo FROM $(BASE_IMAGE) > .Dockerfile
	cat Dockerfile.$(BUILD_DISTRO) >> .Dockerfile

build: .Dockerfile
	docker build ${CACHES} --build-arg SLURM_VERSION=${SLURM_VERSION} --build-arg PKG_VERSION=${PKG_VERSION} -f .Dockerfile -t ${IMAGE_NAME} .

copy: build
	docker run --rm -ti -v ${PWD}:/out ${IMAGE_NAME} cp /tmp/slurm-build/slurm${FILE_PRE}${SLURM_VERSION}-${PKG_VERSION}${FILE_EXT} /out

dev: build
	docker run --rm -ti -v ${PWD}:/out ${IMAGE_NAME} bash

tag: build
	docker tag ${IMAGE_NAME} ${RELEASE_IMAGE}

push: tag
	docker push ${RELEASE_IMAGE}

release: push

clean:
	@rm -f .Dockerfile 2> /dev/null ||:
	@docker rm -v `docker ps -a -q -f "status=exited"` 2> /dev/null ||:
	@docker rmi `docker images -q -f "dangling=true"` 2> /dev/null ||:

distclean: clean
	@docker rmi ${IMAGE_NAME} 2> /dev/null ||:
	@docker rmi ${RELEASE_IMAGE} 2> /dev/null ||:
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Overview

Slurm overview: https://slurm.schedmd.com/overview.html

> Slurm is an open source, fault-tolerant, and highly scalable cluster management and job scheduling system for large and small Linux clusters. Slurm requires no kernel modifications for its operation and is relatively self-contained. As a cluster workload manager, Slurm has three key functions. First, it allocates exclusive and/or non-exclusive access to resources (compute nodes) to users for some duration of time so they can perform work. Second, it provides a framework for starting, executing, and monitoring work (normally a parallel job) on the set of allocated nodes. Finally, it arbitrates contention for resources by managing a queue of pending work. Optional plugins can be used for accounting, advanced reservation, gang scheduling (time sharing for parallel jobs), backfill scheduling, topology optimized resource selection, resource limits by user or bank account, and sophisticated multifactor job prioritization algorithms.

## GPU resource scheduling in Slurm

### Simple GPU scheduling with exclusive node access

Slurm supports scheduling GPUs as a consumable resource, just like memory and disk. If you are not interested in allowing multiple jobs per compute node, you may not need to make Slurm aware of the GPUs in the system at all, and the configuration can be greatly simplified.

One way of scheduling GPUs without making use of GRES (Generic RESource) scheduling is to create partitions or queues for logical groups of GPUs. For example, grouping nodes with P100 GPUs into a P100 partition:

```console
$ sinfo -s
PARTITION  AVAIL  TIMELIMIT  NODES(A/I/O/T)  NODELIST
p100       up     infinite   4/9/3/16        node[212-213,215-218,220-229]
```

Partition configuration in the Slurm configuration file `slurm.conf`:

```console
NodeName=node[212-213,215-218,220-229]
PartitionName=p100 Default=NO DefaultTime=01:00:00 State=UP Nodes=node[212-213,215-218,220-229]
```
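
With such a partition defined, a job can request exclusive access to a node in the group and use every GPU on it, with no GRES configuration at all. A minimal sketch (`train.sh` is a placeholder job script):

```console
$ sbatch --partition=p100 --nodes=1 --exclusive train.sh
```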

### Scheduling resources at the per-GPU level

Slurm can be made aware of GPUs as a consumable resource, allowing jobs to request any number of GPUs.

This feature requires job accounting to be enabled first; for more information, see: https://slurm.schedmd.com/accounting.html

The Slurm configuration file needs parameters set to enable cgroups for resource management and GPU resource scheduling:

`slurm.conf`:

```console
# General
ProctrackType=proctrack/cgroup
TaskPlugin=task/cgroup

# Scheduling
SelectType=select/cons_res
SelectTypeParameters=CR_Core_Memory

# Logging and Accounting
AccountingStorageTRES=gres/gpu
DebugFlags=CPU_Bind,gres # show detailed information in Slurm logs about GPU binding and affinity
JobAcctGatherType=jobacct_gather/cgroup
```

Node and partition definitions in `slurm.conf` declare the GPUs available on each node:

```console
# Partitions
GresTypes=gpu
NodeName=slurm-node-0[0-1] Gres=gpu:2 CPUs=10 Sockets=1 CoresPerSocket=10 ThreadsPerCore=1 RealMemory=30000 State=UNKNOWN
PartitionName=compute Nodes=ALL Default=YES MaxTime=48:00:00 DefaultTime=04:00:00 MaxNodes=2 State=UP DefMemPerCPU=3000
```

Cgroups require a separate configuration file, `cgroup.conf`:

```console
CgroupAutomount=yes
CgroupReleaseAgentDir="/etc/slurm/cgroup"

ConstrainCores=yes
ConstrainDevices=yes
ConstrainRAMSpace=yes
#TaskAffinity=yes
```

GPU resource scheduling requires a configuration file, `gres.conf`, to define the available GPUs and their CPU affinity:

```console
Name=gpu File=/dev/nvidia0 CPUs=0-4
Name=gpu File=/dev/nvidia1 CPUs=5-9
```

Running jobs that use GPU resources requires the `--gres` flag; for example, to run a job requiring a single GPU:

```console
$ srun --gres=gpu:1 nvidia-smi
```

To enforce proper CPU:GPU affinity (e.g., for performance reasons), use the `--gres-flags=enforce-binding` flag:

> `--gres-flags=enforce-binding`
> If set, the only CPUs available to the job will be those bound to the selected GRES (i.e. the CPUs identified in the gres.conf file will be strictly enforced rather than advisory). This option may result in delayed initiation of a job. For example, a job requiring two GPUs and one CPU will be delayed until both GPUs on a single socket are available rather than using GPUs bound to separate sockets; however, the application performance may be improved due to improved communication speed. Requires the node to be configured with more than one socket, and resource filtering will be performed on a per-socket basis. This option applies to job allocations.
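
For example, a job requesting two GPUs with binding strictly enforced might look like this (a sketch; `nvidia-smi` stands in for a real workload):

```console
$ srun --gres=gpu:2 --gres-flags=enforce-binding nvidia-smi
```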

### Kernel configuration

Using memory cgroups to restrict jobs to their allocated memory resources requires setting kernel parameters.

On Ubuntu systems this is configurable via `/etc/default/grub`:

> GRUB_CMDLINE_LINUX="cgroup_enable=memory swapaccount=1"
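
After editing `/etc/default/grub`, regenerate the GRUB configuration and reboot so the new kernel parameters take effect (an Ubuntu-specific sketch; adjust for your bootloader):

```console
$ sudo update-grub
$ sudo reboot
```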

## Step-by-step instructions

See: https://github.com/mknoxnv/ubuntu-slurm

## Building newer versions of Slurm from source as .deb packages for Ubuntu or .rpm packages for CentOS

### Install dependencies

#### Ubuntu

```console
sudo apt-get install build-essential ruby-dev libpam0g-dev libmysqlclient-dev libmunge-dev libmysqld-dev
```

#### CentOS

```console
sudo yum groupinstall -y "Development Tools"
sudo yum install -y bzip2 wget ruby-devel libmunge-devel pam-devel perl-devel
wget https://repo.mysql.com/mysql80-community-release-el7-1.noarch.rpm && \
sudo rpm -i mysql80-community-release-el7-1.noarch.rpm && \
sudo yum install -y mysql-community-devel && \
rm mysql80-community-release-el7-1.noarch.rpm
```

### Install the FPM packaging tool

> fpm - https://github.com/jordansissel/fpm

```console
sudo gem install fpm
```

### Configure and build Slurm

```console
export SLURM_VERSION=17.11.12
wget https://www.schedmd.com/downloads/latest/slurm-${SLURM_VERSION}.tar.bz2
tar xvjf slurm-${SLURM_VERSION}.tar.bz2
cd slurm-${SLURM_VERSION}
./configure --prefix=/tmp/slurm-build --sysconfdir=/etc/slurm
make -j
make -j contrib
make -j install
```

### Package the Slurm install directory as a Debian package using FPM

> Modify the version via the `-v` flag for source version changes, and the `--iteration` flag for build version changes, so that APT will detect updated packages.

```console
export BUILD_ITERATION=1
fpm -s dir -t deb -v ${SLURM_VERSION} --iteration ${BUILD_ITERATION} -n slurm --prefix=/usr -C /tmp/slurm-build .
```

A deb package such as `slurm_17.11.12-1_amd64.deb` is created in the same directory. You may inspect its contents using:

```console
dpkg --contents slurm_${SLURM_VERSION}-${BUILD_ITERATION}_amd64.deb
```

## Build with Docker (preferred method)

> Assumes `docker` is already installed.

The Dockerfile and Makefile provided in this repo wrap the above build-and-package steps into a containerized workflow.
The deb package also includes the customary `copyright` license file, copied from the source archive to the appropriate `/usr/share/doc` location.

If you need to update the Slurm source version, make the necessary version string changes in the `Makefile` prior to the `make` step:

```console
git clone https://github.com/dholt/slurm-gpu
cd slurm-gpu/
make BUILD_DISTRO=ubuntu    # for CentOS, use BUILD_DISTRO=centos
```

For Ubuntu, a nicely packaged `slurm_17.11.12-2_amd64.deb` should now exist in the same directory.
Inspect its contents using `dpkg --contents slurm_17.11.12-2_amd64.deb`.

For CentOS, a nicely packaged `slurm-17.11.12-2.x86_64.rpm` should now exist in the same directory.
Inspect its contents using `rpm -qlp slurm-17.11.12-2.x86_64.rpm`.
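
The resulting package can then be installed on each cluster node. A sketch, assuming the file names produced above (on Ubuntu, follow with `sudo apt-get install -f` if dpkg reports missing dependencies):

```console
sudo dpkg -i slurm_17.11.12-2_amd64.deb     # Ubuntu
sudo rpm -ivh slurm-17.11.12-2.x86_64.rpm   # CentOS
```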
--------------------------------------------------------------------------------
/generate_dependencies.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Collect the shared-library dependencies of everything in the Slurm build
# tree and map them to the Debian packages that provide them.

if [ -e /tmp/slurm-dependencies ]; then
    rm /tmp/slurm-dependencies
fi

# Record the NEEDED (DT_NEEDED) entries of every binary and library
for component in bin lib lib/slurm sbin
do
    path=/tmp/slurm-build/${component}
    objdump -p $path/* | grep NEEDED | tr -s ' ' | cut -d ' ' -f3 | sort | uniq >> /tmp/slurm-dependencies
done

# Deduplicate the library list, then resolve each library to the Debian
# package that owns it (dpkg -S) and strip the ":arch" suffix
cat /tmp/slurm-dependencies | sort | uniq > /tmp/slurm-libraries
cat /tmp/slurm-libraries | xargs -n1 dpkg -S | cut -d ' ' -f 1 | sort | uniq | tr ':' ' ' | cut -d ' ' -f 1 > /tmp/slurm-packages
--------------------------------------------------------------------------------
/slurm.pc:
--------------------------------------------------------------------------------
includedir=/usr/include/slurm

Name: slurm
Description: Slurm
Version: 1.0.0
Cflags: -I/usr/include/slurm
Libs: -lslurm
--------------------------------------------------------------------------------