├── use-env ├── test.conf.include ├── version.map ├── Makefile ├── test.conf ├── log_msg.h ├── main.c ├── use-env.h └── log_msg.c ├── META ├── cpuset ├── version.map ├── cpuset.init ├── slurm.h ├── create.h ├── nodemap.h ├── slurm.c ├── Makefile ├── log.h ├── util.h ├── conf-parser.l ├── release-agent.c ├── test.c ├── pam_slurm_cpuset.8 ├── README ├── conf.h ├── use-cpusets.1 ├── log.c ├── conf-parser.y ├── pam_slurm_cpuset.c └── conf.c ├── iorelay ├── Makefile ├── iorelay-mrsh-sshfs-wrap.sh ├── iorelay-bind-nfs.sh ├── iorelay-mount-nodezero.sh └── iorelay.c ├── lua ├── lua-schedutils │ ├── cpuset-str.h │ ├── Makefile │ ├── tests │ │ ├── lunit │ │ └── lunit-console.lua │ └── cpuset-str.c ├── Makefile ├── hugepages.lua └── oom-detect.lua ├── overcommit-memory ├── Makefile ├── overcommit.h ├── util.c └── overcommit-memory.c ├── DISCLAIMER ├── lib ├── split.h ├── fd.h ├── split.c └── fd.c ├── Makefile ├── NEWS.old ├── system-safe.c ├── addr-no-randomize.c ├── tmpdir.c ├── iotrace.c ├── NEWS ├── renice.c ├── README ├── setsched.c ├── slurm-spank-plugins.spec └── system-safe-preload.c /use-env/test.conf.include: -------------------------------------------------------------------------------- 1 | 2 | print "Included file" 3 | 4 | -------------------------------------------------------------------------------- /META: -------------------------------------------------------------------------------- 1 | Name: slurm-spank-plugins 2 | Version: 0.25 3 | Release: 1 4 | Author: Mark Grondona 5 | -------------------------------------------------------------------------------- /cpuset/version.map: -------------------------------------------------------------------------------- 1 | { global: 2 | plugin_name; 3 | plugin_type; 4 | plugin_version; 5 | spank*; 6 | slurm_spank*; 7 | local: 8 | *; 9 | }; 10 | -------------------------------------------------------------------------------- /use-env/version.map: 
-------------------------------------------------------------------------------- 1 | { global: 2 | plugin_name; 3 | plugin_type; 4 | plugin_version; 5 | spank*; 6 | slurm_spank*; 7 | local: 8 | *; 9 | }; 10 | -------------------------------------------------------------------------------- /iorelay/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -Wall -ggdb 2 | 3 | all: iorelay.so 4 | 5 | .SUFFIXES: .c .o .so 6 | 7 | .c.o: 8 | $(CC) $(CFLAGS) -o $@ -fPIC -c $< 9 | .o.so: 10 | $(CC) -shared -o $*.so $< $(LIBS) 11 | 12 | clean: 13 | rm -f *.so *.o 14 | -------------------------------------------------------------------------------- /lua/lua-schedutils/cpuset-str.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef _HAVE_CPUSET_STR_H 4 | #define _HAVE_CPUSET_STR_H 5 | 6 | int hex_to_cpuset (cpu_set_t *mask, const char *str); 7 | int cpuset_to_hex (cpu_set_t *mask, char *str, size_t len, char *sep); 8 | int cstr_to_cpuset(cpu_set_t *mask, const char* str); 9 | int str_to_cpuset(cpu_set_t *mask, const char* str); 10 | char * cpuset_to_cstr (cpu_set_t *mask, char *str); 11 | 12 | #endif /* !_HAVE_CPUSET_STR_H */ 13 | -------------------------------------------------------------------------------- /lua/lua-schedutils/Makefile: -------------------------------------------------------------------------------- 1 | LUA_VER ?= 5.1 2 | LIBDIR ?= /usr/local/lib 3 | LUA_OBJDIR ?= $(LIBDIR)/lua/$(LUA_VER) 4 | PREFIX ?= /usr/local 5 | 6 | CFLAGS += -Wall -ggdb 7 | 8 | .SUFFIXES: .c .o .so 9 | 10 | .c.o: 11 | $(CC) $(CFLAGS) -o $@ -fPIC -c $< 12 | 13 | schedutils.so: lua-schedutils.o cpuset-str.o 14 | $(CC) -shared -o $*.so $^ -llua $(LDFLAGS) 15 | 16 | check: schedutils.so 17 | @(cd tests && LUA_CPATH=../?.so ./lunit tests.lua) 18 | 19 | check-coverage: 20 | make clean 21 | make schedutils.so CFLAGS="-fprofile-arcs -ftest-coverage" LDFLAGS=-lgcov 22 | make check 23 | 
gcov lua-schedutils.c 24 | gcov cpuset-str.c 25 | 26 | clean: 27 | rm -f *.so *.o *.gcov *.gcda *.gcno *.core 28 | 29 | install: 30 | install -D -m0644 schedutils.so $(DESTDIR)$(LUA_OBJDIR)/schedutils.so 31 | -------------------------------------------------------------------------------- /overcommit-memory/Makefile: -------------------------------------------------------------------------------- 1 | LIBDIR ?= /usr/lib$(shell uname -m | grep -q x86_64 && echo 64) 2 | LIBEXECDIR ?= /usr/libexec 3 | PACKAGE ?= slurm-spank-plugins 4 | 5 | SHOPTS := -shared 6 | OBJS := overcommit-memory.o overcommit.o ../lib/fd.o 7 | 8 | all: overcommit-memory.so overcommit-util 9 | 10 | install: 11 | mkdir -p --mode=0755 $(DESTDIR)$(LIBDIR)/slurm 12 | install -m0755 overcommit-memory.so $(DESTDIR)$(LIBDIR)/slurm/ 13 | mkdir -p --mode=0755 $(DESTDIR)$(LIBEXECDIR)/$(PACKAGE) 14 | install -m0755 overcommit-util $(DESTDIR)$(LIBEXECDIR)/$(PACKAGE)/ 15 | 16 | overcommit-memory.so : $(OBJS) 17 | $(CC) $(SHOPTS) -o overcommit-memory.so $(OBJS) 18 | 19 | overcommit-util : util.o overcommit.o ../lib/fd.o 20 | $(CC) -o overcommit-util util.o overcommit.o ../lib/fd.o -lpthread 21 | 22 | .c.o : 23 | $(CC) -ggdb -I../lib -Wall $(CFLAGS) -o $@ -fPIC -c $< 24 | 25 | 26 | clean: 27 | rm -f *.o *.so overcommit-util 28 | -------------------------------------------------------------------------------- /lua/Makefile: -------------------------------------------------------------------------------- 1 | PLUGINS = \ 2 | lua.so \ 3 | 4 | SUBDIRS = \ 5 | lua-schedutils 6 | 7 | ifeq ($(HAVE_SPANK_OPTION_GETOPT), 1) 8 | EXTRA_CFLAGS = -DHAVE_SPANK_OPTION_GETOPT 9 | endif 10 | 11 | all: $(PLUGINS) subdirs 12 | 13 | check: subdirs-check 14 | 15 | subdirs: 16 | @for d in $(SUBDIRS); do make -C $$d; done 17 | 18 | subdirs-check: 19 | @for d in $(SUBDIRS); do make -C $$d check; done 20 | 21 | 22 | .SUFFIXES: .c .o .so 23 | 24 | .c.o: 25 | $(CC) -I.. 
$(EXTRA_CFLAGS) $(CFLAGS) -o $@ -fPIC -c $< 26 | 27 | lua.so : lua.o ../lib/list.o 28 | $(CC) -shared -o $*.so $^ -llua 29 | 30 | clean: subdirs-clean 31 | rm -f *.so *.o lib/*.o 32 | 33 | subdirs-clean: 34 | @for d in $(SUBDIRS); do make -C $$d clean; done 35 | 36 | install: subdirs-install 37 | @mkdir -p --mode=0755 $(DESTDIR)$(LIBDIR)/slurm 38 | @for p in $(PLUGINS); do \ 39 | echo "Installing $$p in $(LIBDIR)/slurm"; \ 40 | install -m0755 $$p $(DESTDIR)$(LIBDIR)/slurm; \ 41 | done 42 | 43 | subdirs-install: 44 | @for d in $(SUBDIRS); do \ 45 | make -C $$d DESTDIR=$(DESTDIR) install; \ 46 | done 47 | 48 | -------------------------------------------------------------------------------- /use-env/Makefile: -------------------------------------------------------------------------------- 1 | 2 | LIBDIR ?= /usr/lib$(shell uname -m | grep -q x86_64 && echo 64) 3 | 4 | sysconfdir ?= /etc/slurm/ 5 | 6 | OBJS := lex.yy.o use-env-parser.o ../lib/list.o log_msg.o ../lib/split.o 7 | HDRS := use-env.h ../lib/list.h ../lib/split.h log_msg.h use-env-parser.h 8 | SHOPTS := -shared -Wl,--version-script=version.map 9 | DEFS := -DSYSCONFDIR=\"$(sysconfdir)\" 10 | 11 | 12 | all: use-env.so test 13 | 14 | install: use-env.so 15 | @mkdir -p --mode=0755 $(DESTDIR)$(LIBDIR)/slurm 16 | install -m0755 use-env.so $(DESTDIR)$(LIBDIR)/slurm 17 | 18 | use-env.so : $(OBJS) use-env.o 19 | $(CC) $(SHOPTS) -o use-env.so $(OBJS) use-env.o 20 | 21 | test: $(OBJS) main.o 22 | $(CC) -ggdb -o test $(OBJS) main.o 23 | 24 | check: test 25 | ./test -f test.conf 26 | 27 | .c.o : 28 | $(CC) $(DEFS) -ggdb -I../lib -Wall $(CFLAGS) -o $@ -fPIC -c $< 29 | 30 | use-env-parser.c use-env-parser.h : use-env-parser.y 31 | bison -d -o use-env-parser.c $< 32 | 33 | lex.yy.c : use-env-parser.l use-env-parser.h 34 | lex $< 35 | 36 | clean: 37 | rm -f test *.o use-env-parser.[ch] lex.yy.c *.so 38 | -------------------------------------------------------------------------------- /cpuset/cpuset.init: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ############################################################################### 3 | # chkconfig: 12345 01 99 4 | ############################################################################### 5 | ### BEGIN INIT INFO 6 | # Provides: slurm-cpuset 7 | # Required-Start: $named $time 8 | # Default-Start: 3 4 5 9 | # Default-Stop: 0 1 2 6 10 | # Description: Mount /dev/cpuset filesystem 11 | ### END INIT INFO 12 | ############################################################################### 13 | 14 | 15 | case "$1" in 16 | start) 17 | echo -n "Mounting /dev/cpuset filesystem: " 18 | mkdir -m 0755 -p /dev/cpuset 19 | mount -t cpuset none /dev/cpuset 20 | if [ $? -ne 0 ]; then 21 | echo "Failed" 22 | exit 1 23 | fi 24 | 25 | # Spread slab allocations over all memory nodes 26 | echo 1 > /dev/cpuset/memory_spread_slab 27 | echo "Success" 28 | ;; 29 | 30 | stop) 31 | # Do nothing 32 | exit 0; 33 | ;; 34 | 35 | status) 36 | echo -n "cpuset filesystem is " 37 | [ -f /dev/cpuset/cpus ] || echo -n "not " 38 | echo -n "mounted." 39 | ;; 40 | 41 | *) 42 | echo "Usage: $0 start|status" 43 | exit 1 44 | ;; 45 | esac 46 | 47 | exit 0 48 | -------------------------------------------------------------------------------- /cpuset/slurm.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 
10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | 26 | #ifndef _HAVE_DYN_SLURM_H 27 | #define _HAVE_DYN_SLURM_H 28 | 29 | #include 30 | 31 | int dyn_slurm_open (); 32 | void dyn_slurm_close (); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /DISCLAIMER: -------------------------------------------------------------------------------- 1 | This work was produced at the Lawrence Livermore National Laboratory 2 | (LLNL) under Contract No. DE-AC52-07NA27344 (Contract 44) between 3 | the U.S. Department of Energy (DOE) and Lawrence Livermore National 4 | Security, LLC (LLNS) for the operation of LLNL. 5 | 6 | This work was prepared as an account of work sponsored by an agency of 7 | the United States Government. Neither the United States Government nor 8 | Lawrence Livermore National Security, LLC nor any of their employees, 9 | makes any warranty, express or implied, or assumes any liability or 10 | responsibility for the accuracy, completeness, or usefulness of any 11 | information, apparatus, product, or process disclosed, or represents 12 | that its use would not infringe privately-owned rights. 
13 | 14 | Reference herein to any specific commercial products, process, or 15 | services by trade name, trademark, manufacturer or otherwise does 16 | not necessarily constitute or imply its endorsement, recommendation, 17 | or favoring by the United States Government or Lawrence Livermore 18 | National Security, LLC. The views and opinions of authors expressed 19 | herein do not necessarily state or reflect those of the United States 20 | Government or Lawrence Livermore National Security, LLC, and shall 21 | not be used for advertising or product endorsement purposes. 22 | 23 | The precise terms and conditions for copying, distribution, and 24 | modification are specified in the file "COPYING". 25 | -------------------------------------------------------------------------------- /use-env/test.conf: -------------------------------------------------------------------------------- 1 | # Test file for use-env parser 2 | 3 | # Comment 4 | # Comment 5 | # Comment # 6 | FOO = 1 # Comment 7 | FOO=2# 8 | 9 | A = 1 10 | A |= 2 11 | 12 | B = 3 13 | 14 | C = 1;D=1; 15 | 16 | if ($A == 2) 17 | print "ERROR |= didn't seem to work" 18 | endif 19 | 20 | PATH += /foo/bin 21 | 22 | PATH = "${PATH}:/usr/local/bin" 23 | 24 | print "$PATH" 25 | 26 | if (($A == 1) && ($B >= 3)) 27 | C = 10 28 | else if ($A == 1) 29 | print "ERROR else if fallthrough not working" 30 | else 31 | print "ERROR else fallthrough not working" 32 | endif 33 | 34 | EMPTY = "" 35 | EMPTY = 36 | 37 | print "EMPTY = \"$EMPTY\"" 38 | 39 | unset EMPTY 40 | 41 | define n = ${EMPTY}$SLURM_NPROCS 42 | define N = $SLURM_NNODES 43 | 44 | define x = 101 45 | define y = 10 46 | 47 | if ($x < 100) 48 | print "ERROR: x not < 100" 49 | else if ($x < 200) 50 | if ($y > 1) 51 | # 52 | else if ($y > 5) 53 | print "ERROR: nested else if fallthrough failed" 54 | else 55 | print "ERROR: nested else fallthrough failed" 56 | endif 57 | else 58 | print "ERROR: else fallthrough failed" 59 | endif 60 | 61 | include test.conf.include 
62 | 63 | undefine n 64 | 65 | dump all 66 | 67 | 68 | set debuglevel 3 69 | 70 | in task { 71 | print "In task $SLURM_PROCID"; 72 | if (defined $LD_PRELOAD) 73 | LD_PRELOAD = "$LD_PRELOAD libfoo.so" 74 | else 75 | LD_PRELOAD = libfoo.so 76 | endif 77 | } 78 | 79 | print ~/bin 80 | -------------------------------------------------------------------------------- /lib/split.h: -------------------------------------------------------------------------------- 1 | /*****************************************************************************\ 2 | * $Id$ 3 | ***************************************************************************** 4 | * Copyright (C) 2006 The Regents of the University of California. 5 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 6 | * Written by Jim Garlick . 7 | * UCRL-CODE-2003-005. 8 | * 9 | * This file is part of Pdsh, a parallel remote shell program. 10 | * For details, see . 11 | * 12 | * Pdsh is free software; you can redistribute it and/or modify it under 13 | * the terms of the GNU General Public License as published by the Free 14 | * Software Foundation; either version 2 of the License, or (at your option) 15 | * any later version. 16 | * 17 | * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY 18 | * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 19 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 20 | * details. 21 | * 22 | * You should have received a copy of the GNU General Public License along 23 | * with Pdsh; if not, write to the Free Software Foundation, Inc., 24 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
25 | \*****************************************************************************/ 26 | #ifndef _SPLIT_H 27 | #define _SPLIT_H 28 | 29 | #include "list.h" 30 | 31 | List list_split (char *sep, char *str); 32 | List list_split_append (List l, char *sep, char *str); 33 | int list_join (char *result, size_t len, const char *sep, List l); 34 | 35 | #endif /* !_SPLIT_H */ 36 | -------------------------------------------------------------------------------- /use-env/log_msg.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | #ifndef _LOG_MSG_H 26 | #define _LOG_MSG_H 27 | 28 | int log_msg_init (const char *prefix); 29 | void log_msg_fini (); 30 | 31 | int log_msg_verbose (); 32 | int log_msg_set_verbose (int level); 33 | int log_msg_quiet (); 34 | int log_err (const char *format, ...); 35 | void log_msg (const char *format, ...); 36 | void log_verbose (const char *format, ...); 37 | void log_debug (const char *format, ...); 38 | void log_debug2 (const char *format, ...); 39 | void log_debug3 (const char *format, ...); 40 | 41 | #endif /* !_LOG_MSG_H */ 42 | -------------------------------------------------------------------------------- /cpuset/create.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | 26 | #ifndef _HAVE_CREATE_H 27 | #define _HAVE_CREATE_H 28 | 29 | #include 30 | #include 31 | #include 32 | 33 | #include "conf.h" 34 | 35 | int job_cpuset_exists (uint32_t jobid, uid_t uid); 36 | 37 | int create_cpuset_for_job (cpuset_conf_t cf, 38 | unsigned int jobid, uid_t uid, int ncpus); 39 | 40 | int create_cpuset_for_step (cpuset_conf_t cf, 41 | unsigned int stepid, int ncpus); 42 | 43 | int create_cpuset_for_task (cpuset_conf_t cf, 44 | unsigned int taskid, int ncpus_per_task); 45 | 46 | int user_cpuset_update (cpuset_conf_t cf, 47 | uid_t uid, const struct bitmask *b); 48 | 49 | int update_user_cpusets (); 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /iorelay/iorelay-mrsh-sshfs-wrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ############################################################################### 3 | # 4 | # Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 5 | # Produced at Lawrence Livermore National Laboratory. 6 | # Written by Jim Garlick . 7 | # 8 | # UCRL-CODE-235358 9 | # 10 | # This file is part of chaos-spankings, a set of spank plugins for SLURM. 11 | # 12 | # This is free software; you can redistribute it and/or modify it 13 | # under the terms of the GNU General Public License as published by 14 | # the Free Software Foundation; either version 2 of the License, or 15 | # (at your option) any later version. 16 | # 17 | # This is distributed in the hope that it will be useful, but WITHOUT 18 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 20 | # for more details. 21 | # 22 | # You should have received a copy of the GNU General Public License 23 | # along with this program. If not, see . 
24 | # 25 | ############################################################################### 26 | # 27 | # sshfs-mrsh-wrap - wrapper for mrsh for sshfs usage 28 | # 29 | declare -r prog=iorelay-sshfs-mrsh-wrap 30 | # die MSG - print "$prog: MSG" to stderr and exit with status 1 31 | die () { 32 | echo "$prog: $1" >&2 33 | exit 1 34 | } 35 | # Iterate over "$@" and quote every expansion so arguments are never re-split or glob-expanded 36 | # Expected args: 37 | # -x -a -oClearAllForwardings=yes -2 user@host -s sftp 38 | # We ignore everything except user@host arg 39 | for arg in "$@"; do 40 | if echo "$arg" | grep -q "@"; then 41 | user=$(echo "$arg" | cut -d@ -f1) 42 | host=$(echo "$arg" | cut -d@ -f2) 43 | fi 44 | done 45 | 46 | [ -n "$user" ] && [ -n "$host" ] || die "no user@host arg" 47 | 48 | exec /usr/bin/mrsh -l "$user" "$host" /usr/libexec/openssh/sftp-server 49 | die "failed to exec mrsh" 50 | # NOTREACHED 51 | -------------------------------------------------------------------------------- /cpuset/nodemap.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | 26 | #ifndef HAVE_NODEMAP_H 27 | #define HAVE_NODEMAP_H 28 | 29 | #include "conf.h" 30 | 31 | /* 32 | * Create a nodemap with optional used CPUs bitmask 33 | * if used == NULL, then the nodemap will be initialized 34 | * with the actual utilized CPUs. 35 | */ 36 | struct nodemap * nodemap_create (cpuset_conf_t cf, struct bitmask *used); 37 | int nodemap_policy_update (struct nodemap *map, cpuset_conf_t cf); 38 | 39 | void nodemap_destroy (struct nodemap *); 40 | 41 | void print_nodemap (const struct nodemap *); 42 | 43 | /* 44 | * Allocate ncpus from nodemap 45 | */ 46 | struct bitmask * nodemap_allocate (struct nodemap *map, int ncpus); 47 | 48 | const struct bitmask * nodemap_used (struct nodemap *map); 49 | 50 | 51 | #endif /* !HAVE_NODEMAP_H */ 52 | -------------------------------------------------------------------------------- /overcommit-memory/overcommit.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 
20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | #ifndef _HAVE_OVERCOMMIT_H 26 | #define _HAVE_OVERCOMMIT_H 27 | 28 | typedef struct overcommit_shared_context * overcommit_shared_ctx_t; 29 | 30 | overcommit_shared_ctx_t overcommit_shared_ctx_create (int jobid, int stepid); 31 | 32 | void overcommit_shared_ctx_destroy (overcommit_shared_ctx_t ctx); 33 | void overcommit_shared_ctx_unregister (overcommit_shared_ctx_t ctx); 34 | 35 | int overcommit_in_use (overcommit_shared_ctx_t ctx, int value); 36 | int overcommit_shared_list_users (); 37 | 38 | int overcommit_shared_cleanup (int jobid, int stepid); 39 | int overcommit_force_cleanup (); 40 | 41 | int overcommit_memory_get_current_state (); 42 | int overcommit_memory_set_current_state (int value); 43 | 44 | int overcommit_ratio_get (); 45 | int overcommit_ratio_set (int value); 46 | 47 | #endif /* !_HAVE_OVERCOMMIT_H */ 48 | -------------------------------------------------------------------------------- /cpuset/slurm.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 
15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | 26 | #include <dlfcn.h> 27 | #include "slurm.h" 28 | #include "log.h" 29 | /* 30 | * Handle for libslurm.so 31 | * 32 | * We open libslurm.so via dlopen () in order to pass the 33 | * flag RTLD_GLOBAL so that subsequently loaded modules have 34 | * access to libslurm symbols. This is pretty much only needed 35 | * for dynamically loaded modules that would otherwise be 36 | * linked against libslurm. 37 | * 38 | */ 39 | static void * slurm_h = NULL; 40 | 41 | /* Load libslurm.so with RTLD_NOW|RTLD_GLOBAL unless a handle is already held. Returns 0 on success or if already open, -1 (after logging dlerror ()) if dlopen fails. */ 42 | int dyn_slurm_open () 43 | { 44 | if (slurm_h) 45 | return (0); 46 | if (!(slurm_h = dlopen("libslurm.so", RTLD_NOW|RTLD_GLOBAL))) { 47 | log_err ("Unable to dlopen libslurm: %s\n", dlerror ()); 48 | return (-1); 49 | } 50 | return (0); 51 | } 52 | /* Release the libslurm handle, if any. The handle is reset to NULL so a later dyn_slurm_open () reloads the library and a repeated close is a no-op. */ 53 | void dyn_slurm_close () 54 | { 55 | if (slurm_h) { dlclose (slurm_h); slurm_h = NULL; } 56 | } 57 | 58 | /* 59 | * vi: ts=4 sw=4 expandtab 60 | */ 61 | -------------------------------------------------------------------------------- /cpuset/Makefile: -------------------------------------------------------------------------------- 1 | NAME := cpuset 2 | SBINDIR ?= /sbin 3 | LIBNAME ?= lib$(shell uname -m | grep -q x86_64 && echo 64) 4 | LIBDIR ?= /usr/$(LIBNAME) 5 | MANDIR ?= /usr/share/man 6 | PAMDIR := /$(LIBNAME)/security 7 | 8 | FLAGS := -ggdb -Wall -I../lib 9 | SHOPTS := -shared -Wl,--version-script=version.map 10 | LLIBS := -lslurm -lbitmask -lcpuset -ldl -lfl 11 | OBJS := nodemap.o util.o create.o log.o slurm.o \ 12 | conf.o conf-lexer.o conf-parser.o \ 13 | ../lib/fd.o ../lib/list.o ../lib/split.o 14 | 15 | 
MAN8 := slurm-cpuset.8 pam_slurm_cpuset.8 16 | MAN1 := use-cpusets.1 17 | 18 | all: $(NAME).so test cpuset_release_agent pam_slurm_cpuset.so 19 | 20 | install: 21 | mkdir -p --mode=0755 $(DESTDIR)$(LIBDIR)/slurm 22 | install -m0755 cpuset.so $(DESTDIR)$(LIBDIR)/slurm 23 | mkdir -p --mode=0755 $(DESTDIR)$(PAMDIR) 24 | install -m0755 pam_slurm_cpuset.so $(DESTDIR)$(PAMDIR) 25 | mkdir -p --mode=0755 $(DESTDIR)$(SBINDIR) 26 | install -m0755 cpuset_release_agent $(DESTDIR)$(SBINDIR)/ 27 | mkdir -p --mode=0755 $(DESTDIR)$(MANDIR)/man1 28 | mkdir -p --mode=0755 $(DESTDIR)$(MANDIR)/man8 29 | install -m0644 $(MAN8) $(DESTDIR)$(MANDIR)/man8 30 | install -m0644 $(MAN1) $(DESTDIR)$(MANDIR)/man1 31 | 32 | $(NAME).so: $(OBJS) $(NAME).o 33 | $(CC) $(SHOPTS) -o $(NAME).so $(OBJS) $(NAME).o $(LLIBS) 34 | 35 | test: test.o $(OBJS) 36 | $(CC) -o test $(OBJS) test.o $(LLIBS) 37 | 38 | cpuset_release_agent: release-agent.o $(OBJS) 39 | $(CC) -o cpuset_release_agent $(OBJS) release-agent.o $(LLIBS) 40 | 41 | 42 | pam_slurm_cpuset.so : $(OBJS) pam_slurm_cpuset.o ../lib/hostlist.o 43 | $(CC) -shared -o pam_slurm_cpuset.so $(OBJS) ../lib/hostlist.o \ 44 | pam_slurm_cpuset.o -lbitmask $(LLIBS) -lpam -lpam_misc 45 | .c.o: 46 | $(CC) $(CFLAGS) $(FLAGS) -o $@ -fPIC -c $< 47 | 48 | conf.o : conf-parser.h 49 | 50 | conf-lexer.c : conf-parser.l conf-parser.h 51 | flex -oconf-lexer.c conf-parser.l 52 | 53 | conf-parser.c conf-parser.h : conf-parser.y 54 | bison -d -oconf-parser.c conf-parser.y 55 | 56 | clean: 57 | -rm -f *.o *.so conf-parser.[ch] conf-lexer.c cpuset_release_agent test 58 | -------------------------------------------------------------------------------- /cpuset/log.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 
5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | 26 | #ifndef _CPUSET_LOG_H 27 | #define _CPUSET_LOG_H 28 | 29 | #define C_LOG_QUIET -2 30 | #define C_LOG_CRIT -1 31 | #define C_LOG_NORMAL 0 32 | #define C_LOG_VERBOSE 1 33 | #define C_LOG_DEBUG 2 34 | #define C_LOG_DEBUG2 3 35 | 36 | typedef int (log_f) (const char *msg); 37 | 38 | int log_add_dest (int level, log_f *fn); 39 | int log_update (int level, log_f *fn); 40 | int log_set_prefix (const char *prefix); 41 | void log_cleanup (); 42 | int log_err (const char *format, ...); 43 | void log_msg (const char *format, ...); 44 | void log_verbose (const char *format, ...); 45 | void log_debug (const char *format, ...); 46 | void log_debug2 (const char *format, ...); 47 | 48 | /* 49 | * Legacy logging functions 50 | */ 51 | #define cpuset_error(args...) log_err (args) 52 | #define cpuset_verbose(args...) log_verbose (args) 53 | #define cpuset_debug(args...) log_debug (args) 54 | #define cpuset_debug2(args...) 
log_debug2 (args) 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /cpuset/util.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | #ifndef _HAVE_CPUSET_UTIL_H 26 | #define _HAVE_CPUSET_UTIL_H 27 | 28 | #include 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "fd.h" 36 | #include "conf.h" 37 | 38 | int cpumask_size (void); 39 | int memmask_size (void); 40 | 41 | int slurm_cpuset_lock (void); 42 | int slurm_cpuset_unlock (int fd); 43 | 44 | int user_cpuset_lock (uid_t uid); 45 | void user_cpuset_unlock (int fd); 46 | 47 | void print_current_cpuset_info (); 48 | void print_cpuset_info (const char *path, struct cpuset *cp); 49 | 50 | void print_bitmask (const char * fmt, const struct bitmask *b); 51 | 52 | struct bitmask *used_cpus_bitmask_path (char *path, int clearall); 53 | 54 | int slurm_cpuset_create (cpuset_conf_t conf); 55 | int slurm_cpuset_clean_path (const char *path); 56 | 57 | int str2int (const char *str); 58 | 59 | const char * cpuset_path_to_name (const char *path); 60 | #endif 61 | 62 | /* 63 | * vi: ts=4 sw=4 expandtab 64 | */ 65 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PACKAGE ?= slurm-spank-plugins 2 | 3 | sysconfdir ?= /etc/slurm/ 4 | 5 | LIBNAME ?= lib$(shell uname -m | grep -q x86_64 && echo 64) 6 | LIBDIR ?= /usr/$(LIBNAME) 7 | BINDIR ?= /usr/bin 8 | SBINDIR ?= /sbin 9 | LIBEXECDIR ?= /usr/libexec 10 | 11 | export LIBNAME LIBDIR BINDIR SBINDIR LIBEXECDIR PACKAGE 12 | 13 | CFLAGS = -Wall -ggdb 14 | 15 | PLUGINS = \ 16 | renice.so \ 17 | system-safe.so \ 18 | iotrace.so \ 19 | tmpdir.so \ 20 | auto-affinity.so \ 21 | pty.so \ 22 | addr-no-randomize.so \ 23 | preserve-env.so \ 24 | private-mount.so \ 25 | setsched.so 26 | 27 | LIBRARIES = \ 28 | system-safe-preload.so \ 29 | 30 | SUBDIRS = \ 31 | use-env \ 32 | overcommit-memory 33 | 34 | ifeq ($(BUILD_CPUSET), 1) 35 | SUBDIRS += cpuset 36 | endif 37 | 38 | ifeq 
($(WITH_LUA), 1) 39 | SUBDIRS += lua 40 | endif 41 | 42 | all: $(PLUGINS) $(LIBRARIES) subdirs 43 | 44 | .SUFFIXES: .c .o .so 45 | 46 | .c.o: 47 | $(CC) $(CFLAGS) -o $@ -fPIC -c $< 48 | .o.so: 49 | $(CC) -shared -o $*.so $< $(LIBS) 50 | # Recurse with $(MAKE) so make flags propagate; abort on the first failing subdirectory. 51 | subdirs: 52 | @for d in $(SUBDIRS); do $(MAKE) -C $$d || exit 1; done 53 | 54 | system-safe-preload.so : system-safe-preload.o 55 | $(CC) -shared -o $*.so $< -ldl 56 | 57 | auto-affinity.so : auto-affinity.o lib/split.o lib/list.o lib/fd.o 58 | $(CC) -shared -o $*.so auto-affinity.o lib/split.o lib/list.o -lslurm 59 | 60 | preserve-env.so : preserve-env.o lib/list.o 61 | $(CC) -shared -o $*.so preserve-env.o lib/list.o 62 | 63 | private-mount.so : private-mount.o lib/list.o lib/split.o 64 | $(CC) -shared -o $*.so private-mount.o lib/list.o lib/split.o 65 | 66 | pty.so : pty.o 67 | $(CC) -shared -o $*.so $< -lutil 68 | 69 | clean: subdirs-clean 70 | rm -f *.so *.o lib/*.o 71 | 72 | install: 73 | @mkdir -p --mode=0755 $(DESTDIR)$(LIBDIR)/slurm 74 | @for p in $(PLUGINS); do \ 75 | echo "Installing $$p in $(LIBDIR)/slurm"; \ 76 | install -m0755 $$p $(DESTDIR)$(LIBDIR)/slurm; \ 77 | done 78 | @for f in $(LIBRARIES); do \ 79 | echo "Installing $$f in $(LIBDIR)"; \ 80 | install -m0755 $$f $(DESTDIR)$(LIBDIR); \ 81 | done 82 | @for d in $(SUBDIRS); do \ 83 | $(MAKE) -C $$d DESTDIR=$(DESTDIR) install || exit 1; \ 84 | done 85 | 86 | subdirs-clean: 87 | @for d in $(SUBDIRS); do $(MAKE) -C $$d clean; done 88 | 89 | -------------------------------------------------------------------------------- /lua/hugepages.lua: -------------------------------------------------------------------------------- 1 | -- ========================================================================== 2 | -- 3 | -- Add a --hugepages option to s{run,queue,batch} to allow users 4 | -- to configure hugetlbfs for the nodes of their job.
5 | -- 6 | -- ========================================================================== 7 | local hugepages 8 | 9 | -- Export new --hugepages option to SLURM: 10 | spank_options = { 11 | { 12 | name = "hugepages", 13 | usage = "Attempt to create N (kB,MB,GB) worth of HugePages".. 14 | " on the nodes of the job.", 15 | arginfo = "N[KMG]", 16 | has_arg = 1, 17 | cb = "opt_handler" 18 | }, 19 | } 20 | 21 | 22 | -- Validate that the suffix of the hugepages option argument is valid: 23 | function valid_suffix (suffix) 24 | local valid_suffixes = { 'K', 'M', 'G', 'B' } 25 | 26 | -- No suffix == bytes and is acceptable 27 | if suffix == nil then return true end 28 | 29 | for _,s in ipairs (valid_suffixes) do 30 | if suffix:upper() == s then return true end 31 | end 32 | return false 33 | end 34 | 35 | -- Validate the option argument to --hugepages. 36 | function validate_hugepages (arg) 37 | local n = arg:match ("^[%d]+") 38 | -- match() yields nil when arg has no leading digits; reject that as well as a zero count 39 | if n == nil or tonumber(n) <= 0 then 40 | SPANK.log_error ("invalid --hugepages value '%s'\n", arg) 41 | return false 42 | end 43 | 44 | local suffix = arg:match ("[^%d]+$") 45 | if not valid_suffix (suffix) then 46 | SPANK.log_error ("invalid --hugepages suffix '%s'\n", suffix) 47 | return false 48 | end 49 | 50 | return true 51 | end 52 | 53 | -- Option handler: 54 | function opt_handler (val, optarg, isremote) 55 | hugepages = optarg 56 | if isremote or validate_hugepages (optarg) then 57 | return SPANK.SUCCESS 58 | end 59 | return SPANK.FAILURE 60 | end 61 | 62 | function slurm_spank_init_post_opt (spank) 63 | -- 64 | -- Do nothing in remote context or when no --hugepages option was seen 65 | -- 66 | if spank.context == "remote" or hugepages == nil then 67 | return SPANK.SUCCESS 68 | end 69 | 70 | -- Export SPANK_HUGEPAGES to SLURM prolog/epilog: 71 | local rc, msg = spank:job_control_setenv ("HUGEPAGES", hugepages, 1); 72 | if rc == nil then 73 | return SPANK.log_error ("Unable to propagate HUGEPAGES=%s: %s", 74 | hugepages, msg) 75 | end 76 |
return SPANK.SUCCESS 77 | end 78 | -------------------------------------------------------------------------------- /cpuset/conf-parser.l: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | %{ 26 | #include 27 | #include 28 | #include 29 | 30 | #include "conf.h" 31 | 32 | #define YYSTYPE char * 33 | #include "conf-parser.h" 34 | 35 | %} 36 | 37 | %option nounput 38 | 39 | %% 40 | 41 | #[^\n]* ; /* Ignore comments. */ 42 | [ \t\r]+ ; /* Ignore whitespace. */ 43 | ; { return ';'; } 44 | , { return ','; } 45 | (#.*)?\\?\n { return '\n'; } 46 | 47 | \"[^\"]*\" | 48 | \'[^\']*\' { 49 | yytext [strlen (yytext) - 1] = '\0'; 50 | yylval = strdup (yytext+1); 51 | return STRING; 52 | } 53 | 54 | (fit-)?policy { return POLICY; } 55 | order { return ORDER; } 56 | use-idle | 57 | alloc-idle { return USE_IDLE; } 58 | constrain-mem(s)? 
{ return CONST_MEM; } 59 | kill-orph(an)?s { return KILL_ORPHS; } 60 | = { return '='; } 61 | 62 | 0 | 63 | no | 64 | No { return FALSE; } 65 | 1 | 66 | yes | 67 | Yes { return TRUE; } 68 | 69 | [^=;, \t\r\n]+ { 70 | yylval = strdup (yytext); 71 | return STRING; 72 | } 73 | 74 | %% 75 | 76 | /* 77 | * vi: ts=4 sw=4 expandtab 78 | */ 79 | -------------------------------------------------------------------------------- /iorelay/iorelay-bind-nfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ############################################################################### 3 | # 4 | # Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 5 | # Produced at Lawrence Livermore National Laboratory. 6 | # Written by Jim Garlick . 7 | # 8 | # UCRL-CODE-235358 9 | # 10 | # This file is part of chaos-spankings, a set of spank plugins for SLURM. 11 | # 12 | # This is free software; you can redistribute it and/or modify it 13 | # under the terms of the GNU General Public License as published by 14 | # the Free Software Foundation; either version 2 of the License, or 15 | # (at your option) any later version. 16 | # 17 | # This is distributed in the hope that it will be useful, but WITHOUT 18 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 20 | # for more details. 21 | # 22 | # You should have received a copy of the GNU General Public License 23 | # along with this program. If not, see . 
24 | # 25 | ############################################################################### 26 | # 27 | # iorelay-bind-nfs - bind directories from mntpt over all nfs mounted 28 | # file systems 29 | # 30 | # Run as root in private namespace 31 | # 32 | declare -r prog=iorelay-bind-nfs 33 | 34 | die () 35 | { 36 | echo "$prog: $1" >&2 37 | exit 1 38 | } 39 | warn () 40 | { 41 | echo "$prog: $1" >&2 42 | } 43 | usage () 44 | { 45 | echo "Usage: $prog -m mntpt" 46 | exit 1 47 | } 48 | listnfs () 49 | { 50 | local src dst typ opts a1 a2 51 | # Print the mount point of every nfs entry in /proc/mounts, one per line 52 | cat /proc/mounts | while read src dst typ opts a1 a2; do 53 | if [ "${typ}" = nfs ]; then echo "${dst}" 54 | fi 55 | done 56 | } 57 | 58 | [ -n "$SLURM_NODELIST" ] || die "SLURM_NODELIST is not set" 59 | relayhost=$(echo $SLURM_NODELIST | glob-hosts -n1) 60 | [ "$(hostname)" = "$relayhost" ] && exit 0 # silently exit if relayhost 61 | 62 | uopt=0 63 | mntpt="" 64 | while getopts "m:" opt; do 65 | case ${opt} in 66 | m) mntpt=${OPTARG} ;; 67 | *) usage ;; 68 | esac 69 | done 70 | shift $((${OPTIND} - 1)) 71 | [ $# = 0 ] || usage 72 | [ -n "$mntpt" ] || usage 73 | [ -d $mntpt ] || die "not a directory: $mntpt" 74 | 75 | count=0 76 | for dir in $(listnfs); do 77 | if [ -d ${mntpt}/${dir} ]; then 78 | mount --bind ${mntpt}/${dir} ${dir} || warn "bind ${dir} failed" 79 | count=$(($count+1)) 80 | fi 81 | done 82 | warn "relayed $count file systems" 83 | 84 | exit 0 85 | -------------------------------------------------------------------------------- /use-env/main.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM.
10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | 28 | #include "use-env.h" 29 | #include "log_msg.h" 30 | 31 | extern int yydebug; 32 | static char *run_as_task = NULL; 33 | 34 | int get_options (int ac, char **av, char **ppath, char **nnodes, char **nprocs) 35 | { 36 | int c; 37 | 38 | while ((c = getopt (ac, av, "dvt:f:n:N:")) >= 0) { 39 | switch (c) { 40 | case 'd' : 41 | yydebug = 1; 42 | break; 43 | case 'v': 44 | log_msg_verbose (); 45 | break; 46 | case 'f': 47 | *ppath = optarg; 48 | break; 49 | case 'n': 50 | *nprocs = optarg; 51 | break; 52 | case 'N': 53 | *nnodes = optarg; 54 | break; 55 | case 't': 56 | run_as_task = optarg; 57 | break; 58 | case '?' 
: 59 | default: 60 | exit (1); 61 | } 62 | } 63 | return (0); 64 | } 65 | 66 | 67 | int main (int ac, char **av) 68 | { 69 | int rc = 0; 70 | char *filename = NULL; 71 | char *nnodes = "0"; 72 | char *nprocs = "0"; 73 | 74 | log_msg_init ("use-env"); 75 | 76 | get_options (ac, av, &filename, &nnodes, &nprocs); 77 | 78 | keyword_define ("SLURM_NNODES", nnodes); 79 | keyword_define ("SLURM_NPROCS", nprocs); 80 | 81 | if (run_as_task) { 82 | keyword_define ("SLURM_PROCID", run_as_task); 83 | keyword_define ("SLURM_NODEID", "0"); 84 | } 85 | 86 | use_env_parser_init (run_as_task != NULL); 87 | rc = use_env_parse (filename); 88 | use_env_parser_fini (); 89 | log_msg_fini (); 90 | 91 | return (rc); 92 | } 93 | -------------------------------------------------------------------------------- /cpuset/release-agent.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "util.h" 33 | #include "create.h" 34 | #include "conf.h" 35 | #include "log.h" 36 | 37 | const char cpuset_path[] = "/dev/cpuset"; 38 | 39 | const char * basename (const char *path); 40 | static FILE *fp = NULL; 41 | 42 | static int log_fp (const char *msg) 43 | { 44 | if (fp) 45 | fprintf (fp, "%s", msg); 46 | return (0); 47 | } 48 | 49 | int main (int ac, char **av) 50 | { 51 | int lockfd; 52 | char path [4096]; 53 | const char *prog = basename (av[0]); 54 | 55 | cpuset_conf_t conf = cpuset_conf_create (); 56 | 57 | if (ac < 2) { 58 | fprintf (stderr, "Usage: %s cpuset_path\n", prog); 59 | return (1); 60 | } 61 | /* Best effort: fp stays NULL if the log cannot be opened; log_fp() and the close below cope */ 62 | fp = fopen ("/var/log/slurm-cpuset.log", "a"); 63 | 64 | log_add_dest (C_LOG_VERBOSE, log_fp); 65 | cpuset_conf_parse_system (conf); /* Ignore errors, we must proceed */ 66 | 67 | snprintf (path, sizeof (path), "%s%s", cpuset_path, av[1]); 68 | 69 | if ((lockfd = slurm_cpuset_create (conf)) < 0) { 70 | log_err ("Failed to lock slurm cpuset: %s\n", strerror (errno)); 71 | exit (1); 72 | } 73 | 74 | log_verbose ("Cleaning path %s\n", path); 75 | 76 | update_user_cpusets (conf); 77 | slurm_cpuset_unlock (lockfd); 78 | cpuset_conf_destroy (conf); 79 | if (fp) 80 | fclose (fp); 81 | return (0); 82 | } 83 | 84 | /* 85 | * vi: ts=4 sw=4 expandtab 86 | */ 87 | -------------------------------------------------------------------------------- /cpuset/test.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona .
6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "nodemap.h" 32 | #include "util.h" 33 | #include "conf.h" 34 | #include "log.h" 35 | 36 | static int log_stderr (const char *msg) 37 | { 38 | fprintf (stderr, "%s", msg); return 0; 39 | } 40 | 41 | int main (int ac, char **av) 42 | { 43 | cpuset_conf_t conf; 44 | struct bitmask * b; 45 | struct nodemap * map; 46 | int n = 0; 47 | /* n is parsed further down, only once av[1] is known to be present */ 48 | log_add_dest (4, log_stderr); 49 | 50 | conf = cpuset_conf_create (); 51 | //cpuset_conf_debug (); 52 | 53 | if (cpuset_conf_parse_system (conf) < 0) 54 | exit (1); 55 | 56 | if (ac < 2) 57 | exit (1); 58 | 59 | if (av[1] == NULL || ((n = str2int (av[1])) <= 0)) { 60 | fprintf (stderr, "Usage: %s NCPUS\n", av[0]); 61 | exit (1); 62 | } 63 | 64 | fprintf (stdout, "Faking a job with %d CPUs\n", n); 65 | 66 | if ((map = nodemap_create (conf, NULL)) == NULL) { 67 | fprintf (stderr, "Failed to create nodemap\n"); 68 | exit (1); 69 | } 70 | 71 | print_nodemap (map); 72 | 73 | if (!(b = nodemap_allocate (map, n))) { 74 | fprintf (stderr, "Failed to allocate %d tasks in nodemap\n", n); 75 | exit (1); 76 | } 77 |
78 | print_bitmask ("Used CPUs: %s\n", nodemap_used (map)); 79 | 80 | nodemap_destroy (map); 81 | 82 | cpuset_conf_destroy (conf); 83 | 84 | exit (0); 85 | 86 | } 87 | 88 | /* 89 | * vi: ts=4 sw=4 expandtab 90 | */ 91 | -------------------------------------------------------------------------------- /cpuset/pam_slurm_cpuset.8: -------------------------------------------------------------------------------- 1 | 2 | .TH "PAM_SLURM_CPUSET" "8" 3 | 4 | .SH NAME 5 | pam_slurm_cpuset \- restrict user logins to SLURM cpusets 6 | 7 | .SH SYNOPSIS 8 | \fBpam_slurm_cpuset.so\fR [\fIOPTIONS\fR]... 9 | 10 | .SH DESCRIPTION 11 | .PP 12 | The \fBpam_slurm_cpuset\fR module may be used to restrict user 13 | login sessions on compute nodes to only the CPUs which they have 14 | been allocated by SLURM. It will also deny access to users attempting 15 | to log in to nodes which they have not been allocated. Thus, it 16 | should replace \fBpam_slurm.so\fR in the PAM stack. 17 | .PP 18 | Like the \fBpam_slurm\fR module, the \fBpam_slurm_cpuset.so\fR module 19 | should be enabled in the account section of the PAM stack. 20 | .PP 21 | User login session tasks are placed into the \fBuser\fR cpuset created 22 | by the \fBslurm-cpuset\fR(8) utilities. If a \fBuser\fR cpuset doesn't 23 | exist at the time of operation of this module, and the user has one 24 | or more valid SLURM jobs assigned to the current system, then a user 25 | cpuset under 26 | 27 | .B /dev/cpuset/slurm/UID 28 | 29 | will be created with access to all CPUs to which the user has access. 30 | .PP 31 | As jobs begin and are terminated on the node, the set of CPUs in the 32 | user cpuset is automatically adjusted to the union of all job cpusets. 
33 | If and when all the user's jobs on the node are complete, and the 34 | user has no CPUs allocated to them, SLURM will either \fBorphan\fR 35 | the user cpuset by renaming it to 36 | 37 | .B /dev/cpuset/slurm/orphan:UID 38 | 39 | or will immediately terminate the user login and clean up the 40 | user cpuset. The method used depends on the \fBkill-orphs\fR 41 | setting in \fBslurm-cpuset.conf\fR. 42 | .PP 43 | For more information about the SLURM cpuset suite and its 44 | operation, see the \fBslurm-cpuset\fR(8) man page. 45 | 46 | .SH OPTIONS 47 | .TP 48 | .BI debug [=level] 49 | Enable verbose module logging via \fBpam_syslog\fR(3). Optionally 50 | a \fIlevel\fR may be specified. 51 | .TP 52 | .BI conf= FILENAME 53 | Read configuration from config file \fIFILENAME\fR. By default, the 54 | configuration is read from /etc/slurm/slurm-cpuset.conf. 55 | .PP 56 | For valid configuration file syntax and options, see the 57 | \fBslurm-cpuset\fR(8) man page. 58 | 59 | .SH "MODULE SERVICES PROVIDED" 60 | .PP 61 | Currently, only the \fBaccount\fR service is supported. 62 | 63 | .SH "RETURN VALUES" 64 | .TP 3n 65 | PAM_SUCCESS 66 | Access was granted. 67 | .TP 68 | PAM_PERM_DENIED 69 | Access was not granted. 70 | .TP 71 | PAM_USER_UNKNOWN 72 | Failed to read \fBPAM_USER\fR or user not in passwd file. 73 | .TP 74 | PAM_SYSTEM_ERR 75 | System or module configuration error. 76 | 77 | .SH "SEE ALSO" 78 | .BR slurm-cpuset (8), 79 | .BR cpuset (4), 80 | .BR pam (8), 81 | .BR pam.d (8) 82 | -------------------------------------------------------------------------------- /iorelay/iorelay-mount-nodezero.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ############################################################################### 3 | # 4 | # Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 5 | # Produced at Lawrence Livermore National Laboratory. 6 | # Written by Jim Garlick .
7 | # 8 | # UCRL-CODE-235358 9 | # 10 | # This file is part of chaos-spankings, a set of spank plugins for SLURM. 11 | # 12 | # This is free software; you can redistribute it and/or modify it 13 | # under the terms of the GNU General Public License as published by 14 | # the Free Software Foundation; either version 2 of the License, or 15 | # (at your option) any later version. 16 | # 17 | # This is distributed in the hope that it will be useful, but WITHOUT 18 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 20 | # for more details. 21 | # 22 | # You should have received a copy of the GNU General Public License 23 | # along with this program. If not, see . 24 | # 25 | ############################################################################### 26 | # 27 | # iorelay-mount-nodezero - mount / from first slurm node on /mnt 28 | # 29 | # Run as root in private namespace. 30 | # 31 | declare -r prog=iorelay-mount-nodezero 32 | declare -r sshcmd=/usr/libexec/iorelay-mrsh-sshfs-wrap 33 | 34 | die () 35 | { 36 | echo "$prog: $1" >&2 37 | exit 1 38 | } 39 | warn () 40 | { 41 | echo "$prog: $1" >&2 42 | } 43 | usage () 44 | { 45 | echo "Usage: $prog -m mntpt -u username" 46 | exit 1 47 | } 48 | 49 | 50 | [ -n "$SLURM_NODELIST" ] || die "SLURM_NODELIST is not set" 51 | relayhost=$(echo $SLURM_NODELIST | glob-hosts -n1) 52 | [ -n "$relayhost" ] || die "could not determine relayhost" 53 | [ "$(hostname)" = "$relayhost" ] && exit 0 # silently exit if relayhost 54 | 55 | mntpt="" 56 | username="" 57 | while getopts "u:m:" opt; do 58 | case ${opt} in 59 | m) mntpt=${OPTARG} ;; 60 | u) username=${OPTARG} ;; 61 | *) usage ;; 62 | esac 63 | done 64 | shift $((${OPTIND} - 1)) 65 | [ $# = 0 ] || usage 66 | [ -n "$mntpt" ] || usage 67 | [ -d $mntpt ] || die "not a directory: $mntpt" 68 | [ -n "$username" ] || usage 69 | uid=$(id -u $username 2>&1) || die "no such user: $username" 70 | 
[ "$uid" != 0 ] || die "sshfs as root is unsupported" 71 | 72 | grep -q sshfs /proc/mounts && die "sshfs is already mounted" 73 | 74 | # NOTE: work around missing -n option in sshfs/fusermount; sshfs needs the remote as [user@]host:[dir] 75 | mv -f /etc/mtab /etc/mtab-iorelay || die "failed to back up /etc/mtab" 76 | sshfs -o ssh_command=${sshcmd} ${username}@${relayhost}:/ ${mntpt} 77 | result=$? 78 | mv -f /etc/mtab-iorelay /etc/mtab || warn "failed to restore /etc/mtab" 79 | [ $result = 0 ] || die "sshfs mount ${username}@${relayhost}:/ ${mntpt} failed" 80 | 81 | exit 0 82 | -------------------------------------------------------------------------------- /cpuset/README: -------------------------------------------------------------------------------- 1 | 2 | INTRODUCTION 3 | 4 | The SLURM 'cpuset' plugin uses Linux cpusets to constrain jobs to 5 | the number of CPUs they have been allocated on nodes. The plugin is 6 | specifically designed for systems sharing nodes and using CPU scheduling 7 | (i.e. using the select/cons_res plugin). The plugin will not work on 8 | systems where CPUs are oversubscribed to jobs (i.e. strict node sharing 9 | without the use of select/cons_res). 10 | 11 | The plugin uses SLURM's spank framework, and thus it is enabled by adding 12 | the following line to /etc/slurm/plugstack.conf: 13 | 14 | required cpuset.so [options] 15 | 16 | where [options] may be supplied to tune module behavior. 17 | 18 | The plugin may also constrain job steps to their own cpusets under 19 | the job cpuset. This may be useful when running multiple job steps 20 | under a single allocation, as the resources of each job step may 21 | be partitioned into separate job steps. This functionality is enabled 22 | by the srun user option 23 | 24 | --use-cpusets=[args...] 25 | 26 | Use of the --use-cpusets option for job steps is described below. 27 | 28 | 29 | REQUIREMENTS 30 | 31 | The cpuset plugin of course requires cpuset support.
It also uses the 32 | libbitmask and libcpuset libraries from SGI for creating and managing 33 | cpusets. Source for these libraries is available at 34 | 35 | http://oss.sgi.com/projects/cpusets/ 36 | 37 | The cpuset filesystem must also be mounted at runtime in order for 38 | the plugin to be able to query and create cpusets. To mount the cpuset 39 | filesystem, use: 40 | 41 | mount -t cpuset none /dev/cpuset 42 | 43 | The plugin currently assumes that the cpuset filesystem will be available 44 | under /dev/cpuset. 45 | 46 | Included with the cpuset plugin source is a cpuset "release 47 | agent" (release-agent.c) which may optionally be installed as 48 | /sbin/cpuset_release_agent on any nodes using the SLURM cpuset plugin. 49 | This release agent will be run for each SLURM cpuset when the last task 50 | within the cpuset exits, and will free the cpuset immediately (with 51 | proper locking so as not to race with other jobs). This release agent 52 | is optional for a couple of reasons: 53 | 54 | 1. In the current version of Linux for which this plugin was written 55 | (RHEL5), there can only be one release-agent system-wide. We don't 56 | want to interfere with other uses of cpusets if they exist. 57 | 58 | 2. The cpuset plugin removes stale cpusets at startup anyway. So, 59 | the cpuset_release_agent is not a critical component. However, 60 | it is nice to clean up job cpusets as the jobs exit, instead of 61 | waiting until the *next* job is run. Unused cpusets lying around 62 | may be confusing to users and sysadmins. 63 | 64 | 65 | MAN PAGES 66 | 67 | This file is out of date. For up-to-date information see the 68 | man pages provided with this software: slurm-cpuset(8), 69 | use-cpusets(1), and pam_slurm_cpuset(8).
70 | 71 | $Id: README 7653 2008-07-29 22:33:31Z grondo $ 72 | -------------------------------------------------------------------------------- /cpuset/conf.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
#ifndef _CPUSET_CONF_H
#define _CPUSET_CONF_H

/*
 *  Opaque handle for a cpuset configuration. The structure itself is
 *   not defined in this header; callers only pass the handle around.
 */
typedef struct cpuset_conf * cpuset_conf_t;

/*
 *  Valid allocation policies for cpusets
 */
enum fit_policy {
    BEST_FIT,
    FIRST_FIT,
    WORST_FIT,
};


/*
 *  Accessor routines
 *
 *  The functions taking only a `conf' argument read a setting; the
 *   cpuset_conf_set_* functions store one. The meaning of the int
 *   return value of the setters is not visible from this header
 *   (presumably 0 on success, < 0 on failure -- TODO confirm in conf.c).
 */
enum fit_policy cpuset_conf_policy (cpuset_conf_t conf);

int cpuset_conf_alloc_idle (cpuset_conf_t conf);

int cpuset_conf_constrain_mem (cpuset_conf_t conf);

int cpuset_conf_alloc_idle_gt (cpuset_conf_t conf);

int cpuset_conf_alloc_idle_multiple (cpuset_conf_t conf);

int cpuset_conf_kill_orphans (cpuset_conf_t conf);

int cpuset_conf_reverse_order (cpuset_conf_t conf);

int cpuset_conf_set_policy (cpuset_conf_t conf, enum fit_policy policy);

int cpuset_conf_set_alloc_idle (cpuset_conf_t conf, int alloc_idle);

int cpuset_conf_set_alloc_idle_mode (cpuset_conf_t conf, int multiple_only);

int cpuset_conf_set_kill_orphans (cpuset_conf_t conf, int kill_orphans);

/*  String variants: take the textual form of a setting in `s'/`name'
 *   (accepted spellings are defined by the implementation, not here).
 */
int cpuset_conf_set_alloc_idle_string (cpuset_conf_t conf, const char *s);

int cpuset_conf_set_policy_string (cpuset_conf_t conf, const char *name);

int cpuset_conf_set_constrain_mem (cpuset_conf_t conf, int constrain_mem);

int cpuset_conf_set_order (cpuset_conf_t conf, int reverse);
/*
 *  Create and Destroy:
 *
 *  A handle obtained from cpuset_conf_create() is released with
 *   cpuset_conf_destroy().
 */
cpuset_conf_t cpuset_conf_create ();

void cpuset_conf_destroy (cpuset_conf_t conf);


/*
 *  Parsing
 *
 *  cpuset_conf_parse() takes an explicit path, cpuset_conf_parse_system()
 *   takes none (presumably it reads the system-wide config file --
 *   TODO confirm), and cpuset_conf_parse_opt() takes a single option
 *   string.
 */

int cpuset_conf_parse (cpuset_conf_t conf, const char *path);

int cpuset_conf_parse_system (cpuset_conf_t conf);

int cpuset_conf_parse_opt (cpuset_conf_t conf, const char *opt);

/*
 *  Return filename of last config file parsed
 */
const char *cpuset_conf_file (cpuset_conf_t conf);

/*
 *  Record `file' as the config filename for `conf'.
 */
void cpuset_conf_set_file (cpuset_conf_t conf, const char *file);

#endif
/*
 * vi: ts=4 sw=4 expandtab
 */
For more information, see the 35 | new manual pages included with the distribution. 36 | - auto-affinity: Do not set CPU affinity by default if the number 37 | of available CPUs is not evenly divisible by the number of tasks. 38 | 39 | Version 0.28 (2008-07-22): 40 | - auto-affinity: Fix error where spank_post_opt hook was incorrectly 41 | run in srun, which caused an immediate error and abort. 42 | 43 | Version 0.27 (2008-07-16): 44 | - cpuset: Expand cpuset support to per-task cpusets via --use-cpusets=tasks. 45 | 46 | Version 0.26 (2008-07-16): 47 | - cpuset: Add support for per-job-step cpusets via the new srun option 48 | '--use-cpusets'. See the README or --use-cpusets=help for more information. 49 | - auto-affinity: Delay detection of current cpuset until after user 50 | option processing in the event that user option changed our cpuset. 51 | 52 | Version 0.25 (2008-07-10): 53 | - cpuset: Added cpuset plugin to constrain jobs to number of CPUs 54 | allocated on shared, but not oversubscribed nodes. 55 | - auto-affinity: Make auto-affinity plugin cpuset-aware. CPU affinity 56 | is assigned as if the job were running on a node the size of the 57 | current cpuset. If cpusets are not enabled, the auto-affinity behavior 58 | is unchanged. 59 | 60 | Version 0.24 (2008-06-10): 61 | - auto-affinity: Query SLURM controller for number of CPUs allocated 62 | to the current job in exclusive_only mode if the environment variable 63 | SLURM_JOB_CPUS_PER_NODE is not set. 64 | 65 | Version 0.23 (2008-06-10): 66 | - auto-affinity: Add 'exclusive_only' flag to auto-affinity plugin 67 | to constrain plugin activity to only those jobs that have exclusive 68 | use of the current node. 69 | 70 | (2008-06-10): 71 | - Started NEWS file. 
72 | 73 | $Id: NEWS 7811 2008-09-25 22:21:11Z grondo $ 74 | -------------------------------------------------------------------------------- /system-safe.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | /* 32 | * All spank plugins must define this macro for the SLURM plugin loader. 
33 | */ 34 | SPANK_PLUGIN(system-safe, 1) 35 | 36 | #define SYSTEM_SAFE_ENABLE 0x0 37 | #define SYSTEM_SAFE_DISABLE 0x1 38 | 39 | /* 40 | * Disabled by default 41 | */ 42 | static int enabled = 0; 43 | static int opt_enable = 0; 44 | static int opt_disable = 0; 45 | 46 | static int _opt_process (int val, const char *optarg, int remote); 47 | 48 | /* 49 | * Provide a --renice=[prio] option to srun: 50 | */ 51 | struct spank_option spank_options[] = 52 | { 53 | { "system-safe", NULL, "Replace system(3) with version safe for MPI.", 54 | 0, SYSTEM_SAFE_ENABLE, 55 | (spank_opt_cb_f) _opt_process 56 | }, 57 | { "no-system-safe", NULL, "Disable system(3) replacement.", 58 | 0, SYSTEM_SAFE_DISABLE, 59 | (spank_opt_cb_f) _opt_process 60 | }, 61 | SPANK_OPTIONS_TABLE_END 62 | }; 63 | 64 | 65 | /* 66 | * Called from both srun and slurmd. 67 | */ 68 | int slurm_spank_init (spank_t sp, int ac, char **av) 69 | { 70 | int i; 71 | 72 | if (!spank_remote (sp)) 73 | return (0); 74 | 75 | for (i = 0; i < ac; i++) { 76 | if (strncmp ("enabled", av[i], 7) == 0) { 77 | enabled = 1; 78 | } 79 | else if (strncmp ("disabled", av[i], 8) == 0) { 80 | enabled = 0; 81 | } 82 | else { 83 | slurm_error ("system-safe: Invalid option \"%s\"", av[i]); 84 | } 85 | } 86 | 87 | return (0); 88 | } 89 | 90 | int slurm_spank_user_init (spank_t sp, int ac, char **av) 91 | { 92 | char buf [4096]; 93 | const char *preload = "system-safe-preload.so"; 94 | 95 | if (opt_disable || (!enabled && !opt_enable)) 96 | return (0); 97 | 98 | if (spank_getenv (sp, "LD_PRELOAD", buf, sizeof (buf)) == ESPANK_SUCCESS) 99 | snprintf (buf, sizeof (buf), "%s %s", buf, preload); 100 | else 101 | strncpy (buf, preload, strlen (preload)); 102 | 103 | if (spank_setenv (sp, "LD_PRELOAD", buf, 1) != ESPANK_SUCCESS) 104 | slurm_error ("Failed to set LD_PRELOAD=%s\n", buf); 105 | 106 | return (0); 107 | } 108 | 109 | static int _opt_process (int val, const char *optarg, int remote) 110 | { 111 | if (val == SYSTEM_SAFE_ENABLE) 112 | 
opt_enable = 1; 113 | else 114 | opt_disable = 0; 115 | 116 | return (0); 117 | } 118 | 119 | 120 | 121 | /* 122 | * vi: ts=4 sw=4 expandtab 123 | */ 124 | -------------------------------------------------------------------------------- /addr-no-randomize.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | /* 32 | * All spank plugins must define this macro for the SLURM plugin loader. 
33 | */ 34 | SPANK_PLUGIN(no-randomize, 1); 35 | 36 | #define ADDR_NO_RANDOMIZE 0x0040000 37 | 38 | static int default_randomize = 0; 39 | static int randomize = -1; 40 | 41 | #define OPT_RANDOMIZE 1 42 | #define OPT_NO_RANDOMIZE 2 43 | 44 | static int process_opts (int val, const char *optarg, int remote); 45 | 46 | /* 47 | * Provide options to srun: 48 | */ 49 | struct spank_option spank_options[] = 50 | { 51 | { "addr-randomize", NULL, 52 | "Enable address space randomization", 0, OPT_RANDOMIZE, 53 | (spank_opt_cb_f) process_opts 54 | }, 55 | { "no-addr-randomize", NULL, 56 | "Disable address space randomization", 0, OPT_NO_RANDOMIZE, 57 | (spank_opt_cb_f) process_opts 58 | }, 59 | SPANK_OPTIONS_TABLE_END 60 | }; 61 | 62 | 63 | /* 64 | * Called from both srun and slurmd. 65 | */ 66 | int slurm_spank_init (spank_t sp, int ac, char **av) 67 | { 68 | int i; 69 | 70 | for (i = 0; i < ac; i++) { 71 | if (strncmp ("default_randomize=", av[i], 8) == 0) { 72 | const char *optarg = av[i] + 18; 73 | if (*optarg == '0') 74 | default_randomize = 0; 75 | else if (*optarg == '1') 76 | default_randomize = 1; 77 | else 78 | slurm_error ("no-randomize: Ignoring invalid default value: " 79 | "\"%s\"", av[i]); 80 | } 81 | else { 82 | slurm_error ("no-randomize: Invalid option \"%s\"", av[i]); 83 | } 84 | } 85 | 86 | randomize = default_randomize; 87 | 88 | return (0); 89 | } 90 | 91 | static int process_opts (int val, const char *optarg, int remote) 92 | { 93 | if (val == OPT_RANDOMIZE) 94 | randomize = 1; 95 | else if (val == OPT_NO_RANDOMIZE) 96 | randomize = 0; 97 | else 98 | randomize = default_randomize; 99 | 100 | return (0); 101 | } 102 | 103 | int slurm_spank_task_init (spank_t sp, int ac, char **av) 104 | { 105 | if (randomize == -1) 106 | randomize = default_randomize; 107 | 108 | slurm_info ("randomize = %d\n", randomize); 109 | 110 | if (randomize == 0 && (personality (ADDR_NO_RANDOMIZE) < 0)) 111 | slurm_error ("Failed to set personality: %m"); 112 | return 0; 113 | } 
.PP
Used alone, the \fB--use-cpusets\fR option enables per-job-step
cpusets for the spawned tasks. Options that change policies
and behavior of the SLURM cpuset plugin may be specified with an
optional list of comma-separated arguments to the \fB--use-cpusets\fR
option, e.g.
Using cpusets for multiple job steps under an allocation of 1 node
with 8 cpus.
82 | 83 | .nf 84 | 85 | > srun --use-cpusets=debug -n1 sleep 100 & 86 | 87 | cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] 88 | cpuset: /slurm/6885/69993/0: 1 cpu [0], 1 mem [0] 89 | 90 | > srun --use-cpusets=debug -n2 sleep 100 & 91 | 92 | cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] 93 | cpuset: /slurm/6885/69993/1: 2 cpus [2-3], 1 mem [1] 94 | 95 | .fi 96 | Use of --use-cpusets=tasks 97 | 98 | .nf 99 | 100 | > srun --use-cpusets=debug,tasks -n4 sleep 100 101 | 102 | cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] 103 | cpuset: /slurm/6885/69993/2: 4 cpus [0-3], 2 mems [0-1] 104 | cpuset: /slurm/6885/69993/2/0: 1 cpu [0], 1 mem [0] 105 | cpuset: /slurm/6885/69993/2/1: 1 cpu [1], 1 mem [0] 106 | cpuset: /slurm/6885/69993/2/2: 1 cpu [2], 1 mem [1] 107 | cpuset: /slurm/6885/69993/2/3: 1 cpu [3], 1 mem [1] 108 | .fi 109 | .SH AUTHOR 110 | Mark Grondona 111 | 112 | .SH "SEE ALSO" 113 | .BR slurm-cpuset (8), 114 | .BR cpuset (4) 115 | -------------------------------------------------------------------------------- /tmpdir.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | SPANK_PLUGIN (tmpdir, 1); 31 | 32 | /* 33 | * Create job-specific TMPDIR. 34 | * Called from srun after allocation before launch. 35 | * Does the equivalent of TMPDIR=${TMPDIR-/tmp}/$SLURM_JOBID.$SLURM_STEPID 36 | */ 37 | int slurm_spank_local_user_init (spank_t sp, int ac, char **av) 38 | { 39 | uint32_t jobid, stepid; 40 | const char *tmpdir; 41 | char buf [1024]; 42 | int n; 43 | 44 | if (spank_get_item (sp, S_JOB_ID, &jobid) != ESPANK_SUCCESS) { 45 | slurm_error ("Failed to get jobid from SLURM"); 46 | return (-1); 47 | } 48 | 49 | if (spank_get_item (sp, S_JOB_STEPID, &stepid) != ESPANK_SUCCESS) { 50 | slurm_error ("Failed to get job step id from SLURM"); 51 | return (-1); 52 | } 53 | 54 | if (!(tmpdir = getenv ("TMPDIR"))) 55 | tmpdir = "/tmp"; 56 | 57 | n = snprintf (buf, sizeof (buf), "%s/%u.%u", tmpdir, jobid, stepid); 58 | 59 | if ((n < 0) || (n > sizeof (buf) - 1)) { 60 | slurm_error ("TMPDIR = \"%s\" too large. 
Aborting"); 61 | return (-1); 62 | } 63 | 64 | if (setenv ("TMPDIR", buf, 1) < 0) { 65 | slurm_error ("setenv (TMPDIR, \"%s\"): %m", buf); 66 | return (-1); 67 | } 68 | 69 | return (0); 70 | } 71 | 72 | /* 73 | * ``rm -rf TMPDIR'' *as user* after job tasks have exited 74 | */ 75 | int slurm_spank_exit (spank_t sp, int ac, char **av) 76 | { 77 | const char sudo [] = "/usr/bin/sudo -u"; 78 | const char rm [] = "/bin/rm -rf"; 79 | char tmp [1024]; 80 | char cmd [4096]; 81 | int n; 82 | int status; 83 | uid_t uid = (uid_t) -1; 84 | 85 | if (!spank_remote (sp)) 86 | return (0); 87 | 88 | if (spank_getenv (sp, "TMPDIR", tmp, sizeof (tmp)) != ESPANK_SUCCESS) { 89 | slurm_error ("Unable to remove TMPDIR at exit!"); 90 | return (-1); 91 | } 92 | 93 | if (spank_get_item (sp, S_JOB_UID, &uid) != ESPANK_SUCCESS) { 94 | slurm_error ("tmpdir: Unable to get job's user id"); 95 | return (-1); 96 | } 97 | 98 | n = snprintf (cmd, sizeof (cmd), "%s \\#%d %s %s", sudo, uid, rm, tmp); 99 | 100 | if ((n < 0) || (n > sizeof (cmd) - 1)) { 101 | slurm_error ("Unable to remove TMPDIR at exit!"); 102 | return (-1); 103 | } 104 | 105 | if ((status = system (cmd)) != 0) { 106 | slurm_error ("\"%s\" exited with status=0x%04x\n", cmd, status); 107 | return (-1); 108 | } 109 | 110 | return (0); 111 | } 112 | -------------------------------------------------------------------------------- /lua/lua-schedutils/tests/lunit: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | # This file is part of lunit 0.5. 
4 | # 5 | # For Details about lunit look at: http://www.mroth.net/lunit/ 6 | # 7 | # Author: Michael Roth 8 | # 9 | # Copyright (c) 2004-2009 Michael Roth 10 | # 11 | # Permission is hereby granted, free of charge, to any person 12 | # obtaining a copy of this software and associated documentation 13 | # files (the "Software"), to deal in the Software without restriction, 14 | # including without limitation the rights to use, copy, modify, merge, 15 | # publish, distribute, sublicense, and/or sell copies of the Software, 16 | # and to permit persons to whom the Software is furnished to do so, 17 | # subject to the following conditions: 18 | # 19 | # The above copyright notice and this permission notice shall be 20 | # included in all copies or substantial portions of the Software. 21 | # 22 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 24 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 25 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 26 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 27 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 28 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 | 30 | 31 | if test $# = 0 ; then 32 | echo "$0: Usage Error. Try $0 --help" >&2 33 | exit 1 34 | fi 35 | 36 | if [ `uname` = "Darwin" ]; then 37 | scriptname="$(readlink -n "$0")" 38 | else 39 | scriptname="$(readlink -n -f "$0")" 40 | fi 41 | interpreter="lua" 42 | options="" 43 | 44 | while true ; do 45 | case "$1" in 46 | -h|--help) 47 | cat < 50 | This program comes WITHOUT WARRANTY OF ANY KIND. 51 | 52 | Usage: lunit [OPTIONS] [--] scripts 53 | 54 | Options: 55 | 56 | -i, --interpreter LUA Complete path of the lua binary to use. 57 | -p, --path PATH Sets the LUA_PATH environment for the tests. 58 | --cpath CPATH Sets the LUA_CPATH environment for the tests. 
59 | -r, --runner RUNNER Testrunner to use, defaults to 'lunit-console'. 60 | -t, --test PATTERN Which tests to run, may contain * or ? wildcards. 61 | --loadonly Only load the tests. 62 | --dontforce Do not force to load $scriptname*.lua. 63 | -h, --help Print this help screen. 64 | --version Print lunit version. 65 | 66 | Please report bugs to . 67 | EOT 68 | exit ;; 69 | 70 | --version) 71 | echo "lunit 0.5 Copyright 2004-2009 Michael Roth " 72 | exit ;; 73 | 74 | -i|--interpreter) 75 | interpreter="$2" 76 | shift 2 ;; 77 | 78 | -p|--path) 79 | LUA_PATH="$2" 80 | export LUA_PATH 81 | shift 2 ;; 82 | 83 | --cpath) 84 | LUA_CPATH="$2" 85 | export LUA_CPATH 86 | shift 2 ;; 87 | 88 | --loadonly) 89 | options="$options $1" 90 | shift 1 ;; 91 | 92 | --dontforce) 93 | scriptname="" 94 | shift 1 ;; 95 | 96 | -r|--runner|-t|--test) 97 | options="$options $1 $2" 98 | shift 2 ;; 99 | 100 | --) 101 | break ;; 102 | 103 | -*) 104 | echo "$0: Invalid option: $1" >&2 105 | exit 1 ;; 106 | 107 | *) 108 | break ;; 109 | esac 110 | done 111 | 112 | 113 | exec "$interpreter" - "$scriptname" $options "$@" < 0 or stats.failed > 0 then 126 | os.exit(1) 127 | end 128 | EOT 129 | -------------------------------------------------------------------------------- /lua/lua-schedutils/tests/lunit-console.lua: -------------------------------------------------------------------------------- 1 | 2 | --[[-------------------------------------------------------------------------- 3 | 4 | This file is part of lunit 0.5. 
--[[

  Console test runner for lunit. The functions below are the runner
  callbacks:

      begin()
      run(testcasename, testname)
      err(fullname, message, traceback)
      fail(fullname, where, message, usermessage)
      pass(testcasename, testname)
      done()

  Fullname:
      testcase.testname
      testcase.testname:setupname
      testcase.testname:teardownname

--]]


require "lunit"

module( "lunit-console", package.seeall )


-- Write a string.format()-style message to standard output.
local function printformat(format, ...)
  io.write( string.format(format, ...) )
end


-- Number of status characters written on the current output line.
local columns_printed = 0

-- Write one status character and flush. A prefix is written before the
-- first character of a line, and a new line is started once 60
-- characters have been written.
local function writestatus(char)
  if columns_printed == 0 then
    io.write(" ")
  end
  if columns_printed == 60 then
    io.write("\n ")
    columns_printed = 0
  end
  io.write(char)
  io.flush()
  columns_printed = columns_printed + 1
end


-- Error and failure reports collected during the run; printed by done().
local msgs = {}


-- Count the loaded testcases and tests and print a one-line summary.
function begin()
  local total_tc = 0
  local total_tests = 0

  for tcname in lunit.testcases() do
    total_tc = total_tc + 1
    for testname, test in lunit.tests(tcname) do
      total_tests = total_tests + 1
    end
  end

  printformat("Loaded testsuite with %d tests in %d testcases.\n\n", total_tests, total_tc)
end


-- Nothing is reported for run(); this runner only reports results.
function run(testcasename, testname)
  -- NOP
end


-- Mark an error with "E" and queue its message and traceback
-- (traceback is joined with table.concat, so it must be a table).
function err(fullname, message, traceback)
  writestatus("E")
  msgs[#msgs+1] = "Error! ("..fullname.."):\n"..message.."\n\t"..table.concat(traceback, "\n\t") .. "\n"
end


-- Mark a failure with "F" and queue its description. usermessage is
-- optional and is appended on its own line when given.
function fail(fullname, where, message, usermessage)
  writestatus("F")
  local text =  "Failure ("..fullname.."):\n"..
                where..": "..message.."\n"

  if usermessage then
    text = text .. where..": "..usermessage.."\n"
  end

  msgs[#msgs+1] = text
end


-- Mark a passed test with ".".
function pass(testcasename, testname)
  writestatus(".")
end



-- Print the assertion count, every queued error/failure report (numbered),
-- and the final passed/failed/errors totals from lunit.stats.
function done()
  printformat("\n\n%d Assertions checked.\n", lunit.stats.assertions )
  print()

  for i, msg in ipairs(msgs) do
    printformat( "%3d) %s\n", i, msg )
  end

  printformat("Testsuite finished (%d passed, %d failed, %d errors).\n",
      lunit.stats.passed, lunit.stats.failed, lunit.stats.errors )
end
#ifndef _USE_ENV_H
#define _USE_ENV_H

/*  Values for lex_item.type */
enum { TYPE_STR, TYPE_INT, TYPE_SYM };
/*  Values for sym.type */
enum { SYM_INT, SYM_STR };

/*
 *  An item produced while scanning a use-env file; `type' selects
 *   which member of `val' is meaningful.
 */
struct lex_item {
    int used;            /* Is item still used (for item cache)     */
    char * name;         /* Name of item                             */
    int type;            /* Type of item (int, string, symbol)       */
    char * str;          /* String representation of item            */

    union {              /* Union of different item types            */
        int num;
        char *str;
        const struct sym *sym;
    } val;
};

/*
 *  A named symbol with an integer or string value.
 */
struct sym {
    char * name;         /* Name of symbol                           */
    int type;            /* Type of symbol (INT || STRING)           */
    int val;             /* Value if type is INT                     */
    char * string;       /* String representation                    */
};

/*
 *  Environment access callbacks. Each receives the opaque `arg'
 *   registered with use_env_set_operations() as its first argument.
 */
typedef char * (*getenv_f) (void *arg, const char *name);
typedef int (*unsetenv_f) (void *arg, const char *name);
typedef int (*setenv_f) (void *arg, const char *name,
                         const char *value, int overwrite);

struct use_env_ops {
    getenv_f getenv;
    setenv_f setenv;
    unsetenv_f unsetenv;
};

/*
 *  Environment manipulation
 *
 *  NOTE(review): presumably these dispatch through the registered
 *   use_env_ops -- confirm in the implementation.
 */
const char * xgetenv (const char *name);
int xunsetenv (const char *name);
int xsetenv (const char *name, const char *value, int overwrite);


/*
 *  Parser operations:
 */
void use_env_parser_init ();
void use_env_set_operations (struct use_env_ops *ops, void *arg);
int use_env_parse (const char *filename);
void use_env_parser_fini ();

/*
 *  Lexer cleanup
 */
void lex_fini ();

/*
 *  lex_item functions
 */
void lex_item_cache_clear ();
struct lex_item * lex_item_create (char *name, int type);
int is_valid_identifier (const char *s);

int item_cmp (int cmp, struct lex_item *x, struct lex_item *y);
int item_strcmp (struct lex_item *x, struct lex_item *y);
char * item_str (struct lex_item *item);
int item_val (struct lex_item *item);
int item_type_int (struct lex_item *i);

/*
 *  symbol lookup and definition functions
 */
const struct sym * sym (char *name);
const struct sym * sym_define (char *name, const char *value);
const struct sym * keyword_define (char *name, const char *value);
int sym_delete (char *name);
int env_cache_delete (char *name);
void symtab_destroy ();
void keytab_destroy ();
void dump_keywords ();
void dump_symbols ();

/*
 *  include file functions
 */
int lex_file_init (const char *file);
int lex_include_push (const char *include);
int lex_include_pop ();

/*  Current file name and line number of the lexer */
const char *lex_file ();
int lex_line ();
int lex_line_increment ();

#endif
/*
 * vi: ts=4 sw=4 expandtab
 */
20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | 33 | /* 34 | * All spank plugins must define this macro for the SLURM plugin loader. 35 | */ 36 | SPANK_PLUGIN(iotrace, 1) 37 | 38 | #define IOTRACE_ENABLE 1 39 | 40 | static int enabled = 0; 41 | static char *flags = NULL; 42 | 43 | static int _opt_process (int val, const char *optarg, int remote); 44 | 45 | /* 46 | * Provide a --iotrace option to srun: 47 | */ 48 | struct spank_option spank_options[] = 49 | { 50 | { "iotrace", "[flags]", "Enable application I/O tracing.", 51 | 2, IOTRACE_ENABLE, 52 | (spank_opt_cb_f) _opt_process 53 | }, 54 | SPANK_OPTIONS_TABLE_END 55 | }; 56 | 57 | 58 | static void _iotrace_label(spank_t sp, char *buf, int len) 59 | { 60 | char hostname[128], *p; 61 | uint32_t taskid = 0; 62 | spank_err_t rc; 63 | 64 | rc = spank_get_item (sp, S_TASK_GLOBAL_ID, &taskid); 65 | if (rc != ESPANK_SUCCESS) 66 | slurm_error ("iotrace: error fetching taskid: %d", rc); 67 | 68 | if (gethostname (hostname, sizeof (hostname)) == 0) { 69 | hostname[sizeof(hostname) - 1] = '\0'; 70 | if ((p = strchr (hostname, '.'))) 71 | *p = '\0'; 72 | } else 73 | strncpy (hostname, "unknown", sizeof(hostname)); 74 | 75 | snprintf (buf, len, "iotrace-%d@%s", taskid, hostname); 76 | } 77 | 78 | int slurm_spank_task_init (spank_t sp, int ac, char **av) 79 | { 80 | char nbuf [4096], obuf [4096]; 81 | char label [64]; 82 | const char *preload = "libplasticfs.so"; 83 | const char *lflags = flags ? 
flags : ""; 84 | 85 | if (!enabled) 86 | return (0); 87 | 88 | /* append to LD_PRELOAD (with a space) */ 89 | if (spank_getenv (sp, "LD_PRELOAD", obuf, sizeof (obuf)) == ESPANK_SUCCESS) 90 | snprintf (nbuf, sizeof (nbuf), "%s %s", obuf, preload); 91 | else 92 | strncpy (nbuf, preload, strlen (preload)); 93 | if (spank_setenv (sp, "LD_PRELOAD", nbuf, 1) != ESPANK_SUCCESS) 94 | slurm_error ("Failed to set LD_PRELOAD=%s\n", nbuf); 95 | 96 | /* prepend to PLASTICFS (with a pipe) */ 97 | _iotrace_label (sp, label, sizeof (label)); 98 | if (spank_getenv (sp, "PLASTICFS", obuf, sizeof (obuf)) == ESPANK_SUCCESS) 99 | snprintf (nbuf, sizeof (nbuf), "log - %s %s | %s", 100 | label, lflags, obuf); 101 | else 102 | snprintf (nbuf, sizeof (nbuf), "log - %s %s", label, flags); 103 | 104 | if (spank_setenv (sp, "PLASTICFS", nbuf, 1) != ESPANK_SUCCESS) 105 | slurm_error ("Failed to set PLASTICFS=%s\n", nbuf); 106 | 107 | return (0); 108 | } 109 | 110 | static int _opt_process (int val, const char *optarg, int remote) 111 | { 112 | switch (val) { 113 | case IOTRACE_ENABLE: 114 | enabled = 1; 115 | if (optarg) 116 | flags = strdup (optarg); 117 | break; 118 | default: 119 | slurm_error ("Ignoring unknown iotrace option value %d\n", val); 120 | break; 121 | } 122 | 123 | return (0); 124 | } 125 | 126 | int slurm_spank_exit (spank_t sp, int ac, char **av) 127 | { 128 | if (flags) 129 | free (flags); 130 | return (0); 131 | } 132 | 133 | /* 134 | * vi: ts=4 sw=4 expandtab 135 | */ 136 | -------------------------------------------------------------------------------- /iorelay/iorelay.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Jim Garlick . 
6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include 35 | 36 | /* 37 | * All spank plugins must define this macro for the SLURM plugin loader. 38 | */ 39 | SPANK_PLUGIN(iorelay, 1) 40 | 41 | #define IORELAY_ENABLE 1 42 | 43 | /* Usage: iorelay-mount-nodezero -u user -m mntpt */ 44 | #define MOUNT_SCRIPT "/usr/libexec/iorelay-mount-nodezero" 45 | 46 | /* Usage: iorelay-bind-nfs -m mntpt */ 47 | #define BIND_SCRIPT "/usr/libexec/iorelay-bind-nfs" 48 | 49 | static int enabled = 0; 50 | 51 | static int _opt_process (int val, const char *optarg, int remote); 52 | 53 | /* 54 | * Provide a --iorelay option to srun: 55 | */ 56 | struct spank_option spank_options[] = 57 | { 58 | { "iorelay", NULL, "Enable NFS I/O relaying.", 59 | 1, IORELAY_ENABLE, 60 | (spank_opt_cb_f) _opt_process 61 | }, 62 | SPANK_OPTIONS_TABLE_END 63 | }; 64 | 65 | /* 66 | * Called from both srun and slurmd. 
67 | */ 68 | int slurm_spank_init (spank_t sp, int ac, char **av) 69 | { 70 | char cmd[256]; 71 | struct passwd *pw; 72 | uid_t uid; 73 | 74 | if (!enabled || !spank_remote (sp)) 75 | return (0); 76 | 77 | spank_get_item (sp, S_JOB_UID, &uid); 78 | pw = getpwuid (uid); 79 | if (!pw) { 80 | slurm_error ("Error looking up uid in /etc/passwd"); 81 | return (-1); 82 | } 83 | 84 | /* Unshare file namespace. This means only this process and its children 85 | * will see the following mounts, and when this process and its children 86 | * terminate, the mounts go away automatically. 87 | */ 88 | if (unshare (CLONE_NEWNS) < 0) { 89 | slurm_error ("unshare CLONE_NEWNS: %m"); 90 | return (-1); 91 | } 92 | 93 | /* Mount node zero root on /mnt using sshfs. 94 | * Script has no effect on node zero. 95 | */ 96 | snprintf (cmd, sizeof(cmd), "%s -u %s -m /mnt", MOUNT_SCRIPT, pw->pw_name); 97 | if (system (cmd) != 0) { 98 | slurm_error ("Error running `%s': %m", cmd); 99 | return (-1); 100 | } 101 | 102 | /* Bind NFS-mounted directories now mirrored in /mnt via sshfs 103 | * over their NFS mount points. 104 | * Script has no effect on node zero. 105 | */ 106 | snprintf (cmd, sizeof(cmd), "%s -m /mnt", BIND_SCRIPT); 107 | if (system (cmd) != 0) { 108 | slurm_error ("Error running `%s': %m", cmd); 109 | return (-1); 110 | } 111 | 112 | return (0); 113 | } 114 | 115 | /* 116 | * Called from both srun and slurmd. 117 | */ 118 | int slurm_spank_exit (spank_t sp, int ac, char **av) 119 | { 120 | /* Do nothing here as mounts in private namespace will take care of 121 | * themselves. 
122 | */ 123 | return (0); 124 | } 125 | 126 | static int _opt_process (int val, const char *optarg, int remote) 127 | { 128 | switch (val) { 129 | case IORELAY_ENABLE: 130 | enabled = 1; 131 | break; 132 | default: 133 | slurm_error ("Ignoring unknown iorelay option value %d\n", val); 134 | break; 135 | } 136 | 137 | return (0); 138 | } 139 | 140 | /* 141 | * vi: ts=4 sw=4 expandtab 142 | */ 143 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | Version 0.25 (2012-12-10): 2 | - auto-affinity: Disable auto-affinity for batch scripts. 3 | 4 | Version 0.24 (2012-05-07): 5 | - lua: Add missing support for S_GLOBAL_TO_LOCAL_ID and S_LOCAL_TO_GLOBAL_ID 6 | - lua: Fix S_JOB_PID_TO_LOCAL_ID functionality 7 | 8 | Version 0.23 (2012-03-30): 9 | - lua: Fix spank-lua(8) manpage reference to S_TASK_EXIT_CODE 10 | - lua: Remove explicit newlines from debug meessages. 11 | 12 | Version 0.22 (2012-02-18): 13 | - lua: support job_prolog and job_epilog callbacks 14 | - lua: support slurmd_init and slurmd_exit callbacks 15 | - lua: add support for spank_option_getopt (spank:getopt) 16 | 17 | Version 0.21 (2011-12-19): 18 | - use-env: Do not fail on missing HOME environment variable. 19 | 20 | Version 0.20 (2011-12-19): 21 | - lua: Fix erroneous failures in spank_init(). 22 | 23 | Version 0.19 (2011-12-19): 24 | - auto-affinity: Fix --auto-affinity=off 25 | - use-env: Fix segfault in remote options processing 26 | - lua: Do not fail on error loading/compiling lua scripts by default 27 | (Set 'failonerror' for old behavior. see spank-lua(8) man page) 28 | 29 | Version 0.18 (2011-10-11): 30 | - Disable build of cpuset plugin on CHAOS5 systems (LLNL-only). 
31 | - lua: Add S_STEP_ALLOC_MEM and S_STEP_ALLOC_CORES for spank:get_item() 32 | - lua: Add multi-value return for S_TASK_EXIT_CODE (status, code, signal, core) 33 | 34 | Version 0.17 (2011-03-22): 35 | - Disable build of oom-detect plugin, which is now obsolete (LLNL-only). 36 | - Fix cpuset plugin build against slurm-2.3. 37 | 38 | Version 0.16 (2011-01-12): 39 | - Lua: Package a lua 'schedutils' module for access to sched_getaffinity(2) 40 | and sched_setaffinity(2) from lua spank plugins. 41 | 42 | Version 0.15 (2011-01-10): 43 | - auto-affinity: Fix bug in auto CPU affinity enable for case where 44 | multiples_only is not set. 45 | - Allow a repeat count for CPU masks in the masks= option. For example, 46 | 0xf0*4,0x0f*2 = 0xf0,0xf0,0xf0,0xf0,0x0f,0x0f. 47 | 48 | Version 0.14 (2010-10-28): 49 | - auto-affinity: Check for SLURM's -c, --cpus-per-task, and if set 50 | to a value greater than 1, use this for cpt,cpus_per_task. 51 | - auto-affinity: Unless the config option 'multiples_only' is set 52 | round cpus_per_task down to the next multiple, if the result is 53 | greater than 1. (E.g. 5 tasks on a 12 CPU node would use 54 | 2 CPUs per task, leaving 2 CPUs unbound.) 55 | 56 | Version 0.13 (2010-08-19): 57 | - auto-affinity: Add cpus= and masks= options to allow manual 58 | mapping of tasks to CPUs. 59 | 60 | Version 0.12 (2010-03-08): 61 | - Fix unchecked return from slurm_load_jobs(3) in cpuset code. 62 | 63 | Version 0.11 (2010-03-07): 64 | - Fix segfault in spank/lua due to lua stack overflow. 65 | - Fix use-env segv when exit hook called before init. 66 | - Fixes for spank/cpuset use with slurm-2.1. 67 | 68 | Version 0.10 (2009-12-21): 69 | - Fix segfault in spank/lua when no lua scripts are configured. 70 | 71 | Version 0.9 (2009-12-16): 72 | - Added lua plugin. 73 | - Fix preprocessor tests for SLURM-2.1.0 74 | 75 | Version 0.8 (2009-11-05): 76 | - Added setsched plugin (Matthieu Hautreux) 77 | - Adapt existing plugins to new API in SLURM-2.1.0.
78 | 79 | Version 0.7 (2009-05-12): 80 | - Added statfs(1) after mount in private-mount plugin. 81 | 82 | Version 0.6 (2009-04-23): 83 | - Added private-mount plugin (llnl subpackage). 84 | 85 | Version 0.5 (2009-03-13): 86 | - auto-affinity: Don't assume any correspondence between CPU 87 | logical ID and CPU physical location (i.e. socket). This fixes 88 | poor CPU assignment on systems where CPUs are not necessarily 89 | sequentially numbered. 90 | - The preserve-env plugin is no longer packaged automatically 91 | with the slurm-spank-plugins RPM. This functionality is now 92 | available in SLURM directly as of SLURM 1.4. 93 | - Don't include oom-detect in the base slurm-spank-plugins RPM, 94 | it belongs in the llnl-specific RPM only. 95 | 96 | Version 0.4 (2008-10-09): 97 | - preserve-env: Fix slurmstepd crash when dealing with environment 98 | variables larger than 64B (a very common case). The plugin now 99 | uses dynamic instead of static buffers. 100 | 101 | Version 0.3 (2008-10-06): 102 | - Fix RPM build without cpuset and llnl subpackages. 103 | 104 | Version 0.2 (2008-10-06): 105 | - slurm-spank-plugins RPMs now obsolete chaos-spankings RPMs. 106 | For older NEWS items, see NEWS.old. 107 | 108 | Version 0.1 (2008-10-03): 109 | - First release as slurm-spank-plugins. 110 | - For older history, see NEWS.old. 111 | -------------------------------------------------------------------------------- /lib/fd.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * $Id: fd.h 412 2003-06-03 21:31:19Z achu $ 3 | ***************************************************************************** 4 | * This file is part of the Munge Uid 'N' Gid Emporium (MUNGE). 5 | * For details, see . 6 | * UCRL-CODE-2003-???. 7 | * 8 | * Copyright (C) 2001-2003 The Regents of the University of California. 
9 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 10 | * Written by Chris Dunlap . 11 | * 12 | * This is free software; you can redistribute it and/or modify it 13 | * under the terms of the GNU General Public License as published by 14 | * the Free Software Foundation; either version 2 of the License, or 15 | * (at your option) any later version. 16 | * 17 | * This is distributed in the hope that it will be useful, but WITHOUT 18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 20 | * for more details. 21 | * 22 | * You should have received a copy of the GNU General Public License; 23 | * if not, write to the Free Software Foundation, Inc., 59 Temple Place, 24 | * Suite 330, Boston, MA 02111-1307 USA. 25 | *****************************************************************************/ 26 | 27 | 28 | #ifndef FD_H 29 | #define FD_H 30 | 31 | 32 | #if HAVE_CONFIG_H 33 | # include "config.h" 34 | #endif /* HAVE_CONFIG_H */ 35 | 36 | #include 37 | #include 38 | 39 | 40 | int fd_set_close_on_exec (int fd); 41 | /* 42 | * Sets the file descriptor [fd] to be closed on exec(). 43 | * Returns 0 on success, or -1 on error. 44 | */ 45 | 46 | int fd_set_nonblocking (int fd); 47 | /* 48 | * Sets the file descriptor [fd] for non-blocking I/O. 49 | * Returns 0 on success, or -1 on error. 50 | */ 51 | 52 | int fd_get_read_lock (int fd); 53 | /* 54 | * Obtain a read lock on the file specified by [fd]. 55 | * Returns 0 on success, or -1 if prevented from obtaining the lock. 56 | */ 57 | 58 | int fd_get_readw_lock (int fd); 59 | /* 60 | * Obtain a read lock on the file specified by [fd], 61 | * blocking until one becomes available. 62 | * Returns 0 on success, or -1 on error. 63 | */ 64 | 65 | int fd_get_write_lock (int fd); 66 | /* 67 | * Obtain a write lock on the file specified by [fd]. 68 | * Returns 0 on success, or -1 if prevented from obtaining the lock. 
69 | */ 70 | 71 | int fd_get_writew_lock (int fd); 72 | /* 73 | * Obtain a write lock on the file specified by [fd], 74 | * blocking until one becomes available. 75 | * Returns 0 on success, or -1 on error. 76 | */ 77 | 78 | int fd_release_lock (int fd); 79 | /* 80 | * Release a lock held on the file specified by [fd]. 81 | * Returns 0 on success, or -1 on error. 82 | */ 83 | 84 | pid_t fd_is_read_lock_blocked (int fd); 85 | /* 86 | * Checks to see if a lock exists on [fd] that would block a request for a 87 | * read-lock (ie, if a write-lock is already being held on the file). 88 | * Returns the pid of the process holding the lock, 0 if no lock exists, 89 | * or -1 on error. 90 | */ 91 | 92 | pid_t fd_is_write_lock_blocked (int fd); 93 | /* 94 | * Checks to see if a lock exists on [fd] that would block a request for a 95 | * write-lock (ie, if any lock is already being held on the file). 96 | * Returns the pid of the process holding the lock, 0 if no lock exists, 97 | * or -1 on error. 98 | */ 99 | 100 | ssize_t fd_read_n (int fd, void *buf, size_t n); 101 | /* 102 | * Reads up to [n] bytes from [fd] into [buf]. 103 | * Returns the number of bytes read, 0 on EOF, or -1 on error. 104 | */ 105 | 106 | ssize_t fd_write_n (int fd, void *buf, size_t n); 107 | /* 108 | * Writes [n] bytes from [buf] to [fd]. 109 | * Returns the number of bytes written, or -1 on error. 110 | */ 111 | 112 | ssize_t fd_read_line (int fd, void *buf, size_t maxlen); 113 | /* 114 | * Reads at most [maxlen-1] bytes up to a newline from [fd] into [buf]. 115 | * The [buf] is guaranteed to be NUL-terminated and will contain the 116 | * newline if it is encountered within [maxlen-1] bytes. 117 | * Returns the number of bytes read, 0 on EOF, or -1 on error. 118 | */ 119 | 120 | ssize_t fd_null_read_n (int fd, void *buf, size_t maxlen); 121 | /* 122 | * Reads up to [n] bytes from [fd] into [buf]. 123 | * Returns the number of bytes read, 0 on EOF, or -1 on error. 
124 | * Differs from fd_read_n() in that it checks for the presence 125 | * a null along the partial read and breaks out if it does. 126 | * Added by Mike Haskell 127 | */ 128 | 129 | #endif /* !FD_H */ 130 | -------------------------------------------------------------------------------- /lib/split.c: -------------------------------------------------------------------------------- 1 | /*****************************************************************************\ 2 | * $Id: split.c 1042 2006-03-30 20:55:59Z grondo $ 3 | ***************************************************************************** 4 | * Copyright (C) 2006 The Regents of the University of California. 5 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 6 | * Written by Jim Garlick . 7 | * UCRL-CODE-2003-005. 8 | * 9 | * This file is part of Pdsh, a parallel remote shell program. 10 | * For details, see . 11 | * 12 | * Pdsh is free software; you can redistribute it and/or modify it under 13 | * the terms of the GNU General Public License as published by the Free 14 | * Software Foundation; either version 2 of the License, or (at your option) 15 | * any later version. 16 | * 17 | * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY 18 | * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 19 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 20 | * details. 21 | * 22 | * You should have received a copy of the GNU General Public License along 23 | * with Pdsh; if not, write to the Free Software Foundation, Inc., 24 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 25 | \*****************************************************************************/ 26 | 27 | #if HAVE_CONFIG_H 28 | # include 29 | #endif 30 | 31 | #include 32 | #include 33 | #include 34 | #include "split.h" 35 | 36 | /* 37 | * Helper function for list_split(). Extract tokens from str. 
38 | * Return a pointer to the next token; at the same time, advance 39 | * *str to point to the next separator. 40 | * sep (IN) string containing list of separator characters 41 | * str (IN) double-pointer to string containing tokens and separators 42 | * RETURN next token 43 | */ 44 | static char *_next_tok(char *sep, char **str) 45 | { 46 | char *tok; 47 | 48 | /* push str past any leading separators */ 49 | while (**str != '\0' && strchr(sep, **str) != NULL) 50 | (*str)++; 51 | 52 | if (**str == '\0') 53 | return NULL; 54 | 55 | /* assign token pointer */ 56 | tok = *str; 57 | 58 | /* push str past token and leave pointing to first separator */ 59 | while (**str != '\0' && strchr(sep, **str) == NULL) 60 | (*str)++; 61 | 62 | /* nullify consecutive separators and push str beyond them */ 63 | while (**str != '\0' && strchr(sep, **str) != NULL) 64 | *(*str)++ = '\0'; 65 | 66 | return tok; 67 | } 68 | 69 | List list_split_append (List l, char *sep, char *str) 70 | { 71 | char *tok; 72 | 73 | if (sep == NULL) 74 | sep = " \t"; 75 | 76 | while ((tok = _next_tok(sep, &str)) != NULL) { 77 | if (strlen(tok) > 0) { 78 | char *s = strdup(tok); 79 | if (!s || !list_append(l, s)) 80 | goto fail; 81 | } 82 | } 83 | 84 | return l; 85 | 86 | fail: 87 | list_destroy(l); 88 | return NULL; 89 | } 90 | 91 | 92 | /* 93 | * Given a list of separators and a string, generate a list 94 | * sep (IN) string containing separater characters 95 | * str (IN) string containing tokens and separators 96 | * RETURN new list containing all tokens 97 | */ 98 | List list_split(char *sep, char *str) 99 | { 100 | List new = list_create((ListDelF) free); 101 | if (new == NULL) 102 | return NULL; 103 | return list_split_append(new, sep, str); 104 | } 105 | 106 | int list_join (char *result, size_t len, const char *sep, List l) 107 | { 108 | char *str = NULL; 109 | int n = 0; 110 | int truncated = 0; 111 | ListIterator i; 112 | 113 | memset (result, 0, len); 114 | 115 | if (list_count(l) == 0) 116 | 
return (0); 117 | 118 | i = list_iterator_create(l); 119 | while ((str = list_next(i))) { 120 | int count; 121 | 122 | if (!truncated) { 123 | count = snprintf(result + n, len - n, "%s%s", str, sep); 124 | 125 | if ((count >= (len - n)) || (count < 0)) 126 | truncated = 1; 127 | else 128 | n += count; 129 | } 130 | else 131 | n += strlen (str) + strlen (sep); 132 | } 133 | list_iterator_destroy(i); 134 | 135 | if (truncated) 136 | result [len - 1] = '\0'; 137 | else { 138 | /* 139 | * Delete final separator 140 | */ 141 | result[strlen(result) - strlen(sep)] = '\0'; 142 | } 143 | 144 | return (n); 145 | } 146 | 147 | /* vi: ts=4 sw=4 expandtab 148 | */ 149 | 150 | -------------------------------------------------------------------------------- /renice.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | /* 35 | * All spank plugins must define this macro for the SLURM plugin loader. 36 | */ 37 | SPANK_PLUGIN(renice, 1) 38 | 39 | #define PRIO_ENV_VAR "SLURM_RENICE" 40 | #define PRIO_NOT_SET 42 41 | 42 | /* 43 | * Minimum allowable value for priority. May be set globally 44 | * via plugin option min_prio= 45 | */ 46 | static int min_prio = -20; 47 | static int default_prio = 0; 48 | 49 | static int prio = PRIO_NOT_SET; 50 | 51 | static int _renice_opt_process (int val, const char *optarg, int remote); 52 | static int _str2prio (const char *str, int *p2int); 53 | static int _check_env (spank_t sp); 54 | 55 | /* 56 | * Provide a --renice=[prio] option to srun: 57 | */ 58 | struct spank_option spank_options[] = 59 | { 60 | { "renice", "[prio]", "Re-nice job tasks to priority [prio].", 1, 0, 61 | (spank_opt_cb_f) _renice_opt_process 62 | }, 63 | SPANK_OPTIONS_TABLE_END 64 | }; 65 | 66 | 67 | /* 68 | * Called from both srun and slurmd. 
69 | */ 70 | int slurm_spank_init (spank_t sp, int ac, char **av) 71 | { 72 | int i; 73 | 74 | for (i = 0; i < ac; i++) { 75 | if (strncmp ("min_prio=", av[i], 9) == 0) { 76 | const char *optarg = av[i] + 9; 77 | if (_str2prio (optarg, &min_prio) < 0) 78 | slurm_error ("Ignoring invalid min_prio value \"%s\"", av[i]); 79 | } 80 | else if (strncmp ("default=", av[i], 8) == 0) { 81 | const char *optarg = av[i] + 8; 82 | if (_str2prio (optarg, &default_prio) < 0) 83 | slurm_error ("renice: Ignoring invalid default value \"%s\"", 84 | av[i]); 85 | } 86 | else { 87 | slurm_error ("renice: Invalid option \"%s\"", av[i]); 88 | } 89 | } 90 | 91 | if (!spank_remote (sp)) 92 | slurm_verbose ("renice: min_prio = %d", min_prio); 93 | 94 | return (0); 95 | } 96 | 97 | 98 | int slurm_spank_task_post_fork (spank_t sp, int ac, char **av) 99 | { 100 | pid_t pid; 101 | int taskid; 102 | 103 | /* 104 | * Use default priority if prio not set by command line or env var 105 | */ 106 | if ((prio == PRIO_NOT_SET) && (_check_env (sp) < 0)) 107 | prio = default_prio; 108 | 109 | if (prio < min_prio) 110 | prio = min_prio; 111 | 112 | spank_get_item (sp, S_TASK_GLOBAL_ID, &taskid); 113 | spank_get_item (sp, S_TASK_PID, &pid); 114 | 115 | /* 116 | * No need to do any thing if priority is system default 117 | */ 118 | if (prio == getpriority (PRIO_PROCESS, (int) pid)) 119 | return (0); 120 | 121 | slurm_verbose ("re-nicing task%d pid %ld to %d\n", taskid, pid, prio); 122 | 123 | if (setpriority (PRIO_PROCESS, (int) pid, (int) prio) < 0) { 124 | slurm_error ("setpriority: %m"); 125 | return (-1); 126 | } 127 | 128 | return (0); 129 | } 130 | 131 | static int _renice_opt_process (int val, const char *optarg, int remote) 132 | { 133 | if (optarg == NULL) { 134 | slurm_error ("--renice: invalid argument!"); 135 | return (-1); 136 | } 137 | 138 | if (_str2prio (optarg, &prio) < 0) { 139 | slurm_error ("Bad value for --renice: \"%s\"\n", optarg); 140 | return (-1); 141 | } 142 | 143 | if (prio < 
min_prio) 144 | slurm_error ("--renice=%d not allowed, will use min=%d", 145 | prio, min_prio); 146 | 147 | return (0); 148 | } 149 | 150 | static int _str2prio (const char *str, int *p2int) 151 | { 152 | long int l; 153 | char *p; 154 | 155 | l = strtol (str, &p, 10); 156 | if ((*p != '\0') || (l < -20) || (l > 20)) 157 | return (-1); 158 | 159 | *p2int = (int) l; 160 | 161 | return (0); 162 | } 163 | 164 | static int _check_env (spank_t sp) 165 | { 166 | /* 167 | * See if SLURM_RENICE env var is set by user 168 | */ 169 | char val [1024]; 170 | 171 | if (spank_getenv (sp, PRIO_ENV_VAR, val, 1024) != ESPANK_SUCCESS) 172 | return (-1); 173 | 174 | if (_str2prio (val, &prio) < 0) { 175 | slurm_error ("Bad value for %s: \"%s\".\n", PRIO_ENV_VAR, val); 176 | return (-1); 177 | } 178 | 179 | if (prio < min_prio) { 180 | slurm_error ("%s=%d not allowed, using min=%d", 181 | PRIO_ENV_VAR, prio, min_prio); 182 | } 183 | 184 | return (0); 185 | } 186 | 187 | 188 | /* 189 | * vi: ts=4 sw=4 expandtab 190 | */ 191 | -------------------------------------------------------------------------------- /cpuset/log.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 
15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "list.h" 31 | #include "log.h" 32 | 33 | static char facility [64] = "cpuset"; 34 | 35 | struct logger { 36 | int level; 37 | log_f *logfn; 38 | }; 39 | 40 | static List log_list = NULL; 41 | 42 | static struct logger * logger_create (int level, log_f *fn) 43 | { 44 | struct logger *l = malloc (sizeof (*l)); 45 | 46 | if (l != NULL) { 47 | l->level = level; 48 | l->logfn = fn; 49 | } 50 | 51 | return (l); 52 | } 53 | 54 | void logger_destroy (struct logger *l) 55 | { 56 | free (l); 57 | } 58 | 59 | int log_add_dest (int level, log_f *fn) 60 | { 61 | struct logger *l; 62 | 63 | if (log_list == NULL) { 64 | log_list = list_create ((ListDelF) logger_destroy); 65 | } 66 | 67 | if ((l = logger_create (level, fn)) == NULL) 68 | return (-1); 69 | 70 | list_push (log_list, l); 71 | return (0); 72 | } 73 | 74 | int log_set_prefix (const char *prefix) 75 | { 76 | strncpy (facility, prefix, sizeof (facility)); 77 | return (0); 78 | } 79 | 80 | int find_fn (struct logger *l, log_f *fn) 81 | { 82 | return (l->logfn == fn); 83 | } 84 | 85 | int log_update (int level, log_f *fn) 86 | { 87 | struct logger *l = list_find_first (log_list, (ListFindF) find_fn, fn); 88 | 89 | if (l == NULL) 90 | return (-1); 91 | 92 | l->level = level; 93 | return (0); 94 | } 95 | 96 | 97 | void log_cleanup () 98 | { 99 | list_destroy (log_list); 100 | } 101 | 102 | static int do_log_all (int level, const char *buf) 103 | { 104 | struct logger *l; 105 | 
ListIterator i = list_iterator_create (log_list); 106 | 107 | while ((l = list_next (i))) { 108 | if (l->level >= level) 109 | (*l->logfn) (buf); 110 | } 111 | 112 | list_iterator_destroy (i); 113 | return (0); 114 | } 115 | 116 | static void vlog_msg (const char *prefix, int level, const char *format, va_list ap) 117 | { 118 | char buf[4096]; 119 | char *p; 120 | int n; 121 | int len; 122 | 123 | if (!log_list) 124 | return; 125 | 126 | p = buf; 127 | len = sizeof (buf); 128 | 129 | if (strlen (facility)) { 130 | n = snprintf (p, len, "%s: ", facility); 131 | if ((n < 0) || (n >= len)) { 132 | p += len - 1; 133 | len = 0; 134 | } 135 | else { 136 | p += n; 137 | len -= n; 138 | } 139 | } 140 | 141 | /* Add a log level prefix. 142 | */ 143 | if ((len > 0) && prefix) { 144 | n = snprintf (p, len, "%s: ", prefix); 145 | if ((n < 0) || (n >= len)) { 146 | p += len - 1; 147 | len = 0; 148 | } 149 | else { 150 | p += n; 151 | len -= n; 152 | } 153 | } 154 | 155 | if ((len > 0) && (format)) { 156 | n = vsnprintf (p, len, format, ap); 157 | if ((n < 0) || (n >= len)) { 158 | p += len - 1; 159 | len = 0; 160 | } 161 | else { 162 | p += n; 163 | len -= n; 164 | } 165 | } 166 | 167 | /* Add suffix for truncation if necessary. 168 | */ 169 | if (len <= 0) { 170 | char *q; 171 | const char *suffix = "+"; 172 | q = buf + sizeof (buf) - 1 - strlen (suffix); 173 | p = (p < q) ? p : q; 174 | strcpy (p, suffix); 175 | p += strlen (suffix); 176 | } 177 | 178 | *p = '\0'; 179 | 180 | do_log_all (level, buf); 181 | 182 | return; 183 | } 184 | 185 | int log_err (const char *format, ...) 186 | { 187 | va_list ap; 188 | va_start (ap, format); 189 | vlog_msg ("Error", -1, format, ap); 190 | va_end (ap); 191 | return (-1); /* So we can do return (log_err (...)) */ 192 | } 193 | 194 | void log_msg (const char *format, ...) 
195 | { 196 | va_list ap; 197 | va_start (ap, format); 198 | vlog_msg (NULL, 0, format, ap); 199 | va_end (ap); 200 | return; 201 | } 202 | 203 | void log_verbose (const char *format, ...) 204 | { 205 | va_list ap; 206 | va_start (ap, format); 207 | vlog_msg (NULL, 1, format, ap); 208 | va_end (ap); 209 | return; 210 | } 211 | 212 | void log_debug (const char *format, ...) 213 | { 214 | va_list ap; 215 | va_start (ap, format); 216 | vlog_msg ("Debug", 2, format, ap); 217 | va_end (ap); 218 | return; 219 | } 220 | 221 | void log_debug2 (const char *format, ...) 222 | { 223 | va_list ap; 224 | va_start (ap, format); 225 | vlog_msg ("Debug", 3, format, ap); 226 | va_end (ap); 227 | return; 228 | } 229 | 230 | /* 231 | * vi: ts=4 sw=4 expandtab 232 | */ 233 | -------------------------------------------------------------------------------- /use-env/log_msg.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "use-env.h" 32 | 33 | struct log_ctx { 34 | int quiet; 35 | int verbose; 36 | char *prefix; 37 | }; 38 | 39 | static struct log_ctx log_ctx = { 0, 0, NULL }; 40 | 41 | int log_msg_init (const char *prefix) 42 | { 43 | if (prefix) 44 | log_ctx.prefix = strdup (prefix); 45 | return (0); 46 | } 47 | 48 | void log_msg_fini () 49 | { 50 | if (log_ctx.prefix) 51 | free (log_ctx.prefix); 52 | } 53 | 54 | int log_msg_verbose () 55 | { 56 | return (log_ctx.verbose++); 57 | } 58 | 59 | int log_msg_set_verbose (int level) 60 | { 61 | return (log_ctx.verbose = level); 62 | } 63 | 64 | int log_msg_quiet () 65 | { 66 | return (log_ctx.quiet++); 67 | } 68 | 69 | 70 | static void 71 | vlog_msg (const char *prefix, int use_basename, const char *format, va_list ap) 72 | { 73 | char buf[4096]; 74 | char *p; 75 | int n; 76 | int len; 77 | 78 | p = buf; 79 | len = sizeof (buf); 80 | 81 | /* Prefix output with facility name. 82 | */ 83 | if (log_ctx.prefix && (*log_ctx.prefix != '\0')) { 84 | n = snprintf (buf, len, "%s: ", log_ctx.prefix); 85 | if ((n < 0) || (n >= len)) { 86 | p += len - 1; 87 | len = 0; 88 | } 89 | else { 90 | p += n; 91 | len -= n; 92 | } 93 | } 94 | 95 | /* Add a log level prefix. 96 | */ 97 | if ((len > 0) && (prefix)) { 98 | n = snprintf (p, len, "%s: ", prefix); 99 | if ((n < 0) || (n >= len)) { 100 | p += len - 1; 101 | len = 0; 102 | } 103 | else { 104 | p += n; 105 | len -= n; 106 | } 107 | } 108 | 109 | /* Add file and line number information 110 | */ 111 | if (len > 0 && (lex_file () != NULL)) { 112 | char *file = strdup (lex_file ()); 113 | char *name = use_basename ? 
basename (file) : file; 114 | 115 | n = snprintf (p, len, "%s: %d: ", name, lex_line()); 116 | 117 | if ((n < 0) || (n >= len)) { 118 | p += len - 1; 119 | len = 0; 120 | } 121 | else { 122 | p += n; 123 | len -= n; 124 | } 125 | free (file); 126 | } 127 | 128 | if ((len > 0) && (format)) { 129 | n = vsnprintf (p, len, format, ap); 130 | if ((n < 0) || (n >= len)) { 131 | p += len - 1; 132 | len = 0; 133 | } 134 | else { 135 | p += n; 136 | len -= n; 137 | } 138 | } 139 | 140 | /* Add suffix for truncation if necessary. 141 | */ 142 | if (len <= 0) { 143 | char *q; 144 | const char *suffix = "+"; 145 | q = buf + sizeof (buf) - 1 - strlen (suffix); 146 | p = (p < q) ? p : q; 147 | strcpy (p, suffix); 148 | p += strlen (suffix); 149 | } 150 | 151 | *p = '\0'; 152 | 153 | fprintf (stderr, "%s", buf); 154 | 155 | return; 156 | } 157 | 158 | 159 | int log_err (const char *format, ...) 160 | { 161 | va_list ap; 162 | 163 | if (log_ctx.quiet) 164 | return (-1); 165 | 166 | va_start (ap, format); 167 | vlog_msg ("Error", 0, format, ap); 168 | va_end (ap); 169 | return (-1); 170 | } 171 | 172 | void log_msg (const char *format, ...) 173 | { 174 | va_list ap; 175 | 176 | if (log_ctx.quiet) 177 | return; 178 | 179 | va_start (ap, format); 180 | vlog_msg (NULL, 1, format, ap); 181 | va_end (ap); 182 | return; 183 | } 184 | 185 | void log_verbose (const char *format, ...) 186 | { 187 | va_list ap; 188 | 189 | if (log_ctx.quiet || !log_ctx.verbose) 190 | return; 191 | 192 | va_start (ap, format); 193 | vlog_msg (NULL, 1, format, ap); 194 | va_end (ap); 195 | return; 196 | } 197 | 198 | void log_debug (const char *format, ...) 199 | { 200 | va_list ap; 201 | 202 | if ((log_ctx.quiet) || (log_ctx.verbose < 2)) 203 | return; 204 | 205 | va_start (ap, format); 206 | vlog_msg (NULL, 1, format, ap); 207 | va_end (ap); 208 | return; 209 | } 210 | 211 | void log_debug2 (const char *format, ...) 
212 | { 213 | va_list ap; 214 | 215 | if ((log_ctx.quiet) || (log_ctx.verbose < 3)) 216 | return; 217 | 218 | va_start (ap, format); 219 | vlog_msg (NULL, 1, format, ap); 220 | va_end (ap); 221 | return; 222 | } 223 | 224 | void log_debug3 (const char *format, ...) 225 | { 226 | va_list ap; 227 | 228 | if ((log_ctx.quiet) || (log_ctx.verbose < 4)) 229 | return; 230 | 231 | va_start (ap, format); 232 | vlog_msg (NULL, 1, format, ap); 233 | va_end (ap); 234 | return; 235 | } 236 | 237 | 238 | 239 | /* 240 | * vi: ts=4 sw=4 expandtab 241 | */ 242 | -------------------------------------------------------------------------------- /cpuset/conf-parser.y: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/


%{
/*
 * NOTE(review): the names of the four system headers were lost when this
 * listing was extracted (only bare "#include" survived). The list below is
 * reconstructed from what the code uses -- confirm against the repository.
 */
#include <errno.h>     /* errno, EINVAL */
#include <stdio.h>     /* FILE, fopen, fclose, stdin */
#include <string.h>    /* strcmp, strerror */
#include <strings.h>   /* strcasecmp */

#include "conf.h"
#include "log.h"

extern int yylex ();
void yyerror (const char *s);
extern FILE *yyin;

/* Line currently being parsed; bumped by the `end' rule on every newline */
static int cpuset_conf_line;

#define YYSTYPE char *
#define YYDEBUG 1
int yydebug = 0;

static int cf_policy (const char *);
static int cf_use_idle (const char *);
static int cf_order (const char *);
static int cf_const_mem (int);
static int cf_kill_orphs (int);

%}

%token POLICY "policy"
%token USE_IDLE "use-idle"
%token CONST_MEM "constrain-mem"
%token KILL_ORPHS "kill-orphs"
%token ORDER "order"
%token TRUE "true"
%token FALSE "false"
%token STRING "string"

%error-verbose

%%

file  : /* empty */
      | file stmts
      ;

stmts : end
      | stmt end
      | stmts stmt
      ;

/* Each action aborts the parse as soon as a setter rejects its value */
stmt  : POLICY '=' STRING     { if (cf_policy ($3) < 0) YYABORT; }
      | USE_IDLE '=' STRING   { if (cf_use_idle ($3) < 0) YYABORT; }
      | USE_IDLE '=' FALSE    { if (cf_use_idle ("no") < 0) YYABORT; }
      | USE_IDLE '=' TRUE     { if (cf_use_idle ("yes") < 0) YYABORT; }
      | CONST_MEM '=' TRUE    { if (cf_const_mem (1) < 0) YYABORT; }
      | CONST_MEM '=' FALSE   { if (cf_const_mem (0) < 0) YYABORT; }
      | KILL_ORPHS '=' TRUE   { if (cf_kill_orphs (1) < 0) YYABORT; }
      | KILL_ORPHS '=' FALSE  { if (cf_kill_orphs (0) < 0) YYABORT; }
      | ORDER '=' STRING      { if (cf_order ($3) < 0) YYABORT; }
      ;

end   : '\n'                  { cpuset_conf_line++; }
      | ';'
      ;

%%

/* Configuration object the cf_* actions write into during a parse */
static cpuset_conf_t conf;
/* Path given to cpuset_conf_parse(), used in diagnostics; NULL before that */
static const char * cpuset_conf_filename = NULL;

/*
 *  Turn on the generated parser's trace output.
 */
void cpuset_conf_debug ()
{
    yydebug = 1;
}

/*
 *  Name to show in diagnostics: the path being parsed, or "stdin" when
 *  no path has been recorded.
 */
static const char * cf_file ()
{
    if (!cpuset_conf_filename)
        return ("stdin");
    return (cpuset_conf_filename);
}

/*
 *  Line number to show in diagnostics.
 */
static int cf_line ()
{
    return (cpuset_conf_line);
}

/*
 *  Parser error callback: report [s] with file and line context.
 */
void yyerror (const char *s)
{
    log_err ("%s: %d: %s\n", cf_file (), cf_line (), s);
}

/*
 *  Parse the config file at [path] into [cf]. A path of "-" reads stdin.
 *
 *  Returns 0 on success and -1 on failure. When the file cannot be opened
 *  errno holds the fopen() error; when [path] is NULL errno is EINVAL;
 *  when the parser fails errno is set to 0.
 */
int cpuset_conf_parse (cpuset_conf_t cf, const char *path)
{
    int from_stdin;
    int rc = 0;

    cpuset_conf_filename = NULL;

    if (path == NULL) {
        log_err ("cpuset_conf_parse: no config file specified\n");
        errno = EINVAL;
        return (-1);
    }

    cpuset_conf_set_file (cf, path);

    from_stdin = (strcmp (path, "-") == 0);

    if (from_stdin)
        yyin = stdin;
    else if (!(yyin = fopen (path, "r"))) {
        int err = errno;
        log_err ("open: %s: %s\n", path, strerror (err));
        errno = err;
        return (-1);
    }

    cpuset_conf_filename = path;
    cpuset_conf_line = 1;
    conf = cf;

    log_debug ("reading config from \"%s\"\n", cf_file ());

    if (yyparse ()) {
        log_err ("%s: %d: parser failed\n", cf_file (), cf_line ());
        rc = -1;
    }

    /*
     *  Close the stream on every path so a failed parse does not leak it,
     *  but never close the caller's stdin: we did not open it.
     */
    if (!from_stdin)
        fclose (yyin);

    if (rc < 0)
        errno = 0;

    return (rc);
}

/*
 *  Action for "policy = STRING".
 */
static int cf_policy (const char *name)
{
    log_debug ("%s: %d: Setting allocation policy to %s.\n",
            cf_file (), cf_line(), name);
    if (cpuset_conf_set_policy_string (conf, name) < 0)
        return log_err ("%s: %d: Invalid allocation policy '%s'.\n",
                cf_file (), cf_line (), name);
    return (0);
}

/*
 *  Action for "use-idle = STRING|true|false".
 */
static int cf_use_idle (const char *s)
{
    log_debug ("%s: %d: Setting idle node use policy to %s.\n",
            cf_file (), cf_line(), s);
    if (cpuset_conf_set_alloc_idle_string (conf, s) < 0)
        return log_err ("%s: %d: Invalid alloc-idle string '%s'\n",
                cf_file (), cf_line (), s);
    return (0);
}

/*
 *  Action for "order = STRING": accepts "reverse" or "normal",
 *  compared without regard to case.
 */
static int cf_order (const char *s)
{
    log_debug ("%s: %d: Setting order to %s.\n",
            cf_file (), cf_line (), s);

    if (strcasecmp (s, "reverse") == 0)
        return cpuset_conf_set_order (conf, 1);
    else if (strcasecmp (s, "normal") == 0)
        return cpuset_conf_set_order (conf, 0);

    return log_err ("%s: %d: Invalid setting for order: %s\n",
            cf_file (), cf_line (), s);
}

/*
 *  Action for "constrain-mem = true|false".
 */
static int cf_const_mem (int val)
{
    log_debug ("%s: %d: Setting constrain-mem to %s.\n",
            cf_file (), cf_line(), val ? "true" : "false");
    return (cpuset_conf_set_constrain_mem (conf, val));
}

/*
 *  Action for "kill-orphs = true|false".
 */
static int cf_kill_orphs (int val)
{
    log_debug ("%s: %d: Setting kill-orphans to %s.\n",
            cf_file (), cf_line(), val ? "true" : "false");
    return (cpuset_conf_set_kill_orphans (conf, val));
}

/*
 * vi: ts=4 sw=4 expandtab
 */

200 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | SLURM spank plugins README 2 | ================================== 3 | 4 | This package includes several SLURM spank plugins developed 5 | at LLNL and used on production compute clusters onsite. A few 6 | of these plugins are only valid when used on LLNL's software 7 | stack (oom-detect.so, for example, requires LLNL-specific patches 8 | to track job's terminated by the OOM killer). However, the 9 | source for all plugins is provided here in the hope that they 10 | might be useful to other plugin developers. The following 11 | is a short description of most of the plugins in this package. 12 | 13 | addr-no-randomize 14 | ----------------- 15 | 16 | The addr-no-randomize plugin allows sysadmins to set a default 17 | policy for address space randomization (when supported and 18 | enabled in the Linux kernel), and provides an option for users 19 | to enable/disable randomization on a per-job basis. 20 | 21 | auto-affinity 22 | ----------------- 23 | 24 | Automatically assign CPU affinity using best-guess defaults. 
25 | 26 | The default behavior of this plugin attempts to accommodate 27 | multi-threaded apps by assigning more than one CPU per task 28 | if the number of tasks running on the node is evenly divisible 29 | into the number of CPUs. Otherwise, CPU affinity is not enabled 30 | unless the cpus_per_task (cpt) option is specified. The default 31 | behavior may be modified using the --auto-affinity options 32 | listed below. Also, the srun(1) --cpu_bind option is processed 33 | after auto-affinity, and thus may be used to override any CPU 34 | affinity settings from this module. 35 | 36 | This plugin should not be used alone on systems using node 37 | sharing. In that case, it should be used along with 38 | the cpuset plugin below (and auto-affinity.so should be listed 39 | *after* cpuset.so in the plugstack.conf). 40 | 41 | cpuset 42 | ----------------- 43 | 44 | The cpuset plugin uses Linux cpusets to constrain jobs to the 45 | number of CPUs they have been allocated on nodes. The plugin 46 | is specifically designed for systems sharing nodes and using CPU 47 | scheduling (i.e. using the select/cons_res plugin). The plugin 48 | will not work on systems where CPUs are oversubscribed to jobs 49 | (i.e. strict node sharing without the use of select/cons_res). 50 | 51 | The plugin also has a pam_slurm_cpuset counterpart, which 52 | replaces pam_slurm and provides identical functionality, 53 | except that user login sessions are constrained to their 54 | currently allocated CPUs on a node. 55 | 56 | The cpuset plugin requires the SGI libbitmask and libcpuset 57 | libraries available from 58 | 59 | http://oss.sgi.com/projects/cpusets 60 | 61 | (See also cpuset/README) 62 | 63 | iorelay 64 | ----------------- 65 | 66 | The iorelay plugin is an experimental proof-of-concept plugin 67 | for remounting required filesystems for a parallel job from 68 | the first allocated node to all others. It is meant to reduce 69 | the load on global NFS servers. 
70 | 71 | It has not been used in production. 72 | 73 | 74 | iotrace 75 | ----------------- 76 | 77 | The iotrace plugin is another experimental plugin which 78 | uses "plasticfs" to log filesystem access on a per-job 79 | basis. 80 | 81 | 82 | oom-detect 83 | ----------------- 84 | 85 | The oom-detect plugin detects jobs that have been victims 86 | of the OOM killer using some special code added to the LLNL 87 | Linux kernel. As tasks exit after having been killed by 88 | the OOM killer, a message is printed to the user's stderr 89 | along with some memory information about the task. 90 | 91 | overcommit-memory 92 | ----------------- 93 | 94 | The overcommit-memory plugin is an attempt to allow users 95 | to tune global overcommit behavior of the Linux kernel on 96 | a per-job basis. It is currently buggy and thus not used. 97 | 98 | preserve-env 99 | ----------------- 100 | 101 | The preserve-env plugin adds an srun option 102 | 103 | --preserve-slurm-env 104 | 105 | which attempts to preserve the current state of all SLURM_* 106 | environment variables in the remotely executed environment. This 107 | is meant solely to be used from an allocation shell with 108 | the syntax 109 | 110 | srun -n1 -N1 --pty --preserve-slurm-env $SHELL 111 | 112 | as a sort of "remote" allocation shell. 113 | 114 | pty 115 | ----------------- 116 | 117 | The pty plugin provides the SLURM --pty option, introduced 118 | in slurm-1.3, for slurm-1.2. It isn't fully functional at this 119 | point, but is a good example of a complex feature added solely 120 | from a spank plugin. 121 | 122 | 123 | renice 124 | ----------------- 125 | 126 | The renice plugin is the same as the example code in the 127 | spank(8) man page. It provides a new srun option "--renice=VALUE" 128 | which allows users to set the nice value of their remote 129 | tasks (down to a minimum value configured by sysadmin). 
130 | 131 | system-safe 132 | ------------------ 133 | 134 | The system-safe plugin provides an MPI-safe system(3) 135 | replacement through an LD_PRELOAD library (most of the work 136 | is done in system-safe-preload.c). The preloaded library 137 | interposes a version of system(3) that does not fork. Instead, 138 | the command line is passed through a pipe to a copy of the 139 | program which was pre-forked before MPI_Init(). The return 140 | value of the real system() call is passed back through the 141 | pipe and returned to the calling application, for which there 142 | is no noticeable difference from the real system(3). 143 | 144 | use-env 145 | ------------------ 146 | 147 | The use-env plugin allows system administrators and users to 148 | modify the environment of SLURM jobs using a set of simple 149 | yet very flexible config files. Environment variables can 150 | be overridden, set only if unset, set based on conditional 151 | syntax, and even defined in a per-task context. The config 152 | files have access to key slurm variables such as SLURM_NNODES, 153 | SLURM_NPROCS, etc., so variables can even be defined differently 154 | depending on the size of the job. 155 | 156 | See README.use-env for further information. 157 | 158 | setsched 159 | ------------------ 160 | 161 | The setsched plugin allows system administrators to configure a 162 | particular kernel scheduling policy that can be applied to tasks 163 | spawned by slurmstepd. The policy can be used by default or not. 164 | In all cases, once the plugin is described in the configuration, 165 | users can enable or disable it using --setsched=[yes|no|auto]. 
166 | 167 | setsched spank module configuration looks like the following 168 | (default parameters used, policies are configured using numerical 169 | values) : 170 | 171 | optional setsched.so policy=1 priority=10 default=disabled 172 | -------------------------------------------------------------------------------- /overcommit-memory/util.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "overcommit.h" 31 | 32 | char *prog = NULL; 33 | 34 | static int cleanup = 0; 35 | static int list_users = 0; 36 | static int force_reset = 0; 37 | static int jobid = -1; 38 | 39 | #define __GNU_SOURCE 40 | #include 41 | 42 | struct option opt_table [] = { 43 | { "help", 0, NULL, 'h' }, 44 | { "cleanup", 0, NULL, 'c' }, 45 | { "list-users", 0, NULL, 'l' }, 46 | { "force-reset", 0, NULL, 'f' }, 47 | { "jobid", 1, NULL, 'j' }, 48 | { NULL, 0, NULL, 0 } 49 | }; 50 | 51 | const char opt_string[] = "hclfj:"; 52 | 53 | #define USAGE "\ 54 | Usage: %s [OPTONS]\n\ 55 | -h, --help Display this message\n\ 56 | -l, --list-users List current jobs using overcommit-memory plugin.\n\ 57 | -c, --cleanup Cleanup any overcommit-memory usage by a SLURM job.\n\ 58 | SLURM_JOBID and SLURM_STEPID should be set in current\n\ 59 | environment. Removes shared memory file and resets\n\ 60 | overcommit_memory to default if no more references\n\ 61 | to overcommit-memory exist.\n\ 62 | -f, --force-reset Force total cleanup of overcommit-memory state. 
Reset\n\ 63 | overcommit_memory setting to default and remove\n\ 64 | overcommit shared file.\n\ 65 | -j, --jobid=ID Specify SLURM jobid to clean up after if SLURM_JOBID\n\ 66 | not set in environment\n" 67 | 68 | static int get_env_int (const char *var); 69 | static int str2int (const char *str); 70 | static int parse_cmdline (int ac, char **av); 71 | static void log_fatal (char *fmt, ...); 72 | 73 | int main (int ac, char *av[]) 74 | { 75 | int stepid = -1; 76 | 77 | parse_cmdline (ac, av); 78 | 79 | if (jobid < 0) 80 | jobid = get_env_int ("SLURM_JOBID"); 81 | if (stepid < 0) 82 | stepid = get_env_int ("SLURM_STEPID"); 83 | 84 | if (cleanup && jobid < 0) 85 | log_fatal ("--cleanup requires SLURM_JOBID in environment\n"); 86 | 87 | if (!cleanup && !list_users && !force_reset) 88 | log_fatal ("Specify one of --cleanup, --force-reset, or --list-users.\n"); 89 | 90 | if (list_users) 91 | overcommit_shared_list_users (); 92 | 93 | if (force_reset) { 94 | if (overcommit_force_cleanup () < 0) 95 | return (1); 96 | printf ("Successfuly reset overcommit-memory state\n"); 97 | } 98 | else if (cleanup) { 99 | /* 100 | * If overcommit_shared_cleanup returns < 0, this probably just 101 | * means that the jobid.stepid is not in the shared memory state. 
102 | */ 103 | if (overcommit_shared_cleanup (jobid, stepid) < 0) 104 | printf ("No overcommit state for job %d\n", jobid); 105 | else 106 | printf ("Succesfully cleaned up overcommit state for job %d\n", 107 | jobid); 108 | } 109 | 110 | return (0); 111 | } 112 | 113 | static void usage (const char *prog) 114 | { 115 | fprintf (stderr, USAGE, prog); 116 | } 117 | 118 | static int parse_cmdline (int ac, char **av) 119 | { 120 | prog = basename (av[0]); 121 | 122 | for (;;) { 123 | char c = getopt_long (ac, av, opt_string, opt_table, NULL); 124 | 125 | if (c == -1) 126 | break; 127 | 128 | switch (c) { 129 | case 'h': 130 | usage (prog); 131 | exit (0); 132 | case 'c': 133 | cleanup = 1; 134 | break; 135 | case 'l': 136 | list_users = 1; 137 | break; 138 | case 'f': 139 | force_reset = 1; 140 | break; 141 | case 'j': 142 | if ((jobid = str2int (optarg)) < 0) 143 | log_fatal ("Invalid argument: --jobid=%s\n", optarg); 144 | break; 145 | case '?': 146 | if (optopt > 0) 147 | fprintf (stderr, "%s: Invalid option \"-%c\"\n", 148 | prog, optopt); 149 | else 150 | fprintf (stderr, "%s: Invalid option \"%s\"\n", 151 | prog, av[optind-1]); 152 | break; 153 | default: 154 | fprintf (stderr, "%s: Unimplemented option \"%s\"\n", 155 | prog, av[optind-1]); 156 | break; 157 | } 158 | } 159 | 160 | return (0); 161 | } 162 | 163 | static void log_fatal (char *fmt, ...) 
164 | { 165 | va_list ap; 166 | va_start (ap, fmt); 167 | fprintf (stderr, "%s: ", prog); 168 | vfprintf (stderr, fmt, ap); 169 | va_end (ap); 170 | exit (1); 171 | } 172 | 173 | static int str2int (const char *str) 174 | { 175 | char *p; 176 | long l = strtol (str, &p, 10); 177 | 178 | if (p && (*p != '\0')) 179 | return (-1); 180 | 181 | return ((int) l); 182 | } 183 | 184 | static int get_env_int (const char *var) 185 | { 186 | char *val; 187 | int id; 188 | 189 | if (!(val = getenv (var))) 190 | return (-1); 191 | 192 | if ((id = str2int (val)) < 0) 193 | log_fatal ("Bad environment value: %s=%s\n", var, val); 194 | 195 | return (id); 196 | } 197 | 198 | 199 | /* 200 | * vi: ts=4 sw=4 expandtab 201 | */ 202 | -------------------------------------------------------------------------------- /setsched.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************\ 2 | * setsched.c - Spank Plugin to enforce a particular kernel scheduling policy 3 | *************************************************************************** 4 | * 5 | * Copyright CEA/DAM/DIF (2009) 6 | * 7 | * Written by Matthieu Hautreux 8 | * 9 | * This file is part of slurm-spank-plugins, a set of spank plugins 10 | * for SLURM. 11 | * 12 | * This is free software; you can redistribute it and/or modify it 13 | * under the terms of the GNU General Public License as published by 14 | * the Free Software Foundation; either version 2 of the License, or 15 | * (at your option) any later version. 16 | * 17 | * This is distributed in the hope that it will be useful, but WITHOUT 18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 20 | * for more details. 21 | * 22 | * You should have received a copy of the GNU General Public License 23 | * along with this program. If not, see . 
24 | * 25 | \***************************************************************************/ 26 | 27 | /* 28 | * To compile it : gcc -fPIC -shared -o setsched.so setsched.c 29 | * 30 | * This plugin can be used to enforce a particular scheduling policy 31 | * as well as the associated priority of tasks spawned by slurm. 32 | * 33 | * The following configuration parameters are available on server side : 34 | * 35 | * policy : set the kernel scheduling policy to use (default is 0) 36 | * priority : set the priority to configure with the policy (default is 0) 37 | * default : set setsched plugin default behavior i.e. enabled/disabled 38 | * 39 | * Users can alter the enabled/disabled behavior on command line using 40 | * --setsched 41 | * 42 | * setsched can be used only if at least one of the policy or priority 43 | * parameters are set to a non-zero value 44 | * 45 | * Here is an example of configuration : 46 | * 47 | * optional setsched.so policy=55 priority=0 default=disabled 48 | * 49 | */ 50 | 51 | #include 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | 59 | #ifdef _POSIX_PRIORITY_SCHEDULING 60 | #include 61 | #else 62 | #warning "no Posix priority scheduling primitives detected on this system" 63 | #endif 64 | 65 | #include 66 | extern int errno; 67 | 68 | #include 69 | 70 | #define SPANK_SETSCHED_VERSION "0.1.4" 71 | 72 | static int setsched_pol=0; 73 | static int setsched_prio=0; 74 | static int setsched_default=0; 75 | 76 | #define xinfo slurm_info 77 | #define xerror slurm_error 78 | #define xdebug slurm_debug 79 | 80 | /* 81 | * All spank plugins must define this macro for the SLURM plugin loader. 
82 | */ 83 | SPANK_PLUGIN(setsched, 1); 84 | 85 | static int _str2int (const char *str, int *p2int) 86 | { 87 | long int l; 88 | char *p; 89 | 90 | l = strtol (str, &p, 10); 91 | 92 | /* check for underflow and overflow */ 93 | if ( l == LONG_MIN || l == LONG_MAX ) 94 | return (-1); 95 | 96 | *p2int = (int) l; 97 | 98 | return (0); 99 | } 100 | 101 | static int _setsched_opt_process (int val, const char *optarg, int remote) 102 | { 103 | if (optarg == NULL) { 104 | return 0; 105 | } 106 | 107 | if (strncmp ("no", optarg, 2) == 0) { 108 | setsched_default=0; 109 | xdebug("setsched: disabled on user request"); 110 | } 111 | else if (strncmp ("yes", optarg, 3) == 0) { 112 | setsched_default=1; 113 | xdebug("setsched: enabled on user request"); 114 | } 115 | else if (strncmp ("auto", optarg, 4) != 0) { 116 | xerror ("setsched: bad parameter %s", optarg); 117 | return (-1); 118 | } 119 | 120 | return (0); 121 | } 122 | 123 | 124 | /* 125 | * Provide a --setsched=[yes|no|auto] option to srun: 126 | */ 127 | struct spank_option spank_options[] = 128 | { 129 | { "setsched", "[yes|no|auto]", "Activate/Desactivate scheduling policy " 130 | "setting of Setsched spank plugin", 2, 0, 131 | (spank_opt_cb_f) _setsched_opt_process 132 | }, 133 | SPANK_OPTIONS_TABLE_END 134 | }; 135 | 136 | 137 | /* 138 | * Called from both srun and slurmd. 139 | */ 140 | int slurm_spank_init (spank_t sp, int ac, char **av) 141 | { 142 | int i; 143 | 144 | int pol=0; 145 | int prio=0; 146 | 147 | /* do something in remote mode only */ 148 | if ( ! 
spank_remote(sp) ) 149 | return 0; 150 | 151 | for (i = 0; i < ac; i++) { 152 | if (strncmp ("policy=", av[i], 7) == 0) { 153 | const char *optarg = av[i] + 7; 154 | if (_str2int (optarg, &pol) < 0) 155 | xerror ("setsched: ignoring invalid policy " 156 | "value: %s", av[i]); 157 | } 158 | else if (strncmp ("priority=", av[i], 9) == 0) { 159 | const char *optarg = av[i] + 9; 160 | if (_str2int (optarg, &prio) < 0) 161 | xerror ("setsched: ignoring invalid priority " 162 | "value: %s", av[i]); 163 | 164 | } 165 | else if (strncmp ("default=enabled", av[i], 15) == 0) { 166 | setsched_default=1; 167 | } 168 | else if (strncmp ("default=disabled", av[i], 16) == 0) { 169 | setsched_default=0; 170 | } 171 | else { 172 | xerror ("setsched: " 173 | "invalid option: %s", av[i]); 174 | } 175 | } 176 | 177 | if ( pol > 0 || prio > 0 ) { 178 | setsched_pol=pol; 179 | setsched_prio=prio; 180 | xdebug("setsched: configuration is policy=%d " 181 | "priority=%d default=%s (version %s)", 182 | setsched_pol,setsched_prio, 183 | setsched_default?"enabled":"disabled", 184 | SPANK_SETSCHED_VERSION); 185 | } 186 | 187 | return 0; 188 | } 189 | 190 | int slurm_spank_task_post_fork (spank_t sp, int ac, char **av) 191 | { 192 | int status = 0; 193 | 194 | pid_t pid; 195 | int taskid; 196 | 197 | int pol; 198 | struct sched_param spar; 199 | 200 | if ( setsched_default && ( setsched_pol > 0 || setsched_prio > 0 ) ) { 201 | 202 | pol=setsched_pol; 203 | spar.sched_priority=setsched_prio; 204 | 205 | spank_get_item (sp, S_TASK_GLOBAL_ID, &taskid); 206 | spank_get_item (sp, S_TASK_PID, &pid); 207 | 208 | status = sched_setscheduler(pid, pol, &spar); 209 | if (status < 0) { 210 | xerror("setsched: unable to set scheduling " 211 | "policy of task%d pid %d : %s", 212 | taskid, pid,strerror(errno)); 213 | } 214 | else 215 | xinfo("setsched: " 216 | "scheduling policy of task%d pid %d is " 217 | "now %d (prio=%d)", 218 | taskid, pid, pol, setsched_prio); 219 | } 220 | 221 | return status; 222 | } 
223 | -------------------------------------------------------------------------------- /overcommit-memory/overcommit-memory.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include 37 | 38 | #include "overcommit.h" 39 | 40 | SPANK_PLUGIN (overcommit, 1); 41 | 42 | const char env_flag [] = "SPANK_OVERCOMMIT_MEMORY_FLAG"; 43 | 44 | static int jobid; 45 | static int stepid; 46 | static int overcommit_ratio = 100; 47 | static overcommit_shared_ctx_t ctx = NULL; 48 | 49 | static int overcommit_opt_process (int val, const char *arg, int remote); 50 | 51 | struct spank_option spank_options [] = 52 | { 53 | { "overcommit-memory", "[m]", 54 | "Choose memory overcommit mode [m] (always|off|on) for all nodes of job.", 55 | 1, 0, 56 | (spank_opt_cb_f) overcommit_opt_process 57 | }, 58 | SPANK_OPTIONS_TABLE_END 59 | }; 60 | 61 | static int set_overcommit_policy (int val) 62 | { 63 | ctx = overcommit_shared_ctx_create (jobid, stepid); 64 | 65 | if (ctx == NULL) 66 | return (-1); 67 | 68 | if (overcommit_in_use (ctx, val)) { 69 | slurm_error ("overcommit-memory: Cannot set desired mode on this node"); 70 | overcommit_shared_ctx_destroy (ctx); 71 | } 72 | else if (overcommit_memory_set_current_state (val) < 0) 73 | slurm_error ("overcommit-memory: Failed to set overcommit = %d", val); 74 | else if (overcommit_ratio_set (overcommit_ratio) < 0) 75 | slurm_error ("overcommit-memory: Failed to set overcommit_ratio to %d\n", 76 | overcommit_ratio); 77 | 78 | return (0); 79 | } 80 | 81 | static int strnmatch (const char *src, int n, ...) 
82 | { 83 | int i = 0; 84 | int rc = 0; 85 | va_list ap; 86 | 87 | va_start (ap, n); 88 | 89 | while ((i++ < n) && !(rc = (strcmp (src, va_arg (ap, char *)) == 0))) {;} 90 | 91 | va_end (ap); 92 | 93 | return (rc); 94 | } 95 | 96 | static int overcommit_opt_process (int val, const char *arg, int remote) 97 | { 98 | int overcommit_mode = 0; 99 | 100 | if (strnmatch (arg, 4, "off", "no", "never", "2")) 101 | overcommit_mode = 2; 102 | else if (strnmatch (arg, 2, "always", "1")) 103 | overcommit_mode = 1; 104 | else if (strnmatch (arg, 2, "on", "yes", "0")) 105 | overcommit_mode = 0; 106 | else { 107 | slurm_error ("--overcommit-memory: invalid argument: %s", arg); 108 | return (-1); 109 | } 110 | 111 | if (!remote) { 112 | /* Need to set a flag in environment so slurmd knows that a 113 | * command line option is called and won't apply any environment 114 | * options. 115 | */ 116 | setenv ("SPANK_OVERCOMMIT_MEMORY_FLAG", "1", 1); 117 | return (0); 118 | } 119 | 120 | if (set_overcommit_policy (overcommit_mode) < 0) 121 | return (-1); 122 | 123 | return (0); 124 | } 125 | 126 | static int check_env (spank_t sp, int remote) 127 | { 128 | char buf [64]; 129 | const char var[] = "SLURM_OVERCOMMIT_MEMORY"; 130 | 131 | /* If env_flag is set in environment, ignore options set from 132 | * environment since command line option should override 133 | */ 134 | if (spank_getenv (sp, env_flag, buf, sizeof (buf)) == ESPANK_SUCCESS) { spank_unsetenv (sp, env_flag); 135 | return (0); 136 | } 137 | 138 | if (spank_getenv (sp, var, buf, sizeof (buf)) == ESPANK_SUCCESS) { 139 | if (overcommit_opt_process (0, buf, remote) < 0) { 140 | slurm_error ("Environment setting %s=%s invalid", var, buf); 141 | return (-1); 142 | } 143 | } 144 | 145 | return (0); 146 | } 147 | 148 | static int str2int (const char *str) 149 | { 150 | char *p; 151 | long l = strtol (str, &p, 10); 152 | 153 | if (p && (*p != '\0')) 154 | return (-1); 155 | 156 | return ((int) l); 157 | } 158 | 159 | int parse_options 
/*
 *  Render the CPUs set in `mask' as a comma separated list in `str',
 *   e.g. "0-2,5,7,8". A run of three or more consecutive CPUs is
 *   written as "first-last"; a run of exactly two is written as two
 *   separate entries. An empty mask yields the empty string.
 *
 *  No buffer length is passed in, so the caller must supply a buffer
 *   large enough for the result.
 *
 *  Returns `str'.
 */
char * cpuset_to_cstr (cpu_set_t *mask, char *str)
{
    char *out = str;
    int cpu = 0;

    while (cpu < CPU_SETSIZE) {
        int last = cpu;

        if (!CPU_ISSET(cpu, mask)) {
            cpu++;
            continue;
        }

        /* Extend `last' to the final CPU of this consecutive run */
        while ((last + 1 < CPU_SETSIZE) && CPU_ISSET(last + 1, mask))
            last++;

        if (last == cpu)
            out += sprintf(out, "%d,", cpu);
        else if (last == cpu + 1)
            out += sprintf(out, "%d,%d,", cpu, last);
        else
            out += sprintf(out, "%d-%d,", cpu, last);

        cpu = last + 1;
    }

    /* Drop the trailing comma, if anything was written at all */
    if (out != str)
        out--;
    *out = 0;

    return str;
}
(a) : (b)) 81 | int cpuset_to_hex (cpu_set_t *mask, char *str, size_t len) 82 | { 83 | int chunk; 84 | int cnt = 0; 85 | int lastchunk = cpuset_last_bit (mask) / HEXCHUNKSZ; 86 | const char *sep = ""; 87 | 88 | if (len <= 0) 89 | return 0; 90 | 91 | str[0] = 0; 92 | 93 | for (chunk = lastchunk; chunk >= 0; chunk--) { 94 | uint32_t val = 0; 95 | int bit; 96 | 97 | for (bit = HEXCHUNKSZ - 1; bit >= 0; bit--) 98 | val = val << 1 | CPU_ISSET (chunk * HEXCHUNKSZ + bit, mask); 99 | cnt += snprintf (str + cnt, max (len - cnt, 0), "%s%0*x", 100 | sep, HEXCHARSIZE, val); 101 | 102 | sep = ","; 103 | } 104 | 105 | return cnt; 106 | } 107 | 108 | static inline int char_to_val (int c) 109 | { 110 | int cl; 111 | 112 | cl = tolower(c); 113 | if (c >= '0' && c <= '9') 114 | return c - '0'; 115 | else if (cl >= 'a' && cl <= 'f') 116 | return cl + (10 - 'a'); 117 | else 118 | return -1; 119 | } 120 | 121 | static int s_to_cpuset (cpu_set_t *mask, const char *str, int len) 122 | { 123 | int base = 0; 124 | const char *ptr = str + len - 1; 125 | 126 | while (ptr >= str) { 127 | char val = char_to_val(*ptr); 128 | if (val == (char) -1) 129 | return -1; 130 | if (val & 1) 131 | CPU_SET(base, mask); 132 | if (val & 2) 133 | CPU_SET(base + 1, mask); 134 | if (val & 4) 135 | CPU_SET(base + 2, mask); 136 | if (val & 8) 137 | CPU_SET(base + 3, mask); 138 | len--; 139 | ptr--; 140 | base += 4; 141 | } 142 | 143 | return 0; 144 | } 145 | 146 | 147 | int hex_to_cpuset (cpu_set_t *mask, const char *str) 148 | { 149 | const char *p, *q; 150 | int nchunks = 0, chunk; 151 | 152 | CPU_ZERO (mask); 153 | if (strlen(str) == 0) 154 | return 0; 155 | 156 | /* 157 | * Skip any leading 0x 158 | */ 159 | if (strncmp (str, "0x", 2) == 0) 160 | str += 2; 161 | 162 | q = str; 163 | 164 | while (p = q, q = nexttoken (q, ','), p) 165 | nchunks++; 166 | 167 | if (nchunks == 1) 168 | return s_to_cpuset (mask, str, strlen (str)); 169 | 170 | chunk = nchunks - 1; 171 | q = str; 172 | 173 | 174 | while (p = q, q = 
/*
 *  Parse a cpu list string such as "0-3,8" or "0-14:2" into `mask'.
 *
 *  The list is a comma separated sequence of entries, each of which is
 *   one of
 *
 *     N          a single cpu
 *     A-B        the inclusive range A..B, A <= B
 *     A-B:S      every S'th cpu of the range A..B, S > 0
 *
 *  All cpu numbers must be below CPU_SETSIZE and must not be written
 *   with leading zeros.
 *
 *  `mask' is always cleared first. An empty string leaves it empty and
 *   is not an error.
 *
 *  Returns 0 on success and 1 on any parse error, in which case `mask'
 *   may hold the cpus of the entries parsed before the error.
 */
int cstr_to_cpuset(cpu_set_t *mask, const char* str)
{
    const char *p = str;
    char *endptr;

    CPU_ZERO(mask);

    if (str == NULL)
        return 1;

    if (*str == '\0')
        return 0;

    for (;;) {
        unsigned long a;   /* beginning of range */
        unsigned long b;   /* end of range */
        unsigned long s;   /* stride */

        a = strtoul(p, &endptr, 10);
        if (endptr == p || a >= CPU_SETSIZE)
            return 1;
        /*
         *  Leading zeros are an error:
         */
        if (p[0] == '0' && (a != 0 || p[1] == '0'))
            return 1;

        b = a;
        s = 1;
        p = endptr;

        if (*p == '-') {
            p++;
            b = strtoul(p, &endptr, 10);
            if (endptr == p || b >= CPU_SETSIZE)
                return 1;
            p = endptr;

            if (*p == ':') {
                p++;
                s = strtoul(p, &endptr, 10);
                /*
                 *  A stride of zero would never advance past `b'
                 */
                if (endptr == p || s == 0)
                    return 1;
                /*
                 *  Any stride >= CPU_SETSIZE selects only `a'; clamp it
                 *   so that `a += s' below cannot wrap around.
                 */
                if (s > CPU_SETSIZE)
                    s = CPU_SETSIZE;
                p = endptr;
            }
        }

        if (a > b)
            return 1;

        for (; a <= b; a += s)
            CPU_SET(a, mask);

        /*
         *  Each entry has to be followed directly by the end of the
         *   string or by a comma introducing the next entry. Anything
         *   else is left over garbage.
         */
        if (*p == '\0')
            return 0;
        if (*p != ',')
            return 1;
        p++;
    }
}
If found, then 30 | -- print an error on job's stderr and kill remaining job step tasks. 31 | -- 32 | -- The pattern in check_oom_kill() may need to be updated for 33 | -- different kernels. The original version in this file was for 34 | -- RHEL6.2. 35 | -- 36 | 37 | local posix = require 'posix' 38 | 39 | --- Log an error with SLURM's log facility 40 | local function log_err (...) 41 | SPANK.log_error (...) 42 | end 43 | 44 | --- Log an informational message with SLURM's log facility 45 | local function log_info (...) 46 | local msg = string.format (...) 47 | SPANK.log_info ("oom-detect: %s", msg) 48 | end 49 | 50 | --- Get a spank item `item' from spank handle `spank' with name `name' 51 | local function get_item (spank, item, name) 52 | local v, err = spank:get_item (item) 53 | if not v then 54 | log_err ("Unable to get %s: %s", name, err) 55 | return nil 56 | end 57 | return v 58 | end 59 | 60 | --- Create a table of job info from the spank handle `spank' 61 | -- @spank valid spank context for a slurm job 62 | -- Returns a job table with the following entries: 63 | -- job 64 | -- .jobid SLURM job id 65 | -- .stepid SLURM job step id 66 | -- .uid user id 67 | -- .ntasks number of local tasks 68 | -- .task Table of task info 69 | -- .task.id Current task id 70 | -- .pid Current task pid 71 | -- .status Current task exit status 72 | -- .exitcode Current task exit code 73 | -- .signal If task killed by signal, then signo 74 | -- .coredump true if task dumped core at exit 75 | -- 76 | local function job_table_create (spank) 77 | local entries = { 78 | jobid = "S_JOB_ID", 79 | stepid = "S_JOB_STEPID", 80 | uid = "S_JOB_UID", 81 | taskid = "S_TASK_ID", 82 | ntasks = "S_JOB_LOCAL_TASK_COUNT", 83 | pid = "S_TASK_PID", 84 | } 85 | local job = {} 86 | 87 | for name,item in pairs (entries) do 88 | job[name] = get_item (spank, item, name) 89 | if not job[name] then 90 | return nil 91 | end 92 | end 93 | job.task = { 94 | id = tonumber(job.taskid), 95 | pid = 
---
--- Kill remaining tasks in this job step after an OOM event:
-- @job   job table for current job
--
-- Runs `scontrol listpids <jobid>.<stepid>' and sends signal 9 to every
-- pid found at the start of an output line. If the command cannot be
-- started the error is logged and nothing is killed.
--
function kill_all_step_tasks (job)
    local cmd = string.format ("scontrol listpids %d.%d", job.jobid, job.stepid)

    local f, err = io.popen (cmd)
    if f == nil then
        log_err ("%s: %s", cmd, err)
        return
    end

    for line in f:lines () do
        -- Lines that do not start with a number (e.g. a header) yield nil
        local pid = tonumber (string.match (line, "(%d+) .+"))
        if pid then
            posix.kill (pid, 9)
        end
    end

    f:close()
end

--- Plugin hook called for each task exit event in the current job step
--
-- Check each task exit to see if it was killed by the OOM killer, and print
-- a message to stderr and syslog if so.
--
-- Returns SPANK.FAILURE if the job information cannot be collected or
-- dmesg cannot be run, otherwise SPANK.SUCCESS.
--
function slurm_spank_task_exit (spank)

    -- job_table_create() returns nil (after logging the reason) when a
    --  required item is not available from the spank handle
    local job = job_table_create (spank)
    if not job then
        return SPANK.FAILURE
    end

    -- If this task has been terminated by OOM killer, then exit
    --  status will be '9' (Killed). Otherwise, don't bother
    --  searching dmesg output.
    if job.task.signal ~= 9 then
        return SPANK.SUCCESS
    end

    local f, err = io.popen ("/bin/dmesg")
    if f == nil then
        log_err ("/bin/dmesg: %s", err)
        return SPANK.FAILURE
    end

    for line in f:lines () do
        local pid, comm, vsz, rss, file_rss = check_oom_kill (line)
        -- pid is nil for every line that is not an OOM kill message
        if pid and job.task.pid == tonumber (pid) then
            log_oom_kill (job, comm, vsz, rss, file_rss)
            kill_all_step_tasks (job)
            break
        end
    end

    f:close()

    return SPANK.SUCCESS
end
/*
 *  Set the close-on-exec flag on [fd], leaving any other file
 *    descriptor flags as they are (read-modify-write, in the same way
 *    fd_set_nonblocking() treats the file status flags).
 *  Returns 0 on success, or -1 on error.
 */
int
fd_set_close_on_exec (int fd)
{
    int fval;

    assert (fd >= 0);

    if ((fval = fcntl (fd, F_GETFD, 0)) < 0)
        return (-1);
    if (fcntl (fd, F_SETFD, fval | FD_CLOEXEC) < 0)
        return (-1);
    return (0);
}
/*
 *  Read up to [n] bytes from [fd] into [buf], restarting reads that
 *    are interrupted by a signal and continuing after short reads.
 *  Returns the number of bytes read, which is less than [n] only if
 *    EOF was reached first, or -1 on error.
 */
ssize_t
fd_read_n (int fd, void *buf, size_t n)
{
    unsigned char *dst = buf;
    size_t done = 0;

    while (done < n) {
        ssize_t rc = read (fd, dst + done, n - done);

        if (rc > 0) {
            done += (size_t) rc;
            continue;
        }
        if (rc == 0)                    /* EOF */
            break;
        if (errno != EINTR)
            return (-1);
    }
    return ((ssize_t) done);
}
/*
 *  Following added by Mike Haskell
 */
/*
 *  Like fd_read_n(), but additionally stop reading as soon as the data
 *    received so far contains a NUL byte.
 *  Reads up to [n] bytes from [fd] into [buf], restarting reads that
 *    are interrupted by a signal.
 *  Returns the number of bytes read (bytes following the NUL within
 *    the same read() are included), or -1 on error.
 */
ssize_t
fd_null_read_n (int fd, void *buf, size_t n)
{
    unsigned char *p = buf;
    size_t nleft = n;
    ssize_t nread;

    while (nleft > 0) {
        if ((nread = read (fd, p, nleft)) < 0) {
            if (errno == EINTR)
                continue;
            return (-1);
        }
        if (nread == 0)                 /* EOF */
            break;

        nleft -= (size_t) nread;

        /*
         *  Only the bytes just received need to be searched: the loop
         *    would already have ended had an earlier read() delivered
         *    a NUL.
         */
        if (memchr (p, '\0', (size_t) nread) != NULL)
            break;

        p += nread;
    }
    return ((ssize_t) (n - nleft));
}
4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #define PAM_SM_ACCOUNT 33 | #include 34 | #include 35 | #include 36 | 37 | #include "create.h" 38 | #include "util.h" 39 | #include "hostlist.h" 40 | #include "slurm.h" 41 | #include "conf.h" 42 | #include "log.h" 43 | 44 | static int create_all_job_cpusets (cpuset_conf_t conf, uid_t uid); 45 | static int migrate_to_user_cpuset (uid_t uid); 46 | static int in_user_cpuset (uid_t uid); 47 | 48 | static pam_handle_t *pam_handle = NULL; 49 | 50 | static const char msg_prefix [] = ""; 51 | static const char msg_suffix [] = "\r"; 52 | 53 | static int debuglevel = 1; 54 | 55 | 56 | static int log_pam_syslog (const char *msg) { 57 | pam_syslog (pam_handle, 0, "%s", msg); 58 | return (0); 59 | } 60 | 61 | static int log_pam_error (const char *msg) { 62 | pam_error (pam_handle, "%s%s%s", msg_prefix, msg, msg_suffix); 63 | return (0); 64 | } 65 | 66 | static int parse_options (cpuset_conf_t conf, int ac, const char **av) 67 | { 68 | int i; 69 | for (i = 0; i < ac; i++) { 70 
| if (strcmp ("debug", av[i]) == 0) 71 | debuglevel++; 72 | else if (strncmp ("debug=", av[i], 6) == 0) 73 | debuglevel = 1 + str2int (av[i] + 6); 74 | else if (cpuset_conf_parse_opt (conf, av[i]) < 0) 75 | return (-1); 76 | } 77 | return (0); 78 | } 79 | 80 | PAM_EXTERN int 81 | pam_sm_acct_mgmt (pam_handle_t *pamh, int flags, int ac, const char **av) 82 | { 83 | int rc; 84 | int n; 85 | const char *user; 86 | struct passwd *pw; 87 | uid_t uid; 88 | const void **uptr = (const void **) &user; 89 | int lockfd; 90 | 91 | cpuset_conf_t conf = cpuset_conf_create (); 92 | 93 | pam_handle = pamh; 94 | 95 | log_add_dest (debuglevel, log_pam_syslog); 96 | log_add_dest (0, log_pam_error); 97 | log_set_prefix (""); 98 | 99 | if ((rc = pam_get_item (pamh, PAM_USER, uptr)) != PAM_SUCCESS 100 | || user == NULL 101 | || *user == '\0') { 102 | log_err ("get PAM_USER: %s", pam_strerror (pamh, rc)); 103 | return (PAM_USER_UNKNOWN); 104 | } 105 | 106 | if (!(pw = getpwnam (user))) { 107 | log_err ("User (%s) does not exist.", user); 108 | return (PAM_USER_UNKNOWN); 109 | } 110 | 111 | uid = pw->pw_uid; 112 | 113 | if (uid == 0) 114 | return (PAM_SUCCESS); 115 | 116 | /* 117 | * If we're already in the user's cpuset, bail early 118 | */ 119 | if (in_user_cpuset (uid)) 120 | return (PAM_SUCCESS); 121 | 122 | /* 123 | * Read any configuration: 124 | */ 125 | if (parse_options (conf, ac, av) < 0) 126 | return (PAM_SYSTEM_ERR); 127 | 128 | log_update (debuglevel, log_pam_syslog); 129 | 130 | /* 131 | * If we didn't parse a config file due to "conf=" above, 132 | * then parse the system config. 133 | */ 134 | if (!cpuset_conf_file (conf)) 135 | cpuset_conf_parse_system (conf); 136 | 137 | /* 138 | * Now we have to create cpusets for all running jobs 139 | * on the system for this user, so that they have the 140 | * correct number of CPUs accounted to them upon logging 141 | * in. 
142 | */ 143 | 144 | if ((lockfd = slurm_cpuset_create (conf)) < 0) { 145 | log_err ("Unable to initialilze slurm cpuset"); 146 | return (PAM_SYSTEM_ERR); 147 | } 148 | 149 | /* 150 | * create_all_job_cpusets returns the number of CPUs 151 | * the user has allocated on this node (or -1 for failure) 152 | */ 153 | 154 | if ((n = create_all_job_cpusets (conf, uid)) < 0) { 155 | log_err ("Failed to create user cpuset for uid=%d", uid); 156 | slurm_cpuset_unlock (lockfd); 157 | return (PAM_SYSTEM_ERR); 158 | } 159 | else if (n == 0) { 160 | log_err ("Access denied: User %s (uid=%d) has no active SLURM jobs.", 161 | user, uid); 162 | slurm_cpuset_unlock (lockfd); 163 | return (PAM_PERM_DENIED); 164 | } 165 | 166 | if (migrate_to_user_cpuset (uid) < 0) { 167 | log_err ("Failed to create user cpuset for uid=%d", uid); 168 | slurm_cpuset_unlock (lockfd); 169 | return (PAM_SYSTEM_ERR); 170 | } 171 | slurm_cpuset_unlock (lockfd); 172 | 173 | log_msg ("Access granted for user %s (uid=%d) with %d CPUs", 174 | user, uid, n); 175 | 176 | cpuset_conf_destroy (conf); 177 | 178 | return (PAM_SUCCESS); 179 | } 180 | 181 | static int in_user_cpuset (uid_t uid) 182 | { 183 | char p [1024]; 184 | char q [1024]; 185 | int n; 186 | 187 | if (!cpuset_getcpusetpath (0, p, sizeof (p))) 188 | return (0); 189 | 190 | n = snprintf (q, sizeof (q), "/slurm/%d", uid); 191 | if ((n <= 0) || (n >= sizeof (q))) 192 | return (0); 193 | 194 | return (strncmp (p, q, strlen (q)) == 0); 195 | } 196 | 197 | static int migrate_to_user_cpuset (uid_t uid) 198 | { 199 | int rc; 200 | char path [128]; 201 | 202 | rc = snprintf (path, sizeof (path), "/slurm/%d", uid); 203 | if (rc < 0 || rc > sizeof (path)) 204 | return (-1); 205 | 206 | if (cpuset_move (0, path) < 0) 207 | return (-1); 208 | 209 | return (0); 210 | } 211 | 212 | int hostname_hostid (const char *host, const char *nodes) 213 | { 214 | int n; 215 | hostlist_t h = hostlist_create (nodes); 216 | 217 | if (!(h = hostlist_create (nodes))) 218 | 
return (0); 219 | 220 | n = hostlist_find (h, host); 221 | hostlist_destroy (h); 222 | 223 | return (n); 224 | } 225 | 226 | int cpus_on_node (job_info_t *j, const char *host) 227 | { 228 | return slurm_job_cpus_allocated_on_node (j->job_resrcs, host); 229 | } 230 | 231 | int create_all_job_cpusets (cpuset_conf_t conf, uid_t uid) 232 | { 233 | int i; 234 | char hostname[256]; 235 | char *p; 236 | job_info_msg_t * msg; 237 | int total_cpus = 0; 238 | 239 | if (gethostname (hostname, sizeof (hostname)) < 0) { 240 | return (-1); 241 | } 242 | 243 | if ((p = strchr (hostname, '.'))) 244 | *p = '\0'; 245 | 246 | dyn_slurm_open (); 247 | if (slurm_load_jobs (0, &msg, SHOW_ALL|SHOW_DETAIL) < 0) { 248 | return (-1); 249 | } 250 | 251 | for (i = 0; i < msg->record_count; i++) { 252 | job_info_t *j = &msg->job_array[i]; 253 | int ncpus; 254 | 255 | if ((j->user_id != uid) || (j->job_state != JOB_RUNNING)) 256 | continue; 257 | 258 | if ((ncpus = cpus_on_node (j, hostname)) <= 0) 259 | continue; 260 | 261 | if (!job_cpuset_exists (j->job_id, j->user_id) && 262 | create_cpuset_for_job (conf, j->job_id, j->user_id, ncpus) < 0) { 263 | log_err ("job %u: Failed to create cpuset: %m", j->job_id); 264 | continue; 265 | } 266 | 267 | total_cpus += ncpus; 268 | } 269 | 270 | slurm_free_job_info_msg (msg); 271 | 272 | dyn_slurm_close (); 273 | 274 | return (total_cpus); 275 | } 276 | 277 | /* 278 | * vi: ts=4 sw=4 expandtab 279 | */ 280 | 281 | -------------------------------------------------------------------------------- /slurm-spank-plugins.spec: -------------------------------------------------------------------------------- 1 | ## 2 | # $Id: chaos-spankings.spec 7813 2008-09-25 23:08:25Z grondo $ 3 | ## 4 | 5 | # 6 | # Allow defining --with and --without build options or %_with and %without in . 
7 | # _with builds option by default unless --without is specified 8 | # _without builds option iff --with specified 9 | # 10 | %define _with_opt() %{expand:%%{!?_without_%{1}:%%global _with_%{1} 1}} 11 | %define _without_opt() %{expand:%%{?_with_%{1}:%%global _with_%{1} 1}} 12 | 13 | # 14 | # _with helper macro to test for slurm_with_* 15 | # 16 | %define _with() %{expand:%%{?_with_%{1}:1}%%{!?_with_%{1}:0}} 17 | 18 | # 19 | # Build llnl plugins and cpuset by default on chaos systems 20 | # 21 | 22 | %if 0%{?chaos} 23 | %_with_opt llnl_plugins 24 | %_with_opt lua 25 | %else 26 | %_without_opt llnl_plugins 27 | %_without_opt cpuset 28 | %_without_opt lua 29 | %endif 30 | 31 | %if %{?chaos}0 && 0%{?chaos} < 5 32 | %_with_opt sgijob 33 | %_with_opt cpuset 34 | %else 35 | %_without_opt sgijob 36 | %_without_opt cpuset 37 | %endif 38 | 39 | 40 | 41 | Name: 42 | Version: 43 | Release: 44 | 45 | Summary: SLURM SPANK modules for CHAOS systems 46 | Group: System Environment/Base 47 | License: GPL 48 | 49 | BuildRoot: %{_tmppath}/%{name}-%{version} 50 | Source0: %{name}-%{version}.tgz 51 | Requires: slurm 52 | Obsoletes: chaos-spankings 53 | 54 | BuildRequires: slurm-devel bison flex 55 | 56 | %if %{_with cpuset} 57 | BuildRequires: libbitmask libcpuset 58 | BuildRequires: pam-devel 59 | %endif 60 | 61 | %if %{_with sgijob} 62 | BuildRequires: job 63 | %endif 64 | 65 | %if %{_with lua} 66 | BuildRequires: lua-devel >= 5.1 67 | %endif 68 | 69 | 70 | %description 71 | This package contains a set of SLURM spank plugins which enhance and 72 | extend SLURM functionality for users and administrators. 73 | 74 | Currently includes: 75 | - renice.so : add --renice option to srun allowing users to set priority 76 | of job 77 | - system-safe.so : Implement pre-forked system(3) replacement in case MPI 78 | implementation doesn't support fork(2). 
79 | - iotrace.so : Enable tracing of IO calls through LD_PRELOAD trick 80 | - use-env.so : Add --use-env flag to srun to override environment 81 | variables for job 82 | - auto-affinity.so: 83 | Try to set CPU affinity on jobs using some kind of 84 | presumably sane defaults. Also adds an --auto-affinity 85 | option for tweaking the default behavior. 86 | 87 | - overcommit-memory.so : 88 | Allow users to choose overcommit mode on nodes of 89 | their job. 90 | 91 | - pty.so : Run task 0 of SLURM job under pseudo tty. 92 | - preserve-env.so: Attempt to preserve exactly the SLURM_* environment 93 | variables in remote tasks. Meant to be used like: 94 | salloc -n100 srun --preserve-slurm-env -n1 -N1 --pty bash 95 | - setsched.so : enable administrators to enforce a particular kernel scheduling 96 | policy for tasks spawned by slurm 97 | 98 | %if %{_with llnl_plugins} 99 | %package llnl 100 | Summary: SLURM spank plugins LLNL-only 101 | Group: System Environment/Base 102 | Requires: slurm 103 | %if %{_with sgijob} 104 | Requires: job 105 | %endif 106 | Obsoletes: chaos-spankings 107 | 108 | %description llnl 109 | The set of SLURM SPANK plugins that will only run on LLNL systems. 110 | Includes: 111 | - private-mount.so : 112 | Run jobs or tasks in a private file system namespace 113 | and privately mount file systems from /etc/slurm/fstab. 114 | %endif 115 | 116 | 117 | %if %{_with cpuset} 118 | %package cpuset 119 | Summary: Cpuset spank plugin for slurm. 120 | Group: System Environment/Base 121 | Requires: libbitmask libcpuset slurm pam 122 | Obsoletes: chaos-spankings-cpuset 123 | 124 | %description cpuset 125 | This package contains a SLURM spank plugin for enabling 126 | the use of cpusets to constrain CPU use of jobs on nodes to 127 | the number of CPUs allocated. This plugin is specifically 128 | designed for systems sharing nodes and using CPU scheduling 129 | (i.e. using the sched/cons_res plugin). 
Most importantly the 130 | plugin will be harmful when overallocating CPUs on nodes. The 131 | plugin is enabled by adding the line: 132 | 133 | required cpuset.so [options] 134 | 135 | to /etc/slurm/plugstack.conf. 136 | 137 | A PAM module - pam_slurm_cpuset.so - is also provided for 138 | constraining user logins in a similar fashion. For more 139 | information see the slurm-cpuset(8) man page provided with 140 | this package. 141 | 142 | %endif 143 | 144 | %if %{_with lua} 145 | %package lua 146 | Summary: lua spank plugin for slurm. 147 | Group: System Environment/Base 148 | Requires:lua >= 5.1 149 | 150 | %description lua 151 | The lua.so spank plugin for SLURM allows lua scripts to take the 152 | place of compiled C shared objects in the SLURM spank(8) framework. 153 | All the power of the C SPANK API is exported to lua via this 154 | plugin, which loads one or more scripts and executes lua functions during 155 | the appropriate SLURM phase (as described in the spank(8) manpage). 156 | 157 | %endif 158 | 159 | %prep 160 | %setup 161 | 162 | %build 163 | make \ 164 | %{?_with_llnl_plugins:BUILD_LLNL_ONLY=1} \ 165 | %{?_with_cpuset:BUILD_CPUSET=1} \ 166 | %{?_with_lua:WITH_LUA=1} \ 167 | %{?chaos:HAVE_SPANK_OPTION_GETOPT=1} \ 168 | CFLAGS="$RPM_OPT_FLAGS" 169 | 170 | %if %{_with lua} 171 | cd lua && make check 172 | %endif 173 | 174 | %install 175 | rm -rf "$RPM_BUILD_ROOT" 176 | mkdir -p "$RPM_BUILD_ROOT" 177 | 178 | make \ 179 | LIBNAME=%{_lib} \ 180 | LIBDIR=%{_libdir} \ 181 | BINDIR=%{_bindir} \ 182 | SBINDIR=/sbin \ 183 | LIBEXECDIR=%{_libexecdir} \ 184 | DESTDIR="$RPM_BUILD_ROOT" \ 185 | %{?_with_llnl_plugins:BUILD_LLNL_ONLY=1} \ 186 | %{?_with_cpuset:BUILD_CPUSET=1} \ 187 | %{?_with_lua:WITH_LUA=1} \ 188 | install 189 | 190 | %if %{_with cpuset} 191 | # slurm-cpuset init script 192 | install -D -m0755 cpuset/cpuset.init \ 193 | $RPM_BUILD_ROOT/%{_sysconfdir}/init.d/slurm-cpuset 194 | %endif 195 | 196 | # create /etc/slurm/plugstack.conf.d directory 197 | mkdir -p
$RPM_BUILD_ROOT/%{_sysconfdir}/slurm/plugstack.conf.d 198 | 199 | # 200 | # As of SLURM 1.4.x, preserve-env functionality is available 201 | # directly in SLURM. We keep the plugin around for reference, 202 | # but do not install it. 203 | # 204 | # create entry for preserve-env.so 205 | #echo " required preserve-env.so" > \ 206 | # $RPM_BUILD_ROOT/%{_sysconfdir}/slurm/plugstack.conf.d/99-preserve-env 207 | rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/preserve-env.so 208 | 209 | %if %{_with lua} 210 | echo " required lua.so /etc/slurm/lua.d/*.lua" > \ 211 | $RPM_BUILD_ROOT/%{_sysconfdir}/slurm/plugstack.conf.d/99-lua 212 | install -D -m0644 lua/spank-lua.8 $RPM_BUILD_ROOT/%{_mandir}/man8/spank-lua.8 213 | %endif 214 | 215 | %clean 216 | rm -rf "$RPM_BUILD_ROOT" 217 | 218 | %if %{_with cpuset} 219 | %post cpuset 220 | if [ -x /sbin/chkconfig ]; then /sbin/chkconfig --add slurm-cpuset; fi 221 | 222 | %preun cpuset 223 | if [ "$1" = 0 ]; then 224 | if [ -x /sbin/chkconfig ]; then /sbin/chkconfig --del slurm-cpuset; fi 225 | fi 226 | %endif 227 | 228 | %files 229 | %defattr(-,root,root,0755) 230 | %doc NEWS NEWS.old ChangeLog README.use-env 231 | %{_libdir}/slurm/renice.so 232 | %{_libdir}/slurm/system-safe.so 233 | %{_libdir}/slurm/iotrace.so 234 | %{_libdir}/slurm/tmpdir.so 235 | %{_libdir}/slurm/use-env.so 236 | %{_libdir}/slurm/overcommit-memory.so 237 | %{_libdir}/slurm/auto-affinity.so 238 | %{_libdir}/slurm/pty.so 239 | %{_libdir}/slurm/addr-no-randomize.so 240 | %{_libdir}/system-safe-preload.so 241 | %{_libexecdir}/%{name}/overcommit-util 242 | %{_libdir}/slurm/setsched.so 243 | %dir %attr(0755,root,root) %{_sysconfdir}/slurm/plugstack.conf.d 244 | 245 | %if %{_with llnl_plugins} 246 | %files llnl 247 | %defattr(-,root,root,0755) 248 | %doc NEWS NEWS.old ChangeLog 249 | %{_libdir}/slurm/private-mount.so 250 | %endif 251 | 252 | %if %{_with cpuset} 253 | %files cpuset 254 | %defattr(-,root,root,0755) 255 | %doc NEWS NEWS.old ChangeLog cpuset/README 256 |
%{_sysconfdir}/init.d/slurm-cpuset 257 | %{_libdir}/slurm/cpuset.so 258 | /%{_lib}/security/pam_slurm_cpuset.so 259 | /sbin/cpuset_release_agent 260 | %{_mandir}/man1/use-cpusets.* 261 | %{_mandir}/man8/pam_slurm_cpuset.* 262 | %{_mandir}/man8/slurm-cpuset.* 263 | %endif 264 | 265 | 266 | %if %{_with lua} 267 | %files lua 268 | %{_sysconfdir}/slurm/plugstack.conf.d/99-lua 269 | %{_libdir}/slurm/lua.so 270 | %{_mandir}/man8/spank-lua* 271 | %{_libdir}/lua/5.1/* 272 | %endif 273 | 274 | -------------------------------------------------------------------------------- /system-safe-preload.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 23 | ****************************************************************************/ 24 | 25 | /* 26 | * safe-system.so : Making system(3) safe for MPI jobs everywhere. 
27 | */ 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | extern char **environ; 42 | 43 | typedef int (*system_f) (const char * cmd); 44 | 45 | static void * libc_handle; 46 | static system_f real_system; 47 | 48 | static int client_fd = -1; 49 | static int server_fd = -1; 50 | 51 | static int write_n (int fd, const void *buf, size_t n) 52 | { 53 | size_t nleft; 54 | ssize_t nwritten; 55 | unsigned const char *p; 56 | 57 | p = buf; 58 | nleft = n; 59 | while (nleft > 0) { 60 | if ((nwritten = write (fd, p, nleft)) < 0) { 61 | if (errno == EINTR) 62 | continue; 63 | else 64 | return (-1); 65 | } 66 | nleft -= nwritten; 67 | p += nwritten; 68 | } 69 | return (n); 70 | } 71 | 72 | static int read_n (int fd, void *buf, size_t n) 73 | { 74 | size_t nleft; 75 | ssize_t nread; 76 | unsigned char *p; 77 | 78 | p = buf; 79 | nleft = n; 80 | while (nleft > 0) { 81 | if ((nread = read (fd, p, nleft)) < 0) { 82 | if (errno == EINTR) 83 | continue; 84 | else 85 | return (-1); 86 | } 87 | else if (nread == 0) { /* EOF */ 88 | break; 89 | } 90 | nleft -= nread; 91 | p += nread; 92 | } 93 | return (n - nleft); 94 | } 95 | 96 | 97 | static int create_socketpair (void) 98 | { 99 | int pfds[2]; 100 | 101 | if (socketpair (AF_UNIX, SOCK_STREAM, 0, pfds) < 0) { 102 | fprintf (stderr, "systemsafe: socketpair failed: %s\n", strerror (errno)); 103 | return (-1); 104 | } 105 | 106 | client_fd = pfds[0]; 107 | server_fd = pfds[1]; 108 | 109 | fcntl (client_fd, F_SETFD, FD_CLOEXEC); 110 | fcntl (server_fd, F_SETFD, FD_CLOEXEC); 111 | 112 | return (0); 113 | } 114 | 115 | static int read_string (int fd, char **bufp) 116 | { 117 | int len = 0; 118 | int rc; 119 | 120 | *bufp = NULL; 121 | 122 | /* 123 | * Read string length 124 | */ 125 | if ((rc = read_n (fd, &len, sizeof (int))) < 0) { 126 | fprintf (stderr, "systemsafe: read_string: %s\n", strerror (errno)); 127 | return 
(-1); 128 | } 129 | 130 | if (rc == 0) 131 | return (0); 132 | 133 | if ((*bufp = malloc (len + 1)) == NULL) { 134 | fprintf (stderr, "systemsafe: read_string: malloc (%d): %s\n", 135 | len, strerror (errno)); 136 | return (-1); 137 | } 138 | 139 | if ((rc = read_n (fd, *bufp, len)) < 0) { 140 | fprintf (stderr, "systemsafe: read_string: %s\n", strerror (errno)); 141 | return (-1); 142 | } 143 | 144 | if (rc == 0) 145 | return (0); 146 | 147 | (*bufp) [len] = '\0'; 148 | 149 | return (len); 150 | } 151 | 152 | static int write_string (int fd, const char *str) 153 | { 154 | int len = strlen (str); 155 | int rc; 156 | 157 | if (write_n (fd, &len, sizeof (int)) < 0) { 158 | fprintf (stderr, "systemsafe: write: %s\n", strerror (errno)); 159 | return (-1); 160 | } 161 | 162 | rc = write_n (fd, str, len); 163 | 164 | return (rc); 165 | } 166 | 167 | void free_env (char **env) 168 | { 169 | int i = 0; 170 | while (env [i]) 171 | free (env [i++]); 172 | free (env); 173 | return; 174 | } 175 | 176 | int read_env (int fd, char ***envp) 177 | { 178 | int envc = 0; 179 | int i; 180 | 181 | if (read_n (fd, &envc, sizeof (int)) < 0) { 182 | fprintf (stderr, "systemsafe: read_env: %s\n", strerror (errno)); 183 | return (-1); 184 | } 185 | 186 | if (!(*envp = malloc ((envc + 1) * sizeof (**envp)))) { 187 | fprintf (stderr, "systemsafe: read_env: malloc: %s\n", strerror (errno)); 188 | return (-1); 189 | } 190 | 191 | for (i = 0; i < envc; i++) { 192 | char *entry; 193 | if (read_string (fd, &entry) < 0) { 194 | fprintf (stderr, "systemsafe: %s\n", strerror (errno)); 195 | free_env (*envp); 196 | return (-1); 197 | } 198 | 199 | if (strncmp ("LD_PRELOAD=", entry, 10) == 0) 200 | entry [11] = '\0'; 201 | 202 | (*envp)[i] = entry; 203 | } 204 | 205 | (*envp)[envc] = NULL; 206 | 207 | return (0); 208 | } 209 | 210 | static void handle_system_request (int fd) 211 | { 212 | char *cmd, *path, **env, **oldenv; 213 | int rc; 214 | 215 | if ((rc = read_string (fd, &cmd)) < 0) { 216 | 
fprintf (stderr, "systemsafe: read cmd: %s\n", strerror (errno)); 217 | exit (0); 218 | } 219 | 220 | if (rc == 0) /* EOF, time to exit */ 221 | exit (0); 222 | 223 | if (read_string (fd, &path) < 0) { 224 | fprintf (stderr, "systemsafe: read path: %s\n", strerror (errno)); 225 | exit (0); 226 | } 227 | 228 | if (read_env (fd, &env) < 0) { 229 | fprintf (stderr, "systemsafe: read env: %s\n", strerror (errno)); 230 | exit (0); 231 | } 232 | 233 | if (chdir (path) < 0) 234 | fprintf (stderr, "systemsafe: Failed to chdir to %s: %s\n", 235 | path, strerror (errno)); 236 | 237 | oldenv = environ; 238 | environ = env; 239 | 240 | rc = (*real_system) (cmd); 241 | 242 | write_n (fd, &rc, sizeof (int)); 243 | 244 | environ = oldenv; 245 | free_env (env); 246 | free (cmd); 247 | free (path); 248 | 249 | return; 250 | } 251 | 252 | static void system_server (void) 253 | { 254 | char c = 0; 255 | close (client_fd); 256 | write (server_fd, &c, 1); 257 | for (;;) 258 | handle_system_request (server_fd); 259 | return; 260 | } 261 | 262 | static int create_system_server (void) 263 | { 264 | pid_t pid; 265 | char c; 266 | 267 | create_socketpair (); 268 | 269 | if ((pid = fork ()) < 0) 270 | return (-1); 271 | 272 | if (pid == 0) { 273 | system_server (); 274 | exit (0); 275 | } 276 | 277 | close (server_fd); 278 | 279 | /* 280 | * Wait for system_server setup to complete 281 | */ 282 | read (client_fd, &c, 1); 283 | 284 | return (0); 285 | } 286 | 287 | static int write_env (int fd) 288 | { 289 | int i, envc = 0; 290 | 291 | while (environ[envc]) 292 | envc++; 293 | 294 | write (fd, &envc, sizeof (int)); 295 | 296 | for (i = 0; i < envc; i++) 297 | write_string (fd, environ [i]); 298 | 299 | return (0); 300 | } 301 | 302 | int system (const char *cmd) 303 | { 304 | int rc; 305 | char path [4096]; 306 | 307 | if (cmd == NULL) { 308 | errno = EINVAL; 309 | return (-1); 310 | } 311 | 312 | write_string (client_fd, cmd); 313 | write_string (client_fd, getcwd (path, sizeof (path))); 
314 | write_env (client_fd); 315 | 316 | if (read (client_fd, &rc, sizeof (int)) < 0) { 317 | fprintf (stderr, "system: failed to read status from server: %s\n", 318 | strerror (errno)); 319 | return (-1); 320 | } 321 | 322 | return (rc); 323 | } 324 | 325 | void __attribute__ ((constructor)) fork_safe_init (void) 326 | { 327 | if ((libc_handle = dlopen ("libc.so.6", RTLD_LAZY)) == NULL) { 328 | exit (1); 329 | } 330 | 331 | if ((real_system = dlsym (libc_handle, "system")) == NULL) 332 | exit (2); 333 | 334 | create_system_server (); 335 | 336 | return; 337 | } 338 | 339 | 340 | /* 341 | * vi: ts=4 sw=4 expandtab 342 | */ 343 | 344 | -------------------------------------------------------------------------------- /cpuset/conf.c: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * 3 | * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. 4 | * Produced at Lawrence Livermore National Laboratory. 5 | * Written by Mark Grondona . 6 | * 7 | * UCRL-CODE-235358 8 | * 9 | * This file is part of chaos-spankings, a set of spank plugins for SLURM. 10 | * 11 | * This is free software; you can redistribute it and/or modify it 12 | * under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2 of the License, or 14 | * (at your option) any later version. 15 | * 16 | * This is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 | * for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program. If not, see . 
23 | ****************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "conf.h" 31 | #include "log.h" 32 | 33 | #include "conf-parser.h" 34 | 35 | static const char * default_config = "/etc/slurm/slurm-cpuset.conf"; 36 | 37 | struct cpuset_conf { 38 | char filename [1024]; 39 | 40 | enum fit_policy policy; 41 | 42 | unsigned filename_valid:1; 43 | unsigned reverse_order:1; 44 | unsigned alloc_idle_nodes:1; 45 | unsigned use_idle_if_multiple:1; 46 | unsigned constrain_mems:1; 47 | unsigned kill_orphans:1; 48 | }; 49 | 50 | 51 | /* 52 | * Accessor routines 53 | */ 54 | enum fit_policy cpuset_conf_policy (cpuset_conf_t conf) 55 | { 56 | return (conf->policy); 57 | } 58 | 59 | int cpuset_conf_alloc_idle (cpuset_conf_t conf) 60 | { 61 | return (conf->alloc_idle_nodes); 62 | } 63 | 64 | int cpuset_conf_alloc_idle_gt (cpuset_conf_t conf) 65 | { 66 | return (conf->alloc_idle_nodes && !conf->use_idle_if_multiple); 67 | } 68 | 69 | int cpuset_conf_alloc_idle_multiple (cpuset_conf_t conf) 70 | { 71 | return (conf->alloc_idle_nodes && conf->use_idle_if_multiple); 72 | } 73 | 74 | int cpuset_conf_constrain_mem (cpuset_conf_t conf) 75 | { 76 | return (conf->constrain_mems); 77 | } 78 | 79 | int cpuset_conf_kill_orphans (cpuset_conf_t conf) 80 | { 81 | return (conf->kill_orphans); 82 | } 83 | 84 | int cpuset_conf_reverse_order (cpuset_conf_t conf) 85 | { 86 | return (conf->reverse_order); 87 | } 88 | 89 | int cpuset_conf_set_policy (cpuset_conf_t conf, enum fit_policy policy) 90 | { 91 | if (!conf) 92 | return (-1); 93 | conf->policy = policy; 94 | return (0); 95 | } 96 | 97 | int cpuset_conf_set_policy_string (cpuset_conf_t conf, const char *name) 98 | { 99 | if (strcmp (name, "best-fit") == 0) 100 | return (cpuset_conf_set_policy (conf, BEST_FIT)); 101 | else if (strcmp (name, "worst-fit") == 0) 102 | return (cpuset_conf_set_policy (conf, WORST_FIT)); 103 | else if (strcmp (name, 
"first-fit") == 0) 104 | return (cpuset_conf_set_policy (conf, FIRST_FIT)); 105 | else 106 | return (-1); 107 | } 108 | 109 | int cpuset_conf_set_alloc_idle (cpuset_conf_t conf, int alloc_idle) 110 | { 111 | if (!conf) 112 | return (-1); 113 | conf->alloc_idle_nodes = alloc_idle; 114 | return (0); 115 | } 116 | 117 | int cpuset_conf_set_alloc_idle_mode (cpuset_conf_t conf, int multiple_only) 118 | { 119 | if (!conf) 120 | return (-1); 121 | conf->use_idle_if_multiple = multiple_only; 122 | return (0); 123 | } 124 | 125 | int cpuset_conf_set_alloc_idle_string (cpuset_conf_t conf, const char *s) 126 | { 127 | if (strcmp (s, "0") == 0 || 128 | strcasecmp (s, "never") == 0 || 129 | strcasecmp (s, "no") == 0) 130 | return (cpuset_conf_set_alloc_idle (conf, 0)); 131 | 132 | if (strcmp (s, "1") == 0 || 133 | strcasecmp (s, "yes") == 0) 134 | return (cpuset_conf_set_alloc_idle (conf, 1)); 135 | 136 | if (strcasecmp (s, "multiple") == 0 || 137 | strcasecmp (s, "mult") == 0) 138 | return (cpuset_conf_set_alloc_idle_mode (conf, 1)); 139 | 140 | if (strcasecmp (s, "gt") == 0 || 141 | strcasecmp (s, "greater") == 0) 142 | return (cpuset_conf_set_alloc_idle_mode (conf, 0)); 143 | 144 | log_err ("Unknown alloc-idle setting \"%s\"\n", s); 145 | 146 | return (-1); 147 | } 148 | 149 | int cpuset_conf_parse_opt (cpuset_conf_t conf, const char *opt) 150 | { 151 | /* 152 | * First check to see if we're setting a policy 153 | */ 154 | if (cpuset_conf_set_policy_string (conf, opt) == 0) 155 | return (0); 156 | 157 | if (strncmp ("policy=", opt, 7) == 0) { 158 | if (cpuset_conf_set_policy_string (conf, opt + 7) < 0) 159 | return (log_err ("Unknown allocation policy \"%s\"", opt)); 160 | } 161 | 162 | /* 163 | * Next check for new config file via "conf=" 164 | */ 165 | if (strncmp ("conf=", opt, 5) == 0) 166 | return (cpuset_conf_parse (conf, opt + 5)); 167 | 168 | if ((strcmp ("!idle-1st", opt) == 0) || 169 | (strcmp ("no-idle", opt) == 0)) 170 | return (cpuset_conf_set_alloc_idle (conf, 
0)); 171 | 172 | if (strncmp ("idle-1st=", opt, 9) == 0) 173 | return (cpuset_conf_set_alloc_idle_string (conf, opt + 9)); 174 | 175 | if (strncmp ("idle-first=", opt, 11) == 0) 176 | return (cpuset_conf_set_alloc_idle_string (conf, opt + 11)); 177 | 178 | if ((strcmp ("!mem", opt) == 0) || 179 | (strcmp ("nomem", opt) == 0) || 180 | (strcmp ("!constrain-mem", opt) == 0)) 181 | return (cpuset_conf_set_constrain_mem (conf, 0)); 182 | 183 | if ((strcmp ("mem", opt) == 0) || 184 | (strcmp ("constrain-mem", opt) == 0)) 185 | return (cpuset_conf_set_constrain_mem (conf, 1)); 186 | 187 | if ((strcmp ("reverse", opt) == 0) || 188 | (strcmp ("order=reverse", opt) == 0)) 189 | return (cpuset_conf_set_order (conf, 1)); 190 | 191 | if ((strcmp ("order=normal", opt) == 0)) 192 | return (cpuset_conf_set_order (conf, 0)); 193 | 194 | return (log_err ("Unknown option \"%s\"\n", opt)); 195 | } 196 | 197 | int cpuset_conf_set_constrain_mem (cpuset_conf_t conf, int constrain_mem) 198 | { 199 | if (!conf) 200 | return (-1); 201 | conf->constrain_mems = constrain_mem; 202 | return (0); 203 | } 204 | 205 | int cpuset_conf_set_kill_orphans (cpuset_conf_t conf, int kill_orphans) 206 | { 207 | if (!conf) 208 | return (-1); 209 | conf->kill_orphans = kill_orphans; 210 | return (0); 211 | } 212 | 213 | int cpuset_conf_set_order (cpuset_conf_t conf, int reverse) 214 | { 215 | if (!conf) 216 | return (-1); 217 | conf->reverse_order = reverse; 218 | return (0); 219 | } 220 | 221 | 222 | /* 223 | * Create and Destroy: 224 | */ 225 | cpuset_conf_t cpuset_conf_create () 226 | { 227 | cpuset_conf_t conf = malloc (sizeof (*conf)); 228 | 229 | if (conf == NULL) 230 | return (NULL); 231 | 232 | memset (conf->filename, 0, sizeof (conf->filename)); 233 | conf->filename_valid = 0; 234 | 235 | /* 236 | * Set defaults 237 | */ 238 | conf->policy = BEST_FIT; 239 | conf->reverse_order = 0; 240 | conf->alloc_idle_nodes = 1; 241 | conf->use_idle_if_multiple = 1; 242 | conf->constrain_mems = 1; 243 | 
conf->kill_orphans = 0; 244 | 245 | return (conf); 246 | } 247 | 248 | void cpuset_conf_destroy (cpuset_conf_t conf) 249 | { 250 | if (conf) free (conf); 251 | } 252 | 253 | 254 | /* 255 | * Parsing 256 | */ 257 | 258 | static int parse_if_exists (cpuset_conf_t conf, const char *file) 259 | { 260 | if (access (file, F_OK) < 0) 261 | return (0); 262 | 263 | if (access (file, R_OK) < 0) { 264 | log_err ("File %s exists but is not readable.\n", file); 265 | return (-1); 266 | } 267 | 268 | if (cpuset_conf_parse (conf, file) < 0) 269 | return (-1); 270 | 271 | /* Successfully read config file */ 272 | return (0); 273 | } 274 | 275 | int cpuset_conf_parse_system (cpuset_conf_t conf) 276 | { 277 | return (parse_if_exists (conf, default_config)); 278 | } 279 | 280 | const char * cpuset_conf_file (cpuset_conf_t conf) 281 | { 282 | if (!conf->filename_valid) 283 | return (NULL); 284 | return (conf->filename); 285 | } 286 | 287 | void cpuset_conf_set_file (cpuset_conf_t conf, const char *file) 288 | { 289 | strncpy (conf->filename, file, sizeof (conf->filename)); 290 | conf->filename_valid = 1; 291 | } 292 | 293 | /* 294 | * Later, perhaps allow a per-user conf file in ~/.slurm/cpuset.conf... 295 | */ 296 | 297 | /* 298 | * vi: ts=4 sw=4 expandtab 299 | */ 300 | --------------------------------------------------------------------------------