├── examples ├── bin │ ├── poweroff │ └── reboot └── etc │ ├── fstab │ └── init ├── reap.c ├── Makefile ├── pivot.c ├── COPYING ├── stop.c ├── runfg.c ├── ueventwait ├── seal.c ├── syslogd ├── ueventd ├── landmask.c ├── uevent.c ├── syslog.c ├── README └── daemon.c /examples/bin/poweroff: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | kill -TERM 1 3 | -------------------------------------------------------------------------------- /examples/bin/reboot: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | kill -INT 1 3 | -------------------------------------------------------------------------------- /reap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(void) { 6 | signal(SIGCHLD, SIG_IGN); 7 | while (1) 8 | pause(); 9 | return EXIT_SUCCESS; 10 | } 11 | -------------------------------------------------------------------------------- /examples/etc/fstab: -------------------------------------------------------------------------------- 1 | PARTLABEL=root / auto discard,nodev 1 1 2 | PARTLABEL=boot /boot auto discard,nodev,noexec 1 1 3 | devtmpfs /dev devtmpfs mode=0755,noexec 0 0 4 | devpts /dev/pts devpts newinstance,noexec,ptmxmode=0666 0 0 5 | proc /proc proc nodev,noexec 0 0 6 | tmpfs /run tmpfs mode=0755,nodev,noexec 0 0 7 | sysfs /sys sysfs nodev,noexec 0 0 8 | cgroup2 /sys/fs/cgroup cgroup2 memory_recursiveprot,nodev,noexec,nsdelegate 0 0 9 | tmpfs /tmp tmpfs mode=1777,nodev,nosuid 0 0 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BINDIR := $(PREFIX)/bin 2 | CFLAGS := -Os -Wall -Wfatal-errors 3 | 4 | SCRIPTS := syslogd ueventd ueventwait 5 | BINARIES := daemon landmask pivot reap runfg seal stop syslog uevent 6 | 7 | %:: %.c Makefile 8 | 
$(CC) $(CFLAGS) -o $@ $(filter %.c,$^) 9 | 10 | all: $(SCRIPTS) $(BINARIES) 11 | 12 | install: $(SCRIPTS) $(BINARIES) 13 | mkdir -p $(DESTDIR)$(BINDIR) 14 | install -s $(BINARIES) $(DESTDIR)$(BINDIR) 15 | install $(SCRIPTS) $(DESTDIR)$(BINDIR) 16 | 17 | clean: 18 | rm -f $(BINARIES) 19 | 20 | .PHONY: all install clean 21 | -------------------------------------------------------------------------------- /pivot.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main(int argc, char **argv) { 11 | if (argc < 2) { 12 | fprintf(stderr, "Usage: %s NEW-ROOT [PUT-OLD]\n", argv[0]); 13 | return 64; 14 | } 15 | 16 | if (argc > 2) { 17 | if (syscall(__NR_pivot_root, argv[1], argv[2]) < 0) 18 | err(EXIT_FAILURE, "cannot pivot to new root %s", argv[1]); 19 | return EXIT_SUCCESS; 20 | } 21 | 22 | if (chdir(argv[1]) < 0 || syscall(__NR_pivot_root, ".", ".") < 0) 23 | err(EXIT_FAILURE, "cannot pivot to new root %s", argv[1]); 24 | if (mount(NULL, ".", NULL, MS_SLAVE | MS_REC, NULL) < 0) 25 | err(EXIT_FAILURE, "cannot disable old root mount propagation"); 26 | if (umount2(".", MNT_DETACH) < 0) 27 | err(EXIT_FAILURE, "cannot detach old root"); 28 | 29 | return EXIT_SUCCESS; 30 | } 31 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (C) 2006-2025 Chris Webb. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /stop.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | int main(int argc, char **argv) { 9 | if (argc == 2 && !strcmp(argv[1], "halt")) 10 | return reboot(RB_HALT_SYSTEM); 11 | else if (argc == 2 && !strcmp(argv[1], "kexec")) 12 | return reboot(RB_KEXEC); 13 | else if (argc == 2 && !strcmp(argv[1], "poweroff")) 14 | return fork() > 0 ? 
pause() : reboot(RB_POWER_OFF); 15 | else if (argc == 2 && !strcmp(argv[1], "reboot")) 16 | return reboot(RB_AUTOBOOT); 17 | else if (argc == 2 && !strcmp(argv[1], "suspend")) 18 | return reboot(RB_SW_SUSPEND); 19 | 20 | fprintf(stderr, "\ 21 | Usage: %s ACTION\n\ 22 | Actions:\n\ 23 | halt halt the machine\n\ 24 | kexec jump to a new kernel loaded for kexec\n\ 25 | poweroff switch off the machine\n\ 26 | reboot restart the machine\n\ 27 | suspend hibernate the machine to disk\n\ 28 | All actions are performed immediately without flushing buffers or a\n\ 29 | graceful shutdown. Data may be lost on unsynced mounted filesystems.\n\ 30 | ", argv[0]); 31 | return EX_USAGE; 32 | } 33 | -------------------------------------------------------------------------------- /runfg.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int main(int argc, char **argv) { 10 | int result = EXIT_FAILURE, status; 11 | pid_t child, command; 12 | 13 | if (argc < 2) { 14 | fprintf(stderr, "Usage: %s CMD [ARG]...\n", argv[0]); 15 | return 64; 16 | } 17 | 18 | if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) 19 | err(EXIT_FAILURE, "prctl PR_SET_CHILD_SUBREAPER"); 20 | 21 | switch (command = fork()) { 22 | case -1: 23 | err(EXIT_FAILURE, "fork"); 24 | case 0: 25 | execvp(argv[1], argv + 1); 26 | err(EXIT_FAILURE, "exec %s", argv[1]); 27 | } 28 | 29 | while (1) { 30 | child = waitpid(-1, &status, 0); 31 | if (child < 0) { 32 | if (errno == ECHILD) 33 | return result; 34 | if (errno != EINTR) 35 | err(EXIT_FAILURE, "waitpid"); 36 | } else if (child == command) { 37 | if (WIFEXITED(status)) 38 | result = WEXITSTATUS(status); 39 | if (WIFSIGNALED(status)) 40 | result = 128 + WTERMSIG(status); 41 | command = -1; 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /ueventwait: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | shopt -s extglob 4 | SYSFS=${SYSFS:-/sys} 5 | 6 | usage() { 7 | echo "Usage: ${0##*/} KEY=PATTERN..." >&2 8 | exit 64 9 | } 10 | 11 | if [[ $# -eq 0 ]]; then 12 | usage 13 | fi 14 | 15 | declare -A PATTERNS=( [ACTION]=add ) 16 | while [[ $# -gt 0 ]]; do 17 | if [[ $1 == +([^=])=* ]]; then 18 | PATTERNS[${1%%=*}]=${1#*=} 19 | else 20 | usage 21 | fi 22 | shift 23 | done 24 | 25 | exec < <(uevent -l 1 /dev/null" EXIT 27 | read -r READY 28 | 29 | if [[ add == ${PATTERNS[ACTION]} ]]; then 30 | while read -r UEVENT; do 31 | declare -A ENV=( [ACTION]=add ) 32 | 33 | while IFS='=' read -r KEY VALUE; do 34 | if [[ -n $KEY ]]; then 35 | ENV[$KEY]=$VALUE 36 | fi 37 | done 2>/dev/null < "$UEVENT" 38 | 39 | for KEY in "${!PATTERNS[@]}"; do 40 | if [[ ${ENV[$KEY]} != ${PATTERNS[$KEY]} ]]; then 41 | continue 2 42 | fi 43 | done 44 | 45 | echo "${UEVENT%/uevent}" 46 | exit 0 47 | done < <(find $SYSFS/devices -name uevent -type f) 48 | fi 49 | 50 | declare -A ENV=() 51 | while read -r KEY VALUE; do 52 | if [[ -n $KEY ]]; then 53 | ENV[$KEY]=$VALUE 54 | continue 55 | fi 56 | 57 | for KEY in "${!PATTERNS[@]}"; do 58 | if [[ ${ENV[$KEY]} != ${PATTERNS[$KEY]} ]]; then 59 | declare -A ENV=() 60 | continue 2 61 | fi 62 | done 63 | 64 | echo "$SYSFS${ENV[DEVPATH]}" 65 | exit 0 66 | done 67 | 68 | exit 1 69 | -------------------------------------------------------------------------------- /seal.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | const int seals = F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE; 13 | 14 | int main(int argc, char **argv, char **envp) { 15 | char *dir, *file, *path; 16 | int src, dst; 17 | ssize_t length; 18 | 19 | if (argc < 2) { 20 | fprintf(stderr, "Usage: %s 
PROG [ARG]...\n", argv[0]); 21 | return 64; 22 | } 23 | /* Search a private copy of PATH: strsep() writes NULs into its input, which would otherwise truncate the PATH handed on to the program through envp. */ 24 | if ((path = getenv("PATH")) && (path = strdup(path)) == NULL) 25 | err(EXIT_FAILURE, "malloc"); 26 | src = -1; 27 | if (strchr(argv[1], '/')) { 28 | if (access(file = argv[1], X_OK) < 0) 29 | err(EXIT_FAILURE, "%s", file); 30 | if (src = open(file, O_RDONLY), src < 0) 31 | err(EXIT_FAILURE, "open %s", file); 32 | } 33 | 34 | while (src < 0 && path) { 35 | dir = strsep(&path, ":"); 36 | if (asprintf(&file, "%s%s%s", dir, *dir ? "/" : "", argv[1]) < 0) 37 | err(EXIT_FAILURE, "malloc"); 38 | if (access(file, X_OK) < 0) 39 | free(file); 40 | else if (src = open(file, O_RDONLY), src < 0) 41 | err(EXIT_FAILURE, "open %s", file); 42 | } 43 | 44 | if (src < 0) { 45 | errno = ENOENT; 46 | err(EXIT_FAILURE, "%s", argv[1]); 47 | } 48 | 49 | dst = memfd_create(file, MFD_CLOEXEC | MFD_ALLOW_SEALING); 50 | if (dst < 0) 51 | err(EXIT_FAILURE, "memfd_create"); 52 | 53 | while (length = sendfile(dst, src, NULL, BUFSIZ), length != 0) 54 | if (length < 0 && errno != EAGAIN && errno != EINTR) 55 | err(EXIT_FAILURE, "sendfile"); 56 | close(src); 57 | if (file != argv[1]) 58 | free(file); /* only the asprintf() result is heap memory; argv[1] must not be freed */ 59 | if (fcntl(dst, F_ADD_SEALS, seals) < 0) 60 | err(1, "fcntl F_ADD_SEALS"); 61 | fexecve(dst, argv + 1, envp); 62 | err(1, "fexecve"); 63 | } 64 | -------------------------------------------------------------------------------- /examples/etc/init: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o pipefail 4 | export PATH=/bin 5 | 6 | case "$1" in 7 | start | "") 8 | hostname example 9 | ctrlaltdel soft 10 | 11 | mount -a -t proc 12 | mount -a -m -t cgroup2,devpts,devtmpfs,sysfs,tmpfs 13 | mount -m -r -B /run/empty /run/empty 14 | mkdir -m 1777 -p /run/lock /run/shm 15 | 16 | ln -f -n -s pts/ptmx /dev/ptmx 17 | ln -f -n -s /proc/self/fd /dev/fd 18 | ln -f -n -s fd/0 /dev/stdin 19 | ln -f -n -s fd/1 /dev/stdout 20 | ln -f -n -s fd/2 /dev/stderr 21 | 22 | if ZRAM=$(zramctl -f -s 1G); then 23 | mkswap $ZRAM >/dev/null 24 | swapon -d $ZRAM 25 | fi 26 | 27 | mount -o
remount,ro / 28 | fsck -a -A -P -T >/dev/null 29 | if [[ $? -le 1 ]]; then 30 | mount -o remount,rw / 31 | mount -a -t nonfs 32 | fi 33 | 34 | dmesg --console-off 35 | while read TTY _; do 36 | daemon -c -r agetty $TTY 37 | done < /proc/consoles 38 | 39 | ip link set lo up 40 | ip link set eth0 up 41 | ip address add 192.168.1.2/24 dev eth0 42 | ip route add default via 192.168.1.1 43 | 44 | syslogd -k 45 | ssh-keygen -A && $(type -P sshd) 46 | 47 | daemon "$0" watchdog 48 | exec "$0" reap 49 | ;; 50 | 51 | stop) 52 | kill -TERM -1 && sleep 2 && kill -KILL -1 53 | echo && dmesg --console-on 54 | 55 | if swapoff -a && umount -a -r; then 56 | echo "Remounted filesystems read-only" 57 | elif sync; then 58 | echo "Flushed filesystem writes" 59 | fi 60 | 61 | exec stop "${@:2}" 62 | ;; 63 | 64 | watchdog) 65 | if [[ -c /dev/watchdog ]]; then 66 | exec <><(:) >/dev/watchdog && wait 67 | trap 'printf V && exit' TERM 68 | while printf '\0'; do read -t 15; done 69 | fi 70 | ;; 71 | 72 | *) 73 | trap 'exec "$0" reap 3>&-' HUP 74 | trap 'exec "$0" stop reboot 3>&-' INT 75 | trap 'exec "$0" stop poweroff 3>&-' TERM 76 | exec 3<><(:) && while true; do read -u 3; done 77 | ;; 78 | esac 79 | -------------------------------------------------------------------------------- /syslogd: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONFFILE=/etc/syslogd.conf 4 | LOGDIR=/var/log 5 | OPTIONS=() 6 | PIDFILE=/run/syslogd.pid 7 | RESTART=0 8 | SYNC=0 9 | 10 | classify() { 11 | if (( LEVEL > 6 )); then 12 | FACILITY=debug 13 | elif [[ $FACILITY == auth* ]]; then 14 | FACILITY=auth 15 | elif [[ $FACILITY != @(kern|mail) ]]; then 16 | FACILITY=daemon 17 | fi 18 | } 19 | 20 | log() { 21 | if [[ ! 
-d $LOGDIR/$FACILITY ]]; then 22 | [[ $FACILITY == debug ]] && return 23 | mkdir -m 0700 -p "$LOGDIR/$FACILITY" 24 | fi 25 | printf '%s %s\n' "$TIME" "$ENTRY" >>"$LOGDIR/$FACILITY/$DATE" 26 | (( SYNC )) && sync "$LOGDIR/$FACILITY/$DATE" 27 | } 28 | 29 | usage() { 30 | cat >&2 <%s\n' $((FACILITY + LEVEL)) "$ENTRY" >>/dev/kmsg 58 | } 59 | OPTIONS+=('-n') 60 | ;; 61 | p) 62 | PIDFILE=$OPTARG 63 | ;; 64 | s) 65 | SYNC=1 66 | ;; 67 | *) 68 | usage 69 | ;; 70 | esac 71 | done 72 | 73 | (( OPTIND <= $# )) && usage 74 | 75 | if [[ -f $CONFFILE ]] && ! source "$CONFFILE"; then 76 | exit 1 77 | fi 78 | 79 | if ! { exec 3>>"$PIDFILE" && flock -n 3; } 2>/dev/null; then 80 | echo "Failed to lock $PIDFILE; is ${0##*/} already running?" >&2 81 | exit 1 82 | fi 83 | 84 | if read -a STAT &- && daemon -- "$0" "$@"; then 86 | if timeout 2 daemon -w /dev/log; then 87 | exit 0 88 | fi 89 | echo "Background syslogd failed to start" >&2 90 | else 91 | rm -f "$PIDFILE" 92 | fi 93 | exit 1 94 | fi 95 | 96 | echo $$ >"$PIDFILE" 97 | trap 'trap "" TERM && kill -TERM 0 && rm -f "$PIDFILE"' EXIT 98 | trap 'exec -- "$0" "$@"' HUP 99 | 100 | if [[ ! -p /dev/stdin ]]; then 101 | exec < <(syslog "${OPTIONS[@]}" 3>&-) 102 | fi 103 | 104 | while read -r PEERPID PEERUID PEERGID FACILITY LEVEL DATE TIME ENTRY; do 105 | classify && log 106 | done 107 | -------------------------------------------------------------------------------- /ueventd: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BROADCAST=0 4 | CONFFILE=/etc/ueventd.conf 5 | PIDFILE=/run/ueventd.pid 6 | RESTART=0 7 | SYSFS=${SYSFS:-/sys} 8 | TRIGGER=0 9 | 10 | usage() { 11 | cat >&2 <>"$PIDFILE" && flock -n 3; } 2>/dev/null; then 60 | echo "Failed to lock $PIDFILE; is ${0##*/} already running?" >&2 61 | exit 1 62 | fi 63 | 64 | if read -a STAT "$PIDFILE" 71 | trap 'trap "" TERM && kill -TERM 0 && rm -f "$PIDFILE"' EXIT 72 | trap 'exec -- "$0" "$@"' HUP 73 | 74 | if [[ ! 
-p /dev/stdin ]]; then 75 | exec < <(uevent -l 1 &-) 76 | read -r READY 77 | fi 78 | 79 | if (( BROADCAST )) && [[ ! -p /dev/stdout ]]; then 80 | exec > >(uevent -b $BROADCAST >/dev/null 3>&-) 81 | fi 82 | 83 | if (( TRIGGER )); then 84 | find $SYSFS/{module,bus,devices} -name uevent -type f \ 85 | | while read UEVENT; do echo change >"$UEVENT"; done 86 | for (( INDEX = 1; INDEX <= $#; INDEX++ )); do 87 | [[ ${!INDEX} == "-t" ]] && set -- "${@:1:INDEX - 1}" "${@:INDEX + 1}" 88 | done 89 | fi 90 | 91 | declare -A ENV=() 92 | while read -r KEY VALUE; do 93 | if [[ -n $KEY ]]; then 94 | ENV[$KEY]=$VALUE 95 | continue 96 | fi 97 | # Blank line ends one uevent: copy its well-known properties into plain shell variables. 98 | unset ACTION DEVNAME DEVPATH DRIVER INTERFACE KEY SUBSYSTEM SYSPATH VALUE 99 | [[ -v ENV[ACTION] ]] && ACTION=${ENV[ACTION]} 100 | [[ -v ENV[DEVNAME] ]] && DEVNAME=${ENV[DEVNAME]} 101 | [[ -v ENV[DEVPATH] ]] && DEVPATH=${ENV[DEVPATH]} 102 | [[ -v ENV[DRIVER] ]] && DRIVER=${ENV[DRIVER]} 103 | [[ -v ENV[INTERFACE] ]] && INTERFACE=${ENV[INTERFACE]} 104 | [[ -v ENV[SUBSYSTEM] ]] && SUBSYSTEM=${ENV[SUBSYSTEM]} 105 | [[ -v ENV[DEVPATH] ]] && SYSPATH=$SYSFS${ENV[DEVPATH]} 106 | 107 | case "$ACTION" in 108 | add | change | move | offline | online | overflow | remove) 109 | event "$ACTION" "$DEVPATH" && "$ACTION" "$DEVPATH" || unset ENV 110 | ;; 111 | *) 112 | event "$ACTION" "$DEVPATH" || unset ENV 113 | ;; 114 | esac /dev/null 3>&- 115 | 116 | if (( BROADCAST && ${#ENV[@]} )); then 117 | for KEY in "${!ENV[@]}"; do 118 | printf '%s %s\n' "$KEY" "${ENV[$KEY]}" 119 | done 120 | printf '\n' 121 | fi 122 | 123 | declare -A ENV=() 124 | done 125 | -------------------------------------------------------------------------------- /landmask.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | const uint64_t fs_file = 0 14 | | LANDLOCK_ACCESS_FS_EXECUTE 15 | |
LANDLOCK_ACCESS_FS_READ_FILE 16 | | LANDLOCK_ACCESS_FS_WRITE_FILE 17 | | LANDLOCK_ACCESS_FS_TRUNCATE; 18 | 19 | const uint64_t fs_read = 0 20 | | LANDLOCK_ACCESS_FS_EXECUTE 21 | | LANDLOCK_ACCESS_FS_READ_FILE 22 | | LANDLOCK_ACCESS_FS_READ_DIR; 23 | 24 | const uint64_t fs_write = 0 25 | | LANDLOCK_ACCESS_FS_WRITE_FILE 26 | | LANDLOCK_ACCESS_FS_TRUNCATE 27 | | LANDLOCK_ACCESS_FS_REMOVE_DIR 28 | | LANDLOCK_ACCESS_FS_REMOVE_FILE 29 | | LANDLOCK_ACCESS_FS_MAKE_CHAR 30 | | LANDLOCK_ACCESS_FS_MAKE_DIR 31 | | LANDLOCK_ACCESS_FS_MAKE_REG 32 | | LANDLOCK_ACCESS_FS_MAKE_SOCK 33 | | LANDLOCK_ACCESS_FS_MAKE_FIFO 34 | | LANDLOCK_ACCESS_FS_MAKE_BLOCK 35 | | LANDLOCK_ACCESS_FS_MAKE_SYM 36 | | LANDLOCK_ACCESS_FS_REFER; 37 | 38 | static void allowpath(int ruleset, const char *path, char access) { 39 | struct landlock_path_beneath_attr attr = { 0 }; 40 | struct stat status; 41 | 42 | if ((attr.parent_fd = open(path, O_PATH | O_NOFOLLOW | O_CLOEXEC)) < 0) 43 | err(EXIT_FAILURE, "%s", path); 44 | if (fstat(attr.parent_fd, &status) < 0) 45 | err(EXIT_FAILURE, "fstat"); 46 | 47 | attr.allowed_access = fs_read; 48 | if (access == 'w') 49 | attr.allowed_access |= fs_write; 50 | if (!S_ISDIR(status.st_mode)) 51 | attr.allowed_access &= fs_file; 52 | 53 | if (syscall(__NR_landlock_add_rule, ruleset, 54 | LANDLOCK_RULE_PATH_BENEATH, &attr, 0) < 0) 55 | err(EXIT_FAILURE, "landlock_add_rule"); 56 | 57 | close(attr.parent_fd); 58 | } 59 | 60 | static void allowport(int ruleset, const char *port, char access) { 61 | struct landlock_net_port_attr attr = { 0 }; 62 | char *trailing; 63 | 64 | if (access == 't') 65 | attr.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP; 66 | if (access == 'T') 67 | attr.allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP; 68 | 69 | attr.port = strtoul(port, &trailing, 10); 70 | if (*port == 0 || *trailing != 0 || attr.port > 65535) 71 | errx(EXIT_FAILURE, "%s: Invalid port number", port); 72 | 73 | if (syscall(__NR_landlock_add_rule, ruleset, 74 | LANDLOCK_RULE_NET_PORT, 
&attr, 0) < 0) 75 | err(EXIT_FAILURE, "landlock_add_rule"); 76 | } 77 | 78 | static int usage(char *progname) { 79 | fprintf(stderr, "\ 80 | Usage: %s [OPTIONS] CMD [ARG]...\n\ 81 | Options:\n\ 82 | -d DIR change directory to DIR before running CMD\n\ 83 | -r PATH allow CMD read-only access to PATH\n\ 84 | -w PATH allow CMD read-write access to PATH\n\ 85 | -t PORT allow CMD to listen on TCP port PORT\n\ 86 | -T PORT allow CMD to connect to TCP port PORT\n\ 87 | ", progname); 88 | return 64; 89 | } 90 | 91 | int main(int argc, char **argv) { 92 | int option, ruleset; 93 | char *dir = NULL; 94 | 95 | if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) 96 | err(EXIT_FAILURE, "prctl PR_SET_NO_NEW_PRIVS"); 97 | 98 | if ((ruleset = syscall(__NR_landlock_create_ruleset, 99 | &(struct landlock_ruleset_attr) { 100 | .handled_access_fs = fs_read | fs_write, 101 | .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP 102 | | LANDLOCK_ACCESS_NET_CONNECT_TCP, 103 | .scoped = LANDLOCK_SCOPE_SIGNAL 104 | | LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET 105 | }, sizeof(struct landlock_ruleset_attr), 0)) < 0) 106 | err(EXIT_FAILURE, "landlock_create_ruleset"); 107 | 108 | while ((option = getopt(argc, argv, ":d:r:w:t:T:")) > 0) 109 | switch (option) { 110 | case 'd': 111 | dir = optarg; 112 | break; 113 | case 'r': 114 | case 'w': 115 | allowpath(ruleset, optarg, option); 116 | break; 117 | case 't': 118 | case 'T': 119 | allowport(ruleset, optarg, option); 120 | break; 121 | default: 122 | return usage(argv[0]); 123 | } 124 | 125 | if (optind >= argc) 126 | return usage(argv[0]); 127 | 128 | if (syscall(__NR_landlock_restrict_self, ruleset, 0) < 0) 129 | err(EXIT_FAILURE, "landlock_restrict_self"); 130 | close(ruleset); 131 | 132 | if (dir && chdir(dir) < 0) 133 | err(EXIT_FAILURE, "%s", dir); 134 | execvp(argv[optind], argv + optind); 135 | err(EXIT_FAILURE, "execvp"); 136 | } 137 | -------------------------------------------------------------------------------- /uevent.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define BUFFER 4096 13 | 14 | static struct sockaddr_nl netlink = { .nl_family = AF_NETLINK }; 15 | 16 | static int broadcast(void) { 17 | char *event = NULL, *line = NULL; 18 | size_t action = 0, devpath = 0, length = 0, linesize = 0, size = 0; 19 | int sock, socksize = 1 << 21; 20 | ssize_t count; 21 | 22 | if ((sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT)) < 0) 23 | err(EXIT_FAILURE, "socket"); 24 | 25 | setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &socksize, sizeof(int)); 26 | setsockopt(sock, SOL_SOCKET, SO_SNDBUFFORCE, &socksize, sizeof(int)); 27 | 28 | if (connect(sock, (struct sockaddr *) &netlink, sizeof(netlink)) < 0) 29 | err(EXIT_FAILURE, "connect"); 30 | 31 | while ((count = getline(&line, &linesize, stdin)) > 0 || length) { 32 | if (line && count > 0 && line[count - 1] == '\n') 33 | count--; 34 | 35 | if (line && count > 0) { 36 | /* Append property line and null-terminate it. */ 37 | while (size < length + count + 1) 38 | if (event = realloc(event, size += 4096), event == NULL) 39 | err(EXIT_FAILURE, "realloc"); 40 | memcpy(event + length, line, count); 41 | event[length + count] = 0; 42 | 43 | /* Support both KEY VALUE and KEY=VALUE input lines. */ 44 | for (size_t i = length; i < length + count; i++) 45 | if (event[i] == ' ' || event[i] == '=') { 46 | event[i] = '='; 47 | break; 48 | } 49 | 50 | /* Keep track of ACTION and DEVPATH for constructing header. They are stored as offsets into event (0 means unset) because realloc() may move the buffer.
*/ 51 | if (strncmp(event + length, "ACTION=", strlen("ACTION=")) == 0) 52 | action = length + strlen("ACTION="); 53 | if (strncmp(event + length, "DEVPATH=", strlen("DEVPATH=")) == 0) 54 | devpath = length + strlen("DEVPATH="); 55 | 56 | length += count + 1; 57 | continue; 58 | } 59 | 60 | if (action && devpath) { 61 | /* Make space for the ACTION@DEVPATH header then prepend it. */ 62 | count = 2 + strlen(event + action) + strlen(event + devpath); 63 | 64 | while (size < length + count) 65 | if (event = realloc(event, size += 4096), event == NULL) 66 | err(EXIT_FAILURE, "realloc"); 67 | 68 | memmove(event + count, event, length); 69 | snprintf(event, count, "%s@%s", event + action + count, event + devpath + count); 70 | 71 | /* Attempt to broadcast uevent but tolerate failures. */ 72 | while (send(sock, event, length + count, 0) < 0) 73 | if (errno != EAGAIN && errno != EINTR) 74 | break; 75 | } 76 | action = devpath = 0, length = 0; 77 | } 78 | 79 | free(line); 80 | close(sock); 81 | return ferror(stdin) ?
EXIT_FAILURE : EXIT_SUCCESS; 82 | } 83 | 84 | void usage(char *progname) { 85 | fprintf(stderr, "\ 86 | Usage:\n\ 87 | %1$s -l GROUPS listen for uevents, printing them to stdout\n\ 88 | %1$s -b GROUPS read uevents from stdin and broadcast them\n\ 89 | ", progname); 90 | exit(EX_USAGE); 91 | } 92 | 93 | int main(int argc, char **argv) { 94 | char buffer[BUFFER + 1], *cursor, *separator; 95 | int sock, socksize = 1 << 21; 96 | ssize_t length; 97 | 98 | if (argc == 3 && strcmp(argv[1], "-b") == 0) { 99 | netlink.nl_groups = strtoul(argv[2], NULL, 0); 100 | if (netlink.nl_groups == 0) 101 | errx(EXIT_FAILURE, "Invalid netlink group mask: %s", argv[2]); 102 | return broadcast(); 103 | } 104 | 105 | if (argc == 3 && strcmp(argv[1], "-l") == 0) { 106 | netlink.nl_groups = strtoul(argv[2], NULL, 0); 107 | if (netlink.nl_groups == 0) 108 | errx(EXIT_FAILURE, "Invalid netlink group mask: %s", argv[2]); 109 | } else { 110 | usage(argv[0]); 111 | } 112 | 113 | if ((sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT)) < 0) 114 | err(EXIT_FAILURE, "socket"); 115 | 116 | setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &socksize, sizeof(int)); 117 | setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE, &socksize, sizeof(int)); 118 | 119 | if (bind(sock, (struct sockaddr *) &netlink, sizeof(netlink)) < 0) 120 | err(EXIT_FAILURE, "bind"); 121 | 122 | putchar('\n'); 123 | fflush(stdout); 124 | 125 | while (1) { 126 | if ((length = recv(sock, &buffer, sizeof(buffer) - 1, 0)) < 0) { 127 | if (errno == ENOBUFS) { 128 | printf("ACTION overflow\n\n"); 129 | fflush(stdout); 130 | } else if (errno != EAGAIN && errno != EINTR) { 131 | err(EXIT_FAILURE, "recv"); 132 | } 133 | continue; 134 | } 135 | 136 | /* Null-terminate the uevent and replace stray newlines with spaces. 
*/ 137 | buffer[length] = 0; 138 | for (cursor = buffer; cursor < buffer + length; cursor++) 139 | if (*cursor == '\n') 140 | *cursor = ' '; 141 | 142 | if (strlen(buffer) >= length - 1) { 143 | /* No properties; fake a simple environment based on the header. */ 144 | if ((cursor = strchr(buffer, '@'))) { 145 | *cursor++ = 0; 146 | printf("ACTION %s\n", buffer); 147 | printf("DEVPATH %s\n", cursor); 148 | } 149 | } else { 150 | /* Ignore header as properties will include ACTION and DEVPATH. */ 151 | cursor = buffer; 152 | while (cursor += strlen(cursor) + 1, cursor < buffer + length) { 153 | if ((separator = strchr(cursor, '='))) 154 | *separator = ' '; 155 | puts(cursor); 156 | } 157 | } 158 | putchar('\n'); 159 | fflush(stdout); 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /syslog.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #define SYSLOG_NAMES 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #define BUFFER 65536 21 | 22 | #ifndef UTCLOG 23 | /* syslog(3) time stamps are UTC from musl and local time from glibc. */ 24 | #ifdef __GLIBC__ 25 | #define UTCLOG 0 26 | #else 27 | #define UTCLOG 1 28 | #endif 29 | #endif 30 | 31 | static char buffer[BUFFER + 1], *zone; 32 | static int boot = 0, numeric = 0; 33 | 34 | static int syslog_date(char *line, struct tm *date) { 35 | char *cursor; 36 | time_t now, offset; 37 | 38 | time(&now); 39 | (UTCLOG ? gmtime_r : localtime_r)(&now, date); 40 | if ((cursor = strptime(line, " %b %d %H:%M:%S ", date))) { 41 | /* Pick tm_year so the timestamp is closest to now. */ 42 | offset = now - (UTCLOG ? timegm : mktime)(date); 43 | date->tm_year += (offset - 15778800) / 31557600; 44 | now = (UTCLOG ? 
timegm : mktime)(date); 45 | } 46 | 47 | (zone && zone[0] ? localtime_r : gmtime_r)(&now, date); 48 | return cursor ? cursor - line : 0; 49 | } 50 | 51 | static char *syslog_facility(int priority) { 52 | for (size_t i = 0; facilitynames[i].c_val >= 0; i++) 53 | if (facilitynames[i].c_val == (priority & LOG_FACMASK)) 54 | return facilitynames[i].c_name; 55 | return "unknown"; /* facility not found in facilitynames[] */ 56 | } 57 | 58 | static int syslog_priority(char *line, int *priority) { 59 | int start = 0; 60 | 61 | sscanf(line, "<%d>%n", priority, &start); 62 | return start; 63 | } 64 | 65 | int syslog_open(void) { 66 | int fd; 67 | struct sockaddr_un addr = { 68 | .sun_family = AF_UNIX, 69 | .sun_path = "/dev/log" 70 | }; 71 | 72 | if ((fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0)) < 0) 73 | err(EXIT_FAILURE, "socket"); 74 | 75 | unlink(addr.sun_path); 76 | umask(0111); /* Syslog socket should be writeable by everyone. */ 77 | if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) 78 | err(EXIT_FAILURE, "bind %s", addr.sun_path); 79 | 80 | if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &(int) { 1 }, 81 | sizeof(int)) < 0) 82 | err(EXIT_FAILURE, "setsockopt SO_PASSCRED %s", addr.sun_path); 83 | return fd; 84 | } 85 | 86 | static void syslog_recv(int fd) { 87 | char *cursor; 88 | int length, priority; 89 | struct iovec block; 90 | struct msghdr header; 91 | struct tm date; 92 | struct ucred id; 93 | union { 94 | struct cmsghdr hdr; 95 | char data[CMSG_SPACE(sizeof(struct ucred))]; 96 | } cmsg; 97 | /* Receive one datagram into buffer, with room for the sender's credentials as control data. */ 98 | block.iov_base = buffer; 99 | block.iov_len = sizeof(buffer) - 1; 100 | header.msg_name = NULL; 101 | header.msg_namelen = 0; 102 | header.msg_iov = &block; 103 | header.msg_iovlen = 1; 104 | header.msg_control = &cmsg; 105 | header.msg_controllen = sizeof(cmsg); 106 | header.msg_flags = 0; 107 | 108 | if ((length = recvmsg(fd, &header, 0)) <= 0) 109 | return; 110 | 111 | id.pid = id.uid = id.gid = 0; 112 | if (cmsg.hdr.cmsg_level == SOL_SOCKET) 113 | if
(cmsg.hdr.cmsg_type == SCM_CREDENTIALS) 114 | memcpy(&id, CMSG_DATA(&cmsg.hdr), sizeof(struct ucred)); 115 | 116 | for (cursor = buffer; cursor < buffer + length; cursor++) 117 | if (*cursor == 0 || *cursor == '\n') 118 | *cursor = 0; 119 | else if ((*cursor < 32 && *cursor != '\t') || *cursor == 127) 120 | *cursor = ' '; 121 | buffer[length] = 0; 122 | 123 | cursor = buffer; 124 | priority = LOG_DAEMON | LOG_NOTICE; 125 | 126 | while (cursor < buffer + length) { 127 | cursor += syslog_priority(cursor, &priority); 128 | cursor += syslog_date(cursor, &date); 129 | 130 | if (*cursor) { 131 | printf("%u %u %u", id.pid, id.uid, id.gid); 132 | if (numeric) 133 | printf(" %u", priority & LOG_FACMASK); 134 | else 135 | printf(" %s", syslog_facility(priority)); 136 | printf(" %u %04u-%02u-%02u %02u:%02u:%02u", priority & LOG_PRIMASK, 137 | date.tm_year + 1900, date.tm_mon + 1, date.tm_mday, date.tm_hour, 138 | date.tm_min, date.tm_sec); 139 | if (zone && zone[0]) 140 | printf("%c%02u%02u", date.tm_gmtoff < 0 ? '-' : '+', 141 | abs((int) date.tm_gmtoff) / 3600, 142 | abs((int) date.tm_gmtoff) / 60 % 60); 143 | printf(" %s\n", cursor); 144 | } 145 | cursor += strlen(cursor) + 1; 146 | } 147 | fflush(stdout); 148 | } 149 | 150 | static void kernel_read(int fd) { 151 | char *cursor; 152 | int length, priority; 153 | struct tm date; 154 | time_t now; 155 | 156 | if ((length = read(fd, buffer, sizeof(buffer) - 1)) <= 0) 157 | return; 158 | 159 | time(&now); 160 | (zone && zone[0] ? localtime_r : gmtime_r)(&now, &date); 161 | 162 | for (cursor = buffer; cursor < buffer + length; cursor++) 163 | if (*cursor == 0 || *cursor == '\n') 164 | *cursor = 0; 165 | else if ((*cursor < 32 && *cursor != '\t') || *cursor == 127) 166 | *cursor = ' '; 167 | buffer[length] = 0; 168 | 169 | priority = strtoul(buffer, &cursor, 10); 170 | if (cursor == buffer) 171 | priority = LOG_KERN | LOG_NOTICE; 172 | 173 | cursor = strchr(buffer, ';'); 174 | cursor = cursor ? 
cursor + 1 : buffer; 175 | 176 | if (*cursor) { 177 | if (numeric) 178 | printf("0 0 0 %u", priority & LOG_FACMASK); 179 | else 180 | printf("0 0 0 %s", syslog_facility(priority)); 181 | printf(" %u %04u-%02u-%02u %02u:%02u:%02u", priority & LOG_PRIMASK, 182 | date.tm_year + 1900, date.tm_mon + 1, date.tm_mday, date.tm_hour, 183 | date.tm_min, date.tm_sec); 184 | if (zone && zone[0]) 185 | printf("%c%02u%02u", date.tm_gmtoff < 0 ? '-' : '+', 186 | abs((int) date.tm_gmtoff) / 3600, 187 | abs((int) date.tm_gmtoff) / 60 % 60); 188 | printf(" %s\n", cursor); 189 | fflush(stdout); 190 | } 191 | } 192 | 193 | void usage(char *progname) { 194 | fprintf(stderr, "\ 195 | Usage: %s [OPTIONS]\n\ 196 | Options:\n\ 197 | -b include old messages from the kernel ring buffer\n\ 198 | -n print facility numbers instead of names\n\ 199 | ", progname); 200 | exit(EX_USAGE); 201 | } 202 | 203 | int main(int argc, char **argv) { 204 | struct pollfd fds[2]; 205 | int option; 206 | 207 | while ((option = getopt(argc, argv, ":bn")) > 0) 208 | switch (option) { 209 | case 'b': 210 | boot = 1; 211 | break; 212 | case 'n': 213 | numeric = 1; 214 | break; 215 | default: 216 | usage(argv[0]); 217 | } 218 | 219 | if (argc > optind) 220 | usage(argv[0]); 221 | 222 | if ((fds[0].fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK)) < 0) 223 | err(EXIT_FAILURE, "open /dev/kmsg"); 224 | lseek(fds[0].fd, 0, boot ? 
SEEK_SET : SEEK_END); 225 | fds[1].fd = syslog_open(); 226 | 227 | zone = getenv("TZ"); 228 | 229 | fds[0].events = fds[1].events = POLLIN; 230 | while(1) { 231 | while (poll(fds, 2, -1) < 0) 232 | if (errno != EAGAIN && errno != EINTR) 233 | err(EXIT_FAILURE, "poll"); 234 | if (fds[0].revents & POLLIN) 235 | kernel_read(fds[0].fd); 236 | if (fds[1].revents & POLLIN) 237 | syslog_recv(fds[1].fd); 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Arachsys init 2 | ============= 3 | 4 | This is the lightweight BSD-style init and syslog system used in Arachsys 5 | Linux. It includes a number of small utilities, described below. 6 | 7 | 8 | daemon 9 | ------ 10 | 11 | FreeBSD has included daemon(8) since 5.0-RELEASE in early 2003. This is a 12 | Linux-specific reimplementation which supports the same options as the 13 | FreeBSD version, together with additional features to make it a useful 14 | building-block for simple dependency-based parallel execution during system 15 | boot. 16 | 17 | Its basic purpose is to detach from the controlling terminal and execute a 18 | specified command as a background daemon. In common with the original, it 19 | has options to change directory before starting, to lock, write and remove 20 | a pidfile on behalf of the command, to restart the command when it exits, 21 | and to drop privileges to a different user and group before execution. 22 | 23 | This version can also start a logger process to send output to syslog and 24 | uses inotify to implement simple dependencies, waiting for specified 25 | filesystem paths to be created before starting the command. (Typically this 26 | is used with pidfiles or unix sockets in /run.) 
27 | 28 | A simple subset of traditional inetd or tcpserver functionality is also 29 | available: daemon can listen on TCP or unix stream sockets and run the 30 | specified command as a handler for each inbound connection. 31 | 32 | Note that the daemon process is intentionally run as a session and process 33 | group leader. On Linux, a session leader without a controlling terminal can 34 | acquire one just by opening a terminal device. Pass the -f flag to disable 35 | this behaviour: daemon will fork twice so it no longer leads the session. 36 | 37 | 38 | init and reap 39 | ------------- 40 | 41 | Previous versions of this collection provided a minimal /bin/init, which 42 | launched an /etc/rc.startup script at boot, reaped orphans while waiting for 43 | a signal to shut down, then ran an /etc/rc.shutdown script to gracefully 44 | terminate the system. Finally, /bin/init would call reboot() to halt, reboot 45 | or power-off depending on the signal that was sent. 46 | 47 | However, competent shells will always reap adopted children, so this was 48 | unnecessarily complicated. It is sufficient to make /etc/init an executable 49 | script which starts the system exactly as /etc/rc.startup did, sleeps 50 | awaiting a signal to shut down, then cleanly terminates the system like 51 | /etc/rc.shutdown did, finally executing the stop utility below to reboot the 52 | kernel. 53 | 54 | Like the old /bin/init, an /etc/init script could sleep awaiting a signal, 55 | or for a more flexible interface, block reading commands from a /dev/initctl 56 | named pipe. 57 | 58 | A demonstration /etc/init is included in the examples/ subdirectory, along 59 | with an example /etc/fstab showing the required pseudo-filesystems and 60 | one-line scripts to trigger poweroff and reboot actions. 61 | 62 | Sometimes a completely null init can be useful, such as for PID 1 in a PID 63 | namespace.
The reap utility is intended to fill this role: it does nothing 64 | except explicitly ignore SIGCHLD to discard the exit status of adopted 65 | children and prevent them from becoming zombies. You could also exec it at 66 | the end of an /etc/init script if you'd prefer to avoid a long-running 67 | shell process as system init. 68 | 69 | 70 | landmask 71 | -------- 72 | 73 | Landlock is a stackable Linux Security Module which allows processes 74 | to restrict their own access to the filesystem and TCP ports. These 75 | restrictions will also be enforced on all subsequently created children, 76 | and there is no way to remove or loosen the security policy once applied. 77 | 78 | landmask uses this facility to sandbox another command, masking its access 79 | to the filesystem and TCP networking with exceptions specified by command 80 | line options. 81 | 82 | The command can read paths specified with -r PATH, and both read and 83 | write paths specified with -w PATH. If PATH is a directory, the policy 84 | also applies to all paths beneath it. Any path that doesn't match one of 85 | these read-only or read-write rules is denied by default. 86 | 87 | In addition, the command can only listen on TCP ports given with -t PORT, 88 | and can only make outbound TCP connections to ports given with -T PORT. 89 | It cannot send signals to processes outside the sandbox, and cannot access 90 | abstract AF_UNIX sockets. 91 | 92 | 93 | pivot 94 | ----- 95 | 96 | This is a replacement for pivot_root from util-linux. Run with two 97 | arguments as 98 | 99 | pivot NEW-ROOT PUT-OLD 100 | 101 | it simply makes a pivot_root() syscall to move the root filesystem of the 102 | current mount namespace to the directory PUT-OLD and make NEW-ROOT the new 103 | root filesystem. 104 | 105 | However, unlike util-linux pivot_root, it can also be run with a single 106 | argument NEW-ROOT, omitting PUT-OLD. 
In this case, it uses a pivot_root() 107 | call to stack the old and new root filesystems on the same mount point, 108 | then completely detaches the old root filesystem before returning. 109 | 110 | Performing the detach operation atomically in a single command is helpful 111 | when constructing secure containers from a script. It eliminates the need 112 | to trust the umount binary within the container. 113 | 114 | Despite the extra functionality, pivot is smaller than util-linux pivot_root 115 | and doesn't defile /bin with an ugly command name containing an underscore. 116 | 117 | 118 | runfg 119 | ----- 120 | 121 | An anti-backgrounding wrapper in the style of Dan Bernstein's fghack, 122 | this uses the Linux-specific PR_SET_CHILD_SUBREAPER prctl to capture 123 | all descendants of the command it runs. It waits for them to exit before 124 | returning the exit status of the original command. Unlike fghack, it does 125 | not rely on unexpected file descriptors being left open, but as a subreaper 126 | it unavoidably adopts pre-existing children as well as the one it spawns. 127 | 128 | 129 | seal 130 | ---- 131 | 132 | Linux treats /proc/self/exe and /proc/PID/exe in a strange magic way. 133 | Although stat() sees a symlink to the absolute path of the binary, open() 134 | accesses the binary itself whether or not the symlink can be resolved in 135 | the filesystem namespace of the opening process. 136 | 137 | Sometimes when sandboxing processes, this can leak a path to a host binary 138 | from inside an otherwise isolated container. For example, this led to the 139 | CVE-2019-5736 vulnerability in runC 'privileged containers'. 140 | 141 | One robust defence against this is to exec such processes from a sealed 142 | memfd rather than directly from the host filesystem. The seal utility 143 | provides an easy way to do this for an arbitrary program. Invoked as 144 | 145 | seal PROG [ARG]... 
146 | 147 | it locates PROG on the PATH, clones it to a new sealed memfd, then executes 148 | the memfd with the given arguments using fexecve(). 149 | 150 | The behaviour of the shell and execvp/execlp is mirrored as closely as 151 | possible: PROG must be executable and program names containing '/' 152 | characters are assumed to be a full pathname, bypassing PATH. 153 | 154 | 155 | stop 156 | ---- 157 | 158 | Since a shell script cannot directly perform the final reboot() system call 159 | at the end of shutdown, the stop utility is provided to do this. This 160 | expects a single argument of 'halt', 'kexec', 'poweroff', 'reboot' or 161 | 'shutdown' to indicate the type of reboot() call required. Run without an 162 | action argument, stop will list the available actions together with a 163 | warning about its lack of gracefulness. 164 | 165 | 166 | syslog and syslogd 167 | ------------------ 168 | 169 | This little system logger daemon takes a different approach to its 170 | mainstream competitors, more in keeping with the Unix 'toolkit' philosophy. 171 | 172 | syslog reads messages as they arrive at /dev/log and /dev/kmsg, printing 173 | them to stdout in a format chosen for ease of handling in a shell-script 174 | read loop. 175 | 176 | By default, syslog uses UTC timestamps. Each line of output consists of 177 | eight space-separated fields: 178 | 179 | - process ID of the sender, or 0 for a kernel message 180 | - numeric user ID of the sender, or 0 for a kernel message 181 | - numeric group ID of the sender, or 0 for a kernel message 182 | - facility name: daemon, kern, authpriv, etc. 183 | - numeric log level from 0 (LOG_EMERG) to 7 (LOG_DEBUG) 184 | - date in the format YYYY-MM-DD 185 | - time in the 24-hour format HH:MM:SS 186 | - the log message itself 187 | 188 | If TZ is non-empty in the environment, local time is used instead of UTC and 189 | the zone offset in the format +HHMM or -HHMM is appended to the time field.
190 | This resolves any ambiguity with times during daylight saving changes. To 191 | stamp log entries with the default local zone, run with TZ=:/etc/localtime. 192 | 193 | When run with the -b option, syslog also prints old messages in the kernel 194 | ring buffer. This is useful for capturing kernel boot messages at system 195 | startup. With the -n option, the output format includes numeric facilities 196 | instead of names. 197 | 198 | On glibc systems, syslog(3) sends datagrams to /dev/log with dates in the 199 | time zone of the calling process. On musl systems, these time stamps are 200 | always UTC. The right behaviour should be chosen automatically but can be 201 | explicitly configured at compile time with -DUTCLOG=0 or -DUTCLOG=1. 202 | 203 | A simple syslogd script which wraps syslog is installed with it. 204 | 205 | 206 | uevent, ueventd and ueventwait 207 | ------------------------------ 208 | 209 | The kernel notifies userspace of device creation with uevents sent to 210 | clients listening on NETLINK_KOBJECT_UEVENT sockets. As they arrive, 211 | 'uevent -l 1' lists the uevent properties to stdout in a space-separated 212 | key/value format with a blank line terminating the record. This format is 213 | chosen for ease of handling in a shell-script read loop. 214 | 215 | On startup, once uevent is bound to the netlink socket, it emits an 216 | initial blank line which can be used to avoid a race in scripts which 217 | also scan /sys for existing devices. 218 | 219 | An example uevent property list for a newly created disk device is 220 | 221 | ACTION add 222 | DEVPATH /devices/pci0000:00/0000:00:1f.2/ata1/host0/target0:0:0/0:0:0:0/block/sda 223 | SUBSYSTEM block 224 | MAJOR 8 225 | MINOR 0 226 | DEVNAME sda 227 | DEVTYPE disk 228 | SEQNUM 5561 229 | 230 | DEVPATH is the path within the sysfs mount for the relevant device, and 231 | DEVNAME (if set) is the path of the kernel-created device node in devtmpfs.
232 | Network interfaces will instead have an INTERFACE property with their name 233 | that was allocated by the kernel. 234 | 235 | More generally, uevent can listen on any combination of netlink groups, 236 | specified as a mask argument in 'uevent -l GROUPS'. The kernel reports 237 | uevents on group 1, but groups 2, 4, 8, ... are available for userspace. 238 | 239 | Run as 'uevent -b GROUPS', uevent will instead read key/value properties 240 | from stdin, terminated by a blank line, and broadcast them via netlink. 241 | 242 | A simple ueventd script to handle uevent output is installed with it, as a 243 | cleaner, more flexible replacement for udev. To use this, define bash 244 | functions add(), remove(), change(), etc. (matching the event ACTION types) 245 | in /etc/ueventd.conf, which is sourced by the script on start. The event() 246 | shell function is also called for all events, with the ACTION and DEVPATH in 247 | its first two arguments. 248 | 249 | All of the shell functions defined in /etc/ueventd.conf will be called with 250 | the uevent environment list (properties) in an associative array ENV 251 | together with the most commonly accessed properties in the shell variables 252 | ACTION, DEVNAME, DEVPATH, DRIVER, INTERFACE and SUBSYSTEM. SYSPATH is also 253 | set to the absolute path of the device directory, i.e. ${SYSFS}${DEVPATH} 254 | where $SYSFS is typically /sys. 255 | 256 | To rebroadcast filtered events to userspace, such as programs linked against 257 | libudev-zero, run ueventd with the -b option and adjust ENV as required in 258 | the handler functions. To completely suppress an event, unset ENV or return 259 | with non-zero status. 260 | 261 | The ueventwait script provides a lighter-weight mechanism to wait for 262 | a single device without a persistent ueventd, matching devices against 263 | arguments of the form KEY=PATTERN, where KEY is a property name and PATTERN 264 | is a bash extended-glob pattern to match against its value.
It scans /sys 265 | to check if a matching device already exists, awaits one using a uevent 266 | listener if not, and reports the sysfs path of the device to stdout. 267 | 268 | 269 | Building and installing 270 | ----------------------- 271 | 272 | Run 'make install' at the top of the source tree to install the scripts 273 | and binaries in /bin. Alternatively, you can set DESTDIR and/or BINDIR to 274 | install in a different location, or strip and copy the compiled binaries 275 | and scripts into the correct place manually. 276 | 277 | Arachsys init was developed on GNU/Linux and is unlikely to be portable to 278 | other platforms as it uses a number of Linux-specific facilities. Please 279 | report any problems or bugs to Chris Webb . 280 | 281 | 282 | Copying 283 | ------- 284 | 285 | Arachsys init was written by Chris Webb and is 286 | distributed as Free Software under the terms of the MIT license in COPYING. 287 | -------------------------------------------------------------------------------- /daemon.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | static id_t gid, uid; 25 | static size_t listeners; 26 | static struct inotify_event *event; 27 | static struct pollfd *pollfd; 28 | static int signals[2]; 29 | 30 | static struct { 31 | char *priority, *tag; 32 | int fd; 33 | } logger; 34 | 35 | static struct { 36 | char *path; 37 | int fd; 38 | } pidfile; 39 | 40 | static void await(const char *path, int inotify, int parent) { 41 | struct pollfd fd = { .fd = inotify, .events = POLLIN }; 42 | struct stat test; 43 | char *slash; 44 | int watch; 45 | 46 | /* Take a short-cut if path already exists and is a parent 
dir. */ 47 | if (parent) { 48 | if (chdir(path) >= 0) 49 | return; 50 | if (errno != ENOENT) 51 | err(EXIT_FAILURE, "chdir %s", path); 52 | } 53 | 54 | /* If leading slashes are present, chdir to the root and remove them. */ 55 | if (*path == '/') { 56 | if (chdir("/") < 0) 57 | err(EXIT_FAILURE, "chdir /"); 58 | while (*path == '/') 59 | path++; 60 | } 61 | 62 | /* Remove any stray trailing slashes. */ 63 | slash = strrchr(path, '/'); 64 | if (slash && slash[1] == 0) { 65 | while (*slash == '/') 66 | *slash-- = 0; 67 | slash = strrchr(path, '/'); 68 | } 69 | 70 | /* Recurse to await the parent dir if necessary. */ 71 | if (slash) { 72 | *slash = 0; 73 | await(path, inotify, 1); 74 | path = slash + 1; 75 | } 76 | 77 | if (*path == 0) 78 | return; 79 | 80 | /* Now wait for the correct leaf name to arrive in our working dir. */ 81 | watch = inotify_add_watch(inotify, ".", IN_CREATE | IN_MOVED_TO); 82 | if (watch < 0) 83 | err(EXIT_FAILURE, "inotify_add_watch"); 84 | 85 | while (1) { 86 | /* Check if it already exists after setting watch to avoid a race. */ 87 | if (parent) { 88 | if (chdir(path) >= 0) 89 | goto out; 90 | if (errno != ENOENT) 91 | err(EXIT_FAILURE, "chdir %s", path); 92 | } else { 93 | if (lstat(path, &test) >= 0) 94 | goto out; 95 | if (errno != ENOENT) 96 | err(EXIT_FAILURE, "lstat %s", path); 97 | } 98 | 99 | if (event == NULL) 100 | event = malloc(sizeof(*event) + PATH_MAX + 1); 101 | if (event == NULL) 102 | err(EXIT_FAILURE, "malloc"); 103 | 104 | /* Otherwise, wait one second for a matching create/move-into event. */ 105 | while (1) { 106 | while (poll(&fd, 1, 1000) < 0) 107 | if (errno != EAGAIN && errno != EINTR) 108 | err(EXIT_FAILURE, "poll"); 109 | if (fd.revents == 0) 110 | break; 111 | 112 | if (read(inotify, event, sizeof(*event) + PATH_MAX + 1) < 0) { 113 | if (errno != EAGAIN && errno != EINTR) 114 | err(EXIT_FAILURE, "read"); 115 | break; 116 | } 117 | 118 | /* Read only succeeds if it can fill the entire struct. 
*/ 119 | if (!strcmp(path, event->name)) 120 | break; 121 | } 122 | } 123 | 124 | out: 125 | inotify_rm_watch(inotify, watch); 126 | } 127 | 128 | static void listen_add(int fd) { 129 | if ((listeners & 15) == 0) { 130 | pollfd = realloc(pollfd, (listeners + 16) * sizeof(struct pollfd)); 131 | if (pollfd == NULL) 132 | err(EXIT_FAILURE, "realloc"); 133 | } 134 | pollfd[listeners++].fd = fd; 135 | } 136 | 137 | static void listen_tcp(const char *address) { 138 | struct addrinfo hints = { .ai_socktype = SOCK_STREAM }, *info, *list; 139 | char host[256], port[32]; 140 | int fd, status; 141 | 142 | if (sscanf(address, "[%255[^]]]:%31[^:]", host, port) != 2) { 143 | if (sscanf(address, "%255[^:]:%31[^:]", host, port) != 2) { 144 | if (sscanf(address, ":%31[^:]", port) != 1) 145 | errx(EXIT_FAILURE, "%s: Invalid address", address); 146 | snprintf(host, sizeof(host), "::"); 147 | } 148 | } 149 | 150 | if ((status = getaddrinfo(host, port, &hints, &list)) != 0) 151 | errx(EXIT_FAILURE, "getaddrinfo: %s", gai_strerror(status)); 152 | 153 | for (info = list; info != NULL; info = info->ai_next) { 154 | if ((fd = socket(info->ai_family, info->ai_socktype, 0)) < 0) 155 | err(EXIT_FAILURE, "socket"); 156 | setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &(int) { 1 }, sizeof(int)); 157 | fcntl(fd, F_SETFD, FD_CLOEXEC); 158 | fcntl(fd, F_SETFL, O_NONBLOCK); 159 | 160 | if (bind(fd, info->ai_addr, info->ai_addrlen) < 0) 161 | err(EXIT_FAILURE, "bind"); 162 | if (listen(fd, SOMAXCONN) < 0) 163 | err(EXIT_FAILURE, "listen"); 164 | listen_add(fd); 165 | } 166 | } 167 | 168 | static void listen_unix(const char *path) { 169 | struct sockaddr_un address; 170 | size_t length = strlen(path); 171 | int fd; 172 | 173 | /* On Linux, address.sun_path is NUL-padded not NUL-terminated. 
*/ 174 | if (length > sizeof(address.sun_path)) 175 | errx(EXIT_FAILURE, "Socket path is too long to bind"); 176 | length += offsetof(struct sockaddr_un, sun_path); 177 | 178 | address.sun_family = AF_UNIX; 179 | strncpy(address.sun_path, path, sizeof(address.sun_path)); 180 | unlink(path); 181 | 182 | if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) 183 | err(EXIT_FAILURE, "socket"); 184 | fcntl(fd, F_SETFD, FD_CLOEXEC); 185 | fcntl(fd, F_SETFL, O_NONBLOCK); 186 | 187 | if (bind(fd, (struct sockaddr *) &address, length) < 0) 188 | err(EXIT_FAILURE, "bind"); 189 | if (listen(fd, SOMAXCONN) < 0) 190 | err(EXIT_FAILURE, "listen"); 191 | listen_add(fd); 192 | } 193 | 194 | static void logger_setup(const char *spec) { 195 | int status; 196 | pid_t pid; 197 | 198 | if (logger.tag) 199 | errx(EXIT_FAILURE, "-l cannot be specified more than once"); 200 | if (!*spec || *spec == ':') 201 | errx(EXIT_FAILURE, "Invalid or missing syslog identifier tag"); 202 | if (!(logger.tag = strdup(spec))) 203 | err(EXIT_FAILURE, "strdup"); 204 | 205 | /* Logging to file indicated by absolute path. */ 206 | if (*logger.tag == '/') { 207 | logger.fd = open(logger.tag, O_RDWR | O_APPEND | O_CREAT, 0666); 208 | if (logger.fd < 0) 209 | err(EXIT_FAILURE, "%s", logger.tag); 210 | if (flock(logger.fd, LOCK_EX | LOCK_NB) < 0) 211 | errx(EXIT_FAILURE, "%s already locked", logger.tag); 212 | return; 213 | } 214 | 215 | /* Log spec format is TAG:PRIORITY. */ 216 | if ((logger.priority = strchr(logger.tag, ':'))) { 217 | *logger.priority++ = 0; 218 | if (!*logger.priority) 219 | logger.priority = NULL; 220 | } 221 | 222 | /* Test the logger settings so we can exit early if they are invalid. */ 223 | switch (pid = fork()) { 224 | case -1: 225 | err(EXIT_FAILURE, "fork"); 226 | case 0: 227 | if (chdir("/") < 0) 228 | err(EXIT_FAILURE, "chdir"); 229 | execlp("logger", "logger", "-f", "/dev/null", 230 | "-p", logger.priority ? 
logger.priority : "daemon.notice", 231 | "-t", logger.tag, NULL); 232 | err(EXIT_FAILURE, "exec"); 233 | } 234 | 235 | if (waitpid(pid, &status, 0) < 0) 236 | err(EXIT_FAILURE, "waitpid"); 237 | 238 | /* The logger subprocess writes its own message to stderr on failure. */ 239 | if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) 240 | exit(EXIT_FAILURE); 241 | } 242 | 243 | static void logger_start(void) { 244 | int logpipe[2]; 245 | 246 | /* Redirect stdout and stderr to /dev/null if logging isn't configured. */ 247 | if (!logger.tag) { 248 | if (dup2(STDIN_FILENO, STDOUT_FILENO) < 0) 249 | err(EXIT_FAILURE, "dup2"); 250 | if (dup2(STDIN_FILENO, STDERR_FILENO) < 0) 251 | err(EXIT_FAILURE, "dup2"); 252 | return; 253 | } 254 | 255 | /* Redirect stdout and stderr if a log file has been specified. */ 256 | if (*logger.tag == '/') { 257 | if (dup2(logger.fd, STDOUT_FILENO) < 0) 258 | err(EXIT_FAILURE, "dup2"); 259 | if (dup2(logger.fd, STDERR_FILENO) < 0) 260 | err(EXIT_FAILURE, "dup2"); 261 | if (logger.fd != STDOUT_FILENO && logger.fd != STDERR_FILENO) 262 | close(logger.fd); 263 | return; 264 | } 265 | 266 | if (pipe(logpipe) < 0) 267 | err(EXIT_FAILURE, "pipe"); 268 | switch (fork()) { 269 | case -1: 270 | err(EXIT_FAILURE, "fork"); 271 | case 0: 272 | /* Don't unintentionally keep the pwd busy in the logger process. */ 273 | if (chdir("/") < 0) 274 | err(EXIT_FAILURE, "chdir"); 275 | /* Redirect stdout and stderr to /dev/null for the logger process. */ 276 | if (dup2(STDIN_FILENO, STDOUT_FILENO) < 0) 277 | err(EXIT_FAILURE, "dup2"); 278 | if (dup2(STDIN_FILENO, STDERR_FILENO) < 0) 279 | err(EXIT_FAILURE, "dup2"); 280 | /* Run logger(1) with stdin coming from the read end of the pipe. */ 281 | if (dup2(logpipe[0], STDIN_FILENO) < 0) 282 | err(EXIT_FAILURE, "dup2"); 283 | close(logpipe[0]); 284 | close(logpipe[1]); 285 | execlp("logger", "logger", 286 | "-p", logger.priority ? 
logger.priority : "daemon.notice", 287 | "-t", logger.tag, NULL); 288 | err(EXIT_FAILURE, "exec"); 289 | } 290 | 291 | /* Redirect our stdout and stderr to the write end of the pipe. */ 292 | if (dup2(logpipe[1], STDOUT_FILENO) < 0) 293 | err(EXIT_FAILURE, "dup2"); 294 | if (dup2(logpipe[1], STDERR_FILENO) < 0) 295 | err(EXIT_FAILURE, "dup2"); 296 | close(logpipe[0]); 297 | close(logpipe[1]); 298 | } 299 | 300 | static void pidfile_close(void) { 301 | if (pidfile.path) { 302 | close(pidfile.fd); 303 | unlink(pidfile.path); 304 | } 305 | } 306 | 307 | static void pidfile_open(const char *path) { 308 | pidfile.fd = open(path, O_RDWR | O_CLOEXEC | O_CREAT, 0666); 309 | if (pidfile.fd < 0) 310 | err(EXIT_FAILURE, "%s", path); 311 | if (flock(pidfile.fd, LOCK_EX | LOCK_NB) < 0) 312 | errx(EXIT_FAILURE, "%s already locked", path); 313 | if (!(pidfile.path = realpath(path, NULL))) 314 | err(EXIT_FAILURE, "%s", path); 315 | if (ftruncate(pidfile.fd, 0) < 0) 316 | err(EXIT_FAILURE, "%s", path); 317 | atexit(pidfile_close); 318 | } 319 | 320 | static void pidfile_write(void) { 321 | if (pidfile.path && dprintf(pidfile.fd, "%d\n", getpid()) < 0) 322 | err(EXIT_FAILURE, "dprintf"); 323 | } 324 | 325 | static pid_t reap(int *status) { 326 | pid_t child; 327 | 328 | while ((child = waitpid(-1, status, WNOHANG)) < 0) 329 | if (errno != EINTR) 330 | break; 331 | return child > 0 ? 
child : 0; 332 | } 333 | 334 | static int signal_get(void) { 335 | unsigned char signal = 0; 336 | while (read(signals[0], &signal, 1) < 0) 337 | if (errno != EINTR) 338 | break; 339 | return signal; 340 | } 341 | 342 | static void signal_put(int signal) { 343 | while (write(signals[1], &(unsigned char) { signal }, 1) < 0) 344 | if (errno != EINTR) 345 | break; 346 | } 347 | 348 | static void execute(char **argv) { 349 | if (gid > 0 && setgid(gid) < 0) 350 | err(EXIT_FAILURE, "setgid"); 351 | if (uid > 0 && setuid(uid) < 0) 352 | err(EXIT_FAILURE, "setuid"); 353 | execvp(argv[0], argv); 354 | err(EXIT_FAILURE, "exec"); 355 | } 356 | 357 | static int serve(char **argv, size_t limit) { 358 | int connection; 359 | size_t count = 0; 360 | 361 | listen_add(signals[0]); 362 | pollfd[listeners - 1].events = POLLIN; 363 | 364 | while (1) { 365 | /* Only listen for new connections when below the connection limit. */ 366 | for (size_t i = 0; i + 1 < listeners; i++) 367 | pollfd[i].events = count < limit ? POLLIN : 0; 368 | 369 | if (poll(pollfd, listeners, -1) < 0) { 370 | if (errno != EINTR && errno != EAGAIN) 371 | err(EXIT_FAILURE, "poll"); 372 | continue; 373 | } 374 | 375 | /* Deal with signals first in case they free additional slots. */ 376 | if (pollfd[listeners - 1].revents & POLLIN) 377 | switch (signal_get()) { 378 | case SIGCHLD: 379 | while (reap(NULL) > 0) 380 | if (count > 0) 381 | count--; 382 | break; 383 | case SIGINT: 384 | case SIGTERM: 385 | return EXIT_SUCCESS; 386 | } 387 | 388 | /* Accept connections from ready listeners until we hit our limit. 
*/ 389 | for (size_t i = 0; i + 1 < listeners; i++) 390 | if (pollfd[i].revents & POLLIN && count < limit) 391 | if ((connection = accept(pollfd[i].fd, NULL, NULL)) >= 0) { 392 | switch (fork()) { 393 | case -1: 394 | break; 395 | case 0: 396 | if (dup2(connection, STDIN_FILENO) < 0) 397 | err(EXIT_FAILURE, "dup2"); 398 | if (dup2(connection, STDOUT_FILENO) < 0) 399 | err(EXIT_FAILURE, "dup2"); 400 | close(connection); 401 | execute(argv); 402 | default: 403 | count++; 404 | } 405 | close(connection); 406 | } 407 | } 408 | } 409 | 410 | static int supervise(char **argv, int restart) { 411 | int signal; 412 | pid_t child, command; 413 | time_t wait; 414 | 415 | do { 416 | switch (command = fork()) { 417 | case -1: 418 | err(EXIT_FAILURE, "fork"); 419 | case 0: 420 | setsid(); /* Ignore errors but should always work after fork. */ 421 | execute(argv); 422 | } 423 | 424 | wait = time(NULL) + 5; 425 | while (command) 426 | switch (signal = signal_get()) { 427 | case SIGCHLD: 428 | /* Reap every child, watching out for the command pid. */ 429 | while ((child = reap(NULL))) 430 | if (child == command) 431 | command = 0; 432 | break; 433 | case SIGTERM: 434 | restart = 0; 435 | /* Fall through to the default behaviour. */ 436 | case SIGHUP: 437 | case SIGINT: 438 | case SIGUSR1: 439 | case SIGUSR2: 440 | /* Pass signals on to our child process. */ 441 | kill(command, signal); 442 | wait = 0; 443 | } 444 | 445 | /* Try to avoid restarting a crashing command in a tight loop. 
*/ 446 | if (restart && time(NULL) < wait) 447 | errx(EXIT_FAILURE, "Child died within 5 seconds: not restarting"); 448 | } while (restart); 449 | 450 | return EXIT_SUCCESS; 451 | } 452 | 453 | static void usage(char *progname) { 454 | fprintf(stderr, "\ 455 | Usage: %s [OPTIONS] CMD [ARG]...\n\ 456 | Options:\n\ 457 | -d DIR change directory to DIR before running the command\n\ 458 | -f fork twice so the command is not a session leader\n\ 459 | -l TAG:PRI redirect stdout and stderr to a logger subprocess,\n\ 460 | using syslog tag TAG and priority/facility PRI\n\ 461 | -l LOGFILE append stdout and stderr to a file LOGFILE, which must be\n\ 462 | given as an absolute path whose first character is '/'\n\ 463 | -n LIMIT allow no more than LIMIT concurrent socket connections\n\ 464 | -p PIDFILE lock PIDFILE and write pid to it, removing it on exit\n\ 465 | -r supervise the running command, restarting it if it dies\n\ 466 | and passing on TERM, INT, HUP, USR1 and USR2 signals\n\ 467 | -s PATH listen on a unix stream socket and run the command with\n\ 468 | stdin and stdout attached to each connection\n\ 469 | -t HOST:PORT listen on a TCP stream socket and run the command with\n\ 470 | stdin and stdout attached to each connection\n\ 471 | -u UID:GID run the command with the specified numeric uid and gid\n\ 472 | -u USERNAME run the command with the uid and gid of user USERNAME\n\ 473 | -w PATH wait until PATH exists before running the command\n\ 474 | ", progname); 475 | exit(EX_USAGE); 476 | } 477 | 478 | int main(int argc, char **argv) { 479 | char *dir = NULL, *options, *path; 480 | int fd, inotify, option, pwd, tail, waitargs; 481 | size_t doublefork = 0, limit = -1, restart = 0; 482 | struct passwd *user; 483 | 484 | /* Redirect stdin from /dev/null. 
*/ 485 | if ((fd = open("/dev/null", O_RDWR)) < 0) 486 | err(EXIT_FAILURE, "open /dev/null"); 487 | if (fd != STDIN_FILENO) { 488 | if ((dup2(fd, STDIN_FILENO)) < 0) 489 | err(EXIT_FAILURE, "dup2"); 490 | close(fd); 491 | } 492 | 493 | /* Redirect stdout and/or stderr to /dev/null if closed. */ 494 | if (fcntl(STDOUT_FILENO, F_GETFD) < 0 && errno == EBADF) 495 | if ((dup2(STDIN_FILENO, STDOUT_FILENO)) < 0) 496 | err(EXIT_FAILURE, "dup2"); 497 | if (fcntl(STDERR_FILENO, F_GETFD) < 0 && errno == EBADF) 498 | if ((dup2(STDIN_FILENO, STDERR_FILENO)) < 0) 499 | err(EXIT_FAILURE, "dup2"); 500 | 501 | options = "+:cd:fl:n:p:rs:t:u:w:", waitargs = 0; 502 | while ((option = getopt(argc, argv, options)) > 0) 503 | switch (option) { 504 | case 'c': 505 | /* Special case of -d DIR, for compatibility with BSD daemon(1). */ 506 | dir = "/"; 507 | break; 508 | case 'd': 509 | dir = optarg; 510 | if ((fd = open(dir, O_RDONLY | O_DIRECTORY)) < 0) 511 | err(EXIT_FAILURE, "%s", dir); 512 | close(fd); 513 | break; 514 | case 'f': 515 | doublefork = 1; 516 | break; 517 | case 'l': 518 | logger_setup(optarg); 519 | break; 520 | case 'n': 521 | if (sscanf(optarg, "%zu%n", &limit, &tail) >= 1) 522 | if (optarg[tail] == 0) 523 | break; 524 | errx(EXIT_FAILURE, "Invalid connection limit"); 525 | case 'p': 526 | pidfile_open(optarg); 527 | break; 528 | case 'r': 529 | restart = 1; 530 | break; 531 | case 's': 532 | listen_unix(optarg); 533 | break; 534 | case 't': 535 | listen_tcp(optarg); 536 | break; 537 | case 'u': 538 | if (sscanf(optarg, "%u:%u%n", &uid, &gid, &tail) >= 2) 539 | if (optarg[tail] == 0) 540 | break; 541 | if ((user = getpwnam(optarg))) { 542 | uid = user->pw_uid; 543 | gid = user->pw_gid; 544 | break; 545 | } 546 | errx(EXIT_FAILURE, "Invalid username"); 547 | case 'w': 548 | waitargs++; 549 | break; 550 | default: 551 | usage(argv[0]); 552 | } 553 | 554 | /* When run with just -w arguments, we await paths in the foreground. 
*/ 555 | if (waitargs > 0 && argc == 2 * waitargs + 1) 556 | goto await; 557 | 558 | if (argc <= optind) 559 | usage(argv[0]); 560 | 561 | /* Fork into the background then create a session and process group. */ 562 | switch (fork()) { 563 | case -1: 564 | err(EXIT_FAILURE, "fork"); 565 | case 0: 566 | setsid(); /* Ignore errors but should always work after fork. */ 567 | break; 568 | default: 569 | _exit(EXIT_SUCCESS); /* Don't delete pidfile in atexit() handler. */ 570 | } 571 | 572 | if (doublefork) { 573 | /* Fork again to ensure we are not the session leader. */ 574 | switch (fork()) { 575 | case -1: 576 | err(EXIT_FAILURE, "fork"); 577 | case 0: 578 | break; 579 | default: 580 | _exit(EXIT_SUCCESS); /* Don't delete pidfile in atexit() handler. */ 581 | } 582 | } 583 | 584 | logger_start(); 585 | pidfile_write(); 586 | 587 | await: 588 | if (waitargs > 0) { 589 | if ((inotify = inotify_init1(IN_CLOEXEC | IN_NONBLOCK)) < 0) 590 | err(EXIT_FAILURE, "inotify_init1"); 591 | 592 | /* Open the working directory so we can restore it after each await(). */ 593 | if ((pwd = open(".", O_RDONLY | O_DIRECTORY)) < 0) 594 | err(EXIT_FAILURE, "open pwd"); 595 | 596 | optind = 0; /* Need to reset optind to reprocess -w arguments. */ 597 | while ((option = getopt(argc, argv, options)) > 0) 598 | if (option == 'w') { 599 | if (!(path = strdup(optarg))) 600 | err(EXIT_FAILURE, "strdup"); 601 | await(path, inotify, 0); 602 | free(path); 603 | if (fchdir(pwd) < 0) 604 | err(EXIT_FAILURE, "fchdir"); 605 | } 606 | 607 | close(inotify); 608 | close(pwd); 609 | } 610 | 611 | /* Exit if we were just awaiting paths in the foreground. */ 612 | if (argc <= optind) 613 | return EXIT_SUCCESS; 614 | 615 | if (dir && chdir(dir) < 0) 616 | err(EXIT_FAILURE, "chdir"); 617 | 618 | /* If we don't need to supervise it, just exec the command. */ 619 | if (!restart && !pidfile.path && !listeners) 620 | execute(argv + optind); 621 | 622 | /* Use a signals pipe to avoid async-unsafe handlers. 
*/ 623 | if (pipe2(signals, O_CLOEXEC) < 0) 624 | err(EXIT_FAILURE, "pipe"); 625 | 626 | /* Avoid using SIG_IGN as this disposition persists across exec. */ 627 | signal(SIGHUP, signal_put); 628 | signal(SIGINT, signal_put); 629 | signal(SIGPIPE, signal_put); 630 | signal(SIGTERM, signal_put); 631 | signal(SIGCHLD, signal_put); 632 | signal(SIGUSR1, signal_put); 633 | signal(SIGUSR2, signal_put); 634 | 635 | if (listeners > 0) 636 | return serve(argv + optind, limit); 637 | return supervise(argv + optind, restart); 638 | } 639 | --------------------------------------------------------------------------------