├── Makefile ├── README └── mkbox.c /Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: mkbox 3 | 4 | mkbox: mkbox.c 5 | $(CC) -Wall -O1 -g -o mkbox mkbox.c 6 | 7 | clean: 8 | rm -f mkbox 9 | 10 | test: mkbox 11 | mkdir -p sandbox databox sandbox/bin 12 | cp /bin/busybox sandbox/bin 13 | chmod 755 sandbox/bin/busybox 14 | ( cd sandbox/bin && for x in $$(busybox --list) ; do ln -fs busybox $$x ; done ) 15 | ./mkbox --data=`pwd`/databox --with-proc sandbox 16 | 17 | clean-test:: 18 | rm -rf sandbox databox 19 | 20 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 2 | mkbox: an experiment in nonprivileged sandboxing in Linux 3 | --------------------------------------------------------- 4 | 5 | Requires kernel 3.12 or newer with CONFIG_USER_NS. 6 | 7 | Tested on Ubuntu 14.04 LTS / Linux 3.13.7. 8 | 9 | Huge thanks to Andy Lutomirski, who provided the roadmap in the form of 10 | a patch to Sandstorm (https://github.com/kentonv/sandstorm/pull/12) as 11 | well as kindly assisting with some early debugging as I fell into various 12 | pits he had previously explored. 13 | 14 | Disclaimer: This is a toy built to learn about these APIs and is 15 | certainly incomplete, likely buggy, etc. Use at your own risk. 
16 | 17 | "make test" will build mkbox and create sandbox (which will be r/o /), 18 | and databox (which will be r/w /data), and copy /bin/busybox into 19 | sandbox/bin and create busybox's symlinks in there as well, then 20 | invoke: mkbox --data=`pwd`/databox --with-proc sandbox 21 | 22 | computer$ id 23 | uid=1000(somebody) gid=1000(somebody) groups=1000(somebody) 24 | 25 | computer$ make test 26 | cc -Wall -O1 -g -o mkbox mkbox.c 27 | mkdir -p sandbox databox sandbox/bin 28 | cp /bin/busybox sandbox/bin 29 | chmod 755 sandbox/bin/busybox 30 | ( cd sandbox/bin && for x in $(busybox --list) ; do ln -fs busybox $x ; done ) 31 | ./mkbox sandbox `pwd`/databox 32 | mkbox: pid=14259, child=14260 33 | 34 | 35 | BusyBox v1.21.1 (Ubuntu 1:1.21.0-1ubuntu1) built-in shell (ash) 36 | Enter 'help' for a list of built-in commands. 37 | 38 | / $ id 39 | uid=3333 gid=3333 groups=65534,3333 40 | 41 | / $ ls -l 42 | drwxrwxr-x 2 3333 3333 4096 Apr 27 04:34 bin 43 | drwxrwxr-x 2 3333 3333 4096 Apr 27 04:33 data 44 | drwxr-xr-x 2 3333 3333 80 Apr 27 04:34 dev 45 | 46 | 47 | /* in another shell */ 48 | 49 | computer$ cat /proc/14259/mounts 50 | rootfs / rootfs rw 0 0 51 | /dev/root / ext3 ro,nosuid,noatime,errors=remount-ro,barrier=0,data=writeback 0 0 52 | /dev/root /data ext3 rw,noatime,errors=remount-ro,barrier=0,data=writeback 0 0 53 | sandbox-dev /dev tmpfs ro,nosuid,noexec,noatime,size=64k,nr_inodes=16,mode=755,uid=1000,gid=1000 0 0 54 | devtmpfs /dev/null devtmpfs rw,relatime,size=1019296k,nr_inodes=254824,mode=755 0 0 55 | devtmpfs /dev/zero devtmpfs rw,relatime,size=1019296k,nr_inodes=254824,mode=755 0 0 56 | 57 | -------------------------------------------------------------------------------- /mkbox.c: -------------------------------------------------------------------------------- 1 | /* mkbox.c 2 | * 3 | * Copyright 2014 Brian Swetland 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <linux/capability.h>

/* can't find headers for these, but they're in glibc... */
int pivot_root(const char *new_root, const char *put_old);
int capset(cap_user_header_t h, cap_user_data_t d);

/* Abort with a diagnostic when a syscall-style result is negative.
 *
 * res:  value returned by the call being checked
 * name: name of the function that produced res (used in the message)
 * line: source line of the call (used in the message)
 *
 * Returns res unchanged when res >= 0.  Otherwise prints the failure,
 * including errno, to stderr and terminates via exit(-1).
 */
static int checkreturn(int res, const char *name, int line) {
	int err = errno;	/* snapshot before stdio gets a chance to touch it */

	if (res >= 0)
		return res;
	fprintf(stderr, "mkbox.c:%d: error: %s() failed: r=%d errno=%d (%s)\n",
		line, name, res, err, strerror(err));
	exit(-1);
}

/* ok(fn, args...) calls fn(args...) and exits with a message naming fn and
 * the calling line if the result is negative; otherwise yields the result.
 */
#define ok(fname, ...) checkreturn(fname(__VA_ARGS__), #fname, __LINE__)

/* Clear every capability set (effective, permitted, inheritable) of the
 * calling process.  Returns the result of capset().
 */
int dropcaps(void) {
	struct __user_cap_header_struct header;
	struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];

	memset(&header, 0, sizeof(header));
	header.version = _LINUX_CAPABILITY_VERSION_3;
	header.pid = 0;		/* 0 == the calling process */
	memset(data, 0, sizeof(data));
	return capset(&header, data);
}

/* Print the command line synopsis to stderr. */
void usage(void) {
	fprintf(stderr,
		"usage: mkbox [ options ]* <sandbox-dir>\n"
		"\n"
		"options: --with-dev     mount /dev at sandbox's /dev\n"
		"                        (otherwise only /dev/{null,zero,random,urandom})\n"
		"         --with-sys     mount /sys at sandbox's /sys\n"
		"         --with-proc    mount /proc at sandbox's /proc\n"
		"         --with-tmp     mount tmpfs at sandbox's /tmp\n"
		"         --data=<path>  mount <path> at sandbox's /data (rw)\n"
		"         --init=<path>  exec <path> in sandbox (default: /bin/sh)\n"
		"\n"
		);
}

/* Replace a possibly stale, empty directory at path with a fresh one. */
static void fresh_dir(const char *path, mode_t mode) {
	/* best effort: if the old entry cannot be removed, mkdir fails below
	 * and the program aborts with a diagnostic
	 */
	rmdir(path);
	ok(mkdir, path, mode);
}

/* Write a single "<inside> <outside> 1" mapping line to a uid_map/gid_map file. */
static void write_id_map(const char *path, unsigned long inside,
			 unsigned long outside) {
	char line[64];
	int fd;

	snprintf(line, sizeof(line), "%lu %lu 1\n", inside, outside);
	fd = ok(open, path, O_WRONLY);
	ok(write, fd, line, strlen(line));
	ok(close, fd);
}

/* Create an empty regular file at target and bind the host node source onto it. */
static void bind_device(const char *source, const char *target) {
	ok(mknod, target, S_IFREG | 0666, 0);
	ok(mount, source, target, NULL, MS_BIND, NULL);
}

/* Populate ./dev with a small read-only tmpfs exposing only a few host devices. */
static void setup_minimal_dev(void) {
	ok(mount, "sandbox-dev", "dev", "tmpfs",
	   MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_NOATIME,
	   "size=64k,nr_inodes=16,mode=755");

	bind_device("/dev/null", "dev/null");
	bind_device("/dev/zero", "dev/zero");
	/* the sandbox's /dev/random is backed by the host's /dev/urandom */
	bind_device("/dev/urandom", "dev/random");
	bind_device("/dev/urandom", "dev/urandom");

	/* note: MS_RDONLY does not work when doing the initial bind */
	ok(mount, "dev", "dev", NULL,
	   MS_REMOUNT | MS_BIND | MS_NOEXEC |
	   MS_NOSUID | MS_NODEV | MS_RDONLY,
	   NULL);
}

/* Reap children until child has been collected, logging each exit.
 * A wait() failure other than EINTR (for instance ECHILD) ends the loop
 * instead of being retried forever.
 */
static void wait_for_child(pid_t child) {
	for (;;) {
		int status = 0;
		pid_t pid = wait(&status);

		if (pid < 0) {
			int err = errno;

			fprintf(stderr, "mkbox: wait() errno=%d\n", err);
			if (err == EINTR)
				continue;
			break;
		}
		fprintf(stderr, "mkbox: proc %d exited with status %d\n",
			(int)pid, status);
		if (pid == child)
			break;
	}
}

/* mkbox [ options ]* <sandbox-dir>
 *
 * Unshares into new user/pid/mount/uts/ipc namespaces, assembles a root
 * filesystem from <sandbox-dir>, pivots into it, then forks a child (pid 1
 * of the new pid namespace) which drops all capabilities and execs the
 * init binary.  The parent waits for that child.
 *
 * Returns 0 once the child has been reaped, -1 on a usage error; any
 * failing setup step terminates the process through ok().
 */
int main(int argc, char **argv) {
	const uid_t newuid = 3333;
	const gid_t newgid = 3333;
	int with_sys = 0;
	int with_proc = 0;
	int with_dev = 0;
	int with_tmp = 0;
	int fd;
	const char *sandbox;
	const char *databox = NULL;
	const char *initbin = "/bin/sh";
	uid_t uid;
	gid_t gid;
	pid_t cpid;

	argv++;
	argc--;
	while (argc > 0) {
		if (argv[0][0] != '-')
			break;
		if (!strcmp(argv[0], "--with-sys")) {
			with_sys = 1;
		} else if (!strcmp(argv[0], "--with-proc")) {
			with_proc = 1;
		} else if (!strcmp(argv[0], "--with-dev")) {
			with_dev = 1;
		} else if (!strcmp(argv[0], "--with-tmp")) {
			with_tmp = 1;
		} else if (!strncmp(argv[0], "--init=", 7)) {
			initbin = argv[0] + 7;
		} else if (!strncmp(argv[0], "--data=", 7)) {
			databox = argv[0] + 7;
		} else {
			usage();
			return -1;
		}
		argv++;
		argc--;
	}
	if (argc != 1) {
		usage();
		return -1;
	}
	sandbox = argv[0];

	/* remember the outer ids before entering the new user namespace */
	uid = getuid();
	gid = getgid();

	ok(unshare, CLONE_NEWPID|
	   CLONE_NEWNS|CLONE_NEWUTS|
	   CLONE_NEWIPC|CLONE_NEWUSER);

	/* ensure that changes to our mount namespace do not "leak" to
	 * outside namespaces (what mount --make-rprivate / does)
	 */
	if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0) {
		int err = errno;

		fprintf(stderr,
			"mkbox: warning: could not make / private: errno=%d (%s)\n",
			err, strerror(err));
	}

	/* mount the sandbox on top of itself in our new namespace */
	/* it will become our root filesystem */
	ok(mount, sandbox, sandbox, NULL, MS_BIND|MS_NOSUID, NULL);

	/* step inside the to-be-root-directory */
	ok(chdir, sandbox);

	/* setup needed subdirectories */
	fresh_dir("data", 0755);
	fresh_dir("dev", 0755);
	fresh_dir(".oldroot", 0755);

	if (databox) {
		/* mount read-write data volume */
		ok(mount, databox, "data", NULL, MS_BIND|MS_NOSUID|MS_NODEV, NULL);
	}

	if (with_proc) {
		fresh_dir(".oldproc", 0755);
		fresh_dir("proc", 0755);
		/* we need to hang on to the old proc in order to mount our
		 * new proc later on
		 */
		ok(mount, "/proc", ".oldproc", NULL, MS_BIND|MS_REC, NULL);
	}
	if (with_sys) {
		fresh_dir("sys", 0755);
		ok(mount, "/sys", "sys", NULL, MS_BIND|MS_REC, NULL);
	}

	if (with_dev) {
		ok(mount, "/dev", "dev", NULL, MS_BIND|MS_REC, NULL);
	} else {
		setup_minimal_dev();
	}
	if (with_tmp) {
		fresh_dir("tmp", 0770);
		ok(mount, "sandbox-tmp", "tmp", "tmpfs",
		   MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_NOATIME,
		   "size=16m,nr_inodes=4k,mode=770");
	}

	/* map new UID/GID to outer UID/GID */
	write_id_map("/proc/self/uid_map", newuid, uid);

	/* must disallow setgroups() before writing to gid_map on
	 * versions of linux with this feature; older kernels do not
	 * have the file, so a failed open is not an error
	 */
	fd = open("/proc/self/setgroups", O_WRONLY);
	if (fd >= 0) {
		ok(write, fd, "deny", 4);
		ok(close, fd);
	}
	write_id_map("/proc/self/gid_map", newgid, gid);

	/* initially we're nobody, change to newgid/newuid */
	ok(setresgid, newgid, newgid, newgid);
	ok(setresuid, newuid, newuid, newuid);

	/* sandbox becomes our new root, detach the old one */
	ok(pivot_root, ".", ".oldroot");
	ok(umount2, ".oldroot", MNT_DETACH);
	ok(rmdir, ".oldroot");

	/* we must fork to become pid 1 in the new pid namespace */
	cpid = ok(fork);

	if (cpid == 0) {
		if (getpid() != 1) {
			fprintf(stderr, "mkbox child pid != 1?!\n");
			return -1;
		}
		if (with_proc) {
			ok(mount, "/proc", "/proc", "proc", MS_NOSUID, NULL);
			ok(umount2, "/.oldproc", MNT_DETACH);
			rmdir("/.oldproc");
		}

		/* remount root to finalize permissions */
		ok(mount, "/", "/", NULL,
		   MS_RDONLY|MS_BIND|MS_NOSUID|MS_REMOUNT,
		   NULL);

		/* discard all capability bits */
		ok(dropcaps);

		/* the argument list of a variadic exec must end with a
		 * null pointer of type char *
		 */
		ok(execl, initbin, initbin, (char *)NULL);
		/* not reached: execl() only returns on failure and ok() exits */
		return -1;
	}

	fprintf(stderr, "mkbox: pid=%d, child=%d\n", (int)getpid(), (int)cpid);
	wait_for_child(cpid);

	fprintf(stderr, "mkbox: exiting\n");
	return 0;
}