├── .gitignore
├── LICENSE
├── README.md
├── cmd
    └── runprog
    │   ├── array_flags.go
    │   ├── config
    │       ├── config.go
    │       ├── config_amd64.go
    │       ├── config_arm.go
    │       ├── config_arm64.go
    │       ├── config_loader.go
    │       └── config_type.go
    │   ├── fileutil.go
    │   ├── main.go
    │   ├── main_darwin.go
    │   └── main_linux.go
├── container
    ├── benchmark_linux_test.go
    ├── consts_linux.go
    ├── container_cmd_linux.go
    ├── container_exec_linux.go
    ├── container_init_linux.go
    ├── doc.go
    ├── environment_linux.go
    ├── host_cmd_linux.go
    ├── host_exec_linux.go
    ├── lookup_linux.go
    ├── protocol_linux.go
    ├── signal_linux.go
    ├── signal_linux_mips64x.go
    ├── socket_linux.go
    └── utils.go
├── go.mod
├── go.sum
├── pkg
    ├── cgroup
    │   ├── benchmark_linux_test.go
    │   ├── cgroup_info_linux.go
    │   ├── cgroup_linux.go
    │   ├── consts_linux.go
    │   ├── doc.go
    │   ├── utils_linux.go
    │   ├── v1_linux.go
    │   ├── v1controller_linux.go
    │   └── v2_linux.go
    ├── forkexec
    │   ├── bench_linux_test.go
    │   ├── clone3_linux.go
    │   ├── consts_linux.go
    │   ├── doc.go
    │   ├── errloc_linux.go
    │   ├── fork_child_darwin.go
    │   ├── fork_child_linux.go
    │   ├── fork_darwin.go
    │   ├── fork_linux.go
    │   ├── fork_linux_test.go
    │   ├── fork_unix.go
    │   ├── fork_util.go
    │   ├── runner_darwin.go
    │   ├── runner_linux.go
    │   ├── sandbox_darwin_test.go
    │   ├── sandbox_load_darwin.go
    │   ├── syscall_darwin.go
    │   ├── test.sb
    │   ├── userns_linux.go
    │   ├── vfork
    │   │   ├── asm_linux_386.s
    │   │   ├── asm_linux_amd64.s
    │   │   ├── asm_linux_arm.s
    │   │   ├── asm_linux_arm64.s
    │   │   ├── asm_linux_loong64.s
    │   │   ├── asm_linux_mips64x.s
    │   │   ├── asm_linux_mipsx.s
    │   │   ├── asm_linux_ppc64x.s
    │   │   ├── asm_linux_riscv64.s
    │   │   ├── asm_linux_s390x.s
    │   │   └── syscall.go
    │   ├── zsyscall_darwin.go
    │   └── zsyscall_darwin.s
    ├── memfd
    │   ├── doc.go
    │   ├── memfd_linux.go
    │   ├── memfd_linux_test.go
    │   └── memfd_other.go
    ├── mount
    │   ├── builder.go
    │   ├── builder_linux.go
    │   ├── builder_linux_test.go
    │   ├── doc.go
    │   ├── mount.go
    │   ├── mount_linux.go
    │   └── mount_linux_test.go
    ├── pipe
    │   ├── buffer.go
    │   └── buffer_test.go
    ├── rlimit
    │   ├── rlimit.go
    │   └── rlimit_test.go
    ├── seccomp
    │   ├── filter_linux.go
    │   └── libseccomp
    │   │   ├── action.go
    │   │   ├── action_linux.go
    │   │   ├── builder_linux.go
    │   │   ├── doc.go
    │   │   ├── seccomp_linux_test.go
    │   │   └── syscall_name_linux.go
    └── unixsocket
    │   ├── benchmark_linux_test.go
    │   ├── socket_linux.go
    │   └── socket_linux_test.go
├── ptracer
    ├── context_helper_linux.go
    ├── context_helper_linux_test.go
    ├── context_linux.go
    ├── context_linux_amd64.go
    ├── context_linux_arm.go
    ├── context_linux_arm64.go
    ├── context_other.go
    ├── doc.go
    ├── ptrace_linux.go
    ├── tracer.go
    └── tracer_track_linux.go
└── runner
    ├── doc.go
    ├── limit.go
    ├── ptrace
        ├── filehandler
        │   ├── fileset.go
        │   ├── fileset_test.go
        │   ├── handle.go
        │   └── syscallcounter.go
        ├── handle_linux.go
        ├── run_linux.go
        └── runner_linux.go
    ├── result.go
    ├── runner.go
    ├── size.go
    ├── status.go
    └── unshare
        ├── doc.go
        ├── run_linux.go
        └── runner_linux.go


/.gitignore:
--------------------------------------------------------------------------------
 1 | # OS
 2 | .DS_Store
 3 | 
 4 | # Test Env
 5 | test*/
 6 | env*.sh
 7 | 
 8 | # Test Files
 9 | /runprog
10 | /test
11 | 
12 | .vscode
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 criyle
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/cmd/runprog/array_flags.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import "fmt"
 4 | 
 5 | type arrayFlags []string
 6 | 
 7 | func (f *arrayFlags) String() string {
 8 | 	return fmt.Sprint([]string(*f))
 9 | }
10 | 
11 | func (f *arrayFlags) Set(value string) error {
12 | 	*f = append(*f, value)
13 | 	return nil
14 | }
15 | 


--------------------------------------------------------------------------------
/cmd/runprog/config/config.go:
--------------------------------------------------------------------------------
  1 | package config
  2 | 
  3 | // This file includes configs for the run program settings
  4 | 
  5 | var (
	// defaultReadableFiles lists the paths GetConf adds to the readable file
	// set for every program type.
	// NOTE(review): entries ending in "/" look like directory prefixes; confirm
	// against the file set implementation.
	defaultReadableFiles = []string{
		"/etc/ld.so.nohwcap",
		"/etc/ld.so.preload",
		"/etc/ld.so.cache",
		"/usr/lib/locale/locale-archive",
		"/proc/self/exe",
		"/etc/timezone",
		"/usr/share/zoneinfo/",
		"/dev/random",
		"/dev/urandom",
		"/proc/meminfo",
		"/etc/localtime",
	}
 20 | 
	// defaultWritableFiles lists the paths GetConf adds to the writable file
	// set for every program type.
	defaultWritableFiles = []string{"/dev/null"}
 23 | 
	// defaultSyscallAllows is the base list of syscall names GetConf places on
	// the allow list for every program type, before architecture-specific and
	// per-program additions.
	defaultSyscallAllows = []string{
		// file access through fd
		"read",
		"write",
		"readv",
		"writev",
		"close",
		"fstat",
		"lseek",
		"dup",
		"dup2",
		"dup3",
		"ioctl",
		"fcntl",
		"fadvise64",
		"pread64",
		"pwrite64",

		// memory action
		"mmap",
		"mprotect",
		"munmap",
		"brk",
		"mremap",
		"msync",
		"mincore",
		"madvise",

		// signal action
		"rt_sigaction",
		"rt_sigprocmask",
		"rt_sigreturn",
		"rt_sigpending",
		"sigaltstack",

		// get current work dir
		"getcwd",

		// process exit
		"exit",
		"exit_group",

		// others
		"arch_prctl",

		// time and resource usage queries
		"gettimeofday",
		"getrlimit",
		"getrusage",
		"times",
		"time",
		"clock_gettime",

		"restart_syscall",
	}
 79 | 
	// defaultSyscallTraces is the base list of syscall names GetConf places on
	// the trace list for every program type. A name on the trace list is
	// removed from the allow list by cleanTrace.
	defaultSyscallTraces = []string{
		// execute file
		"execve",
		"execveat",

		// file open
		"open",
		"openat",

		// file delete
		"unlink",
		"unlinkat",

		// soft link
		"readlink",
		"readlinkat",

		// permission check
		"lstat",
		"stat",
		"access",
		"faccessat",
	}
104 | 
	// defaultProcSyscalls holds the process-related syscalls GetConf appends to
	// the allow list when allowProc is enabled. "execve" is also listed in
	// defaultSyscallTraces, and cleanTrace drops traced names from the allow
	// list, so it remains traced.
	defaultProcSyscalls = []string{"clone", "fork", "vfork", "nanosleep", "execve"}
107 | 
	// runptraceConfig maps a program type name to the extra configuration
	// GetConf applies on top of the defaults: extra syscalls, extra file
	// permissions and an optional command prefix.
	// workpath and arg0 have additional read / stat permission
	runptraceConfig = map[string]ProgramConfig{
		// Python 2.7 interpreter running ./answer.code.
		"python2.7": {
			Syscall: SyscallConfig{
				ExtraAllow: []string{
					"futex", "getdents", "getdents64", "prlimit64", "getpid", "sysinfo",
				},
				// NOTE(review): the value is presumably the number of calls
				// permitted; confirm against the syscall counter.
				ExtraCount: map[string]int{
					"set_tid_address": 1,
					"set_robust_list": 1,
				},
			},
			FileAccess: FileAccessConfig{
				ExtraRead: []string{
					"/usr/bin/python2.7",
					"/usr/lib/python2.7/",
					"/usr/bin/lib/python2.7/",
					"/usr/local/lib/python2.7/",
					"/usr/lib/pymodules/python2.7/",
					"/usr/bin/Modules/",
					"/usr/bin/pybuilddir.txt",
					"/usr/lib/locale/",
					"./answer.code",
				},
				ExtraStat: []string{
					"/usr", "/usr/bin",
				},
			},
			RunCommand: []string{"/usr/bin/python2.7", "-E", "-s", "-B"},
		},
		// Python 3 interpreter (paths target 3.6) running ./answer.code.
		"python3": {
			Syscall: SyscallConfig{
				ExtraAllow: []string{
					"futex", "getdents", "getdents64", "prlimit64", "getpid", "sysinfo", "getrandom",
				},
				ExtraCount: map[string]int{
					"set_tid_address": 1,
					"set_robust_list": 1,
				},
			},
			FileAccess: FileAccessConfig{
				ExtraRead: []string{
					"/usr/bin/python3",
					"/usr/lib/python3/",
					"/usr/bin/python3.6",
					"/usr/lib/python3.6/",
					"/usr/bin/lib/python3.6/",
					"/usr/local/lib/python3.6/",
					"/usr/bin/pyvenv.cfg",
					"/usr/pyvenv.cfg",
					"/usr/bin/Modules",
					"/usr/bin/pybuilddir.txt",
					"/usr/lib/dist-python",
					"/usr/lib/locale/",
					"./answer.code",
				},
				ExtraStat: []string{
					"/usr", "/usr/bin", "/usr/lib", "/usr/lib/python36.zip",
				},
			},
			RunCommand: []string{"/usr/bin/python3", "-I", "-B"},
		},
		// Compiler runs: broad file access and process creation, no RunCommand
		// prefix.
		"compiler": {
			Syscall: SyscallConfig{
				ExtraAllow: []string{
					"gettid", "set_tid_address", "set_robust_list", "futex",
					"getpid", "vfork", "fork", "clone", "execve", "wait4",
					"clock_gettime", "clock_getres",
					"setrlimit", "pipe",
					"getdents64", "getdents",
					"umask", "rename", "chmod", "mkdir",
					"chdir", "fchdir",
					"ftruncate",
					"sched_getaffinity", "sched_yield",
					"uname", "sysinfo",
					"prlimit64", "getrandom",
					"fchmodat",
				},
				// GetConf appends these names to the trace list.
				ExtraBan: []string{"socket", "connect", "geteuid", "getuid"},
			},
			FileAccess: FileAccessConfig{
				ExtraWrite: []string{
					"/tmp/", "./",
				},
				ExtraRead: []string{
					"./",
					"../runtime/",
					"/etc/oracle/java/usagetracker.properties",
					"/usr/",
					"/lib/",
					"/lib64/",
					"/bin/",
					"/sbin/",
					"/sys/devices/system/cpu/",
					"/proc/",
					"/etc/timezone",
					"/etc/fpc-2.6.2.cfg.d/",
					"/etc/fpc.cfg",
					"/*",
					"/", // system_root
				},
				// GetConf adds these paths to the SoftBan file set.
				ExtraBan: []string{
					"/etc/nsswitch.conf",
					"/etc/passwd",
				},
			},
		},
	}
217 | )
218 | 


--------------------------------------------------------------------------------
/cmd/runprog/config/config_amd64.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
 3 | // This file includes configs for the run program settings
 4 | 
// Architecture-specific additions for amd64; GetConf merges them with the
// defaults.
var (
	// archReadableFiles lists extra readable paths (x86_64 library
	// directories).
	archReadableFiles = []string{
		"/lib/x86_64-linux-gnu/",
		"/usr/lib/x86_64-linux-gnu/",
	}

	// archSyscallAllows lists extra allowed syscalls; none on amd64.
	archSyscallAllows = []string{}

	// archSyscallTraces lists extra traced syscalls; none on amd64.
	archSyscallTraces = []string{}
)
15 | 


--------------------------------------------------------------------------------
/cmd/runprog/config/config_arm.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
 3 | // This file includes configs for the run program settings
 4 | 
// Architecture-specific additions for 32-bit arm; GetConf merges them with
// the defaults.
var (
	// archReadableFiles lists extra readable paths (armhf library
	// directories).
	archReadableFiles = []string{
		"/lib/arm-linux-gnueabihf/",
		"/usr/lib/arm-linux-gnueabihf/",
	}

	// archSyscallAllows lists extra allowed syscalls: the 32-bit variants of
	// calls in the default allow list plus arm-only calls.
	archSyscallAllows = []string{
		"fstat64", // 32-bit
		"_llseek", // 32-bit
		"fcntl64", // 32-bit
		"mmap2",   // 32-bit
		// arch
		"uname",
		"set_tls",
		"arm_fadvise64_64",
	}

	// archSyscallTraces lists extra traced syscalls: the 32-bit variants of
	// the stat calls in the default trace list.
	archSyscallTraces = []string{
		"lstat64", // 32-bit
		"stat64",  // 32-bit
	}
)
27 | 


--------------------------------------------------------------------------------
/cmd/runprog/config/config_arm64.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
 3 | // This file includes configs for the run program settings
 4 | 
// Architecture-specific additions for arm64; GetConf merges them with the
// defaults.
var (
	// archReadableFiles lists extra readable paths (aarch64 library
	// directories).
	archReadableFiles = []string{
		"/lib/aarch64-linux-gnu/",
		"/usr/lib/aarch64-linux-gnu/",
	}

	// archSyscallAllows lists extra allowed syscalls.
	archSyscallAllows = []string{
		"newfstatat",
	}

	// archSyscallTraces lists extra traced syscalls; none on arm64.
	archSyscallTraces = []string{}
)
17 | 


--------------------------------------------------------------------------------
/cmd/runprog/config/config_loader.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
 3 | import "github.com/criyle/go-sandbox/runner/ptrace/filehandler"
 4 | 
 5 | // GetConf return file access check set, syscall counter, allow and traced syscall arrays and new args
 6 | func GetConf(pType, workPath string, args, addRead, addWrite []string,
 7 | 	allowProc bool) ([]string, []string, []string, *filehandler.Handler) {
 8 | 	var (
 9 | 		fs    = filehandler.NewFileSets()
10 | 		sc    = filehandler.NewSyscallCounter()
11 | 		allow = append(append([]string{}, defaultSyscallAllows...), archSyscallAllows...)
12 | 		trace = append(append([]string{}, defaultSyscallTraces...), archSyscallTraces...)
13 | 	)
14 | 
15 | 	fs.Readable.AddRange(defaultReadableFiles, workPath)
16 | 	fs.Readable.AddRange(archReadableFiles, workPath)
17 | 	fs.Writable.AddRange(defaultWritableFiles, workPath)
18 | 	fs.AddFilePermission(args[0], filehandler.FilePermRead)
19 | 	fs.AddFilePermission(workPath, filehandler.FilePermRead)
20 | 
21 | 	fs.Readable.AddRange(addRead, workPath)
22 | 	fs.Writable.AddRange(addWrite, workPath)
23 | 
24 | 	if c, o := runptraceConfig[pType]; o {
25 | 		allow = append(allow, c.Syscall.ExtraAllow...)
26 | 		trace = append(trace, c.Syscall.ExtraBan...)
27 | 		sc.AddRange(c.Syscall.ExtraCount)
28 | 		fs.Readable.AddRange(c.FileAccess.ExtraRead, workPath)
29 | 		fs.Writable.AddRange(c.FileAccess.ExtraWrite, workPath)
30 | 		fs.Statable.AddRange(c.FileAccess.ExtraStat, workPath)
31 | 		fs.SoftBan.AddRange(c.FileAccess.ExtraBan, workPath)
32 | 		args = append(c.RunCommand, args...)
33 | 	}
34 | 	if allowProc {
35 | 		allow = append(allow, defaultProcSyscalls...)
36 | 	}
37 | 	allow, trace = cleanTrace(allow, trace)
38 | 
39 | 	return args, allow, trace, &filehandler.Handler{
40 | 		FileSet:        fs,
41 | 		SyscallCounter: sc,
42 | 	}
43 | }
44 | 
// keySetToSlice returns every key of m as a slice, regardless of the value
// stored under it. The order follows map iteration and is therefore
// unspecified.
func keySetToSlice(m map[string]bool) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	return keys
}
52 | 
// cleanTrace removes duplicates from allow and trace and makes the two lists
// disjoint: a name present in trace is dropped from allow. It returns the
// cleaned allow list followed by the cleaned trace list.
//
// Both results keep the first-occurrence order of their input, so the output
// is deterministic across runs.
func cleanTrace(allow, trace []string) ([]string, []string) {
	traced := make(map[string]bool, len(trace))
	uniqTrace := make([]string, 0, len(trace))
	for _, s := range trace {
		if traced[s] {
			continue
		}
		traced[s] = true
		uniqTrace = append(uniqTrace, s)
	}

	allowed := make(map[string]bool, len(allow))
	uniqAllow := make([]string, 0, len(allow))
	for _, s := range allow {
		// Tracing takes precedence over allowing.
		if traced[s] || allowed[s] {
			continue
		}
		allowed[s] = true
		uniqAllow = append(uniqAllow, s)
	}
	return uniqAllow, uniqTrace
}
67 | 


--------------------------------------------------------------------------------
/cmd/runprog/config/config_type.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
// ProgramConfig defines the extra configuration applied to one program type.
type ProgramConfig struct {
	// Syscall holds the extra syscall rules.
	Syscall SyscallConfig
	// FileAccess holds the extra file permissions.
	FileAccess FileAccessConfig
	// RunCommand is prepended to the program arguments by GetConf.
	RunCommand []string
}
 9 | 
// SyscallConfig holds the extra syscall rules applied to one program type.
type SyscallConfig struct {
	// ExtraAllow names syscalls that GetConf appends to the allow list.
	ExtraAllow []string
	// ExtraBan names syscalls that GetConf appends to the trace list.
	ExtraBan []string
	// ExtraCount is handed to the syscall counter by GetConf.
	// NOTE(review): the value is presumably a per-syscall call budget; confirm
	// against the counter implementation.
	ExtraCount map[string]int
}
15 | 
// FileAccessConfig holds the extra file access permissions applied to one
// program type. GetConf adds each list to the file set of the same purpose.
type FileAccessConfig struct {
	// ExtraRead lists paths added to the readable set.
	ExtraRead []string
	// ExtraWrite lists paths added to the writable set.
	ExtraWrite []string
	// ExtraStat lists paths added to the statable set.
	ExtraStat []string
	// ExtraBan lists paths added to the soft-ban set.
	ExtraBan []string
}
20 | 


--------------------------------------------------------------------------------
/cmd/runprog/fileutil.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import "os"
 4 | 
 5 | // prepareFile opens file for new process
 6 | func prepareFiles(inputFile, outputFile, errorFile string) ([]*os.File, error) {
 7 | 	var err error
 8 | 	files := make([]*os.File, 3)
 9 | 	if inputFile != "" {
10 | 		files[0], err = os.OpenFile(inputFile, os.O_RDONLY, 0755)
11 | 		if err != nil {
12 | 			goto openerror
13 | 		}
14 | 	}
15 | 	if outputFile != "" {
16 | 		files[1], err = os.OpenFile(outputFile, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0755)
17 | 		if err != nil {
18 | 			goto openerror
19 | 		}
20 | 	}
21 | 	if errorFile != "" {
22 | 		files[2], err = os.OpenFile(errorFile, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0755)
23 | 		if err != nil {
24 | 			goto openerror
25 | 		}
26 | 	}
27 | 	return files, nil
28 | openerror:
29 | 	closeFiles(files)
30 | 	return nil, err
31 | }
32 | 
// closeFiles closes every non-nil file in files; nil entries are skipped and
// errors from Close are ignored.
func closeFiles(files []*os.File) {
	for i := range files {
		if files[i] == nil {
			continue
		}
		files[i].Close()
	}
}
41 | 


--------------------------------------------------------------------------------
/cmd/runprog/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"flag"
 5 | 	"fmt"
 6 | 	"os"
 7 | 
 8 | 	"github.com/criyle/go-sandbox/runner"
 9 | )
10 | 
const (
	// pathEnv is the PATH entry placed in the environment of the program
	// being run.
	pathEnv = "PATH=/usr/local/bin:/usr/bin:/bin"
)
14 | 
// printUsage writes the usage line and the flag defaults to the flag
// package's output, then terminates the process with exit code 2.
func printUsage() {
	out := flag.CommandLine.Output()
	fmt.Fprintf(out, "Usage: %s [options] <args>\n", os.Args[0])
	flag.PrintDefaults()
	os.Exit(2)
}
20 | 
// Status is the numeric result code written to the result output, following
// the uoj/run_program constants.
type Status int

// UOJ run_program result codes. getStatus maps runner statuses onto them.
const (
	StatusNormal  Status = iota // 0
	StatusInvalid               // 1
	StatusRE                    // 2, signalled or non-zero exit status
	StatusMLE                   // 3, memory limit exceeded
	StatusTLE                   // 4, time limit exceeded
	StatusOLE                   // 5, output limit exceeded
	StatusBan                   // 6, disallowed syscall
	StatusFatal                 // 7, any other runner status
)
35 | 
36 | func getStatus(s runner.Status) int {
37 | 	switch s {
38 | 	case runner.StatusNormal:
39 | 		return int(StatusNormal)
40 | 	case runner.StatusInvalid:
41 | 		return int(StatusInvalid)
42 | 	case runner.StatusTimeLimitExceeded:
43 | 		return int(StatusTLE)
44 | 	case runner.StatusMemoryLimitExceeded:
45 | 		return int(StatusMLE)
46 | 	case runner.StatusOutputLimitExceeded:
47 | 		return int(StatusOLE)
48 | 	case runner.StatusDisallowedSyscall:
49 | 		return int(StatusBan)
50 | 	case runner.StatusSignalled, runner.StatusNonzeroExitStatus:
51 | 		return int(StatusRE)
52 | 	default:
53 | 		return int(StatusFatal)
54 | 	}
55 | }
56 | 
57 | func debug(v ...interface{}) {
58 | 	if showDetails {
59 | 		fmt.Fprintln(os.Stderr, v...)
60 | 	}
61 | }
62 | 


--------------------------------------------------------------------------------
/cmd/runprog/main_darwin.go:
--------------------------------------------------------------------------------
  1 | // Command runprog executes a program in a restricted environment: seccomp-ptraced, namespaced or containerized.
  2 | package main
  3 | 
  4 | import (
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"os"
  8 | 	"syscall"
  9 | 	"time"
 10 | 
 11 | 	"github.com/criyle/go-sandbox/pkg/forkexec"
 12 | 	"github.com/criyle/go-sandbox/pkg/rlimit"
 13 | 	"github.com/criyle/go-sandbox/runner"
 14 | 	"golang.org/x/sys/unix"
 15 | )
 16 | 
// Command-line settings, filled in by main from the flags.
var (
	// Limits: the time limits are in seconds, the others in megabytes.
	timeLimit, realTimeLimit, memoryLimit, outputLimit, stackLimit uint64
	// Files used as the program's stdin / stdout / stderr, and its working
	// directory.
	inputFileName, outputFileName, errorFileName, workPath string

	// profilePath is the sandbox profile file; result names where the result
	// line is written ("stdout", "stderr" or a file name).
	profilePath, result string
	// showDetails enables the debug output.
	showDetails bool

	// args is the program command line (the non-flag arguments).
	args []string
)
 26 | 
 27 | func main() {
 28 | 	flag.Usage = printUsage
 29 | 	flag.Uint64Var(&timeLimit, "tl", 1, "Set time limit (in second)")
 30 | 	flag.Uint64Var(&realTimeLimit, "rtl", 0, "Set real time limit (in second)")
 31 | 	flag.Uint64Var(&memoryLimit, "ml", 256, "Set memory limit (in mb)")
 32 | 	flag.Uint64Var(&outputLimit, "ol", 64, "Set output limit (in mb)")
 33 | 	flag.Uint64Var(&stackLimit, "sl", 32, "Set stack limit (in mb)")
 34 | 	flag.StringVar(&inputFileName, "in", "", "Set input file name")
 35 | 	flag.StringVar(&outputFileName, "out", "", "Set output file name")
 36 | 	flag.StringVar(&errorFileName, "err", "", "Set error file name")
 37 | 	flag.StringVar(&workPath, "work-path", "", "Set the work path of the program")
 38 | 	flag.StringVar(&profilePath, "p", "", "sandbox profile")
 39 | 	flag.BoolVar(&showDetails, "show-trace-details", false, "Show trace details")
 40 | 	flag.StringVar(&result, "res", "stdout", "Set the file name for output the result")
 41 | 	flag.Parse()
 42 | 
 43 | 	args = flag.Args()
 44 | 	if len(args) == 0 {
 45 | 		printUsage()
 46 | 	}
 47 | 
 48 | 	if realTimeLimit < timeLimit {
 49 | 		realTimeLimit = timeLimit + 2
 50 | 	}
 51 | 	if stackLimit > memoryLimit {
 52 | 		stackLimit = memoryLimit
 53 | 	}
 54 | 	if workPath == "" {
 55 | 		workPath, _ = os.Getwd()
 56 | 	}
 57 | 
 58 | 	var (
 59 | 		f   *os.File
 60 | 		err error
 61 | 	)
 62 | 	if result == "stdout" {
 63 | 		f = os.Stdout
 64 | 	} else if result == "stderr" {
 65 | 		f = os.Stderr
 66 | 	} else {
 67 | 		f, err = os.Create(result)
 68 | 		if err != nil {
 69 | 			debug("Failed to open result file:", err)
 70 | 			return
 71 | 		}
 72 | 		defer f.Close()
 73 | 	}
 74 | 
 75 | 	rt, err := start()
 76 | 	debug(rt, err)
 77 | 	if e, ok := err.(syscall.Errno); ok {
 78 | 		debug("errno", int(e))
 79 | 	}
 80 | 
 81 | 	if rt == nil {
 82 | 		rt = &runner.Result{
 83 | 			Status: runner.StatusRunnerError,
 84 | 		}
 85 | 	}
 86 | 	if err == nil && rt.Status != runner.StatusNormal {
 87 | 		err = rt.Status
 88 | 	}
 89 | 	debug("setupTime: ", rt.SetUpTime)
 90 | 	debug("runningTime: ", rt.RunningTime)
 91 | 	if err != nil {
 92 | 		debug(err)
 93 | 		c, ok := err.(runner.Status)
 94 | 		if !ok {
 95 | 			c = runner.StatusRunnerError
 96 | 		}
 97 | 		// Handle fatal error from trace
 98 | 		fmt.Fprintf(f, "%d %d %d %d\n", getStatus(c), int(rt.Time/time.Millisecond), uint64(rt.Memory)>>10, rt.ExitStatus)
 99 | 		if c == runner.StatusRunnerError {
100 | 			os.Exit(1)
101 | 		}
102 | 	} else {
103 | 		fmt.Fprintf(f, "%d %d %d %d\n", 0, int(rt.Time/time.Millisecond), uint64(rt.Memory)>>10, rt.ExitStatus)
104 | 	}
105 | }
106 | 
107 | func start() (*runner.Result, error) {
108 | 	var sTime, mTime, fTime time.Time
109 | 	sTime = time.Now()
110 | 	files, err := prepareFiles(inputFileName, outputFileName, errorFileName)
111 | 	if err != nil {
112 | 		return nil, err
113 | 	}
114 | 	defer closeFiles(files)
115 | 
116 | 	var profile string
117 | 	if profilePath != "" {
118 | 		c, err := os.ReadFile(profilePath)
119 | 		if err != nil {
120 | 			return nil, fmt.Errorf("profile: %w", err)
121 | 		}
122 | 		profile = string(c)
123 | 	}
124 | 
125 | 	// if not defined, then use the original value
126 | 	fds := make([]uintptr, len(files))
127 | 	for i, f := range files {
128 | 		if f != nil {
129 | 			fds[i] = f.Fd()
130 | 		} else {
131 | 			fds[i] = uintptr(i)
132 | 		}
133 | 	}
134 | 
135 | 	rlims := rlimit.RLimits{
136 | 		CPU:          timeLimit,
137 | 		CPUHard:      realTimeLimit,
138 | 		FileSize:     outputLimit << 20,
139 | 		Data:         memoryLimit << 20,
140 | 		AddressSpace: memoryLimit << 20,
141 | 		Stack:        stackLimit << 20,
142 | 	}
143 | 
144 | 	debug(rlims)
145 | 	debug(args)
146 | 
147 | 	r := forkexec.Runner{
148 | 		Args:           args,
149 | 		Env:            []string{pathEnv},
150 | 		RLimits:        rlims.PrepareRLimit(),
151 | 		Files:          fds,
152 | 		WorkDir:        workPath,
153 | 		SandboxProfile: profile,
154 | 		SyncFunc: func(pid int) error {
155 | 			mTime = time.Now()
156 | 			return nil
157 | 		},
158 | 	}
159 | 	pid, err := r.Start()
160 | 	if err != nil {
161 | 		return nil, err
162 | 	}
163 | 
164 | 	defer func() {
165 | 		killAll(pid)
166 | 		collectZombie(pid)
167 | 	}()
168 | 
169 | 	var (
170 | 		wstatus syscall.WaitStatus
171 | 		rusage  syscall.Rusage
172 | 	)
173 | 	for {
174 | 		_, err = syscall.Wait4(pid, &wstatus, 0, &rusage)
175 | 		if err == syscall.EINTR {
176 | 			continue
177 | 		}
178 | 		fTime = time.Now()
179 | 		if err != nil {
180 | 			return nil, err
181 | 		}
182 | 		result := runner.Result{
183 | 			Status:      runner.StatusNormal,
184 | 			Time:        time.Duration(rusage.Utime.Nano()),
185 | 			Memory:      runner.Size(rusage.Maxrss), // seems MacOS uses bytes instead of kb
186 | 			SetUpTime:   mTime.Sub(sTime),
187 | 			RunningTime: fTime.Sub(mTime),
188 | 		}
189 | 		if uint64(result.Time) > timeLimit*1e9 {
190 | 			result.Status = runner.StatusTimeLimitExceeded
191 | 		}
192 | 		if uint64(result.Memory) > memoryLimit<<20 {
193 | 			result.Status = runner.StatusMemoryLimitExceeded
194 | 		}
195 | 
196 | 		switch {
197 | 		case wstatus.Exited():
198 | 			if status := wstatus.ExitStatus(); status != 0 {
199 | 				result.Status = runner.StatusNonzeroExitStatus
200 | 			}
201 | 			return &result, nil
202 | 
203 | 		case wstatus.Signaled():
204 | 			sig := wstatus.Signal()
205 | 			switch sig {
206 | 			case unix.SIGXCPU, unix.SIGKILL:
207 | 				result.Status = runner.StatusTimeLimitExceeded
208 | 			case unix.SIGXFSZ:
209 | 				result.Status = runner.StatusOutputLimitExceeded
210 | 			case unix.SIGSYS:
211 | 				result.Status = runner.StatusDisallowedSyscall
212 | 			default:
213 | 				result.Status = runner.StatusSignalled
214 | 			}
215 | 			result.ExitStatus = int(sig)
216 | 			return &result, nil
217 | 		}
218 | 	}
219 | }
220 | 
221 | // kill all tracee according to pids
222 | func killAll(pgid int) {
223 | 	unix.Kill(-pgid, unix.SIGKILL)
224 | }
225 | 
226 | // collect died child processes
227 | func collectZombie(pgid int) {
228 | 	var wstatus unix.WaitStatus
229 | 	for {
230 | 		if _, err := unix.Wait4(-pgid, &wstatus, unix.WNOHANG, nil); err != unix.EINTR && err != nil {
231 | 			break
232 | 		}
233 | 	}
234 | }
235 | 


--------------------------------------------------------------------------------
/container/benchmark_linux_test.go:
--------------------------------------------------------------------------------
  1 | package container
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"errors"
  6 | 	"os"
  7 | 	"runtime"
  8 | 	"syscall"
  9 | 	"testing"
 10 | 
 11 | 	"github.com/criyle/go-sandbox/runner"
 12 | )
 13 | 
// init calls the package's Init before any test or benchmark runs.
// NOTE(review): presumably this lets the re-executed test binary act as the
// container init process; confirm against Init.
func init() {
	Init()
}
 17 | 
 18 | func BenchmarkContainer(b *testing.B) {
 19 | 	tmpDir, err := os.MkdirTemp("", "")
 20 | 	if err != nil {
 21 | 		b.Error(err)
 22 | 	}
 23 | 	builder := &Builder{
 24 | 		Root:   tmpDir,
 25 | 		Stderr: os.Stderr,
 26 | 	}
 27 | 	n := runtime.GOMAXPROCS(0)
 28 | 	ch := make(chan Environment, n)
 29 | 	for i := 0; i < n; i++ {
 30 | 		m, err := builder.Build()
 31 | 		if err != nil {
 32 | 			b.Error(err)
 33 | 		}
 34 | 		b.Cleanup(func() {
 35 | 			m.Destroy()
 36 | 		})
 37 | 		ch <- m
 38 | 	}
 39 | 	b.ResetTimer()
 40 | 	b.RunParallel(func(pb *testing.PB) {
 41 | 		m := <-ch
 42 | 		for pb.Next() {
 43 | 			r := m.Execve(context.TODO(), ExecveParam{
 44 | 				Args: []string{"/bin/true"},
 45 | 				Env:  []string{"PATH=/bin"},
 46 | 			})
 47 | 			if r.Status != runner.StatusNormal {
 48 | 				b.Error(r.Status, r.Error)
 49 | 			}
 50 | 		}
 51 | 	})
 52 | }
 53 | 
// testCase describes one table-driven container execution test.
type testCase struct {
	name     string        // subtest name
	param    ExecveParam   // parameters passed to Execve
	expected runner.Status // status the execution must report
}
 59 | 
 60 | var err error = errors.New("test error")
 61 | 
// successParam runs /bin/true; the tests expect it to finish with
// runner.StatusNormal.
var successParam = ExecveParam{
	Args: []string{"/bin/true"},
	Env:  []string{"PATH=/bin"},
}
 66 | 
// tests is the table executed by TestCases: each entry gives the Execve
// parameters and the status expected from the container.
var tests []testCase = []testCase{
	{
		name:     "Success",
		param:    successParam,
		expected: runner.StatusNormal,
	},
	{
		// Same as Success but with a SyncFunc that succeeds.
		name: "SuccessWithSync",
		param: ExecveParam{
			Args:     []string{"/bin/true"},
			Env:      []string{"PATH=/bin"},
			SyncFunc: func(p int) error { return nil },
		},
		expected: runner.StatusNormal,
	},
	{
		// The executable does not exist.
		name: "NotExists",
		param: ExecveParam{
			Args: []string{"not_exists"},
			Env:  []string{"PATH=/bin"},
		},
		expected: runner.StatusRunnerError,
	},
	{
		name: "NotExistsWithSync",
		param: ExecveParam{
			Args:     []string{"not_exists"},
			Env:      []string{"PATH=/bin"},
			SyncFunc: func(p int) error { return nil },
		},
		expected: runner.StatusRunnerError,
	},
	{
		// SyncFunc reports an error.
		name: "SyncFuncFail",
		param: ExecveParam{
			Args: []string{"/bin/true"},
			Env:  []string{"PATH=/bin"},
			SyncFunc: func(pid int) error {
				return err
			},
		},
		expected: runner.StatusRunnerError,
	},
	{
		// SyncFunc reports an error with SyncAfterExec set.
		name: "SyncFuncFailAfterExec",
		param: ExecveParam{
			Args: []string{"/bin/true"},
			Env:  []string{"PATH=/bin"},
			SyncFunc: func(pid int) error {
				return err
			},
			SyncAfterExec: true,
		},
		expected: runner.StatusRunnerError,
	},
}
123 | 
// credgen is a credential generator for tests that always yields the same
// fixed credential.
type credgen struct{}

// Get returns a credential with both Uid and Gid set to 10000.
func (c credgen) Get() syscall.Credential {
	var cred syscall.Credential
	cred.Uid = 10000
	cred.Gid = 10000
	return cred
}
132 | 
133 | func TestContainerSetCred(t *testing.T) {
134 | 	t.Parallel()
135 | 	if os.Getpid() != 1 {
136 | 		t.Skip("root required for this test")
137 | 	}
138 | 	runTest(t, successParam, runner.StatusNormal, credgen{})
139 | }
140 | 
141 | func runTest(t *testing.T, param ExecveParam, expected runner.Status, credGen CredGenerator) {
142 | 	t.Parallel()
143 | 	m := getEnv(t, credGen)
144 | 	r := m.Execve(context.TODO(), param)
145 | 	if r.Status != expected {
146 | 		t.Fatal(r.Status, r.Error, r)
147 | 	}
148 | 	if err := m.Ping(); err != nil {
149 | 		t.Fatal(err)
150 | 	}
151 | 	// can also success once more (no protocol mismatch)
152 | 	r = m.Execve(context.TODO(), successParam)
153 | 	if r.Status != runner.StatusNormal {
154 | 		t.Fatal(r.Status, r.Error, r)
155 | 	}
156 | }
157 | 
158 | func TestCases(t *testing.T) {
159 | 	for _, c := range tests {
160 | 		t.Run(c.name, func(t *testing.T) {
161 | 			runTest(t, c.param, c.expected, nil)
162 | 		})
163 | 	}
164 | }
165 | 
166 | func getEnv(t *testing.T, credGen CredGenerator) Environment {
167 | 	tmpDir, err := os.MkdirTemp("", "")
168 | 	if err != nil {
169 | 		t.Fatal(err)
170 | 	}
171 | 	t.Cleanup(func() {
172 | 		os.Remove(tmpDir)
173 | 	})
174 | 	builder := &Builder{
175 | 		Root:          tmpDir,
176 | 		CredGenerator: credGen,
177 | 		Stderr:        os.Stderr,
178 | 	}
179 | 	m, err := builder.Build()
180 | 	if err != nil {
181 | 		t.Fatal(err)
182 | 	}
183 | 	t.Cleanup(func() {
184 | 		m.Destroy()
185 | 	})
186 | 	return m
187 | }
188 | 


--------------------------------------------------------------------------------
/container/consts_linux.go:
--------------------------------------------------------------------------------
 1 | package container
 2 | 
// cmdType identifies the kind of a cmd control message.
type cmdType int8
 4 | 
const (
	// command identifiers; numbering starts at 1 (iota + 1), so the zero
	// value of cmdType does not match any command
	cmdPing cmdType = iota + 1
	cmdOpen
	cmdDelete
	cmdReset
	cmdExecve
	cmdOk
	cmdKill
	cmdConf

	// NOTE(review): presumably the argument that marks a process as the
	// container init process; usage is not visible in this file
	initArg = "container_init"

	// default uid / gid, applied by handleConf when the configured
	// ContainerUID / ContainerGID is 0
	containerUID = 1000
	containerGID = 1000

	// NOTE(review): presumably defaults for the container host name and
	// working directory; confirm against the builder
	containerName = "go-sandbox"
	containerWD   = "/w"

	containerMaxProc = 1
)
25 | 
// defaultSymLinks lists symbolic links that map the standard /dev fd entries
// to their /proc/self/fd counterparts.
var defaultSymLinks = []SymbolicLink{
	{LinkPath: "/dev/fd", Target: "/proc/self/fd"},
	{LinkPath: "/dev/stdin", Target: "/proc/self/fd/0"},
	{LinkPath: "/dev/stdout", Target: "/proc/self/fd/1"},
	{LinkPath: "/dev/stderr", Target: "/proc/self/fd/2"},
}
32 | 
// defaultMaskPaths lists paths to be masked by default.
// The /proc and /sys entries follow the containerd default spec linked below.
var defaultMaskPaths = []string{
	// https://github.com/containerd/containerd/blob/f0a32c66dad1e9de716c9960af806105d691cd78/oci/spec.go#L165-L176
	"/proc/acpi",
	"/proc/asound",
	"/proc/kcore",
	"/proc/keys",
	"/proc/latency_stats",
	"/proc/timer_list",
	"/proc/timer_stats",
	"/proc/sched_debug",
	"/sys/firmware",
	"/proc/scsi",

	// not part of the containerd list above
	"/usr/lib/wsl",
}
48 | 


--------------------------------------------------------------------------------
/container/container_cmd_linux.go:
--------------------------------------------------------------------------------
  1 | package container
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"errors"
  6 | 	"fmt"
  7 | 	"os"
  8 | 	"path/filepath"
  9 | 	"strings"
 10 | 
 11 | 	"github.com/criyle/go-sandbox/pkg/unixsocket"
 12 | )
 13 | 
// handlePing answers a ping command with an empty reply.
func (c *containerServer) handlePing() error {
	return c.sendReply(reply{}, unixsocket.Msg{})
}
 17 | 
 18 | func (c *containerServer) handleConf(conf *confCmd) error {
 19 | 	if conf != nil {
 20 | 		c.containerConfig = conf.Conf
 21 | 		if err := initContainer(conf.Conf); err != nil {
 22 | 			return err
 23 | 		}
 24 | 		if c.ContainerUID == 0 {
 25 | 			c.ContainerUID = containerUID
 26 | 		}
 27 | 		if c.ContainerGID == 0 {
 28 | 			c.ContainerGID = containerGID
 29 | 		}
 30 | 		env, err := readDotEnv()
 31 | 		if err != nil {
 32 | 			return err
 33 | 		}
 34 | 		c.defaultEnv = env
 35 | 	}
 36 | 	return c.sendReply(reply{}, unixsocket.Msg{})
 37 | }
 38 | 
 39 | func (c *containerServer) handleOpen(open []OpenCmd) error {
 40 | 	if len(open) == 0 {
 41 | 		return c.sendErrorReply("open: no open parameter received")
 42 | 	}
 43 | 
 44 | 	// open files
 45 | 	fds := make([]int, 0, len(open))
 46 | 	fileToClose := make([]*os.File, 0, len(open)) // let sendMsg close these files
 47 | 	for _, o := range open {
 48 | 		outFile, err := os.OpenFile(o.Path, o.Flag, o.Perm)
 49 | 		if err != nil {
 50 | 			for _, f := range fileToClose {
 51 | 				f.Close()
 52 | 			}
 53 | 			return c.sendErrorReply("open: %v", err)
 54 | 		}
 55 | 		fileToClose = append(fileToClose, outFile)
 56 | 		fds = append(fds, int(outFile.Fd()))
 57 | 	}
 58 | 
 59 | 	return c.sendReplyFiles(reply{}, unixsocket.Msg{Fds: fds}, fileToClose)
 60 | }
 61 | 
 62 | func (c *containerServer) handleDelete(delete *deleteCmd) error {
 63 | 	if delete == nil {
 64 | 		return c.sendErrorReply("delete: no parameter provided")
 65 | 	}
 66 | 	if err := os.Remove(delete.Path); err != nil {
 67 | 		return c.sendErrorReply("delete: %v", err)
 68 | 	}
 69 | 	return c.sendReply(reply{}, unixsocket.Msg{})
 70 | }
 71 | 
 72 | func (c *containerServer) handleReset() error {
 73 | 	for _, m := range c.Mounts {
 74 | 		if !m.IsTmpFs() {
 75 | 			continue
 76 | 		}
 77 | 		if err := removeContents(filepath.Join("/", m.Target)); err != nil {
 78 | 			return c.sendErrorReply("reset: %v %v", m.Target, err)
 79 | 		}
 80 | 	}
 81 | 	return c.sendReply(reply{}, unixsocket.Msg{})
 82 | }
 83 | 
 84 | // readDotEnv attempts to read /.env file and save as default environment variables
 85 | func readDotEnv() ([]string, error) {
 86 | 	f, err := os.Open("/.env")
 87 | 	if err != nil {
 88 | 		if errors.Is(err, os.ErrNotExist) {
 89 | 			return nil, nil
 90 | 		}
 91 | 		return nil, fmt.Errorf("dotenv: open /.env: %w", err)
 92 | 	}
 93 | 	defer f.Close()
 94 | 
 95 | 	var ret []string
 96 | 	scanner := bufio.NewScanner(f)
 97 | 	for scanner.Scan() {
 98 | 		line := strings.TrimSpace(scanner.Text())
 99 | 		if len(line) == 0 || strings.HasPrefix(line, "#") {
100 | 			continue
101 | 		}
102 | 		if !strings.Contains(line, "=") {
103 | 			return nil, fmt.Errorf("dotenv: invalid line: %s", line)
104 | 		}
105 | 		ret = append(ret, line)
106 | 	}
107 | 	return ret, nil
108 | }
109 | 


--------------------------------------------------------------------------------
/container/container_exec_linux.go:
--------------------------------------------------------------------------------
  1 | package container
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"syscall"
  6 | 	"time"
  7 | 
  8 | 	"github.com/criyle/go-sandbox/pkg/forkexec"
  9 | 	"github.com/criyle/go-sandbox/pkg/unixsocket"
 10 | 	"github.com/criyle/go-sandbox/runner"
 11 | )
 12 | 
// handleExecve starts the requested program inside the container and then
// hands over to handleExecveStarted to report its result.
//
// msg carries the file descriptors sent by the host: first the executable fd
// (when cmd.FdExec is set), then the cgroup fd (when cmd.FdCgroup is set),
// followed by the fds for the child. The environment is the default
// environment from /.env followed by cmd.Env. Failures before the program is
// started are reported to the host as error replies.
func (c *containerServer) handleExecve(cmd *execCmd, msg unixsocket.Msg) error {
	var (
		files    []uintptr
		execFile uintptr
		cgroupFd uintptr
		cred     *syscall.Credential
	)
	if cmd == nil {
		return c.sendErrorReply("handle: no parameter provided")
	}
	if len(msg.Fds) > 0 {
		files = intSliceToUintptr(msg.Fds)
		// don't leak fds to child
		closeOnExecFds(msg.Fds)
		// release files after execve
		defer closeFds(msg.Fds)
	}

	// if fexecve, then the first fd must be executable
	if cmd.FdExec {
		if len(files) == 0 {
			return c.sendErrorReply("handle: expected fexecve fd")
		}
		execFile = files[0]
		files = files[1:]
	}
	// if cgroupFd, then the cgroupFd follows
	if cmd.FdCgroup {
		if len(files) == 0 {
			return c.sendErrorReply("handle: expected cgroup fd")
		}
		cgroupFd = files[0]
		files = files[1:]
	}

	// defaults first, so that the values supplied with the command come last
	var env []string
	env = append(env, c.defaultEnv...)
	env = append(env, cmd.Env...)

	// resolve argv[0] using the PATH from the combined environment
	if len(cmd.Argv) > 0 {
		exePath, err := lookPath(cmd.Argv[0], env)
		if err != nil {
			return c.sendErrorReply("handle: %s: %v", cmd.Argv[0], err)
		}
		cmd.Argv[0] = exePath
	}

	// syncPid reports pid to the host (as socket credentials) and waits for
	// the host's answer; a kill command is turned into an error
	syncPid := func(pid int) error {
		msg := unixsocket.Msg{
			Cred: &syscall.Ucred{
				Pid: int32(pid),
				Uid: uint32(syscall.Getuid()),
				Gid: uint32(syscall.Getgid()),
			},
		}
		if err := c.sendReply(reply{}, msg); err != nil {
			return fmt.Errorf("sync func: send reply: %w", err)
		}
		cmd, _, err := c.recvCmd()
		if err != nil {
			return fmt.Errorf("sync func: recv cmd: %w", err)
		}
		if cmd.Cmd == cmdKill {
			return fmt.Errorf("sync func: received kill")
		}
		return nil
	}
	// with SyncAfter the sync is done below, after the runner has started
	var syncFunc func(pid int) error
	if !cmd.SyncAfter {
		syncFunc = syncPid
	}

	if c.Cred {
		cred = &syscall.Credential{
			Uid:         uint32(c.ContainerUID),
			Gid:         uint32(c.ContainerGID),
			NoSetGroups: true,
		}
	}

	var seccomp *syscall.SockFprog
	if cmd.Seccomp != nil {
		seccomp = cmd.Seccomp.SockFprog()
	}

	r := forkexec.Runner{
		Args:       cmd.Argv,
		Env:        env,
		ExecFile:   execFile,
		RLimits:    cmd.RLimits,
		Files:      files,
		WorkDir:    c.WorkDir,
		NoNewPrivs: true,
		DropCaps:   true,
		SyncFunc:   syncFunc,
		Credential: cred,
		CTTY:       cmd.CTTY,
		Seccomp:    seccomp,
		CgroupFd:   cgroupFd,

		UnshareCgroupAfterSync: c.UnshareCgroup,
	}
	// starts the runner, error is handled same as wait4 to make communication equal
	pid, err := r.Start()
	if err != nil {
		s := "<nil>"
		if len(cmd.Argv) > 0 {
			s = cmd.Argv[0]
		}
		return c.sendErrorReply("start: %s: %v", s, err)
	}
	if cmd.SyncAfter {
		// the constant 1 is reported instead of the real pid in this mode
		if err := syncPid(1); err != nil {
			// sync failed: kill everything, report the wait result of the
			// started process and wait for the remaining processes
			syscall.Kill(-1, syscall.SIGKILL)

			c.waitPid <- pid
			ret := <-c.waitPidResult
			err := c.sendReply(convertReply(ret), unixsocket.Msg{})

			c.waitAll <- struct{}{}
			<-c.waitAllDone
			return err
		}
	}
	return c.handleExecveStarted(pid)
}
139 | 
// handleExecveStarted waits for the started process pid to finish, or for a
// kill command from the host, and sends the converted wait result back.
//
// In both cases SIGKILL is sent with kill(-1), a wait-all request is issued
// and the function blocks until it is reported done. It returns c.err when
// the done channel fires first, or the error of a failed send / receive.
func (c *containerServer) handleExecveStarted(pid int) error {
	// At this point, either recv kill / send result would be happened
	// host -> container: kill
	// container -> host: result
	// container -> host: done

	// Let's register a wait event
	c.waitPid <- pid

	var ret waitPidResult
	select {
	case <-c.done: // socket error happened
		return c.err

	case <-c.recvCh: // kill cmd received
		syscall.Kill(-1, syscall.SIGKILL)
		ret = <-c.waitPidResult
		c.waitAll <- struct{}{}

		if err := c.sendReply(convertReply(ret), unixsocket.Msg{}); err != nil {
			return err
		}

	case ret = <-c.waitPidResult: // child process returned
		syscall.Kill(-1, syscall.SIGKILL)
		c.waitAll <- struct{}{}

		if err := c.sendReply(convertReply(ret), unixsocket.Msg{}); err != nil {
			return err
		}
		// the host answers the result with a kill cmd; consume it here
		if _, _, err := c.recvCmd(); err != nil { // kill cmd received
			return err
		}
	}
	<-c.waitAllDone
	return nil
}
177 | 
178 | func convertReply(ret waitPidResult) reply {
179 | 	if ret.Err != nil {
180 | 		return reply{
181 | 			Error: &errorReply{
182 | 				Msg: fmt.Sprintf("execve: wait4: %v", ret.Err),
183 | 			},
184 | 		}
185 | 	}
186 | 
187 | 	waitStatus := ret.WaitStatus
188 | 	rusage := ret.Rusage
189 | 
190 | 	status := runner.StatusNormal
191 | 	userTime := time.Duration(rusage.Utime.Nano()) // ns
192 | 	userMem := runner.Size(rusage.Maxrss << 10)    // bytes
193 | 	switch {
194 | 	case waitStatus.Exited():
195 | 		exitStatus := waitStatus.ExitStatus()
196 | 		if exitStatus != 0 {
197 | 			status = runner.StatusNonzeroExitStatus
198 | 		}
199 | 		return reply{
200 | 			ExecReply: &execReply{
201 | 				Status:     status,
202 | 				ExitStatus: exitStatus,
203 | 				Time:       userTime,
204 | 				Memory:     userMem,
205 | 			},
206 | 		}
207 | 
208 | 	case waitStatus.Signaled():
209 | 		switch waitStatus.Signal() {
210 | 		// kill signal treats as TLE
211 | 		case syscall.SIGXCPU, syscall.SIGKILL:
212 | 			status = runner.StatusTimeLimitExceeded
213 | 		case syscall.SIGXFSZ:
214 | 			status = runner.StatusOutputLimitExceeded
215 | 		case syscall.SIGSYS:
216 | 			status = runner.StatusDisallowedSyscall
217 | 		default:
218 | 			status = runner.StatusSignalled
219 | 		}
220 | 		return reply{
221 | 			ExecReply: &execReply{
222 | 				ExitStatus: int(waitStatus.Signal()),
223 | 				Status:     status,
224 | 				Time:       userTime,
225 | 				Memory:     userMem,
226 | 			},
227 | 		}
228 | 
229 | 	default:
230 | 		return reply{
231 | 			Error: &errorReply{
232 | 				Msg: fmt.Sprintf("execve: unknown status: %v", waitStatus),
233 | 			},
234 | 		}
235 | 	}
236 | }
237 | 


--------------------------------------------------------------------------------
/container/doc.go:
--------------------------------------------------------------------------------
 1 | // Package container provides pre-forked container environment to
 2 | // run programs in isolated Linux namespaces.
 3 | //
 4 | // # Overview
 5 | //
// It creates a container within unshared namespaces and communicates
// with the host process over a unix socket, using
// oob data for fd / pid and commands encoded by gob.
 9 | //
10 | // # Protocol
11 | //
12 | // Host to container communication protocol is single threaded and always initiated by
13 | // the host:
14 | //
15 | // ## ping (alive check)
16 | //
17 | // - send: ping
18 | // - reply: pong
19 | //
20 | // ## conf (set configuration)
21 | //
22 | // - send: conf
23 | // - reply:
24 | //
25 | // ## open (open files in given mode inside container):
26 | //
27 | // - send: []OpenCmd
28 | // - reply: "success", file fds / "error"
29 | //
30 | // ## delete (unlink file / rmdir dir inside container):
31 | //
32 | // - send: path
33 | // - reply: "finished" / "error"
34 | //
35 | // ## reset (clean up container for later use (clear workdir / tmp)):
36 | //
37 | // - send:
38 | // - reply: "success"
39 | //
40 | // ## execve: (execute file inside container):
41 | //
42 | // - send: argv, env, rLimits, fds
43 | // - reply:
44 | // - success: "success", pid
45 | // - failed: "failed"
46 | // - send (success): "init_finished" (as cmd)
47 | // - reply: "finished" / send: "kill" (as cmd)
48 | // - send: "kill" (as cmd) / reply: "finished"
49 | // - reply:
50 | //
// Any socket-related error causes the container to exit, together with all processes inside it.
52 | package container
53 | 


--------------------------------------------------------------------------------
/container/host_cmd_linux.go:
--------------------------------------------------------------------------------
  1 | package container
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"os"
  6 | 	"syscall"
  7 | 	"time"
  8 | 
  9 | 	"github.com/criyle/go-sandbox/pkg/unixsocket"
 10 | )
 11 | 
 12 | // Ping send ping message to container, wait for 3 second before timeout
 13 | func (c *container) Ping() error {
 14 | 	c.mu.Lock()
 15 | 	defer c.mu.Unlock()
 16 | 
 17 | 	// avoid infinite wait (max 3s)
 18 | 	const pingWait = 3 * time.Second
 19 | 	c.socket.SetDeadline(time.Now().Add(pingWait))
 20 | 	defer c.socket.SetDeadline(time.Time{})
 21 | 
 22 | 	// send ping
 23 | 	cmd := cmd{
 24 | 		Cmd: cmdPing,
 25 | 	}
 26 | 	if err := c.sendCmd(cmd, unixsocket.Msg{}); err != nil {
 27 | 		return fmt.Errorf("ping: %w", err)
 28 | 	}
 29 | 	// receive no error
 30 | 	return c.recvAckReply("ping")
 31 | }
 32 | 
 33 | // conf send configuration to container (used by builder only)
 34 | func (c *container) conf(conf *containerConfig) error {
 35 | 	c.mu.Lock()
 36 | 	defer c.mu.Unlock()
 37 | 
 38 | 	cmd := cmd{
 39 | 		Cmd:     cmdConf,
 40 | 		ConfCmd: &confCmd{Conf: *conf},
 41 | 	}
 42 | 	if err := c.sendCmd(cmd, unixsocket.Msg{}); err != nil {
 43 | 		return fmt.Errorf("conf: %w", err)
 44 | 	}
 45 | 	return c.recvAckReply("conf")
 46 | }
 47 | 
 48 | // Open open files in container
 49 | func (c *container) Open(p []OpenCmd) ([]*os.File, error) {
 50 | 	c.mu.Lock()
 51 | 	defer c.mu.Unlock()
 52 | 
 53 | 	syscall.ForkLock.RLock()
 54 | 	defer syscall.ForkLock.RUnlock()
 55 | 
 56 | 	// send copyin
 57 | 	cmd := cmd{
 58 | 		Cmd:     cmdOpen,
 59 | 		OpenCmd: p,
 60 | 	}
 61 | 	if err := c.sendCmd(cmd, unixsocket.Msg{}); err != nil {
 62 | 		return nil, fmt.Errorf("open: %w", err)
 63 | 	}
 64 | 	reply, msg, err := c.recvReply()
 65 | 	if err != nil {
 66 | 		return nil, fmt.Errorf("open: %w", err)
 67 | 	}
 68 | 	if reply.Error != nil {
 69 | 		return nil, fmt.Errorf("open: %v", reply.Error)
 70 | 	}
 71 | 	if len(msg.Fds) != len(p) {
 72 | 		closeFds(msg.Fds)
 73 | 		return nil, fmt.Errorf("open: unexpected number of fds: got %d, want %d", len(msg.Fds), len(p))
 74 | 	}
 75 | 
 76 | 	ret := make([]*os.File, 0, len(p))
 77 | 	for i, fd := range msg.Fds {
 78 | 		syscall.CloseOnExec(fd)
 79 | 		f := os.NewFile(uintptr(fd), p[i].Path)
 80 | 		if f == nil {
 81 | 			closeFds(msg.Fds)
 82 | 			return nil, fmt.Errorf("open: failed to create file for fd: %d", fd)
 83 | 		}
 84 | 		ret = append(ret, f)
 85 | 	}
 86 | 	return ret, nil
 87 | }
 88 | 
 89 | // Delete remove file from container
 90 | func (c *container) Delete(p string) error {
 91 | 	c.mu.Lock()
 92 | 	defer c.mu.Unlock()
 93 | 
 94 | 	cmd := cmd{
 95 | 		Cmd:       cmdDelete,
 96 | 		DeleteCmd: &deleteCmd{Path: p},
 97 | 	}
 98 | 	if err := c.sendCmd(cmd, unixsocket.Msg{}); err != nil {
 99 | 		return fmt.Errorf("delete: %w", err)
100 | 	}
101 | 	return c.recvAckReply("delete")
102 | }
103 | 
104 | // Reset remove all from /tmp and /w
105 | func (c *container) Reset() error {
106 | 	c.mu.Lock()
107 | 	defer c.mu.Unlock()
108 | 
109 | 	cmd := cmd{
110 | 		Cmd: cmdReset,
111 | 	}
112 | 	if err := c.sendCmd(cmd, unixsocket.Msg{}); err != nil {
113 | 		return fmt.Errorf("reset: %w", err)
114 | 	}
115 | 	return c.recvAckReply("reset")
116 | }
117 | 


--------------------------------------------------------------------------------
/container/host_exec_linux.go:
--------------------------------------------------------------------------------
  1 | package container
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"time"
  7 | 
  8 | 	"github.com/criyle/go-sandbox/pkg/rlimit"
  9 | 	"github.com/criyle/go-sandbox/pkg/seccomp"
 10 | 	"github.com/criyle/go-sandbox/pkg/unixsocket"
 11 | 	"github.com/criyle/go-sandbox/runner"
 12 | )
 13 | 
// ExecveParam holds the parameters to run a process inside the container.
type ExecveParam struct {
	// Args holds command line arguments
	Args []string

	// Env specifies the environment of the process
	Env []string

	// Files specifies file descriptors for the child process
	Files []uintptr

	// ExecFile specifies file descriptor for executable file using fexecve.
	// Execve only treats values greater than 0 as set.
	ExecFile uintptr

	// CgroupFD specifies file descriptor for cgroup V2.
	// Execve only treats values greater than 0 as set.
	CgroupFD uintptr

	// RLimits specifies POSIX Resource limit through setrlimit
	RLimits []rlimit.RLimit

	// Seccomp specifies seccomp filter
	Seccomp seccomp.Filter

	// CTTY specifies whether to set controlling TTY
	CTTY bool

	// SyncFunc is called with the pid just before execve (e.g. to attach the
	// process to cgroups). A returned error aborts the execution.
	SyncFunc func(pid int) error

	// SyncAfterExec makes SyncFunc run after the execution has started.
	// Since the process is not guaranteed to exist at that point (it may exit
	// early), SyncFunc does not receive the pid of the process in this mode.
	SyncAfterExec bool
}
 47 | 
// Execve runs a process inside the container and returns its result.
// Context cancellation is handled by killing the process (see waitForDone).
//
// Fds are sent in the order: ExecFile (if set), CgroupFD (if set), Files.
// After the container reports the pid, param.SyncFunc (if any) is called with
// it; the execution continues only after an ok command is sent. Every
// communication or sync failure yields a result with StatusRunnerError.
func (c *container) Execve(ctx context.Context, param ExecveParam) runner.Result {
	c.mu.Lock()
	defer c.mu.Unlock()

	sTime := time.Now()

	// if execve with fd, put fd at the first parameter
	var files []int
	if param.ExecFile > 0 {
		files = append(files, int(param.ExecFile))
	}
	if param.CgroupFD > 0 {
		files = append(files, int(param.CgroupFD))
	}
	files = append(files, uintptrSliceToInt(param.Files)...)
	msg := unixsocket.Msg{
		Fds: files,
	}
	execCmd := &execCmd{
		Argv:      param.Args,
		Env:       param.Env,
		RLimits:   param.RLimits,
		Seccomp:   param.Seccomp,
		FdExec:    param.ExecFile > 0,
		CTTY:      param.CTTY,
		SyncAfter: param.SyncAfterExec,
		FdCgroup:  param.CgroupFD > 0,
	}
	cm := cmd{
		Cmd:     cmdExecve,
		ExecCmd: execCmd,
	}
	if err := c.sendCmd(cm, msg); err != nil {
		return errResult("execve: sendCmd %v", err)
	}
	// sync function
	rep, msg, err := c.recvReply()
	if err != nil {
		return errResult("execve: recvReply %v", err)
	}
	// an error reply here means the sync function was never reached
	if rep.Error != nil {
		return errResult("execve: %v", rep.Error)
	}
	// if pid not received
	if msg.Cred == nil {
		// tell kill function to exit and sync
		c.execveSyncKill()
		// tell err exec function to exit and sync
		c.execveSyncKill()
		return errResult("execve: no pid received")
	}
	if param.SyncFunc != nil {
		if err := param.SyncFunc(int(msg.Cred.Pid)); err != nil {
			// tell sync function to exit and recv error
			c.execveSyncKill()
			return errResult("execve: syncfunc failed %v", err)
		}
	}
	// send to syncFunc ack ok
	if err := c.sendCmd(cmd{Cmd: cmdOk}, unixsocket.Msg{}); err != nil {
		return errResult("execve: ack failed %v", err)
	}

	// wait for done
	return c.waitForDone(ctx, sTime)
}
116 | 
// waitForDone waits until the running process finishes, ctx is cancelled or
// the done channel fires, and converts the outcome into a runner.Result.
//
// sTime is the time Execve was entered; the time this function is entered
// separates set-up time from running time in the result.
func (c *container) waitForDone(ctx context.Context, sTime time.Time) runner.Result {
	mTime := time.Now()
	select {
	case <-c.done: // socket error
		return convertReplyResult(reply{}, sTime, mTime, c.err)

	case <-ctx.Done(): // cancel
		// the send error is ignored here; the reply (or the error of
		// receiving it) decides the result
		c.sendCmd(cmd{Cmd: cmdKill}, unixsocket.Msg{}) // kill
		reply, _, err := c.recvReply()
		return convertReplyResult(reply, sTime, mTime, err)

	case ret := <-c.recvCh: // result
		// the result is answered with a kill command as well
		err := c.sendCmd(cmd{Cmd: cmdKill}, unixsocket.Msg{}) // kill
		return convertReplyResult(ret.Reply, sTime, mTime, err)
	}
}
133 | 
134 | func convertReplyResult(reply reply, sTime, mTime time.Time, err error) runner.Result {
135 | 	// handle potential error
136 | 	if err != nil {
137 | 		return runner.Result{
138 | 			Status: runner.StatusRunnerError,
139 | 			Error:  err.Error(),
140 | 		}
141 | 	}
142 | 	if reply.Error != nil {
143 | 		return runner.Result{
144 | 			Status: runner.StatusRunnerError,
145 | 			Error:  reply.Error.Error(),
146 | 		}
147 | 	}
148 | 	if reply.ExecReply == nil {
149 | 		return runner.Result{
150 | 			Status: runner.StatusRunnerError,
151 | 			Error:  "execve: no reply received",
152 | 		}
153 | 	}
154 | 	// emit result after all communication finish
155 | 	return runner.Result{
156 | 		Status:      reply.ExecReply.Status,
157 | 		ExitStatus:  reply.ExecReply.ExitStatus,
158 | 		Time:        reply.ExecReply.Time,
159 | 		Memory:      reply.ExecReply.Memory,
160 | 		SetUpTime:   mTime.Sub(sTime),
161 | 		RunningTime: time.Since(mTime),
162 | 	}
163 | }
164 | 
// execveSyncKill sends a kill command and receives one reply.
// Both the send error and the received reply / error are discarded.
func (c *container) execveSyncKill() {
	c.sendCmd(cmd{Cmd: cmdKill}, unixsocket.Msg{})
	c.recvReply()
}
170 | 
171 | func errResult(f string, v ...interface{}) runner.Result {
172 | 	return runner.Result{
173 | 		Status: runner.StatusRunnerError,
174 | 		Error:  fmt.Sprintf(f, v...),
175 | 	}
176 | }
177 | 


--------------------------------------------------------------------------------
/container/lookup_linux.go:
--------------------------------------------------------------------------------
 1 | package container
 2 | 
 3 | import (
 4 | 	"errors"
 5 | 	"io/fs"
 6 | 	"os"
 7 | 	"path/filepath"
 8 | 	"strings"
 9 | )
10 | 
var (
	// errNotFound is returned by lookPath when no directory of PATH contains
	// an executable with the requested name
	errNotFound = errors.New("executable file not found in $PATH")
	// errNoPath is returned by findPath when the environment has no PATH entry
	errNoPath = errors.New("no PATH environment variable provided for look up")
)
15 | 
// findExecutable reports whether file can be stat-ed, is not a directory and
// has at least one execute permission bit set. It returns nil if so, the stat
// error if stat fails, and fs.ErrPermission otherwise.
func findExecutable(file string) error {
	info, err := os.Stat(file)
	if err != nil {
		return err
	}
	mode := info.Mode()
	if mode.IsDir() || mode&0111 == 0 {
		return fs.ErrPermission
	}
	return nil
}
26 | 
27 | func lookPath(name string, env []string) (string, error) {
28 | 	// don't look if abs path provided
29 | 	if filepath.Base(name) != name {
30 | 		return name, nil
31 | 	}
32 | 
33 | 	// don't look if exist in current dir
34 | 	if err := findExecutable(name); err == nil {
35 | 		return name, nil
36 | 	}
37 | 
38 | 	path, err := findPath(env)
39 | 	if err != nil {
40 | 		return "", err
41 | 	}
42 | 	for _, dir := range path {
43 | 		if dir == "" {
44 | 			dir = "."
45 | 		}
46 | 		p := filepath.Join(dir, name)
47 | 		if err := findExecutable(p); err == nil {
48 | 			return p, nil
49 | 		}
50 | 	}
51 | 	return "", errNotFound
52 | }
53 | 
54 | func findPath(env []string) ([]string, error) {
55 | 	// find PATH=
56 | 	const pathPrefix = "PATH="
57 | 	for i := len(env) - 1; i >= 0; i-- {
58 | 		s := env[i]
59 | 		if strings.HasPrefix(s, pathPrefix) {
60 | 			return filepath.SplitList(s[len(pathPrefix):]), nil
61 | 		}
62 | 	}
63 | 	return nil, errNoPath
64 | }
65 | 


--------------------------------------------------------------------------------
/container/protocol_linux.go:
--------------------------------------------------------------------------------
 1 | package container
 2 | 
 3 | import (
 4 | 	"os"
 5 | 	"syscall"
 6 | 	"time"
 7 | 
 8 | 	"github.com/criyle/go-sandbox/pkg/mount"
 9 | 	"github.com/criyle/go-sandbox/pkg/rlimit"
10 | 	"github.com/criyle/go-sandbox/pkg/seccomp"
11 | 	"github.com/criyle/go-sandbox/runner"
12 | )
13 | 
// cmd is the control message sent into the container.
// Cmd selects the command; the other fields carry the arguments of the
// commands that need them.
type cmd struct {
	DeleteCmd *deleteCmd // delete argument
	ExecCmd   *execCmd   // execve argument
	ConfCmd   *confCmd   // to set configuration

	OpenCmd []OpenCmd // open argument

	Cmd cmdType // type of the cmd
}
24 | 
// OpenCmd corresponds to a single open call inside the container;
// the fields are passed to os.OpenFile as path, flag and permission.
type OpenCmd struct {
	Path string
	Flag int
	Perm os.FileMode
}
31 | 
// deleteCmd stores the argument of the delete command
type deleteCmd struct {
	Path string // path to remove inside the container
}
36 | 
// execCmd stores the parameters of the execve command
type execCmd struct {
	Argv      []string        // execve argv
	Env       []string        // execve env
	RLimits   []rlimit.RLimit // execve posix rlimit
	Seccomp   seccomp.Filter  // seccomp filter
	FdExec    bool            // if use fexecve (fd[0] as exec)
	FdCgroup  bool            // if use cgroupFd (sent right after the exec fd, if any)
	CTTY      bool            // if set CTTY
	SyncAfter bool            // if sync function calls after execve returns
}
48 | 
// confCmd stores the parameter of the conf command
type confCmd struct {
	Conf containerConfig
}
53 | 
// containerConfig is the container configuration sent with the conf command
type containerConfig struct {
	WorkDir string // working directory of the processes started by execve

	HostName   string
	DomainName string

	ContainerRoot string
	Mounts        []mount.Mount
	SymbolicLinks []SymbolicLink
	MaskPaths     []string
	InitCommand   []string

	ContainerUID  int  // 0 is replaced with the default containerUID
	ContainerGID  int  // 0 is replaced with the default containerGID
	Cred          bool // if set, execve runs the process with ContainerUID / ContainerGID
	UnshareCgroup bool
}
72 | 
// reply is the reply message sent back to the controller
type reply struct {
	Error     *errorReply // nil if no error
	ExecReply *execReply  // result of an execve, nil otherwise
}
78 | 
// errorReply stores an error returned from the container;
// it implements the error interface through its Error method
type errorReply struct {
	Errno *syscall.Errno
	Msg   string
}
84 | 
// execReply stores the result of an execve
type execReply struct {
	ExitStatus int           // waitpid exit status (signal number if the process was signaled)
	Status     runner.Status // return status
	Time       time.Duration // waitpid user CPU (ns)
	Memory     runner.Size   // waitpid user memory (byte)
}
92 | 
// Error returns the message of the error reply.
func (e *errorReply) Error() string {
	return e.Msg
}
96 | 


--------------------------------------------------------------------------------
/container/signal_linux.go:
--------------------------------------------------------------------------------
 1 | //go:build linux && !mips64 && !mips64le
 2 | 
 3 | package container
 4 | 
 5 | import (
 6 | 	"os"
 7 | 	"syscall"
 8 | )
 9 | 
// signalToIgnore lists the signals to be ignored, grouped by what they would
// otherwise do to the program (variant for all linux targets except mips64 /
// mips64le).
// NOTE(review): the consumer of this list is not visible in this file.
var signalToIgnore = []os.Signal{
	// signals that cause run-time panic
	syscall.SIGBUS, syscall.SIGFPE, syscall.SIGSEGV,
	// signals that cause the program to exit
	syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM,
	// signals that cause the program to exit with a stack dump
	syscall.SIGQUIT, syscall.SIGILL, syscall.SIGTRAP, syscall.SIGABRT, syscall.SIGSTKFLT, syscall.SIGSYS,
}
18 | 


--------------------------------------------------------------------------------
/container/signal_linux_mips64x.go:
--------------------------------------------------------------------------------
 1 | //go:build linux && (mips64 || mips64le)
 2 | 
 3 | package container
 4 | 
 5 | import (
 6 | 	"os"
 7 | 	"syscall"
 8 | )
 9 | 
// signalToIgnore lists the signals to be ignored, grouped by what they would
// otherwise do to the program (variant for linux mips64 / mips64le; unlike
// the variant for other targets it does not contain SIGSTKFLT).
// NOTE(review): the consumer of this list is not visible in this file.
var signalToIgnore = []os.Signal{
	// signals that cause run-time panic
	syscall.SIGBUS, syscall.SIGFPE, syscall.SIGSEGV,
	// signals that cause the program to exit
	syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM,
	// signals that cause the program to exit with a stack dump
	syscall.SIGQUIT, syscall.SIGILL, syscall.SIGTRAP, syscall.SIGABRT, syscall.SIGSYS,
}
18 | 


--------------------------------------------------------------------------------
/container/socket_linux.go:
--------------------------------------------------------------------------------
 1 | package container
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"encoding/gob"
 6 | 	"fmt"
 7 | 
 8 | 	"github.com/criyle/go-sandbox/pkg/unixsocket"
 9 | )
10 | 
// bufferSize is the size of the receive buffer of a socket: 16 KiB.
// A received message is read into a buffer of this size in one call.
const bufferSize = 16 << 10
13 | 
// socket wraps a unixsocket.Socket and exchanges gob-encoded values over it.
// The encoder and decoder are created once and kept for the lifetime of the
// socket.
type socket struct {
	*unixsocket.Socket

	// buff receives the raw bytes of every incoming message
	buff []byte

	// decoder reads from recvBuff, which is re-pointed at the freshly
	// received bytes for every message
	decoder  *gob.Decoder
	recvBuff bufferRotator

	// encoder writes into sendBuff, which is reset before every message
	encoder  *gob.Encoder
	sendBuff bytes.Buffer
}
25 | 
// bufferRotator replaces the underlying Buffer to avoid allocation;
// it lets a long-lived reader be pointed at a different buffer
type bufferRotator struct {
	*bytes.Buffer
}

// Rotate makes buffer the underlying buffer of b.
func (b *bufferRotator) Rotate(buffer *bytes.Buffer) {
	b.Buffer = buffer
}
34 | 
35 | func newSocket(s *unixsocket.Socket) *socket {
36 | 	soc := socket{
37 | 		Socket: s,
38 | 	}
39 | 	soc.buff = make([]byte, bufferSize)
40 | 	soc.decoder = gob.NewDecoder(&soc.recvBuff)
41 | 	soc.encoder = gob.NewEncoder(&soc.sendBuff)
42 | 
43 | 	return &soc
44 | }
45 | 
46 | func (s *socket) RecvMsg(e any) (msg unixsocket.Msg, err error) {
47 | 	n, msg, err := s.Socket.RecvMsg(s.buff)
48 | 	if err != nil {
49 | 		return msg, fmt.Errorf("recv msg: %w", err)
50 | 	}
51 | 	s.recvBuff.Rotate(bytes.NewBuffer(s.buff[:n]))
52 | 
53 | 	if err := s.decoder.Decode(e); err != nil {
54 | 		return msg, fmt.Errorf("recv msg: decode: %w", err)
55 | 	}
56 | 	return msg, nil
57 | }
58 | 
59 | func (s *socket) SendMsg(e any, msg unixsocket.Msg) error {
60 | 	s.sendBuff.Reset()
61 | 	if err := s.encoder.Encode(e); err != nil {
62 | 		return fmt.Errorf("send msg: encode: %w", err)
63 | 	}
64 | 
65 | 	if err := s.Socket.SendMsg(s.sendBuff.Bytes(), msg); err != nil {
66 | 		return fmt.Errorf("send msg: %w", err)
67 | 	}
68 | 	return nil
69 | }
70 | 


--------------------------------------------------------------------------------
/container/utils.go:
--------------------------------------------------------------------------------
 1 | package container
 2 | 
 3 | import (
 4 | 	"os"
 5 | 	"path/filepath"
 6 | 	"syscall"
 7 | )
 8 | 
 9 | func intSliceToUintptr(s []int) []uintptr {
10 | 	var r []uintptr
11 | 	if len(s) > 0 {
12 | 		r = make([]uintptr, len(s))
13 | 		for i, x := range s {
14 | 			r[i] = uintptr(x)
15 | 		}
16 | 	}
17 | 	return r
18 | }
19 | 
// uintptrSliceToInt converts every element of s to int.
// It returns nil for an empty or nil slice.
func uintptrSliceToInt(s []uintptr) []int {
	if len(s) == 0 {
		return nil
	}
	out := make([]int, 0, len(s))
	for _, v := range s {
		out = append(out, int(v))
	}
	return out
}
30 | 
// closeOnExecFds marks every file descriptor in s as close-on-exec.
func closeOnExecFds(s []int) {
	for i := range s {
		syscall.CloseOnExec(s[i])
	}
}
36 | 
// closeFds closes every file descriptor in s; close errors are ignored.
func closeFds(s []int) {
	for i := range s {
		syscall.Close(s[i])
	}
}
42 | 
// removeContents deletes everything inside dir but keeps dir itself.
//
// It returns an error if dir cannot be opened or listed. Removal is attempted
// for every entry even after a failure; the first removal error is returned.
func removeContents(dir string) error {
	d, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer d.Close()

	names, err := d.Readdirnames(-1)
	if err != nil {
		return err
	}

	// keep going on failure so that as much as possible is cleaned up, but
	// remember the first error so that it is reported to the caller
	var firstErr error
	for _, name := range names {
		if rmErr := os.RemoveAll(filepath.Join(dir, name)); rmErr != nil && firstErr == nil {
			firstErr = rmErr
		}
	}
	return firstErr
}
64 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/criyle/go-sandbox
 2 | 
 3 | go 1.24
 4 | 
 5 | require (
 6 | 	github.com/elastic/go-seccomp-bpf v1.5.0
 7 | 	golang.org/x/net v0.38.0
 8 | 	golang.org/x/sys v0.31.0
 9 | )
10 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 3 | github.com/elastic/go-seccomp-bpf v1.5.0 h1:gJV+U1iP+YC70ySyGUUNk2YLJW5/IkEw4FZBJfW8ZZY=
 4 | github.com/elastic/go-seccomp-bpf v1.5.0/go.mod h1:umdhQ/3aybliBF2jjiZwS492I/TOKz+ZRvsLT3hVe1o=
 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 7 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 8 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 9 | golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
10 | golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
11 | golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
12 | golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
13 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
14 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
15 | 


--------------------------------------------------------------------------------
/pkg/cgroup/benchmark_linux_test.go:
--------------------------------------------------------------------------------
 1 | package cgroup
 2 | 
 3 | import (
 4 | 	"os"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func BenchmarkCgroup(b *testing.B) {
 9 | 	if err := EnableV2Nesting(); err != nil {
10 | 		b.Fatal(err)
11 | 	}
12 | 	ct, err := GetAvailableControllerV2()
13 | 	if err != nil {
14 | 		b.Fatal(err)
15 | 	}
16 | 	builder, err := New("benchmark", ct)
17 | 	if err != nil {
18 | 		b.Fatal(err)
19 | 	}
20 | 	defer builder.Destroy()
21 | 	b.ResetTimer()
22 | 	for i := 0; i < b.N; i++ {
23 | 		cg, err := builder.New("test")
24 | 		if err != nil {
25 | 			b.Fatal(err)
26 | 		}
27 | 		if err := cg.SetCPUSet([]byte("0")); err != nil {
28 | 			b.Fatal(err)
29 | 		}
30 | 		if err := cg.SetMemoryLimit(4096); err != nil {
31 | 			b.Fatal(err)
32 | 		}
33 | 		if err := cg.SetProcLimit(1); err != nil {
34 | 			b.Fatal(err)
35 | 		}
36 | 		if _, err := cg.CPUUsage(); err != nil {
37 | 			b.Fatal(err)
38 | 		}
39 | 		if _, err := cg.MemoryMaxUsage(); err != nil {
40 | 			b.Fatal(err)
41 | 		}
42 | 		cg.Destroy()
43 | 	}
44 | }
45 | 
46 | func TestCgroupAll(t *testing.T) {
47 | 	// ensure root privilege when testing
48 | 	if os.Getuid() != 0 {
49 | 		t.Skip("no root privilege")
50 | 	}
51 | 	if err := EnableV2Nesting(); err != nil {
52 | 		t.Fatal(err)
53 | 	}
54 | 	ct, err := GetAvailableControllerV2()
55 | 	if err != nil {
56 | 		t.Fatal(err)
57 | 	}
58 | 	builder, err := New("benchmark", ct)
59 | 	if err != nil {
60 | 		t.Fatal(err)
61 | 	}
62 | 	defer builder.Destroy()
63 | 	if err != nil {
64 | 		t.Fatal(err)
65 | 	}
66 | 	cg, err := builder.New("test")
67 | 	if err != nil {
68 | 		t.Fatal(err)
69 | 	}
70 | 	t.Cleanup(func() {
71 | 		cg.Destroy()
72 | 	})
73 | 	if err := cg.SetCPUSet([]byte("0")); err != nil {
74 | 		t.Fatal(err)
75 | 	}
76 | 	if err := cg.SetMemoryLimit(4096); err != nil {
77 | 		t.Fatal(err)
78 | 	}
79 | 	if err := cg.SetProcLimit(1); err != nil {
80 | 		t.Fatal(err)
81 | 	}
82 | 	if _, err := cg.CPUUsage(); err != nil {
83 | 		t.Fatal(err)
84 | 	}
85 | 	if _, err := cg.MemoryMaxUsage(); err != nil {
86 | 		t.Fatal(err)
87 | 	}
88 | }
89 | 


--------------------------------------------------------------------------------
/pkg/cgroup/cgroup_info_linux.go:
--------------------------------------------------------------------------------
  1 | package cgroup
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"fmt"
  6 | 	"os"
  7 | 	"path/filepath"
  8 | 	"strconv"
  9 | 	"strings"
 10 | )
 11 | 
// numberOfControllers is the number of fields in Controllers; it is used as
// the capacity hint when collecting controller names.
const numberOfControllers = 5

// Controllers defines enabled controller of a cgroup.
// Each field is true when the controller of the same name is enabled.
type Controllers struct {
	CPU     bool
	CPUSet  bool
	CPUAcct bool
	Memory  bool
	Pids    bool
}
 22 | 
 23 | // Set changes the enabled status of a specific controller
 24 | func (c *Controllers) Set(ct string, value bool) {
 25 | 	switch ct {
 26 | 	case CPU:
 27 | 		c.CPU = value
 28 | 	case CPUSet:
 29 | 		c.CPUSet = value
 30 | 	case CPUAcct:
 31 | 		c.CPUAcct = value
 32 | 	case Memory:
 33 | 		c.Memory = value
 34 | 	case Pids:
 35 | 		c.Pids = value
 36 | 	}
 37 | }
 38 | 
 39 | // Intersect reset the specific controller if it is not enabled in the other
 40 | func (c *Controllers) Intersect(o *Controllers) {
 41 | 	c.CPU = c.CPU && o.CPU
 42 | 	c.CPUSet = c.CPUSet && o.CPUSet
 43 | 	c.CPUAcct = c.CPUAcct && o.CPUAcct
 44 | 	c.Memory = c.Memory && o.Memory
 45 | 	c.Pids = c.Pids && o.Pids
 46 | }
 47 | 
 48 | // Contains returns true if the current controller enabled all controllers in the other controller
 49 | func (c *Controllers) Contains(o *Controllers) bool {
 50 | 	return (c.CPU || !o.CPU) && (c.CPUSet || !o.CPUSet) && (c.CPUAcct || !o.CPUAcct) &&
 51 | 		(c.Memory || !o.Memory) && (c.Pids || !o.Pids)
 52 | }
 53 | 
 54 | // Names returns a list of string of all enabled container names
 55 | func (c *Controllers) Names() []string {
 56 | 	names := make([]string, 0, numberOfControllers)
 57 | 	for _, v := range []struct {
 58 | 		e bool
 59 | 		n string
 60 | 	}{
 61 | 		{c.CPU, CPU},
 62 | 		{c.CPUAcct, CPUAcct},
 63 | 		{c.CPUSet, CPUSet},
 64 | 		{c.Memory, Memory},
 65 | 		{c.Pids, Pids},
 66 | 	} {
 67 | 		if v.e {
 68 | 			names = append(names, v.n)
 69 | 		}
 70 | 	}
 71 | 	return names
 72 | }
 73 | 
 74 | func (c *Controllers) String() string {
 75 | 	return "[" + strings.Join(c.Names(), ", ") + "]"
 76 | }
 77 | 
// Info holds the cgroup information of one subsystem as parsed from a line of
// /proc/cgroups (see GetCgroupV1Info).
type Info struct {
	Hierarchy  int  // second column: hierarchy
	NumCgroups int  // third column: num_cgroups
	Enabled    bool // fourth column: true unless the field is "0"
}
 84 | 
 85 | // GetCgroupV1Info read /proc/cgroups and return the result
 86 | func GetCgroupV1Info() (map[string]Info, error) {
 87 | 	f, err := os.Open(procCgroupsPath)
 88 | 	if err != nil {
 89 | 		return nil, err
 90 | 	}
 91 | 	defer f.Close()
 92 | 
 93 | 	rt := make(map[string]Info)
 94 | 	s := bufio.NewScanner(f)
 95 | 	for s.Scan() {
 96 | 		text := s.Text()
 97 | 		if text[0] == '#' {
 98 | 			continue
 99 | 		}
100 | 		parts := strings.Fields(text)
101 | 		if len(parts) < 4 {
102 | 			continue
103 | 		}
104 | 
105 | 		// format: subsys_name hierarchy num_cgroups enabled
106 | 		name := parts[0]
107 | 		hierarchy, err := strconv.Atoi(parts[1])
108 | 		if err != nil {
109 | 			return nil, err
110 | 		}
111 | 		numCgroups, err := strconv.Atoi(parts[2])
112 | 		if err != nil {
113 | 			return nil, err
114 | 		}
115 | 		enabled := parts[3] != "0"
116 | 		rt[name] = Info{
117 | 			Hierarchy:  hierarchy,
118 | 			NumCgroups: numCgroups,
119 | 			Enabled:    enabled,
120 | 		}
121 | 	}
122 | 	if err := s.Err(); err != nil {
123 | 		return nil, err
124 | 	}
125 | 	return rt, nil
126 | }
127 | 
128 | // GetCurrentCgroupPrefix returns the cgroup prefix of current process
129 | func GetCurrentCgroupPrefix() (string, error) {
130 | 	c, err := os.ReadFile(procSelfCgroup)
131 | 	if err != nil {
132 | 		return "", err
133 | 	}
134 | 	firstLine, _, _ := strings.Cut(string(c), "\n")
135 | 	f := strings.Split(firstLine, ":")
136 | 	if len(f) < 3 {
137 | 		return "", fmt.Errorf("invalid " + procSelfCgroup)
138 | 	}
139 | 	return f[2][1:], nil
140 | }
141 | 
142 | // GetAvailableController returns available cgroup controller in the system
143 | func GetAvailableController() (*Controllers, error) {
144 | 	if DetectedCgroupType == TypeV1 {
145 | 		return GetAvailableControllerV1()
146 | 	}
147 | 	return GetAvailableControllerV2()
148 | }
149 | 
150 | // GetAvailableControllerWithPrefix returns available cgroup controller within the cgroup prefix
151 | func GetAvailableControllerWithPrefix(prefix string) (*Controllers, error) {
152 | 	if DetectedCgroupType == TypeV1 {
153 | 		return GetAvailableControllerV1()
154 | 	}
155 | 	return getAvailableControllerV2(prefix)
156 | }
157 | 
158 | // GetAvailableControllerV1 reads /proc/cgroups and get all available controller as set
159 | func GetAvailableControllerV1() (*Controllers, error) {
160 | 	info, err := GetCgroupV1Info()
161 | 	if err != nil {
162 | 		return nil, err
163 | 	}
164 | 
165 | 	rt := &Controllers{}
166 | 	for k, v := range info {
167 | 		if !v.Enabled {
168 | 			continue
169 | 		}
170 | 		rt.Set(k, true)
171 | 	}
172 | 	return rt, nil
173 | }
174 | 
175 | // GetAvailableControllerV2 reads /sys/fs/cgroup/cgroup.controllers to get all controller
176 | func GetAvailableControllerV2() (*Controllers, error) {
177 | 	return getAvailableControllerV2(".")
178 | }
179 | 
180 | func getAvailableControllerV2(prefix string) (*Controllers, error) {
181 | 	return getAvailableControllerV2path(filepath.Join(basePath, prefix, cgroupControllers))
182 | }
183 | 
184 | func getAvailableControllerV2path(p string) (*Controllers, error) {
185 | 	c, err := readFile(p)
186 | 	if err != nil {
187 | 		return nil, err
188 | 	}
189 | 
190 | 	m := &Controllers{}
191 | 	f := strings.Fields(string(c))
192 | 	for _, v := range f {
193 | 		m.Set(v, true)
194 | 	}
195 | 	return m, nil
196 | }
197 | 


--------------------------------------------------------------------------------
/pkg/cgroup/consts_linux.go:
--------------------------------------------------------------------------------
 1 | package cgroup
 2 | 
// Cgroup constants
const (
	// systemd mounted cgroups
	basePath        = "/sys/fs/cgroup"
	cgroupProcs     = "cgroup.procs"      // file listing the processes of a cgroup
	procCgroupsPath = "/proc/cgroups"     // parsed by GetCgroupV1Info
	procSelfCgroup  = "/proc/self/cgroup" // parsed by GetCurrentCgroupPrefix

	// cgroup v2 control files
	cgroupSubtreeControl = "cgroup.subtree_control"
	cgroupControllers    = "cgroup.controllers"

	// permission bits passed when opening / writing cgroup files and when
	// creating cgroup directories
	filePerm = 0644
	dirPerm  = 0755

	// controller names recognised by Controllers
	CPU     = "cpu"
	CPUAcct = "cpuacct"
	CPUSet  = "cpuset"
	Memory  = "memory"
	Pids    = "pids"
)
23 | 
// Type defines the version of cgroup
type Type int

// Type enum for cgroup
//
// NOTE(review): the constants below are declared without an explicit type, so
// they are untyped integer constants rather than values of Type — confirm
// whether that is intended.
const (
	TypeV1 = iota + 1 // 1
	TypeV2            // 2
)
32 | 
33 | func (t Type) String() string {
34 | 	switch t {
35 | 	case TypeV1:
36 | 		return "v1"
37 | 	case TypeV2:
38 | 		return "v2"
39 | 	default:
40 | 		return "invalid"
41 | 	}
42 | }
43 | 


--------------------------------------------------------------------------------
/pkg/cgroup/doc.go:
--------------------------------------------------------------------------------
// Package cgroup provides a builder to create cgroups
// under the systemd-defined mount path (i.e., /sys/fs/cgroup), including v1
// and v2 implementations.
//
// Available cgroup controllers:
//
//	cpu
//	cpuset
//	cpuacct
//	memory
//	pids
//
// Currently not available: devices, freezer, net_cls, blkio, perf_event, net_prio, hugetlb, rdma
package cgroup
15 | 


--------------------------------------------------------------------------------
/pkg/cgroup/utils_linux.go:
--------------------------------------------------------------------------------
  1 | package cgroup
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 	"io/fs"
  7 | 	"math/rand/v2"
  8 | 	"os"
  9 | 	"path/filepath"
 10 | 	"strconv"
 11 | 	"strings"
 12 | 	"syscall"
 13 | 
 14 | 	"golang.org/x/sys/unix"
 15 | )
 16 | 
 17 | // EnsureDirExists creates directories if the path not exists
 18 | func EnsureDirExists(path string) error {
 19 | 	if _, err := os.Stat(path); os.IsNotExist(err) {
 20 | 		return os.MkdirAll(path, dirPerm)
 21 | 	}
 22 | 	return os.ErrExist
 23 | }
 24 | 
 25 | // CreateV1ControllerPath create path for controller with given group, prefix
 26 | func CreateV1ControllerPath(controller, prefix string) (string, error) {
 27 | 	p := filepath.Join(basePath, controller, prefix)
 28 | 	return p, EnsureDirExists(p)
 29 | }
 30 | 
 31 | const initPath = "init"
 32 | 
 33 | // EnableV2Nesting migrates all process in the container to nested /init path
 34 | // and enables all available controllers in the root cgroup
 35 | func EnableV2Nesting() error {
 36 | 	if DetectType() != TypeV2 {
 37 | 		return nil
 38 | 	}
 39 | 
 40 | 	p, err := readFile(filepath.Join(basePath, cgroupProcs))
 41 | 	if err != nil {
 42 | 		return err
 43 | 	}
 44 | 	procs := strings.Split(string(p), "\n")
 45 | 	if len(procs) == 0 {
 46 | 		return nil
 47 | 	}
 48 | 
 49 | 	// mkdir init
 50 | 	if err := os.Mkdir(filepath.Join(basePath, initPath), dirPerm); err != nil && !errors.Is(err, os.ErrExist) {
 51 | 		return err
 52 | 	}
 53 | 	// move all process into init cgroup
 54 | 	procFile, err := os.OpenFile(filepath.Join(basePath, initPath, cgroupProcs), os.O_RDWR, filePerm)
 55 | 	if err != nil {
 56 | 		return err
 57 | 	}
 58 | 	for _, v := range procs {
 59 | 		if _, err := procFile.WriteString(v); err != nil {
 60 | 			continue
 61 | 			//return err
 62 | 		}
 63 | 	}
 64 | 	procFile.Close()
 65 | 	return nil
 66 | }
 67 | 
 68 | // ReadProcesses reads cgroup.procs file and return pids individually
 69 | func ReadProcesses(path string) ([]int, error) {
 70 | 	content, err := readFile(path)
 71 | 	if err != nil {
 72 | 		return nil, err
 73 | 	}
 74 | 	procs := strings.Split(string(content), "\n")
 75 | 	rt := make([]int, len(procs))
 76 | 	for i, x := range procs {
 77 | 		if len(x) == 0 {
 78 | 			continue
 79 | 		}
 80 | 		rt[i], err = strconv.Atoi(x)
 81 | 		if err != nil {
 82 | 			return nil, err
 83 | 		}
 84 | 	}
 85 | 	return rt, nil
 86 | }
 87 | 
 88 | // AddProcesses add processes into cgroup.procs file
 89 | func AddProcesses(path string, procs []int) error {
 90 | 	f, err := os.OpenFile(path, os.O_RDWR, filePerm)
 91 | 	if err != nil {
 92 | 		return err
 93 | 	}
 94 | 	defer f.Close()
 95 | 	for _, p := range procs {
 96 | 		if _, err := f.WriteString(strconv.Itoa(p)); err != nil {
 97 | 			return err
 98 | 		}
 99 | 	}
100 | 	return nil
101 | }
102 | 
103 | // DetectType detects current mounted cgroup type in systemd default path
104 | func DetectType() Type {
105 | 	// if /sys/fs/cgroup is mounted as CGROUPV2 or TMPFS (V1)
106 | 	var st unix.Statfs_t
107 | 	if err := unix.Statfs(basePath, &st); err != nil {
108 | 		// ignore errors, defaulting to CgroupV1
109 | 		return TypeV1
110 | 	}
111 | 	if st.Type == unix.CGROUP2_SUPER_MAGIC {
112 | 		return TypeV2
113 | 	}
114 | 	return TypeV1
115 | }
116 | 
// remove deletes the directory name with rmdir; an empty name is a no-op.
func remove(name string) error {
	if name == "" {
		return nil
	}
	// only directories are deleted here, so rmdir is called directly rather
	// than os.Remove, which tries unlink first
	return syscall.Rmdir(name)
}
125 | 
// errPatternHasSeparator is returned by prefixAndSuffix for patterns that
// contain a path separator.
var errPatternHasSeparator = errors.New("pattern contains path separator")

// prefixAndSuffix splits pattern at its last "*": prefix is everything before
// it and suffix everything after it. Without a "*" the whole pattern becomes
// the prefix. Patterns containing a path separator are rejected.
func prefixAndSuffix(pattern string) (prefix, suffix string, err error) {
	for _, b := range []byte(pattern) {
		if os.IsPathSeparator(b) {
			return "", "", errPatternHasSeparator
		}
	}
	star := strings.LastIndexByte(pattern, '*')
	if star < 0 {
		return pattern, "", nil
	}
	return pattern[:star], pattern[star+1:], nil
}
143 | 
// readFile reads the whole file at p, retrying for as long as the read fails
// with EINTR.
func readFile(p string) ([]byte, error) {
	for {
		data, err := os.ReadFile(p)
		if !errors.Is(err, syscall.EINTR) {
			return data, err
		}
	}
}
151 | 
// writeFile writes content to the file at p with the given permission,
// retrying for as long as the write fails with EINTR.
func writeFile(p string, content []byte, perm fs.FileMode) error {
	for {
		err := os.WriteFile(p, content, perm)
		if !errors.Is(err, syscall.EINTR) {
			return err
		}
	}
}
159 | 
160 | func nextRandom() string {
161 | 	return strconv.Itoa(int(rand.Int32()))
162 | }
163 | 
164 | // randomBuild creates a cgroup with random directory, similar to os.MkdirTemp
165 | func randomBuild(pattern string, build func(string) (Cgroup, error)) (Cgroup, error) {
166 | 	prefix, suffix, err := prefixAndSuffix(pattern)
167 | 	if err != nil {
168 | 		return nil, fmt.Errorf("cgroup.builder: random %w", err)
169 | 	}
170 | 
171 | 	try := 0
172 | 	for {
173 | 		name := prefix + nextRandom() + suffix
174 | 		cg, err := build(name)
175 | 		if err == nil {
176 | 			return cg, nil
177 | 		}
178 | 		if errors.Is(err, os.ErrExist) || (cg != nil && cg.Existing()) {
179 | 			if try++; try < 10000 {
180 | 				continue
181 | 			}
182 | 			return nil, fmt.Errorf("cgroup.builder: tried 10000 times but failed")
183 | 		}
184 | 		return nil, fmt.Errorf("cgroup.builder: random %w", err)
185 | 	}
186 | }
187 | 


--------------------------------------------------------------------------------
/pkg/cgroup/v1controller_linux.go:
--------------------------------------------------------------------------------
 1 | package cgroup
 2 | 
 3 | import (
 4 | 	"errors"
 5 | 	"path/filepath"
 6 | 	"strconv"
 7 | 	"strings"
 8 | )
 9 | 
// v1controller is the accessor for single cgroup resource with given path.
// A nil receiver or an empty path is treated as "not initialized" by its
// read / write helpers.
type v1controller struct {
	path string // directory of the controller; files are resolved relative to it
}
14 | 
15 | // ErrNotInitialized returned when trying to read from not initialized cgroup
16 | var ErrNotInitialized = errors.New("cgroup was not initialized")
17 | 
18 | // newV1Controller creates a cgroup accessor with given path (path needs to be created in advance)
19 | func newV1Controller(p string) *v1controller {
20 | 	return &v1controller{path: p}
21 | }
22 | 
23 | // WriteUint writes uint64 into given file
24 | func (c *v1controller) WriteUint(filename string, i uint64) error {
25 | 	if c == nil || c.path == "" {
26 | 		return nil
27 | 	}
28 | 	return c.WriteFile(filename, []byte(strconv.FormatUint(i, 10)))
29 | }
30 | 
31 | // ReadUint read uint64 from given file
32 | func (c *v1controller) ReadUint(filename string) (uint64, error) {
33 | 	if c == nil || c.path == "" {
34 | 		return 0, ErrNotInitialized
35 | 	}
36 | 	b, err := c.ReadFile(filename)
37 | 	if err != nil {
38 | 		return 0, err
39 | 	}
40 | 	s, err := strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64)
41 | 	if err != nil {
42 | 		return 0, err
43 | 	}
44 | 	return s, nil
45 | }
46 | 
47 | // WriteFile writes cgroup file and handles potential EINTR error while writes to
48 | // the slow device (cgroup)
49 | func (c *v1controller) WriteFile(name string, content []byte) error {
50 | 	if c == nil || c.path == "" {
51 | 		return ErrNotInitialized
52 | 	}
53 | 	p := filepath.Join(c.path, name)
54 | 	return writeFile(p, content, filePerm)
55 | }
56 | 
57 | // ReadFile reads cgroup file and handles potential EINTR error while read to
58 | // the slow device (cgroup)
59 | func (c *v1controller) ReadFile(name string) ([]byte, error) {
60 | 	if c == nil || c.path == "" {
61 | 		return nil, nil
62 | 	}
63 | 	p := filepath.Join(c.path, name)
64 | 	return readFile(p)
65 | }
66 | 
67 | func (c *v1controller) AddProc(pids ...int) error {
68 | 	return AddProcesses(filepath.Join(c.path, cgroupProcs), pids)
69 | }
70 | 


--------------------------------------------------------------------------------
/pkg/cgroup/v2_linux.go:
--------------------------------------------------------------------------------
  1 | package cgroup
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"bytes"
  6 | 	"os"
  7 | 	"path/filepath"
  8 | 	"strconv"
  9 | 	"strings"
 10 | 	"sync"
 11 | )
 12 | 
// V2 provides cgroup interface for v2
type V2 struct {
	path        string       // directory of this cgroup
	control     *Controllers // controllers checked before limits / usage are accessed
	subtreeOnce sync.Once    // guards the one-time subtree_control setup
	subtreeErr  error        // result of that setup, returned on every later call
	existing    bool         // true when the directory already existed at creation time
}

// compile-time check that *V2 implements Cgroup
var _ Cgroup = &V2{}
 23 | 
 24 | func (c *V2) Open() (*os.File, error) {
 25 | 	return os.OpenFile(c.path, 0, dirPerm)
 26 | }
 27 | 
 28 | func (c *V2) String() string {
 29 | 	ct, _ := getAvailableControllerV2path(filepath.Join(c.path, cgroupControllers))
 30 | 	return "v2(" + c.path + ")" + ct.String()
 31 | }
 32 | 
 33 | // AddProc adds processes into the cgroup
 34 | func (c *V2) AddProc(pids ...int) error {
 35 | 	return AddProcesses(filepath.Join(c.path, cgroupProcs), pids)
 36 | }
 37 | 
 38 | // Processes returns all processes within the cgroup
 39 | func (c *V2) Processes() ([]int, error) {
 40 | 	return ReadProcesses(filepath.Join(c.path, cgroupProcs))
 41 | }
 42 | 
 43 | // New creates a sub-cgroup based on the existing one
 44 | func (c *V2) New(name string) (Cgroup, error) {
 45 | 	if err := c.enableSubtreeControl(); err != nil {
 46 | 		return nil, err
 47 | 	}
 48 | 	v2 := &V2{
 49 | 		path:    filepath.Join(c.path, name),
 50 | 		control: c.control,
 51 | 	}
 52 | 	if err := os.Mkdir(v2.path, dirPerm); err != nil {
 53 | 		if !os.IsExist(err) {
 54 | 			return nil, err
 55 | 		}
 56 | 		v2.existing = true
 57 | 	}
 58 | 	return v2, nil
 59 | }
 60 | 
 61 | // Nest creates a sub-cgroup, moves current process into that cgroup
 62 | func (c *V2) Nest(name string) (Cgroup, error) {
 63 | 	v2 := &V2{
 64 | 		path:    filepath.Join(c.path, name),
 65 | 		control: c.control,
 66 | 	}
 67 | 	if err := os.Mkdir(v2.path, dirPerm); err != nil {
 68 | 		if !os.IsExist(err) {
 69 | 			return nil, err
 70 | 		}
 71 | 		v2.existing = true
 72 | 	}
 73 | 	p, err := c.Processes()
 74 | 	if err != nil {
 75 | 		return nil, err
 76 | 	}
 77 | 	if err := v2.AddProc(p...); err != nil {
 78 | 		return nil, err
 79 | 	}
 80 | 	if err := c.enableSubtreeControl(); err != nil {
 81 | 		return nil, err
 82 | 	}
 83 | 	return v2, nil
 84 | }
 85 | 
 86 | func (c *V2) enableSubtreeControl() error {
 87 | 	c.subtreeOnce.Do(func() {
 88 | 		ct, err := getAvailableControllerV2path(filepath.Join(c.path, cgroupControllers))
 89 | 		if err != nil {
 90 | 			c.subtreeErr = err
 91 | 			return
 92 | 		}
 93 | 		ect, err := getAvailableControllerV2path(filepath.Join(c.path, cgroupSubtreeControl))
 94 | 		if err != nil {
 95 | 			c.subtreeErr = err
 96 | 			return
 97 | 		}
 98 | 		if ect.Contains(ct) {
 99 | 			return
100 | 		}
101 | 		s := ct.Names()
102 | 		controlMsg := []byte("+" + strings.Join(s, " +"))
103 | 		c.subtreeErr = writeFile(filepath.Join(c.path, cgroupSubtreeControl), controlMsg, filePerm)
104 | 	})
105 | 	return c.subtreeErr
106 | }
107 | 
108 | // Random creates a sub-cgroup based on the existing one but the name is randomly generated
109 | func (c *V2) Random(pattern string) (Cgroup, error) {
110 | 	return randomBuild(pattern, c.New)
111 | }
112 | 
113 | // Destroy destroys the cgroup
114 | func (c *V2) Destroy() error {
115 | 	if !c.existing {
116 | 		return remove(c.path)
117 | 	}
118 | 	return nil
119 | }
120 | 
121 | // Existing returns true if the cgroup was opened rather than created
122 | func (c *V2) Existing() bool {
123 | 	return c.existing
124 | }
125 | 
126 | // CPUUsage reads cpu.stat usage_usec
127 | func (c *V2) CPUUsage() (uint64, error) {
128 | 	b, err := c.ReadFile("cpu.stat")
129 | 	if err != nil {
130 | 		return 0, err
131 | 	}
132 | 	s := bufio.NewScanner(bytes.NewReader(b))
133 | 	for s.Scan() {
134 | 		parts := strings.Fields(s.Text())
135 | 		if len(parts) == 2 && parts[0] == "usage_usec" {
136 | 			v, err := strconv.Atoi(parts[1])
137 | 			if err != nil {
138 | 				return 0, err
139 | 			}
140 | 			return uint64(v) * 1000, nil // to ns
141 | 		}
142 | 	}
143 | 	return 0, os.ErrNotExist
144 | }
145 | 
146 | // MemoryUsage reads memory.current
147 | func (c *V2) MemoryUsage() (uint64, error) {
148 | 	if !c.control.Memory {
149 | 		return 0, ErrNotInitialized
150 | 	}
151 | 	return c.ReadUint("memory.current")
152 | }
153 | 
154 | // MemoryMaxUsage reads memory.peak
155 | func (c *V2) MemoryMaxUsage() (uint64, error) {
156 | 	if !c.control.Memory {
157 | 		return 0, ErrNotInitialized
158 | 	}
159 | 	return c.ReadUint("memory.peak")
160 | }
161 | 
162 | // ProcessPeak reads pids.peak
163 | func (c *V2) ProcessPeak() (uint64, error) {
164 | 	if !c.control.Pids {
165 | 		return 0, ErrNotInitialized
166 | 	}
167 | 	return c.ReadUint("pids.peak")
168 | }
169 | 
170 | // SetCPUBandwidth set cpu.max quota period
171 | func (c *V2) SetCPUBandwidth(quota, period uint64) error {
172 | 	if !c.control.CPU {
173 | 		return ErrNotInitialized
174 | 	}
175 | 	content := strconv.FormatUint(quota, 10) + " " + strconv.FormatUint(period, 10)
176 | 	return c.WriteFile("cpu.max", []byte(content))
177 | }
178 | 
179 | // SetCPUSet sets cpuset.cpus
180 | func (c *V2) SetCPUSet(content []byte) error {
181 | 	if !c.control.CPUSet {
182 | 		return ErrNotInitialized
183 | 	}
184 | 	return c.WriteFile("cpuset.cpus", content)
185 | }
186 | 
187 | // SetMemoryLimit memory.max
188 | func (c *V2) SetMemoryLimit(l uint64) error {
189 | 	if !c.control.Memory {
190 | 		return ErrNotInitialized
191 | 	}
192 | 	return c.WriteUint("memory.max", l)
193 | }
194 | 
195 | // SetProcLimit pids.max
196 | func (c *V2) SetProcLimit(l uint64) error {
197 | 	if !c.control.Pids {
198 | 		return ErrNotInitialized
199 | 	}
200 | 	return c.WriteUint("pids.max", l)
201 | }
202 | 
203 | // WriteUint writes uint64 into given file
204 | func (c *V2) WriteUint(filename string, i uint64) error {
205 | 	return c.WriteFile(filename, []byte(strconv.FormatUint(i, 10)))
206 | }
207 | 
208 | // ReadUint read uint64 from given file
209 | func (c *V2) ReadUint(filename string) (uint64, error) {
210 | 	b, err := c.ReadFile(filename)
211 | 	if err != nil {
212 | 		return 0, err
213 | 	}
214 | 	s, err := strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64)
215 | 	if err != nil {
216 | 		return 0, err
217 | 	}
218 | 	return s, nil
219 | }
220 | 
221 | // WriteFile writes cgroup file and handles potential EINTR error while writes to
222 | // the slow device (cgroup)
223 | func (c *V2) WriteFile(name string, content []byte) error {
224 | 	p := filepath.Join(c.path, name)
225 | 	return writeFile(p, content, filePerm)
226 | }
227 | 
228 | // ReadFile reads cgroup file and handles potential EINTR error while read to
229 | // the slow device (cgroup)
230 | func (c *V2) ReadFile(name string) ([]byte, error) {
231 | 	p := filepath.Join(c.path, name)
232 | 	return readFile(p)
233 | }
234 | 


--------------------------------------------------------------------------------
/pkg/forkexec/bench_linux_test.go:
--------------------------------------------------------------------------------
  1 | package forkexec
  2 | 
  3 | import (
  4 | 	"os"
  5 | 	"syscall"
  6 | 	"testing"
  7 | 
  8 | 	"github.com/criyle/go-sandbox/pkg/mount"
  9 | 	"golang.org/x/sys/unix"
 10 | )
 11 | 
 12 | // All testing data were from docker env on amd64 arch
 13 | 
 14 | const (
 15 | 	roBind = unix.MS_BIND | unix.MS_NOSUID | unix.MS_PRIVATE | unix.MS_RDONLY
 16 | )
 17 | 
 18 | var (
 19 | 	defaultBind = []string{"/usr", "/lib", "/lib64", "/bin"}
 20 | )
 21 | 
 22 | func BenchmarkStdFork(b *testing.B) {
 23 | 	f := openNull(b)
 24 | 	defer f.Close()
 25 | 	b.RunParallel(func(pb *testing.PB) {
 26 | 		for pb.Next() {
 27 | 			pid, err := syscall.ForkExec("/bin/echo", nil, &syscall.ProcAttr{
 28 | 				Env:   []string{"PATH=/bin"},
 29 | 				Files: []uintptr{f.Fd(), f.Fd(), f.Fd()},
 30 | 			})
 31 | 			if err != nil {
 32 | 				b.Fatal(err)
 33 | 			}
 34 | 			wait4(pid, b)
 35 | 		}
 36 | 	})
 37 | }
 38 | 
 39 | func BenchmarkStdForkUser(b *testing.B) {
 40 | 	f := openNull(b)
 41 | 	defer f.Close()
 42 | 	b.RunParallel(func(pb *testing.PB) {
 43 | 		for pb.Next() {
 44 | 			pid, err := syscall.ForkExec("/bin/echo", nil, &syscall.ProcAttr{
 45 | 				Env:   []string{"PATH=/bin"},
 46 | 				Files: []uintptr{f.Fd(), f.Fd(), f.Fd()},
 47 | 				Sys: &syscall.SysProcAttr{
 48 | 					Cloneflags: syscall.CLONE_NEWUSER,
 49 | 				},
 50 | 			})
 51 | 			if err != nil {
 52 | 				b.Fatal(err)
 53 | 			}
 54 | 			wait4(pid, b)
 55 | 		}
 56 | 	})
 57 | }
 58 | 
 59 | // BenchmarkSimpleFork is about 0.70ms/op
 60 | func BenchmarkSimpleFork(b *testing.B) {
 61 | 	r, f := getRunner(b)
 62 | 	defer f.Close()
 63 | 	benchmarkRun(r, b)
 64 | }
 65 | 
 66 | // BenchmarkUnsharePid is about 0.79ms/op
 67 | func BenchmarkUnsharePid(b *testing.B) {
 68 | 	r, f := getRunner(b)
 69 | 	defer f.Close()
 70 | 	r.CloneFlags = unix.CLONE_NEWPID
 71 | 	benchmarkRun(r, b)
 72 | }
 73 | 
 74 | // BenchmarkUnshareUser is about 0.84ms/op
 75 | func BenchmarkUnshareUser(b *testing.B) {
 76 | 	r, f := getRunner(b)
 77 | 	defer f.Close()
 78 | 	r.CloneFlags = unix.CLONE_NEWUSER
 79 | 	benchmarkRun(r, b)
 80 | }
 81 | 
 82 | // BenchmarkUnshareUts is about 0.78ms/op
 83 | func BenchmarkUnshareUts(b *testing.B) {
 84 | 	r, f := getRunner(b)
 85 | 	defer f.Close()
 86 | 	r.CloneFlags = unix.CLONE_NEWUTS
 87 | 	benchmarkRun(r, b)
 88 | }
 89 | 
 90 | // BenchmarkUnshareCgroup is about 0.85ms/op
 91 | func BenchmarkUnshareCgroup(b *testing.B) {
 92 | 	r, f := getRunner(b)
 93 | 	defer f.Close()
 94 | 	r.CloneFlags = unix.CLONE_NEWCGROUP
 95 | 	benchmarkRun(r, b)
 96 | }
 97 | 
 98 | // BenchmarkUnshareIpc is about 51ms/op
 99 | func BenchmarkUnshareIpc(b *testing.B) {
100 | 	r, f := getRunner(b)
101 | 	defer f.Close()
102 | 	r.CloneFlags = unix.CLONE_NEWIPC
103 | 	benchmarkRun(r, b)
104 | }
105 | 
106 | // BenchmarkUnshareMount is about 51ms/op
107 | func BenchmarkUnshareMount(b *testing.B) {
108 | 	r, f := getRunner(b)
109 | 	defer f.Close()
110 | 	r.CloneFlags = unix.CLONE_NEWNS
111 | 	benchmarkRun(r, b)
112 | }
113 | 
114 | // BenchmarkUnshareNet is about 426ms/op
115 | func BenchmarkUnshareNet(b *testing.B) {
116 | 	r, f := getRunner(b)
117 | 	defer f.Close()
118 | 	r.CloneFlags = unix.CLONE_NEWNET
119 | 	benchmarkRun(r, b)
120 | }
121 | 
122 | // BenchmarkFastUnshareMountPivot is about 104ms/op
123 | func BenchmarkFastUnshareMountPivot(b *testing.B) {
124 | 	root, err := os.MkdirTemp("", "ns")
125 | 	if err != nil {
126 | 		b.Errorf("failed to create temp dir")
127 | 	}
128 | 	defer os.RemoveAll(root)
129 | 	r, f := getRunner(b)
130 | 	defer f.Close()
131 | 	r.CloneFlags = unix.CLONE_NEWNS | unix.CLONE_NEWPID | unix.CLONE_NEWUSER | unix.CLONE_NEWUTS | unix.CLONE_NEWCGROUP
132 | 	r.PivotRoot = root
133 | 	r.NoNewPrivs = true
134 | 	r.DropCaps = true
135 | 	r.Mounts = getMounts(defaultBind)
136 | 	benchmarkRun(r, b)
137 | }
138 | 
139 | // BenchmarkUnshareAll is about 800ms/op
140 | func BenchmarkUnshareAll(b *testing.B) {
141 | 	r, f := getRunner(b)
142 | 	defer f.Close()
143 | 	r.CloneFlags = UnshareFlags
144 | 	r.NoNewPrivs = true
145 | 	r.DropCaps = true
146 | 	benchmarkRun(r, b)
147 | }
148 | 
149 | // BenchmarkUnshareMountPivot is about 880ms/op
150 | func BenchmarkUnshareMountPivot(b *testing.B) {
151 | 	root, err := os.MkdirTemp("", "ns")
152 | 	if err != nil {
153 | 		b.Errorf("failed to create temp dir")
154 | 	}
155 | 	defer os.RemoveAll(root)
156 | 	r, f := getRunner(b)
157 | 	defer f.Close()
158 | 	r.CloneFlags = UnshareFlags
159 | 	r.PivotRoot = root
160 | 	r.NoNewPrivs = true
161 | 	r.DropCaps = true
162 | 	r.Mounts = getMounts(defaultBind)
163 | 	benchmarkRun(r, b)
164 | }
165 | 
166 | func getRunner(b *testing.B) (*Runner, *os.File) {
167 | 	f := openNull(b)
168 | 	return &Runner{
169 | 		Args:    []string{"/bin/echo"},
170 | 		Env:     []string{"PATH=/bin"},
171 | 		Files:   []uintptr{f.Fd(), f.Fd(), f.Fd()},
172 | 		WorkDir: "/bin",
173 | 	}, f
174 | }
175 | 
176 | func benchmarkRun(r *Runner, b *testing.B) {
177 | 	b.ResetTimer()
178 | 	b.RunParallel(func(pb *testing.PB) {
179 | 		for pb.Next() {
180 | 			pid, err := r.Start()
181 | 			if err != nil {
182 | 				b.Fatal(err)
183 | 			}
184 | 			wait4(pid, b)
185 | 		}
186 | 	})
187 | }
188 | 
189 | func getMounts(dirs []string) []mount.SyscallParams {
190 | 	builder := mount.NewBuilder()
191 | 	for _, d := range dirs {
192 | 		builder.WithMount(mount.Mount{
193 | 			Source: d,
194 | 			Target: d[1:],
195 | 			Flags:  roBind,
196 | 		})
197 | 	}
198 | 	m, _ := builder.FilterNotExist().Build()
199 | 	return m
200 | }
201 | 
// openNull opens /dev/null for reading and writing and returns the file.
// The benchmark is aborted if the file cannot be opened, so callers never
// receive a nil file.
func openNull(b *testing.B) *os.File {
	f, err := os.OpenFile("/dev/null", os.O_RDWR, 0666)
	if err != nil {
		b.Fatalf("failed to open /dev/null: %v", err)
	}
	return f
}
209 | 
// wait4 blocks until the child identified by pid terminates and reports a
// benchmark error when the child exits with a non-zero status, is killed by a
// signal, or cannot be waited for at all.
func wait4(pid int, b *testing.B) {
	var wstat syscall.WaitStatus
	for {
		if _, err := syscall.Wait4(pid, &wstat, 0, nil); err != nil {
			if err == syscall.EINTR {
				// interrupted by a signal, simply wait again
				continue
			}
			// Any other error (for example ECHILD) will never resolve by
			// retrying; report it instead of spinning forever.
			b.Errorf("wait4(%d): %v", pid, err)
			return
		}
		switch {
		case wstat.Exited():
			if s := wstat.ExitStatus(); s != 0 {
				b.Errorf("Exited: %d", s)
			}
			return
		case wstat.Signaled():
			// A signalled child never reports Exited(), so end the wait here.
			b.Errorf("Signaled: %v", wstat.Signal())
			return
		}
	}
}
222 | 


--------------------------------------------------------------------------------
/pkg/forkexec/clone3_linux.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
// cloneArgs holds arguments for clone3 Linux syscall.
// from src/syscall/exec_linux.go:196
//
// Every field is a uint64; pointer-valued arguments are stored as integers,
// as noted in the per-field comments.
type cloneArgs struct {
	flags      uint64 // Flags bit mask
	pidFD      uint64 // Where to store PID file descriptor (int *)
	childTID   uint64 // Where to store child TID, in child's memory (pid_t *)
	parentTID  uint64 // Where to store child TID, in parent's memory (pid_t *)
	exitSignal uint64 // Signal to deliver to parent on child termination
	stack      uint64 // Pointer to lowest byte of stack
	stackSize  uint64 // Size of stack
	tls        uint64 // Location of new TLS
	setTID     uint64 // Pointer to a pid_t array (since Linux 5.5)
	setTIDSize uint64 // Number of elements in set_tid (since Linux 5.5)
	cgroup     uint64 // File descriptor for target cgroup of child (since Linux 5.7)
}
18 | 


--------------------------------------------------------------------------------
/pkg/forkexec/consts_linux.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import (
 4 | 	"golang.org/x/sys/unix"
 5 | )
 6 | 
// Constants that are missing from the syscall package.
const (
	// seccomp operation and flag values; the names follow the Linux seccomp
	// constants of the same name.
	SECCOMP_SET_MODE_STRICT   = 0
	SECCOMP_SET_MODE_FILTER   = 1
	SECCOMP_FILTER_FLAG_TSYNC = 1

	// UnshareFlags is the union of the namespace clone flags used to unshare
	// the ipc, net, mount, pid, user, uts and cgroup namespaces.
	UnshareFlags = unix.CLONE_NEWIPC | unix.CLONE_NEWNET | unix.CLONE_NEWNS |
		unix.CLONE_NEWPID | unix.CLONE_NEWUSER | unix.CLONE_NEWUTS | unix.CLONE_NEWCGROUP

	// bindRo combines MS_BIND and MS_RDONLY; a read-only bind mount needs to
	// be remounted for the read-only flag to apply.
	bindRo = unix.MS_BIND | unix.MS_RDONLY
)
20 | 
// Package-level values shared by the fork/exec child code. The byte slices
// ending in "\000" are NUL-terminated strings; they are used by the unshare
// code that remounts / as private.
var (
	none  = []byte("none\000")
	slash = []byte("/\000")
	empty = []byte("\000")
	tmpfs = []byte("tmpfs\000")

	// tmp dir made by pivot_root
	oldRoot = []byte("old_root\000")

	// set groups for unshare user
	// (these two values carry no trailing NUL, unlike the strings above)
	setGIDAllow = []byte("allow")
	setGIDDeny  = []byte("deny")

	// go does not allow constant uintptr to be negative...
	// so AT_FDCWD is kept in a variable instead of a constant
	_AT_FDCWD = unix.AT_FDCWD

	// Drop all capabilities: header selecting capability version 3 with
	// Pid 0, used together with dropCapData below.
	dropCapHeader = unix.CapUserHeader{
		Version: unix.LINUX_CAPABILITY_VERSION_3,
		Pid:     0,
	}

	// Capability data with the effective, permitted and inheritable sets all
	// cleared.
	dropCapData = unix.CapUserData{
		Effective:   0,
		Permitted:   0,
		Inheritable: 0,
	}

	// 1ms (1 * 1000 * 1000 nanoseconds); judging by the name, the pause
	// between retries after ETXTBSY.
	etxtbsyRetryInterval = unix.Timespec{
		Nsec: 1 * 1000 * 1000,
	}
)
55 | 
// Secure-bits flag values. Each constant is the next power of two
// (1, 2, 4, ... 128) because the whole block repeats 1 << iota; every flag is
// immediately followed by its _LOCKED counterpart.
const (
	_SECURE_NOROOT = 1 << iota
	_SECURE_NOROOT_LOCKED

	_SECURE_NO_SETUID_FIXUP
	_SECURE_NO_SETUID_FIXUP_LOCKED

	_SECURE_KEEP_CAPS
	_SECURE_KEEP_CAPS_LOCKED

	_SECURE_NO_CAP_AMBIENT_RAISE
	_SECURE_NO_CAP_AMBIENT_RAISE_LOCKED
)
69 | 


--------------------------------------------------------------------------------
/pkg/forkexec/doc.go:
--------------------------------------------------------------------------------
// Package forkexec provides an interface to run a subprocess with a seccomp filter
// and rlimits applied, optionally containerized or ptraced.
3 | //
4 | // unshare cgroup namespace requires kernel >= 4.6
5 | // seccomp, unshare pid / user namespaces requires kernel >= 3.8
6 | // pipe2, dup3 requires kernel >= 2.6.27
7 | package forkexec
8 | 


--------------------------------------------------------------------------------
/pkg/forkexec/errloc_linux.go:
--------------------------------------------------------------------------------
  1 | package forkexec
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"syscall"
  6 | )
  7 | 
// ErrorLocation identifies the step at which the child process failed before
// or during exec. Valid values are the Loc* constants; String maps them to a
// short name and returns "unknown" for anything else.
type ErrorLocation int
 10 | 
// ChildError defines the specific error and location where it failed
type ChildError struct {
	// Err is the errno reported for the failure.
	Err syscall.Errno
	// Location is the step that failed.
	Location ErrorLocation
	// Index is included in the error message only when it is greater than 0.
	Index int
}
 17 | 
// Location constants
//
// The values start at 1 (iota + 1), so the zero value of ErrorLocation is not
// a valid location. The order must stay in sync with locToString, which is
// indexed by these values.
const (
	LocClone ErrorLocation = iota + 1
	LocCloseWrite
	LocUnshareUserRead
	LocGetPid
	LocKeepCapability
	LocSetGroups
	LocSetGid
	LocSetUid
	LocDup3
	LocFcntl
	LocSetSid
	LocIoctl
	LocMountRoot
	LocMountTmpfs
	LocMountChdir
	LocMount
	LocMountMkdir
	LocPivotRoot
	LocUmount
	LocUnlink
	LocMountRootReadonly
	LocChdir
	LocSetRlimit
	LocSetNoNewPrivs
	LocDropCapability
	LocSetCap
	LocPtraceMe
	LocStop
	LocSeccomp
	LocSyncWrite
	LocSyncRead
	LocExecve
)
 53 | 
 54 | var locToString = []string{
 55 | 	"unknown",
 56 | 	"clone",
 57 | 	"close_write",
 58 | 	"unshare_user_read",
 59 | 	"getpid",
 60 | 	"keep_capability",
 61 | 	"setgroups",
 62 | 	"setgid",
 63 | 	"setuid",
 64 | 	"dup3",
 65 | 	"fcntl",
 66 | 	"setsid",
 67 | 	"ioctl",
 68 | 	"mount(root)",
 69 | 	"mount(tmpfs)",
 70 | 	"mount(chdir)",
 71 | 	"mount",
 72 | 	"mount(mkdir)",
 73 | 	"pivot_root",
 74 | 	"umount",
 75 | 	"unlink",
 76 | 	"mount(readonly)",
 77 | 	"chdir",
 78 | 	"setrlimt",
 79 | 	"set_no_new_privs",
 80 | 	"drop_capability",
 81 | 	"set_cap",
 82 | 	"ptrace_me",
 83 | 	"stop",
 84 | 	"seccomp",
 85 | 	"sync_write",
 86 | 	"sync_read",
 87 | 	"execve",
 88 | }
 89 | 
 90 | func (e ErrorLocation) String() string {
 91 | 	if e >= LocClone && e <= LocExecve {
 92 | 		return locToString[e]
 93 | 	}
 94 | 	return "unknown"
 95 | }
 96 | 
 97 | func (e ChildError) Error() string {
 98 | 	if e.Index > 0 {
 99 | 		return fmt.Sprintf("%s(%d): %s", e.Location.String(), e.Index, e.Err.Error())
100 | 	}
101 | 	return fmt.Sprintf("%s: %s", e.Location.String(), e.Err.Error())
102 | }
103 | 


--------------------------------------------------------------------------------
/pkg/forkexec/fork_child_darwin.go:
--------------------------------------------------------------------------------
  1 | package forkexec
  2 | 
  3 | import (
  4 | 	"syscall"
  5 | 	"unsafe"
  6 | )
  7 | 
// forkAndExecInChild forks and, in the child, applies the Runner
// configuration (fd layout, working directory, rlimits, sandbox profile),
// synchronizes with the parent over the socket pair p and finally calls
// execve.
//
// In the parent it returns right after fork with r1 holding fork's return
// value and err1 its errno. The child never returns: on any failure it
// writes the errno to its end of the socket pair and exits.
//
// Reference to src/syscall/exec_darwin.go
//
//go:norace
func forkAndExecInChild(r *Runner, argv0 *byte, argv, env []*byte, workdir, profile *byte, p [2]int) (r1 uintptr, err1 syscall.Errno) {
	var (
		err2   syscall.Errno
		errBuf *byte
	)

	// similar to exec_linux, avoid side effect by shuffling around
	// (everything that allocates is done here, before fork)
	fd, nextfd := prepareFds(r.Files)
	// the child communicates with the parent through p[1]
	pipe := p[1]

	// About to call fork.
	// No more allocation or calls of non-assembly functions.
	beforeFork()

	// fork through the libc trampoline
	r1, _, err1 = rawSyscall(libc_fork_trampoline_addr, 0, 0, 0)
	if err1 != 0 || r1 != 0 {
		// in parent process, immediate return
		return
	}

	// In child process
	afterForkInChild()
	// Notice: cannot call any GO functions beyond this point

	// Close p[0], the end of the socket pair that belongs to the parent
	if _, _, err1 = rawSyscall(libc_close_trampoline_addr, uintptr(p[0]), 0, 0); err1 != 0 {
		goto childerror
	}

	// Set pg id
	_, _, err1 = rawSyscall(libc_setpgid_trampoline_addr, 0, 0, 0)
	if err1 != 0 {
		goto childerror
	}

	// Pass 1 & pass 2 assigns fds for child process
	// Pass 1: fd[i] < i => nextfd
	// The sync socket is moved first when its number is below nextfd, so the
	// duplications below cannot overwrite it.
	if pipe < nextfd {
		_, _, err1 = rawSyscall(libc_dup2_trampoline_addr, uintptr(pipe), uintptr(nextfd), 0)
		if err1 != 0 {
			goto childerror
		}
		// keep the moved socket close-on-exec (result intentionally ignored)
		rawSyscall(libc_fcntl_trampoline_addr, uintptr(nextfd), syscall.F_SETFD, syscall.FD_CLOEXEC)
		pipe = nextfd
		nextfd++
	}
	for i := 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] < int(i) {
			// Avoid fd rewrite
			if nextfd == pipe {
				nextfd++
			}
			_, _, err1 = rawSyscall(libc_dup2_trampoline_addr, uintptr(fd[i]), uintptr(nextfd), 0)
			if err1 != 0 {
				goto childerror
			}
			// Set up close on exec for the temporary copy
			rawSyscall(libc_fcntl_trampoline_addr, uintptr(nextfd), syscall.F_SETFD, syscall.FD_CLOEXEC)
			fd[i] = nextfd
			nextfd++
		}
	}
	// Pass 2: fd[i] => i
	for i := 0; i < len(fd); i++ {
		if fd[i] == -1 {
			// -1 means slot i must be closed in the child
			rawSyscall(libc_close_trampoline_addr, uintptr(i), 0, 0)
			continue
		}
		if fd[i] == int(i) {
			// dup2(i, i) will not clear close on exec flag, need to reset the flag
			_, _, err1 = rawSyscall(libc_fcntl_trampoline_addr, uintptr(fd[i]), syscall.F_SETFD, 0)
			if err1 != 0 {
				goto childerror
			}
			continue
		}
		_, _, err1 = rawSyscall(libc_dup2_trampoline_addr, uintptr(fd[i]), uintptr(i), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// chdir for child
	if workdir != nil {
		_, _, err1 = rawSyscall(libc_chdir_trampoline_addr, uintptr(unsafe.Pointer(workdir)), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Set limit
	for _, rlim := range r.RLimits {
		_, _, err1 = rawSyscall(libc_setrlimit_trampoline_addr, uintptr(rlim.Res), uintptr(unsafe.Pointer(&rlim.Rlim)), 0)
		if err1 != 0 {
			// EINVAL for RLIMIT_DATA / RLIMIT_AS is tolerated and skipped
			if err1 == syscall.EINVAL && (rlim.Res == syscall.RLIMIT_DATA || rlim.Res == syscall.RLIMIT_AS) {
				continue
			}
			goto childerror
		}
	}

	// Load sandbox profile
	if profile != nil {
		r1, _, err1 = rawSyscall(libc_sandbox_init_trampoline_addr, uintptr(unsafe.Pointer(profile)), 0, uintptr(unsafe.Pointer(&errBuf)))
		if err1 != 0 {
			goto childerror
		}
		if r1 != 0 {
			// non-zero return without errno: report the fixed code 253
			// NOTE(review): errBuf is not released on this path
			err1 = 253
			goto childerror
		}
		rawSyscall(libc_sandbox_free_error_trampoline_addr, uintptr(unsafe.Pointer(errBuf)), 0, 0)
	}

	// Sync before exec: send a zero errno to the parent ...
	err2 = 0
	r1, _, err1 = rawSyscall(libc_write_trampoline_addr, uintptr(pipe), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
	if r1 == 0 || err1 != 0 {
		goto childerror
	}

	// ... and block until the parent answers; nothing read means failure
	r1, _, err1 = rawSyscall(libc_read_trampoline_addr, uintptr(pipe), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
	if r1 == 0 || err1 != 0 {
		goto childerror
	}

	// Time to exec.
	_, _, err1 = rawSyscall(libc_execve_trampoline_addr,
		uintptr(unsafe.Pointer(argv0)),
		uintptr(unsafe.Pointer(&argv[0])),
		uintptr(unsafe.Pointer(&env[0])))

	// reached on any failure above and when execve itself returns
childerror:
	// send error code on pipe
	rawSyscall(libc_write_trampoline_addr, uintptr(pipe), uintptr(unsafe.Pointer(&err1)), unsafe.Sizeof(err1))
	// exit with err1+err2 as status; loop in case exit ever returns
	for {
		rawSyscall(libc_exit_trampoline_addr, uintptr(err1+err2), 0, 0)
	}
}
150 | 


--------------------------------------------------------------------------------
/pkg/forkexec/fork_darwin.go:
--------------------------------------------------------------------------------
  1 | package forkexec
  2 | 
  3 | import (
  4 | 	"syscall"
  5 | 	"unsafe"
  6 | 
  7 | 	"golang.org/x/sys/unix"
  8 | )
  9 | 
// Start forks a child that applies the Runner configuration (file
// descriptors, working directory, resource limits, sandbox profile) and then
// calls execve.
// It returns the child pid and a potential error.
func (r *Runner) Start() (int, error) {
	// Convert all strings up front: nothing may allocate after fork.
	argv0, argv, env, err := prepareExec(r.Args, r.Env)
	if err != nil {
		return 0, err
	}

	// prepare work dir
	workdir, err := syscallStringFromString(r.WorkDir)
	if err != nil {
		return 0, err
	}

	// prepare sandbox profile
	profile, err := syscallStringFromString(r.SandboxProfile)
	if err != nil {
		return 0, err
	}

	// ensure the socketpair created did not leak to child
	// (the lock is held from socket creation until the fork has returned)
	syscall.ForkLock.Lock()

	// socketpair p is also used to sync with parent before final execve
	// p[0] is used by parent and p[1] is used by child
	var p [2]int
	if err := forkExecSocketPair(&p); err != nil {
		syscall.ForkLock.Unlock()
		return 0, err
	}

	// fork in child
	pid, err1 := forkAndExecInChild(r, argv0, argv, env, workdir, profile, p)

	// restore all signals
	afterFork()

	syscall.ForkLock.Unlock()

	// parent side of the handshake; also reaps the child on failure
	return syncWithChild(r, p, int(pid), err1)
}
 53 | 
 54 | func forkExecSocketPair(p *[2]int) error {
 55 | 	var err error
 56 | 	*p, err = syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM, 0)
 57 | 	if err != nil {
 58 | 		return err
 59 | 	}
 60 | 	_, err = fcntl(p[0], syscall.F_SETFD, syscall.FD_CLOEXEC)
 61 | 	if err != nil {
 62 | 		return err
 63 | 	}
 64 | 	_, err = fcntl(p[1], syscall.F_SETFD, syscall.FD_CLOEXEC)
 65 | 	if err != nil {
 66 | 		return err
 67 | 	}
 68 | 	return nil
 69 | }
 70 | 
 71 | func syncWithChild(r *Runner, p [2]int, pid int, err1 syscall.Errno) (int, error) {
 72 | 	var (
 73 | 		r1   uintptr
 74 | 		err2 syscall.Errno
 75 | 		err  error
 76 | 	)
 77 | 
 78 | 	// sync with child
 79 | 	unix.Close(p[1])
 80 | 
 81 | 	// clone syscall failed
 82 | 	if err1 != 0 {
 83 | 		unix.Close(p[0])
 84 | 		return 0, syscall.Errno(err1)
 85 | 	}
 86 | 	r1, _, err1 = syscall3(libc_read_trampoline_addr, uintptr(p[0]), uintptr(unsafe.Pointer(&err2)), uintptr(unsafe.Sizeof(err2)))
 87 | 	// child returned error code
 88 | 	if r1 != unsafe.Sizeof(err2) || err2 != 0 || err1 != 0 {
 89 | 		err = handlePipeError(r1, err2)
 90 | 		goto fail
 91 | 	}
 92 | 
 93 | 	// if syncfunc return error, then fail child immediately
 94 | 	if r.SyncFunc != nil {
 95 | 		if err = r.SyncFunc(int(pid)); err != nil {
 96 | 			goto fail
 97 | 		}
 98 | 	}
 99 | 	// otherwise, ack child (err1 == 0)
100 | 	r1, _, err1 = syscall3(libc_write_trampoline_addr, uintptr(p[0]), uintptr(unsafe.Pointer(&err1)), uintptr(unsafe.Sizeof(err1)))
101 | 	if err1 != 0 {
102 | 		goto fail
103 | 	}
104 | 
105 | 	// if read anything mean child failed after sync (close_on_exec so it should not block)
106 | 	r1, _, err1 = syscall3(libc_read_trampoline_addr, uintptr(p[0]), uintptr(unsafe.Pointer(&err2)), uintptr(unsafe.Sizeof(err2)))
107 | 	unix.Close(p[0])
108 | 	if r1 != 0 || err1 != 0 {
109 | 		err = handlePipeError(r1, err2)
110 | 		goto failAfterClose
111 | 	}
112 | 	return int(pid), nil
113 | 
114 | fail:
115 | 	unix.Close(p[0])
116 | 
117 | failAfterClose:
118 | 	handleChildFailed(int(pid))
119 | 	return 0, err
120 | }
121 | 
// handlePipeError turns the outcome of a read on the sync socket into an
// error: errno when exactly one whole errno was read (r1 equals its size),
// EPIPE for any other length.
func handlePipeError(r1 uintptr, errno syscall.Errno) error {
	if r1 != unsafe.Sizeof(errno) {
		return syscall.EPIPE
	}
	return errno
}
129 | 
// handleChildFailed kills the child identified by pid and reaps it so that
// failed children do not accumulate as zombies.
func handleChildFailed(pid int) {
	// SIGKILL first, so the wait below cannot block on a live child
	syscall.Kill(pid, syscall.SIGKILL)

	// reap the child, retrying while the wait is interrupted
	var status syscall.WaitStatus
	for {
		if _, err := syscall.Wait4(pid, &status, 0, nil); err != syscall.EINTR {
			return
		}
	}
}
140 | 


--------------------------------------------------------------------------------
/pkg/forkexec/fork_linux.go:
--------------------------------------------------------------------------------
  1 | package forkexec
  2 | 
  3 | import (
  4 | 	"syscall"
  5 | 	"unsafe" // required for go:linkname.
  6 | 
  7 | 	"golang.org/x/sys/unix"
  8 | )
  9 | 
// Start forks the child, which loads the seccomp filter and calls execve,
// optionally being traced by ptrace.
// It returns the child pid and a potential error.
// The runtime OS thread must be locked before calling this function
// if ptrace is set to true.
func (r *Runner) Start() (int, error) {
	// Convert all strings up front, before the fork.
	argv0, argv, env, err := prepareExec(r.Args, r.Env)
	if err != nil {
		return 0, err
	}

	// prepare work dir
	workdir, err := syscallStringFromString(r.WorkDir)
	if err != nil {
		return 0, err
	}

	// prepare hostname
	hostname, err := syscallStringFromString(r.HostName)
	if err != nil {
		return 0, err
	}

	// prepare domainname
	domainname, err := syscallStringFromString(r.DomainName)
	if err != nil {
		return 0, err
	}

	// prepare pivot_root param
	pivotRoot, err := syscallStringFromString(r.PivotRoot)
	if err != nil {
		return 0, err
	}

	// socketpair p used to notify child the uid / gid mapping have been setup
	// socketpair p is also used to sync with parent before final execve
	// p[0] is used by parent and p[1] is used by child
	// SOCK_CLOEXEC keeps both ends from surviving the final execve
	p, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
	if err != nil {
		return 0, err
	}

	// fork in child
	pid, err1 := forkAndExecInChild(r, argv0, argv, env, workdir, hostname, domainname, pivotRoot, p)

	// restore all signals
	afterFork()
	// NOTE(review): the matching ForkLock.Lock is not in this function;
	// presumably it is taken inside forkAndExecInChild - confirm.
	syscall.ForkLock.Unlock()

	// parent side of the handshake; also reaps the child on failure
	return syncWithChild(r, p, int(pid), err1)
}
 61 | 
 62 | func syncWithChild(r *Runner, p [2]int, pid int, err1 syscall.Errno) (int, error) {
 63 | 	var (
 64 | 		err2        syscall.Errno
 65 | 		err         error
 66 | 		unshareUser = r.CloneFlags&unix.CLONE_NEWUSER == unix.CLONE_NEWUSER
 67 | 		childErr    ChildError
 68 | 		n           int
 69 | 	)
 70 | 
 71 | 	// sync with child
 72 | 	unix.Close(p[1])
 73 | 
 74 | 	// clone syscall failed
 75 | 	if err1 != 0 {
 76 | 		unix.Close(p[0])
 77 | 		childErr.Location = LocClone
 78 | 		childErr.Err = err1
 79 | 		return 0, childErr
 80 | 	}
 81 | 
 82 | 	// synchronize with child for uid / gid map
 83 | 	if unshareUser {
 84 | 		if err = writeIDMaps(r, int(pid)); err != nil {
 85 | 			err2 = err.(syscall.Errno)
 86 | 		}
 87 | 		syscall.RawSyscall(syscall.SYS_WRITE, uintptr(p[0]), uintptr(unsafe.Pointer(&err2)), uintptr(unsafe.Sizeof(err2)))
 88 | 	}
 89 | 
 90 | 	// if syncfunc return error, then fail child immediately
 91 | 	// only sync if there is a syncFunc
 92 | 	if r.SyncFunc != nil {
 93 | 		n, err = readChildErr(p[0], &childErr)
 94 | 		// child returned error code
 95 | 		if (n != int(unsafe.Sizeof(err2)) && n != int(unsafe.Sizeof(childErr))) || childErr.Err != 0 || err != nil {
 96 | 			childErr.Err = handlePipeError(n, childErr.Err)
 97 | 			goto fail
 98 | 		}
 99 | 		if err = r.SyncFunc(int(pid)); err != nil {
100 | 			goto fail
101 | 		}
102 | 		// otherwise, ack child (err1 == 0)
103 | 		syscall.RawSyscall(syscall.SYS_WRITE, uintptr(p[0]), uintptr(unsafe.Pointer(&err1)), uintptr(unsafe.Sizeof(err1)))
104 | 	}
105 | 
106 | 	// if stopped before execve by signal SIGSTOP or PTRACE_ME, then do not wait until execve
107 | 	if r.StopBeforeSeccomp || (r.Seccomp != nil && r.Ptrace) {
108 | 		// let's wait it in another goroutine to avoid SIGPIPE
109 | 		go func() {
110 | 			readChildErr(p[0], &childErr)
111 | 			unix.Close(p[0])
112 | 		}()
113 | 		return int(pid), nil
114 | 	}
115 | 
116 | 	// if read anything mean child failed after sync (close_on_exec so it should not block)
117 | 	n, err = readChildErr(p[0], &childErr)
118 | 	unix.Close(p[0])
119 | 	if n != 0 || err != nil {
120 | 		childErr.Err = handlePipeError(n, childErr.Err)
121 | 		goto failAfterClose
122 | 	}
123 | 	return int(pid), nil
124 | 
125 | fail:
126 | 	unix.Close(p[0])
127 | 
128 | failAfterClose:
129 | 	handleChildFailed(int(pid))
130 | 	if childErr.Err == 0 {
131 | 		return 0, err
132 | 	}
133 | 	return 0, childErr
134 | }
135 | 
136 | func readChildErr(fd int, childErr *ChildError) (n int, err error) {
137 | 	for {
138 | 		n, err = readlen(fd, (*byte)(unsafe.Pointer(childErr)), int(unsafe.Sizeof(*childErr)))
139 | 		if err != syscall.EINTR {
140 | 			break
141 | 		}
142 | 	}
143 | 	return
144 | }
145 | 
// readlen issues a single read(2) of at most np bytes from fd into the memory
// at p and returns the raw byte count together with the errno, if any.
// https://cs.opensource.google/go/go/+/refs/tags/go1.18.1:src/syscall/zsyscall_linux_amd64.go;l=944
func readlen(fd int, p *byte, np int) (n int, err error) {
	r0, _, errno := syscall.Syscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(p)), uintptr(np))
	if errno != 0 {
		return int(r0), errno
	}
	return int(r0), nil
}
155 | 
// handlePipeError picks the errno to report after reading from the sync
// socket: errno itself when at least one whole errno was read, EPIPE when
// the read was shorter than that.
func handlePipeError(r1 int, errno syscall.Errno) syscall.Errno {
	if uintptr(r1) < unsafe.Sizeof(errno) {
		return syscall.EPIPE
	}
	return errno
}
163 | 
// handleChildFailed kills the child identified by pid and reaps it so that
// failed children do not accumulate as zombies.
func handleChildFailed(pid int) {
	// SIGKILL first, so the wait below cannot block on a live child
	syscall.Kill(pid, syscall.SIGKILL)

	// reap the child, retrying while the wait is interrupted
	var status syscall.WaitStatus
	for {
		if _, err := syscall.Wait4(pid, &status, 0, nil); err != syscall.EINTR {
			return
		}
	}
}
174 | 


--------------------------------------------------------------------------------
/pkg/forkexec/fork_linux_test.go:
--------------------------------------------------------------------------------
  1 | package forkexec
  2 | 
  3 | import (
  4 | 	"os"
  5 | 	"syscall"
  6 | 	"testing"
  7 | 
  8 | 	"github.com/criyle/go-sandbox/pkg/mount"
  9 | )
 10 | 
 11 | func TestFork_DropCaps(t *testing.T) {
 12 | 	t.Parallel()
 13 | 	r := Runner{
 14 | 		Args:       []string{"/bin/echo"},
 15 | 		CloneFlags: syscall.CLONE_NEWUSER,
 16 | 		DropCaps:   true,
 17 | 	}
 18 | 	_, err := r.Start()
 19 | 	if err != nil {
 20 | 		t.Fatal(err)
 21 | 	}
 22 | }
 23 | 
 24 | func TestFork_ETXTBSY(t *testing.T) {
 25 | 	f, err := os.CreateTemp("", "")
 26 | 	if err != nil {
 27 | 		t.Fatal(err)
 28 | 	}
 29 | 	t.Cleanup(func() {
 30 | 		os.Remove(f.Name())
 31 | 		f.Close()
 32 | 	})
 33 | 
 34 | 	if err := f.Chmod(0777); err != nil {
 35 | 		t.Fatal(err)
 36 | 	}
 37 | 
 38 | 	echo, err := os.Open("/bin/echo")
 39 | 	if err != nil {
 40 | 		t.Fatal(err)
 41 | 	}
 42 | 	defer echo.Close()
 43 | 
 44 | 	_, err = f.ReadFrom(echo)
 45 | 	if err != nil {
 46 | 		t.Fatal(err)
 47 | 	}
 48 | 
 49 | 	r := Runner{
 50 | 		Args:     []string{f.Name()},
 51 | 		ExecFile: f.Fd(),
 52 | 	}
 53 | 	_, err = r.Start()
 54 | 	e, ok := err.(ChildError)
 55 | 	if !ok {
 56 | 		t.Fatalf("not a child error")
 57 | 	}
 58 | 	if e.Err != syscall.ETXTBSY && e.Location != LocExecve && e.Index != 0 {
 59 | 		t.Fatal(err)
 60 | 	}
 61 | }
 62 | 
 63 | func TestFork_OK(t *testing.T) {
 64 | 	t.Parallel()
 65 | 	f, err := os.CreateTemp("", "")
 66 | 	if err != nil {
 67 | 		t.Fatal(err)
 68 | 	}
 69 | 	defer os.Remove(f.Name())
 70 | 
 71 | 	if err := f.Chmod(0777); err != nil {
 72 | 		t.Fatal(err)
 73 | 	}
 74 | 
 75 | 	echo, err := os.Open("/bin/echo")
 76 | 	if err != nil {
 77 | 		t.Fatal(err)
 78 | 	}
 79 | 	defer echo.Close()
 80 | 
 81 | 	_, err = f.ReadFrom(echo)
 82 | 	if err != nil {
 83 | 		t.Fatal(err)
 84 | 	}
 85 | 	f.Close()
 86 | 
 87 | 	r := Runner{
 88 | 		Args: []string{f.Name()},
 89 | 	}
 90 | 	_, err = r.Start()
 91 | 	if err != nil {
 92 | 		t.Fatal(err)
 93 | 	}
 94 | }
 95 | 
 96 | func TestFork_ENOENT(t *testing.T) {
 97 | 	t.Parallel()
 98 | 	m, err := mount.NewBuilder().
 99 | 		WithMount(
100 | 			mount.Mount{
101 | 				Source: "NOT_EXISTS",
102 | 			}).Build()
103 | 	if err != nil {
104 | 		t.Fatal(err)
105 | 	}
106 | 	r := Runner{
107 | 		Args:       []string{"/bin/echo"},
108 | 		CloneFlags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUSER,
109 | 		Mounts:     m,
110 | 	}
111 | 	_, err = r.Start()
112 | 	e, ok := err.(ChildError)
113 | 	if !ok {
114 | 		t.Fatalf("not a child error")
115 | 	}
116 | 	if e.Err != syscall.ENOENT && e.Location != LocExecve {
117 | 		t.Fatal(err)
118 | 	}
119 | }
120 | 


--------------------------------------------------------------------------------
/pkg/forkexec/fork_unix.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import _ "unsafe" // to use go:linkname
 4 | 
// beforeFork is linked to syscall.runtime_BeforeFork. It is called right
// before forking; after it no allocation or calls of non-assembly functions
// are allowed until the fork has happened.
//
//go:linkname beforeFork syscall.runtime_BeforeFork
func beforeFork()

// afterFork is linked to syscall.runtime_AfterFork. The parent calls it once
// the fork has returned to restore all signals.
//
//go:linkname afterFork syscall.runtime_AfterFork
func afterFork()

// afterForkInChild is linked to syscall.runtime_AfterForkInChild. The child
// calls it immediately after the fork.
//
//go:linkname afterForkInChild syscall.runtime_AfterForkInChild
func afterForkInChild()
13 | 


--------------------------------------------------------------------------------
/pkg/forkexec/fork_util.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | )
 6 | 
 7 | // prepareExec prepares execve parameters
 8 | func prepareExec(Args, Env []string) (*byte, []*byte, []*byte, error) {
 9 | 	// make exec args0
10 | 	argv0, err := syscall.BytePtrFromString(Args[0])
11 | 	if err != nil {
12 | 		return nil, nil, nil, err
13 | 	}
14 | 	// make exec args
15 | 	argv, err := syscall.SlicePtrFromStrings(Args)
16 | 	if err != nil {
17 | 		return nil, nil, nil, err
18 | 	}
19 | 	// make env
20 | 	env, err := syscall.SlicePtrFromStrings(Env)
21 | 	if err != nil {
22 | 		return nil, nil, nil, err
23 | 	}
24 | 	return argv0, argv, env, nil
25 | }
26 | 
// prepareFds converts files into a slice of int descriptors and returns it
// together with the first descriptor number that is above both every listed
// descriptor and the number of entries.
func prepareFds(files []uintptr) ([]int, int) {
	highest := len(files)
	fds := make([]int, 0, len(files))
	for _, f := range files {
		n := int(f)
		fds = append(fds, n)
		if n > highest {
			highest = n
		}
	}
	return fds, highest + 1
}
40 | 
// syscallStringFromString converts str into a NUL-terminated byte pointer.
// An empty string yields a nil pointer and no error.
func syscallStringFromString(str string) (*byte, error) {
	if str == "" {
		return nil, nil
	}
	return syscall.BytePtrFromString(str)
}
48 | 


--------------------------------------------------------------------------------
/pkg/forkexec/runner_darwin.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import (
 4 | 	"github.com/criyle/go-sandbox/pkg/rlimit"
 5 | )
 6 | 
// Runner is the configuration including the exec path, argv
// and resource limits.
type Runner struct {
	// argv and env for execve syscall for the child process
	// (Args[0] is also used as the path of the program to execute)
	Args []string
	Env  []string

	// POSIX Resource limit set by set rlimit
	RLimits []rlimit.RLimit

	// file descriptors map for new process, from 0 to len - 1
	Files []uintptr

	// work path set by chdir(dir) (current working directory for child);
	// skipped when empty
	WorkDir string

	// sandbox profile passed to sandbox_init in the child; skipped when empty
	SandboxProfile string

	// Parent and child process with sync status through a socket pair.
	// SyncFunc will invoke with the child pid. If SyncFunc return some error,
	// parent will signal child to stop and report the error
	// SyncFunc is called right before execve, thus it could track cpu more accurately
	SyncFunc func(int) error
}
33 | 


--------------------------------------------------------------------------------
/pkg/forkexec/runner_linux.go:
--------------------------------------------------------------------------------
  1 | package forkexec
  2 | 
  3 | import (
  4 | 	"syscall"
  5 | 
  6 | 	"github.com/criyle/go-sandbox/pkg/mount"
  7 | 	"github.com/criyle/go-sandbox/pkg/rlimit"
  8 | )
  9 | 
// Runner is the configuration including the exec path, argv
// and resource limits. It can create a tracee for a ptrace-based tracer.
// It can also create an unshared process in another namespace.
type Runner struct {
	// argv and env for execve syscall for the child process
	// (Args[0] is also used as the path of the program to execute)
	Args []string
	Env  []string

	// if exec_fd is defined, then at the end, fd_execve is called
	ExecFile uintptr

	// POSIX Resource limit set by set rlimit
	RLimits []rlimit.RLimit

	// file descriptors map for new process, from 0 to len - 1
	Files []uintptr

	// work path set by chdir(dir) (current working directory for child)
	// if pivot_root is defined, this will execute after changed to new root
	WorkDir string

	// seccomp syscall filter applied to child
	Seccomp *syscall.SockFprog

	// clone unshare flag to create linux namespace, effective when clone child
	// since unshare syscall does not join the new pid group
	CloneFlags uintptr

	// mounts defines the mount syscalls after unshare mount namespace
	// need CAP_SYS_ADMIN inside the namespace (e.g. unshare user namespace)
	// if pivot root is provided, relative target is better for chdir-mount meta
	// and pivot root will mount as tmpfs before any mount
	Mounts []mount.SyscallParams

	// pivot_root defines a readonly new root after unshare mount namespace
	// it should be a directory given as an absolute path and should be used
	// together with mounts
	// Call path:
	// mount("tmpfs", root, "tmpfs", 0, nil)
	// chdir(root)
	// [do mounts]
	// mkdir("old_root")
	// pivot_root(root, "old_root")
	// umount("old_root", MNT_DETACH)
	// rmdir("old_root")
	// mount("tmpfs", "/", "tmpfs", MS_BIND | MS_REMOUNT | MS_RDONLY | MS_NOATIME | MS_NOSUID, nil)
	PivotRoot string

	// HostName and DomainName to be set after unshare UTS & user (CAP_SYS_ADMIN)
	HostName, DomainName string

	// UidMappings / GidMappings for unshared user namespaces, no-op if mapping is null
	UIDMappings []syscall.SysProcIDMap
	GIDMappings []syscall.SysProcIDMap

	// CgroupFd to use when clone3 with CLONE_INTO_CGROUP with kernel >=5.7 and cgroup v2
	CgroupFd uintptr

	// Credential holds user and group identities to be assumed
	// by a child process started by StartProcess.
	Credential *syscall.Credential

	// Parent and child process with sync status through a socket pair.
	// SyncFunc will invoke with the child pid. If SyncFunc return some error,
	// parent will signal child to stop and report the error
	// SyncFunc is called right before execve, thus it could track cpu more accurately
	SyncFunc func(int) error

	// ptrace controls child process to call ptrace(PTRACE_TRACEME)
	// runtime.LockOSThread is required for tracer to call ptrace syscalls
	Ptrace bool

	// no_new_privs calls prctl(PR_SET_NO_NEW_PRIVS) to 0 to disable calls to
	// setuid processes. It is automatically enabled when seccomp filter is provided
	NoNewPrivs bool

	// stop before seccomp calls kill(getpid(), SIGSTOP) to wait for tracer to continue
	// right before the calls to seccomp. It is automatically enabled when seccomp
	// filter and ptrace are provided since kill might not be available after
	// seccomp and execve might be traced by ptrace
	// cannot stop after seccomp since kill might not be allowed by seccomp filter
	StopBeforeSeccomp bool

	// GidMappingsEnableSetgroups allows / disallows setgroups syscall.
	// deny if GIDMappings is nil
	GIDMappingsEnableSetgroups bool

	// drop_caps calls cap_set(self, 0) to drop all capabilities
	// from effective, permitted, inheritable capability sets before execve
	// it should avoid calls to set ambient capabilities
	DropCaps bool

	// UnshareCgroupAfterSync specifies whether to unshare cgroup namespace after
	// sync (the syncFunc might add the child to the cgroup)
	UnshareCgroupAfterSync bool

	// CTTY specifies whether to set fd 0 as the controlling TTY
	CTTY bool
}
108 | 


--------------------------------------------------------------------------------
/pkg/forkexec/sandbox_darwin_test.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import (
 4 | 	"os"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func TestWrite(t *testing.T) {
 9 | 	c, err := os.ReadFile("test.sb")
10 | 	if err != nil {
11 | 		t.Error(err)
12 | 		return
13 | 	}
14 | 
15 | 	// before load profile, it is ok
16 | 	f, err := os.OpenFile("/tmp/sandbox_test", os.O_CREATE|os.O_RDWR, 0777)
17 | 	if err != nil {
18 | 		t.Error(err)
19 | 		return
20 | 	}
21 | 	f.Close()
22 | 
23 | 	if err = SandboxLoadProfile(string(c)); err != nil {
24 | 		t.Error(err)
25 | 		return
26 | 	}
27 | 
28 | 	// after is not ok
29 | 	f, err = os.OpenFile("/tmp/sandbox_test", os.O_CREATE|os.O_RDWR, 0777)
30 | 	if !os.IsPermission(err) {
31 | 		t.Error(err)
32 | 		return
33 | 	}
34 | 	f.Close()
35 | }
36 | 


--------------------------------------------------------------------------------
/pkg/forkexec/sandbox_load_darwin.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import (
 4 | 	"errors"
 5 | 	"os"
 6 | 	"syscall"
 7 | 	"unsafe"
 8 | )
 9 | 
// goString copies the NUL-terminated C string that b points to into a Go
// string. The terminating NUL byte is not included. A nil pointer or an empty
// C string yields "".
//
// b must point to memory that contains a NUL terminator; the scan is not
// bounded otherwise.
func goString(b *byte) string {
	if b == nil {
		return ""
	}
	// Count the bytes that precede the terminating NUL.
	n := 0
	for *(*byte)(unsafe.Add(unsafe.Pointer(b), n)) != 0 {
		n++
	}
	// string(...) copies, so the result stays valid after the C buffer is freed.
	return string(unsafe.Slice(b, n))
}
18 | 
19 | // SandboxLoadProfile loads profile by sandbox_init
20 | func SandboxLoadProfile(profile string) (err error) {
21 | 	var errBuf *byte
22 | 	p, err := syscall.BytePtrFromString(profile)
23 | 	if err != nil {
24 | 		return
25 | 	}
26 | 	if err := SandboxInit(p, 0, &errBuf); err != nil {
27 | 		defer SandboxFreeError(errBuf)
28 | 		if errBuf != nil {
29 | 			s := goString(errBuf)
30 | 			return os.NewSyscallError("sandbox_init", errors.New(s))
31 | 		}
32 | 		return os.NewSyscallError("sandbox_init", err)
33 | 	}
34 | 	return
35 | }
36 | 


--------------------------------------------------------------------------------
/pkg/forkexec/syscall_darwin.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | 	_ "unsafe" // use go:linkname
 6 | )
 7 | 
// syscall3 is bound through go:linkname to the unexported syscall.syscall.
// It takes a function address and three arguments.
//
//go:linkname syscall3 syscall.syscall
func syscall3(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)

// rawSyscall is bound through go:linkname to the unexported syscall.rawSyscall.
//
//go:linkname rawSyscall syscall.rawSyscall
func rawSyscall(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)

// rawSyscall6 is bound through go:linkname to the unexported
// syscall.rawSyscall6; it is the six-argument variant.
//
//go:linkname rawSyscall6 syscall.rawSyscall6
func rawSyscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)

// The libc_*_trampoline_addr variables below are declared without initializers.
// zsyscall_darwin.s defines their data: each one holds the address of the
// assembly trampoline that jumps to the libc function of the same name.

var libc_fork_trampoline_addr uintptr

var libc_close_trampoline_addr uintptr

var libc_read_trampoline_addr uintptr

var libc_write_trampoline_addr uintptr

var libc_fcntl_trampoline_addr uintptr

var libc_dup2_trampoline_addr uintptr

var libc_chdir_trampoline_addr uintptr

var libc_setrlimit_trampoline_addr uintptr

var libc_execve_trampoline_addr uintptr

var libc_exit_trampoline_addr uintptr

var libc_setpgid_trampoline_addr uintptr

// fcntl is bound through go:linkname to the unexported syscall.fcntl.
//
//go:linkname fcntl syscall.fcntl
func fcntl(fd int, cmd int, arg int) (val int, err error)
41 | 


--------------------------------------------------------------------------------
/pkg/forkexec/test.sb:
--------------------------------------------------------------------------------
; Test Sandbox Profile
; Everything is denied by default, so there is no network / socket and no
; system / sysctl access; only the operations allowed below are permitted.
(version 1)

(deny default)

; allow posix ipc
(allow ipc-posix*)

; allow read access to files under /usr/lib
(allow file-read* (subpath "/usr/lib"))

; allow execve
(allow process-exec)

; allow fork
(allow process-fork)

; allow sending signals to the process itself
(allow signal (target self))


--------------------------------------------------------------------------------
/pkg/forkexec/userns_linux.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import (
 4 | 	"strconv"
 5 | 	"syscall"
 6 | 
 7 | 	"golang.org/x/sys/unix"
 8 | )
 9 | 
10 | // writeUidGidMappings writes User ID and Group ID mappings for user namespaces
11 | // for a process and it is called from the parent process.
12 | func writeIDMaps(r *Runner, pid int) error {
13 | 	var uidMappings, gidMappings, setGroups []byte
14 | 	pidStr := strconv.Itoa(pid)
15 | 
16 | 	if r.UIDMappings == nil {
17 | 		uidMappings = []byte("0 " + strconv.Itoa(unix.Geteuid()) + " 1")
18 | 	} else {
19 | 		uidMappings = formatIDMappings(r.UIDMappings)
20 | 	}
21 | 	if err := writeFile("/proc/"+pidStr+"/uid_map", uidMappings); err != nil {
22 | 		return err
23 | 	}
24 | 
25 | 	if r.GIDMappings == nil || !r.GIDMappingsEnableSetgroups {
26 | 		setGroups = setGIDDeny
27 | 	} else {
28 | 		setGroups = setGIDAllow
29 | 	}
30 | 	if err := writeFile("/proc/"+pidStr+"/setgroups", setGroups); err != nil {
31 | 		return err
32 | 	}
33 | 
34 | 	if r.GIDMappings == nil {
35 | 		gidMappings = []byte("0 " + strconv.Itoa(unix.Getegid()) + " 1")
36 | 	} else {
37 | 		gidMappings = formatIDMappings(r.GIDMappings)
38 | 	}
39 | 	if err := writeFile("/proc/"+pidStr+"/gid_map", gidMappings); err != nil {
40 | 		return err
41 | 	}
42 | 	return nil
43 | }
44 | 
// formatIDMappings renders idMap as one line per entry, each of the form
// "<ContainerID> <HostID> <Size>\n". An empty or nil idMap yields a nil slice.
func formatIDMappings(idMap []syscall.SysProcIDMap) []byte {
	var out []byte
	for i := range idMap {
		entry := &idMap[i]
		out = strconv.AppendInt(out, int64(entry.ContainerID), 10)
		out = append(out, ' ')
		out = strconv.AppendInt(out, int64(entry.HostID), 10)
		out = append(out, ' ')
		out = strconv.AppendInt(out, int64(entry.Size), 10)
		out = append(out, '\n')
	}
	return out
}
52 | 
53 | // writeFile writes file
54 | func writeFile(path string, content []byte) error {
55 | 	fd, err := unix.Open(path, unix.O_RDWR|unix.O_CLOEXEC, 0)
56 | 	if err != nil {
57 | 		return err
58 | 	}
59 | 	if _, err := unix.Write(fd, content); err != nil {
60 | 		unix.Close(fd)
61 | 		return err
62 | 	}
63 | 	if err := unix.Close(fd); err != nil {
64 | 		return err
65 | 	}
66 | 	return nil
67 | }
68 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_386.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
 7 | // See ../runtime/sys_linux_386.s for the reason why we always use int 0x80
 8 | // instead of the glibc-specific "CALL 0x10(GS)".
 9 | #define INVOKE_SYSCALL	INT	$0x80
10 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads trap into AX and a1..a3 into BX, CX, DX, then traps through
// INVOKE_SYSCALL. A raw result above 0xfffff001 (unsigned) is reported as
// r1 = -1 with err set to the negated result; any other result is returned
// in r1 with err = 0.
TEXT ·RawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-24
	MOVL	trap+0(FP), AX	// syscall entry
	MOVL	a1+4(FP), BX
	MOVL	a2+8(FP), CX
	MOVL	a3+12(FP), DX
	POPL	SI // preserve return address
	INVOKE_SYSCALL
	// Restore the return address that was saved in SI.
	PUSHL	SI
	// JLS is the unsigned "lower or same" branch: result is not an error.
	CMPL	AX, $0xfffff001
	JLS	ok
	// Error path: r1 = -1, err = -AX.
	MOVL	$-1, r1+16(FP)
	NEGL	AX
	MOVL	AX, err+20(FP)
	RET
ok:
	// Success path: r1 = AX, err = 0.
	MOVL	AX, r1+16(FP)
	MOVL	$0, err+20(FP)
	RET
30 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_amd64.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads a1..a3 into DI, SI, DX, zeroes R10, R8 and R9, loads trap into AX and
// executes SYSCALL. A raw result above 0xfffffffffffff001 (unsigned) is
// reported as r1 = -1 with err set to the negated result; any other result is
// returned in r1 with err = 0.
TEXT ·RawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
	MOVQ	a1+8(FP), DI
	MOVQ	a2+16(FP), SI
	MOVQ	a3+24(FP), DX
	MOVQ	$0, R10
	MOVQ	$0, R8
	MOVQ	$0, R9
	MOVQ	trap+0(FP), AX	// syscall entry
	POPQ	R12 // preserve return address
	SYSCALL
	// Restore the return address that was saved in R12.
	PUSHQ	R12
	// JLS is the unsigned "lower or same" branch: result is not an error.
	CMPQ	AX, $0xfffffffffffff001
	JLS	ok2
	// Error path: r1 = -1, err = -AX.
	MOVQ	$-1, r1+32(FP)
	NEGQ	AX
	MOVQ	AX, err+40(FP)
	RET
ok2:
	// Success path: r1 = AX, err = 0.
	MOVQ	AX, r1+32(FP)
	MOVQ	$0, err+40(FP)
	RET
29 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_arm.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads trap into R7 and a1..a3 into R0..R2, then executes SWI $0. The result
// in R0 is compared against 0xfffff001: on the error path r1 = -1 and err is
// the negated result; on the ok path r1 is the result and err = 0.
TEXT ·RawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-24
	MOVW	trap+0(FP), R7	// syscall entry
	MOVW	a1+4(FP), R0
	MOVW	a2+8(FP), R1
	MOVW	a3+12(FP), R2
	SWI	$0
	// Compare the syscall result (R0) with the error threshold held in R1.
	MOVW	$0xfffff001, R1
	CMP	R1, R0
	BLS	ok
	// Error path: r1 = -1, err = 0 - R0.
	MOVW	$-1, R1
	MOVW	R1, r1+16(FP)
	RSB	$0, R0, R0
	MOVW	R0, err+20(FP)
	RET
ok:
	// Success path: r1 = R0, err = 0.
	MOVW	R0, r1+16(FP)
	MOVW	$0, R0
	MOVW	R0, err+20(FP)
	RET
27 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_arm64.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads a1..a3 into R0..R2, zeroes R3..R5, loads trap into R8 and executes
// SVC. On the error path r1 = -1 and err is the negated result; on the ok
// path r1 is the result and err = 0.
TEXT ·RawVforkSyscall(SB),NOSPLIT,$0-48
	MOVD	a1+8(FP), R0
	MOVD	a2+16(FP), R1
	MOVD	a3+24(FP), R2
	MOVD	$0, R3
	MOVD	$0, R4
	MOVD	$0, R5
	MOVD	trap+0(FP), R8	// syscall entry
	SVC
	// CMN sets flags for R0 + 4095; BCC takes the ok path when no carry occurs.
	CMN	$4095, R0
	BCC	ok
	// Error path: r1 = -1, err = -R0.
	MOVD	$-1, R4
	MOVD	R4, r1+32(FP)	// r1
	NEG	R0, R0
	MOVD	R0, err+40(FP)	// errno
	RET
ok:
	// Success path: r1 = R0, err = 0.
	MOVD	R0, r1+32(FP)	// r1
	MOVD	ZR, err+40(FP)	// errno
	RET
28 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_loong64.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads a1..a3 into R4..R6, zeroes R7..R9, loads trap into R11 and executes
// SYSCALL. The result in R4 is compared (unsigned) against -4096: on the
// error path r1 = -1 and err is the negated result; on the ok path r1 is the
// result and err = 0.
TEXT ·RawVforkSyscall(SB),NOSPLIT,$0-48
	MOVV	a1+8(FP), R4
	MOVV	a2+16(FP), R5
	MOVV	a3+24(FP), R6
	MOVV	$0, R7
	MOVV	$0, R8
	MOVV	$0, R9
	MOVV	trap+0(FP), R11	// syscall entry
	SYSCALL
	// Unsigned compare: take the ok path when -4096 >= result.
	MOVW	$-4096, R12
	BGEU	R12, R4, ok
	// Error path: r1 = -1, err = R0 - R4 (R0 is the register stored as the
	// zero errno on the ok path).
	MOVV	$-1, R12
	MOVV	R12, r1+32(FP)	// r1
	SUBVU	R4, R0, R4
	MOVV	R4, err+40(FP)	// errno
	RET
ok:
	// Success path: r1 = R4, err = 0.
	MOVV	R4, r1+32(FP)	// r1
	MOVV	R0, err+40(FP)	// errno
	RET
28 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_mips64x.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build linux && (mips64 || mips64le)
 6 | 
 7 | #include "textflag.h"
 8 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads a1..a3 into R4..R6, clears R7..R9 from R0, loads trap into R2 and
// executes SYSCALL. If R7 is zero afterwards the ok path returns r1 = R2 and
// err = 0; otherwise r1 = -1 and R2 is stored as err without negation.
TEXT ·RawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
	MOVV	a1+8(FP), R4
	MOVV	a2+16(FP), R5
	MOVV	a3+24(FP), R6
	MOVV	R0, R7
	MOVV	R0, R8
	MOVV	R0, R9
	MOVV	trap+0(FP), R2	// syscall entry
	SYSCALL
	// NOTE(review): R7 is presumably the kernel's error indicator on this
	// architecture; confirm against the Go syscall package.
	BEQ	R7, ok
	MOVV	$-1, R1
	MOVV	R1, r1+32(FP)	// r1
	MOVV	R2, err+40(FP)	// errno
	RET
ok:
	MOVV	R2, r1+32(FP)	// r1
	MOVV	R0, err+40(FP)	// errno
	RET
28 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_mipsx.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build linux && (mips || mipsle)
 6 | 
 7 | #include "textflag.h"
 8 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads a1..a3 into R4..R6 and trap into R2, then executes SYSCALL. If R7 is
// zero afterwards the ok path returns r1 = R2 and err = 0; otherwise r1 = -1
// and R2 is stored as err without negation.
TEXT ·RawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-24
	MOVW	a1+4(FP), R4
	MOVW	a2+8(FP), R5
	MOVW	a3+12(FP), R6
	MOVW	trap+0(FP), R2	// syscall entry
	SYSCALL
	// NOTE(review): R7 is presumably the kernel's error indicator on this
	// architecture; confirm against the Go syscall package.
	BEQ	R7, ok
	MOVW	$-1, R1
	MOVW	R1, r1+16(FP)	// r1
	MOVW	R2, err+20(FP)	// errno
	RET
ok:
	MOVW	R2, r1+16(FP)	// r1
	MOVW	R0, err+20(FP)	// errno
	RET
25 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_ppc64x.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build linux && (ppc64 || ppc64le)
 6 | 
 7 | #include "textflag.h"
 8 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads a1..a3 into R3..R5, clears R6..R8 from R0, loads trap into R9 and
// executes SYSCALL R9. When the BVC branch is taken the ok path returns
// r1 = R3 and err = 0; otherwise r1 = -1 and R3 is stored as err without
// negation.
TEXT ·RawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
	MOVD	a1+8(FP), R3
	MOVD	a2+16(FP), R4
	MOVD	a3+24(FP), R5
	MOVD	R0, R6
	MOVD	R0, R7
	MOVD	R0, R8
	MOVD	trap+0(FP), R9	// syscall entry
	SYSCALL R9
	// NOTE(review): the condition tested by BVC is presumably how the kernel
	// signals failure on this architecture; confirm against the Go syscall
	// package.
	BVC	ok
	MOVD	$-1, R4
	MOVD	R4, r1+32(FP)	// r1
	MOVD	R3, err+40(FP)	// errno
	RET
ok:
	MOVD	R3, r1+32(FP)	// r1
	MOVD	R0, err+40(FP)	// errno
	RET
28 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_riscv64.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads a1..a3 into A0..A2, zeroes A3..A5, loads trap into A7 and executes
// ECALL. If the result in A0 is above -4096 (unsigned) the err path returns
// r1 = -1 and err = the negated result; otherwise r1 is the result and
// err = 0.
TEXT ·RawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
	MOV	a1+8(FP), A0
	MOV	a2+16(FP), A1
	MOV	a3+24(FP), A2
	MOV	ZERO, A3
	MOV	ZERO, A4
	MOV	ZERO, A5
	MOV	trap+0(FP), A7	// syscall entry
	ECALL
	// Unsigned compare: branch to err when -4096 < result.
	MOV	$-4096, T0
	BLTU	T0, A0, err
	// Success path: r1 = A0, err = 0.
	MOV	A0, r1+32(FP)	// r1
	MOV	ZERO, err+40(FP)	// errno
	RET
err:
	// Error path: r1 = -1, err = ZERO - A0.
	MOV	$-1, T0
	MOV	T0, r1+32(FP)	// r1
	SUB	A0, ZERO, A0
	MOV	A0, err+40(FP)	// errno
	RET
28 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/asm_linux_s390x.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2009 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
// func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
//
// Loads a1..a3 into R2..R4, zeroes R5..R7, loads trap into R1 and executes
// SYSCALL. If the result in R2 is below 0xfffffffffffff001 (unsigned) the ok2
// path returns r1 = R2 and err = 0; otherwise r1 = -1 and err is the negated
// result.
TEXT ·RawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
	MOVD	a1+8(FP), R2
	MOVD	a2+16(FP), R3
	MOVD	a3+24(FP), R4
	MOVD	$0, R5
	MOVD	$0, R6
	MOVD	$0, R7
	MOVD	trap+0(FP), R1	// syscall entry
	SYSCALL
	// Unsigned compare-and-branch: ok2 when result < threshold.
	MOVD	$0xfffffffffffff001, R8
	CMPUBLT	R2, R8, ok2
	// Error path: r1 = -1, err = -R2.
	MOVD	$-1, r1+32(FP)
	NEG	R2, R2
	MOVD	R2, err+40(FP)	// errno
	RET
ok2:
	// Success path: r1 = R2, err = 0.
	MOVD	R2, r1+32(FP)
	MOVD	$0, err+40(FP)	// errno
	RET
27 | 


--------------------------------------------------------------------------------
/pkg/forkexec/vfork/syscall.go:
--------------------------------------------------------------------------------
 1 | // Package vfork provides the mirror of the un-exported syscall.rawVforkSyscall.
 2 | // The assembly code is copied from go1.24 syscall package
 3 | package vfork
 4 | 
 5 | import "syscall"
 6 | 
// RawVforkSyscall provides a mirrored version of the un-exported
// syscall.rawVforkSyscall. The go:linkname directive does not work for
// assembly functions, and the Go team suggested copying the assembly
// functions over instead.
//
// The declaration has no Go body; the implementation lives in the
// per-architecture asm_linux_*.s files of this package.
//
// See go.dev/issue/71892
func RawVforkSyscall(trap, a1, a2, a3 uintptr) (r1 uintptr, err syscall.Errno)
13 | 


--------------------------------------------------------------------------------
/pkg/forkexec/zsyscall_darwin.go:
--------------------------------------------------------------------------------
 1 | package forkexec
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | 	"unsafe"
 6 | )
 7 | 
 8 | // SandboxInit calls sandbox_init
 9 | func SandboxInit(profile *byte, flags uint64, errorBuf **byte) (err error) {
10 | 	var r1 uintptr
11 | 	r1, _, err = syscall3(libc_sandbox_init_trampoline_addr, uintptr(unsafe.Pointer(profile)), uintptr(flags), uintptr(unsafe.Pointer(errorBuf)))
12 | 	if r1 != 0 {
13 | 		err = syscall.EINVAL
14 | 	} else {
15 | 		err = nil
16 | 	}
17 | 	return
18 | }
19 | 
// SandboxFreeError calls sandbox_free_error through its libc trampoline with
// errorBuf as the only meaningful argument. All results of the call are
// discarded.
func SandboxFreeError(errorBuf *byte) {
	// The pointer conversion must stay inside the call expression.
	syscall3(libc_sandbox_free_error_trampoline_addr, uintptr(unsafe.Pointer(errorBuf)), 0, 0)
}
24 | 
// libc_sandbox_init_trampoline_addr is passed to syscall3 by SandboxInit.
// It is declared without an initializer; zsyscall_darwin.s defines its data as
// the address of the trampoline that jumps to libc_sandbox_init.
var libc_sandbox_init_trampoline_addr uintptr

// Import sandbox_init from libSystem as the symbol libc_sandbox_init.
//go:cgo_import_dynamic libc_sandbox_init sandbox_init "/usr/lib/libSystem.B.dylib"

// libc_sandbox_free_error_trampoline_addr is passed to syscall3 by
// SandboxFreeError. It is declared without an initializer; zsyscall_darwin.s
// defines its data as the address of the trampoline that jumps to
// libc_sandbox_free_error.
var libc_sandbox_free_error_trampoline_addr uintptr

// Import sandbox_free_error from libSystem as the symbol libc_sandbox_free_error.
//go:cgo_import_dynamic libc_sandbox_free_error sandbox_free_error "/usr/lib/libSystem.B.dylib"
32 | 


--------------------------------------------------------------------------------
/pkg/forkexec/zsyscall_darwin.s:
--------------------------------------------------------------------------------
 1 | #include "textflag.h"
 2 | 
// Each group below follows the same pattern for one libc function:
//   - a file-local trampoline (TEXT name<>) that does nothing but JMP to the
//     libc_* symbol, and
//   - an 8-byte read-only global ·libc_*_trampoline_addr whose data is the
//     address of that trampoline.
// The Go files of this package declare the matching
// libc_*_trampoline_addr uintptr variables.

// sandbox_init
TEXT libc_sandbox_init_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_sandbox_init(SB)

GLOBL	·libc_sandbox_init_trampoline_addr(SB), RODATA, $8
DATA	·libc_sandbox_init_trampoline_addr(SB)/8, $libc_sandbox_init_trampoline<>(SB)

// sandbox_free_error
TEXT libc_sandbox_free_error_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_sandbox_free_error(SB)

GLOBL	·libc_sandbox_free_error_trampoline_addr(SB), RODATA, $8
DATA	·libc_sandbox_free_error_trampoline_addr(SB)/8, $libc_sandbox_free_error_trampoline<>(SB)

// fork
TEXT libc_fork_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_fork(SB)

GLOBL	·libc_fork_trampoline_addr(SB), RODATA, $8
DATA	·libc_fork_trampoline_addr(SB)/8, $libc_fork_trampoline<>(SB)

// close
TEXT libc_close_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_close(SB)

GLOBL	·libc_close_trampoline_addr(SB), RODATA, $8
DATA	·libc_close_trampoline_addr(SB)/8, $libc_close_trampoline<>(SB)

// read
TEXT libc_read_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_read(SB)

GLOBL	·libc_read_trampoline_addr(SB), RODATA, $8
DATA	·libc_read_trampoline_addr(SB)/8, $libc_read_trampoline<>(SB)

// write
TEXT libc_write_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_write(SB)

GLOBL	·libc_write_trampoline_addr(SB), RODATA, $8
DATA	·libc_write_trampoline_addr(SB)/8, $libc_write_trampoline<>(SB)

// fcntl
TEXT libc_fcntl_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_fcntl(SB)

GLOBL	·libc_fcntl_trampoline_addr(SB), RODATA, $8
DATA	·libc_fcntl_trampoline_addr(SB)/8, $libc_fcntl_trampoline<>(SB)

// dup2
TEXT libc_dup2_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_dup2(SB)

GLOBL	·libc_dup2_trampoline_addr(SB), RODATA, $8
DATA	·libc_dup2_trampoline_addr(SB)/8, $libc_dup2_trampoline<>(SB)

// chdir
TEXT libc_chdir_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_chdir(SB)

GLOBL	·libc_chdir_trampoline_addr(SB), RODATA, $8
DATA	·libc_chdir_trampoline_addr(SB)/8, $libc_chdir_trampoline<>(SB)

// setrlimit
TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_setrlimit(SB)

GLOBL	·libc_setrlimit_trampoline_addr(SB), RODATA, $8
DATA	·libc_setrlimit_trampoline_addr(SB)/8, $libc_setrlimit_trampoline<>(SB)

// execve
TEXT libc_execve_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_execve(SB)

GLOBL	·libc_execve_trampoline_addr(SB), RODATA, $8
DATA	·libc_execve_trampoline_addr(SB)/8, $libc_execve_trampoline<>(SB)

// exit
TEXT libc_exit_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_exit(SB)

GLOBL	·libc_exit_trampoline_addr(SB), RODATA, $8
DATA	·libc_exit_trampoline_addr(SB)/8, $libc_exit_trampoline<>(SB)

// setpgid
TEXT libc_setpgid_trampoline<>(SB),NOSPLIT,$0-0
	JMP	libc_setpgid(SB)

GLOBL	·libc_setpgid_trampoline_addr(SB), RODATA, $8
DATA	·libc_setpgid_trampoline_addr(SB)/8, $libc_setpgid_trampoline<>(SB)
80 | 


--------------------------------------------------------------------------------
/pkg/memfd/doc.go:
--------------------------------------------------------------------------------
1 | // Package memfd provides interface to Linux memfd to create and seal a memory file.
2 | // Requires kernel >= 3.17
3 | package memfd
4 | 


--------------------------------------------------------------------------------
/pkg/memfd/memfd_linux.go:
--------------------------------------------------------------------------------
 1 | package memfd
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"io"
 6 | 	"os"
 7 | 
 8 | 	"golang.org/x/sys/unix"
 9 | )
10 | 
// createFlag is the flag set New passes to memfd_create: MFD_CLOEXEC together
// with MFD_ALLOW_SEALING.
const createFlag = unix.MFD_CLOEXEC | unix.MFD_ALLOW_SEALING

// roSeal is the seal set DupToMemfd adds (F_ADD_SEALS) to make the memfd
// readonly: F_SEAL_SEAL, F_SEAL_SHRINK, F_SEAL_GROW and F_SEAL_WRITE.
const roSeal = unix.F_SEAL_SEAL | unix.F_SEAL_SHRINK | unix.F_SEAL_GROW | unix.F_SEAL_WRITE
13 | 
14 | // New creates a new memfd, caller need to close the file
15 | func New(name string) (*os.File, error) {
16 | 	fd, err := unix.MemfdCreate(name, createFlag)
17 | 	if err != nil {
18 | 		return nil, fmt.Errorf("memfd: memfd_create: %w", err)
19 | 	}
20 | 	file := os.NewFile(uintptr(fd), name)
21 | 	if file == nil {
22 | 		unix.Close(fd)
23 | 		return nil, fmt.Errorf("memfd: new file failed for %q", name)
24 | 	}
25 | 	return file, nil
26 | }
27 | 
28 | // DupToMemfd reads content from reader to sealed (readonly) memfd for given name
29 | func DupToMemfd(name string, reader io.Reader) (*os.File, error) {
30 | 	file, err := New(name)
31 | 	if err != nil {
32 | 		return nil, fmt.Errorf("memfd: dup: %w", err)
33 | 	}
34 | 	// linux syscall sendfile might be more efficient here if reader is a file
35 | 	if _, err = file.ReadFrom(reader); err != nil {
36 | 		file.Close()
37 | 		return nil, fmt.Errorf("memfd: read from: %w", err)
38 | 	}
39 | 	// make memfd readonly
40 | 	if _, err = unix.FcntlInt(file.Fd(), unix.F_ADD_SEALS, roSeal); err != nil {
41 | 		file.Close()
42 | 		return nil, fmt.Errorf("memfd: seal: %w", err)
43 | 	}
44 | 	if _, err := file.Seek(0, 0); err != nil {
45 | 		file.Close()
46 | 		return nil, fmt.Errorf("memfd: seek: %w", err)
47 | 	}
48 | 	return file, nil
49 | }
50 | 


--------------------------------------------------------------------------------
/pkg/memfd/memfd_linux_test.go:
--------------------------------------------------------------------------------
 1 | package memfd
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"io"
 6 | 	"os"
 7 | 	"testing"
 8 | )
 9 | 
10 | func TestNew(t *testing.T) {
11 | 	f, err := New("test-memfd")
12 | 	if err != nil {
13 | 		t.Fatalf("New() error: %v", err)
14 | 	}
15 | 	defer f.Close()
16 | 
17 | 	// Write and read to verify it's a valid file
18 | 	data := []byte("hello world")
19 | 	n, err := f.Write(data)
20 | 	if err != nil {
21 | 		t.Fatalf("Write error: %v", err)
22 | 	}
23 | 	if n != len(data) {
24 | 		t.Errorf("Write n = %d, want %d", n, len(data))
25 | 	}
26 | 	_, err = f.Seek(0, io.SeekStart)
27 | 	if err != nil {
28 | 		t.Fatalf("Seek error: %v", err)
29 | 	}
30 | 	read := make([]byte, len(data))
31 | 	n, err = f.Read(read)
32 | 	if err != nil && err != io.EOF {
33 | 		t.Fatalf("Read error: %v", err)
34 | 	}
35 | 	if string(read[:n]) != string(data) {
36 | 		t.Errorf("Read = %q, want %q", string(read[:n]), string(data))
37 | 	}
38 | }
39 | 
40 | func TestDupToMemfd(t *testing.T) {
41 | 	content := []byte("memfd content")
42 | 	r := bytes.NewReader(content)
43 | 	f, err := DupToMemfd("dup-memfd", r)
44 | 	if err != nil {
45 | 		t.Fatalf("DupToMemfd error: %v", err)
46 | 	}
47 | 	defer f.Close()
48 | 
49 | 	// Should be sealed (readonly), so writing should fail
50 | 	_, err = f.Write([]byte("fail"))
51 | 	if err == nil {
52 | 		t.Error("expected write to sealed memfd to fail, but it succeeded")
53 | 	}
54 | 
55 | 	// Should be able to read the content
56 | 	_, err = f.Seek(0, io.SeekStart)
57 | 	if err != nil {
58 | 		t.Fatalf("Seek error: %v", err)
59 | 	}
60 | 	got, err := io.ReadAll(f)
61 | 	if err != nil {
62 | 		t.Fatalf("ReadAll error: %v", err)
63 | 	}
64 | 	if string(got) != string(content) {
65 | 		t.Errorf("ReadAll = %q, want %q", string(got), string(content))
66 | 	}
67 | }
68 | 
69 | func TestDupToMemfd_ErrorPropagation(t *testing.T) {
70 | 	// Pass a reader that always errors
71 | 	r := errorReader{}
72 | 	_, err := DupToMemfd("dup-memfd-err", r)
73 | 	if err == nil {
74 | 		t.Error("expected error from DupToMemfd, got nil")
75 | 	}
76 | }
77 | 
// errorReader is a reader whose Read never returns data.
type errorReader struct{}

// Read ignores the buffer and always returns 0 and os.ErrInvalid.
func (errorReader) Read(_ []byte) (int, error) {
	return 0, os.ErrInvalid
}
81 | 


--------------------------------------------------------------------------------
/pkg/memfd/memfd_other.go:
--------------------------------------------------------------------------------
 1 | //go:build !linux
 2 | 
 3 | package memfd
 4 | 
 5 | import (
 6 | 	"fmt"
 7 | 	"io"
 8 | 	"os"
 9 | 	"runtime"
10 | )
11 | 
// errNotImplemented is the error returned by every function of this file
// (built for non-linux platforms); its message includes runtime.GOOS.
var errNotImplemented = fmt.Errorf("memfd: unsupported on platform: %s", runtime.GOOS)

// New is the non-linux stub: it always returns a nil file and
// errNotImplemented.
func New(name string) (*os.File, error) {
	return nil, errNotImplemented
}

// DupToMemfd is the non-linux stub: it does not read from reader and always
// returns a nil file and errNotImplemented.
func DupToMemfd(name string, reader io.Reader) (*os.File, error) {
	return nil, errNotImplemented
}
21 | 


--------------------------------------------------------------------------------
/pkg/mount/builder.go:
--------------------------------------------------------------------------------
 1 | package mount
 2 | 
// Builder builds fork_exec friendly mount syscall format
type Builder struct {
	// Mounts is the list of mounts collected so far, in insertion order.
	Mounts []Mount
}
 7 | 
 8 | // NewBuilder creates new mount builder instance
 9 | func NewBuilder() *Builder {
10 | 	return &Builder{}
11 | }
12 | 


--------------------------------------------------------------------------------
/pkg/mount/builder_linux.go:
--------------------------------------------------------------------------------
  1 | package mount
  2 | 
  3 | import (
  4 | 	"os"
  5 | 	"strings"
  6 | 
  7 | 	"golang.org/x/sys/unix"
  8 | )
  9 | 
const (
	// bind is the base flag set WithBind uses for bind mounts.
	bind  = unix.MS_BIND | unix.MS_NOSUID | unix.MS_PRIVATE | unix.MS_REC
	// mFlag is the flag set WithTmpfs uses for tmpfs mounts.
	mFlag = unix.MS_NOSUID | unix.MS_NOATIME | unix.MS_NODEV
)
 14 | 
 15 | // NewDefaultBuilder creates default builder for minimal rootfs
 16 | func NewDefaultBuilder() *Builder {
 17 | 	return NewBuilder().
 18 | 		WithBind("/usr", "usr", true).
 19 | 		WithBind("/lib", "lib", true).
 20 | 		WithBind("/lib64", "lib64", true).
 21 | 		WithBind("/bin", "bin", true)
 22 | }
 23 | 
 24 | // Build creates sequence of syscalls for fork_exec
 25 | func (b *Builder) Build() ([]SyscallParams, error) {
 26 | 	var err error
 27 | 	ret := make([]SyscallParams, 0, len(b.Mounts))
 28 | 	for _, m := range b.Mounts {
 29 | 		var mknod bool
 30 | 		if mknod, err = isBindMountFileOrNotExists(m); err != nil {
 31 | 			return nil, err
 32 | 		}
 33 | 		sp, err := m.ToSyscall()
 34 | 		if err != nil {
 35 | 			return nil, err
 36 | 		}
 37 | 		sp.MakeNod = mknod
 38 | 		ret = append(ret, *sp)
 39 | 	}
 40 | 	return ret, nil
 41 | }
 42 | 
 43 | // FilterNotExist removes bind mount that does not exists
 44 | func (b *Builder) FilterNotExist() *Builder {
 45 | 	rt := b.Mounts[:0]
 46 | 	for _, m := range b.Mounts {
 47 | 		if m.IsBindMount() {
 48 | 			if _, err := os.Stat(m.Source); os.IsNotExist(err) {
 49 | 				continue
 50 | 			}
 51 | 		}
 52 | 		rt = append(rt, m)
 53 | 	}
 54 | 	b.Mounts = rt
 55 | 	return b
 56 | }
 57 | 
 58 | func isBindMountFileOrNotExists(m Mount) (bool, error) {
 59 | 	if m.IsBindMount() {
 60 | 		if fi, err := os.Stat(m.Source); os.IsNotExist(err) {
 61 | 			return false, err
 62 | 		} else if !fi.IsDir() {
 63 | 			return true, err
 64 | 		}
 65 | 	}
 66 | 	return false, nil
 67 | }
 68 | 
// WithMounts appends all mounts in m to the builder, preserving their order,
// and returns the builder so calls can be chained.
func (b *Builder) WithMounts(m []Mount) *Builder {
	b.Mounts = append(b.Mounts, m...)
	return b
}
 74 | 
// WithMount appends the single mount m to the builder and returns the builder
// so calls can be chained.
func (b *Builder) WithMount(m Mount) *Builder {
	b.Mounts = append(b.Mounts, m)
	return b
}
 80 | 
 81 | // WithBind adds a bind mount to builder
 82 | func (b *Builder) WithBind(source, target string, readonly bool) *Builder {
 83 | 	var flags uintptr = bind
 84 | 	if readonly {
 85 | 		flags |= unix.MS_RDONLY
 86 | 	}
 87 | 	b.Mounts = append(b.Mounts, Mount{
 88 | 		Source: source,
 89 | 		Target: target,
 90 | 		Flags:  flags,
 91 | 	})
 92 | 	return b
 93 | }
 94 | 
 95 | // WithTmpfs adds a tmpfs mount to builder
 96 | func (b *Builder) WithTmpfs(target, data string) *Builder {
 97 | 	b.Mounts = append(b.Mounts, Mount{
 98 | 		Source: "tmpfs",
 99 | 		Target: target,
100 | 		FsType: "tmpfs",
101 | 		Flags:  mFlag,
102 | 		Data:   data,
103 | 	})
104 | 	return b
105 | }
106 | 
// WithProc adds proc file system mounted read-only; it is shorthand for
// WithProcRW(false).
func (b *Builder) WithProc() *Builder {
	return b.WithProcRW(false)
}
111 | 
112 | // WithProcRW adds proc file system, possibly read-write
113 | func (b *Builder) WithProcRW(canWrite bool) *Builder {
114 | 	var flags uintptr = unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC
115 | 	if !canWrite {
116 | 		flags |= unix.MS_RDONLY
117 | 	}
118 | 	b.Mounts = append(b.Mounts, Mount{
119 | 		Source: "proc",
120 | 		Target: "proc",
121 | 		FsType: "proc",
122 | 		Flags:  flags,
123 | 	})
124 | 	return b
125 | }
126 | 
127 | func (b Builder) String() string {
128 | 	var sb strings.Builder
129 | 	sb.WriteString("Mounts: ")
130 | 	for i, m := range b.Mounts {
131 | 		sb.WriteString(m.String())
132 | 		if i != len(b.Mounts)-1 {
133 | 			sb.WriteString(", ")
134 | 		}
135 | 	}
136 | 	return sb.String()
137 | }
138 | 


--------------------------------------------------------------------------------
/pkg/mount/builder_linux_test.go:
--------------------------------------------------------------------------------
  1 | package mount
  2 | 
  3 | import (
  4 | 	"os"
  5 | 	"strings"
  6 | 	"testing"
  7 | )
  8 | 
  9 | func TestBuilder_WithBind(t *testing.T) {
 10 | 	b := NewBuilder().WithBind("/src", "/dst", true)
 11 | 	if len(b.Mounts) != 1 {
 12 | 		t.Fatalf("expected 1 mount, got %d", len(b.Mounts))
 13 | 	}
 14 | 	m := b.Mounts[0]
 15 | 	if m.Source != "/src" || m.Target != "/dst" {
 16 | 		t.Errorf("unexpected mount: %+v", m)
 17 | 	}
 18 | 	if !m.IsBindMount() {
 19 | 		t.Errorf("expected bind mount")
 20 | 	}
 21 | 	if !m.IsReadOnly() {
 22 | 		t.Errorf("expected readonly mount")
 23 | 	}
 24 | }
 25 | 
 26 | func TestBuilder_WithTmpfs(t *testing.T) {
 27 | 	b := NewBuilder().WithTmpfs("/tmp", "size=64m")
 28 | 	if len(b.Mounts) != 1 {
 29 | 		t.Fatalf("expected 1 mount, got %d", len(b.Mounts))
 30 | 	}
 31 | 	m := b.Mounts[0]
 32 | 	if !m.IsTmpFs() {
 33 | 		t.Errorf("expected tmpfs mount")
 34 | 	}
 35 | 	if m.Target != "/tmp" || m.Data != "size=64m" {
 36 | 		t.Errorf("unexpected mount: %+v", m)
 37 | 	}
 38 | }
 39 | 
 40 | func TestBuilder_WithProc(t *testing.T) {
 41 | 	b := NewBuilder().WithProc()
 42 | 	if len(b.Mounts) != 1 {
 43 | 		t.Fatalf("expected 1 mount, got %d", len(b.Mounts))
 44 | 	}
 45 | 	m := b.Mounts[0]
 46 | 	if m.FsType != "proc" {
 47 | 		t.Errorf("expected proc fsType")
 48 | 	}
 49 | 	if !m.IsReadOnly() {
 50 | 		t.Errorf("expected readonly proc mount")
 51 | 	}
 52 | }
 53 | 
 54 | func TestBuilder_WithProcRW(t *testing.T) {
 55 | 	b := NewBuilder().WithProcRW(true)
 56 | 	if len(b.Mounts) != 1 {
 57 | 		t.Fatalf("expected 1 mount, got %d", len(b.Mounts))
 58 | 	}
 59 | 	m := b.Mounts[0]
 60 | 	if m.FsType != "proc" {
 61 | 		t.Errorf("expected proc fsType")
 62 | 	}
 63 | 	if m.IsReadOnly() {
 64 | 		t.Errorf("expected read-write proc mount")
 65 | 	}
 66 | }
 67 | 
 68 | func TestBuilder_WithMounts(t *testing.T) {
 69 | 	m1 := Mount{Source: "/a", Target: "/b"}
 70 | 	m2 := Mount{Source: "/c", Target: "/d"}
 71 | 	b := NewBuilder().WithMounts([]Mount{m1, m2})
 72 | 	if len(b.Mounts) != 2 {
 73 | 		t.Fatalf("expected 2 mounts, got %d", len(b.Mounts))
 74 | 	}
 75 | }
 76 | 
 77 | func TestBuilder_WithMount(t *testing.T) {
 78 | 	m := Mount{Source: "/a", Target: "/b"}
 79 | 	b := NewBuilder().WithMount(m)
 80 | 	if len(b.Mounts) != 1 {
 81 | 		t.Fatalf("expected 1 mount, got %d", len(b.Mounts))
 82 | 	}
 83 | }
 84 | 
 85 | func TestBuilder_String(t *testing.T) {
 86 | 	b := NewBuilder().
 87 | 		WithBind("/src", "/dst", false).
 88 | 		WithTmpfs("/tmp", "size=1m").
 89 | 		WithProc()
 90 | 	s := b.String()
 91 | 	if !strings.HasPrefix(s, "Mounts: ") {
 92 | 		t.Errorf("unexpected prefix: %q", s)
 93 | 	}
 94 | 	if !strings.Contains(s, "bind[/src:/dst:rw]") {
 95 | 		t.Errorf("missing bind: %q", s)
 96 | 	}
 97 | 	if !strings.Contains(s, "tmpfs[/tmp]") {
 98 | 		t.Errorf("missing tmpfs: %q", s)
 99 | 	}
100 | 	if !strings.Contains(s, "proc[ro]") {
101 | 		t.Errorf("missing proc: %q", s)
102 | 	}
103 | }
104 | 
105 | func TestBuilder_FilterNotExist(t *testing.T) {
106 | 	tmpDir := t.TempDir()
107 | 	tmpFilePath := tmpDir + "/mounttest"
108 | 	f, err := os.Create(tmpFilePath)
109 | 	if err != nil {
110 | 		t.Fatal(err)
111 | 	}
112 | 	f.Close()
113 | 
114 | 	b := NewBuilder().
115 | 		WithBind(f.Name(), "/dst1", false).
116 | 		WithBind("/not/exist", "/dst2", false)
117 | 	b.FilterNotExist()
118 | 	if len(b.Mounts) != 1 {
119 | 		t.Errorf("expected 1 mount after filter, got %d", len(b.Mounts))
120 | 	}
121 | 	if b.Mounts[0].Source != f.Name() {
122 | 		t.Errorf("unexpected mount: %+v", b.Mounts[0])
123 | 	}
124 | }
125 | 


--------------------------------------------------------------------------------
/pkg/mount/doc.go:
--------------------------------------------------------------------------------
1 | // Package mount provides general data structure for mount and mount namespace (multiple mounts) definition.
2 | package mount
3 | 


--------------------------------------------------------------------------------
/pkg/mount/mount.go:
--------------------------------------------------------------------------------
 1 | package mount
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | )
 6 | 
 7 | // Mount defines syscall for mount points
 8 | type Mount struct {
 9 | 	Source, Target, FsType, Data string
10 | 	Flags                        uintptr
11 | }
12 | 
// SyscallParams holds the raw, pointer-based arguments for a mount syscall.
type SyscallParams struct {
	// Source, Target, FsType and Data point to NUL-terminated byte strings.
	Source *byte
	Target *byte
	FsType *byte
	Data   *byte
	// Flags is the mount flag bit set.
	Flags uintptr
	// Prefixes lists the path prefixes of the target as C-style strings.
	Prefixes []*byte
	MakeNod  bool
}
20 | 
21 | // ToSyscall convert Mount to SyscallPrams
22 | func (m *Mount) ToSyscall() (*SyscallParams, error) {
23 | 	var data *byte
24 | 	source, err := syscall.BytePtrFromString(m.Source)
25 | 	if err != nil {
26 | 		return nil, err
27 | 	}
28 | 	target, err := syscall.BytePtrFromString(m.Target)
29 | 	if err != nil {
30 | 		return nil, err
31 | 	}
32 | 	fsType, err := syscall.BytePtrFromString(m.FsType)
33 | 	if err != nil {
34 | 		return nil, err
35 | 	}
36 | 	if m.Data != "" {
37 | 		data, err = syscall.BytePtrFromString(m.Data)
38 | 		if err != nil {
39 | 			return nil, err
40 | 		}
41 | 	}
42 | 	prefix := pathPrefix(m.Target)
43 | 	paths, err := arrayPtrFromStrings(prefix)
44 | 	if err != nil {
45 | 		return nil, err
46 | 	}
47 | 	return &SyscallParams{
48 | 		Source:   source,
49 | 		Target:   target,
50 | 		FsType:   fsType,
51 | 		Flags:    m.Flags,
52 | 		Data:     data,
53 | 		Prefixes: paths,
54 | 	}, nil
55 | }
56 | 
// pathPrefix returns every prefix of path that ends just before a '/'
// (ignoring a '/' at index 0), followed by path itself.
// For example "/a/b" yields ["/a", "/a/b"].
func pathPrefix(path string) []string {
	prefixes := []string{}
	for end := 1; end < len(path); end++ {
		if path[end] != '/' {
			continue
		}
		prefixes = append(prefixes, path[:end])
	}
	return append(prefixes, path)
}
68 | 
// arrayPtrFromStrings converts each string into a pointer to a
// NUL-terminated byte string. It returns nil and the error of the first
// string that cannot be converted.
func arrayPtrFromStrings(str []string) ([]*byte, error) {
	ptrs := make([]*byte, len(str))
	for i, s := range str {
		p, err := syscall.BytePtrFromString(s)
		if err != nil {
			return nil, err
		}
		ptrs[i] = p
	}
	return ptrs, nil
}
81 | 


--------------------------------------------------------------------------------
/pkg/mount/mount_linux.go:
--------------------------------------------------------------------------------
 1 | package mount
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"os"
 6 | 	"path/filepath"
 7 | 	"syscall"
 8 | )
 9 | 
10 | // Mount calls mount syscall
11 | func (m *Mount) Mount() error {
12 | 	if err := ensureMountTargetExists(m.Source, m.Target); err != nil {
13 | 		return fmt.Errorf("mkdir: %w", err)
14 | 	}
15 | 	if err := syscall.Mount(m.Source, m.Target, m.FsType, m.Flags, m.Data); err != nil {
16 | 		return fmt.Errorf("mount: %w", err)
17 | 	}
18 | 	// Read-only bind mount need to be remounted
19 | 	const bindRo = syscall.MS_BIND | syscall.MS_RDONLY
20 | 	if m.Flags&bindRo == bindRo {
21 | 		if err := syscall.Mount("", m.Target, m.FsType, m.Flags|syscall.MS_REMOUNT, m.Data); err != nil {
22 | 			return fmt.Errorf("remount: %w", err)
23 | 		}
24 | 	}
25 | 	return nil
26 | }
27 | 
28 | // IsBindMount returns if it is a bind mount
29 | func (m Mount) IsBindMount() bool {
30 | 	return m.Flags&syscall.MS_BIND == syscall.MS_BIND
31 | }
32 | 
33 | // IsReadOnly returns if it is a readonly mount
34 | func (m Mount) IsReadOnly() bool {
35 | 	return m.Flags&syscall.MS_RDONLY == syscall.MS_RDONLY
36 | }
37 | 
38 | // IsTmpFs returns if the fsType is tmpfs
39 | func (m Mount) IsTmpFs() bool {
40 | 	return m.FsType == "tmpfs"
41 | }
42 | 
// ensureMountTargetExists creates the mount point for target.
//
// If source can be stat'ed and is not a directory, the parent directory of
// target is created and target itself is created with mknod; a mknod
// failure is ignored when target already exists as a regular file. In every
// other case target is created as a directory, including parents.
func ensureMountTargetExists(source, target string) error {
	srcInfo, statErr := os.Stat(source)
	sourceIsFile := statErr == nil && !srcInfo.IsDir()

	if !sourceIsFile {
		return os.MkdirAll(target, 0755)
	}
	if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
		return err
	}
	mknodErr := syscall.Mknod(target, 0755, 0)
	if mknodErr == nil {
		return nil
	}
	// double check if file exists
	if info, err := os.Lstat(target); err == nil && info.Mode().IsRegular() {
		return nil
	}
	return mknodErr
}
67 | 
68 | func (m Mount) String() string {
69 | 	flag := "rw"
70 | 	if m.Flags&syscall.MS_RDONLY == syscall.MS_RDONLY {
71 | 		flag = "ro"
72 | 	}
73 | 	switch {
74 | 	case m.Flags&syscall.MS_BIND == syscall.MS_BIND:
75 | 		return fmt.Sprintf("bind[%s:%s:%s]", m.Source, m.Target, flag)
76 | 
77 | 	case m.FsType == "tmpfs":
78 | 		return fmt.Sprintf("tmpfs[%s]", m.Target)
79 | 
80 | 	case m.FsType == "proc":
81 | 		return fmt.Sprintf("proc[%s]", flag)
82 | 
83 | 	default:
84 | 		return fmt.Sprintf("mount[%s,%s:%s:%x,%s]", m.FsType, m.Source, m.Target, m.Flags, m.Data)
85 | 	}
86 | }
87 | 


--------------------------------------------------------------------------------
/pkg/mount/mount_linux_test.go:
--------------------------------------------------------------------------------
  1 | package mount
  2 | 
  3 | import (
  4 | 	"os"
  5 | 	"path/filepath"
  6 | 	"syscall"
  7 | 	"testing"
  8 | )
  9 | 
 10 | func TestMount_IsBindMount(t *testing.T) {
 11 | 	m := Mount{Flags: syscall.MS_BIND}
 12 | 	if !m.IsBindMount() {
 13 | 		t.Errorf("expected IsBindMount true")
 14 | 	}
 15 | 	m.Flags = 0
 16 | 	if m.IsBindMount() {
 17 | 		t.Errorf("expected IsBindMount false")
 18 | 	}
 19 | }
 20 | 
 21 | func TestMount_IsReadOnly(t *testing.T) {
 22 | 	m := Mount{Flags: syscall.MS_RDONLY}
 23 | 	if !m.IsReadOnly() {
 24 | 		t.Errorf("expected IsReadOnly true")
 25 | 	}
 26 | 	m.Flags = 0
 27 | 	if m.IsReadOnly() {
 28 | 		t.Errorf("expected IsReadOnly false")
 29 | 	}
 30 | }
 31 | 
 32 | func TestMount_IsTmpFs(t *testing.T) {
 33 | 	m := Mount{FsType: "tmpfs"}
 34 | 	if !m.IsTmpFs() {
 35 | 		t.Errorf("expected IsTmpFs true")
 36 | 	}
 37 | 	m.FsType = "other"
 38 | 	if m.IsTmpFs() {
 39 | 		t.Errorf("expected IsTmpFs false")
 40 | 	}
 41 | }
 42 | 
 43 | func TestMount_String(t *testing.T) {
 44 | 	tests := []struct {
 45 | 		m    Mount
 46 | 		want string
 47 | 	}{
 48 | 		{
 49 | 			m:    Mount{Source: "/src", Target: "/dst", Flags: syscall.MS_BIND, FsType: "", Data: ""},
 50 | 			want: "bind[/src:/dst:rw]",
 51 | 		},
 52 | 		{
 53 | 			m:    Mount{Source: "/src", Target: "/dst", Flags: syscall.MS_BIND | syscall.MS_RDONLY, FsType: "", Data: ""},
 54 | 			want: "bind[/src:/dst:ro]",
 55 | 		},
 56 | 		{
 57 | 			m:    Mount{Source: "", Target: "/tmp", FsType: "tmpfs"},
 58 | 			want: "tmpfs[/tmp]",
 59 | 		},
 60 | 		{
 61 | 			m:    Mount{Source: "", Target: "proc", FsType: "proc", Flags: syscall.MS_RDONLY},
 62 | 			want: "proc[ro]",
 63 | 		},
 64 | 		{
 65 | 			m:    Mount{Source: "src", Target: "dst", FsType: "other", Flags: 0, Data: "data"},
 66 | 			want: "mount[other,src:dst:0,data]",
 67 | 		},
 68 | 	}
 69 | 	for _, tt := range tests {
 70 | 		got := tt.m.String()
 71 | 		if got != tt.want {
 72 | 			t.Errorf("Mount.String() = %q, want %q", got, tt.want)
 73 | 		}
 74 | 	}
 75 | }
 76 | 
 77 | func TestEnsureMountTargetExists_Dir(t *testing.T) {
 78 | 	tmpDir := t.TempDir()
 79 | 	target := filepath.Join(tmpDir, "foo/bar")
 80 | 	err := ensureMountTargetExists(tmpDir, target)
 81 | 	if err != nil {
 82 | 		t.Fatalf("ensureMountTargetExists error: %v", err)
 83 | 	}
 84 | 	info, err := os.Stat(target)
 85 | 	if err != nil {
 86 | 		t.Fatalf("stat error: %v", err)
 87 | 	}
 88 | 	if !info.IsDir() {
 89 | 		t.Errorf("expected directory at %s", target)
 90 | 	}
 91 | }
 92 | 
 93 | func TestEnsureMountTargetExists_File(t *testing.T) {
 94 | 	tmpDir := t.TempDir()
 95 | 	srcFile := filepath.Join(tmpDir, "srcfile")
 96 | 	if err := os.WriteFile(srcFile, []byte("x"), 0644); err != nil {
 97 | 		t.Fatalf("write srcfile: %v", err)
 98 | 	}
 99 | 	target := filepath.Join(tmpDir, "targetfile")
100 | 	err := ensureMountTargetExists(srcFile, target)
101 | 	if err != nil {
102 | 		t.Fatalf("ensureMountTargetExists error: %v", err)
103 | 	}
104 | 	// Should be a file or at least exist
105 | 	info, err := os.Lstat(target)
106 | 	if err != nil {
107 | 		t.Fatalf("lstat error: %v", err)
108 | 	}
109 | 	if info.IsDir() {
110 | 		t.Errorf("expected file at %s, got directory", target)
111 | 	}
112 | }
113 | 


--------------------------------------------------------------------------------
/pkg/pipe/buffer.go:
--------------------------------------------------------------------------------
 1 | // Package pipe provides a wrapper to create a pipe and
 2 | // collect at most max bytes from the reader side
 3 | package pipe
 4 | 
 5 | import (
 6 | 	"bytes"
 7 | 	"fmt"
 8 | 	"io"
 9 | 	"os"
10 | )
11 | 
// Buffer pairs the write end of a pipe with an in-memory buffer that
// collects what is read from the other end.
type Buffer struct {
	// W is the write end of the pipe.
	W *os.File
	// Buffer receives the bytes read from the pipe.
	Buffer *bytes.Buffer
	// Done is closed once collecting into Buffer has finished.
	Done <-chan struct{}
	// Max is the byte limit the Buffer was created with.
	Max int64
}
20 | 
// NewPipe creates an OS pipe and starts a goroutine that copies at most n
// bytes from its read end into writer. The returned channel is closed when
// that copy stops; the goroutine then discards the remaining input until
// the write end is closed, and closes the read end.
//
// The caller owns the returned write end and must close it.
func NewPipe(writer io.Writer, n int64) (<-chan struct{}, *os.File, error) {
	readEnd, writeEnd, err := os.Pipe()
	if err != nil {
		return nil, nil, err
	}
	finished := make(chan struct{})
	go func() {
		defer readEnd.Close()
		_, _ = io.CopyN(writer, readEnd, n)
		close(finished)
		// ensure no blocking / SIGPIPE on the other end
		_, _ = io.Copy(io.Discard, readEnd)
	}()
	return finished, writeEnd, nil
}
39 | 
40 | // NewBuffer creates a os pipe, caller need to
41 | // caller need to close w
42 | // Notice: if rely on done for finish, w need be closed in parent process
43 | func NewBuffer(max int64) (*Buffer, error) {
44 | 	buffer := new(bytes.Buffer)
45 | 	done, w, err := NewPipe(buffer, max+1)
46 | 	if err != nil {
47 | 		return nil, err
48 | 	}
49 | 	return &Buffer{
50 | 		W:      w,
51 | 		Max:    max,
52 | 		Buffer: buffer,
53 | 		Done:   done,
54 | 	}, nil
55 | }
56 | 
57 | func (b Buffer) String() string {
58 | 	return fmt.Sprintf("Buffer[%d/%d]", b.Buffer.Len(), b.Max)
59 | }
60 | 


--------------------------------------------------------------------------------
/pkg/pipe/buffer_test.go:
--------------------------------------------------------------------------------
  1 | package pipe
  2 | 
  3 | import (
  4 | 	"io"
  5 | 	"strings"
  6 | 	"testing"
  7 | 	"time"
  8 | )
  9 | 
 10 | func TestNewBuffer_WriteAndRead(t *testing.T) {
 11 | 	const max = 10
 12 | 	buf, err := NewBuffer(max)
 13 | 	if err != nil {
 14 | 		t.Fatalf("NewBuffer error: %v", err)
 15 | 	}
 16 | 	defer buf.W.Close()
 17 | 
 18 | 	// Write less than max bytes
 19 | 	input := "hello"
 20 | 	n, err := buf.W.Write([]byte(input))
 21 | 	if err != nil {
 22 | 		t.Fatalf("Write error: %v", err)
 23 | 	}
 24 | 	if n != len(input) {
 25 | 		t.Errorf("Write bytes = %d, want %d", n, len(input))
 26 | 	}
 27 | 	buf.W.Close()
 28 | 	<-buf.Done
 29 | 
 30 | 	got := buf.Buffer.String()
 31 | 	if got != input {
 32 | 		t.Errorf("Buffer content = %q, want %q", got, input)
 33 | 	}
 34 | }
 35 | 
 36 | func TestNewBuffer_MaxBytes(t *testing.T) {
 37 | 	const max = 5
 38 | 	buf, err := NewBuffer(max)
 39 | 	if err != nil {
 40 | 		t.Fatalf("NewBuffer error: %v", err)
 41 | 	}
 42 | 	defer buf.W.Close()
 43 | 
 44 | 	// Write more than max bytes
 45 | 	input := "toolonginput"
 46 | 	_, err = io.Copy(buf.W, strings.NewReader(input))
 47 | 	if err != nil {
 48 | 		t.Fatalf("Copy error: %v", err)
 49 | 	}
 50 | 	buf.W.Close()
 51 | 	<-buf.Done
 52 | 
 53 | 	got := buf.Buffer.String()
 54 | 	if len(got) != int(max+1) {
 55 | 		t.Errorf("Buffer length = %d, want %d", len(got), max+1)
 56 | 	}
 57 | 	if got != input[:max+1] {
 58 | 		t.Errorf("Buffer content = %q, want %q", got, input[:max+1])
 59 | 	}
 60 | }
 61 | 
 62 | func TestBuffer_String(t *testing.T) {
 63 | 	const max = 8
 64 | 	buf, err := NewBuffer(max)
 65 | 	if err != nil {
 66 | 		t.Fatalf("NewBuffer error: %v", err)
 67 | 	}
 68 | 	defer buf.W.Close()
 69 | 
 70 | 	_, _ = buf.W.Write([]byte("abc"))
 71 | 	buf.W.Close()
 72 | 	<-buf.Done
 73 | 
 74 | 	want := "Buffer[3/8]"
 75 | 	if buf.String() != want {
 76 | 		t.Errorf("String() = %q, want %q", buf.String(), want)
 77 | 	}
 78 | }
 79 | 
 80 | func TestNewBuffer_DoneCloses(t *testing.T) {
 81 | 	const max = 4
 82 | 	buf, err := NewBuffer(max)
 83 | 	if err != nil {
 84 | 		t.Fatalf("NewBuffer error: %v", err)
 85 | 	}
 86 | 	defer buf.W.Close()
 87 | 
 88 | 	done := make(chan struct{})
 89 | 	go func() {
 90 | 		_, _ = buf.W.Write([]byte("test"))
 91 | 		buf.W.Close()
 92 | 		close(done)
 93 | 	}()
 94 | 
 95 | 	select {
 96 | 	case <-buf.Done:
 97 | 		// ok
 98 | 	case <-time.After(1 * time.Second):
 99 | 		t.Fatal("timeout waiting for Done channel")
100 | 	}
101 | }
102 | 


--------------------------------------------------------------------------------
/pkg/rlimit/rlimit.go:
--------------------------------------------------------------------------------
  1 | // Package rlimit provides data structure for resource limits by setrlimit syscall on linux.
  2 | package rlimit
  3 | 
  4 | import (
  5 | 	"fmt"
  6 | 	"strings"
  7 | 	"syscall"
  8 | 
  9 | 	"github.com/criyle/go-sandbox/runner"
 10 | )
 11 | 
 12 | // RLimits defines the rlimit applied by setrlimit syscall to traced process
 13 | type RLimits struct {
 14 | 	CPU          uint64 // in s
 15 | 	CPUHard      uint64 // in s
 16 | 	Data         uint64 // in bytes
 17 | 	FileSize     uint64 // in bytes
 18 | 	Stack        uint64 // in bytes
 19 | 	AddressSpace uint64 // in bytes
 20 | 	OpenFile     uint64 // count
 21 | 	DisableCore  bool   // set core to 0
 22 | }
 23 | 
 24 | // RLimit is the resource limits defined by Linux setrlimit
 25 | type RLimit struct {
 26 | 	// Res is the resource type (e.g. syscall.RLIMIT_CPU)
 27 | 	Res int
 28 | 	// Rlim is the limit applied to that resource
 29 | 	Rlim syscall.Rlimit
 30 | }
 31 | 
 32 | func getRlimit(cur, max uint64) syscall.Rlimit {
 33 | 	return syscall.Rlimit{Cur: cur, Max: max}
 34 | }
 35 | 
 36 | // PrepareRLimit creates rlimit structures for tracee
 37 | // TimeLimit in s, SizeLimit in byte
 38 | func (r *RLimits) PrepareRLimit() []RLimit {
 39 | 	var ret []RLimit
 40 | 	if r.CPU > 0 {
 41 | 		cpuHard := r.CPUHard
 42 | 		if cpuHard < r.CPU {
 43 | 			cpuHard = r.CPU
 44 | 		}
 45 | 
 46 | 		ret = append(ret, RLimit{
 47 | 			Res:  syscall.RLIMIT_CPU,
 48 | 			Rlim: getRlimit(r.CPU, cpuHard),
 49 | 		})
 50 | 	}
 51 | 	if r.Data > 0 {
 52 | 		ret = append(ret, RLimit{
 53 | 			Res:  syscall.RLIMIT_DATA,
 54 | 			Rlim: getRlimit(r.Data, r.Data),
 55 | 		})
 56 | 	}
 57 | 	if r.FileSize > 0 {
 58 | 		ret = append(ret, RLimit{
 59 | 			Res:  syscall.RLIMIT_FSIZE,
 60 | 			Rlim: getRlimit(r.FileSize, r.FileSize),
 61 | 		})
 62 | 	}
 63 | 	if r.Stack > 0 {
 64 | 		ret = append(ret, RLimit{
 65 | 			Res:  syscall.RLIMIT_STACK,
 66 | 			Rlim: getRlimit(r.Stack, r.Stack),
 67 | 		})
 68 | 	}
 69 | 	if r.AddressSpace > 0 {
 70 | 		ret = append(ret, RLimit{
 71 | 			Res:  syscall.RLIMIT_AS,
 72 | 			Rlim: getRlimit(r.AddressSpace, r.AddressSpace),
 73 | 		})
 74 | 	}
 75 | 	if r.OpenFile > 0 {
 76 | 		ret = append(ret, RLimit{
 77 | 			Res:  syscall.RLIMIT_NOFILE,
 78 | 			Rlim: getRlimit(r.OpenFile, r.OpenFile),
 79 | 		})
 80 | 	}
 81 | 	if r.DisableCore {
 82 | 		ret = append(ret, RLimit{
 83 | 			Res:  syscall.RLIMIT_CORE,
 84 | 			Rlim: getRlimit(0, 0),
 85 | 		})
 86 | 	}
 87 | 	return ret
 88 | }
 89 | 
 90 | func (r RLimit) String() string {
 91 | 	t := ""
 92 | 	switch r.Res {
 93 | 	case syscall.RLIMIT_CPU:
 94 | 		return fmt.Sprintf("CPU[%d s:%d s]", r.Rlim.Cur, r.Rlim.Max)
 95 | 	case syscall.RLIMIT_NOFILE:
 96 | 		return fmt.Sprintf("OpenFile[%d:%d]", r.Rlim.Cur, r.Rlim.Max)
 97 | 	case syscall.RLIMIT_DATA:
 98 | 		t = "Data"
 99 | 	case syscall.RLIMIT_FSIZE:
100 | 		t = "File"
101 | 	case syscall.RLIMIT_STACK:
102 | 		t = "Stack"
103 | 	case syscall.RLIMIT_AS:
104 | 		t = "AddressSpace"
105 | 	case syscall.RLIMIT_CORE:
106 | 		t = "Core"
107 | 	}
108 | 	return fmt.Sprintf("%s[%v:%v]", t, runner.Size(r.Rlim.Cur), runner.Size(r.Rlim.Max))
109 | }
110 | 
111 | func (r RLimits) String() string {
112 | 	var sb strings.Builder
113 | 	sb.WriteString("RLimits[")
114 | 	for i, rl := range r.PrepareRLimit() {
115 | 		if i > 0 {
116 | 			sb.WriteByte(',')
117 | 		}
118 | 		sb.WriteString(rl.String())
119 | 	}
120 | 	sb.WriteString("]")
121 | 	return sb.String()
122 | }
123 | 


--------------------------------------------------------------------------------
/pkg/rlimit/rlimit_test.go:
--------------------------------------------------------------------------------
  1 | //go:build linux
  2 | 
  3 | package rlimit
  4 | 
  5 | import (
  6 | 	"syscall"
  7 | 	"testing"
  8 | )
  9 | 
 10 | func TestPrepareRLimit(t *testing.T) {
 11 | 	tests := []struct {
 12 | 		name   string
 13 | 		rl     RLimits
 14 | 		expect []int
 15 | 	}{
 16 | 		{
 17 | 			name:   "Empty",
 18 | 			rl:     RLimits{},
 19 | 			expect: []int{},
 20 | 		},
 21 | 		{
 22 | 			name:   "CPU only",
 23 | 			rl:     RLimits{CPU: 1},
 24 | 			expect: []int{syscall.RLIMIT_CPU},
 25 | 		},
 26 | 		{
 27 | 			name:   "Data only",
 28 | 			rl:     RLimits{Data: 1024},
 29 | 			expect: []int{syscall.RLIMIT_DATA},
 30 | 		},
 31 | 		{
 32 | 			name:   "All fields",
 33 | 			rl:     RLimits{CPU: 1, CPUHard: 2, Data: 1024, FileSize: 2048, Stack: 4096, AddressSpace: 8192, OpenFile: 16, DisableCore: true},
 34 | 			expect: []int{syscall.RLIMIT_CPU, syscall.RLIMIT_DATA, syscall.RLIMIT_FSIZE, syscall.RLIMIT_STACK, syscall.RLIMIT_AS, syscall.RLIMIT_NOFILE, syscall.RLIMIT_CORE},
 35 | 		},
 36 | 		{
 37 | 			name:   "DisableCore only",
 38 | 			rl:     RLimits{DisableCore: true},
 39 | 			expect: []int{syscall.RLIMIT_CORE},
 40 | 		},
 41 | 	}
 42 | 
 43 | 	for _, tt := range tests {
 44 | 		t.Run(tt.name, func(t *testing.T) {
 45 | 			rls := tt.rl.PrepareRLimit()
 46 | 			if len(rls) != len(tt.expect) {
 47 | 				t.Fatalf("expected %d rlimits, got %d", len(tt.expect), len(rls))
 48 | 			}
 49 | 			for i, r := range rls {
 50 | 				if r.Res != tt.expect[i] {
 51 | 					t.Errorf("expected Res %d at %d, got %d", tt.expect[i], i, r.Res)
 52 | 				}
 53 | 			}
 54 | 		})
 55 | 	}
 56 | }
 57 | 
 58 | func TestRLimitString(t *testing.T) {
 59 | 	tests := []struct {
 60 | 		name string
 61 | 		rl   RLimit
 62 | 		want string
 63 | 	}{
 64 | 		{
 65 | 			name: "CPU",
 66 | 			rl:   RLimit{Res: syscall.RLIMIT_CPU, Rlim: syscall.Rlimit{Cur: 1, Max: 2}},
 67 | 			want: "CPU[1 s:2 s]",
 68 | 		},
 69 | 		{
 70 | 			name: "NOFILE",
 71 | 			rl:   RLimit{Res: syscall.RLIMIT_NOFILE, Rlim: syscall.Rlimit{Cur: 10, Max: 20}},
 72 | 			want: "OpenFile[10:20]",
 73 | 		},
 74 | 		{
 75 | 			name: "DATA",
 76 | 			rl:   RLimit{Res: syscall.RLIMIT_DATA, Rlim: syscall.Rlimit{Cur: 1024, Max: 2048}},
 77 | 			want: "Data[1.0 KiB:2.0 KiB]",
 78 | 		},
 79 | 		{
 80 | 			name: "FSIZE",
 81 | 			rl:   RLimit{Res: syscall.RLIMIT_FSIZE, Rlim: syscall.Rlimit{Cur: 100, Max: 200}},
 82 | 			want: "File[100 B:200 B]",
 83 | 		},
 84 | 		{
 85 | 			name: "STACK",
 86 | 			rl:   RLimit{Res: syscall.RLIMIT_STACK, Rlim: syscall.Rlimit{Cur: 4096, Max: 8192}},
 87 | 			want: "Stack[4.0 KiB:8.0 KiB]",
 88 | 		},
 89 | 		{
 90 | 			name: "AS",
 91 | 			rl:   RLimit{Res: syscall.RLIMIT_AS, Rlim: syscall.Rlimit{Cur: 123, Max: 456}},
 92 | 			want: "AddressSpace[123 B:456 B]",
 93 | 		},
 94 | 		{
 95 | 			name: "CORE",
 96 | 			rl:   RLimit{Res: syscall.RLIMIT_CORE, Rlim: syscall.Rlimit{Cur: 0, Max: 0}},
 97 | 			want: "Core[0 B:0 B]",
 98 | 		},
 99 | 	}
100 | 
101 | 	for _, tt := range tests {
102 | 		t.Run(tt.name, func(t *testing.T) {
103 | 			got := tt.rl.String()
104 | 			if got != tt.want {
105 | 				t.Errorf("got %q, want %q", got, tt.want)
106 | 			}
107 | 		})
108 | 	}
109 | }
110 | 
111 | func TestRLimitsString(t *testing.T) {
112 | 	rl := RLimits{
113 | 		CPU:          1,
114 | 		CPUHard:      2,
115 | 		Data:         1024,
116 | 		FileSize:     2048,
117 | 		Stack:        4096,
118 | 		AddressSpace: 8192,
119 | 		OpenFile:     16,
120 | 		DisableCore:  true,
121 | 	}
122 | 	want := "RLimits[CPU[1 s:2 s],Data[1.0 KiB:1.0 KiB],File[2.0 KiB:2.0 KiB],Stack[4.0 KiB:4.0 KiB],AddressSpace[8.0 KiB:8.0 KiB],OpenFile[16:16],Core[0 B:0 B]]"
123 | 	got := rl.String()
124 | 	if got != want {
125 | 		t.Errorf("got %q, want %q", got, want)
126 | 	}
127 | }
128 | 
129 | func TestRLimitsString_Empty(t *testing.T) {
130 | 	rl := RLimits{}
131 | 	want := "RLimits[]"
132 | 	got := rl.String()
133 | 	if got != want {
134 | 		t.Errorf("got %q, want %q", got, want)
135 | 	}
136 | }
137 | 


--------------------------------------------------------------------------------
/pkg/seccomp/filter_linux.go:
--------------------------------------------------------------------------------
 1 | // Package seccomp provides a generated filter format for seccomp filter
 2 | package seccomp
 3 | 
 4 | import "syscall"
 5 | 
 6 | // Filter is the BPF seccomp filter value
 7 | type Filter []syscall.SockFilter
 8 | 
 9 | // SockFprog converts Filter to SockFprog for seccomp syscall
10 | func (f Filter) SockFprog() *syscall.SockFprog {
11 | 	b := []syscall.SockFilter(f)
12 | 	return &syscall.SockFprog{
13 | 		Len:    uint16(len(b)),
14 | 		Filter: &b[0],
15 | 	}
16 | }
17 | 


--------------------------------------------------------------------------------
/pkg/seccomp/libseccomp/action.go:
--------------------------------------------------------------------------------
 1 | package libseccomp
 2 | 
 3 | // Action is seccomp trap action
 4 | type Action uint32
 5 | 
 6 | // Action defines seccomp action to the syscall
 7 | // default value 0 is invalid
 8 | const (
 9 | 	ActionAllow Action = iota + 1
10 | 	ActionErrno
11 | 	ActionTrace
12 | 	ActionKill
13 | )
14 | 
15 | // MsgDisallow, Msghandle defines the action needed when trapped by
16 | // seccomp filter
17 | const (
18 | 	MsgDisallow int16 = iota + 1
19 | 	MsgHandle
20 | )
21 | 
22 | // Action get the basic action
23 | func (a Action) Action() Action {
24 | 	return Action(a & 0xffff)
25 | }
26 | 


--------------------------------------------------------------------------------
/pkg/seccomp/libseccomp/action_linux.go:
--------------------------------------------------------------------------------
 1 | package libseccomp
 2 | 
 3 | import (
 4 | 	libseccomp "github.com/elastic/go-seccomp-bpf"
 5 | )
 6 | 
 7 | // ToSeccompAction convert action to libseccomp compatible action
 8 | func ToSeccompAction(a Action) libseccomp.Action {
 9 | 	var action libseccomp.Action
10 | 	switch a.Action() {
11 | 	case ActionAllow:
12 | 		action = libseccomp.ActionAllow
13 | 	case ActionErrno:
14 | 		action = libseccomp.ActionErrno
15 | 	case ActionTrace:
16 | 		action = libseccomp.ActionTrace
17 | 	default:
18 | 		action = libseccomp.ActionKillProcess
19 | 	}
20 | 	// the least 16 bit of ret value is SECCOMP_RET_DATA
21 | 	// although it might not officially supported by go-seccomp-bpf
22 | 	// action = action.WithReturnData(int(a.ReturnCode()))
23 | 	return action
24 | }
25 | 


--------------------------------------------------------------------------------
/pkg/seccomp/libseccomp/builder_linux.go:
--------------------------------------------------------------------------------
 1 | package libseccomp
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | 
 6 | 	"github.com/criyle/go-sandbox/pkg/seccomp"
 7 | 	libseccomp "github.com/elastic/go-seccomp-bpf"
 8 | 	"golang.org/x/net/bpf"
 9 | )
10 | 
11 | // Builder is used to build the filter
12 | type Builder struct {
13 | 	Allow, Trace []string
14 | 	Default      Action
15 | }
16 | 
17 | var actTrace = libseccomp.ActionTrace
18 | 
19 | // Build builds the filter
20 | func (b *Builder) Build() (seccomp.Filter, error) {
21 | 	policy := libseccomp.Policy{
22 | 		DefaultAction: ToSeccompAction(b.Default),
23 | 		Syscalls: []libseccomp.SyscallGroup{
24 | 			{
25 | 				Action: libseccomp.ActionAllow,
26 | 				Names:  b.Allow,
27 | 			},
28 | 			{
29 | 				Action: actTrace,
30 | 				Names:  b.Trace,
31 | 			},
32 | 		},
33 | 	}
34 | 	program, err := policy.Assemble()
35 | 	if err != nil {
36 | 		return nil, err
37 | 	}
38 | 	return ExportBPF(program)
39 | }
40 | 
41 | // ExportBPF convert libseccomp filter to kernel readable BPF content
42 | func ExportBPF(filter []bpf.Instruction) (seccomp.Filter, error) {
43 | 	raw, err := bpf.Assemble(filter)
44 | 	if err != nil {
45 | 		return nil, err
46 | 	}
47 | 	return sockFilter(raw), nil
48 | }
49 | 
50 | func sockFilter(raw []bpf.RawInstruction) []syscall.SockFilter {
51 | 	filter := make([]syscall.SockFilter, 0, len(raw))
52 | 	for _, instruction := range raw {
53 | 		filter = append(filter, syscall.SockFilter{
54 | 			Code: instruction.Op,
55 | 			Jt:   instruction.Jt,
56 | 			Jf:   instruction.Jf,
57 | 			K:    instruction.K,
58 | 		})
59 | 	}
60 | 	return filter
61 | }
62 | 


--------------------------------------------------------------------------------
/pkg/seccomp/libseccomp/doc.go:
--------------------------------------------------------------------------------
// Package libseccomp provides a wrapper for "github.com/elastic/go-seccomp-bpf"
2 | package libseccomp
3 | 


--------------------------------------------------------------------------------
/pkg/seccomp/libseccomp/seccomp_linux_test.go:
--------------------------------------------------------------------------------
 1 | package libseccomp
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/criyle/go-sandbox/pkg/seccomp"
 7 | )
 8 | 
var (
	// defaultSyscallAllows is the allow list used by
	// BenchmarkBuildDefaultFilter.
	defaultSyscallAllows = []string{
		"read", "write", "readv", "writev", "close", "fstat", "lseek", "dup", "dup2", "dup3", "ioctl", "fcntl", "fadvise64",
		"mmap", "mprotect", "munmap", "brk", "mremap", "msync", "mincore", "madvise",
		"rt_sigaction", "rt_sigprocmask", "rt_sigreturn", "rt_sigpending", "sigaltstack",
		"getcwd", "exit", "exit_group", "arch_prctl",
		"gettimeofday", "getrlimit", "getrusage", "times", "time", "clock_gettime", "restart_syscall",
	}

	// defaultSyscallTraces is the trace list used by
	// BenchmarkBuildDefaultFilter.
	defaultSyscallTraces = []string{
		"execve", "open", "openat", "unlink", "unlinkat", "readlink", "readlinkat", "lstat", "stat", "access", "faccessat",
	}
)
22 | 
23 | func TestBuildFilter(t *testing.T) {
24 | 	_, err := buildFilterMock()
25 | 	if err != nil {
26 | 		t.Error("BuildFilter failed")
27 | 	}
28 | }
29 | 
30 | // BenchmarkBuildDefaultFilter is about 0.2ms/op
31 | func BenchmarkBuildDefaultFilter(b *testing.B) {
32 | 	for i := 0; i < b.N; i++ {
33 | 		builder := Builder{
34 | 			Allow:   defaultSyscallAllows,
35 | 			Trace:   defaultSyscallTraces,
36 | 			Default: ActionTrace,
37 | 		}
38 | 		builder.Build()
39 | 	}
40 | }
41 | 
42 | func buildFilterMock() (seccomp.Filter, error) {
43 | 	b := Builder{
44 | 		Allow:   []string{"fork"},
45 | 		Trace:   []string{"execve"},
46 | 		Default: ActionTrace,
47 | 	}
48 | 	return b.Build()
49 | }
50 | 


--------------------------------------------------------------------------------
/pkg/seccomp/libseccomp/syscall_name_linux.go:
--------------------------------------------------------------------------------
 1 | package libseccomp
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 
 6 | 	"github.com/elastic/go-seccomp-bpf/arch"
 7 | )
 8 | 
 9 | var info, errInfo = arch.GetInfo("")
10 | 
11 | // ToSyscallName convert syscallno to syscall name
12 | func ToSyscallName(sysno uint) (string, error) {
13 | 	if errInfo != nil {
14 | 		return "", errInfo
15 | 	}
16 | 	n, ok := info.SyscallNumbers[int(sysno)]
17 | 	if !ok {
18 | 		return "", fmt.Errorf("syscall number does not exist: %d", sysno)
19 | 	}
20 | 	return n, nil
21 | }
22 | 


--------------------------------------------------------------------------------
/pkg/unixsocket/benchmark_linux_test.go:
--------------------------------------------------------------------------------
 1 | package unixsocket
 2 | 
 3 | import "testing"
 4 | 
 5 | func BenchmarkBaseline(b *testing.B) {
 6 | 	s, t, err := NewSocketPair()
 7 | 	if err != nil {
 8 | 		b.Fatal(err)
 9 | 	}
10 | 	m := make([]byte, 1024)
11 | 	b.ResetTimer()
12 | 	go func() {
13 | 		msg := []byte("message")
14 | 		for i := 0; i < b.N; i++ {
15 | 			s.SendMsg(msg, Msg{})
16 | 		}
17 | 	}()
18 | 
19 | 	for i := 0; i < b.N; i++ {
20 | 		t.RecvMsg(m)
21 | 	}
22 | }
23 | 
24 | func BenchmarkGoroutine(b *testing.B) {
25 | 	s, t, err := NewSocketPair()
26 | 	if err != nil {
27 | 		b.Fatal(err)
28 | 	}
29 | 	m := make([]byte, 1024)
30 | 	b.ResetTimer()
31 | 	go func() {
32 | 		msg := []byte("message")
33 | 		for i := 0; i < b.N; i++ {
34 | 			s.SendMsg(msg, Msg{})
35 | 		}
36 | 	}()
37 | 
38 | 	for i := 0; i < b.N; i++ {
39 | 		c := make(chan struct{})
40 | 		go func() {
41 | 			defer close(c)
42 | 			t.RecvMsg(m)
43 | 		}()
44 | 		<-c
45 | 	}
46 | }
47 | 
48 | func BenchmarkChannel(b *testing.B) {
49 | 	c := make(chan []byte)
50 | 	benchGoroutine(b, c)
51 | }
52 | 
53 | func BenchmarkChannelBuffed(b *testing.B) {
54 | 	c := make(chan []byte, 1)
55 | 	benchGoroutine(b, c)
56 | }
57 | 
58 | func BenchmarkChannelBuffed4(b *testing.B) {
59 | 	c := make(chan []byte, 4)
60 | 	benchGoroutine(b, c)
61 | }
62 | 
// BenchmarkEmptyGoroutine measures the bare cost of starting a goroutine and
// waiting for it through a channel close, with no socket work involved.
func BenchmarkEmptyGoroutine(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		done := make(chan struct{})
		go func() { close(done) }()
		<-done
	}
}
72 | 
73 | func benchGoroutine(b *testing.B, c chan []byte) {
74 | 	s, t, err := NewSocketPair()
75 | 	if err != nil {
76 | 		b.Fatal(err)
77 | 	}
78 | 
79 | 	go func() {
80 | 		msg := []byte("message")
81 | 		for i := 0; i < b.N; i++ {
82 | 			s.SendMsg(msg, Msg{})
83 | 		}
84 | 	}()
85 | 
86 | 	b.ResetTimer()
87 | 	go func() {
88 | 		m := make([]byte, 1024)
89 | 		for i := 0; i < b.N; i++ {
90 | 			t.RecvMsg(m)
91 | 			c <- m
92 | 		}
93 | 	}()
94 | 
95 | 	for i := 0; i < b.N; i++ {
96 | 		<-c
97 | 	}
98 | }
99 | 


--------------------------------------------------------------------------------
/pkg/unixsocket/socket_linux.go:
--------------------------------------------------------------------------------
  1 | // Package unixsocket provides wrapper for Linux unix socket to send and recv oob messages
  2 | // including fd and user credential.
  3 | package unixsocket
  4 | 
  5 | import (
  6 | 	"bytes"
  7 | 	"fmt"
  8 | 	"net"
  9 | 	"os"
 10 | 	"syscall"
 11 | )
 12 | 
// oobSize is the size in bytes of the per-socket buffers used for
// out-of-band (control message) data; it is fixed at 4 KiB.
const oobSize = 4 << 10 // 4kb
 15 | 
// Socket wraps a unix socket connection together with the scratch buffers
// used for out-of-band data.
type Socket struct {
	*net.UnixConn
	sendBuff []byte // backing storage for control messages built by SendMsg
	recvBuff []byte // receives control messages in RecvMsg
}
 22 | 
// Msg is the out-of-band payload that accompanies a message: file descriptors
// and / or a unix credential. Both fields are optional.
type Msg struct {
	Fds  []int          // unix rights
	Cred *syscall.Ucred // unix credential
}
 28 | 
 29 | func newSocket(conn *net.UnixConn) *Socket {
 30 | 	return &Socket{
 31 | 		UnixConn: conn,
 32 | 		sendBuff: make([]byte, oobSize),
 33 | 		recvBuff: make([]byte, oobSize),
 34 | 	}
 35 | }
 36 | 
 37 | // NewSocket creates Socket conn struct using existing unix socket fd
 38 | // creates by socketpair or net.DialUnix and mark it as close_on_exec (avoid fd leak)
 39 | // it need SOCK_SEQPACKET socket for reliable transfer
 40 | // it will need SO_PASSCRED to pass unix credential, Notice: in the documentation,
 41 | // if cred is not specified, self information will be sent
 42 | func NewSocket(fd int) (*Socket, error) {
 43 | 	syscall.SetNonblock(fd, true)
 44 | 	syscall.CloseOnExec(fd)
 45 | 
 46 | 	file := os.NewFile(uintptr(fd), "unix-socket")
 47 | 	if file == nil {
 48 | 		return nil, fmt.Errorf("new socket: %d is not a valid fd", fd)
 49 | 	}
 50 | 	defer file.Close()
 51 | 
 52 | 	conn, err := net.FileConn(file)
 53 | 	if err != nil {
 54 | 		return nil, fmt.Errorf("new socket: fileconn: %w", err)
 55 | 	}
 56 | 
 57 | 	unixConn, ok := conn.(*net.UnixConn)
 58 | 	if !ok {
 59 | 		conn.Close()
 60 | 		return nil, fmt.Errorf("new socket: %d is not a valid unix socket connection", fd)
 61 | 	}
 62 | 	return newSocket(unixConn), nil
 63 | }
 64 | 
 65 | // NewSocketPair creates connected unix socketpair using SOCK_SEQPACKET
 66 | func NewSocketPair() (*Socket, *Socket, error) {
 67 | 	fd, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
 68 | 	if err != nil {
 69 | 		return nil, nil, fmt.Errorf("new socket pair: socketpair: %w", err)
 70 | 	}
 71 | 
 72 | 	ins, err := NewSocket(fd[0])
 73 | 	if err != nil {
 74 | 		syscall.Close(fd[0])
 75 | 		syscall.Close(fd[1])
 76 | 		return nil, nil, fmt.Errorf("new socket pair: sender: %w", err)
 77 | 	}
 78 | 
 79 | 	outs, err := NewSocket(fd[1])
 80 | 	if err != nil {
 81 | 		ins.Close()
 82 | 		syscall.Close(fd[1])
 83 | 		return nil, nil, fmt.Errorf("new socket pair: receiver: %w", err)
 84 | 	}
 85 | 
 86 | 	return ins, outs, nil
 87 | }
 88 | 
 89 | // SetPassCred set sockopt for pass cred for unix socket
 90 | func (s *Socket) SetPassCred(option int) error {
 91 | 	sysconn, err := s.SyscallConn()
 92 | 	if err != nil {
 93 | 		return err
 94 | 	}
 95 | 	return sysconn.Control(func(fd uintptr) {
 96 | 		syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, option)
 97 | 	})
 98 | }
 99 | 
100 | // SendMsg sendmsg to unix socket and encode possible unix right / credential
101 | func (s *Socket) SendMsg(b []byte, m Msg) error {
102 | 	oob := bytes.NewBuffer(s.sendBuff[:0])
103 | 	if len(m.Fds) > 0 {
104 | 		oob.Write(syscall.UnixRights(m.Fds...))
105 | 	}
106 | 	if m.Cred != nil {
107 | 		oob.Write(syscall.UnixCredentials(m.Cred))
108 | 	}
109 | 
110 | 	_, _, err := s.WriteMsgUnix(b, oob.Bytes(), nil)
111 | 	if err != nil {
112 | 		return err
113 | 	}
114 | 	return nil
115 | }
116 | 
117 | // RecvMsg recvmsg from unix socket and parse possible unix right / credential
118 | func (s *Socket) RecvMsg(b []byte) (int, Msg, error) {
119 | 	var msg Msg
120 | 	n, oobn, _, _, err := s.ReadMsgUnix(b, s.recvBuff)
121 | 	if err != nil {
122 | 		return 0, msg, err
123 | 	}
124 | 	// parse oob msg
125 | 	msgs, err := syscall.ParseSocketControlMessage(s.recvBuff[:oobn])
126 | 	if err != nil {
127 | 		return 0, msg, err
128 | 	}
129 | 	msg, err = parseMsg(msgs)
130 | 	if err != nil {
131 | 		return 0, msg, err
132 | 	}
133 | 	return n, msg, nil
134 | }
135 | 
136 | func parseMsg(msgs []syscall.SocketControlMessage) (msg Msg, err error) {
137 | 	defer func() {
138 | 		if err != nil {
139 | 			for _, f := range msg.Fds {
140 | 				syscall.Close(f)
141 | 			}
142 | 			msg.Fds = nil
143 | 		}
144 | 	}()
145 | 	for _, m := range msgs {
146 | 		if m.Header.Level != syscall.SOL_SOCKET {
147 | 			continue
148 | 		}
149 | 
150 | 		switch m.Header.Type {
151 | 		case syscall.SCM_CREDENTIALS:
152 | 			cred, err := syscall.ParseUnixCredentials(&m)
153 | 			if err != nil {
154 | 				return msg, err
155 | 			}
156 | 			msg.Cred = cred
157 | 
158 | 		case syscall.SCM_RIGHTS:
159 | 			fds, err := syscall.ParseUnixRights(&m)
160 | 			if err != nil {
161 | 				return msg, err
162 | 			}
163 | 			msg.Fds = fds
164 | 		}
165 | 	}
166 | 	return msg, nil
167 | }
168 | 


--------------------------------------------------------------------------------
/pkg/unixsocket/socket_linux_test.go:
--------------------------------------------------------------------------------
  1 | package unixsocket
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"os"
  6 | 	"syscall"
  7 | 	"testing"
  8 | )
  9 | 
 10 | func TestBaseline(t *testing.T) {
 11 | 	a, b, err := NewSocketPair()
 12 | 	if err != nil {
 13 | 		t.Fatal(err)
 14 | 	}
 15 | 	m := make([]byte, 1024)
 16 | 
 17 | 	go func() {
 18 | 		msg := []byte("message")
 19 | 		a.SendMsg(msg, Msg{})
 20 | 	}()
 21 | 
 22 | 	n, _, err := b.RecvMsg(m)
 23 | 	if err != nil {
 24 | 		t.Fatal(err)
 25 | 	}
 26 | 
 27 | 	if !bytes.Equal(m[:n], []byte("message")) {
 28 | 		t.Fatal("not equal")
 29 | 	}
 30 | }
 31 | 
 32 | func TestSendRecvMsg_Fds(t *testing.T) {
 33 | 	a, b, err := NewSocketPair()
 34 | 	if err != nil {
 35 | 		t.Fatal(err)
 36 | 	}
 37 | 	defer a.Close()
 38 | 	defer b.Close()
 39 | 
 40 | 	// Create a file to send its fd
 41 | 	tmpfile, err := os.CreateTemp("", "unixsocket-fd")
 42 | 	if err != nil {
 43 | 		t.Fatal(err)
 44 | 	}
 45 | 	defer os.Remove(tmpfile.Name())
 46 | 	defer tmpfile.Close()
 47 | 
 48 | 	msg := []byte("fdtest")
 49 | 	go func() {
 50 | 		a.SendMsg(msg, Msg{Fds: []int{int(tmpfile.Fd())}})
 51 | 	}()
 52 | 
 53 | 	buf := make([]byte, 64)
 54 | 	n, m, err := b.RecvMsg(buf)
 55 | 	if err != nil {
 56 | 		t.Fatal(err)
 57 | 	}
 58 | 	if !bytes.Equal(buf[:n], msg) {
 59 | 		t.Errorf("RecvMsg got %q, want %q", buf[:n], msg)
 60 | 	}
 61 | 	if len(m.Fds) != 1 {
 62 | 		t.Errorf("expected 1 fd, got %d", len(m.Fds))
 63 | 	}
 64 | 	if m.Fds != nil {
 65 | 		syscall.Close(m.Fds[0])
 66 | 	}
 67 | }
 68 | 
 69 | func TestSendRecvMsg_Cred(t *testing.T) {
 70 | 	if os.Geteuid() != 0 {
 71 | 		t.Skip("skipping credential test: requires root privileges")
 72 | 		return
 73 | 	}
 74 | 	a, b, err := NewSocketPair()
 75 | 	if err != nil {
 76 | 		t.Fatal(err)
 77 | 	}
 78 | 	defer a.Close()
 79 | 	defer b.Close()
 80 | 
 81 | 	// Enable credential passing
 82 | 	if err := a.SetPassCred(1); err != nil {
 83 | 		t.Fatal(err)
 84 | 	}
 85 | 	if err := b.SetPassCred(1); err != nil {
 86 | 		t.Fatal(err)
 87 | 	}
 88 | 
 89 | 	msg := []byte("credtest")
 90 | 	go func() {
 91 | 		a.SendMsg(msg, Msg{Cred: &syscall.Ucred{Pid: 123, Uid: 456, Gid: 789}})
 92 | 	}()
 93 | 
 94 | 	buf := make([]byte, 64)
 95 | 	n, m, err := b.RecvMsg(buf)
 96 | 	if err != nil {
 97 | 		t.Fatal(err)
 98 | 	}
 99 | 	if !bytes.Equal(buf[:n], msg) {
100 | 		t.Errorf("RecvMsg got %q, want %q", buf[:n], msg)
101 | 	}
102 | 	if m.Cred == nil {
103 | 		t.Error("expected credential, got nil")
104 | 	}
105 | }
106 | 
107 | func TestNewSocketPair_Close(t *testing.T) {
108 | 	a, b, err := NewSocketPair()
109 | 	if err != nil {
110 | 		t.Fatal(err)
111 | 	}
112 | 	if err := a.Close(); err != nil {
113 | 		t.Errorf("a.Close() error: %v", err)
114 | 	}
115 | 	if err := b.Close(); err != nil {
116 | 		t.Errorf("b.Close() error: %v", err)
117 | 	}
118 | }
119 | 
120 | func TestNewSocket_InvalidFd(t *testing.T) {
121 | 	// Use an invalid fd
122 | 	_, err := NewSocket(-1)
123 | 	if err == nil {
124 | 		t.Error("expected error for invalid fd, got nil")
125 | 	}
126 | }
127 | 
128 | func TestSetPassCred_InvalidSocket(t *testing.T) {
129 | 	a, b, err := NewSocketPair()
130 | 	if err != nil {
131 | 		t.Fatal(err)
132 | 	}
133 | 	defer a.Close()
134 | 	defer b.Close()
135 | 
136 | 	// Close the socket to make it invalid
137 | 	a.Close()
138 | 	err = a.SetPassCred(1)
139 | 	if err == nil {
140 | 		t.Error("expected error on SetPassCred for closed socket, got nil")
141 | 	}
142 | }
143 | 


--------------------------------------------------------------------------------
/ptracer/context_helper_linux.go:
--------------------------------------------------------------------------------
 1 | package ptracer
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | 	"unsafe"
 6 | 
 7 | 	unix "golang.org/x/sys/unix"
 8 | )
 9 | 
// ptraceReadStr fills buff with data read from the tracee pid starting at
// addr, using PTRACE_PEEKDATA. The byte count and error are discarded.
// TODO: make this method not to call ptrace too much
func ptraceReadStr(pid int, addr uintptr, buff []byte) {
	_, _ = syscall.PtracePeekData(pid, addr, buff)
}
14 | 
// processVMReadv issues the raw process_vm_readv syscall for pid with the
// given local and remote iovec slices and flags, returning the raw syscall
// results.
// Both slices must be non-empty: the address of their first element is taken.
// The unsafe.Pointer to uintptr conversions are written inside the Syscall6
// argument list on purpose; do not hoist them into variables.
func processVMReadv(pid int, localIov, remoteIov []unix.Iovec,
	flags uintptr) (r1, r2 uintptr, err syscall.Errno) {
	return syscall.Syscall6(unix.SYS_PROCESS_VM_READV, uintptr(pid),
		uintptr(unsafe.Pointer(&localIov[0])), uintptr(len(localIov)),
		uintptr(unsafe.Pointer(&remoteIov[0])), uintptr(len(remoteIov)),
		flags)
}
22 | 
23 | func vmRead(pid int, addr uintptr, buff []byte) (int, error) {
24 | 	l := len(buff)
25 | 	localIov := getIovecs(&buff[0], l)
26 | 	remoteIov := getIovecs((*byte)(unsafe.Pointer(addr)), l)
27 | 	n, _, err := processVMReadv(pid, localIov, remoteIov, uintptr(0))
28 | 	if err == 0 {
29 | 		return int(n), nil
30 | 	}
31 | 	return int(n), err
32 | }
33 | 
34 | func getIovecs(base *byte, l int) []unix.Iovec {
35 | 	return []unix.Iovec{getIovec(base, l)}
36 | }
37 | 
38 | func vmReadStr(pid int, addr uintptr, buff []byte) error {
39 | 	// Handle unaligned address: calculate remaining bytes to page boundary
40 | 	totalRead := 0 // Total bytes read so far
41 | 	// Calculate distance to next page boundary, nextRead is the number of bytes to read
42 | 	nextRead := pageSize - int(addr%uintptr(pageSize))
43 | 	if nextRead == 0 {
44 | 		nextRead = pageSize // If exactly at page boundary, use full page size
45 | 	}
46 | 
47 | 	// Read in a loop until buffer is full or termination condition is met
48 | 	for len(buff) > 0 {
49 | 		// If remaining buffer is smaller than planned read size, reduce read size
50 | 		if restToRead := len(buff); restToRead < nextRead {
51 | 			nextRead = restToRead
52 | 		}
53 | 
54 | 		// Read data from current position
55 | 		curRead, err := vmRead(pid, addr+uintptr(totalRead), buff[:nextRead])
56 | 		if err != nil {
57 | 			return err // Read error
58 | 		}
59 | 		if curRead == 0 {
60 | 			break // No more data to read
61 | 		}
62 | 		if hasNull(buff[:curRead]) {
63 | 			break // Found string terminator
64 | 		}
65 | 
66 | 		// Update counters and buffer
67 | 		totalRead += curRead  // Update total bytes read
68 | 		buff = buff[curRead:] // Move buffer pointer
69 | 		nextRead = pageSize   // Reset to full page size
70 | 	}
71 | 	return nil
72 | }
73 | 
// hasNull reports whether buff contains at least one zero byte.
func hasNull(buff []byte) bool {
	for i := 0; i < len(buff); i++ {
		if buff[i] == 0 {
			return true
		}
	}
	return false
}
82 | 
// clen returns the length of the C string stored in b: the index of the
// first zero byte, or len(b) when b contains no zero byte.
// The result is always a valid upper bound for slicing b.
func clen(b []byte) int {
	for i, c := range b {
		if c == 0 {
			return i
		}
	}
	// No terminator: the whole buffer is the string. Returning anything
	// larger would make b[:clen(b)] panic.
	return len(b)
}
91 | 


--------------------------------------------------------------------------------
/ptracer/context_linux.go:
--------------------------------------------------------------------------------
 1 | package ptracer
 2 | 
 3 | import (
 4 | 	"os"
 5 | 	"syscall"
 6 | )
 7 | 
// Context is the context for the current syscall trap.
// It is used to retrieve the syscall number and arguments.
type Context struct {
	// Pid is current context process pid
	Pid int
	// current reg context (platform dependent)
	regs syscall.PtraceRegs
}
16 | 
var (
	// UseVMReadv determines whether GetString uses the process_vm_readv
	// based reader. It starts as true and is set to false once that reader
	// fails with ENOSYS.
	UseVMReadv = true
	// pageSize defaults to 4 KiB and is replaced by os.Getpagesize() in init.
	pageSize = 4 << 10
)
23 | 
// init replaces the default pageSize with the value reported by the OS.
func init() {
	pageSize = os.Getpagesize()
}
27 | 
28 | func getTrapContext(pid int) (*Context, error) {
29 | 	var regs syscall.PtraceRegs
30 | 	//err := syscall.PtraceGetRegs(pid, &regs)
31 | 	err := ptraceGetRegSet(pid, &regs)
32 | 	if err != nil {
33 | 		return nil, err
34 | 	}
35 | 	return &Context{
36 | 		Pid:  pid,
37 | 		regs: regs,
38 | 	}, nil
39 | }
40 | 
41 | // GetString get the string from process data segment
42 | func (c *Context) GetString(addr uintptr) string {
43 | 	buff := make([]byte, syscall.PathMax)
44 | 	if UseVMReadv {
45 | 		if err := vmReadStr(c.Pid, addr, buff); err != nil {
46 | 			// if ENOSYS, then disable this function
47 | 			if no, ok := err.(syscall.Errno); ok {
48 | 				if no == syscall.ENOSYS {
49 | 					UseVMReadv = false
50 | 				}
51 | 			}
52 | 		} else {
53 | 			return string(buff[:clen(buff)])
54 | 		}
55 | 	}
56 | 	syscall.PtracePeekData(c.Pid, addr, buff)
57 | 	return string(buff[:clen(buff)])
58 | }
59 | 


--------------------------------------------------------------------------------
/ptracer/context_linux_amd64.go:
--------------------------------------------------------------------------------
 1 | package ptracer
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | 
 6 | 	unix "golang.org/x/sys/unix"
 7 | )
 8 | 
// SyscallNo returns the current syscall number, taken from the saved
// Orig_rax register.
func (c *Context) SyscallNo() uint {
	return uint(c.regs.Orig_rax)
}
13 | 
// Arg0 returns argument 0 of the current syscall (register Rdi).
func (c *Context) Arg0() uint {
	return uint(c.regs.Rdi)
}
18 | 
// Arg1 returns argument 1 of the current syscall (register Rsi).
func (c *Context) Arg1() uint {
	return uint(c.regs.Rsi)
}
23 | 
// Arg2 returns argument 2 of the current syscall (register Rdx).
func (c *Context) Arg2() uint {
	return uint(c.regs.Rdx)
}
28 | 
// Arg3 returns argument 3 of the current syscall (register R10).
func (c *Context) Arg3() uint {
	return uint(c.regs.R10)
}
33 | 
// Arg4 returns argument 4 of the current syscall (register R8).
func (c *Context) Arg4() uint {
	return uint(c.regs.R8)
}
38 | 
// Arg5 returns argument 5 of the current syscall (register R9).
func (c *Context) Arg5() uint {
	return uint(c.regs.R9)
}
43 | 
// SetReturnValue stores retval in the cached Rax register, to be used as the
// return value when the syscall is skipped. Only the in-memory copy is
// changed here; skipSyscall writes the registers back to the tracee.
func (c *Context) SetReturnValue(retval int) {
	c.regs.Rax = uint64(retval)
}
48 | 
// skipSyscall replaces the syscall number in Orig_rax with -1 and writes the
// cached registers back to the tracee with PTRACE_SETREGS.
func (c *Context) skipSyscall() error {
	c.regs.Orig_rax = ^uint64(0) //-1
	return syscall.PtraceSetRegs(c.Pid, &c.regs)
}
53 | 
// getIovec builds a unix.Iovec describing l bytes starting at base.
// On this architecture the Len field is a uint64.
func getIovec(base *byte, l int) unix.Iovec {
	return unix.Iovec{
		Base: base,
		Len:  uint64(l),
	}
}
60 | 


--------------------------------------------------------------------------------
/ptracer/context_linux_arm.go:
--------------------------------------------------------------------------------
 1 | package ptracer
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | 
 6 | 	unix "golang.org/x/sys/unix"
 7 | )
 8 | 
// SyscallNo returns the current syscall number, taken from Uregs[7] (R7).
func (c *Context) SyscallNo() uint {
	return uint(c.regs.Uregs[7]) // R7
}
13 | 
// Arg0 returns argument 0 of the current syscall, taken from Uregs[17]
// (Orig_R0) rather than R0.
func (c *Context) Arg0() uint {
	return uint(c.regs.Uregs[17]) //Orig_R0
}
18 | 
// Arg1 returns argument 1 of the current syscall (Uregs[1], R1).
func (c *Context) Arg1() uint {
	return uint(c.regs.Uregs[1]) // R1
}
23 | 
// Arg2 returns argument 2 of the current syscall (Uregs[2], R2).
func (c *Context) Arg2() uint {
	return uint(c.regs.Uregs[2]) // R2
}
28 | 
// Arg3 returns argument 3 of the current syscall (Uregs[3], R3).
func (c *Context) Arg3() uint {
	return uint(c.regs.Uregs[3]) // R3
}
33 | 
// Arg4 returns argument 4 of the current syscall (Uregs[4], R4).
func (c *Context) Arg4() uint {
	return uint(c.regs.Uregs[4]) // R4
}
38 | 
// Arg5 returns argument 5 of the current syscall (Uregs[5], R5).
func (c *Context) Arg5() uint {
	return uint(c.regs.Uregs[5]) //R5
}
43 | 
// SetReturnValue stores retval (converted to uint32) in the cached R0
// register, to be used as the return value when the syscall is skipped.
// Only the in-memory copy is changed here; skipSyscall writes it back.
func (c *Context) SetReturnValue(retval int) {
	c.regs.Uregs[0] = uint32(retval) // R0
}
48 | 
// skipSyscall writes the cached registers back to the tracee with
// PTRACE_SETREGS and then sets the tracee's syscall number to -1 through
// ptraceArmSetSyscall. The first error encountered is returned.
func (c *Context) skipSyscall() error {
	err := syscall.PtraceSetRegs(c.Pid, &c.regs)
	if err != nil {
		return err
	}
	return ptraceArmSetSyscall(c.Pid, -1)
}
56 | 
// getIovec builds a unix.Iovec describing l bytes starting at base.
// On this architecture the Len field is a uint32.
func getIovec(base *byte, l int) unix.Iovec {
	return unix.Iovec{
		Base: base,
		Len:  uint32(l),
	}
}
63 | 


--------------------------------------------------------------------------------
/ptracer/context_linux_arm64.go:
--------------------------------------------------------------------------------
 1 | package ptracer
 2 | 
 3 | import (
 4 | 	unix "golang.org/x/sys/unix"
 5 | )
 6 | 
// SyscallNo returns the current syscall number, taken from Regs[8] (R8).
func (c *Context) SyscallNo() uint {
	return uint(c.regs.Regs[8]) // R8
}
11 | 
// Arg0 returns argument 0 of the current syscall (Regs[0], R0).
func (c *Context) Arg0() uint {
	return uint(c.regs.Regs[0]) //R0
}
16 | 
// Arg1 returns argument 1 of the current syscall (Regs[1], R1).
func (c *Context) Arg1() uint {
	return uint(c.regs.Regs[1]) // R1
}
21 | 
// Arg2 returns argument 2 of the current syscall (Regs[2], R2).
func (c *Context) Arg2() uint {
	return uint(c.regs.Regs[2]) // R2
}
26 | 
// Arg3 returns argument 3 of the current syscall (Regs[3], R3).
func (c *Context) Arg3() uint {
	return uint(c.regs.Regs[3]) // R3
}
31 | 
// Arg4 returns argument 4 of the current syscall (Regs[4], R4).
func (c *Context) Arg4() uint {
	return uint(c.regs.Regs[4]) // R4
}
36 | 
// Arg5 returns argument 5 of the current syscall (Regs[5], R5).
func (c *Context) Arg5() uint {
	return uint(c.regs.Regs[5]) //R5
}
41 | 
// SetReturnValue stores retval in the cached R0 register, to be used as the
// return value when the syscall is skipped. Only the in-memory copy is
// changed here; skipSyscall writes it back.
func (c *Context) SetReturnValue(retval int) {
	c.regs.Regs[0] = uint64(retval) // R0
}
46 | 
// skipSyscall writes the cached registers back to the tracee through the
// regset interface and then sets the tracee's syscall number to -1 through
// ptraceArm64SetSyscall. The first error encountered is returned.
func (c *Context) skipSyscall() error {
	err := ptraceSetRegSet(c.Pid, &c.regs)
	if err != nil {
		return err
	}
	return ptraceArm64SetSyscall(c.Pid, -1)
}
54 | 
// getIovec builds a unix.Iovec describing l bytes starting at base.
// On this architecture the Len field is a uint64.
func getIovec(base *byte, l int) unix.Iovec {
	return unix.Iovec{
		Base: base,
		Len:  uint64(l),
	}
}
61 | 


--------------------------------------------------------------------------------
/ptracer/context_other.go:
--------------------------------------------------------------------------------
 1 | //go:build !linux
 2 | 
 3 | package ptracer
 4 | 
// Context is an empty placeholder for platforms other than Linux, so that
// code referring to the type still compiles there.
type Context struct {
	Pid int
}
 9 | 
// SyscallNo is a stub for non-Linux builds and always returns 0.
func (c *Context) SyscallNo() uint {
	return 0
}
13 | 
// Arg0 is a stub for non-Linux builds and always returns 0.
func (c *Context) Arg0() uint {
	return 0
}
17 | 
// Arg1 is a stub for non-Linux builds and always returns 0.
func (c *Context) Arg1() uint {
	return 0
}
21 | 
// Arg2 is a stub for non-Linux builds and always returns 0.
func (c *Context) Arg2() uint {
	return 0
}
25 | 
// Arg3 is a stub for non-Linux builds and always returns 0.
func (c *Context) Arg3() uint {
	return 0
}
29 | 
// Arg4 is a stub for non-Linux builds and always returns 0.
func (c *Context) Arg4() uint {
	return 0
}
33 | 
// Arg5 is a stub for non-Linux builds and always returns 0.
func (c *Context) Arg5() uint {
	return 0
}
37 | 
// SetReturnValue is a stub for non-Linux builds and does nothing.
func (c *Context) SetReturnValue(retval int) {

}
41 | 
// GetString is a stub for non-Linux builds and always returns "".
func (c *Context) GetString(addr uintptr) string {
	return ""
}
45 | 


--------------------------------------------------------------------------------
/ptracer/doc.go:
--------------------------------------------------------------------------------
// Package ptracer provides a platform-independent ptrace polling loop
// interface to trace program syscalls on Linux.
3 | package ptracer
4 | 


--------------------------------------------------------------------------------
/ptracer/ptrace_linux.go:
--------------------------------------------------------------------------------
 1 | package ptracer
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | 	"unsafe"
 6 | )
 7 | 
// ptrace constants
const (
	// NT_PRSTATUS is the regset type used by ptraceGetRegSet / ptraceSetRegSet.
	NT_PRSTATUS = 1
	// NT_ARM_SYSTEM_CALL is the regset type used by ptraceArm64SetSyscall.
	NT_ARM_SYSTEM_CALL = 0x404

	// PTRACE_SET_SYSCALL is the ptrace request used by ptraceArmSetSyscall.
	PTRACE_SET_SYSCALL = 23
)
15 | 
// ptrace performs the raw ptrace syscall with the given request, pid, addr
// and data. It returns the syscall errno as an error, or nil on success.
func ptrace(request int, pid int, addr uintptr, data uintptr) error {
	_, _, errno := syscall.Syscall6(syscall.SYS_PTRACE, uintptr(request), uintptr(pid), addr, data, 0, 0)
	if errno == 0 {
		return nil
	}
	return errno
}
23 | 
// ptraceGetRegSet reads the NT_PRSTATUS register set of pid into regs using
// PTRACE_GETREGSET with an iovec that covers the whole regs struct.
// NOTE(review): the iovec address is converted to uintptr before the call to
// ptrace(), not inside the syscall expression itself — confirm this is safe.
func ptraceGetRegSet(pid int, regs *syscall.PtraceRegs) error {
	iov := getIovec((*byte)(unsafe.Pointer(regs)), int(unsafe.Sizeof(*regs)))
	return ptrace(syscall.PTRACE_GETREGSET, pid, NT_PRSTATUS, uintptr(unsafe.Pointer(&iov)))
}
28 | 
// ptraceSetRegSet writes regs to pid as its NT_PRSTATUS register set using
// PTRACE_SETREGSET with an iovec that covers the whole regs struct.
// NOTE(review): the iovec address is converted to uintptr before the call to
// ptrace(), not inside the syscall expression itself — confirm this is safe.
func ptraceSetRegSet(pid int, regs *syscall.PtraceRegs) error {
	iov := getIovec((*byte)(unsafe.Pointer(regs)), int(unsafe.Sizeof(*regs)))
	return ptrace(syscall.PTRACE_SETREGSET, pid, NT_PRSTATUS, uintptr(unsafe.Pointer(&iov)))
}
33 | 
// ptraceArm64SetSyscall sets the syscall number of pid to syscallNo by
// writing the NT_ARM_SYSTEM_CALL regset with PTRACE_SETREGSET.
// NOTE(review): the iovec covers a Go int, which is presumably wider than the
// value the kernel expects for this regset — confirm the extra bytes are
// tolerated.
func ptraceArm64SetSyscall(pid int, syscallNo int) error {
	iov := getIovec((*byte)(unsafe.Pointer(&syscallNo)), int(unsafe.Sizeof(syscallNo)))
	return ptrace(syscall.PTRACE_SETREGSET, pid, NT_ARM_SYSTEM_CALL, uintptr(unsafe.Pointer(&iov)))
}
38 | 
// ptraceArmSetSyscall sets the syscall number of pid to syscallNo using the
// PTRACE_SET_SYSCALL request; the number is passed in the data argument.
func ptraceArmSetSyscall(pid int, syscallNo int) error {
	return ptrace(PTRACE_SET_SYSCALL, pid, 0, uintptr(syscallNo))
}
42 | 


--------------------------------------------------------------------------------
/ptracer/tracer.go:
--------------------------------------------------------------------------------
 1 | package ptracer
 2 | 
 3 | import "github.com/criyle/go-sandbox/runner"
 4 | 
// TraceAction defines the action returned by Handler.Handle for a traced
// syscall.
type TraceAction int
 7 | 
const (
	// TraceAllow does not do anything
	TraceAllow TraceAction = iota
	// TraceBan skips the syscall and sets the return value specified by
	// Context.SetReturnValue
	TraceBan
	// TraceKill indicates that a dangerous action has been detected
	TraceKill
)
16 | 
// Tracer defines a ptracer instance. It embeds the syscall Handler, the
// process Runner and the resource Limit.
type Tracer struct {
	Handler
	Runner
	runner.Limit
}
23 | 
// Runner represents the process runner
type Runner interface {
	// Start starts the child process and returns its pid, or an error if it
	// failed. The child process should enable ptrace and should stop before
	// ptrace
	Start() (int, error)
}
30 | 
// Handler defines customized handler for traced syscall
type Handler interface {
	// Handle returns the action to take on the traced program for the
	// syscall described by the given Context
	Handle(*Context) TraceAction

	// Debug prints debug information when in debug mode
	Debug(v ...interface{})
}
39 | 


--------------------------------------------------------------------------------
/runner/doc.go:
--------------------------------------------------------------------------------
 1 | // Package runner provides common interface for program runner together with
 2 | // common types including Result, Limit, Size and Status.
 3 | //
 4 | // # Status
 5 | //
 6 | // Status defines the program running result status including
 7 | //
 8 | //	Normal
 9 | //	Program Error
10 | //	    Resource Limit Exceeded (Time / Memory / Output)
11 | //	    Unauthorized Access (Disallowed Syscall)
12 | //	    Runtime Error (Signaled / Nonzero Exit Status)
13 | //	Program Runner Error
14 | //
15 | // # Size
16 | //
17 | // Size defines size in bytes, underlying type is uint64 so it
18 | // is effective to store up to EiB of size
19 | //
20 | // # Limit
21 | //
22 | // Limit defines Time & Memory restriction on program runner
23 | //
24 | // # Result
25 | //
26 | // Result defines program running result including
27 | // Status, ExitStatus, Detailed Error, Time, Memory,
28 | // SetupTime and RunningTime (in real clock)
29 | //
30 | // # Runner
31 | //
32 | // General interface to run a program, including a context
33 | // for cancellation
34 | package runner
35 | 


--------------------------------------------------------------------------------
/runner/limit.go:
--------------------------------------------------------------------------------
 1 | package runner
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"time"
 6 | )
 7 | 
// Limit represents the resource limits (time and memory) for the traced
// process.
type Limit struct {
	TimeLimit   time.Duration // user CPU time limit (in ns)
	MemoryLimit Size          // user memory limit (in bytes)
}
13 | 
// String formats the limit as "Limit[Time=..., Memory=...]", using the
// default formatting of both fields.
func (l Limit) String() string {
	return fmt.Sprintf("Limit[Time=%v, Memory=%v]", l.TimeLimit, l.MemoryLimit)
}
17 | 


--------------------------------------------------------------------------------
/runner/ptrace/filehandler/fileset.go:
--------------------------------------------------------------------------------
  1 | package filehandler
  2 | 
  3 | import (
  4 | 	"path/filepath"
  5 | 	"strings"
  6 | )
  7 | 
// FileSet stores the file permissions in the hierarchical set.
type FileSet struct {
	// Set holds the registered paths. Keys ending in "/" or "/*" are treated
	// specially by IsInSetSmart.
	Set map[string]bool
	// SystemRoot records that the path "/" itself was added.
	SystemRoot bool
}
 13 | 
// FilePerm stores the permission that applies to a file; see the
// FilePermWrite / FilePermRead / FilePermStat constants.
type FilePerm int
 16 | 
// FilePermWrite / Read / Stat are permissions, numbered from 1.
// They are declared as untyped constants, so they can be compared with a
// FilePerm value directly.
const (
	FilePermWrite = iota + 1
	FilePermRead
	FilePermStat
)
 23 | 
 24 | // NewFileSet creates the new file set
 25 | func NewFileSet() FileSet {
 26 | 	return FileSet{make(map[string]bool), false}
 27 | }
 28 | 
 29 | // IsInSetSmart same from uoj-judger
 30 | func (s *FileSet) IsInSetSmart(name string) bool {
 31 | 	if s.Set[name] {
 32 | 		return true
 33 | 	}
 34 | 	if name == "/" && s.SystemRoot {
 35 | 		return true
 36 | 	}
 37 | 	// check ...
 38 | 	level := 0
 39 | 	for level = 0; name != ""; level++ {
 40 | 		if level == 1 && s.Set[name+"/*"] {
 41 | 			return true
 42 | 		}
 43 | 		if s.Set[name+"/"] {
 44 | 			return true
 45 | 		}
 46 | 		name = dirname(name)
 47 | 	}
 48 | 	if level == 1 && s.Set["/*"] {
 49 | 		return true
 50 | 	}
 51 | 	if s.Set["/"] {
 52 | 		return true
 53 | 	}
 54 | 	return false
 55 | }
 56 | 
 57 | // Add adds a single file path into the FileSet
 58 | func (s *FileSet) Add(name string) {
 59 | 	if name == "/" {
 60 | 		s.SystemRoot = true
 61 | 	} else {
 62 | 		s.Set[name] = true
 63 | 	}
 64 | }
 65 | 
 66 | // AddRange adds multiple files into the FileSet
 67 | // If path is relative path, add according to the workPath
 68 | func (s *FileSet) AddRange(names []string, workPath string) {
 69 | 	for _, n := range names {
 70 | 		if filepath.IsAbs(n) {
 71 | 			if n == "/" {
 72 | 				s.SystemRoot = true
 73 | 			} else {
 74 | 				s.Set[n] = true
 75 | 			}
 76 | 		} else {
 77 | 			s.Set[filepath.Join(workPath, n)+"/"] = true
 78 | 		}
 79 | 	}
 80 | }
 81 | 
// FileSets aggregates one FileSet per permission class: write / read / stat
// / soft ban.
type FileSets struct {
	Writable, Readable, Statable, SoftBan FileSet
}
 86 | 
 87 | // NewFileSets creates new FileSets struct
 88 | func NewFileSets() *FileSets {
 89 | 	return &FileSets{NewFileSet(), NewFileSet(), NewFileSet(), NewFileSet()}
 90 | }
 91 | 
 92 | // IsWritableFile determines whether the file path inside the write set
 93 | func (s *FileSets) IsWritableFile(name string) bool {
 94 | 	return s.Writable.IsInSetSmart(name) || s.Writable.IsInSetSmart(realPath(name))
 95 | }
 96 | 
 97 | // IsReadableFile determines whether the file path inside the read / write set
 98 | func (s *FileSets) IsReadableFile(name string) bool {
 99 | 	return s.IsWritableFile(name) || s.Readable.IsInSetSmart(name) || s.Readable.IsInSetSmart(realPath(name))
100 | }
101 | 
102 | // IsStatableFile determines whether the file path inside the stat / read / write set
103 | func (s *FileSets) IsStatableFile(name string) bool {
104 | 	return s.IsReadableFile(name) || s.Statable.IsInSetSmart(name) || s.Statable.IsInSetSmart(realPath(name))
105 | }
106 | 
107 | // IsSoftBanFile determines whether the file path inside the softban set
108 | func (s *FileSets) IsSoftBanFile(name string) bool {
109 | 	return s.SoftBan.IsInSetSmart(name) || s.SoftBan.IsInSetSmart(realPath(name))
110 | }
111 | 
112 | // AddFilePermission adds the file into fileSets according to the given permission
113 | func (s *FileSets) AddFilePermission(name string, mode FilePerm) {
114 | 	if mode == FilePermWrite {
115 | 		s.Writable.Add(name)
116 | 	} else if mode == FilePermRead {
117 | 		s.Readable.Add(name)
118 | 	} else if mode == FilePermStat {
119 | 		s.Statable.Add(name)
120 | 	}
121 | 	for name = dirname(name); name != ""; name = dirname(name) {
122 | 		s.Statable.Add(name)
123 | 	}
124 | }
125 | 
126 | // GetExtraSet evaluates the concatenated file set according to real path or raw path
127 | func GetExtraSet(extra, raw []string) []string {
128 | 	rt := make([]string, 0, len(extra)+len(raw))
129 | 	rt = append(rt, raw...)
130 | 	for _, v := range extra {
131 | 		rt = append(rt, realPath(v))
132 | 	}
133 | 	return rt
134 | }
135 | 
// dirname returns the part of path before its last "/", or "" when path
// contains no "/" at all.
func dirname(path string) string {
	cut := strings.LastIndex(path, "/")
	if cut < 0 {
		return ""
	}
	return path[:cut]
}
143 | 
// realPath returns p with all symbolic links resolved, or "" if the path
// cannot be resolved.
func realPath(p string) string {
	resolved, err := filepath.EvalSymlinks(p)
	if err == nil {
		return resolved
	}
	return ""
}
151 | 


--------------------------------------------------------------------------------
/runner/ptrace/filehandler/fileset_test.go:
--------------------------------------------------------------------------------
  1 | package filehandler
  2 | 
  3 | import (
  4 | 	"maps"
  5 | 	"testing"
  6 | )
  7 | 
  8 | // Unit test for IsInSetSmart
  9 | func TestFileSet_IsInSetSmart(t *testing.T) {
 10 | 	// Create a new FileSet
 11 | 	fs := NewFileSet()
 12 | 
 13 | 	// Add paths to the FileSet
 14 | 	fs.Add("/path/to/file")
 15 | 	fs.Add("/path/to/dir/")
 16 | 	fs.Add("/path/to/dir/*")
 17 | 	fs.Add("/")
 18 | 
 19 | 	// Test cases
 20 | 	tests := []struct {
 21 | 		name     string
 22 | 		input    string
 23 | 		expected bool
 24 | 	}{
 25 | 		{"Exact match", "/path/to/file", true},
 26 | 		{"Directory match", "/path/to/dir", true},
 27 | 		{"Wildcard match", "/path/to/dir/subfile", true},
 28 | 		{"Root match", "/", true},
 29 | 		{"Non-existent path", "/non/existent/path", false},
 30 | 	}
 31 | 
 32 | 	for _, test := range tests {
 33 | 		t.Run(test.name, func(t *testing.T) {
 34 | 			result := fs.IsInSetSmart(test.input)
 35 | 			if result != test.expected {
 36 | 				t.Errorf("IsInSetSmart(%q) = %v; expected %v", test.input, result, test.expected)
 37 | 			}
 38 | 		})
 39 | 	}
 40 | }
 41 | 
 42 | // Unit test for Add method
 43 | func TestFileSet_Add(t *testing.T) {
 44 | 	// Create a new FileSet
 45 | 	fs := NewFileSet()
 46 | 
 47 | 	if fs.SystemRoot {
 48 | 		t.Errorf("NewFileSet() failed; expected SystemRoot to be false")
 49 | 	}
 50 | 
 51 | 	// Test adding a path that is not the root directory
 52 | 	fs.Add("/path/to/file")
 53 | 	if fs.SystemRoot {
 54 | 		t.Errorf("Add(\"/path/to/file\") failed; expected SystemRoot to be false")
 55 | 	}
 56 | 
 57 | 	// Test adding the root directory
 58 | 	fs.Add("/")
 59 | 	if !fs.SystemRoot {
 60 | 		t.Errorf("Add(\"/\") failed; expected SystemRoot to be true")
 61 | 	}
 62 | 
 63 | 	// Test adding a regular path
 64 | 	fs.Add("/path/to/file")
 65 | 	if !fs.Set["/path/to/file"] {
 66 | 		t.Errorf("Add(\"/path/to/file\") failed; expected path to be in the set")
 67 | 	}
 68 | 
 69 | 	// Test adding another path
 70 | 	fs.Add("/another/path")
 71 | 	if !fs.Set["/another/path"] {
 72 | 		t.Errorf("Add(\"/another/path\") failed; expected path to be in the set")
 73 | 	}
 74 | 
 75 | 	// Test adding a path with a trailing slash
 76 | 	fs.Add("/path/to/dir/")
 77 | 	if !fs.Set["/path/to/dir/"] {
 78 | 		t.Errorf("Add(\"/path/to/dir/\") failed; expected path to be in the set")
 79 | 	}
 80 | 
 81 | 	// Test adding a path with a wildcard
 82 | 	fs.Add("/path/to/dir/*")
 83 | 	if !fs.Set["/path/to/dir/*"] {
 84 | 		t.Errorf("Add(\"/path/to/dir/*\") failed; expected path to be in the set")
 85 | 	}
 86 | 
 87 | 	// Test adding a relative path
 88 | 	fs.Add("relative/path")
 89 | 	if !fs.Set["relative/path"] {
 90 | 		t.Errorf("Add(\"relative/path\") failed; expected path to be in the set")
 91 | 	}
 92 | }
 93 | 
 94 | // Unit test for AddRange method
 95 | func TestFileSet_AddRange(t *testing.T) {
 96 | 	// Create a new FileSet
 97 | 	fs := NewFileSet()
 98 | 
 99 | 	// Test cases
100 | 	tests := []struct {
101 | 		name       string
102 | 		paths      []string
103 | 		workPath   string
104 | 		expected   map[string]bool
105 | 		systemRoot bool
106 | 	}{
107 | 		{
108 | 			name:     "Add absolute paths",
109 | 			paths:    []string{"/path/to/file", "/another/path"},
110 | 			workPath: "/work/dir",
111 | 			expected: map[string]bool{
112 | 				"/path/to/file": true,
113 | 				"/another/path": true,
114 | 			},
115 | 			systemRoot: false,
116 | 		},
117 | 		{
118 | 			name:       "Add root directory",
119 | 			paths:      []string{"/"},
120 | 			workPath:   "/work/dir",
121 | 			expected:   map[string]bool{},
122 | 			systemRoot: true,
123 | 		},
124 | 		{
125 | 			name:     "Add relative paths",
126 | 			paths:    []string{"relative/path", "another/relative/path"},
127 | 			workPath: "/work/dir",
128 | 			expected: map[string]bool{
129 | 				"/work/dir/relative/path/":         true,
130 | 				"/work/dir/another/relative/path/": true,
131 | 			},
132 | 			systemRoot: false,
133 | 		},
134 | 	}
135 | 
136 | 	for _, test := range tests {
137 | 		t.Run(test.name, func(t *testing.T) {
138 | 			// Reset the FileSet
139 | 			fs = NewFileSet()
140 | 
141 | 			// Call AddRange
142 | 			fs.AddRange(test.paths, test.workPath)
143 | 
144 | 			// Check SystemRoot
145 | 			if fs.SystemRoot != test.systemRoot {
146 | 				t.Errorf("SystemRoot = %v; expected %v", fs.SystemRoot, test.systemRoot)
147 | 			}
148 | 
149 | 			// Check the Set
150 | 			if !maps.Equal(fs.Set, test.expected) {
151 | 				t.Errorf("Set = %v; expected %v", fs.Set, test.expected)
152 | 			}
153 | 		})
154 | 	}
155 | }
156 | 


--------------------------------------------------------------------------------
/runner/ptrace/filehandler/handle.go:
--------------------------------------------------------------------------------
 1 | package filehandler
 2 | 
 3 | import (
 4 | 	"github.com/criyle/go-sandbox/ptracer"
 5 | )
 6 | 
// Handler is a file-access-restricting handler for the ptrace safe runner.
// Its Check* methods translate a file path or syscall name into a
// ptracer.TraceAction.
type Handler struct {
	// FileSet holds the permission sets consulted by CheckRead, CheckWrite
	// and CheckStat, plus the soft-ban set used when a check fails.
	FileSet        *FileSets
	// SyscallCounter is consulted by CheckSyscall.
	SyscallCounter SyscallCounter
}
13 | 
14 | // CheckRead checks whether the file have read permission
15 | func (h *Handler) CheckRead(fn string) ptracer.TraceAction {
16 | 	if !h.FileSet.IsReadableFile(fn) {
17 | 		return h.onDgsFileDetect(fn)
18 | 	}
19 | 	return ptracer.TraceAllow
20 | }
21 | 
22 | // CheckWrite checks whether the file have write permission
23 | func (h *Handler) CheckWrite(fn string) ptracer.TraceAction {
24 | 	if !h.FileSet.IsWritableFile(fn) {
25 | 		return h.onDgsFileDetect(fn)
26 | 	}
27 | 	return ptracer.TraceAllow
28 | }
29 | 
30 | // CheckStat checks whether the file have stat permission
31 | func (h *Handler) CheckStat(fn string) ptracer.TraceAction {
32 | 	if !h.FileSet.IsStatableFile(fn) {
33 | 		return h.onDgsFileDetect(fn)
34 | 	}
35 | 	return ptracer.TraceAllow
36 | }
37 | 
38 | // CheckSyscall checks syscalls other than allowed and traced against the
39 | // SyscallCounter
40 | func (h *Handler) CheckSyscall(syscallName string) ptracer.TraceAction {
41 | 	// if it is traced, then try to count syscall
42 | 	if inside, allow := h.SyscallCounter.Check(syscallName); inside {
43 | 		if allow {
44 | 			return ptracer.TraceAllow
45 | 		}
46 | 		return ptracer.TraceKill
47 | 	}
48 | 	// if it is traced but not counted, it should be soft banned
49 | 	return ptracer.TraceBan
50 | }
51 | 
52 | // onDgsFileDetect soft ban file if in soft ban set
53 | // otherwise stops the trace process
54 | func (h *Handler) onDgsFileDetect(name string) ptracer.TraceAction {
55 | 	if h.FileSet.IsSoftBanFile(name) {
56 | 		return ptracer.TraceBan
57 | 	}
58 | 	return ptracer.TraceKill
59 | }
60 | 


--------------------------------------------------------------------------------
/runner/ptrace/filehandler/syscallcounter.go:
--------------------------------------------------------------------------------
 1 | package filehandler
 2 | 
 3 | // SyscallCounter defines a count-down for each each syscall occurs
 4 | type SyscallCounter map[string]int
 5 | 
 6 | // NewSyscallCounter creates a new SyscallCounter
 7 | func NewSyscallCounter() SyscallCounter {
 8 | 	return SyscallCounter(make(map[string]int))
 9 | }
10 | 
11 | // Add adds single counter to SyscallCounter
12 | func (s SyscallCounter) Add(name string, count int) {
13 | 	s[name] = count
14 | }
15 | 
16 | // AddRange add multiple counter to SyscallCounter
17 | func (s SyscallCounter) AddRange(m map[string]int) {
18 | 	for k, v := range m {
19 | 		s[k] = v
20 | 	}
21 | }
22 | 
23 | // Check return inside, allow
24 | func (s SyscallCounter) Check(name string) (bool, bool) {
25 | 	n, o := s[name]
26 | 	if o {
27 | 		s[name] = n - 1
28 | 		if n <= 1 {
29 | 			return true, false
30 | 		}
31 | 		return true, true
32 | 	}
33 | 	return false, true
34 | }
35 | 


--------------------------------------------------------------------------------
/runner/ptrace/handle_linux.go:
--------------------------------------------------------------------------------
  1 | package ptrace
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"os"
  6 | 	"path/filepath"
  7 | 	"syscall"
  8 | 
  9 | 	"github.com/criyle/go-sandbox/pkg/seccomp/libseccomp"
 10 | 	"github.com/criyle/go-sandbox/ptracer"
 11 | )
 12 | 
// tracerHandler adapts a Handler to the ptracer callback used by Run.
// ShowDetails enables Debug output on stderr; Unsafe downgrades a kill
// decision from Handler.CheckSyscall to a soft ban (see Handle).
type tracerHandler struct {
	ShowDetails, Unsafe bool
	Handler             Handler
}
 17 | 
 18 | func (h *tracerHandler) Debug(v ...interface{}) {
 19 | 	if h.ShowDetails {
 20 | 		fmt.Fprintln(os.Stderr, v...)
 21 | 	}
 22 | }
 23 | 
 24 | func (h *tracerHandler) getString(ctx *ptracer.Context, addr uint) string {
 25 | 	return absPath(ctx.Pid, ctx.GetString(uintptr(addr)))
 26 | }
 27 | 
 28 | func (h *tracerHandler) checkOpen(ctx *ptracer.Context, addr uint, flags uint) ptracer.TraceAction {
 29 | 	fn := h.getString(ctx, addr)
 30 | 	isReadOnly := (flags&syscall.O_ACCMODE == syscall.O_RDONLY) &&
 31 | 		(flags&syscall.O_CREAT == 0) &&
 32 | 		(flags&syscall.O_EXCL == 0) &&
 33 | 		(flags&syscall.O_TRUNC == 0)
 34 | 
 35 | 	h.Debug("open: ", fn, getFileMode(flags))
 36 | 	if isReadOnly {
 37 | 		return h.Handler.CheckRead(fn)
 38 | 	}
 39 | 	return h.Handler.CheckWrite(fn)
 40 | }
 41 | 
 42 | func (h *tracerHandler) checkRead(ctx *ptracer.Context, addr uint) ptracer.TraceAction {
 43 | 	fn := h.getString(ctx, addr)
 44 | 	h.Debug("check read: ", fn)
 45 | 	return h.Handler.CheckRead(fn)
 46 | }
 47 | 
 48 | func (h *tracerHandler) checkWrite(ctx *ptracer.Context, addr uint) ptracer.TraceAction {
 49 | 	fn := h.getString(ctx, addr)
 50 | 	h.Debug("check write: ", fn)
 51 | 	return h.Handler.CheckWrite(fn)
 52 | }
 53 | 
 54 | func (h *tracerHandler) checkStat(ctx *ptracer.Context, addr uint) ptracer.TraceAction {
 55 | 	fn := h.getString(ctx, addr)
 56 | 	h.Debug("check stat: ", fn)
 57 | 	return h.Handler.CheckStat(fn)
 58 | }
 59 | 
 60 | func (h *tracerHandler) Handle(ctx *ptracer.Context) ptracer.TraceAction {
 61 | 	syscallNo := ctx.SyscallNo()
 62 | 	syscallName, err := libseccomp.ToSyscallName(syscallNo)
 63 | 	h.Debug("syscall:", syscallNo, syscallName, err)
 64 | 	if err != nil {
 65 | 		h.Debug("invalid syscall no")
 66 | 		return ptracer.TraceKill
 67 | 	}
 68 | 
 69 | 	action := ptracer.TraceKill
 70 | 	switch syscallName {
 71 | 	case "open":
 72 | 		action = h.checkOpen(ctx, ctx.Arg0(), ctx.Arg1())
 73 | 	case "openat":
 74 | 		action = h.checkOpen(ctx, ctx.Arg1(), ctx.Arg2())
 75 | 
 76 | 	case "readlink":
 77 | 		action = h.checkRead(ctx, ctx.Arg0())
 78 | 	case "readlinkat":
 79 | 		action = h.checkRead(ctx, ctx.Arg1())
 80 | 
 81 | 	case "unlink":
 82 | 		action = h.checkWrite(ctx, ctx.Arg0())
 83 | 	case "unlinkat":
 84 | 		action = h.checkWrite(ctx, ctx.Arg1())
 85 | 
 86 | 	case "access":
 87 | 		action = h.checkStat(ctx, ctx.Arg0())
 88 | 	case "faccessat", "newfstatat":
 89 | 		action = h.checkStat(ctx, ctx.Arg1())
 90 | 
 91 | 	case "stat", "stat64":
 92 | 		action = h.checkStat(ctx, ctx.Arg0())
 93 | 	case "lstat", "lstat64":
 94 | 		action = h.checkStat(ctx, ctx.Arg0())
 95 | 
 96 | 	case "execve":
 97 | 		action = h.checkRead(ctx, ctx.Arg0())
 98 | 	case "execveat":
 99 | 		action = h.checkRead(ctx, ctx.Arg1())
100 | 
101 | 	case "chmod":
102 | 		action = h.checkWrite(ctx, ctx.Arg0())
103 | 	case "rename":
104 | 		action = h.checkWrite(ctx, ctx.Arg0())
105 | 
106 | 	default:
107 | 		action = h.Handler.CheckSyscall(syscallName)
108 | 		if h.Unsafe && action == ptracer.TraceKill {
109 | 			action = ptracer.TraceBan
110 | 		}
111 | 	}
112 | 
113 | 	switch action {
114 | 	case ptracer.TraceAllow:
115 | 		return ptracer.TraceAllow
116 | 	case ptracer.TraceBan:
117 | 		h.Debug("<soft ban syscall>")
118 | 		return softBanSyscall(ctx)
119 | 	default:
120 | 		return ptracer.TraceKill
121 | 	}
122 | }
123 | 
124 | func softBanSyscall(ctx *ptracer.Context) ptracer.TraceAction {
125 | 	ctx.SetReturnValue(-int(BanRet))
126 | 	return ptracer.TraceBan
127 | }
128 | 
// getFileMode renders the access mode bits of open flags as a two character
// tag for debug output: "r " (read only), "w " (write only), "wr" (read
// and write) or "??" for any other value of the access mode bits.
func getFileMode(flags uint) string {
	access := flags & syscall.O_ACCMODE
	if access == syscall.O_RDONLY {
		return "r "
	}
	if access == syscall.O_WRONLY {
		return "w "
	}
	if access == syscall.O_RDWR {
		return "wr"
	}
	return "??"
}
141 | 
// getProcCwd returns the target of the /proc cwd link of process pid, or of
// the calling process when pid is not positive. It returns the empty string
// when the link cannot be read.
func getProcCwd(pid int) string {
	var link string
	if pid > 0 {
		link = fmt.Sprintf("/proc/%d/cwd", pid)
	} else {
		link = "/proc/self/cwd"
	}

	if cwd, err := os.Readlink(link); err == nil {
		return cwd
	}
	return ""
}
154 | 
155 | // absPath calculates the absolute path for a process
156 | // built-in function did the dirty works to resolve relative paths
157 | func absPath(pid int, p string) string {
158 | 	// if relative path
159 | 	if !filepath.IsAbs(p) {
160 | 		return filepath.Join(getProcCwd(pid), p)
161 | 	}
162 | 	return filepath.Clean(p)
163 | }
164 | 


--------------------------------------------------------------------------------
/runner/ptrace/run_linux.go:
--------------------------------------------------------------------------------
 1 | package ptrace
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"os"
 6 | 
 7 | 	"github.com/criyle/go-sandbox/pkg/forkexec"
 8 | 	"github.com/criyle/go-sandbox/ptracer"
 9 | 	"github.com/criyle/go-sandbox/runner"
10 | )
11 | 
12 | // Run starts the tracing process
13 | func (r *Runner) Run(c context.Context) runner.Result {
14 | 	ch := &forkexec.Runner{
15 | 		Args:     r.Args,
16 | 		Env:      r.Env,
17 | 		ExecFile: r.ExecFile,
18 | 		RLimits:  r.RLimits,
19 | 		Files:    r.Files,
20 | 		WorkDir:  r.WorkDir,
21 | 		Seccomp:  r.Seccomp.SockFprog(),
22 | 		Ptrace:   true,
23 | 		SyncFunc: r.SyncFunc,
24 | 
25 | 		UnshareCgroupAfterSync: os.Getuid() == 0,
26 | 	}
27 | 
28 | 	th := &tracerHandler{
29 | 		ShowDetails: r.ShowDetails,
30 | 		Unsafe:      r.Unsafe,
31 | 		Handler:     r.Handler,
32 | 	}
33 | 
34 | 	tracer := ptracer.Tracer{
35 | 		Handler: th,
36 | 		Runner:  ch,
37 | 		Limit:   r.Limit,
38 | 	}
39 | 	return tracer.Trace(c)
40 | }
41 | 


--------------------------------------------------------------------------------
/runner/ptrace/runner_linux.go:
--------------------------------------------------------------------------------
 1 | package ptrace
 2 | 
 3 | import (
 4 | 	"syscall"
 5 | 
 6 | 	"github.com/criyle/go-sandbox/pkg/rlimit"
 7 | 	"github.com/criyle/go-sandbox/pkg/seccomp"
 8 | 	"github.com/criyle/go-sandbox/ptracer"
 9 | 	"github.com/criyle/go-sandbox/runner"
10 | )
11 | 
// Runner defines the spec to run a program safely under the ptracer.
// Run copies these fields into a forkexec.Runner and a ptracer.Tracer.
type Runner struct {
	// Args and Env are the argv and environment for the child process;
	// WorkDir is the working directory for the child
	Args    []string
	Env     []string
	WorkDir string

	// ExecFile is handed to forkexec for fexecve
	// (presumably an open file descriptor of the executable - confirm)
	ExecFile uintptr

	// file descriptors for new process, from 0 to len - 1
	Files []uintptr

	// Resource limits applied through rlimit
	RLimits []rlimit.RLimit

	// Resource limit enforced by the tracer
	Limit runner.Limit

	// Seccomp defines the seccomp filter for the ptrace runner:
	// file access syscalls need to be set as ActionTrace,
	// allowed syscalls need to be set as ActionAllow,
	// and the default action should be ActionTrace / ActionKill
	Seccomp seccomp.Filter

	// Handler decides the action for traced syscalls
	Handler Handler

	// ShowDetails enables debug output on stderr; Unsafe turns a kill
	// decision from Handler.CheckSyscall into a soft ban
	ShowDetails, Unsafe bool

	// SyncFunc is passed to forkexec; used by cgroup to add the process
	SyncFunc func(pid int) error
}
47 | 
// BanRet defines the errno used for a syscall ban action; its negated value
// is set as the return value of a soft-banned syscall.
var BanRet = syscall.EACCES
50 | 
// Handler defines the action taken when a traced syscall is encountered.
// CheckRead, CheckWrite and CheckStat receive the absolute path of the file
// being accessed; CheckSyscall receives the name of any other traced syscall.
type Handler interface {
	CheckRead(string) ptracer.TraceAction
	CheckWrite(string) ptracer.TraceAction
	CheckStat(string) ptracer.TraceAction
	CheckSyscall(string) ptracer.TraceAction
}
58 | 


--------------------------------------------------------------------------------
/runner/result.go:
--------------------------------------------------------------------------------
 1 | package runner
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"time"
 6 | )
 7 | 
// Result is the program runner result: the final status together with the
// measured resource usage and timing metrics of the run.
type Result struct {
	Status            // result status
	ExitStatus int    // exit status (signal number if signalled)
	Error      string // potential detailed error message (for program runner error)

	Time     time.Duration // used user CPU time  (underlying type int64 in ns)
	Memory   Size          // used user memory    (underlying type uint64 in bytes)
	ProcPeak uint64        // maximum processes

	// metrics for the program runner
	SetUpTime   time.Duration
	RunningTime time.Duration
}
22 | 
23 | func (r Result) String() string {
24 | 	switch r.Status {
25 | 	case StatusNormal:
26 | 		return fmt.Sprintf("Result[%v %v][%v %v]", r.Time, r.Memory, r.SetUpTime, r.RunningTime)
27 | 
28 | 	case StatusSignalled:
29 | 		return fmt.Sprintf("Result[Signalled(%d)][%v %v][%v %v]", r.ExitStatus, r.Time, r.Memory, r.SetUpTime, r.RunningTime)
30 | 
31 | 	case StatusRunnerError:
32 | 		return fmt.Sprintf("Result[RunnerFailed(%s)][%v %v][%v %v]", r.Error, r.Time, r.Memory, r.SetUpTime, r.RunningTime)
33 | 
34 | 	default:
35 | 		return fmt.Sprintf("Result[%v(%s %d)][%v %v][%v %v]", r.Status, r.Error, r.ExitStatus, r.Time, r.Memory, r.SetUpTime, r.RunningTime)
36 | 	}
37 | }
38 | 


--------------------------------------------------------------------------------
/runner/runner.go:
--------------------------------------------------------------------------------
 1 | package runner
 2 | 
 3 | import (
 4 | 	"context"
 5 | )
 6 | 
// Runner is implemented by every program runner: Run executes the program
// under the given context and returns its Result.
type Runner interface {
	Run(context.Context) Result
}
11 | 


--------------------------------------------------------------------------------
/runner/size.go:
--------------------------------------------------------------------------------
 1 | package runner
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"strconv"
 6 | )
 7 | 
 8 | // Size stores number of byte for the object. E.g. Memory.
 9 | // Maximum size is bounded by 64-bit limit
10 | type Size uint64
11 | 
12 | // String stringer interface for print
13 | func (s Size) String() string {
14 | 	t := uint64(s)
15 | 	switch {
16 | 	case t < 1<<10:
17 | 		return fmt.Sprintf("%d B", t)
18 | 	case t < 1<<20:
19 | 		return fmt.Sprintf("%.1f KiB", float64(t)/float64(1<<10))
20 | 	case t < 1<<30:
21 | 		return fmt.Sprintf("%.1f MiB", float64(t)/float64(1<<20))
22 | 	default:
23 | 		return fmt.Sprintf("%.1f GiB", float64(t)/float64(1<<30))
24 | 	}
25 | }
26 | 
27 | // Set parse the size value from string
28 | func (s *Size) Set(str string) error {
29 | 	switch str[len(str)-1] {
30 | 	case 'b', 'B':
31 | 		str = str[:len(str)-1]
32 | 	}
33 | 
34 | 	factor := 0
35 | 	switch str[len(str)-1] {
36 | 	case 'k', 'K':
37 | 		factor = 10
38 | 		str = str[:len(str)-1]
39 | 	case 'm', 'M':
40 | 		factor = 20
41 | 		str = str[:len(str)-1]
42 | 	case 'g', 'G':
43 | 		factor = 30
44 | 		str = str[:len(str)-1]
45 | 	}
46 | 
47 | 	t, err := strconv.Atoi(str)
48 | 	if err != nil {
49 | 		return err
50 | 	}
51 | 	*s = Size(t << factor)
52 | 	return nil
53 | }
54 | 
55 | // Byte return size in bytes
56 | func (s Size) Byte() uint64 {
57 | 	return uint64(s)
58 | }
59 | 
60 | // KiB return size in KiB
61 | func (s Size) KiB() uint64 {
62 | 	return uint64(s) >> 10
63 | }
64 | 
65 | // MiB return size in MiB
66 | func (s Size) MiB() uint64 {
67 | 	return uint64(s) >> 20
68 | }
69 | 
70 | // GiB return size in GiB
71 | func (s Size) GiB() uint64 {
72 | 	return uint64(s) >> 30
73 | }
74 | 
75 | // TiB return size in TiB
76 | func (s Size) TiB() uint64 {
77 | 	return uint64(s) >> 40
78 | }
79 | 
80 | // PiB return size in PiB
81 | func (s Size) PiB() uint64 {
82 | 	return uint64(s) >> 50
83 | }
84 | 
85 | // EiB return size in EiB
86 | func (s Size) EiB() uint64 {
87 | 	return uint64(s) >> 60
88 | }
89 | 


--------------------------------------------------------------------------------
/runner/status.go:
--------------------------------------------------------------------------------
 1 | package runner
 2 | 
 3 | // Status is the result Status
 4 | type Status int
 5 | 
 6 | // Result Status for program runner
 7 | const (
 8 | 	StatusInvalid Status = iota // 0 not initialized
 9 | 	// Normal
10 | 	StatusNormal // 1 normal
11 | 
12 | 	// Resource Limit Exceeded
13 | 	StatusTimeLimitExceeded   // 2 tle
14 | 	StatusMemoryLimitExceeded // 3 mle
15 | 	StatusOutputLimitExceeded // 4 ole
16 | 
17 | 	// Unauthorized Access
18 | 	StatusDisallowedSyscall // 5 ban
19 | 
20 | 	// Runtime Error
21 | 	StatusSignalled         // 6 signalled
22 | 	StatusNonzeroExitStatus // 7 nonzero exit status
23 | 
24 | 	// Programmer Runner Error
25 | 	StatusRunnerError // 8 runner error
26 | )
27 | 
28 | var (
29 | 	statusString = []string{
30 | 		"Invalid",
31 | 		"",
32 | 		"Time Limit Exceeded",
33 | 		"Memory Limit Exceeded",
34 | 		"Output Limit Exceeded",
35 | 		"Disallowed Syscall",
36 | 		"Signalled",
37 | 		"Nonzero Exit Status",
38 | 		"Runner Error",
39 | 	}
40 | )
41 | 
42 | func (t Status) String() string {
43 | 	i := int(t)
44 | 	if i >= 0 && i < len(statusString) {
45 | 		return statusString[i]
46 | 	}
47 | 	return statusString[0]
48 | }
49 | 
50 | func (t Status) Error() string {
51 | 	return t.String()
52 | }
53 | 


--------------------------------------------------------------------------------
/runner/unshare/doc.go:
--------------------------------------------------------------------------------
1 | // Package unshare implements runner that uses Linux unshare syscall & mount namespace & rlimit
2 | // to restrict program access
3 | package unshare
4 | 


--------------------------------------------------------------------------------
/runner/unshare/run_linux.go:
--------------------------------------------------------------------------------
  1 | package unshare
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"os"
  7 | 	"time"
  8 | 
  9 | 	"golang.org/x/sys/unix"
 10 | 
 11 | 	"github.com/criyle/go-sandbox/pkg/forkexec"
 12 | 	"github.com/criyle/go-sandbox/runner"
 13 | )
 14 | 
const (
	// UnshareFlags is the set of clone flags used to create new namespaces:
	// mount, PID, user, UTS and cgroup. NET and IPC namespaces are not included.
	UnshareFlags = unix.CLONE_NEWNS | unix.CLONE_NEWPID | unix.CLONE_NEWUSER | unix.CLONE_NEWUTS | unix.CLONE_NEWCGROUP
)
 19 | 
// Run starts the unshared process and waits for it to finish.
//
// The child is started through forkexec with the UnshareFlags namespaces,
// the configured mounts, pivot root and seccomp filter. Run then waits on
// the child with wait4, compares the reported user time and max RSS against
// r.Limit and maps the exit or termination signal to a runner.Status.
// Cancelling c kills the process group. On every return path after a
// successful start the process group is killed, remaining children are
// reaped and SetUpTime / RunningTime are filled in.
func (r *Runner) Run(c context.Context) (result runner.Result) {
	ch := &forkexec.Runner{
		Args:       r.Args,
		Env:        r.Env,
		ExecFile:   r.ExecFile,
		RLimits:    r.RLimits,
		Files:      r.Files,
		WorkDir:    r.WorkDir,
		Seccomp:    r.Seccomp.SockFprog(),
		NoNewPrivs: true,
		CloneFlags: UnshareFlags,
		Mounts:     r.Mounts,
		HostName:   r.HostName,
		DomainName: r.DomainName,
		PivotRoot:  r.Root,
		DropCaps:   true,
		SyncFunc:   r.SyncFunc,

		UnshareCgroupAfterSync: true,
	}

	var (
		wstatus unix.WaitStatus // wait4 wait status
		rusage  unix.Rusage     // wait4 rusage
		status  = runner.StatusNormal
		sTime   = time.Now() // start time
		fTime   time.Time    // finish time for setup
	)

	// Start the runner
	pgid, err := ch.Start()
	r.println("Starts: ", pgid, err)
	if err != nil {
		// nothing was started, so no cleanup is registered yet
		result.Status = runner.StatusRunnerError
		result.Error = err.Error()
		return
	}

	ctx, cancel := context.WithCancel(c)
	// cancelling on return also releases the goroutine below
	defer cancel()

	// handle cancel
	go func() {
		<-ctx.Done()
		killAll(pgid)
	}()

	// kill the whole process group upon return, reap what is left and
	// record the timing metrics on the named result
	defer func() {
		killAll(pgid)
		collectZombie(pgid)
		result.SetUpTime = fTime.Sub(sTime)
		result.RunningTime = time.Since(fTime)
	}()

	fTime = time.Now()
	for {
		_, err := unix.Wait4(pgid, &wstatus, 0, &rusage)
		if err == unix.EINTR {
			// interrupted by a signal, wait again
			continue
		}
		r.println("wait4: ", wstatus)
		if err != nil {
			result.Status = runner.StatusRunnerError
			result.Error = err.Error()
			return
		}

		// update resource usage and check against limits
		userTime := time.Duration(rusage.Utime.Nano()) // ns
		userMem := runner.Size(rusage.Maxrss << 10)    // bytes

		// check tle / mle (memory limit takes precedence when both are exceeded)
		if userTime > r.Limit.TimeLimit {
			status = runner.StatusTimeLimitExceeded
		}
		if userMem > r.Limit.MemoryLimit {
			status = runner.StatusMemoryLimitExceeded
		}
		result = runner.Result{
			Status: status,
			Time:   userTime,
			Memory: userMem,
		}
		if status != runner.StatusNormal {
			return
		}

		switch {
		case wstatus.Exited():
			result.Status = runner.StatusNormal
			result.ExitStatus = wstatus.ExitStatus()
			if result.ExitStatus != 0 {
				result.Status = runner.StatusNonzeroExitStatus
			}
			return

		case wstatus.Signaled():
			sig := wstatus.Signal()
			switch sig {
			case unix.SIGXCPU, unix.SIGKILL:
				// SIGKILL is reported as time limit exceeded as well
				status = runner.StatusTimeLimitExceeded
			case unix.SIGXFSZ:
				status = runner.StatusOutputLimitExceeded
			case unix.SIGSYS:
				status = runner.StatusDisallowedSyscall
			default:
				status = runner.StatusSignalled
			}
			result.Status = status
			result.ExitStatus = int(sig)
			return
		}
		// neither exited nor signalled: keep waiting
	}
}
136 | 
137 | // kill all tracee according to pids
138 | func killAll(pgid int) {
139 | 	unix.Kill(-pgid, unix.SIGKILL)
140 | }
141 | 
142 | // collect died child processes
143 | func collectZombie(pgid int) {
144 | 	var wstatus unix.WaitStatus
145 | 	for {
146 | 		if _, err := unix.Wait4(-pgid, &wstatus, unix.WALL|unix.WNOHANG, nil); err != unix.EINTR && err != nil {
147 | 			break
148 | 		}
149 | 	}
150 | }
151 | 
152 | func (r *Runner) println(v ...interface{}) {
153 | 	if r.ShowDetails {
154 | 		fmt.Fprintln(os.Stderr, v...)
155 | 	}
156 | }
157 | 


--------------------------------------------------------------------------------
/runner/unshare/runner_linux.go:
--------------------------------------------------------------------------------
 1 | package unshare
 2 | 
 3 | import (
 4 | 	"github.com/criyle/go-sandbox/pkg/mount"
 5 | 	"github.com/criyle/go-sandbox/pkg/rlimit"
 6 | 	"github.com/criyle/go-sandbox/pkg/seccomp"
 7 | 	"github.com/criyle/go-sandbox/runner"
 8 | )
 9 | 
// Runner runs program in unshared namespaces.
// Run copies these fields into a forkexec.Runner.
type Runner struct {
	// argv and env for the child process
	Args []string
	Env  []string

	// ExecFile is the fexecve param handed to forkexec
	// (presumably an open file descriptor of the executable - confirm)
	ExecFile uintptr

	// workdir is the current dir after unshare mount namespaces
	WorkDir string

	// file descriptors for new process, from 0 to len - 1
	Files []uintptr

	// Resource limits applied through rlimit
	RLimits []rlimit.RLimit

	// Resource limit checked by Run against the usage reported by wait4
	Limit runner.Limit

	// Seccomp defines the seccomp filter attach to the process (should be whitelist only)
	Seccomp seccomp.Filter

	// Root is the new root, passed to forkexec as PivotRoot
	Root string

	// Mount syscalls
	Mounts []mount.SyscallParams

	// hostname & domainname
	HostName, DomainName string

	// ShowDetails enables debug output on stderr
	ShowDetails bool

	// SyncFunc is passed to forkexec; used by cgroup to add the process
	SyncFunc func(pid int) error
}
49 | 


--------------------------------------------------------------------------------