├── .clang-format ├── .clangd ├── .github └── workflows │ └── dockerpush.yml ├── .gitignore ├── .gitmodules ├── CONTRIBUTING ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── caps.cc ├── caps.h ├── cgroup.cc ├── cgroup.h ├── cgroup2.cc ├── cgroup2.h ├── cmdline.cc ├── cmdline.h ├── config.cc ├── config.h ├── config.proto ├── configs ├── apache.cfg ├── bash-with-fake-geteuid.cfg ├── bash-with-fake-geteuid.json ├── demo-dont-use-chrome-with-net.cfg ├── firefox-with-cloned-net.cfg ├── firefox-with-net-wayland.cfg ├── firefox-with-net.cfg ├── hexchat-with-net.cfg ├── home-documents-with-xorg-no-net.cfg ├── imagemagick-convert.cfg ├── static-busybox-with-execveat.cfg ├── telegram.cfg ├── tomcat8.cfg ├── weechat-with-net.cfg ├── xchat-with-net.cfg └── znc-with-net.cfg ├── contain.cc ├── contain.h ├── cpu.cc ├── cpu.h ├── logs.cc ├── logs.h ├── macros.h ├── mnt.cc ├── mnt.h ├── net.cc ├── net.h ├── nsjail.1 ├── nsjail.cc ├── nsjail.h ├── pid.cc ├── pid.h ├── sandbox.cc ├── sandbox.h ├── subproc.cc ├── subproc.h ├── user.cc ├── user.h ├── util.cc ├── util.h ├── uts.cc └── uts.h /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | ColumnLimit: 100 3 | IndentWidth: 8 4 | UseTab: Always 5 | --- 6 | Language: Cpp 7 | AlignAfterOpenBracket: DontAlign 8 | AlignEscapedNewlines: Right 9 | AlignOperands: true 10 | AllowShortFunctionsOnASingleLine: false 11 | AlwaysBreakBeforeMultilineStrings: false 12 | IndentCaseLabels: false 13 | --- 14 | Language: Proto 15 | -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- 1 | CompileFlags: 2 | Add: [-std=c++17, -Ikafel/include, -I/usr/include/libnl3, -fno-exceptions, -Wno-unused, -Wno-unused-parameter] 3 | -------------------------------------------------------------------------------- /.github/workflows/dockerpush.yml: 
-------------------------------------------------------------------------------- 1 | name: Docker 2 | 3 | on: 4 | push: 5 | # Publish `master` as Docker `latest` image. 6 | branches: 7 | - master 8 | 9 | # Publish `v1.2.3` tags as releases. 10 | tags: 11 | - v* 12 | 13 | # Run tests for any PRs. 14 | pull_request: 15 | 16 | env: 17 | IMAGE_NAME: nsjail 18 | 19 | jobs: 20 | # Run tests. 21 | # See also https://docs.docker.com/docker-hub/builds/automated-testing/ 22 | test: 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | 28 | - name: Run tests 29 | run: docker build . --file Dockerfile 30 | 31 | # Push image to GitHub Package Registry. 32 | # See also https://docs.docker.com/docker-hub/builds/ 33 | push: 34 | # Ensure test job passes before pushing image. 35 | needs: test 36 | 37 | runs-on: ubuntu-latest 38 | if: github.event_name == 'push' 39 | 40 | steps: 41 | - uses: actions/checkout@v2 42 | 43 | - name: Build image 44 | run: docker build . --file Dockerfile --tag image 45 | 46 | - name: Log into registry 47 | run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login docker.pkg.github.com -u ${{ github.actor }} --password-stdin 48 | 49 | - name: Push image 50 | run: | 51 | IMAGE_ID=docker.pkg.github.com/${{ github.repository }}/$IMAGE_NAME 52 | 53 | # Strip git ref prefix from version 54 | VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') 55 | 56 | # Strip "v" prefix from tag name 57 | [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') 58 | 59 | # Use Docker `latest` tag convention 60 | [ "$VERSION" == "master" ] && VERSION=latest 61 | 62 | echo IMAGE_ID=$IMAGE_ID 63 | echo VERSION=$VERSION 64 | 65 | docker tag image $IMAGE_ID:$VERSION 66 | docker push $IMAGE_ID:$VERSION 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | config.pb.h 3 | 
config.pb.cc 4 | nsjail 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "kafel"] 2 | path = kafel 3 | url = https://github.com/google/kafel.git 4 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | Want to contribute? Great! First, read this page (including the small print at the end). 2 | 3 | ### Before you contribute 4 | Before we can use your code, you must sign the 5 | [Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1) 6 | (CLA), which you can do online. The CLA is necessary mainly because you own the 7 | copyright to your changes, even after your contribution becomes part of our 8 | codebase, so we need your permission to use and distribute your code. We also 9 | need to be sure of various other things—for instance that you'll tell us if you 10 | know that your code infringes on other people's patents. You don't have to sign 11 | the CLA until after you've submitted your code for review and a member has 12 | approved it, but you must do it before we can put your code into our codebase. 13 | Before you start working on a larger contribution, you should get in touch with 14 | us first through the issue tracker with your idea so that we can help out and 15 | possibly guide you. Coordinating up front makes it much easier to avoid 16 | frustration later on. 17 | 18 | ### Code reviews 19 | All submissions, including submissions by project members, require review. We 20 | use Github pull requests for this purpose. 21 | 22 | ### The small print 23 | Contributions made by corporations are covered by a different agreement than 24 | the one above, the Software Grant and Corporate Contributor License Agreement. 
25 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:bookworm-slim AS base 2 | 3 | # Install run-time dependencies in base image 4 | RUN apt-get -y update && apt-get install -y \ 5 | libc6 \ 6 | libstdc++6 \ 7 | libprotobuf32 \ 8 | libnl-route-3-200 9 | 10 | FROM base AS build 11 | 12 | # Install build dependencies only in builder image 13 | RUN apt-get install -y \ 14 | autoconf \ 15 | bison \ 16 | flex \ 17 | gcc \ 18 | g++ \ 19 | git \ 20 | libprotobuf-dev \ 21 | libnl-route-3-dev \ 22 | libtool \ 23 | make \ 24 | pkg-config \ 25 | protobuf-compiler 26 | 27 | COPY . /nsjail 28 | 29 | RUN cd /nsjail && make clean && make 30 | 31 | FROM base AS run 32 | 33 | # Copy over build result and trim image 34 | RUN rm -rf /var/lib/apt/lists/* 35 | COPY --from=build /nsjail/nsjail /bin 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # nsjail - Makefile 3 | # ----------------------------------------- 4 | # 5 | # Copyright 2014 Google Inc. All Rights Reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
# pkg-config is required to locate protobuf and (optionally) libnl-route-3.
PKG_CONFIG=$(shell command -v pkg-config 2> /dev/null)
ifeq ($(PKG_CONFIG),)
$(error "Install pkg-config to make it work")
endif

CC ?= gcc
CXX ?= g++

# Flags shared by every translation unit. Note that -c lives here, so the
# compile recipe below does not pass it again.
COMMON_FLAGS += -O2 -c \
	-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 \
	-fPIE \
	-Wformat -Wformat-security -Wno-format-nonliteral \
	-Wall -Wextra -Werror \
	-Ikafel/include

CXXFLAGS += $(USER_DEFINES) $(COMMON_FLAGS) $(shell pkg-config --cflags protobuf) \
	-std=c++20 -fno-exceptions -Wno-unused -Wno-unused-parameter
LDFLAGS += -pie -Wl,-z,noexecstack -lpthread $(shell pkg-config --libs protobuf)

BIN = nsjail
LIBS = kafel/libkafel.a
SRCS_CXX = caps.cc cgroup.cc cgroup2.cc cmdline.cc config.cc contain.cc cpu.cc logs.cc mnt.cc net.cc nsjail.cc pid.cc sandbox.cc subproc.cc uts.cc user.cc util.cc
SRCS_H = $(SRCS_CXX:.cc=.h) macros.h
SRCS_PROTO = config.proto
SRCS_PB_CXX = $(SRCS_PROTO:.proto=.pb.cc)
SRCS_PB_H = $(SRCS_PROTO:.proto=.pb.h)
SRCS_PB_O = $(SRCS_PROTO:.proto=.pb.o)
OBJS = $(SRCS_CXX:.cc=.o) $(SRCS_PB_CXX:.cc=.o)

# `make DEBUG=1` adds debug information.
ifdef DEBUG
	CXXFLAGS += -g -ggdb -gdwarf-4
endif

# libnl-route-3 is optional at configure time; a warning is printed at link
# time (see the $(BIN) rule) when it is missing.
NL3_EXISTS := $(shell pkg-config --exists libnl-route-3.0 && echo yes)
ifeq ($(NL3_EXISTS), yes)
	CXXFLAGS += $(shell pkg-config --cflags libnl-route-3.0)
	LDFLAGS += $(shell pkg-config --libs libnl-route-3.0)
endif

.PHONY: all clean depend indent

# Compile one C++ source into one object file.
# This must be a pattern rule ('%.o: %.cc'). The previous '.o: %.cc' form was
# parsed by GNU make as an explicit rule for a file literally named '.o', so
# this recipe was never used and objects were built by make's built-in rule.
# $(CPPFLAGS) is kept because the built-in rule honored it.
%.o: %.cc
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@

all: $(BIN)

$(BIN): $(LIBS) $(OBJS)
ifneq ($(NL3_EXISTS), yes)
	$(warning "============================================================")
	$(warning "You probably miss libnl3(-dev)/libnl-route-3(-dev) libraries")
	$(warning "============================================================")
endif
	$(CXX) -o $(BIN) $(OBJS) $(LIBS) $(LDFLAGS)

# Fetch the kafel submodule on first use.
.PHONY: kafel_init
kafel_init:
ifeq ("$(wildcard kafel/Makefile)","")
	git submodule update --init
endif

kafel/include/kafel.h: kafel_init
# LDFLAGS is unset as a workaround for Kafel using the parent LDFLAGS
# incorrectly.
kafel/libkafel.a: kafel_init
	LDFLAGS="" CFLAGS=-fPIE $(MAKE) -C kafel

# Sequence of proto deps, which doesn't fit automatic make rules
$(SRCS_PB_O): $(SRCS_PB_H) $(SRCS_PB_CXX)
$(SRCS_PB_CXX) $(SRCS_PB_H): $(SRCS_PROTO)
	protoc --cpp_out=. $(SRCS_PROTO)

.PHONY: clean
clean:
	$(RM) core Makefile.bak $(OBJS) $(SRCS_PB_CXX) $(SRCS_PB_H) $(SRCS_PB_O) $(BIN)
ifneq ("$(wildcard kafel/Makefile)","")
	$(MAKE) -C kafel clean
endif

# Regenerates the header dependency list below the "DO NOT DELETE" marker.
.PHONY: depend
depend: all
	makedepend -Y -Ykafel/include -- -- $(SRCS_CXX) $(SRCS_PB_CXX)

.PHONY: indent
indent:
	clang-format -i -sort-includes $(SRCS_H) $(SRCS_CXX) $(SRCS_PROTO) configs/*.json

# DO NOT DELETE THIS LINE -- make depend depends on it.

caps.o: caps.h nsjail.h logs.h macros.h util.h
cgroup.o: cgroup.h nsjail.h logs.h util.h
cgroup2.o: cgroup2.h nsjail.h logs.h util.h
cmdline.o: cmdline.h nsjail.h caps.h config.h logs.h macros.h mnt.h user.h
cmdline.o: util.h
config.o: config.h nsjail.h caps.h cmdline.h config.pb.h logs.h macros.h
config.o: mnt.h user.h util.h
contain.o: contain.h nsjail.h caps.h cgroup.h cpu.h logs.h macros.h mnt.h
contain.o: net.h pid.h user.h util.h uts.h
cpu.o: cpu.h nsjail.h logs.h util.h
logs.o: logs.h macros.h util.h nsjail.h
mnt.o: mnt.h nsjail.h logs.h macros.h subproc.h util.h
net.o: net.h nsjail.h logs.h subproc.h
nsjail.o: nsjail.h cgroup2.h cmdline.h logs.h macros.h net.h sandbox.h
nsjail.o: subproc.h util.h
pid.o: pid.h nsjail.h logs.h subproc.h
sandbox.o: sandbox.h nsjail.h kafel/include/kafel.h logs.h util.h
subproc.o: subproc.h nsjail.h cgroup.h cgroup2.h contain.h logs.h macros.h
subproc.o: net.h sandbox.h user.h util.h 126 | uts.o: uts.h nsjail.h logs.h 127 | user.o: user.h nsjail.h logs.h macros.h subproc.h util.h 128 | util.o: util.h nsjail.h logs.h macros.h 129 | config.pb.o: config.pb.h 130 | -------------------------------------------------------------------------------- /caps.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - capability-related operations 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #include "caps.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | #include 33 | 34 | #include "logs.h" 35 | #include "macros.h" 36 | #include "util.h" 37 | 38 | #if !defined(CAP_AUDIT_READ) 39 | #define CAP_AUDIT_READ 37 40 | #endif /* !defined(CAP_AUDIT_READ) */ 41 | #if !defined(CAP_PERFMON) 42 | #define CAP_PERFMON 38 43 | #endif /* !defined(CAP_PERFMON) */ 44 | #if !defined(CAP_BPF) 45 | #define CAP_BPF 39 46 | #endif /* !defined(CAP_BPF) */ 47 | #if !defined(CAP_CHECKPOINT_RESTORE) 48 | #define CAP_CHECKPOINT_RESTORE 40 49 | #endif /* !defined(CAP_CHECKPOINT_RESTORE) */ 50 | 51 | namespace caps { 52 | 53 | struct { 54 | const int val; 55 | const char* const name; 56 | } static const capNames[] = { 57 | NS_VALSTR_STRUCT(CAP_CHOWN), 58 | NS_VALSTR_STRUCT(CAP_DAC_OVERRIDE), 59 | NS_VALSTR_STRUCT(CAP_DAC_READ_SEARCH), 60 | NS_VALSTR_STRUCT(CAP_FOWNER), 61 | NS_VALSTR_STRUCT(CAP_FSETID), 62 | NS_VALSTR_STRUCT(CAP_KILL), 63 | NS_VALSTR_STRUCT(CAP_SETGID), 64 | NS_VALSTR_STRUCT(CAP_SETUID), 65 | NS_VALSTR_STRUCT(CAP_SETPCAP), 66 | NS_VALSTR_STRUCT(CAP_LINUX_IMMUTABLE), 67 | NS_VALSTR_STRUCT(CAP_NET_BIND_SERVICE), 68 | NS_VALSTR_STRUCT(CAP_NET_BROADCAST), 69 | NS_VALSTR_STRUCT(CAP_NET_ADMIN), 70 | NS_VALSTR_STRUCT(CAP_NET_RAW), 71 | NS_VALSTR_STRUCT(CAP_IPC_LOCK), 72 | NS_VALSTR_STRUCT(CAP_IPC_OWNER), 73 | NS_VALSTR_STRUCT(CAP_SYS_MODULE), 74 | NS_VALSTR_STRUCT(CAP_SYS_RAWIO), 75 | NS_VALSTR_STRUCT(CAP_SYS_CHROOT), 76 | NS_VALSTR_STRUCT(CAP_SYS_PTRACE), 77 | NS_VALSTR_STRUCT(CAP_SYS_PACCT), 78 | NS_VALSTR_STRUCT(CAP_SYS_ADMIN), 79 | NS_VALSTR_STRUCT(CAP_SYS_BOOT), 80 | NS_VALSTR_STRUCT(CAP_SYS_NICE), 81 | NS_VALSTR_STRUCT(CAP_SYS_RESOURCE), 82 | NS_VALSTR_STRUCT(CAP_SYS_TIME), 83 | NS_VALSTR_STRUCT(CAP_SYS_TTY_CONFIG), 84 | NS_VALSTR_STRUCT(CAP_MKNOD), 85 | NS_VALSTR_STRUCT(CAP_LEASE), 86 | NS_VALSTR_STRUCT(CAP_AUDIT_WRITE), 87 | NS_VALSTR_STRUCT(CAP_AUDIT_CONTROL), 88 | 
NS_VALSTR_STRUCT(CAP_SETFCAP), 89 | NS_VALSTR_STRUCT(CAP_MAC_OVERRIDE), 90 | NS_VALSTR_STRUCT(CAP_MAC_ADMIN), 91 | NS_VALSTR_STRUCT(CAP_SYSLOG), 92 | NS_VALSTR_STRUCT(CAP_WAKE_ALARM), 93 | NS_VALSTR_STRUCT(CAP_BLOCK_SUSPEND), 94 | NS_VALSTR_STRUCT(CAP_AUDIT_READ), 95 | NS_VALSTR_STRUCT(CAP_PERFMON), 96 | NS_VALSTR_STRUCT(CAP_BPF), 97 | NS_VALSTR_STRUCT(CAP_CHECKPOINT_RESTORE), 98 | }; 99 | 100 | int nameToVal(const char* name) { 101 | for (const auto& cap : capNames) { 102 | if (util::StrEq(name, cap.name)) { 103 | return cap.val; 104 | } 105 | } 106 | LOG_W("Unknown capability: '%s'", name); 107 | return -1; 108 | } 109 | 110 | static const std::string capToStr(int val) { 111 | for (const auto& cap : capNames) { 112 | if (val == cap.val) { 113 | return cap.name; 114 | } 115 | } 116 | 117 | std::string res; 118 | res.append("CAP_UNKNOWN(").append(std::to_string(val)).append(")"); 119 | return res; 120 | } 121 | 122 | static cap_user_data_t getCaps() { 123 | static __thread struct __user_cap_data_struct cap_data[_LINUX_CAPABILITY_U32S_3]; 124 | const struct __user_cap_header_struct cap_hdr = { 125 | .version = _LINUX_CAPABILITY_VERSION_3, 126 | .pid = 0, 127 | }; 128 | if (util::syscall(__NR_capget, (uintptr_t)&cap_hdr, (uintptr_t)&cap_data) == -1) { 129 | PLOG_W("capget() failed"); 130 | return nullptr; 131 | } 132 | return cap_data; 133 | } 134 | 135 | static bool setCaps(const cap_user_data_t cap_data) { 136 | const struct __user_cap_header_struct cap_hdr = { 137 | .version = _LINUX_CAPABILITY_VERSION_3, 138 | .pid = 0, 139 | }; 140 | if (util::syscall(__NR_capset, (uintptr_t)&cap_hdr, (uintptr_t)cap_data) == -1) { 141 | PLOG_W("capset() failed"); 142 | return false; 143 | } 144 | return true; 145 | } 146 | 147 | static void clearInheritable(cap_user_data_t cap_data) { 148 | for (size_t i = 0; i < _LINUX_CAPABILITY_U32S_3; i++) { 149 | cap_data[i].inheritable = 0U; 150 | } 151 | } 152 | 153 | static bool getPermitted(cap_user_data_t cap_data, unsigned int cap) { 
154 | size_t off_byte = CAP_TO_INDEX(cap); 155 | unsigned mask = CAP_TO_MASK(cap); 156 | return cap_data[off_byte].permitted & mask; 157 | } 158 | 159 | static bool getEffective(cap_user_data_t cap_data, unsigned int cap) { 160 | size_t off_byte = CAP_TO_INDEX(cap); 161 | unsigned mask = CAP_TO_MASK(cap); 162 | return cap_data[off_byte].effective & mask; 163 | } 164 | 165 | static bool getInheritable(cap_user_data_t cap_data, unsigned int cap) { 166 | size_t off_byte = CAP_TO_INDEX(cap); 167 | unsigned mask = CAP_TO_MASK(cap); 168 | return cap_data[off_byte].inheritable & mask; 169 | } 170 | 171 | static void setInheritable(cap_user_data_t cap_data, unsigned int cap) { 172 | size_t off_byte = CAP_TO_INDEX(cap); 173 | unsigned mask = CAP_TO_MASK(cap); 174 | cap_data[off_byte].inheritable |= mask; 175 | } 176 | 177 | #if !defined(PR_CAP_AMBIENT) 178 | #define PR_CAP_AMBIENT 47 179 | #define PR_CAP_AMBIENT_RAISE 2 180 | #define PR_CAP_AMBIENT_CLEAR_ALL 4 181 | #endif /* !defined(PR_CAP_AMBIENT) */ 182 | static bool initNsKeepCaps(cap_user_data_t cap_data) { 183 | /* Copy all permitted caps to the inheritable set */ 184 | std::string dbgmsg1; 185 | for (const auto& i : capNames) { 186 | if (getPermitted(cap_data, i.val)) { 187 | util::StrAppend(&dbgmsg1, " %s", i.name); 188 | setInheritable(cap_data, i.val); 189 | } 190 | } 191 | LOG_D("Adding the following capabilities to the inheritable set:%s", dbgmsg1.c_str()); 192 | 193 | if (!setCaps(cap_data)) { 194 | return false; 195 | } 196 | 197 | /* Make sure the inheritable set is preserved across execve via the ambient set */ 198 | std::string dbgmsg2; 199 | for (const auto& i : capNames) { 200 | if (!getPermitted(cap_data, i.val)) { 201 | continue; 202 | } 203 | if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, (unsigned long)i.val, 0UL, 0UL) == 204 | -1) { 205 | PLOG_W("prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, %s)", i.name); 206 | } else { 207 | util::StrAppend(&dbgmsg2, " %s", i.name); 208 | } 209 | } 210 | 
LOG_D("Added the following capabilities to the ambient set:%s", dbgmsg2.c_str()); 211 | 212 | return true; 213 | } 214 | 215 | bool initNs(nsjconf_t* nsjconf) { 216 | cap_user_data_t cap_data = getCaps(); 217 | if (cap_data == nullptr) { 218 | return false; 219 | } 220 | 221 | /* Let's start with an empty inheritable set to avoid any mistakes */ 222 | clearInheritable(cap_data); 223 | /* 224 | * Remove all capabilities from the ambient set first. It works with newer kernel versions 225 | * only, so don't panic() if it fails 226 | */ 227 | if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0UL, 0UL, 0UL) == -1) { 228 | PLOG_W("prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL)"); 229 | } 230 | 231 | if (nsjconf->keep_caps) { 232 | return initNsKeepCaps(cap_data); 233 | } 234 | 235 | /* Set all requested caps in the inheritable set if these are present in the permitted set 236 | */ 237 | std::string dbgmsg; 238 | for (const auto& cap : nsjconf->caps) { 239 | if (!getPermitted(cap_data, cap)) { 240 | LOG_W("Capability %s is not permitted in the namespace", 241 | capToStr(cap).c_str()); 242 | return false; 243 | } 244 | dbgmsg.append(" ").append(capToStr(cap)); 245 | setInheritable(cap_data, cap); 246 | } 247 | LOG_D("Adding the following capabilities to the inheritable set:%s", dbgmsg.c_str()); 248 | 249 | if (!setCaps(cap_data)) { 250 | return false; 251 | } 252 | 253 | /* 254 | * Make sure all other caps (those which were not explicitly requested) are removed from the 255 | * bounding set. 
We need to have CAP_SETPCAP to do that now 256 | */ 257 | dbgmsg.clear(); 258 | if (getEffective(cap_data, CAP_SETPCAP)) { 259 | for (const auto& i : capNames) { 260 | if (getInheritable(cap_data, i.val)) { 261 | continue; 262 | } 263 | if (prctl(PR_CAPBSET_READ, (unsigned long)i.val, 0UL, 0UL, 0UL) == -1 && 264 | errno == EINVAL) { 265 | LOG_D("Skipping unsupported capability: %s", i.name); 266 | continue; 267 | } 268 | dbgmsg.append(" ").append(i.name); 269 | if (prctl(PR_CAPBSET_DROP, (unsigned long)i.val, 0UL, 0UL, 0UL) == -1) { 270 | PLOG_W("prctl(PR_CAPBSET_DROP, %s)", i.name); 271 | return false; 272 | } 273 | } 274 | LOG_D( 275 | "Dropped the following capabilities from the bounding set:%s", dbgmsg.c_str()); 276 | } 277 | 278 | /* Make sure inheritable set is preserved across execve via the modified ambient set */ 279 | dbgmsg.clear(); 280 | for (const auto& cap : nsjconf->caps) { 281 | if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, (unsigned long)cap, 0UL, 0UL) == 282 | -1) { 283 | PLOG_W("prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, %s)", 284 | capToStr(cap).c_str()); 285 | } else { 286 | dbgmsg.append(" ").append(capToStr(cap)); 287 | } 288 | } 289 | LOG_D("Added the following capabilities to the ambient set:%s", dbgmsg.c_str()); 290 | 291 | return true; 292 | } 293 | 294 | } // namespace caps 295 | -------------------------------------------------------------------------------- /caps.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - capability-related operations 4 | ----------------------------------------- 5 | 6 | Copyright 2017 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #ifndef NS_CAPS_H 23 | #define NS_CAPS_H 24 | 25 | #include 26 | #include 27 | 28 | #include "nsjail.h" 29 | 30 | namespace caps { 31 | 32 | int nameToVal(const char* name); 33 | bool initNs(nsjconf_t* nsjconf); 34 | 35 | } // namespace caps 36 | 37 | #endif /* NS_CAPS_H */ 38 | -------------------------------------------------------------------------------- /cgroup.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - cgroup namespacing 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #include "cgroup.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include 34 | 35 | #include "logs.h" 36 | #include "util.h" 37 | 38 | namespace cgroup { 39 | 40 | static bool createCgroup(const std::string& cgroup_path, pid_t pid) { 41 | LOG_D("Create %s for pid=%d", QC(cgroup_path), (int)pid); 42 | if (mkdir(cgroup_path.c_str(), 0700) == -1 && errno != EEXIST) { 43 | PLOG_W("mkdir(%s, 0700) failed", QC(cgroup_path)); 44 | return false; 45 | } 46 | return true; 47 | } 48 | 49 | static bool writeToCgroup( 50 | const std::string& cgroup_path, const std::string& value, const std::string& what) { 51 | LOG_D("Setting %s to '%s'", QC(cgroup_path), value.c_str()); 52 | if (!util::writeBufToFile( 53 | cgroup_path.c_str(), value.c_str(), value.length(), O_WRONLY | O_CLOEXEC)) { 54 | LOG_W("Could not update %s", what.c_str()); 55 | return false; 56 | } 57 | return true; 58 | } 59 | 60 | static bool addPidToTaskList(const std::string& cgroup_path, pid_t pid) { 61 | std::string pid_str = std::to_string(pid); 62 | std::string tasks_path = cgroup_path + "/tasks"; 63 | LOG_D("Adding pid='%s' to %s", pid_str.c_str(), QC(tasks_path)); 64 | return writeToCgroup(tasks_path, pid_str, "'" + tasks_path + "' task list"); 65 | } 66 | 67 | static bool initNsFromParentMem(nsjconf_t* nsjconf, pid_t pid) { 68 | size_t memsw_max = nsjconf->cgroup_mem_memsw_max; 69 | if (nsjconf->cgroup_mem_swap_max >= (ssize_t)0) { 70 | memsw_max = nsjconf->cgroup_mem_swap_max + nsjconf->cgroup_mem_max; 71 | } 72 | 73 | if (nsjconf->cgroup_mem_max == (size_t)0 && memsw_max == (size_t)0) { 74 | return true; 75 | } 76 | 77 | std::string mem_cgroup_path = nsjconf->cgroup_mem_mount + '/' + nsjconf->cgroup_mem_parent + 78 | "/NSJAIL." 
+ std::to_string(pid); 79 | RETURN_ON_FAILURE(createCgroup(mem_cgroup_path, pid)); 80 | 81 | /* 82 | * Use OOM-killer instead of making processes hang/sleep 83 | */ 84 | RETURN_ON_FAILURE(writeToCgroup( 85 | mem_cgroup_path + "/memory.oom_control", "0", "memory cgroup oom control")); 86 | 87 | if (nsjconf->cgroup_mem_max > (size_t)0) { 88 | std::string mem_max_str = std::to_string(nsjconf->cgroup_mem_max); 89 | RETURN_ON_FAILURE(writeToCgroup(mem_cgroup_path + "/memory.limit_in_bytes", 90 | mem_max_str, "memory cgroup max limit")); 91 | } 92 | 93 | if (memsw_max > (size_t)0) { 94 | std::string mem_memsw_max_str = std::to_string(memsw_max); 95 | RETURN_ON_FAILURE(writeToCgroup(mem_cgroup_path + "/memory.memsw.limit_in_bytes", 96 | mem_memsw_max_str, "memory+Swap cgroup max limit")); 97 | } 98 | 99 | return addPidToTaskList(mem_cgroup_path, pid); 100 | } 101 | 102 | static bool initNsFromParentPids(nsjconf_t* nsjconf, pid_t pid) { 103 | if (nsjconf->cgroup_pids_max == 0U) { 104 | return true; 105 | } 106 | 107 | std::string pids_cgroup_path = nsjconf->cgroup_pids_mount + '/' + 108 | nsjconf->cgroup_pids_parent + "/NSJAIL." + 109 | std::to_string(pid); 110 | RETURN_ON_FAILURE(createCgroup(pids_cgroup_path, pid)); 111 | 112 | std::string pids_max_str = std::to_string(nsjconf->cgroup_pids_max); 113 | RETURN_ON_FAILURE( 114 | writeToCgroup(pids_cgroup_path + "/pids.max", pids_max_str, "pids cgroup max limit")); 115 | 116 | return addPidToTaskList(pids_cgroup_path, pid); 117 | } 118 | 119 | static bool initNsFromParentNetCls(nsjconf_t* nsjconf, pid_t pid) { 120 | if (nsjconf->cgroup_net_cls_classid == 0U) { 121 | return true; 122 | } 123 | 124 | std::string net_cls_cgroup_path = nsjconf->cgroup_net_cls_mount + '/' + 125 | nsjconf->cgroup_net_cls_parent + "/NSJAIL." 
+ 126 | std::to_string(pid); 127 | RETURN_ON_FAILURE(createCgroup(net_cls_cgroup_path, pid)); 128 | 129 | std::string net_cls_classid_str; 130 | { 131 | std::stringstream ss; 132 | ss << "0x" << std::hex << nsjconf->cgroup_net_cls_classid; 133 | net_cls_classid_str = ss.str(); 134 | } 135 | RETURN_ON_FAILURE(writeToCgroup(net_cls_cgroup_path + "/net_cls.classid", 136 | net_cls_classid_str, "net_cls cgroup classid")); 137 | 138 | return addPidToTaskList(net_cls_cgroup_path, pid); 139 | } 140 | 141 | static bool initNsFromParentCpu(nsjconf_t* nsjconf, pid_t pid) { 142 | if (nsjconf->cgroup_cpu_ms_per_sec == 0U) { 143 | return true; 144 | } 145 | 146 | std::string cpu_cgroup_path = nsjconf->cgroup_cpu_mount + '/' + nsjconf->cgroup_cpu_parent + 147 | "/NSJAIL." + std::to_string(pid); 148 | RETURN_ON_FAILURE(createCgroup(cpu_cgroup_path, pid)); 149 | 150 | RETURN_ON_FAILURE( 151 | writeToCgroup(cpu_cgroup_path + "/cpu.cfs_period_us", "1000000", "cpu period")); 152 | 153 | std::string cpu_ms_per_sec_str = std::to_string(nsjconf->cgroup_cpu_ms_per_sec * 1000U); 154 | RETURN_ON_FAILURE( 155 | writeToCgroup(cpu_cgroup_path + "/cpu.cfs_quota_us", cpu_ms_per_sec_str, "cpu quota")); 156 | 157 | return addPidToTaskList(cpu_cgroup_path, pid); 158 | } 159 | 160 | bool initNsFromParent(nsjconf_t* nsjconf, pid_t pid) { 161 | RETURN_ON_FAILURE(initNsFromParentMem(nsjconf, pid)); 162 | RETURN_ON_FAILURE(initNsFromParentPids(nsjconf, pid)); 163 | RETURN_ON_FAILURE(initNsFromParentNetCls(nsjconf, pid)); 164 | return initNsFromParentCpu(nsjconf, pid); 165 | } 166 | 167 | static void removeCgroup(const std::string& cgroup_path) { 168 | LOG_D("Remove %s", QC(cgroup_path)); 169 | if (rmdir(cgroup_path.c_str()) == -1) { 170 | PLOG_W("rmdir(%s) failed", QC(cgroup_path)); 171 | } 172 | } 173 | 174 | void finishFromParent(nsjconf_t* nsjconf, pid_t pid) { 175 | if (nsjconf->cgroup_mem_max != (size_t)0 || nsjconf->cgroup_mem_memsw_max != (size_t)0) { 176 | std::string mem_cgroup_path = 
nsjconf->cgroup_mem_mount + '/' + 177 | nsjconf->cgroup_mem_parent + "/NSJAIL." + 178 | std::to_string(pid); 179 | removeCgroup(mem_cgroup_path); 180 | } 181 | if (nsjconf->cgroup_pids_max != 0U) { 182 | std::string pids_cgroup_path = nsjconf->cgroup_pids_mount + '/' + 183 | nsjconf->cgroup_pids_parent + "/NSJAIL." + 184 | std::to_string(pid); 185 | removeCgroup(pids_cgroup_path); 186 | } 187 | if (nsjconf->cgroup_net_cls_classid != 0U) { 188 | std::string net_cls_cgroup_path = nsjconf->cgroup_net_cls_mount + '/' + 189 | nsjconf->cgroup_net_cls_parent + "/NSJAIL." + 190 | std::to_string(pid); 191 | removeCgroup(net_cls_cgroup_path); 192 | } 193 | if (nsjconf->cgroup_cpu_ms_per_sec != 0U) { 194 | std::string cpu_cgroup_path = nsjconf->cgroup_cpu_mount + '/' + 195 | nsjconf->cgroup_cpu_parent + "/NSJAIL." + 196 | std::to_string(pid); 197 | removeCgroup(cpu_cgroup_path); 198 | } 199 | } 200 | 201 | bool initNs(void) { 202 | return true; 203 | } 204 | 205 | } // namespace cgroup 206 | -------------------------------------------------------------------------------- /cgroup.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - cgroup namespacing 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #ifndef NS_CGROUP_H 23 | #define NS_CGROUP_H 24 | 25 | #include 26 | #include 27 | 28 | #include "nsjail.h" 29 | 30 | namespace cgroup { 31 | 32 | bool initNsFromParent(nsjconf_t* nsjconf, pid_t pid); 33 | bool initNs(void); 34 | void finishFromParent(nsjconf_t* nsjconf, pid_t pid); 35 | 36 | } // namespace cgroup 37 | 38 | #endif /* NS_CGROUP_H */ 39 | -------------------------------------------------------------------------------- /cgroup2.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - cgroup2 namespacing 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License.
19 | 20 | */ 21 | 22 | #include "cgroup2.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | #include 38 | 39 | #include "logs.h" 40 | #include "util.h" 41 | 42 | namespace cgroup2 { 43 | 44 | static bool addPidToProcList(const std::string &cgroup_path, pid_t pid) { 45 | std::string pid_str = std::to_string(pid); 46 | 47 | LOG_D("Adding pid='%s' to cgroup.procs", pid_str.c_str()); 48 | if (!util::writeBufToFile((cgroup_path + "/cgroup.procs").c_str(), pid_str.c_str(), 49 | pid_str.length(), O_WRONLY)) { 50 | LOG_W("Could not update cgroup.procs"); 51 | return false; 52 | } 53 | return true; 54 | } 55 | 56 | static std::string getCgroupPath(nsjconf_t *nsjconf, pid_t pid) { 57 | return nsjconf->cgroupv2_mount + "/NSJAIL." + std::to_string(pid); 58 | } 59 | static std::string getJailCgroupPath(nsjconf_t *nsjconf) { 60 | return nsjconf->cgroupv2_mount + "/NSJAIL_SELF." 
+ std::to_string(getpid()); 61 | } 62 | 63 | static bool createCgroup(const std::string &cgroup_path, pid_t pid) { 64 | LOG_D("Create '%s' for pid=%d", cgroup_path.c_str(), (int)pid); 65 | if (mkdir(cgroup_path.c_str(), 0700) == -1 && errno != EEXIST) { 66 | PLOG_W("mkdir('%s', 0700) failed", cgroup_path.c_str()); 67 | return false; 68 | } 69 | return true; 70 | } 71 | 72 | static bool moveSelfIntoChildCgroup(nsjconf_t *nsjconf) { 73 | /* 74 | * Move ourselves into another group to avoid the 'No internal processes' rule 75 | * https://unix.stackexchange.com/a/713343 76 | */ 77 | std::string jail_cgroup_path = getJailCgroupPath(nsjconf); 78 | LOG_I("nsjail is moving itself to a new child cgroup: %s\n", jail_cgroup_path.c_str()); 79 | RETURN_ON_FAILURE(createCgroup(jail_cgroup_path, getpid())); 80 | RETURN_ON_FAILURE(addPidToProcList(jail_cgroup_path, 0)); 81 | return true; 82 | } 83 | 84 | static bool enableCgroupSubtree(nsjconf_t *nsjconf, const std::string &controller, pid_t pid) { 85 | std::string cgroup_path = nsjconf->cgroupv2_mount; 86 | LOG_D("Enable cgroup.subtree_control +'%s' to '%s' for pid=%d", controller.c_str(), 87 | cgroup_path.c_str(), pid); 88 | std::string val = "+" + controller; 89 | 90 | /* 91 | * Try once without moving the nsjail process and if that fails then try moving the nsjail 92 | * process into a child cgroup before trying a second time. 93 | */ 94 | if (util::writeBufToFile((cgroup_path + "/cgroup.subtree_control").c_str(), val.c_str(), 95 | val.length(), O_WRONLY, false)) { 96 | return true; 97 | } 98 | if (errno == EBUSY) { 99 | RETURN_ON_FAILURE(moveSelfIntoChildCgroup(nsjconf)); 100 | if (util::writeBufToFile((cgroup_path + "/cgroup.subtree_control").c_str(), 101 | val.c_str(), val.length(), O_WRONLY)) { 102 | return true; 103 | } 104 | } 105 | LOG_E( 106 | "Could not apply '%s' to cgroup.subtree_control in '%s'. nsjail MUST be run from root " 107 | "and the cgroup mount path must refer to the root/host cgroup to use cgroupv2. 
If you " 108 | "use Docker, you may need to run the container with --cgroupns=host so that nsjail can" 109 | " access the host/root cgroupv2 hierarchy. An alternative is mounting (or remounting) " 110 | "the cgroupv2 filesystem but using the flag is just simpler.", 111 | val.c_str(), cgroup_path.c_str()); 112 | return false; 113 | } 114 | 115 | static bool writeToCgroup( 116 | const std::string &cgroup_path, const std::string &resource, const std::string &value) { 117 | LOG_I("Setting '%s' to '%s'", resource.c_str(), value.c_str()); 118 | 119 | if (!util::writeBufToFile( 120 | (cgroup_path + "/" + resource).c_str(), value.c_str(), value.length(), O_WRONLY)) { 121 | LOG_W("Could not update %s", resource.c_str()); 122 | return false; 123 | } 124 | return true; 125 | } 126 | 127 | static void removeCgroup(const std::string &cgroup_path) { 128 | LOG_D("Remove '%s'", cgroup_path.c_str()); 129 | if (rmdir(cgroup_path.c_str()) == -1) { 130 | PLOG_W("rmdir('%s') failed", cgroup_path.c_str()); 131 | } 132 | } 133 | 134 | static bool needMemoryController(nsjconf_t *nsjconf) { 135 | /* 136 | * Check if we need 'memory' 137 | * This matches the check in initNsFromParentMem() 138 | */ 139 | ssize_t swap_max = nsjconf->cgroup_mem_swap_max; 140 | if (nsjconf->cgroup_mem_memsw_max > (size_t)0) { 141 | swap_max = nsjconf->cgroup_mem_memsw_max - nsjconf->cgroup_mem_max; 142 | } 143 | if (nsjconf->cgroup_mem_max == (size_t)0 && swap_max < (ssize_t)0) { 144 | return false; 145 | } 146 | return true; 147 | } 148 | 149 | static bool needPidsController(nsjconf_t *nsjconf) { 150 | return nsjconf->cgroup_pids_max != 0; 151 | } 152 | 153 | static bool needCpuController(nsjconf_t *nsjconf) { 154 | return nsjconf->cgroup_cpu_ms_per_sec != 0U; 155 | } 156 | 157 | /* 158 | * We will use this buf to read from cgroup.subtree_control to see if 159 | * the root cgroup has the necessary controllers listed 160 | */ 161 | #define SUBTREE_CONTROL_BUF_LEN 0x40 162 | 163 | bool setup(nsjconf_t *nsjconf) 
{ 164 | /* 165 | * Read from cgroup.subtree_control in the root to see if 166 | * the controllers we need are there. 167 | */ 168 | auto p = nsjconf->cgroupv2_mount + "/cgroup.subtree_control"; 169 | char buf[SUBTREE_CONTROL_BUF_LEN]; 170 | int read = util::readFromFile(p.c_str(), buf, SUBTREE_CONTROL_BUF_LEN - 1); 171 | if (read < 0) { 172 | LOG_W("cgroupv2 setup: Could not read root subtree_control"); 173 | return false; 174 | } 175 | buf[read] = 0; 176 | 177 | /* Are the controllers we need there? */ 178 | bool subtree_ok = (!needMemoryController(nsjconf) || strstr(buf, "memory")) && 179 | (!needPidsController(nsjconf) || strstr(buf, "pids")) && 180 | (!needCpuController(nsjconf) || strstr(buf, "cpu")); 181 | if (!subtree_ok) { 182 | /* Now we can write to the root cgroup.subtree_control */ 183 | if (needMemoryController(nsjconf)) { 184 | RETURN_ON_FAILURE(enableCgroupSubtree(nsjconf, "memory", getpid())); 185 | } 186 | 187 | if (needPidsController(nsjconf)) { 188 | RETURN_ON_FAILURE(enableCgroupSubtree(nsjconf, "pids", getpid())); 189 | } 190 | 191 | if (needCpuController(nsjconf)) { 192 | RETURN_ON_FAILURE(enableCgroupSubtree(nsjconf, "cpu", getpid())); 193 | } 194 | } 195 | return true; 196 | } 197 | 198 | bool detectCgroupv2(nsjconf_t *nsjconf) { 199 | /* 200 | * Check cgroupv2_mount, if it is a cgroup2 mount, use it. 
201 | */ 202 | struct statfs buf; 203 | if (statfs(nsjconf->cgroupv2_mount.c_str(), &buf)) { 204 | LOG_D("statfs %s failed with %d", nsjconf->cgroupv2_mount.c_str(), errno); 205 | nsjconf->use_cgroupv2 = false; 206 | return false; 207 | } 208 | nsjconf->use_cgroupv2 = (buf.f_type == CGROUP2_SUPER_MAGIC); 209 | return true; 210 | } 211 | 212 | static bool initNsFromParentMem(nsjconf_t *nsjconf, pid_t pid) { 213 | ssize_t swap_max = nsjconf->cgroup_mem_swap_max; 214 | if (nsjconf->cgroup_mem_memsw_max > (size_t)0) { 215 | swap_max = nsjconf->cgroup_mem_memsw_max - nsjconf->cgroup_mem_max; 216 | } 217 | 218 | if (nsjconf->cgroup_mem_max == (size_t)0 && swap_max < (ssize_t)0) { 219 | return true; 220 | } 221 | 222 | std::string cgroup_path = getCgroupPath(nsjconf, pid); 223 | RETURN_ON_FAILURE(createCgroup(cgroup_path, pid)); 224 | RETURN_ON_FAILURE(addPidToProcList(cgroup_path, pid)); 225 | 226 | if (nsjconf->cgroup_mem_max > (size_t)0) { 227 | RETURN_ON_FAILURE(writeToCgroup( 228 | cgroup_path, "memory.max", std::to_string(nsjconf->cgroup_mem_max))); 229 | } 230 | 231 | if (swap_max >= (ssize_t)0) { 232 | RETURN_ON_FAILURE( 233 | writeToCgroup(cgroup_path, "memory.swap.max", std::to_string(swap_max))); 234 | } 235 | 236 | return true; 237 | } 238 | 239 | static bool initNsFromParentPids(nsjconf_t *nsjconf, pid_t pid) { 240 | if (nsjconf->cgroup_pids_max == 0U) { 241 | return true; 242 | } 243 | std::string cgroup_path = getCgroupPath(nsjconf, pid); 244 | RETURN_ON_FAILURE(createCgroup(cgroup_path, pid)); 245 | RETURN_ON_FAILURE(addPidToProcList(cgroup_path, pid)); 246 | return writeToCgroup(cgroup_path, "pids.max", std::to_string(nsjconf->cgroup_pids_max)); 247 | } 248 | 249 | static bool initNsFromParentCpu(nsjconf_t *nsjconf, pid_t pid) { 250 | if (nsjconf->cgroup_cpu_ms_per_sec == 0U) { 251 | return true; 252 | } 253 | 254 | std::string cgroup_path = getCgroupPath(nsjconf, pid); 255 | RETURN_ON_FAILURE(createCgroup(cgroup_path, pid)); 256 | 
RETURN_ON_FAILURE(addPidToProcList(cgroup_path, pid)); 257 | 258 | /* 259 | * The maximum bandwidth limit in the format: `$MAX $PERIOD`. 260 | * This indicates that the group may consume up to $MAX in each $PERIOD 261 | * duration. 262 | */ 263 | std::string cpu_ms_per_sec_str = std::to_string(nsjconf->cgroup_cpu_ms_per_sec * 1000U); 264 | cpu_ms_per_sec_str += " 1000000"; 265 | return writeToCgroup(cgroup_path, "cpu.max", cpu_ms_per_sec_str); 266 | } 267 | 268 | bool initNsFromParent(nsjconf_t *nsjconf, pid_t pid) { 269 | RETURN_ON_FAILURE(initNsFromParentMem(nsjconf, pid)); 270 | RETURN_ON_FAILURE(initNsFromParentPids(nsjconf, pid)); 271 | return initNsFromParentCpu(nsjconf, pid); 272 | } 273 | 274 | void finishFromParent(nsjconf_t *nsjconf, pid_t pid) { 275 | if (nsjconf->cgroup_mem_max != (size_t)0 || nsjconf->cgroup_pids_max != 0U || 276 | nsjconf->cgroup_cpu_ms_per_sec != 0U) { 277 | removeCgroup(getCgroupPath(nsjconf, pid)); 278 | } 279 | } 280 | 281 | } // namespace cgroup2 282 | -------------------------------------------------------------------------------- /cgroup2.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - cgroup2 namespacing 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #ifndef NS_CGROUP2_H 23 | #define NS_CGROUP2_H 24 | 25 | #include 26 | #include 27 | 28 | #include "nsjail.h" 29 | 30 | namespace cgroup2 { 31 | 32 | bool initNsFromParent(nsjconf_t* nsjconf, pid_t pid); 33 | bool initNs(void); 34 | void finishFromParent(nsjconf_t* nsjconf, pid_t pid); 35 | bool setup(nsjconf_t* nsjconf); 36 | bool detectCgroupv2(nsjconf_t* nsjconf); 37 | 38 | } // namespace cgroup2 39 | 40 | #endif /* NS_CGROUP2_H */ 41 | -------------------------------------------------------------------------------- /cmdline.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - cmdline parsing 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License.
19 | 20 | */ 21 | 22 | #ifndef NS_CMDLINE_H 23 | #define NS_CMDLINE_H 24 | 25 | #include 26 | 27 | #include 28 | #include 29 | 30 | #include "nsjail.h" 31 | 32 | namespace cmdline { 33 | 34 | uint64_t parseRLimit(int res, const char* optarg, unsigned long mul); 35 | void logParams(nsjconf_t* nsjconf); 36 | void addEnv(nsjconf_t* nsjconf, const std::string& env); 37 | std::unique_ptr<nsjconf_t> parseArgs(int argc, char* argv[]); 38 | 39 | } // namespace cmdline 40 | 41 | #endif /* NS_CMDLINE_H */ 42 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - config parsing 4 | ----------------------------------------- 5 | 6 | Copyright 2017 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License.
19 | 20 | */ 21 | 22 | #ifndef NS_CONFIG_H 23 | #define NS_CONFIG_H 24 | 25 | #include 26 | 27 | #include "nsjail.h" 28 | 29 | namespace config { 30 | 31 | bool parseFile(nsjconf_t* nsjconf, const char* file); 32 | 33 | } // namespace config 34 | 35 | #endif /* NS_CONFIG_H */ 36 | -------------------------------------------------------------------------------- /config.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package nsjail; 4 | 5 | enum Mode { 6 | LISTEN = 0; /* Listening on a TCP port */ 7 | ONCE = 1; /* Running the command once only */ 8 | RERUN = 2; /* Re-executing the command (forever) */ 9 | EXECVE = 3; /* Executing command w/o the supervisor */ 10 | } 11 | /* Should be self explanatory */ 12 | enum LogLevel { 13 | DEBUG = 0; /* Equivalent to the '-v' cmd-line option */ 14 | INFO = 1; /* Default level */ 15 | WARNING = 2; /* Equivalent to the '-q' cmd-line option */ 16 | ERROR = 3; 17 | FATAL = 4; 18 | } 19 | message IdMap { 20 | /* Empty string means "current uid/gid" */ 21 | optional string inside_id = 1 [default = ""]; 22 | optional string outside_id = 2 [default = ""]; 23 | /* See 'man user_namespaces' for the meaning of count */ 24 | optional uint32 count = 3 [default = 1]; 25 | /* Does this map use /usr/bin/new[u|g]idmap binary? */ 26 | optional bool use_newidmap = 4 [default = false]; 27 | } 28 | message MountPt { 29 | /* Can be skipped for filesystems like 'proc' */ 30 | optional string src = 1 [default = ""]; 31 | /* Should 'src' path be prefixed with this envar? */ 32 | optional string prefix_src_env = 2 [default = ""]; 33 | /* If specified, contains buffer that will be written to the dst file */ 34 | optional bytes src_content = 3 [default = ""]; 35 | /* Mount point inside jail */ 36 | required string dst = 4 [default = ""]; 37 | /* Should 'dst' path be prefixed with this envar? 
*/ 38 | optional string prefix_dst_env = 5 [default = ""]; 39 | /* Can be empty for mount --bind mounts */ 40 | optional string fstype = 6 [default = ""]; 41 | /* E.g. size=5000000 for 'tmpfs' */ 42 | optional string options = 7 [default = ""]; 43 | /* Is it a 'mount --bind src dst' type of mount? */ 44 | optional bool is_bind = 8 [default = false]; 45 | /* Is it a R/W mount? */ 46 | optional bool rw = 9 [default = false]; 47 | /* Is it a directory? If not specified an internal 48 | heuristics will be used to determine that */ 49 | optional bool is_dir = 10; 50 | /* Should the sandboxing fail if we cannot mount this resource? */ 51 | optional bool mandatory = 11 [default = true]; 52 | /* Is it a symlink (instead of real mount point)? */ 53 | optional bool is_symlink = 12 [default = false]; 54 | /* Is it a nosuid mount */ 55 | optional bool nosuid = 13 [default = false]; 56 | /* Is it a nodev mount */ 57 | optional bool nodev = 14 [default = false]; 58 | /* Is it a noexec mount */ 59 | optional bool noexec = 15 [default = false]; 60 | } 61 | enum RLimit { 62 | VALUE = 0; /* Use the provided value */ 63 | SOFT = 1; /* Use the current soft rlimit */ 64 | HARD = 2; /* Use the current hard rlimit */ 65 | INF = 3; /* Use RLIM64_INFINITY */ 66 | } 67 | message Exe { 68 | /* Will be used both as execv's path and as argv[0] */ 69 | required string path = 1; 70 | /* This will be argv[1] and so on.. */ 71 | repeated string arg = 2; 72 | /* Override argv[0] */ 73 | optional string arg0 = 3; 74 | /* Should execveat() be used to execute a file-descriptor instead? 
*/ 75 | optional bool exec_fd = 4 [default = false]; 76 | } 77 | message NsJailConfig { 78 | /* Optional name and description for this config */ 79 | optional string name = 1 [default = ""]; 80 | repeated string description = 2; 81 | 82 | /* Execution mode: see 'msg Mode' description for more */ 83 | optional Mode mode = 3 [default = ONCE]; 84 | /* Hostname inside jail */ 85 | optional string hostname = 4 [default = "NSJAIL"]; 86 | /* Initial current working directory for the binary */ 87 | optional string cwd = 5 [default = "/"]; 88 | 89 | /* Defines whether to use switch_root or pivot_root */ 90 | optional bool no_pivotroot = 6 [default = false]; 91 | 92 | /* TCP port to listen to. Valid with mode=LISTEN only */ 93 | optional uint32 port = 7 [default = 0]; 94 | /* Host to bind to for mode=LISTEN. Must be in IPv6 format */ 95 | optional string bindhost = 8 [default = "::"]; 96 | /* For mode=LISTEN, maximum number of connections across all IPs */ 97 | optional uint32 max_conns = 9 [default = 0]; 98 | /* For mode=LISTEN, maximum number of connections from a single IP */ 99 | optional uint32 max_conns_per_ip = 10 [default = 0]; 100 | 101 | /* Wall-time time limit for commands */ 102 | optional uint32 time_limit = 11 [default = 600]; 103 | /* Should nsjail go into background? */ 104 | optional bool daemon = 12 [default = false]; 105 | /* Maximum number of CPUs to use: 0 - no limit */ 106 | optional uint32 max_cpus = 13 [default = 0]; 107 | /* Niceness level of the jailed process */ 108 | optional int32 nice_level = 14 [default = 19]; 109 | 110 | /* FD to log to. */ 111 | optional int32 log_fd = 15; 112 | /* File to save logs to. */ 113 | optional string log_file = 16; 114 | /* Minimum log level displayed. 
115 | See 'msg LogLevel' description for more */ 116 | optional LogLevel log_level = 17; 117 | 118 | /* Should the current environment variables be kept 119 | when executing the binary */ 120 | optional bool keep_env = 18 [default = false]; 121 | /* EnvVars to be set before executing binaries. If the envar doesn't contain '=' 122 | (e.g. just the 'DISPLAY' string), the current envar value will be used */ 123 | repeated string envar = 19; 124 | 125 | /* Should capabilities be preserved or dropped */ 126 | optional bool keep_caps = 20 [default = false]; 127 | /* Which capabilities should be preserved if keep_caps == false. 128 | Format: "CAP_SYS_PTRACE" */ 129 | repeated string cap = 21; 130 | /* Should nsjail close FD=0,1,2 before executing the process */ 131 | optional bool silent = 22 [default = false]; 132 | /* Should the child process have control over terminal? 133 | Can be useful to allow /bin/sh to provide 134 | job control / signals. Dangerous, can be used to put 135 | characters into the controlling terminal back */ 136 | optional bool skip_setsid = 23 [default = false]; 137 | /* Redirect stderr of the process to /dev/null instead of the socket or original TTY */ 138 | optional bool stderr_to_null = 24 [default = false]; 139 | /* Which FDs should be passed to the newly executed process 140 | By default only FD=0,1,2 are passed */ 141 | repeated int32 pass_fd = 25; 142 | /* Setting it to true will allow to have set-uid binaries 143 | inside the jail */ 144 | optional bool disable_no_new_privs = 26 [default = false]; 145 | /* Set this to true to forward fatal signals to the child process instead 146 | * of always using SIGKILL. */ 147 | optional bool forward_signals = 27 [default = false]; 148 | /* Disable rdtsc and rdtscp instructions. WARNING: To make it effective, you also need to 149 | * forbid `prctl(PR_SET_TSC, PR_TSC_ENABLE, ...)` in seccomp rules! (x86 and x86_64 only).
150 | * Dynamic binaries produced by GCC seem to rely on RDTSC, but static ones should work. */ 151 | optional bool disable_tsc = 28 [default = false]; 152 | 153 | /* Various rlimits, the rlimit_as/rlimit_core/... are used only if 154 | rlimit_as_type/rlimit_core_type/... are set to RLimit::VALUE */ 155 | optional uint64 rlimit_as = 29 [default = 4096]; /* In MiB */ 156 | optional RLimit rlimit_as_type = 30 [default = VALUE]; 157 | optional uint64 rlimit_core = 31 [default = 0]; /* In MiB */ 158 | optional RLimit rlimit_core_type = 32 [default = VALUE]; 159 | optional uint64 rlimit_cpu = 33 [default = 600]; /* In seconds */ 160 | optional RLimit rlimit_cpu_type = 34 [default = VALUE]; 161 | optional uint64 rlimit_fsize = 35 [default = 1]; /* In MiB */ 162 | optional RLimit rlimit_fsize_type = 36 [default = VALUE]; 163 | optional uint64 rlimit_nofile = 37 [default = 32]; 164 | optional RLimit rlimit_nofile_type = 38 [default = VALUE]; 165 | /* RLIMIT_NPROC is system-wide - tricky to use; use the soft limit value by 166 | * default here */ 167 | optional uint64 rlimit_nproc = 39 [default = 1024]; 168 | optional RLimit rlimit_nproc_type = 40 [default = SOFT]; 169 | /* In MiB, use the soft limit value by default */ 170 | optional uint64 rlimit_stack = 41 [default = 8]; 171 | optional RLimit rlimit_stack_type = 42 [default = SOFT]; 172 | /* In KB, use the soft limit value by default */ 173 | optional uint64 rlimit_memlock = 43 [default = 64]; 174 | optional RLimit rlimit_memlock_type = 44 [default = SOFT]; 175 | optional uint64 rlimit_rtprio = 45 [default = 0]; 176 | optional RLimit rlimit_rtprio_type = 46 [default = SOFT]; 177 | optional uint64 rlimit_msgqueue = 47 [default = 1024]; /* In bytes */ 178 | optional RLimit rlimit_msgqueue_type = 48 [default = SOFT]; 179 | 180 | /* Disable all rlimits, default to limits set by parent */ 181 | optional bool disable_rl = 49 [default = false]; 182 | 183 | /* See 'man personality' for more */ 184 | optional bool 
persona_addr_compat_layout = 50 [default = false]; 185 | optional bool persona_mmap_page_zero = 51 [default = false]; 186 | optional bool persona_read_implies_exec = 52 [default = false]; 187 | optional bool persona_addr_limit_3gb = 53 [default = false]; 188 | optional bool persona_addr_no_randomize = 54 [default = false]; 189 | 190 | /* Which name-spaces should be used? */ 191 | optional bool clone_newnet = 55 [default = true]; 192 | optional bool clone_newuser = 56 [default = true]; 193 | optional bool clone_newns = 57 [default = true]; 194 | optional bool clone_newpid = 58 [default = true]; 195 | optional bool clone_newipc = 59 [default = true]; 196 | optional bool clone_newuts = 60 [default = true]; 197 | /* Disable for kernel versions < 4.6 as it's not supported there */ 198 | optional bool clone_newcgroup = 61 [default = true]; 199 | /* Supported with kernel versions >= 5.6 (time namespaces, CLONE_NEWTIME) */ 200 | optional bool clone_newtime = 62 [default = false]; 201 | 202 | /* Mappings for UIDs and GIDs. See the description for 'msg IdMap' 203 | for more */ 204 | repeated IdMap uidmap = 63; 205 | repeated IdMap gidmap = 64; 206 | 207 | /* Should /proc be mounted (R/O)? This can also be added in the 'mount' 208 | section below */ 209 | optional bool mount_proc = 65 [default = false]; 210 | /* Mount points inside the jail. 
See the description for 'msg MountPt' 211 | for more */ 212 | repeated MountPt mount = 66; 213 | 214 | /* Kafel seccomp-bpf policy file or a string: 215 | Homepage of the project: https://github.com/google/kafel */ 216 | optional string seccomp_policy_file = 67; 217 | repeated string seccomp_string = 68; 218 | /* Setting it to true makes audit write seccomp logs to dmesg */ 219 | optional bool seccomp_log = 69 [default = false]; 220 | 221 | /* If > 0, maximum cumulative size of RAM used inside any jail */ 222 | optional uint64 cgroup_mem_max = 70 [default = 0]; /* In bytes */ 223 | /* If > 0, maximum cumulative size of RAM + swap used inside any jail */ 224 | optional uint64 cgroup_mem_memsw_max = 71 [default = 0]; /* In bytes */ 225 | /* If >= 0, maximum cumulative size of swap used inside any jail */ 226 | optional int64 cgroup_mem_swap_max = 72 [default = -1]; /* In bytes */ 227 | /* Mount point for cgroups-memory in your system */ 228 | optional string cgroup_mem_mount = 73 [default = "/sys/fs/cgroup/memory"]; 229 | /* Writeable directory (for the nsjail user) under cgroup_mem_mount */ 230 | optional string cgroup_mem_parent = 74 [default = "NSJAIL"]; 231 | 232 | /* If > 0, maximum number of PIDs (threads/processes) inside jail */ 233 | optional uint64 cgroup_pids_max = 75 [default = 0]; 234 | /* Mount point for cgroups-pids in your system */ 235 | optional string cgroup_pids_mount = 76 [default = "/sys/fs/cgroup/pids"]; 236 | /* Writeable directory (for the nsjail user) under cgroup_pids_mount */ 237 | optional string cgroup_pids_parent = 77 [default = "NSJAIL"]; 238 | 239 | /* If > 0, Class identifier of network packets inside jail */ 240 | optional uint32 cgroup_net_cls_classid = 78 [default = 0]; 241 | /* Mount point for cgroups-net-cls in your system */ 242 | optional string cgroup_net_cls_mount = 79 [default = "/sys/fs/cgroup/net_cls"]; 243 | /* Writeable directory (for the nsjail user) under cgroup_net_cls_mount */ 244 | optional string cgroup_net_cls_parent
= 80 [default = "NSJAIL"]; 245 | 246 | /* If > 0, number of milliseconds of CPU time per second that jailed processes can use */ 247 | optional uint32 cgroup_cpu_ms_per_sec = 81 [default = 0]; 248 | /* Mount point for cgroups-cpu in your system */ 249 | optional string cgroup_cpu_mount = 82 [default = "/sys/fs/cgroup/cpu"]; 250 | /* Writeable directory (for the nsjail user) under cgroup_cpu_mount */ 251 | optional string cgroup_cpu_parent = 83 [default = "NSJAIL"]; 252 | 253 | /* Mount point for cgroup v2 in your system */ 254 | optional string cgroupv2_mount = 84 [default = "/sys/fs/cgroup"]; 255 | /* Use cgroup v2 */ 256 | optional bool use_cgroupv2 = 85 [default = false]; 257 | /* Check whether cgroupv2 is available, and use it if available. */ 258 | optional bool detect_cgroupv2 = 86 [default = false]; 259 | 260 | /* Should the 'lo' interface be left down (i.e. NOT brought up) inside this jail? NOTE(review): polarity inferred from the field name - confirm against net.cc */ 261 | optional bool iface_no_lo = 87 [default = false]; 262 | 263 | /* Put this interface inside the jail */ 264 | repeated string iface_own = 88; 265 | 266 | /* Parameters for the cloned MACVLAN interface inside jail */ 267 | optional string macvlan_iface = 89; /* Interface to be cloned, eg 'eth0' */ 268 | optional string macvlan_vs_ip = 90 [default = "192.168.0.2"]; 269 | optional string macvlan_vs_nm = 91 [default = "255.255.255.0"]; 270 | optional string macvlan_vs_gw = 92 [default = "192.168.0.1"]; 271 | optional string macvlan_vs_ma = 93 [default = ""]; 272 | optional string macvlan_vs_mo = 94 [default = "private"]; 273 | 274 | /* Binary path (with arguments) to be executed. 
If not specified here, it 275 | can be specified with cmd-line as "-- /path/to/command arg1 arg2" */ 276 | optional Exe exec_bin = 95; 277 | } 278 | -------------------------------------------------------------------------------- /configs/apache.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "apache-with-cloned-net" 4 | 5 | description: "Tested under Ubuntu 17.04. Other Linux distros might " 6 | description: "use different locations for the Apache's HTTPD configuration " 7 | description: "files and system libraries" 8 | description: "Run as: sudo ./nsjail --config configs/apache.cfg" 9 | 10 | mode: ONCE 11 | hostname: "APACHE-NSJ" 12 | 13 | rlimit_as: 1024 14 | rlimit_fsize: 1024 15 | rlimit_cpu_type: INF 16 | rlimit_nofile: 64 17 | 18 | time_limit: 0 19 | 20 | cap: "CAP_NET_BIND_SERVICE" 21 | 22 | envar: "APACHE_RUN_DIR=/run/apache2" 23 | envar: "APACHE_PID_FILE=/run/apache2/apache2.pid" 24 | envar: "APACHE_RUN_USER=www-data" 25 | envar: "APACHE_RUN_GROUP=www-data" 26 | envar: "APACHE_LOG_DIR=/run/apache2" 27 | envar: "APACHE_LOCK_DIR=/run/apache2" 28 | 29 | uidmap { 30 | inside_id: "1" 31 | outside_id: "www-data" 32 | } 33 | 34 | gidmap { 35 | inside_id: "1" 36 | outside_id: "www-data" 37 | } 38 | 39 | mount { 40 | src: "/etc/apache2" 41 | dst: "/etc/apache2" 42 | is_bind: true 43 | } 44 | mount { 45 | src: "/etc/mime.types" 46 | dst: "/etc/mime.types" 47 | is_bind: true 48 | } 49 | mount { 50 | src: "/etc/localtime" 51 | dst: "/etc/localtime" 52 | is_bind: true 53 | } 54 | mount { 55 | src_content: "www-data:x:1:1:www-data:/var/www:/bin/false" 56 | dst: "/etc/passwd" 57 | } 58 | mount { 59 | src_content: "www-data:x:1:" 60 | dst: "/etc/group" 61 | } 62 | mount { 63 | dst: "/tmp" 64 | fstype: "tmpfs" 65 | rw: true 66 | } 67 | mount { 68 | dst: "/run/apache2" 69 | fstype: "tmpfs" 70 | rw: true 71 | } 72 | mount { 73 | src: "/dev/urandom" 74 | dst: "/dev/urandom" 75 | is_bind: true 
76 | rw: true 77 | } 78 | mount { 79 | dst: "/dev/shm" 80 | fstype: "tmpfs" 81 | rw: true 82 | } 83 | mount { 84 | dst: "/proc" 85 | fstype: "proc" 86 | } 87 | mount { 88 | src: "/lib64" 89 | dst: "/lib64" 90 | is_bind: true 91 | } 92 | mount { 93 | src: "/lib" 94 | dst: "/lib" 95 | is_bind: true 96 | } 97 | mount { 98 | src: "/usr/lib" 99 | dst: "/usr/lib" 100 | is_bind: true 101 | } 102 | mount { 103 | src: "/var/www/html" 104 | dst: "/var/www/html" 105 | is_bind: true 106 | } 107 | mount { 108 | src: "/usr/share/apache2" 109 | dst: "/usr/share/apache2" 110 | is_bind: true 111 | } 112 | mount { 113 | src: "/var/lib/apache2" 114 | dst: "/var/lib/apache2" 115 | is_bind: true 116 | } 117 | mount { 118 | src: "/usr/sbin/apache2" 119 | dst: "/usr/sbin/apache2" 120 | is_bind: true 121 | } 122 | 123 | seccomp_string: " KILL_PROCESS {" 124 | seccomp_string: " ptrace," 125 | seccomp_string: " process_vm_readv," 126 | seccomp_string: " process_vm_writev" 127 | seccomp_string: " }" 128 | seccomp_string: " DEFAULT ALLOW" 129 | 130 | macvlan_iface: "enp0s31f6" 131 | macvlan_vs_ip: "192.168.10.223" 132 | macvlan_vs_nm: "255.255.255.0" 133 | macvlan_vs_gw: "192.168.10.1" 134 | 135 | exec_bin { 136 | path: "/usr/sbin/apache2" 137 | arg : "-DFOREGROUND" 138 | } 139 | -------------------------------------------------------------------------------- /configs/bash-with-fake-geteuid.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "bash-with-fake-geteuid" 4 | 5 | description: "An example/demo policy which allows to execute /bin/bash and other commands in " 6 | description: "a fairly restricted jail containing only some directories from the main " 7 | description: "system, and with blocked __NR_syslog syscall. 
Also, __NR_geteuid returns -1337 " 8 | description: "value, which /usr/bin/id will show as euid=4294965959, and ptrace is blocked " 9 | description: "but returns success, hence strange behavior of the strace command. " 10 | description: "This is an example/demo policy, hence it repeats many default values from the " 11 | description: "https://github.com/google/nsjail/blob/master/config.proto PB schema " 12 | 13 | mode: ONCE 14 | hostname: "JAILED-BASH" 15 | cwd: "/tmp" 16 | 17 | bindhost: "127.0.0.1" 18 | max_conns_per_ip: 10 19 | port: 31337 20 | 21 | time_limit: 100 22 | daemon: false 23 | max_cpus: 1 24 | 25 | keep_env: false 26 | envar: "ENVAR1=VALUE1" 27 | envar: "ENVAR2=VALUE2" 28 | envar: "TERM=linux" 29 | envar: "HOME=/" 30 | envar: "PS1=[\\H:\\t:\\s-\\V:\\w]\\$ " 31 | 32 | keep_caps: true 33 | cap: "CAP_NET_ADMIN" 34 | cap: "CAP_NET_RAW" 35 | silent: false 36 | stderr_to_null: false 37 | skip_setsid: true 38 | pass_fd: 100 39 | pass_fd: 3 40 | disable_no_new_privs: false 41 | 42 | rlimit_as: 128 43 | rlimit_core: 0 44 | rlimit_cpu: 10 45 | rlimit_fsize: 0 46 | rlimit_nofile: 32 47 | rlimit_stack_type: SOFT 48 | rlimit_nproc_type: SOFT 49 | 50 | persona_addr_compat_layout: false 51 | persona_mmap_page_zero: false 52 | persona_read_implies_exec: false 53 | persona_addr_limit_3gb: false 54 | persona_addr_no_randomize: false 55 | 56 | clone_newnet: true 57 | clone_newuser: true 58 | clone_newns: true 59 | clone_newpid: true 60 | clone_newipc: true 61 | clone_newuts: true 62 | clone_newcgroup: true 63 | 64 | uidmap { 65 | inside_id: "0" 66 | outside_id: "" 67 | count: 1 68 | } 69 | 70 | gidmap { 71 | inside_id: "0" 72 | outside_id: "" 73 | count: 1 74 | } 75 | 76 | mount_proc: false 77 | 78 | mount { 79 | src: "/lib" 80 | dst: "/lib" 81 | is_bind: true 82 | rw: false 83 | } 84 | 85 | mount { 86 | src: "/bin" 87 | dst: "/bin" 88 | is_bind: true 89 | rw: false 90 | } 91 | 92 | mount { 93 | src: "/sbin" 94 | dst: "/sbin" 95 | is_bind: true 96 | rw: false 97 | } 98 
| 99 | mount { 100 | src: "/usr" 101 | dst: "/usr" 102 | is_bind: true 103 | rw: false 104 | } 105 | 106 | mount { 107 | src: "/lib64" 108 | dst: "/lib64" 109 | is_bind: true 110 | rw: false 111 | mandatory: false 112 | } 113 | 114 | mount { 115 | src: "/lib32" 116 | dst: "/lib32" 117 | is_bind: true 118 | rw: false 119 | mandatory: false 120 | } 121 | 122 | mount { 123 | dst: "/tmp" 124 | fstype: "tmpfs" 125 | rw: true 126 | is_bind: false 127 | noexec: true 128 | nodev: true 129 | nosuid: true 130 | } 131 | 132 | mount { 133 | src: "/dev/null" 134 | dst: "/dev/null" 135 | rw: true 136 | is_bind: true 137 | } 138 | 139 | mount { 140 | dst: "/proc" 141 | fstype: "proc" 142 | rw: false 143 | } 144 | 145 | mount { 146 | src_content: "This file was created dynamically" 147 | dst: "/DYNAMIC_FILE" 148 | } 149 | 150 | mount { 151 | src: "/nonexistent_777" 152 | dst: "/nonexistent_777" 153 | is_bind: true 154 | mandatory: false 155 | } 156 | 157 | mount { 158 | src: "/proc/self/fd" 159 | dst: "/dev/fd" 160 | is_symlink: true 161 | } 162 | 163 | mount { 164 | src: "/some/unimportant/target" 165 | dst: "/proc/no/symlinks/can/be/created/in/proc" 166 | is_symlink: true 167 | mandatory: false 168 | } 169 | 170 | seccomp_string: "ERRNO(1337) { geteuid } " 171 | seccomp_string: "ERRNO(1) { ptrace, sched_setaffinity } " 172 | seccomp_string: "KILL_PROCESS { syslog } " 173 | seccomp_string: "DEFAULT ALLOW " 174 | 175 | exec_bin { 176 | path: "/bin/bash" 177 | arg0: "sh" 178 | arg: "-i" 179 | } 180 | -------------------------------------------------------------------------------- /configs/bash-with-fake-geteuid.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bash-with-fake-geteuid", 3 | "description": [ 4 | "An example/demo policy which allows to execute /bin/bash and other commands in ", 5 | "a fairly restricted jail containing only some directories from the main ", 6 | "system, and with blocked __NR_syslog syscall. 
Also, __NR_geteuid returns -1337 ", 7 | "value, which /usr/bin/id will show as euid=4294965959, and ptrace is blocked ", 8 | "but returns success, hence strange behavior of the strace command. ", 9 | "This is an example/demo policy, hence it repeats many default values from the ", 10 | "https://github.com/google/nsjail/blob/master/config.proto PB schema " 11 | ], 12 | "mode": "ONCE", 13 | "hostname": "JAILED-BASH", 14 | "cwd": "/tmp", 15 | "port": 31337, 16 | "bindhost": "127.0.0.1", 17 | "maxConnsPerIp": 10, 18 | "timeLimit": 100, 19 | "daemon": false, 20 | "maxCpus": 1, 21 | "keepEnv": false, 22 | "envar": [ 23 | "ENVAR1=VALUE1", 24 | "ENVAR2=VALUE2", 25 | "TERM=linux", 26 | "HOME=/", 27 | "PS1=[\\H:\\t:\\s-\\V:\\w]\\$ " 28 | ], 29 | "keepCaps": true, 30 | "cap": [ 31 | "CAP_NET_ADMIN", 32 | "CAP_NET_RAW" 33 | ], 34 | "silent": false, 35 | "skipSetsid": true, 36 | "stderrToNull": false, 37 | "passFd": [ 38 | 100, 39 | 3 40 | ], 41 | "disableNoNewPrivs": false, 42 | "rlimitAs": "128", 43 | "rlimitCore": "0", 44 | "rlimitCpu": "10", 45 | "rlimitFsize": "0", 46 | "rlimitNofile": "32", 47 | "rlimitNprocType": "SOFT", 48 | "rlimitStackType": "SOFT", 49 | "personaAddrCompatLayout": false, 50 | "personaMmapPageZero": false, 51 | "personaReadImpliesExec": false, 52 | "personaAddrLimit3gb": false, 53 | "personaAddrNoRandomize": false, 54 | "cloneNewnet": true, 55 | "cloneNewuser": true, 56 | "cloneNewns": true, 57 | "cloneNewpid": true, 58 | "cloneNewipc": true, 59 | "cloneNewuts": true, 60 | "cloneNewcgroup": true, 61 | "uidmap": [ 62 | { 63 | "insideId": "0", 64 | "outsideId": "", 65 | "count": 1 66 | } 67 | ], 68 | "gidmap": [ 69 | { 70 | "insideId": "0", 71 | "outsideId": "", 72 | "count": 1 73 | } 74 | ], 75 | "mountProc": false, 76 | "mount": [ 77 | { 78 | "src": "/lib", 79 | "dst": "/lib", 80 | "isBind": true, 81 | "rw": false 82 | }, 83 | { 84 | "src": "/bin", 85 | "dst": "/bin", 86 | "isBind": true, 87 | "rw": false 88 | }, 89 | { 90 | "src": "/sbin", 91 | "dst": 
"/sbin", 92 | "isBind": true, 93 | "rw": false 94 | }, 95 | { 96 | "src": "/usr", 97 | "dst": "/usr", 98 | "isBind": true, 99 | "rw": false 100 | }, 101 | { 102 | "src": "/lib64", 103 | "dst": "/lib64", 104 | "isBind": true, 105 | "rw": false, 106 | "mandatory": false 107 | }, 108 | { 109 | "src": "/lib32", 110 | "dst": "/lib32", 111 | "isBind": true, 112 | "rw": false, 113 | "mandatory": false 114 | }, 115 | { 116 | "dst": "/tmp", 117 | "fstype": "tmpfs", 118 | "isBind": false, 119 | "rw": true, 120 | "nosuid": true, 121 | "nodev": true, 122 | "noexec": true 123 | }, 124 | { 125 | "src": "/dev/null", 126 | "dst": "/dev/null", 127 | "isBind": true, 128 | "rw": true 129 | }, 130 | { 131 | "dst": "/proc", 132 | "fstype": "proc", 133 | "rw": false 134 | }, 135 | { 136 | "srcContent": "VGhpcyBmaWxlIHdhcyBjcmVhdGVkIGR5bmFtaWNhbGx5", 137 | "dst": "/DYNAMIC_FILE" 138 | }, 139 | { 140 | "src": "/nonexistent_777", 141 | "dst": "/nonexistent_777", 142 | "isBind": true, 143 | "mandatory": false 144 | }, 145 | { 146 | "src": "/proc/self/fd", 147 | "dst": "/dev/fd", 148 | "isSymlink": true 149 | }, 150 | { 151 | "src": "/some/unimportant/target", 152 | "dst": "/proc/no/symlinks/can/be/created/in/proc", 153 | "mandatory": false, 154 | "isSymlink": true 155 | } 156 | ], 157 | "seccompString": [ 158 | "ERRNO(1337) { geteuid }\t", 159 | "ERRNO(1) { ptrace, sched_setaffinity }\t\t", 160 | "KILL_PROCESS { syslog }\t\t", 161 | "DEFAULT ALLOW\t\t\t" 162 | ], 163 | "execBin": { 164 | "path": "/bin/bash", 165 | "arg": [ 166 | "-i" 167 | ], 168 | "arg0": "sh" 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /configs/demo-dont-use-chrome-with-net.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "chrome-with-net" 4 | 5 | description: "Don't use for anything serious - this is just a demo policy. 
See notes" 6 | description: "at the end of this description for more." 7 | description: "" 8 | description: "This policy allows to run Chrome inside a jail. Access to networking is" 9 | description: "permitted with this setup (clone_newnet: false)." 10 | description: "" 11 | description: "The only permitted home directories are $HOME/.config/google-chrome and $HOME/Documents." 12 | description: "The rest of available on the FS files/dirs are libs and X-related files/dirs." 13 | description: "" 14 | description: "Run as:" 15 | description: "" 16 | description: "./nsjail --config configs/demo-dont-use-chrome-with-net.cfg" 17 | description: "" 18 | description: "You can then go to https://uploadfiles.io/ and try to upload a file in order" 19 | description: "to see how your local directory (also, all system directories) look like." 20 | description: "" 21 | description: "Note: Using this profile for anything serious is *A VERY BAD* idea. Chrome" 22 | description: "provides excellent FS&syscall sandbox for Linux, as this profile disables" 23 | description: "this sandboxing with --no-sandbox and substitutes Chrome's syscall/ns policy" 24 | description: "with more relaxed namespacing." 
25 | 26 | mode: ONCE 27 | hostname: "CHROME" 28 | cwd: "/user" 29 | 30 | time_limit: 0 31 | 32 | envar: "HOME=/user" 33 | envar: "DISPLAY" 34 | envar: "TMP=/tmp" 35 | 36 | rlimit_as: 4096 37 | rlimit_cpu: 1000 38 | rlimit_fsize: 1024 39 | rlimit_nofile: 1024 40 | 41 | clone_newnet: false 42 | 43 | mount { 44 | dst: "/proc" 45 | fstype: "proc" 46 | } 47 | 48 | mount { 49 | src: "/lib" 50 | dst: "/lib" 51 | is_bind: true 52 | } 53 | 54 | mount { 55 | src: "/usr/lib" 56 | dst: "/usr/lib" 57 | is_bind: true 58 | } 59 | 60 | mount { 61 | src: "/lib64" 62 | dst: "/lib64" 63 | is_bind: true 64 | mandatory: false 65 | } 66 | 67 | mount { 68 | src: "/lib32" 69 | dst: "/lib32" 70 | is_bind: true 71 | mandatory: false 72 | } 73 | 74 | mount { 75 | src: "/bin" 76 | dst: "/bin" 77 | is_bind: true 78 | } 79 | 80 | mount { 81 | src: "/usr/bin" 82 | dst: "/usr/bin" 83 | is_bind: true 84 | } 85 | 86 | mount { 87 | src: "/opt/google/chrome" 88 | dst: "/opt/google/chrome" 89 | is_bind: true 90 | } 91 | 92 | mount { 93 | src: "/usr/share" 94 | dst: "/usr/share" 95 | is_bind: true 96 | } 97 | 98 | mount { 99 | src: "/dev/urandom" 100 | dst: "/dev/urandom" 101 | is_bind: true 102 | rw: true 103 | } 104 | 105 | mount { 106 | src: "/dev/null" 107 | dst: "/dev/null" 108 | is_bind: true 109 | rw: true 110 | } 111 | 112 | mount { 113 | src: "/dev/fd/" 114 | dst: "/dev/fd/" 115 | is_bind: true 116 | rw: true 117 | } 118 | 119 | mount { 120 | src: "/etc/resolv.conf" 121 | dst: "/etc/resolv.conf" 122 | is_bind: true 123 | mandatory: false 124 | } 125 | 126 | mount { 127 | dst: "/tmp" 128 | fstype: "tmpfs" 129 | rw: true 130 | is_bind: false 131 | } 132 | 133 | mount { 134 | dst: "/dev/shm" 135 | fstype: "tmpfs" 136 | rw: true 137 | is_bind: false 138 | } 139 | 140 | mount { 141 | dst: "/user" 142 | fstype: "tmpfs" 143 | rw: true 144 | } 145 | 146 | mount { 147 | prefix_src_env: "HOME" 148 | src: "/Documents" 149 | dst: "/user/Documents" 150 | rw: true 151 | is_bind: true 152 | mandatory: false 
153 | } 154 | 155 | mount { 156 | prefix_src_env: "HOME" 157 | src: "/.config/google-chrome" 158 | dst: "/user/.config/google-chrome" 159 | is_bind: true 160 | rw: true 161 | mandatory: false 162 | } 163 | 164 | mount { 165 | src: "/tmp/.X11-unix/X0" 166 | dst: "/tmp/.X11-unix/X0" 167 | is_bind: true 168 | } 169 | 170 | seccomp_string: " KILL_PROCESS {" 171 | seccomp_string: " ptrace," 172 | seccomp_string: " process_vm_readv," 173 | seccomp_string: " process_vm_writev" 174 | seccomp_string: " }" 175 | seccomp_string: " DEFAULT ALLOW" 176 | 177 | exec_bin { 178 | path: "/opt/google/chrome/google-chrome" 179 | arg: "--no-sandbox" 180 | } 181 | -------------------------------------------------------------------------------- /configs/firefox-with-cloned-net.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "firefox-with-cloned-net" 4 | 5 | description: "This policy allows to run firefox inside a jail on a separate eth interface." 6 | description: "A separate networking context separates process from the global \"lo\", and" 7 | description: "from global abstract socket namespace." 8 | description: "" 9 | description: "The only permitted home directory is $HOME/.mozilla and $HOME/Documents." 10 | description: "The rest of available on the FS files/dires are libs and X-related files/dirs." 11 | description: "" 12 | description: "As this needs to be run as root, you will have to set-up correct uid&gid" 13 | description: "mappings (here: jagger), name of your local interface (here: 'enp0s31f6')," 14 | description: "and correct IPv4 addresses." 15 | description: "" 16 | description: "IPv6 should work out-of-the-box, given that your local IPv6 discovery is set" 17 | description: "up correctly." 
18 | description: "" 19 | description: "Run as:" 20 | description: "" 21 | description: "sudo ./nsjail --config configs/firefox-with-cloned-net.cfg" 22 | description: "" 23 | description: "You can then go to https://uploadfiles.io/ and try to upload a file in order" 24 | description: "to see how your local directory (also, all system directories) look like." 25 | 26 | mode: ONCE 27 | hostname: "FF-MACVTAP" 28 | cwd: "/user" 29 | 30 | time_limit: 0 31 | 32 | envar: "HOME=/user" 33 | envar: "DISPLAY" 34 | envar: "TMP=/tmp" 35 | envar: "FONTCONFIG_FILE=/etc/fonts/fonts.conf" 36 | envar: "FC_CONFIG_FILE=/etc/fonts/fonts.conf" 37 | 38 | rlimit_as: 4096 39 | rlimit_cpu: 1000 40 | rlimit_fsize: 1024 41 | rlimit_nofile: 512 42 | 43 | uidmap { 44 | inside_id: "9999999" 45 | outside_id: "jagger" 46 | } 47 | 48 | gidmap { 49 | inside_id: "9999999" 50 | outside_id: "jagger" 51 | } 52 | 53 | mount { 54 | dst: "/proc" 55 | fstype: "proc" 56 | rw: true 57 | } 58 | 59 | mount { 60 | src: "/lib" 61 | dst: "/lib" 62 | is_bind: true 63 | } 64 | 65 | mount { 66 | src: "/usr/lib" 67 | dst: "/usr/lib" 68 | is_bind: true 69 | } 70 | 71 | mount { 72 | src: "/lib64" 73 | dst: "/lib64" 74 | is_bind: true 75 | mandatory: false 76 | } 77 | 78 | mount { 79 | src: "/lib32" 80 | dst: "/lib32" 81 | is_bind: true 82 | mandatory: false 83 | } 84 | 85 | mount { 86 | src: "/usr/lib/firefox" 87 | dst: "/usr/lib/firefox" 88 | is_bind: true 89 | } 90 | 91 | mount { 92 | src: "/usr/bin/firefox" 93 | dst: "/usr/bin/firefox" 94 | is_bind: true 95 | } 96 | 97 | mount { 98 | src: "/usr/share" 99 | dst: "/usr/share" 100 | is_bind: true 101 | } 102 | 103 | mount { 104 | src_content: "\n\n/usr/share/fonts/tmp/fontconfig" 105 | dst: "/etc/fonts/fonts.conf" 106 | } 107 | 108 | mount { 109 | src: "/dev/urandom" 110 | dst: "/dev/urandom" 111 | is_bind: true 112 | rw: true 113 | } 114 | 115 | mount { 116 | src: "/dev/null" 117 | dst: "/dev/null" 118 | is_bind: true 119 | rw: true 120 | } 121 | 122 | mount { 123 | 
src_content: "nameserver 8.8.8.8" 124 | dst: "/etc/resolv.conf" 125 | } 126 | 127 | mount { 128 | dst: "/tmp" 129 | fstype: "tmpfs" 130 | rw: true 131 | is_bind: false 132 | } 133 | 134 | mount { 135 | dst: "/dev/shm" 136 | fstype: "tmpfs" 137 | rw: true 138 | is_bind: false 139 | } 140 | 141 | mount { 142 | dst: "/user" 143 | fstype: "tmpfs" 144 | rw: true 145 | } 146 | 147 | mount { 148 | prefix_src_env: "HOME" 149 | src: "/Documents" 150 | dst: "/user/Documents" 151 | rw: true 152 | is_bind: true 153 | mandatory: false 154 | } 155 | 156 | mount { 157 | prefix_src_env: "HOME" 158 | src: "/.mozilla" 159 | dst: "/user/.mozilla" 160 | is_bind: true 161 | rw: true 162 | mandatory: false 163 | } 164 | 165 | mount { 166 | prefix_src_env: "XAUTHORITY" 167 | src: "" 168 | dst: "/user/.Xauthority" 169 | is_bind: true 170 | rw: false 171 | mandatory: false 172 | } 173 | 174 | mount { 175 | src: "/tmp/.X11-unix/X0" 176 | dst: "/tmp/.X11-unix/X0" 177 | is_bind: true 178 | mandatory: false 179 | } 180 | 181 | mount { 182 | src: "/tmp/.X11-unix/X1" 183 | dst: "/tmp/.X11-unix/X1" 184 | is_bind: true 185 | mandatory: false 186 | } 187 | 188 | seccomp_string: "KILL_PROCESS {" 189 | seccomp_string: " ptrace," 190 | seccomp_string: " process_vm_readv," 191 | seccomp_string: " process_vm_writev" 192 | seccomp_string: "}" 193 | seccomp_string: "DEFAULT ALLOW" 194 | 195 | macvlan_iface: "enp0s31f6" 196 | macvlan_vs_ip: "192.168.10.223" 197 | macvlan_vs_nm: "255.255.255.0" 198 | macvlan_vs_gw: "192.168.10.1" 199 | 200 | exec_bin { 201 | path: "/usr/lib/firefox/firefox" 202 | } 203 | -------------------------------------------------------------------------------- /configs/firefox-with-net-wayland.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "firefox-with-net" 4 | 5 | description: "This policy allows to run firefox inside a jail. 
Access to networking is" 6 | description: "permitted with this setup (clone_newnet: false)." 7 | description: "" 8 | description: "The only permitted home directory is $HOME/.mozilla and $HOME/Documents." 9 | description: "The rest of available on the FS files/dires are libs and X-related files/dirs." 10 | description: "" 11 | description: "Run as:" 12 | description: "" 13 | description: "./nsjail --config configs/firefox-with-net-wayland.cfg" 14 | description: "" 15 | description: "You can then go to https://uploadfiles.io/ and try to upload a file in order" 16 | description: "to see how your local directory (also, all system directories) look like." 17 | 18 | mode: ONCE 19 | hostname: "FIREFOX" 20 | cwd: "/user" 21 | 22 | time_limit: 0 23 | 24 | clone_newnet: false 25 | 26 | envar: "HOME=/user" 27 | envar: "TMP=/tmp" 28 | envar: "FONTCONFIG_FILE=/etc/fonts/fonts.conf" 29 | envar: "FC_CONFIG_FILE=/etc/fonts/fonts.conf" 30 | envar: "MOZ_ENABLE_WAYLAND=1" 31 | envar: "XDG_RUNTIME_DIR=/user/run/" 32 | envar: "WAYLAND_DISPLAY" 33 | 34 | rlimit_as: 4096 35 | rlimit_cpu: 1000 36 | rlimit_fsize: 1024 37 | rlimit_nofile: 512 38 | 39 | uidmap { 40 | inside_id: "9999999" 41 | } 42 | 43 | gidmap { 44 | inside_id: "9999999" 45 | } 46 | 47 | mount { 48 | dst: "/proc" 49 | fstype: "proc" 50 | rw: true 51 | } 52 | 53 | mount { 54 | src: "/lib" 55 | dst: "/lib" 56 | is_bind: true 57 | } 58 | 59 | mount { 60 | src: "/usr/lib" 61 | dst: "/usr/lib" 62 | is_bind: true 63 | } 64 | 65 | mount { 66 | src: "/lib64" 67 | dst: "/lib64" 68 | is_bind: true 69 | mandatory: false 70 | } 71 | 72 | mount { 73 | src: "/lib32" 74 | dst: "/lib32" 75 | is_bind: true 76 | mandatory: false 77 | } 78 | 79 | mount { 80 | src: "/usr/lib/firefox" 81 | dst: "/usr/lib/firefox" 82 | is_bind: true 83 | } 84 | 85 | mount { 86 | src: "/usr/bin/firefox" 87 | dst: "/usr/bin/firefox" 88 | is_bind: true 89 | } 90 | 91 | mount { 92 | src: "/usr/share" 93 | dst: "/usr/share" 94 | is_bind: true 95 | } 96 | 97 | mount 
{ 98 | src_content: "\n\n/usr/share/fonts/tmp/fontconfig" 99 | dst: "/etc/fonts/fonts.conf" 100 | } 101 | 102 | mount { 103 | src: "/dev/urandom" 104 | dst: "/dev/urandom" 105 | is_bind: true 106 | rw: true 107 | } 108 | 109 | mount { 110 | src: "/dev/null" 111 | dst: "/dev/null" 112 | is_bind: true 113 | rw: true 114 | } 115 | 116 | mount { 117 | src_content: "nameserver 8.8.8.8" 118 | dst: "/etc/resolv.conf" 119 | } 120 | 121 | mount { 122 | dst: "/tmp" 123 | fstype: "tmpfs" 124 | rw: true 125 | is_bind: false 126 | } 127 | 128 | mount { 129 | dst: "/dev/shm" 130 | fstype: "tmpfs" 131 | rw: true 132 | is_bind: false 133 | } 134 | 135 | mount { 136 | dst: "/user" 137 | fstype: "tmpfs" 138 | rw: true 139 | } 140 | 141 | mount { 142 | prefix_src_env: "HOME" 143 | src: "/Documents" 144 | dst: "/user/Documents" 145 | rw: true 146 | is_bind: true 147 | mandatory: false 148 | } 149 | 150 | mount { 151 | prefix_src_env: "HOME" 152 | src: "/.mozilla" 153 | dst: "/user/.mozilla" 154 | is_bind: true 155 | rw: true 156 | mandatory: false 157 | } 158 | 159 | mount { 160 | # Change it to your user id 161 | src: "/run/user/1000/wayland-0" 162 | dst: "/user/run/wayland-0" 163 | is_bind: true 164 | rw: true 165 | } 166 | 167 | exec_bin { 168 | path: "/usr/lib/firefox/firefox" 169 | } 170 | -------------------------------------------------------------------------------- /configs/firefox-with-net.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "firefox-with-net" 4 | 5 | description: "This policy allows to run firefox inside a jail. Access to networking is" 6 | description: "permitted with this setup (clone_newnet: false)." 7 | description: "" 8 | description: "The only permitted home directory is $HOME/.mozilla and $HOME/Documents." 9 | description: "The rest of available on the FS files/dires are libs and X-related files/dirs." 
10 | description: "" 11 | description: "Run as:" 12 | description: "" 13 | description: "./nsjail --config configs/firefox-with-net.cfg" 14 | description: "" 15 | description: "You can then go to https://uploadfiles.io/ and try to upload a file in order" 16 | description: "to see how your local directory (also, all system directories) look like." 17 | 18 | mode: ONCE 19 | hostname: "FIREFOX" 20 | cwd: "/user" 21 | 22 | time_limit: 0 23 | 24 | clone_newnet: false 25 | 26 | envar: "HOME=/user" 27 | envar: "DISPLAY" 28 | envar: "TMP=/tmp" 29 | envar: "FONTCONFIG_FILE=/etc/fonts/fonts.conf" 30 | envar: "FC_CONFIG_FILE=/etc/fonts/fonts.conf" 31 | 32 | rlimit_as: 4096 33 | rlimit_cpu: 1000 34 | rlimit_fsize: 1024 35 | rlimit_nofile: 512 36 | 37 | uidmap { 38 | inside_id: "9999999" 39 | } 40 | 41 | gidmap { 42 | inside_id: "9999999" 43 | } 44 | 45 | mount { 46 | dst: "/proc" 47 | fstype: "proc" 48 | rw: true 49 | } 50 | 51 | mount { 52 | src: "/lib" 53 | dst: "/lib" 54 | is_bind: true 55 | } 56 | 57 | mount { 58 | src: "/usr/lib" 59 | dst: "/usr/lib" 60 | is_bind: true 61 | } 62 | 63 | mount { 64 | src: "/lib64" 65 | dst: "/lib64" 66 | is_bind: true 67 | mandatory: false 68 | } 69 | 70 | mount { 71 | src: "/lib32" 72 | dst: "/lib32" 73 | is_bind: true 74 | mandatory: false 75 | } 76 | 77 | mount { 78 | src: "/usr/lib/firefox" 79 | dst: "/usr/lib/firefox" 80 | is_bind: true 81 | } 82 | 83 | mount { 84 | src: "/usr/bin/firefox" 85 | dst: "/usr/bin/firefox" 86 | is_bind: true 87 | } 88 | 89 | mount { 90 | src: "/usr/share" 91 | dst: "/usr/share" 92 | is_bind: true 93 | } 94 | 95 | mount { 96 | src_content: "\n\n/usr/share/fonts/tmp/fontconfig" 97 | dst: "/etc/fonts/fonts.conf" 98 | } 99 | 100 | mount { 101 | src: "/dev/urandom" 102 | dst: "/dev/urandom" 103 | is_bind: true 104 | rw: true 105 | } 106 | 107 | mount { 108 | src: "/dev/null" 109 | dst: "/dev/null" 110 | is_bind: true 111 | rw: true 112 | } 113 | 114 | mount { 115 | src_content: "nameserver 8.8.8.8" 116 | dst: 
"/etc/resolv.conf" 117 | } 118 | 119 | mount { 120 | dst: "/tmp" 121 | fstype: "tmpfs" 122 | rw: true 123 | is_bind: false 124 | } 125 | 126 | mount { 127 | dst: "/dev/shm" 128 | fstype: "tmpfs" 129 | rw: true 130 | is_bind: false 131 | } 132 | 133 | mount { 134 | dst: "/user" 135 | fstype: "tmpfs" 136 | rw: true 137 | } 138 | 139 | mount { 140 | prefix_src_env: "HOME" 141 | src: "/Documents" 142 | dst: "/user/Documents" 143 | rw: true 144 | is_bind: true 145 | mandatory: false 146 | } 147 | 148 | mount { 149 | prefix_src_env: "HOME" 150 | src: "/.mozilla" 151 | dst: "/user/.mozilla" 152 | is_bind: true 153 | rw: true 154 | mandatory: false 155 | } 156 | 157 | mount { 158 | prefix_src_env: "XAUTHORITY" 159 | src: "" 160 | dst: "/user/.Xauthority" 161 | is_bind: true 162 | rw: false 163 | mandatory: false 164 | } 165 | 166 | mount { 167 | src: "/tmp/.X11-unix/X0" 168 | dst: "/tmp/.X11-unix/X0" 169 | is_bind: true 170 | mandatory: false 171 | } 172 | 173 | mount { 174 | src: "/tmp/.X11-unix/X1" 175 | dst: "/tmp/.X11-unix/X1" 176 | is_bind: true 177 | mandatory: false 178 | } 179 | 180 | seccomp_string: "KILL_PROCESS {" 181 | seccomp_string: " ptrace," 182 | seccomp_string: " process_vm_readv," 183 | seccomp_string: " process_vm_writev" 184 | seccomp_string: "}" 185 | seccomp_string: "DEFAULT ALLOW" 186 | 187 | exec_bin { 188 | path: "/usr/lib/firefox/firefox" 189 | } 190 | -------------------------------------------------------------------------------- /configs/hexchat-with-net.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "hexchat-with-net" 4 | 5 | description: "This policy allows to run hexchat inside a jail. Access to networking is" 6 | description: "permitted with this setup (clone_newnet: false)." 7 | description: "" 8 | description: "The only permitted home directory is $HOME/.hexchat and $HOME/Documents." 
9 | description: "The rest of available on the FS files/dires are libs and X-related files/dirs." 10 | description: "" 11 | description: "Run as:" 12 | description: "./nsjail --config configs/hexchat-with-net.cfg" 13 | 14 | mode: ONCE 15 | hostname: "HEXCHAT" 16 | cwd: "/user" 17 | daemon: true 18 | 19 | time_limit: 0 20 | max_cpus: 2 21 | 22 | envar: "HOME=/user" 23 | envar: "DISPLAY" 24 | envar: "TMP=/tmp" 25 | envar: "FONTCONFIG_FILE=/etc/fonts/fonts.conf" 26 | envar: "FC_CONFIG_FILE=/etc/fonts/fonts.conf" 27 | envar: "LANG" 28 | 29 | rlimit_as: 4096 30 | rlimit_cpu_type: INF 31 | rlimit_fsize: 4096 32 | rlimit_nofile: 128 33 | 34 | clone_newnet: false 35 | 36 | mount { 37 | dst: "/proc" 38 | fstype: "proc" 39 | } 40 | 41 | mount { 42 | src: "/lib" 43 | dst: "/lib" 44 | is_bind: true 45 | } 46 | 47 | mount { 48 | src: "/usr/lib" 49 | dst: "/usr/lib" 50 | is_bind: true 51 | } 52 | 53 | mount { 54 | src: "/lib64" 55 | dst: "/lib64" 56 | is_bind: true 57 | mandatory: false 58 | } 59 | 60 | mount { 61 | src: "/lib32" 62 | dst: "/lib32" 63 | is_bind: true 64 | mandatory: false 65 | } 66 | 67 | mount { 68 | src_content: "\n\n/usr/share/fonts/tmp/fontconfig" 69 | dst: "/etc/fonts/fonts.conf" 70 | } 71 | 72 | mount { 73 | src: "/usr/share" 74 | dst: "/usr/share" 75 | is_bind: true 76 | } 77 | 78 | mount { 79 | src: "/dev/urandom" 80 | dst: "/dev/urandom" 81 | is_bind: true 82 | rw: true 83 | } 84 | 85 | mount { 86 | src_content: "nameserver 8.8.8.8" 87 | dst: "/etc/resolv.conf" 88 | } 89 | 90 | mount { 91 | dst: "/tmp" 92 | fstype: "tmpfs" 93 | rw: true 94 | is_bind: false 95 | } 96 | 97 | mount { 98 | dst: "/dev/shm" 99 | fstype: "tmpfs" 100 | rw: true 101 | is_bind: false 102 | } 103 | 104 | mount { 105 | dst: "/user" 106 | fstype: "tmpfs" 107 | rw: true 108 | } 109 | 110 | mount { 111 | prefix_src_env: "HOME" 112 | src: "/Documents" 113 | dst: "/user/Documents" 114 | rw: true 115 | is_bind: true 116 | mandatory: false 117 | } 118 | 119 | mount { 120 | prefix_src_env: 
"HOME" 121 | src: "/.config/hexchat" 122 | dst: "/user/.config/hexchat" 123 | is_bind: true 124 | rw: true 125 | mandatory: false 126 | } 127 | 128 | mount { 129 | prefix_src_env: "XAUTHORITY" 130 | src: "" 131 | dst: "/user/.Xauthority" 132 | is_bind: true 133 | rw: false 134 | mandatory: false 135 | } 136 | 137 | mount { 138 | src: "/tmp/.X11-unix" 139 | dst: "/tmp/.X11-unix" 140 | is_bind: true 141 | mandatory: false 142 | } 143 | 144 | seccomp_string: "KILL_PROCESS { " 145 | seccomp_string: " ptrace, " 146 | seccomp_string: " process_vm_readv, " 147 | seccomp_string: " process_vm_writev " 148 | seccomp_string: "}, " 149 | seccomp_string: "ERRNO(1) { " 150 | seccomp_string: " sched_setaffinity " 151 | seccomp_string: "} " 152 | seccomp_string: "DEFAULT ALLOW " 153 | 154 | exec_bin { 155 | path: "/usr/bin/hexchat" 156 | exec_fd: true 157 | } 158 | -------------------------------------------------------------------------------- /configs/home-documents-with-xorg-no-net.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "documents-with-xorg" 4 | 5 | description: "This policy allows to run many X-org based tools, which are allowed" 6 | description: "to access $HOME/Documents directory only. An example of use is:" 7 | description: "" 8 | description: "./nsjail --config configs/home-documents-with-xorg-no-net.cfg -- \\" 9 | description: " /usr/bin/geeqie /user/Documents/" 10 | description: "" 11 | description: "What is more, this policy doesn't allow access to networking."
12 | 13 | mode: ONCE 14 | hostname: "NSJAIL" 15 | cwd: "/user" 16 | 17 | time_limit: 1000 18 | 19 | envar: "DISPLAY" 20 | envar: "HOME=/user" 21 | envar: "TMP=/tmp" 22 | 23 | rlimit_as: 2048 24 | rlimit_cpu: 1000 25 | rlimit_fsize: 1024 26 | rlimit_nofile: 16 27 | 28 | mount { 29 | src: "/lib" 30 | dst: "/lib" 31 | is_bind: true 32 | } 33 | 34 | mount { 35 | src: "/lib64" 36 | dst: "/lib64" 37 | is_bind: true 38 | mandatory: false 39 | } 40 | 41 | mount { 42 | src: "/lib32" 43 | dst: "/lib32" 44 | is_bind: true 45 | mandatory: false 46 | } 47 | 48 | mount { 49 | src: "/bin" 50 | dst: "/bin" 51 | is_bind: true 52 | } 53 | 54 | mount { 55 | src: "/usr/bin" 56 | dst: "/usr/bin" 57 | is_bind: true 58 | } 59 | 60 | mount { 61 | src: "/usr/share" 62 | dst: "/usr/share" 63 | is_bind: true 64 | } 65 | 66 | mount { 67 | src: "/usr/lib" 68 | dst: "/usr/lib" 69 | is_bind: true 70 | } 71 | 72 | mount { 73 | src: "/usr/lib64" 74 | dst: "/usr/lib64" 75 | is_bind: true 76 | mandatory: false 77 | } 78 | 79 | mount { 80 | src: "/usr/lib32" 81 | dst: "/usr/lib32" 82 | is_bind: true 83 | mandatory: false 84 | } 85 | 86 | mount { 87 | dst: "/tmp" 88 | fstype: "tmpfs" 89 | rw: true 90 | } 91 | 92 | mount { 93 | dst: "/dev/shm" 94 | fstype: "tmpfs" 95 | rw: true 96 | } 97 | 98 | mount { 99 | dst: "/user" 100 | fstype: "tmpfs" 101 | rw: true 102 | } 103 | 104 | mount { 105 | prefix_src_env: "HOME" 106 | src: "/Documents" 107 | dst: "/user/Documents" 108 | is_bind: true 109 | } 110 | 111 | mount { 112 | src: "/tmp/.X11-unix" 113 | dst: "/tmp/.X11-unix" 114 | is_bind: true 115 | rw: true 116 | } 117 | 118 | mount { 119 | src: "/dev/null" 120 | dst: "/dev/null" 121 | is_bind: true 122 | rw: true 123 | } 124 | 125 | mount { 126 | src: "/dev/random" 127 | dst: "/dev/random" 128 | is_bind: true 129 | rw: true 130 | } 131 | 132 | mount { 133 | src: "/dev/urandom" 134 | dst: "/dev/urandom" 135 | is_bind: true 136 | rw: true 137 | } 138 | 139 | mount { 140 | src: "/etc/passwd" 141 | dst: 
"/etc/passwd" 142 | is_bind: true 143 | } 144 | 145 | seccomp_string: "KILL_PROCESS {" 146 | seccomp_string: " ptrace," 147 | seccomp_string: " process_vm_readv," 148 | seccomp_string: " process_vm_writev" 149 | seccomp_string: "}" 150 | seccomp_string: "DEFAULT ALLOW" 151 | -------------------------------------------------------------------------------- /configs/imagemagick-convert.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "imagemagick-convert" 4 | 5 | description: "This policy allows to run ImageMagick's convert inside a jail." 6 | description: "Your $HOME's Documents will be mapped as /user/Documents" 7 | description: "" 8 | description: "Run as:" 9 | description: "" 10 | description: "./nsjail --config imagemagick-convert.cfg -- /usr/bin/convert jpg:/user/Documents/input.jpg png:/user/Documents/output.png " 11 | description: "or " 12 | description: "./nsjail --config imagemagick-convert.cfg -- /usr/bin/convert jpg:- png:- file.png" 13 | 14 | mode: ONCE 15 | hostname: "IM-CONVERT" 16 | cwd: "/user" 17 | 18 | time_limit: 120 19 | 20 | envar: "HOME=/user" 21 | envar: "TMP=/tmp" 22 | 23 | rlimit_as: 2048 24 | rlimit_cpu: 1000 25 | rlimit_fsize: 1024 26 | rlimit_nofile: 64 27 | 28 | mount { 29 | src: "/lib" 30 | dst: "/lib" 31 | is_bind: true 32 | } 33 | 34 | mount { 35 | src: "/usr/lib" 36 | dst: "/usr/lib" 37 | is_bind: true 38 | } 39 | 40 | mount { 41 | src: "/lib64" 42 | dst: "/lib64" 43 | is_bind: true 44 | mandatory: false 45 | } 46 | 47 | mount { 48 | src: "/lib32" 49 | dst: "/lib32" 50 | is_bind: true 51 | mandatory: false 52 | } 53 | 54 | mount { 55 | dst: "/tmp" 56 | fstype: "tmpfs" 57 | rw: true 58 | is_bind: false 59 | } 60 | 61 | mount { 62 | dst: "/user" 63 | fstype: "tmpfs" 64 | rw: true 65 | } 66 | 67 | mount { 68 | prefix_src_env: "HOME" 69 | src: "/Documents" 70 | dst: "/user/Documents" 71 | rw: true 72 | is_bind: true 73 | mandatory: false 74 | } 75 | 76 | 
seccomp_string: "ALLOW {" 77 | seccomp_string: " read, write, open, openat, close, newstat, newfstat," 78 | seccomp_string: " newlstat, lseek, mmap, mprotect, munmap, brk," 79 | seccomp_string: " rt_sigaction, rt_sigprocmask, pwrite64, access," 80 | seccomp_string: " getpid, execveat, getdents, unlink, fchmod," 81 | seccomp_string: " getrlimit, getrusage, sysinfo, times, futex," 82 | seccomp_string: " arch_prctl, sched_getaffinity, set_tid_address," 83 | seccomp_string: " clock_gettime, set_robust_list, exit_group," 84 | seccomp_string: " clone, getcwd, pread64, readlink, prlimit64, madvise" 85 | seccomp_string: "}" 86 | seccomp_string: "DEFAULT KILL_PROCESS" 87 | 88 | exec_bin { 89 | path: "" 90 | arg0: "/usr/bin/convert" 91 | exec_fd: true 92 | } 93 | -------------------------------------------------------------------------------- /configs/static-busybox-with-execveat.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "static-busybox-with-execveat" 4 | description: "An example/demo policy which allows to execute /bin/busybox-static in an " 5 | description: "empty (only /proc) mount namespace which doesn't even include busybox itself" 6 | 7 | mode: ONCE 8 | hostname: "BUSYBOX" 9 | cwd: "/" 10 | 11 | time_limit: 100 12 | 13 | keep_env: false 14 | envar: "TERM=linux" 15 | envar: "PS1=$ " 16 | 17 | skip_setsid: true 18 | 19 | clone_newcgroup: true 20 | 21 | uidmap { 22 | inside_id: "999999" 23 | outside_id: "" 24 | count: 1 25 | } 26 | 27 | gidmap { 28 | inside_id: "999999" 29 | outside_id: "" 30 | count: 1 31 | } 32 | 33 | mount_proc: false 34 | 35 | mount { 36 | dst: "/proc" 37 | fstype: "proc" 38 | rw: false 39 | } 40 | 41 | seccomp_string: "ERRNO(0) { ptrace }" 42 | seccomp_string: "DEFAULT ALLOW" 43 | 44 | exec_bin { 45 | path: "/bin/busybox" 46 | arg: "sh" 47 | exec_fd: true 48 | } 49 | -------------------------------------------------------------------------------- 
/configs/telegram.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "telegram-with-net" 4 | 5 | description: "This policy allows to run telegram inside a jail. Access to networking is" 6 | description: "permitted with this setup (clone_newnet: false)." 7 | description: "" 8 | description: "The only permitted home directory is $HOME/.local/share/TelegramDesktop/" 9 | description: "The rest of available on the FS files/dires are libs and wayland-related files/dirs." 10 | description: "" 11 | description: "Run as:" 12 | description: "./nsjail --config configs/telegram.cfg" 13 | 14 | mode: ONCE 15 | hostname: "TELEGRAM" 16 | cwd: "/user" 17 | daemon: true 18 | 19 | time_limit: 0 20 | max_cpus: 2 21 | 22 | envar: "HOME=/user" 23 | envar: "WAYLAND_DISPLAY" 24 | envar: "TMP=/tmp" 25 | envar: "LANG" 26 | envar: "XDG_RUNTIME_DIR=/run/user/1000" 27 | envar: "QT_QPA_PLATFORM=wayland" 28 | 29 | rlimit_as: 4096 30 | rlimit_cpu_type: INF 31 | rlimit_fsize: 4096 32 | rlimit_nofile: 128 33 | 34 | clone_newnet: false 35 | 36 | mount { 37 | dst: "/proc" 38 | fstype: "proc" 39 | } 40 | 41 | mount { 42 | src: "/lib" 43 | dst: "/lib" 44 | is_bind: true 45 | } 46 | 47 | mount { 48 | src: "/usr/lib" 49 | dst: "/usr/lib" 50 | is_bind: true 51 | } 52 | 53 | mount { 54 | src: "/lib64" 55 | dst: "/lib64" 56 | is_bind: true 57 | mandatory: false 58 | } 59 | 60 | mount { 61 | src: "/usr/share" 62 | dst: "/usr/share" 63 | is_bind: true 64 | } 65 | 66 | mount { 67 | src: "/dev/urandom" 68 | dst: "/dev/urandom" 69 | is_bind: true 70 | rw: true 71 | } 72 | 73 | mount { 74 | src_content: "nameserver 8.8.8.8" 75 | dst: "/etc/resolv.conf" 76 | } 77 | 78 | mount { 79 | dst: "/tmp" 80 | fstype: "tmpfs" 81 | rw: true 82 | is_bind: false 83 | } 84 | 85 | mount { 86 | dst: "/dev/shm" 87 | fstype: "tmpfs" 88 | rw: true 89 | is_bind: false 90 | } 91 | 92 | mount { 93 | dst: "/user" 94 | fstype: "tmpfs" 95 | rw: true 96 | } 97 
| 98 | mount { 99 | prefix_src_env: "HOME" 100 | src: "/.local/share/TelegramDesktop/" 101 | dst: "/user/.local/share/TelegramDesktop/" 102 | is_bind: true 103 | rw: true 104 | mandatory: false 105 | } 106 | 107 | mount { 108 | dst: "/run/user/1000" 109 | fstype: "tmpfs" 110 | rw: true 111 | is_bind: false 112 | } 113 | 114 | mount { 115 | src: "/run/user/1000/wayland-0" 116 | dst: "/run/user/1000/wayland-0" 117 | is_bind: true 118 | rw: false 119 | mandatory: true 120 | } 121 | 122 | mount { 123 | src: "/run/user/1000/pulse/native" 124 | dst: "/run/user/1000/pulse/native" 125 | is_bind: true 126 | rw: false 127 | mandatory: false 128 | } 129 | 130 | seccomp_string: "KILL_PROCESS { " 131 | seccomp_string: " ptrace, " 132 | seccomp_string: " process_vm_readv, " 133 | seccomp_string: " process_vm_writev " 134 | seccomp_string: "}, " 135 | seccomp_string: "ERRNO(1) { " 136 | seccomp_string: " sched_setaffinity " 137 | seccomp_string: "} " 138 | seccomp_string: "DEFAULT ALLOW " 139 | 140 | exec_bin { 141 | path: "/usr/bin/telegram-desktop" 142 | exec_fd: true 143 | } 144 | -------------------------------------------------------------------------------- /configs/tomcat8.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "tomcat8" 4 | 5 | description: "Tested under Ubuntu 16.04 with tomcat8=8.0.32-1ubuntu1.9," 6 | description: "libnl-route-3-200=3.2.27-1ubuntu0.16.04.1," 7 | description: "libprotobuf9v5=2.6.1-1.3," 8 | description: "openjdk-8-jre=8u191-b12-2ubuntu0.16.04.1. 
" 9 | description: "Run as: sudo ./nsjail --config configs/tomcat8.cfg" 10 | 11 | mode: ONCE 12 | hostname: "TOMCAT-NSJ" 13 | 14 | envar: "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre" 15 | envar: "JVM_TMP=/tmp" 16 | envar: "CATALINA_TMPDIR=/tmp" 17 | envar: "CATALINA_HOME=/usr/share/tomcat8" 18 | envar: "CATALINA_BASE=/var/lib/tomcat8" 19 | envar: "CATALINA_OPTS=-server -XX:+UseParallelGC" 20 | envar: "JAVA_OPTS=-Djava.awt.headless=true -Djava.net.preferIPv4Stack=true -Xms256M -Xmx512M -Djava.security.egd=file:/dev/./urandom" 21 | 22 | rlimit_as: 2048 23 | rlimit_fsize: 1024 24 | rlimit_cpu_type: INF 25 | rlimit_nofile: 1024 26 | 27 | time_limit: 0 28 | 29 | cap: "CAP_NET_BIND_SERVICE" 30 | 31 | uidmap { 32 | inside_id: "tomcat8" 33 | outside_id: "tomcat8" 34 | } 35 | 36 | gidmap { 37 | inside_id: "tomcat8" 38 | outside_id: "tomcat8" 39 | } 40 | 41 | mount_proc: false 42 | 43 | mount { 44 | src: "/etc/tomcat8" 45 | dst: "/etc/tomcat8" 46 | is_bind: true 47 | rw: false 48 | } 49 | 50 | mount { 51 | src: "/var/lib/tomcat8" 52 | dst: "/var/lib/tomcat8" 53 | is_bind: true 54 | rw: true 55 | } 56 | 57 | mount { 58 | src: "/var/log/tomcat8" 59 | dst: "/var/log/tomcat8" 60 | is_bind: true 61 | rw: true 62 | } 63 | 64 | mount { 65 | src: "/var/cache/tomcat8" 66 | dst: "/var/cache/tomcat8" 67 | is_bind: true 68 | rw: true 69 | } 70 | 71 | mount { 72 | src: "/usr/share/tomcat8" 73 | dst: "/usr/share/tomcat8" 74 | is_bind: true 75 | rw: false 76 | } 77 | 78 | mount { 79 | src: "/bin" 80 | dst: "/bin" 81 | is_bind: true 82 | rw: false 83 | } 84 | 85 | mount { 86 | src: "/lib" 87 | dst: "/lib" 88 | is_bind: true 89 | rw: false 90 | } 91 | 92 | mount { 93 | src: "/lib64" 94 | dst: "/lib64" 95 | is_bind: true 96 | rw: false 97 | } 98 | 99 | mount { 100 | src: "/usr/bin" 101 | dst: "/usr/bin" 102 | is_bind: true 103 | rw: false 104 | } 105 | 106 | mount { 107 | src: "/usr/lib" 108 | dst: "/usr/lib" 109 | is_bind: true 110 | rw: false 111 | } 112 | 113 | mount { 114 | src:
"/usr/share/java" 115 | dst: "/usr/share/java" 116 | is_bind: true 117 | rw: false 118 | } 119 | 120 | mount { 121 | dst: "/tmp" 122 | fstype: "tmpfs" 123 | rw: true 124 | } 125 | 126 | mount { 127 | dst: "/proc" 128 | fstype: "proc" 129 | rw: false 130 | } 131 | 132 | exec_bin { 133 | path: "/usr/share/tomcat8/bin/catalina.sh" 134 | arg : "run" 135 | } 136 | -------------------------------------------------------------------------------- /configs/weechat-with-net.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "weechat-with-net" 4 | 5 | description: "This policy allows to run weechat in a jail. " 6 | description: "Networking is permitted with this setup (clone_newnet: false). " 7 | description: "" 8 | description: "The only permitted home directory is $HOME/.weechat." 9 | description: "" 10 | description: "Run as: nsjail --config configs/weechat-with-net.cfg" 11 | 12 | mode: ONCE 13 | hostname: "WEECHAT" 14 | cwd: "/tmp" 15 | daemon: false 16 | 17 | time_limit: 0 18 | max_cpus: 1 19 | 20 | envar: "HOME" 21 | envar: "TERM" 22 | envar: "LANG" 23 | envar: "LC_CTYPE" 24 | envar: "TMP=/tmp" 25 | 26 | log_fd: 2 27 | 28 | rlimit_as: 4096 29 | rlimit_cpu_type: INF 30 | rlimit_fsize: 4096 31 | rlimit_nofile: 128 32 | 33 | clone_newnet: false 34 | 35 | mount { 36 | dst: "/proc" 37 | fstype: "proc" 38 | } 39 | 40 | mount { 41 | src: "/lib" 42 | dst: "/lib" 43 | is_bind: true 44 | } 45 | 46 | mount { 47 | src: "/usr/lib" 48 | dst: "/usr/lib" 49 | is_bind: true 50 | } 51 | 52 | mount { 53 | src: "/lib64" 54 | dst: "/lib64" 55 | is_bind: true 56 | mandatory: false 57 | } 58 | 59 | mount { 60 | src: "/lib32" 61 | dst: "/lib32" 62 | is_bind: true 63 | mandatory: false 64 | } 65 | 66 | mount { 67 | src: "/usr/share" 68 | dst: "/usr/share" 69 | is_bind: true 70 | } 71 | 72 | mount { 73 | src: "/dev/urandom" 74 | dst: "/dev/urandom" 75 | is_bind: true 76 | rw: true 77 | } 78 | 79 | mount { 80 | src: 
"/dev/null" 81 | dst: "/dev/null" 82 | is_bind: true 83 | rw: true 84 | } 85 | 86 | mount { 87 | src: "/etc/resolv.conf" 88 | dst: "/etc/resolv.conf" 89 | is_bind: true 90 | mandatory: false 91 | } 92 | 93 | mount { 94 | src: "/etc/ssl" 95 | dst: "/etc/ssl" 96 | is_bind: true 97 | } 98 | 99 | mount { 100 | dst: "/tmp" 101 | fstype: "tmpfs" 102 | rw: true 103 | is_bind: false 104 | } 105 | 106 | mount { 107 | dst: "/dev/shm" 108 | fstype: "tmpfs" 109 | rw: true 110 | is_bind: false 111 | } 112 | 113 | mount { 114 | prefix_dst_env: "HOME" 115 | dst: "" 116 | fstype: "tmpfs" 117 | rw: true 118 | is_bind: false 119 | } 120 | 121 | mount { 122 | prefix_src_env: "HOME" 123 | src: "/.weechat" 124 | prefix_dst_env: "HOME" 125 | dst: "/.weechat" 126 | rw: true 127 | is_bind: true 128 | mandatory: true 129 | } 130 | 131 | seccomp_string: "KILL_PROCESS { " 132 | seccomp_string: " ptrace, " 133 | seccomp_string: " process_vm_readv, " 134 | seccomp_string: " process_vm_writev " 135 | seccomp_string: "}, " 136 | seccomp_string: "ERRNO(1) { " 137 | seccomp_string: " sched_setaffinity " 138 | seccomp_string: "} " 139 | seccomp_string: "DEFAULT ALLOW " 140 | 141 | exec_bin { 142 | path: "/usr/bin/weechat" 143 | exec_fd: true 144 | } 145 | -------------------------------------------------------------------------------- /configs/xchat-with-net.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "xchat-with-net" 4 | 5 | description: "This policy allows to run xchat inside a jail. Access to networking is" 6 | description: "permitted with this setup (clone_newnet: false)." 7 | description: "" 8 | description: "The only permitted home directory is $HOME/.xchat2 and $HOME/Documents." 9 | description: "The rest of available on the FS files/dires are libs and X-related files/dirs." 
10 | description: "" 11 | description: "Run as:" 12 | description: "./nsjail --config configs/xchat-with-net.cfg" 13 | 14 | mode: ONCE 15 | hostname: "XCHAT" 16 | cwd: "/user" 17 | daemon: true 18 | 19 | time_limit: 0 20 | max_cpus: 2 21 | 22 | envar: "HOME=/user" 23 | envar: "DISPLAY" 24 | envar: "TMP=/tmp" 25 | envar: "FONTCONFIG_FILE=/etc/fonts/fonts.conf" 26 | envar: "FC_CONFIG_FILE=/etc/fonts/fonts.conf" 27 | envar: "LANG" 28 | 29 | rlimit_as: 4096 30 | rlimit_cpu_type: INF 31 | rlimit_fsize: 4096 32 | rlimit_nofile: 128 33 | 34 | clone_newnet: false 35 | 36 | mount { 37 | dst: "/proc" 38 | fstype: "proc" 39 | } 40 | 41 | mount { 42 | src: "/lib" 43 | dst: "/lib" 44 | is_bind: true 45 | } 46 | 47 | mount { 48 | src: "/usr/lib" 49 | dst: "/usr/lib" 50 | is_bind: true 51 | } 52 | 53 | mount { 54 | src: "/lib64" 55 | dst: "/lib64" 56 | is_bind: true 57 | mandatory: false 58 | } 59 | 60 | mount { 61 | src: "/lib32" 62 | dst: "/lib32" 63 | is_bind: true 64 | mandatory: false 65 | } 66 | 67 | mount { 68 | src_content: "\n\n/usr/share/fonts/tmp/fontconfig" 69 | dst: "/etc/fonts/fonts.conf" 70 | } 71 | 72 | mount { 73 | src: "/usr/share" 74 | dst: "/usr/share" 75 | is_bind: true 76 | } 77 | 78 | mount { 79 | src: "/dev/urandom" 80 | dst: "/dev/urandom" 81 | is_bind: true 82 | rw: true 83 | } 84 | 85 | mount { 86 | src_content: "nameserver 8.8.8.8" 87 | dst: "/etc/resolv.conf" 88 | } 89 | 90 | mount { 91 | dst: "/tmp" 92 | fstype: "tmpfs" 93 | rw: true 94 | is_bind: false 95 | } 96 | 97 | mount { 98 | dst: "/dev/shm" 99 | fstype: "tmpfs" 100 | rw: true 101 | is_bind: false 102 | } 103 | 104 | mount { 105 | dst: "/user" 106 | fstype: "tmpfs" 107 | rw: true 108 | } 109 | 110 | mount { 111 | prefix_src_env: "HOME" 112 | src: "/Documents" 113 | dst: "/user/Documents" 114 | rw: true 115 | is_bind: true 116 | mandatory: false 117 | } 118 | 119 | mount { 120 | prefix_src_env: "HOME" 121 | src: "/.config/xchat2" 122 | dst: "/user/.xchat2" 123 | is_bind: true 124 | rw: true 125 
| mandatory: false 126 | } 127 | 128 | mount { 129 | prefix_src_env: "XAUTHORITY" 130 | src: "" 131 | dst: "/user/.Xauthority" 132 | is_bind: true 133 | rw: false 134 | mandatory: false 135 | } 136 | 137 | mount { 138 | src: "/tmp/.X11-unix" 139 | dst: "/tmp/.X11-unix" 140 | is_bind: true 141 | mandatory: false 142 | } 143 | 144 | seccomp_string: "KILL_PROCESS { " 145 | seccomp_string: " ptrace, " 146 | seccomp_string: " process_vm_readv, " 147 | seccomp_string: " process_vm_writev " 148 | seccomp_string: "}, " 149 | seccomp_string: "ERRNO(1) { " 150 | seccomp_string: " sched_setaffinity " 151 | seccomp_string: "} " 152 | seccomp_string: "DEFAULT ALLOW " 153 | 154 | exec_bin { 155 | path: "/usr/bin/xchat" 156 | exec_fd: true 157 | } 158 | -------------------------------------------------------------------------------- /configs/znc-with-net.cfg: -------------------------------------------------------------------------------- 1 | # Example config for nsjail 2 | 3 | name: "znc-with-net" 4 | 5 | description: "This policy allows to run znc in a jail. " 6 | description: "Networking is permitted with this setup (clone_newnet: false). " 7 | description: "" 8 | description: "The only permitted home directory is $HOME/.znc."
9 | description: "" 10 | description: "Run as: nsjail --config configs/znc-with-net.cfg" 11 | 12 | mode: ONCE 13 | hostname: "ZNC" 14 | cwd: "/home/znc" 15 | daemon: true 16 | 17 | time_limit: 0 18 | max_cpus: 1 19 | 20 | envar: "HOME=/home/znc" 21 | envar: "TMP=/tmp" 22 | 23 | log_fd: 2 24 | 25 | rlimit_as: 4096 26 | rlimit_cpu_type: INF 27 | rlimit_fsize: 4096 28 | rlimit_nofile: 128 29 | 30 | clone_newnet: false 31 | 32 | mount { 33 | dst: "/proc" 34 | fstype: "proc" 35 | } 36 | 37 | mount { 38 | src: "/lib" 39 | dst: "/lib" 40 | is_bind: true 41 | } 42 | 43 | mount { 44 | src: "/usr/lib" 45 | dst: "/usr/lib" 46 | is_bind: true 47 | } 48 | 49 | mount { 50 | src: "/lib64" 51 | dst: "/lib64" 52 | is_bind: true 53 | mandatory: false 54 | } 55 | 56 | mount { 57 | src: "/lib32" 58 | dst: "/lib32" 59 | is_bind: true 60 | mandatory: false 61 | } 62 | 63 | mount { 64 | src: "/usr/share" 65 | dst: "/usr/share" 66 | is_bind: true 67 | } 68 | 69 | mount { 70 | src: "/dev/urandom" 71 | dst: "/dev/urandom" 72 | is_bind: true 73 | rw: true 74 | } 75 | 76 | mount { 77 | src: "/dev/null" 78 | dst: "/dev/null" 79 | is_bind: true 80 | rw: true 81 | } 82 | 83 | mount { 84 | src: "/etc/resolv.conf" 85 | dst: "/etc/resolv.conf" 86 | is_bind: true 87 | mandatory: false 88 | } 89 | 90 | mount { 91 | src: "/etc/ssl" 92 | dst: "/etc/ssl" 93 | is_bind: true 94 | } 95 | 96 | mount { 97 | dst: "/tmp" 98 | fstype: "tmpfs" 99 | rw: true 100 | is_bind: false 101 | } 102 | 103 | mount { 104 | dst: "/dev/shm" 105 | fstype: "tmpfs" 106 | rw: true 107 | is_bind: false 108 | } 109 | 110 | mount { 111 | dst: "/home/znc" 112 | fstype: "tmpfs" 113 | rw: true 114 | is_bind: false 115 | } 116 | 117 | mount { 118 | prefix_src_env: "HOME" 119 | src: "/.znc" 120 | dst: "/home/znc/.znc" 121 | rw: true 122 | is_bind: true 123 | mandatory: true 124 | } 125 | 126 | seccomp_string: "KILL_PROCESS { " 127 | seccomp_string: " ptrace, " 128 | seccomp_string: " process_vm_readv, " 129 | seccomp_string: " 
process_vm_writev " 130 | seccomp_string: "}, " 131 | seccomp_string: "ERRNO(1) { " 132 | seccomp_string: " sched_setaffinity " 133 | seccomp_string: "} " 134 | seccomp_string: "DEFAULT ALLOW " 135 | 136 | exec_bin { 137 | path: "/usr/bin/znc" 138 | arg: "-f" 139 | exec_fd: true 140 | } 141 | -------------------------------------------------------------------------------- /contain.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - isolating the binary 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #include "contain.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | #include 41 | 42 | #include "caps.h" 43 | #include "cgroup.h" 44 | #include "cpu.h" 45 | #include "logs.h" 46 | #include "macros.h" 47 | #include "mnt.h" 48 | #include "net.h" 49 | #include "pid.h" 50 | #include "user.h" 51 | #include "util.h" 52 | #include "uts.h" 53 | 54 | namespace contain { 55 | 56 | static bool containUserNs(nsjconf_t* nsjconf) { 57 | return user::initNsFromChild(nsjconf); 58 | } 59 | 60 | static bool containInitPidNs(nsjconf_t* nsjconf) { 61 | return pid::initNs(nsjconf); 62 | } 63 | 64 | static bool containInitNetNs(nsjconf_t* nsjconf) { 65 | return net::initNsFromChild(nsjconf); 66 | } 67 | 68 | static bool containInitUtsNs(nsjconf_t* nsjconf) { 69 | return uts::initNs(nsjconf); 70 | } 71 | 72 | static bool containInitCgroupNs(void) { 73 | return cgroup::initNs(); 74 | } 75 | 76 | static bool containDropPrivs(nsjconf_t* nsjconf) { 77 | #ifndef PR_SET_NO_NEW_PRIVS 78 | #define PR_SET_NO_NEW_PRIVS 38 79 | #endif 80 | if (!nsjconf->disable_no_new_privs) { 81 | if (prctl(PR_SET_NO_NEW_PRIVS, 1UL, 0UL, 0UL, 0UL) == -1) { 82 | /* Only new kernels support it */ 83 | PLOG_W("prctl(PR_SET_NO_NEW_PRIVS, 1)"); 84 | } 85 | } 86 | 87 | if (!caps::initNs(nsjconf)) { 88 | return false; 89 | } 90 | 91 | return true; 92 | } 93 | 94 | static bool containPrepareEnv(nsjconf_t* nsjconf) { 95 | if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) == -1) { 96 | PLOG_E("prctl(PR_SET_PDEATHSIG, SIGKILL)"); 97 | return false; 98 | } 99 | if (nsjconf->personality && personality(nsjconf->personality) == -1) { 100 | PLOG_E("personality(%lx)", nsjconf->personality); 101 | return false; 102 | } 103 | LOG_D("setpriority(%d)", nsjconf->nice_level); 104 | errno = 0; 105 | if (setpriority(PRIO_PROCESS, 0, 
nsjconf->nice_level) == -1 && errno != 0) { 106 | PLOG_W("setpriority(%d)", nsjconf->nice_level); 107 | } 108 | if (!nsjconf->skip_setsid) { 109 | setsid(); 110 | } 111 | return true; 112 | } 113 | 114 | static bool containInitMountNs(nsjconf_t* nsjconf) { 115 | return mnt::initNs(nsjconf); 116 | } 117 | 118 | static bool containCPU(nsjconf_t* nsjconf) { 119 | return cpu::initCpu(nsjconf); 120 | } 121 | 122 | static bool containTSC(nsjconf_t* nsjconf) { 123 | if (nsjconf->disable_tsc) { 124 | #if defined(__x86_64__) || defined(__i386__) 125 | if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0) == -1) { 126 | PLOG_E("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)"); 127 | return false; 128 | } 129 | #else /* defined(__x86_64__) || defined(__i386__) */ 130 | LOG_W("prctl(PR_SET_TSC, PR_TSC_SIGSEGV) requested, but it's supported under " 131 | "x86/x86-64 CPU architectures only. Ignoring it!"); 132 | #endif /* defined(__x86_64__) || defined(__i386__) */ 133 | } 134 | return true; 135 | } 136 | 137 | static bool containSetLimits(nsjconf_t* nsjconf) { 138 | if (nsjconf->disable_rl) { 139 | return true; 140 | } 141 | 142 | struct rlimit64 rl; 143 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_as; 144 | if (util::setrlimit(RLIMIT_AS, rl) == -1) { 145 | PLOG_E("util::setrlimit(0, RLIMIT_AS, %" PRIu64 ")", nsjconf->rl_as); 146 | return false; 147 | } 148 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_core; 149 | if (util::setrlimit(RLIMIT_CORE, rl) == -1) { 150 | PLOG_E("util::setrlimit(0, RLIMIT_CORE, %" PRIu64 ")", nsjconf->rl_core); 151 | return false; 152 | } 153 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_cpu; 154 | if (util::setrlimit(RLIMIT_CPU, rl) == -1) { 155 | PLOG_E("util::setrlimit(0, RLIMIT_CPU, %" PRIu64 ")", nsjconf->rl_cpu); 156 | return false; 157 | } 158 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_fsize; 159 | if (util::setrlimit(RLIMIT_FSIZE, rl) == -1) { 160 | PLOG_E("util::setrlimit(0, RLIMIT_FSIZE, %" PRIu64 ")", nsjconf->rl_fsize); 161 | return false; 162 | } 163 | rl.rlim_cur = 
rl.rlim_max = nsjconf->rl_nofile; 164 | if (util::setrlimit(RLIMIT_NOFILE, rl) == -1) { 165 | PLOG_E("util::setrlimit(0, RLIMIT_NOFILE, %" PRIu64 ")", nsjconf->rl_nofile); 166 | return false; 167 | } 168 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_nproc; 169 | if (util::setrlimit(RLIMIT_NPROC, rl) == -1) { 170 | PLOG_E("util::setrlimit(0, RLIMIT_NPROC, %" PRIu64 ")", nsjconf->rl_nproc); 171 | return false; 172 | } 173 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_stack; 174 | if (util::setrlimit(RLIMIT_STACK, rl) == -1) { 175 | PLOG_E("util::setrlimit(0, RLIMIT_STACK, %" PRIu64 ")", nsjconf->rl_stack); 176 | return false; 177 | } 178 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_mlock; 179 | if (util::setrlimit(RLIMIT_MEMLOCK, rl) == -1) { 180 | PLOG_E("util::setrlimit(0, RLIMIT_MEMLOCK, %" PRIu64 ")", nsjconf->rl_mlock); 181 | return false; 182 | } 183 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_rtpr; 184 | if (util::setrlimit(RLIMIT_RTPRIO, rl) == -1) { 185 | PLOG_E("util::setrlimit(0, RLIMIT_RTPRIO, %" PRIu64 ")", nsjconf->rl_rtpr); 186 | return false; 187 | } 188 | rl.rlim_cur = rl.rlim_max = nsjconf->rl_msgq; 189 | if (util::setrlimit(RLIMIT_MSGQUEUE, rl) == -1) { 190 | PLOG_E("util::setrlimit(0, RLIMIT_MSGQUEUE , %" PRIu64 ")", nsjconf->rl_msgq); 191 | return false; 192 | } 193 | return true; 194 | } 195 | 196 | static bool containPassFd(nsjconf_t* nsjconf, int fd) { 197 | return (std::find(nsjconf->openfds.begin(), nsjconf->openfds.end(), fd) != 198 | nsjconf->openfds.end()); 199 | } 200 | 201 | static bool containMakeFdsCOENaive(nsjconf_t* nsjconf) { 202 | /* 203 | * Don't use getrlimit(RLIMIT_NOFILE) here, as it can return an artifically small value 204 | * (e.g. 32), which could be smaller than a maximum assigned number to file-descriptors 205 | * in this process. Just use some reasonably sane value (e.g. 
1024) 206 | */ 207 | for (unsigned fd = 0; fd < 1024; fd++) { 208 | int flags = TEMP_FAILURE_RETRY(fcntl(fd, F_GETFD, 0)); 209 | if (flags == -1 && errno == EBADF) { 210 | continue; 211 | } 212 | if (flags == -1) { 213 | PLOG_E("Couldn't get flags for fd=%d", fd) 214 | return false; 215 | } 216 | if (containPassFd(nsjconf, fd)) { 217 | LOG_D("fd=%d will be passed to the child process", fd); 218 | if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags & ~(FD_CLOEXEC))) == -1) { 219 | PLOG_E("Could not set FD_CLOEXEC for fd=%d", fd); 220 | return false; 221 | } 222 | } else { 223 | if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags | FD_CLOEXEC)) == -1) { 224 | PLOG_E("Could not set FD_CLOEXEC for fd=%d", fd); 225 | return false; 226 | } 227 | } 228 | } 229 | return true; 230 | } 231 | 232 | static bool containMakeFdsCOEProc(nsjconf_t* nsjconf) { 233 | int dirfd = open("/proc/self/fd", O_DIRECTORY | O_RDONLY | O_CLOEXEC); 234 | if (dirfd == -1) { 235 | PLOG_D("open('/proc/self/fd', O_DIRECTORY|O_RDONLY|O_CLOEXEC)"); 236 | return false; 237 | } 238 | DIR* dir = fdopendir(dirfd); 239 | if (dir == nullptr) { 240 | PLOG_W("fdopendir(fd=%d)", dirfd); 241 | close(dirfd); 242 | return false; 243 | } 244 | /* Make all fds above stderr close-on-exec */ 245 | for (;;) { 246 | errno = 0; 247 | struct dirent* entry = readdir(dir); 248 | if (entry == nullptr && errno != 0) { 249 | PLOG_D("readdir('/proc/self/fd')"); 250 | closedir(dir); 251 | return false; 252 | } 253 | if (entry == nullptr) { 254 | break; 255 | } 256 | if (util::StrEq(".", entry->d_name)) { 257 | continue; 258 | } 259 | if (util::StrEq("..", entry->d_name)) { 260 | continue; 261 | } 262 | errno = 0; 263 | int fd = strtoimax(entry->d_name, NULL, 10); 264 | if (errno != 0) { 265 | PLOG_W("Cannot convert /proc/self/fd/%s to a number", entry->d_name); 266 | continue; 267 | } 268 | int flags = TEMP_FAILURE_RETRY(fcntl(fd, F_GETFD, 0)); 269 | if (flags == -1) { 270 | PLOG_D("fcntl(fd=%d, F_GETFD, 0)", fd); 271 | closedir(dir); 
272 | return false; 273 | } 274 | if (containPassFd(nsjconf, fd)) { 275 | LOG_D("fd=%d will be passed to the child process", fd); 276 | if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags & ~(FD_CLOEXEC))) == -1) { 277 | PLOG_E("Could not clear FD_CLOEXEC for fd=%d", fd); 278 | closedir(dir); 279 | return false; 280 | } 281 | } else { 282 | LOG_D("fd=%d will be closed before execve()", fd); 283 | if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags | FD_CLOEXEC)) == -1) { 284 | PLOG_E("Could not set FD_CLOEXEC for fd=%d", fd); 285 | closedir(dir); 286 | return false; 287 | } 288 | } 289 | } 290 | closedir(dir); 291 | return true; 292 | } 293 | 294 | static bool containMakeFdsCOE(nsjconf_t* nsjconf) { 295 | if (containMakeFdsCOEProc(nsjconf)) { 296 | return true; 297 | } 298 | if (containMakeFdsCOENaive(nsjconf)) { 299 | return true; 300 | } 301 | LOG_E("Couldn't mark relevant file-descriptors as close-on-exec with any known method"); 302 | return false; 303 | } 304 | 305 | bool setupFD(nsjconf_t* nsjconf, int fd_in, int fd_out, int fd_err) { 306 | if (nsjconf->stderr_to_null) { 307 | LOG_D("Redirecting fd=2 (STDERR_FILENO) to /dev/null"); 308 | if ((fd_err = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR))) == -1) { 309 | PLOG_E("open('/dev/null', O_RDWR"); 310 | return false; 311 | } 312 | } 313 | if (nsjconf->is_silent) { 314 | LOG_D("Redirecting fd=0-2 (STDIN/OUT/ERR_FILENO) to /dev/null"); 315 | if (TEMP_FAILURE_RETRY(fd_in = fd_out = fd_err = open("/dev/null", O_RDWR)) == -1) { 316 | PLOG_E("open('/dev/null', O_RDWR)"); 317 | return false; 318 | } 319 | } 320 | /* Set stdin/stdout/stderr to the net */ 321 | if (fd_in != STDIN_FILENO && TEMP_FAILURE_RETRY(dup2(fd_in, STDIN_FILENO)) == -1) { 322 | PLOG_E("dup2(%d, STDIN_FILENO)", fd_in); 323 | return false; 324 | } 325 | if (fd_out != STDOUT_FILENO && TEMP_FAILURE_RETRY(dup2(fd_out, STDOUT_FILENO)) == -1) { 326 | PLOG_E("dup2(%d, STDOUT_FILENO)", fd_out); 327 | return false; 328 | } 329 | if (fd_err != STDERR_FILENO && 
TEMP_FAILURE_RETRY(dup2(fd_err, STDERR_FILENO)) == -1) { 330 | PLOG_E("dup2(%d, STDERR_FILENO)", fd_err); 331 | return false; 332 | } 333 | return true; 334 | } 335 | 336 | bool containProc(nsjconf_t* nsjconf) { 337 | RETURN_ON_FAILURE(containUserNs(nsjconf)); 338 | RETURN_ON_FAILURE(containInitPidNs(nsjconf)); 339 | RETURN_ON_FAILURE(containInitMountNs(nsjconf)); 340 | RETURN_ON_FAILURE(containInitNetNs(nsjconf)); 341 | RETURN_ON_FAILURE(containInitUtsNs(nsjconf)); 342 | RETURN_ON_FAILURE(containInitCgroupNs()); 343 | RETURN_ON_FAILURE(containDropPrivs(nsjconf)); 344 | ; 345 | /* */ 346 | /* As non-root */ 347 | RETURN_ON_FAILURE(containCPU(nsjconf)); 348 | RETURN_ON_FAILURE(containTSC(nsjconf)); 349 | RETURN_ON_FAILURE(containSetLimits(nsjconf)); 350 | RETURN_ON_FAILURE(containPrepareEnv(nsjconf)); 351 | RETURN_ON_FAILURE(containMakeFdsCOE(nsjconf)); 352 | 353 | return true; 354 | } 355 | 356 | } // namespace contain 357 | -------------------------------------------------------------------------------- /contain.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - isolating the binary 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #ifndef NS_CONTAIN_H 23 | #define NS_CONTAIN_H 24 | 25 | #include 26 | 27 | #include "nsjail.h" 28 | 29 | namespace contain { 30 | 31 | bool setupFD(nsjconf_t* nsjconf, int fd_in, int fd_out, int fd_err); 32 | bool containProc(nsjconf_t* nsjconf); 33 | 34 | } // namespace contain 35 | 36 | #endif /* NS_CONTAIN_H */ 37 | -------------------------------------------------------------------------------- /cpu.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - CPU affinity 4 | ----------------------------------------- 5 | 6 | Copyright 2017 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #include "cpu.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | 32 | #include "logs.h" 33 | #include "util.h" 34 | 35 | namespace cpu { 36 | 37 | static const std::string listCpusInSet(cpu_set_t* mask) { 38 | std::string ret; 39 | for (size_t i = 0; i < CPU_SETSIZE; i++) { 40 | if (CPU_ISSET(i, mask)) { 41 | if (!ret.empty()) { 42 | ret.append(","); 43 | } 44 | ret.append(std::to_string(i)); 45 | } 46 | } 47 | return ret; 48 | } 49 | 50 | static size_t getNthOnlineCpu(cpu_set_t* mask, size_t n) { 51 | for (size_t i = 0, j = 0; i < CPU_SETSIZE; i++) { 52 | if (CPU_ISSET(i, mask)) { 53 | if (j == n) { 54 | return i; 55 | } 56 | j++; 57 | } 58 | } 59 | LOG_F("No CPU #%zu found, yet there should be %zu left in the mask [%s]", n, 60 | (size_t)CPU_COUNT(mask), listCpusInSet(mask).c_str()); 61 | return 0; 62 | } 63 | 64 | static void setRandomCpu(cpu_set_t* orig_mask, cpu_set_t* new_mask, size_t available_cpus) { 65 | if (available_cpus == 0) { 66 | LOG_F("There are no more CPUs left to use, and there should be at least 1 left"); 67 | } 68 | 69 | size_t n = util::rnd64() % available_cpus; 70 | n = getNthOnlineCpu(orig_mask, n); 71 | 72 | CPU_SET(n, new_mask); 73 | LOG_D("Add CPU #%zu from the original mask=[%s] (size=%zu, available_cpus=%zu), new " 74 | "mask=[%s] (size=%zu)", 75 | n, listCpusInSet(orig_mask).c_str(), (size_t)CPU_COUNT(orig_mask), available_cpus, 76 | listCpusInSet(new_mask).c_str(), (size_t)CPU_COUNT(new_mask)); 77 | CPU_CLR(n, orig_mask); 78 | } 79 | 80 | bool initCpu(nsjconf_t* nsjconf) { 81 | if (nsjconf->max_cpus == 0) { 82 | LOG_D("No max_cpus limit set"); 83 | return true; 84 | } 85 | 86 | std::unique_ptr orig_mask(new cpu_set_t); 87 | if (!orig_mask) { 88 | PLOG_W("Failure allocating cpu_set_t"); 89 | return false; 90 | } 91 | if (sched_getaffinity(0, CPU_ALLOC_SIZE(CPU_SETSIZE), orig_mask.get()) == -1) { 92 | PLOG_W("sched_getaffinity(0, mask_size=%zu)", 
(size_t)CPU_ALLOC_SIZE(CPU_SETSIZE)); 93 | return false; 94 | } 95 | size_t available_cpus = CPU_COUNT(orig_mask.get()); 96 | 97 | LOG_D("Original CPU set: [%s], with %zu allowed CPUs", 98 | listCpusInSet(orig_mask.get()).c_str(), available_cpus); 99 | 100 | if (nsjconf->max_cpus > available_cpus) { 101 | LOG_W( 102 | "Number of requested CPUs is bigger than number of available CPUs (%zu > %zu)", 103 | nsjconf->max_cpus, available_cpus); 104 | return true; 105 | } 106 | if (nsjconf->max_cpus == available_cpus) { 107 | LOG_D("All CPUs requested (%zu of %zu)", nsjconf->max_cpus, available_cpus); 108 | return true; 109 | } 110 | 111 | std::unique_ptr new_mask(new cpu_set_t); 112 | if (!new_mask) { 113 | PLOG_W("Failure allocating cpu_set_t"); 114 | return false; 115 | } 116 | CPU_ZERO(new_mask.get()); 117 | 118 | for (size_t i = 0; i < nsjconf->max_cpus; i++) { 119 | setRandomCpu(orig_mask.get(), new_mask.get(), available_cpus); 120 | available_cpus--; 121 | } 122 | 123 | LOG_D("Setting new CPU mask=[%s] with %zu allowed CPUs (max_cpus=%zu), %zu CPUs " 124 | "(CPU_COUNT=%zu) left mask=[%s]", 125 | listCpusInSet(new_mask.get()).c_str(), nsjconf->max_cpus, 126 | (size_t)CPU_COUNT(new_mask.get()), available_cpus, (size_t)CPU_COUNT(orig_mask.get()), 127 | listCpusInSet(orig_mask.get()).c_str()); 128 | 129 | if (sched_setaffinity(0, CPU_ALLOC_SIZE(CPU_SETSIZE), new_mask.get()) == -1) { 130 | PLOG_W("sched_setaffinity(mask=%s size=%zu max_cpus=%zu (CPU_COUNT=%zu)) failed", 131 | listCpusInSet(new_mask.get()).c_str(), (size_t)CPU_ALLOC_SIZE(CPU_SETSIZE), 132 | nsjconf->max_cpus, (size_t)CPU_COUNT(new_mask.get())); 133 | return false; 134 | } 135 | 136 | return true; 137 | } 138 | 139 | } // namespace cpu 140 | -------------------------------------------------------------------------------- /cpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - CPU affinity 4 | ----------------------------------------- 5 | 6 | 
Copyright 2017 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #ifndef NS_CPU_H 23 | #define NS_CPU_H 24 | 25 | #include 26 | 27 | #include "nsjail.h" 28 | 29 | namespace cpu { 30 | 31 | bool initCpu(nsjconf_t* nsjconf); 32 | 33 | } // namespace cpu 34 | 35 | #endif /* NS_CPU_H */ 36 | -------------------------------------------------------------------------------- /logs.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - logging 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #include "logs.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "macros.h" 39 | #include "util.h" 40 | 41 | namespace logs { 42 | 43 | static int _log_fd = STDERR_FILENO; 44 | static bool _log_fd_isatty = true; 45 | static enum llevel_t _log_level = INFO; 46 | static bool _log_set = false; 47 | 48 | static void setDupLogFdOr(int fd, int orfd) { 49 | int saved_errno = errno; 50 | _log_fd = fcntl(fd, F_DUPFD_CLOEXEC, 0); 51 | if (_log_fd == -1) { 52 | _log_fd = fcntl(orfd, F_DUPFD_CLOEXEC, 0); 53 | } 54 | if (_log_fd == -1) { 55 | _log_fd = orfd; 56 | } 57 | _log_fd_isatty = (isatty(_log_fd) == 1); 58 | if (getenv("NO_COLOR")) { 59 | _log_fd_isatty = false; 60 | } 61 | errno = saved_errno; 62 | } 63 | 64 | /* 65 | * Log to stderr by default. Use a dup()d fd, because in the future we'll associate the 66 | * connection socket with fd (0, 1, 2). 67 | */ 68 | __attribute__((constructor)) static void log_init(void) { 69 | setDupLogFdOr(STDERR_FILENO, STDERR_FILENO); 70 | } 71 | 72 | bool logSet() { 73 | return _log_set; 74 | } 75 | 76 | void setLogLevel(enum llevel_t ll) { 77 | _log_level = ll; 78 | } 79 | 80 | enum llevel_t getLogLevel(void) { 81 | return _log_level; 82 | } 83 | 84 | void logFile(const std::string& log_file, int log_fd) { 85 | _log_set = true; 86 | int newlogfd = -1; 87 | if (!log_file.empty()) { 88 | newlogfd = TEMP_FAILURE_RETRY( 89 | open(log_file.c_str(), O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC, 0640)); 90 | if (newlogfd == -1) { 91 | PLOG_W("Couldn't open('%s')", log_file.c_str()); 92 | } 93 | } 94 | /* Close previous log_fd */ 95 | if (_log_fd > STDERR_FILENO) { 96 | close(_log_fd); 97 | } 98 | setDupLogFdOr(newlogfd, log_fd); 99 | close(newlogfd); 100 | } 101 | 102 | void logMsg(enum llevel_t ll, const char* fn, int ln, bool perr, const char* fmt, ...) 
{ 103 | if (ll < _log_level) { 104 | return; 105 | } 106 | 107 | char strerr[512]; 108 | if (perr) { 109 | snprintf(strerr, sizeof(strerr), "%s", strerror(errno)); 110 | } 111 | struct { 112 | const char* const descr; 113 | const char* const prefix; 114 | const bool print_funcline; 115 | const bool print_time; 116 | } static const logLevels[] = { 117 | {"D", "\033[0;4m", true, true}, 118 | {"I", "\033[1m", false, true}, 119 | {"W", "\033[0;33m", true, true}, 120 | {"E", "\033[1;31m", true, true}, 121 | {"F", "\033[7;35m", true, true}, 122 | {"HR", "\033[0m", false, false}, 123 | {"HB", "\033[1m", false, false}, 124 | }; 125 | 126 | /* Start printing logs */ 127 | std::string msg; 128 | if (_log_fd_isatty) { 129 | msg.append(logLevels[ll].prefix); 130 | } 131 | if (ll != HELP && ll != HELP_BOLD) { 132 | msg.append("[").append(logLevels[ll].descr).append("]"); 133 | } 134 | if (logLevels[ll].print_time) { 135 | msg.append("[").append(util::timeToStr(time(NULL))).append("]"); 136 | } 137 | if (logLevels[ll].print_funcline) { 138 | msg.append("[") 139 | .append(std::to_string(getpid())) 140 | .append("] ") 141 | .append(fn) 142 | .append("():") 143 | .append(std::to_string(ln)); 144 | } 145 | 146 | char* strp; 147 | va_list args; 148 | va_start(args, fmt); 149 | int ret = vasprintf(&strp, fmt, args); 150 | va_end(args); 151 | if (ret == -1) { 152 | msg.append(" [logs internal]: MEMORY ALLOCATION ERROR"); 153 | } else { 154 | msg.append(" ").append(strp); 155 | free(strp); 156 | } 157 | if (perr) { 158 | msg.append(": ").append(strerr); 159 | } 160 | if (_log_fd_isatty) { 161 | msg.append("\033[0m"); 162 | } 163 | msg.append("\n"); 164 | /* End printing logs */ 165 | 166 | TEMP_FAILURE_RETRY(write(_log_fd, msg.c_str(), msg.size())); 167 | 168 | if (ll == FATAL) { 169 | exit(0xff); 170 | } 171 | } 172 | 173 | void logStop(int sig) { 174 | LOG_I("Server stops due to fatal signal (%d) caught. 
Exiting", sig); 175 | } 176 | 177 | } // namespace logs 178 | -------------------------------------------------------------------------------- /logs.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - logging 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #ifndef NS_LOGS_H 23 | #define NS_LOGS_H 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | namespace logs { 31 | 32 | #define LOG_HELP(...) logs::logMsg(logs::HELP, __FUNCTION__, __LINE__, false, __VA_ARGS__); 33 | #define LOG_HELP_BOLD(...) \ 34 | logs::logMsg(logs::HELP_BOLD, __FUNCTION__, __LINE__, false, __VA_ARGS__); 35 | 36 | #define LOG_D(...) \ 37 | if (logs::getLogLevel() <= logs::DEBUG) { \ 38 | logs::logMsg(logs::DEBUG, __FUNCTION__, __LINE__, false, __VA_ARGS__); \ 39 | } 40 | #define LOG_I(...) \ 41 | if (logs::getLogLevel() <= logs::INFO) { \ 42 | logs::logMsg(logs::INFO, __FUNCTION__, __LINE__, false, __VA_ARGS__); \ 43 | } 44 | #define LOG_W(...) \ 45 | if (logs::getLogLevel() <= logs::WARNING) { \ 46 | logs::logMsg(logs::WARNING, __FUNCTION__, __LINE__, false, __VA_ARGS__); \ 47 | } 48 | #define LOG_E(...) \ 49 | if (logs::getLogLevel() <= logs::ERROR) { \ 50 | logs::logMsg(logs::ERROR, __FUNCTION__, __LINE__, false, __VA_ARGS__); \ 51 | } 52 | #define LOG_F(...) 
\ 53 | if (logs::getLogLevel() <= logs::FATAL) { \ 54 | logs::logMsg(logs::FATAL, __FUNCTION__, __LINE__, false, __VA_ARGS__); \ 55 | } 56 | 57 | #define PLOG_D(...) \ 58 | if (logs::getLogLevel() <= logs::DEBUG) { \ 59 | logs::logMsg(logs::DEBUG, __FUNCTION__, __LINE__, true, __VA_ARGS__); \ 60 | } 61 | #define PLOG_I(...) \ 62 | if (logs::getLogLevel() <= logs::INFO) { \ 63 | logs::logMsg(logs::INFO, __FUNCTION__, __LINE__, true, __VA_ARGS__); \ 64 | } 65 | #define PLOG_W(...) \ 66 | if (logs::getLogLevel() <= logs::WARNING) { \ 67 | logs::logMsg(logs::WARNING, __FUNCTION__, __LINE__, true, __VA_ARGS__); \ 68 | } 69 | #define PLOG_E(...) \ 70 | if (logs::getLogLevel() <= logs::ERROR) { \ 71 | logs::logMsg(logs::ERROR, __FUNCTION__, __LINE__, true, __VA_ARGS__); \ 72 | } 73 | #define PLOG_F(...) \ 74 | if (logs::getLogLevel() <= logs::FATAL) { \ 75 | logs::logMsg(logs::FATAL, __FUNCTION__, __LINE__, true, __VA_ARGS__); \ 76 | } 77 | 78 | enum llevel_t { 79 | DEBUG = 0, 80 | INFO, 81 | WARNING, 82 | ERROR, 83 | FATAL, 84 | HELP, 85 | HELP_BOLD, 86 | }; 87 | 88 | void logMsg(enum llevel_t ll, const char* fn, int ln, bool perr, const char* fmt, ...) 89 | __attribute__((format(printf, 5, 6))); 90 | void logStop(int sig); 91 | void setLogLevel(enum llevel_t ll); 92 | enum llevel_t getLogLevel(void); 93 | void logFile(const std::string& log_file, int log_fd); 94 | bool logSet(); 95 | 96 | } // namespace logs 97 | 98 | #endif /* NS_LOGS_H */ 99 | -------------------------------------------------------------------------------- /macros.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - common macros 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #ifndef NS_COMMON_H 23 | #define NS_COMMON_H 24 | 25 | #include 26 | 27 | #if !defined(TEMP_FAILURE_RETRY) 28 | #define TEMP_FAILURE_RETRY(expression) \ 29 | (__extension__({ \ 30 | long int __result; \ 31 | do __result = (long int)(expression); \ 32 | while (__result == -1L && errno == EINTR); \ 33 | __result; \ 34 | })) 35 | #endif /* !defined(TEMP_FAILURE_RETRY) */ 36 | 37 | #if !defined(ARR_SZ) 38 | #define ARR_SZ(array) (sizeof(array) / sizeof(*array)) 39 | #endif /* !defined(ARR_SZ) */ 40 | 41 | #define NS_VALSTR_STRUCT(x) {(uint64_t)x, #x} 42 | 43 | #endif /* NS_COMMON_H */ 44 | -------------------------------------------------------------------------------- /mnt.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - CLONE_NEWNS routines 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #ifndef NS_MNT_H 23 | #define NS_MNT_H 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | #include "nsjail.h" 31 | 32 | namespace mnt { 33 | 34 | typedef enum { 35 | NS_DIR_NO = 0x100, 36 | NS_DIR_YES, 37 | NS_DIR_MAYBE, 38 | } isDir_t; 39 | 40 | bool initNs(nsjconf_t* nsjconf); 41 | bool addMountPtHead(nsjconf_t* nsjconf, const std::string& src, const std::string& dst, 42 | const std::string& fstype, const std::string& options, uintptr_t flags, isDir_t is_dir, 43 | bool is_mandatory, const std::string& src_env, const std::string& dst_env, 44 | const std::string& src_content, bool is_symlink); 45 | bool addMountPtTail(nsjconf_t* nsjconf, const std::string& src, const std::string& dst, 46 | const std::string& fstype, const std::string& options, uintptr_t flags, isDir_t is_dir, 47 | bool is_mandatory, const std::string& src_env, const std::string& dst_env, 48 | const std::string& src_content, bool is_symlink); 49 | const std::string describeMountPt(const mount_t& mpt); 50 | 51 | } // namespace mnt 52 | 53 | #endif /* NS_MNT_H */ 54 | -------------------------------------------------------------------------------- /net.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - networking routines 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | */ 21 | 22 | #ifndef NS_NET_H 23 | #define NS_NET_H 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | #include "nsjail.h" 31 | 32 | namespace net { 33 | 34 | bool limitConns(nsjconf_t* nsjconf, int connsock); 35 | int getRecvSocket(const char* bindhost, int port); 36 | int acceptConn(int listenfd); 37 | const std::string connToText(int fd, bool remote, struct sockaddr_in6* addr_or_null); 38 | bool initNsFromParent(nsjconf_t* nsjconf, int pid); 39 | bool initNsFromChild(nsjconf_t* nsjconf); 40 | 41 | } // namespace net 42 | 43 | #endif /* NS_NET_H */ 44 | -------------------------------------------------------------------------------- /nsjail.1: -------------------------------------------------------------------------------- 1 | .TH NSJAIL "1" "August 2017" "nsjail" "User Commands" 2 | \" 3 | .SH NAME 4 | nsjail \- process isolation tool for linux 5 | \" 6 | .SH SYNOPSIS 7 | \fInsjail\fP [options] \fB\-\-\fR path_to_command [args] 8 | \" 9 | .SH DESCRIPTION 10 | NsJail is a process isolation tool for Linux. It utilizes the Linux namespace subsystem, resource limits, and the seccomp-bpf syscall filters of the Linux kernel. 11 | \" 12 | .SH Options 13 | .TP 14 | \fB\-\-help\fR|\fB\-h\fR Help plz.. 
15 | .TP 16 | \fB\-\-mode\fR|\fB\-M\fR VALUE 17 | Execution mode (default: o [MODE_STANDALONE_ONCE]): 18 | .IP 19 | \fBl\fR: Wait for connections on a TCP port (specified with \fB\-\-port\fR) [MODE_LISTEN_TCP] 20 | .PP 21 | .IP 22 | \fBo\fR: Launch a single process on the console using clone/execve [MODE_STANDALONE_ONCE] 23 | .PP 24 | .IP 25 | \fBe\fR: Launch a single process on the console using execve [MODE_STANDALONE_EXECVE] 26 | .PP 27 | .IP 28 | \fBr\fR: Launch a single process on the console with clone/execve, keep doing it forever [MODE_STANDALONE_RERUN] 29 | .PP 30 | .TP 31 | \fB\-\-config\fR|\fB\-C\fR VALUE 32 | Configuration file in the config.proto ProtoBuf format (see configs/ directory for examples) 33 | .TP 34 | \fB\-\-exec_file\fR|\fB\-x\fR VALUE 35 | File to exec (default: argv[0]) 36 | .TP 37 | \fB\-\-execute_fd\fR 38 | Use execveat() to execute a file-descriptor instead of executing the binary path. In such case argv[0]/exec_file denotes a file path before mount namespacing 39 | .TP 40 | \fB\-\-chroot\fR|\fB\-c\fR VALUE 41 | Directory containing / of the jail (default: none) 42 | .TP 43 | \fB\-\-rw\fR 44 | Mount chroot dir (/) R/W (default: R/O) 45 | .TP 46 | \fB\-\-user\fR|\fB\-u\fR VALUE 47 | Username/uid of processes inside the jail (default: your current uid). You can also use inside_ns_uid:outside_ns_uid:count convention here. Can be specified multiple times 48 | .TP 49 | \fB\-\-group\fR|\fB\-g\fR VALUE 50 | Groupname/gid of processes inside the jail (default: your current gid). You can also use inside_ns_gid:global_ns_gid:count convention here. 
Can be specified multiple times 51 | .TP 52 | \fB\-\-hostname\fR|\fB\-H\fR VALUE 53 | UTS name (hostname) of the jail (default: 'NSJAIL') 54 | .TP 55 | \fB\-\-cwd\fR|\fB\-D\fR VALUE 56 | Directory in the namespace the process will run in (default: '/') 57 | .TP 58 | \fB\-\-port\fR|\fB\-p\fR VALUE 59 | TCP port to bind to (enables MODE_LISTEN_TCP) (default: 0) 60 | .TP 61 | \fB\-\-bindhost\fR VALUE 62 | IP address to bind the port to (only in [MODE_LISTEN_TCP]), (default: '::') 63 | .TP 64 | \fB\-\-max_conns\fR VALUE 65 | Maximum number of connections across all IPs (only in [MODE_LISTEN_TCP]), (default: 0 (unlimited)) 66 | .TP 67 | \fB\-\-max_conns_per_ip\fR|\fB\-i\fR VALUE 68 | Maximum number of connections per one IP (only in [MODE_LISTEN_TCP]), (default: 0 (unlimited)) 69 | .TP 70 | \fB\-\-log\fR|\fB\-l\fR VALUE 71 | Log file (default: use log_fd) 72 | .TP 73 | \fB\-\-log_fd\fR|\fB\-L\fR VALUE 74 | Log FD (default: 2) 75 | .TP 76 | \fB\-\-time_limit\fR|\fB\-t\fR VALUE 77 | Maximum time that a jail can exist, in seconds (default: 600) 78 | .TP 79 | \fB\-\-max_cpus\fR VALUE 80 | Maximum number of CPUs a single jailed process can use (default: 0 'no limit') 81 | .TP 82 | \fB\-\-daemon\fR|\fB\-d\fR 83 | Daemonize after start 84 | .TP 85 | \fB\-\-verbose\fR|\fB\-v\fR 86 | Verbose output 87 | .TP 88 | \fB\-\-quiet\fR|\fB\-q\fR 89 | Log warning and more important messages only 90 | .TP 91 | \fB\-\-really_quiet\fR|\fB\-Q\fR 92 | Log fatal messages only 93 | .TP 94 | \fB\-\-keep_env\fR|\fB\-e\fR 95 | Pass all environment variables to the process (default: all envars are cleared) 96 | .TP 97 | \fB\-\-env\fR|\fB\-E\fR VALUE 98 | Additional environment variable (can be used multiple times). If the envar doesn't contain '=' (e.g. just the 'DISPLAY' string), the current envar value will be used 99 | .TP 100 | \fB\-\-keep_caps\fR 101 | Don't drop any capabilities 102 | .TP 103 | \fB\-\-cap\fR VALUE 104 | Retain this capability, e.g. 
CAP_PTRACE (can be specified multiple times) 105 | .TP 106 | \fB\-\-silent\fR 107 | Redirect child process' fd:0/1/2 to /dev/null 108 | .TP 109 | \fB\-\-stderr_to_null\fR 110 | Redirect FD=2 (STDERR_FILENO) to /dev/null 111 | .TP 112 | \fB\-\-skip_setsid\fR 113 | Don't call setsid(), allows for terminal signal handling in the sandboxed process. Dangerous 114 | .TP 115 | \fB\-\-pass_fd\fR VALUE 116 | Don't close this FD before executing the child process (can be specified multiple times), by default: 0/1/2 are kept open 117 | .TP 118 | \fB\-\-disable_no_new_privs\fR 119 | Don't set the prctl(NO_NEW_PRIVS, 1) (DANGEROUS) 120 | .TP 121 | \fB\-\-rlimit_as\fR VALUE 122 | RLIMIT_AS in MB, 'max' or 'hard' for the current hard limit, 'def' or 'soft' for the current soft limit, 'inf' for RLIM_INFINITY (default: 4096) 123 | .TP 124 | \fB\-\-rlimit_core\fR VALUE 125 | RLIMIT_CORE in MB, 'max' or 'hard' for the current hard limit, 'def' or 'soft' for the current limit, 'inf' for RLIM_INFINITY (default: 0) 126 | .TP 127 | \fB\-\-rlimit_cpu\fR VALUE 128 | RLIMIT_CPU, 'max' or 'hard' for the current hard limit, 'def' or 'soft' for the current soft limit, 'inf' for RLIM_INFINITY (default: 600) 129 | .TP 130 | \fB\-\-rlimit_fsize\fR VALUE 131 | RLIMIT_FSIZE in MB, 'max' or 'hard' for the current hard limit, 'def' or 'soft' for the current soft limit, 'inf' for RLIM_INFINITY (default: 1) 132 | .TP 133 | \fB\-\-rlimit_nofile\fR VALUE 134 | RLIMIT_NOFILE, 'max' or 'hard' for the current hard limit, 'def' or 'soft' for the current limit, 'inf' for RLIM_INFINITY (default: 32) 135 | .TP 136 | \fB\-\-rlimit_nproc\fR VALUE 137 | RLIMIT_NPROC, 'max' or 'hard' for the current hard limit, 'def' or 'soft' for the current soft limit, 'inf' for RLIM_INFINITY (default: 'soft') 138 | .TP 139 | \fB\-\-rlimit_stack\fR VALUE 140 | RLIMIT_STACK in MB, 'max' or 'hard' for the current hard limit, 'def' or 'soft' for the current soft limit, 'inf' for RLIM_INFINITY (default: 'soft') 141 | .TP 142 | 
\fB\-\-disable_rlimits\fR 143 | Disable all rlimits, default to limits set by parent 144 | .TP 145 | \fB\-\-persona_addr_compat_layout\fR 146 | personality(ADDR_COMPAT_LAYOUT) 147 | .TP 148 | \fB\-\-persona_mmap_page_zero\fR 149 | personality(MMAP_PAGE_ZERO) 150 | .TP 151 | \fB\-\-persona_read_implies_exec\fR 152 | personality(READ_IMPLIES_EXEC) 153 | .TP 154 | \fB\-\-persona_addr_limit_3gb\fR 155 | personality(ADDR_LIMIT_3GB) 156 | .TP 157 | \fB\-\-persona_addr_no_randomize\fR 158 | personality(ADDR_NO_RANDOMIZE) 159 | .TP 160 | \fB\-\-disable_clone_newnet\fR|\fB\-N\fR 161 | Don't use CLONE_NEWNET. Enable global networking inside the jail 162 | .TP 163 | \fB\-\-disable_clone_newuser\fR 164 | Don't use CLONE_NEWUSER. Requires euid==0 165 | .TP 166 | \fB\-\-disable_clone_newns\fR 167 | Don't use CLONE_NEWNS 168 | .TP 169 | \fB\-\-disable_clone_newpid\fR 170 | Don't use CLONE_NEWPID 171 | .TP 172 | \fB\-\-disable_clone_newipc\fR 173 | Don't use CLONE_NEWIPC 174 | .TP 175 | \fB\-\-disable_clone_newuts\fR 176 | Don't use CLONE_NEWUTS 177 | .TP 178 | \fB\-\-disable_clone_newcgroup\fR 179 | Don't use CLONE_NEWCGROUP. Might be required for kernel versions < 4.6 180 | .TP 181 | \fB\-\-uid_mapping\fR|\fB\-U\fR VALUE 182 | Add a custom uid mapping of the form inside_uid:outside_uid:count. Setting this requires newuidmap (set-uid) to be present 183 | .TP 184 | \fB\-\-gid_mapping\fR|\fB\-G\fR VALUE 185 | Add a custom gid mapping of the form inside_gid:outside_gid:count. Setting this requires newgidmap (set-uid) to be present 186 | .TP 187 | \fB\-\-bindmount_ro\fR|\fB\-R\fR VALUE 188 | List of mountpoints to be mounted \fB\-\-bind\fR (ro) inside the container. Can be specified multiple times. Supports 'source' syntax, or 'source:dest' 189 | .TP 190 | \fB\-\-bindmount\fR|\fB\-B\fR VALUE 191 | List of mountpoints to be mounted \fB\-\-bind\fR (rw) inside the container. Can be specified multiple times. 
Supports 'source' syntax, or 'source:dest' 192 | .TP 193 | \fB\-\-tmpfsmount\fR|\fB\-T\fR VALUE 194 | List of mountpoints to be mounted as tmpfs (R/W) inside the container. Can be specified multiple times. Supports 'dest' syntax. Alternatively, use '-m none:dest:tmpfs:size=8388608' 195 | .TP 196 | \fB\-\-mount\fR|\fB\-m\fR VALUE 197 | Arbitrary mount, format src:dst:fs_type:options 198 | .TP 199 | \fB\-\-symlink\fR|\fB\-s\fR VALUE 200 | Symlink, format src:dst 201 | .TP 202 | \fB\-\-disable_proc\fR 203 | Disable mounting procfs in the jail 204 | .TP 205 | \fB\-\-proc_path\fR VALUE 206 | Path used to mount procfs (default: '/proc') 207 | .TP 208 | \fB\-\-proc_rw\fR 209 | Mount procfs as R/W (default: R/O) 210 | .TP 211 | \fB\-\-seccomp_policy\fR|\fB\-P\fR VALUE 212 | Path to file containing seccomp\-bpf policy (see kafel/) 213 | .TP 214 | \fB\-\-seccomp_string\fR VALUE 215 | String with kafel seccomp\-bpf policy (see kafel/) 216 | .TP 217 | \fB\-\-seccomp_log\fR 218 | Use SECCOMP_FILTER_FLAG_LOG. Log all actions except SECCOMP_RET_ALLOW. 
Supported since kernel version 4.14 219 | .TP 220 | \fB\-\-cgroup_mem_max\fR VALUE 221 | Maximum number of bytes to use in the group (default: '0' \- disabled) 222 | .TP 223 | \fB\-\-cgroup_mem_memsw_max\fR VALUE 224 | Maximum number of memory+Swap bytes to use in the group (default: '0' \- disabled) 225 | .TP 226 | \fB\-\-cgroup_mem_swap_max\fR VALUE 227 | Maximum number of swap bytes to use in the group (default: '-1' \- disabled) 228 | .TP 229 | \fB\-\-cgroup_mem_mount\fR VALUE 230 | Location of memory cgroup FS (default: '/sys/fs/cgroup/memory') 231 | .TP 232 | \fB\-\-cgroup_mem_parent\fR VALUE 233 | Which pre\-existing memory cgroup to use as a parent (default: 'NSJAIL') 234 | .TP 235 | \fB\-\-cgroup_pids_max\fR VALUE 236 | Maximum number of pids in a cgroup (default: '0' \- disabled) 237 | .TP 238 | \fB\-\-cgroup_pids_mount\fR VALUE 239 | Location of pids cgroup FS (default: '/sys/fs/cgroup/pids') 240 | .TP 241 | \fB\-\-cgroup_pids_parent\fR VALUE 242 | Which pre\-existing pids cgroup to use as a parent (default: 'NSJAIL') 243 | .TP 244 | \fB\-\-cgroup_net_cls_classid\fR VALUE 245 | Class identifier of network packets in the group (default: '0' \- disabled) 246 | .TP 247 | \fB\-\-cgroup_net_cls_mount\fR VALUE 248 | Location of net_cls cgroup FS (default: '/sys/fs/cgroup/net_cls') 249 | .TP 250 | \fB\-\-cgroup_net_cls_parent\fR VALUE 251 | Which pre\-existing net_cls cgroup to use as a parent (default: 'NSJAIL') 252 | .TP 253 | \fB\-\-cgroup_cpu_ms_per_sec\fR VALUE 254 | Number of milliseconds of CPU time per second that the process group can use (default: '0' \- no limit) 255 | .TP 256 | \fB\-\-cgroup_cpu_mount\fR VALUE 257 | Location of cpu cgroup FS (default: '/sys/fs/cgroup/cpu') 258 | .TP 259 | \fB\-\-cgroup_cpu_parent\fR VALUE 260 | Which pre\-existing cpu cgroup to use as a parent (default: 'NSJAIL') 261 | .TP 262 | \fB\-\-cgroupv2_mount\fR VALUE 263 | Location of cgroup v2 directory (default: '/sys/fs/cgroup') 264 | .TP 265 | \fB\-\-use_cgroupv2\fR 
266 | Use cgroup v2 267 | .TP 268 | \fB\-\-iface_no_lo\fR 269 | Don't bring the 'lo' interface up 270 | .TP 271 | \fB\-\-iface_own\fR VALUE 272 | Move this existing network interface into the new NET namespace. Can be specified multiple times 273 | .TP 274 | \fB\-\-macvlan_iface\fR|\fB\-I\fR VALUE 275 | Interface which will be cloned (MACVLAN) and put inside the subprocess' namespace as 'vs' 276 | .TP 277 | \fB\-\-macvlan_vs_ip\fR VALUE 278 | IP of the 'vs' interface (e.g. "192.168.0.1") 279 | .TP 280 | \fB\-\-macvlan_vs_nm\fR VALUE 281 | Netmask of the 'vs' interface (e.g. "255.255.255.0") 282 | .TP 283 | \fB\-\-macvlan_vs_gw\fR VALUE 284 | Default GW for the 'vs' interface (e.g. "192.168.0.1") 285 | .TP 286 | \fB\-\-macvlan_vs_ma\fR VALUE 287 | MAC-address of the 'vs' interface (e.g. "ba:ad:ba:be:45:00") 288 | \" 289 | .SH Examples 290 | .PP 291 | Wait on a port 31337 for connections, and run /bin/sh: 292 | .IP 293 | nsjail \-Ml \-\-port 31337 \-\-chroot / \-\- /bin/sh \-i 294 | .PP 295 | Re\-run echo command as a sub\-process: 296 | .IP 297 | nsjail \-Mr \-\-chroot / \-\- /bin/echo "ABC" 298 | .PP 299 | Run echo command once only, as a sub\-process: 300 | .IP 301 | nsjail \-Mo \-\-chroot / \-\- /bin/echo "ABC" 302 | .PP 303 | Execute echo command directly, without a supervising process: 304 | .IP 305 | nsjail \-Me \-\-chroot / \-\-disable_proc \-\- /bin/echo "ABC" 306 | \" 307 | -------------------------------------------------------------------------------- /nsjail.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #include "nsjail.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #include "cgroup2.h" 43 | #include "cmdline.h" 44 | #include "logs.h" 45 | #include "macros.h" 46 | #include "net.h" 47 | #include "sandbox.h" 48 | #include "subproc.h" 49 | #include "util.h" 50 | 51 | namespace nsjail { 52 | 53 | static __thread std::atomic sigFatal(0); 54 | static __thread std::atomic showProc(false); 55 | 56 | static void sigHandler(int sig) { 57 | if (sig == SIGALRM || sig == SIGCHLD || sig == SIGPIPE) { 58 | return; 59 | } 60 | if (sig == SIGUSR1 || sig == SIGQUIT) { 61 | showProc = true; 62 | return; 63 | } 64 | sigFatal = sig; 65 | } 66 | 67 | static bool setSigHandler(int sig) { 68 | LOG_D("Setting sighandler for signal %s (%d)", util::sigName(sig).c_str(), sig); 69 | 70 | sigset_t smask; 71 | sigemptyset(&smask); 72 | 73 | struct sigaction sa; 74 | sa.sa_handler = sigHandler; 75 | sa.sa_mask = smask; 76 | sa.sa_flags = 0; 77 | sa.sa_restorer = NULL; 78 | 79 | if (sig == SIGTTIN || sig == SIGTTOU) { 80 | sa.sa_handler = SIG_IGN; 81 | } 82 | if (sigaction(sig, &sa, NULL) == -1) { 83 | PLOG_E("sigaction(%d)", sig); 84 | return false; 85 | } 86 | return true; 87 | } 88 | 89 | static bool setSigHandlers(void) { 90 | for (const auto& i : nssigs) { 91 | if (!setSigHandler(i)) { 92 | return false; 93 | } 94 | } 95 | return true; 96 | } 97 | 
98 | static bool setTimer(nsjconf_t* nsjconf) { 99 | if (nsjconf->mode == MODE_STANDALONE_EXECVE) { 100 | return true; 101 | } 102 | 103 | struct itimerval it = { 104 | .it_interval = 105 | { 106 | .tv_sec = 1, 107 | .tv_usec = 0, 108 | }, 109 | .it_value = 110 | { 111 | .tv_sec = 1, 112 | .tv_usec = 0, 113 | }, 114 | }; 115 | if (setitimer(ITIMER_REAL, &it, NULL) == -1) { 116 | PLOG_E("setitimer(ITIMER_REAL)"); 117 | return false; 118 | } 119 | return true; 120 | } 121 | 122 | static bool pipeTraffic(nsjconf_t* nsjconf, int listenfd) { 123 | std::vector fds; 124 | fds.reserve(nsjconf->pipes.size() * 3 + 1); 125 | for (const auto& p : nsjconf->pipes) { 126 | fds.push_back({ 127 | .fd = p.sock_fd, 128 | .events = POLLIN | POLLOUT, 129 | .revents = 0, 130 | }); 131 | fds.push_back({ 132 | .fd = p.pipe_in, 133 | .events = POLLOUT, 134 | .revents = 0, 135 | }); 136 | fds.push_back({ 137 | .fd = p.pipe_out, 138 | .events = POLLIN, 139 | .revents = 0, 140 | }); 141 | } 142 | fds.push_back({ 143 | .fd = listenfd, 144 | .events = POLLIN, 145 | .revents = 0, 146 | }); 147 | LOG_D("Waiting for fd activity"); 148 | while (poll(fds.data(), fds.size(), -1) > 0) { 149 | if (sigFatal > 0 || showProc) { 150 | return false; 151 | } 152 | if (fds.back().revents != 0) { 153 | LOG_D("New connection ready"); 154 | return true; 155 | } 156 | bool cleanup = false; 157 | for (size_t i = 0; i < fds.size() - 1; ++i) { 158 | if (fds[i].revents & POLLIN) { 159 | fds[i].events &= ~POLLIN; 160 | } 161 | if (fds[i].revents & POLLOUT) { 162 | fds[i].events &= ~POLLOUT; 163 | } 164 | } 165 | for (size_t i = 0; i < fds.size() - 3; i += 3) { 166 | const size_t pipe_no = i / 3; 167 | int in, out; 168 | const char* direction; 169 | bool closed = false; 170 | std::tuple direction_map[] = { 171 | std::make_tuple(i, i + 1, "in"), 172 | std::make_tuple(i + 2, i, "out"), 173 | }; 174 | for (const auto& entry : direction_map) { 175 | std::tie(in, out, direction) = entry; 176 | bool in_ready = 
(fds[in].events & POLLIN) == 0 || 177 | (fds[in].revents & POLLIN) == POLLIN; 178 | bool out_ready = (fds[out].events & POLLOUT) == 0 || 179 | (fds[out].revents & POLLOUT) == POLLOUT; 180 | if (in_ready && out_ready) { 181 | LOG_D("#%zu piping data %s", pipe_no, direction); 182 | ssize_t rv = splice(fds[in].fd, nullptr, fds[out].fd, 183 | nullptr, 4096, SPLICE_F_NONBLOCK); 184 | if (rv == -1 && errno != EAGAIN) { 185 | PLOG_E("splice fd pair #%zu {%d, %d}\n", pipe_no, 186 | fds[in].fd, fds[out].fd); 187 | } 188 | if (rv == 0) { 189 | closed = true; 190 | } 191 | fds[in].events |= POLLIN; 192 | fds[out].events |= POLLOUT; 193 | } 194 | if ((fds[in].revents & (POLLERR | POLLHUP)) != 0 || 195 | (fds[out].revents & (POLLERR | POLLHUP)) != 0) { 196 | closed = true; 197 | } 198 | } 199 | if (closed) { 200 | LOG_D("#%zu connection closed", pipe_no); 201 | cleanup = true; 202 | close(nsjconf->pipes[pipe_no].sock_fd); 203 | close(nsjconf->pipes[pipe_no].pipe_in); 204 | close(nsjconf->pipes[pipe_no].pipe_out); 205 | if (nsjconf->pipes[pipe_no].pid > 0) { 206 | kill(nsjconf->pipes[pipe_no].pid, SIGKILL); 207 | } 208 | nsjconf->pipes[pipe_no] = {}; 209 | } 210 | } 211 | if (cleanup) { 212 | break; 213 | } 214 | } 215 | nsjconf->pipes.erase(std::remove(nsjconf->pipes.begin(), nsjconf->pipes.end(), pipemap_t{}), 216 | nsjconf->pipes.end()); 217 | return false; 218 | } 219 | 220 | static int listenMode(nsjconf_t* nsjconf) { 221 | int listenfd = net::getRecvSocket(nsjconf->bindhost.c_str(), nsjconf->port); 222 | if (listenfd == -1) { 223 | return EXIT_FAILURE; 224 | } 225 | for (;;) { 226 | if (sigFatal > 0) { 227 | subproc::killAndReapAll( 228 | nsjconf, nsjconf->forward_signals ? 
sigFatal.load() : SIGKILL); 229 | logs::logStop(sigFatal); 230 | close(listenfd); 231 | return EXIT_SUCCESS; 232 | } 233 | if (showProc) { 234 | showProc = false; 235 | subproc::displayProc(nsjconf); 236 | } 237 | if (pipeTraffic(nsjconf, listenfd)) { 238 | int connfd = net::acceptConn(listenfd); 239 | if (connfd >= 0) { 240 | int in[2]; 241 | int out[2]; 242 | /* NOTE(review): this failure path does not close connfd, and if only the second pipe() fails the two fds already stored in in[] stay open as well */ if (pipe(in) != 0 || pipe(out) != 0) { 243 | PLOG_E("pipe"); 244 | continue; 245 | } 246 | 247 | /* out[1] is handed to the child as both its fd_out and its fd_err */ pid_t pid = 248 | subproc::runChild(nsjconf, connfd, in[0], out[1], out[1]); 249 | 250 | /* The parent closes the child-side ends and keeps only in[1] and out[0] */ close(in[0]); 251 | close(out[1]); 252 | 253 | if (pid <= 0) { 254 | close(in[1]); 255 | close(out[0]); 256 | close(connfd); 257 | } else { 258 | nsjconf->pipes.push_back({ 259 | .sock_fd = connfd, 260 | .pipe_in = in[1], 261 | .pipe_out = out[0], 262 | .pid = pid, 263 | }); 264 | } 265 | } 266 | } 267 | subproc::reapProc(nsjconf); 268 | } 269 | } 270 | 271 | /* Runs the child on the inherited stdin/stdout/stderr; returns 0xff when the child cannot be launched */ static int standaloneMode(nsjconf_t* nsjconf) { 272 | for (;;) { 273 | if (subproc::runChild(nsjconf, /* netfd= */ -1, STDIN_FILENO, STDOUT_FILENO, 274 | STDERR_FILENO) == -1) { 275 | LOG_E("Couldn't launch the child process"); 276 | return 0xff; 277 | } 278 | for (;;) { 279 | int child_status = subproc::reapProc(nsjconf); 280 | if (subproc::countProc(nsjconf) == 0) { 281 | if (nsjconf->mode == MODE_STANDALONE_ONCE) { 282 | return child_status; 283 | } 284 | break; 285 | } 286 | if (showProc) { 287 | showProc = false; 288 | subproc::displayProc(nsjconf); 289 | } 290 | if (sigFatal > 0) { 291 | subproc::killAndReapAll( 292 | nsjconf, nsjconf->forward_signals ? 
sigFatal.load() : SIGKILL); 293 | logs::logStop(sigFatal); 294 | return (128 + sigFatal); 295 | } 296 | pause(); 297 | } 298 | } 299 | // not reached 300 | } 301 | 302 | std::unique_ptr getTC(int fd) { 303 | std::unique_ptr trm(new struct termios); 304 | 305 | if (ioctl(fd, TCGETS, trm.get()) == -1) { 306 | PLOG_D("ioctl(fd=%d, TCGETS) failed", fd); 307 | return nullptr; 308 | } 309 | LOG_D("Saved the current state of the TTY"); 310 | return trm; 311 | } 312 | 313 | void setTC(int fd, const struct termios* trm) { 314 | if (!trm) { 315 | return; 316 | } 317 | if (ioctl(fd, TCSETS, trm) == -1) { 318 | PLOG_W("ioctl(fd=%d, TCSETS) failed", fd); 319 | return; 320 | } 321 | if (tcflush(fd, TCIFLUSH) == -1) { 322 | PLOG_W("tcflush(fd=%d, TCIFLUSH) failed", fd); 323 | return; 324 | } 325 | } 326 | 327 | } // namespace nsjail 328 | 329 | int main(int argc, char* argv[]) { 330 | std::unique_ptr nsjconf = cmdline::parseArgs(argc, argv); 331 | std::unique_ptr trm = nsjail::getTC(STDIN_FILENO); 332 | 333 | if (!nsjconf) { 334 | LOG_F("Couldn't parse cmdline options"); 335 | } 336 | if (nsjconf->daemonize && (daemon(/* nochdir= */ 1, /* noclose= */ 0) == -1)) { 337 | PLOG_F("daemon"); 338 | } 339 | cmdline::logParams(nsjconf.get()); 340 | if (!nsjail::setSigHandlers()) { 341 | LOG_F("nsjail::setSigHandlers() failed"); 342 | } 343 | if (!nsjail::setTimer(nsjconf.get())) { 344 | LOG_F("nsjail::setTimer() failed"); 345 | } 346 | 347 | if (nsjconf->detect_cgroupv2) { 348 | cgroup2::detectCgroupv2(nsjconf.get()); 349 | LOG_I("Detected cgroups version: %d", nsjconf->use_cgroupv2 ? 
2 : 1); 350 | } 351 | 352 | if (nsjconf->use_cgroupv2) { 353 | if (!cgroup2::setup(nsjconf.get())) { 354 | LOG_E("Couldn't setup parent cgroup (cgroupv2)"); 355 | return -1; 356 | } 357 | } 358 | 359 | if (!sandbox::preparePolicy(nsjconf.get())) { 360 | LOG_F("Couldn't prepare sandboxing policy"); 361 | } 362 | 363 | int ret = 0; 364 | if (nsjconf->mode == MODE_LISTEN_TCP) { 365 | ret = nsjail::listenMode(nsjconf.get()); 366 | } else { 367 | ret = nsjail::standaloneMode(nsjconf.get()); 368 | } 369 | 370 | sandbox::closePolicy(nsjconf.get()); 371 | /* Try to restore the underlying console's params in case some program has changed it */ 372 | if (!nsjconf->daemonize) { 373 | nsjail::setTC(STDIN_FILENO, trm.get()); 374 | } 375 | 376 | LOG_D("Returning with %d", ret); 377 | return ret; 378 | } 379 | -------------------------------------------------------------------------------- /nsjail.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | Copyright 2016 Sergiusz Bazanski. All Rights Reserved. 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | http://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | 21 | */ 22 | 23 | #ifndef NS_NSJAIL_H 24 | #define NS_NSJAIL_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | #include 38 | 39 | static const int nssigs[] = { 40 | SIGINT, 41 | SIGQUIT, 42 | SIGUSR1, 43 | SIGALRM, 44 | SIGCHLD, 45 | SIGTERM, 46 | SIGTTIN, 47 | SIGTTOU, 48 | SIGPIPE, 49 | }; 50 | 51 | struct pids_t { 52 | time_t start; 53 | std::string remote_txt; 54 | struct sockaddr_in6 remote_addr; 55 | int pid_syscall_fd; 56 | }; 57 | 58 | struct mount_t { 59 | std::string src; 60 | std::string src_content; 61 | std::string dst; 62 | std::string fs_type; 63 | std::string options; 64 | uintptr_t flags; 65 | bool is_dir; 66 | bool is_symlink; 67 | bool is_mandatory; 68 | bool mounted; 69 | }; 70 | 71 | struct idmap_t { 72 | uid_t inside_id; 73 | uid_t outside_id; 74 | size_t count; 75 | bool is_newidmap; 76 | }; 77 | 78 | enum ns_mode_t { 79 | MODE_LISTEN_TCP = 0, 80 | MODE_STANDALONE_ONCE, 81 | MODE_STANDALONE_EXECVE, 82 | MODE_STANDALONE_RERUN 83 | }; 84 | 85 | struct pipemap_t { 86 | int sock_fd; 87 | int pipe_in; 88 | int pipe_out; 89 | pid_t pid; 90 | bool operator==(const pipemap_t& o) { 91 | return sock_fd == o.sock_fd && pipe_in == o.pipe_in && pipe_out == o.pipe_out; 92 | } 93 | }; 94 | 95 | struct nsjconf_t { 96 | std::string exec_file; 97 | bool use_execveat; 98 | int exec_fd; 99 | std::vector argv; 100 | std::string hostname; 101 | std::string cwd; 102 | std::string chroot; 103 | int port; 104 | std::string bindhost; 105 | bool daemonize; 106 | uint64_t tlimit; 107 | size_t max_cpus; 108 | bool keep_env; 109 | bool keep_caps; 110 | bool disable_no_new_privs; 111 | uint64_t rl_as; 112 | uint64_t rl_core; 113 | uint64_t rl_cpu; 114 | uint64_t rl_fsize; 115 | uint64_t rl_nofile; 116 | uint64_t rl_nproc; 117 | uint64_t rl_stack; 118 | uint64_t rl_mlock; 119 | uint64_t rl_rtpr; 120 | uint64_t rl_msgq; 121 | bool disable_rl; 122 | unsigned long 
personality; 123 | bool clone_newnet; 124 | bool clone_newuser; 125 | bool clone_newns; 126 | bool no_pivotroot; 127 | bool clone_newpid; 128 | bool clone_newipc; 129 | bool clone_newuts; 130 | bool clone_newcgroup; 131 | bool clone_newtime; 132 | enum ns_mode_t mode; 133 | bool is_root_rw; 134 | bool is_silent; 135 | bool stderr_to_null; 136 | bool skip_setsid; 137 | unsigned int max_conns; 138 | unsigned int max_conns_per_ip; 139 | std::string proc_path; 140 | bool is_proc_rw; 141 | bool iface_lo; 142 | std::string iface_vs; 143 | std::string iface_vs_ip; 144 | std::string iface_vs_nm; 145 | std::string iface_vs_gw; 146 | std::string iface_vs_ma; 147 | std::string iface_vs_mo; 148 | bool disable_tsc; 149 | bool forward_signals; 150 | std::string cgroup_mem_mount; 151 | std::string cgroup_mem_parent; 152 | size_t cgroup_mem_max; 153 | size_t cgroup_mem_memsw_max; 154 | ssize_t cgroup_mem_swap_max; 155 | std::string cgroup_pids_mount; 156 | std::string cgroup_pids_parent; 157 | unsigned int cgroup_pids_max; 158 | std::string cgroup_net_cls_mount; 159 | std::string cgroup_net_cls_parent; 160 | unsigned int cgroup_net_cls_classid; 161 | std::string cgroup_cpu_mount; 162 | std::string cgroup_cpu_parent; 163 | unsigned int cgroup_cpu_ms_per_sec; 164 | std::string cgroupv2_mount; 165 | bool use_cgroupv2; 166 | bool detect_cgroupv2; 167 | std::string kafel_file_path; 168 | std::string kafel_string; 169 | struct sock_fprog seccomp_fprog; 170 | bool seccomp_log; 171 | int nice_level; 172 | uid_t orig_uid; 173 | uid_t orig_euid; 174 | std::vector mountpts; 175 | std::map pids; 176 | std::vector uids; 177 | std::vector gids; 178 | std::vector envs; 179 | std::vector openfds; 180 | std::vector caps; 181 | std::vector ifaces; 182 | std::vector pipes; 183 | }; 184 | 185 | #endif /* NS_NSJAIL_H */ 186 | -------------------------------------------------------------------------------- /pid.cc: -------------------------------------------------------------------------------- 1 | /* 2 
| 3 | nsjail - CLONE_PID routines 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #include "pid.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "logs.h" 32 | #include "subproc.h" 33 | 34 | namespace pid { 35 | 36 | bool initNs(nsjconf_t* nsjconf) { 37 | if (nsjconf->mode != MODE_STANDALONE_EXECVE) { 38 | return true; 39 | } 40 | if (!nsjconf->clone_newpid) { 41 | return true; 42 | } 43 | 44 | LOG_D("Creating a dummy 'init' process"); 45 | 46 | /* 47 | * If -Me is used then we need to create permanent init inside PID ns, otherwise only the 48 | * first clone/fork will work, and the rest will fail with ENOMEM (see 'man pid_namespaces' 49 | * for details on this behavior) 50 | */ 51 | pid_t pid = subproc::cloneProc(CLONE_FS, 0); 52 | if (pid == -1) { 53 | PLOG_E("Couldn't create a dummy init process"); 54 | return false; 55 | } 56 | if (pid > 0) { 57 | LOG_D("Created a dummy 'init' process with PID=%d", pid); 58 | return true; 59 | } 60 | 61 | if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0UL, 0UL, 0UL) == -1) { 62 | PLOG_W("(prctl(PR_SET_PDEATHSIG, SIGKILL) failed"); 63 | } 64 | if (prctl(PR_SET_NAME, "ns-init", 0UL, 0UL, 0UL) == -1) { 65 | PLOG_W("(prctl(PR_SET_NAME, 'init') failed"); 66 | } 67 | if (prctl(PR_SET_DUMPABLE, 0UL, 0UL, 0UL, 0UL) == -1) { 68 | 
PLOG_W("(prctl(PR_SET_DUMPABLE, 0) failed"); 69 | } 70 | 71 | /* Act sort-a like a init by reaping zombie processes */ 72 | struct sigaction sa = {}; 73 | sa.sa_handler = SIG_DFL; 74 | sa.sa_flags = SA_NOCLDWAIT | SA_NOCLDSTOP; 75 | sa.sa_restorer = NULL; 76 | sigemptyset(&sa.sa_mask); 77 | 78 | if (sigaction(SIGCHLD, &sa, NULL) == -1) { 79 | PLOG_W("Couldn't set sighandler for SIGCHLD"); 80 | } 81 | 82 | for (;;) { 83 | pause(); 84 | } 85 | } 86 | 87 | } // namespace pid 88 | -------------------------------------------------------------------------------- /pid.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - CLONE_PID routines 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #ifndef NS_PID_H 23 | #define NS_PID_H 24 | 25 | #include 26 | 27 | #include "nsjail.h" 28 | 29 | namespace pid { 30 | 31 | bool initNs(nsjconf_t* nsjconf); 32 | 33 | } // namespace pid 34 | 35 | #endif /* NS_PID_H */ 36 | -------------------------------------------------------------------------------- /sandbox.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - seccomp-bpf sandboxing 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 
7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #include "sandbox.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | extern "C" { 33 | #include "kafel.h" 34 | } 35 | #include "logs.h" 36 | #include "util.h" 37 | 38 | namespace sandbox { 39 | 40 | #ifndef PR_SET_NO_NEW_PRIVS /* in prctl.h since Linux 3.5 */ 41 | #define PR_SET_NO_NEW_PRIVS 38 42 | #endif /* PR_SET_NO_NEW_PRIVS */ 43 | 44 | #ifndef SECCOMP_FILTER_FLAG_TSYNC 45 | #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 46 | #endif /* SECCOMP_FILTER_FLAG_TSYNC */ 47 | 48 | #ifndef SECCOMP_FILTER_FLAG_LOG 49 | #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 50 | #endif /* SECCOMP_FILTER_FLAG_LOG */ 51 | 52 | static bool prepareAndCommit(nsjconf_t* nsjconf) { 53 | if (nsjconf->kafel_file_path.empty() && nsjconf->kafel_string.empty()) { 54 | return true; 55 | } 56 | 57 | if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 58 | PLOG_W("prctl(PR_SET_NO_NEW_PRIVS, 1) failed"); 59 | return false; 60 | } 61 | if (nsjconf->seccomp_log) { 62 | #ifndef __NR_seccomp 63 | LOG_E("The __NR_seccomp is not defined with this kernel's header files (kernel " 64 | "headers too old?)"); 65 | return false; 66 | #else 67 | if (util::syscall(__NR_seccomp, (uintptr_t)SECCOMP_SET_MODE_FILTER, 68 | (uintptr_t)(SECCOMP_FILTER_FLAG_TSYNC | SECCOMP_FILTER_FLAG_LOG), 69 | (uintptr_t)&nsjconf->seccomp_fprog) == -1) { 70 | 
PLOG_E("seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC | " 71 | "SECCOMP_FILTER_FLAG_LOG) failed"); 72 | return false; 73 | } 74 | return true; 75 | #endif /* __NR_seccomp */ 76 | } 77 | 78 | if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &nsjconf->seccomp_fprog, 0UL, 0UL)) { 79 | PLOG_W("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER) failed"); 80 | return false; 81 | } 82 | return true; 83 | } 84 | 85 | bool applyPolicy(nsjconf_t* nsjconf) { 86 | return prepareAndCommit(nsjconf); 87 | } 88 | 89 | bool preparePolicy(nsjconf_t* nsjconf) { 90 | if (nsjconf->kafel_file_path.empty() && nsjconf->kafel_string.empty()) { 91 | return true; 92 | } 93 | if (!nsjconf->kafel_file_path.empty() && !nsjconf->kafel_string.empty()) { 94 | LOG_W("You specified both kafel seccomp policy, and kafel seccomp file. Specify " 95 | "one only"); 96 | return false; 97 | } 98 | 99 | kafel_ctxt_t ctxt = kafel_ctxt_create(); 100 | 101 | if (!nsjconf->kafel_file_path.empty()) { 102 | FILE* f = fopen(nsjconf->kafel_file_path.c_str(), "r"); 103 | if (!f) { 104 | PLOG_W("Couldn't open the kafel seccomp policy file '%s'", 105 | nsjconf->kafel_file_path.c_str()); 106 | kafel_ctxt_destroy(&ctxt); 107 | return false; 108 | } 109 | LOG_D("Compiling seccomp policy from file: '%s'", nsjconf->kafel_file_path.c_str()); 110 | kafel_set_input_file(ctxt, f); 111 | } 112 | if (!nsjconf->kafel_string.empty()) { 113 | LOG_D("Compiling seccomp policy from string: '%s'", nsjconf->kafel_string.c_str()); 114 | kafel_set_input_string(ctxt, nsjconf->kafel_string.c_str()); 115 | } 116 | 117 | if (kafel_compile(ctxt, &nsjconf->seccomp_fprog) != 0) { 118 | LOG_W("Could not compile policy: %s", kafel_error_msg(ctxt)); 119 | kafel_ctxt_destroy(&ctxt); 120 | return false; 121 | } 122 | kafel_ctxt_destroy(&ctxt); 123 | return true; 124 | } 125 | 126 | void closePolicy(nsjconf_t* nsjconf) { 127 | if (!nsjconf->seccomp_fprog.filter) { 128 | return; 129 | } 130 | free(nsjconf->seccomp_fprog.filter); 131 | 
nsjconf->seccomp_fprog.filter = nullptr; 132 | nsjconf->seccomp_fprog.len = 0; 133 | } 134 | 135 | } // namespace sandbox 136 | -------------------------------------------------------------------------------- /sandbox.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - seccomp-bpf sandboxing 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #ifndef NS_SANDBOX_H 23 | #define NS_SANDBOX_H 24 | 25 | #include 26 | 27 | #include "nsjail.h" 28 | 29 | namespace sandbox { 30 | 31 | bool applyPolicy(nsjconf_t* nsjconf); 32 | bool preparePolicy(nsjconf_t* nsjconf); 33 | void closePolicy(nsjconf_t* nsjconf); 34 | 35 | } // namespace sandbox 36 | 37 | #endif /* NS_SANDBOX_H */ 38 | -------------------------------------------------------------------------------- /subproc.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - subprocess management 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #ifndef NS_PROC_H 23 | #define NS_PROC_H 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | 32 | #include "nsjail.h" 33 | 34 | namespace subproc { 35 | 36 | /* 0 - network connection limit reached, -1 - error */ 37 | pid_t runChild(nsjconf_t* nsjconf, int listen_fd, int fd_in, int fd_out, int fd_err); 38 | int countProc(nsjconf_t* nsjconf); 39 | void displayProc(nsjconf_t* nsjconf); 40 | void killAndReapAll(nsjconf_t* nsjconf, int signal); 41 | /* Returns the exit code of the first failing subprocess, or 0 if none fail */ 42 | int reapProc(nsjconf_t* nsjconf); 43 | int systemExe(const std::vector& args, char** env); 44 | pid_t cloneProc(uint64_t flags, int exit_signal); 45 | 46 | } // namespace subproc 47 | 48 | #endif /* NS_PROC_H */ 49 | -------------------------------------------------------------------------------- /user.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - CLONE_NEWUSER routines 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #include "user.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include "logs.h" 42 | #include "macros.h" 43 | #include "subproc.h" 44 | #include "util.h" 45 | 46 | #define STR_(x) #x 47 | #define STR(x) STR_(x) 48 | 49 | constexpr char kNewUidPath[] = 50 | #ifdef NEWUIDMAP_PATH 51 | STR(NEWUIDMAP_PATH); 52 | #else 53 | "/usr/bin/newuidmap"; 54 | #endif 55 | constexpr char kNewGidPath[] = 56 | #ifdef NEWGIDMAP_PATH 57 | STR(NEWGIDMAP_PATH); 58 | #else 59 | "/usr/bin/newgidmap"; 60 | #endif 61 | 62 | namespace user { 63 | 64 | static bool setResGid(gid_t gid) { 65 | LOG_D("setresgid(%d)", gid); 66 | #if defined(__NR_setresgid32) 67 | if (util::syscall(__NR_setresgid32, (uintptr_t)gid, (uintptr_t)gid, (uintptr_t)gid) == -1) { 68 | PLOG_W("setresgid32(%d)", (int)gid); 69 | return false; 70 | } 71 | #else /* defined(__NR_setresgid32) */ 72 | if (util::syscall(__NR_setresgid, (uintptr_t)gid, (uintptr_t)gid, (uintptr_t)gid) == -1) { 73 | PLOG_W("setresgid(%d)", gid); 74 | return false; 75 | } 76 | #endif /* defined(__NR_setresgid32) */ 77 | return true; 78 | } 79 | 80 | static bool setResUid(uid_t uid) { 81 | LOG_D("setresuid(%d)", uid); 82 | #if defined(__NR_setresuid32) 83 | if (util::syscall(__NR_setresuid32, (uintptr_t)uid, (uintptr_t)uid, (uintptr_t)uid) == -1) { 84 | PLOG_W("setresuid32(%d)", (int)uid); 85 | return false; 86 | 
} 87 | #else /* defined(__NR_setresuid32) */ 88 | if (util::syscall(__NR_setresuid, (uintptr_t)uid, (uintptr_t)uid, (uintptr_t)uid) == -1) { 89 | PLOG_W("setresuid(%d)", uid); 90 | return false; 91 | } 92 | #endif /* defined(__NR_setresuid32) */ 93 | return true; 94 | } 95 | 96 | static bool hasGidMapSelf(nsjconf_t* nsjconf) { 97 | for (const auto& gid : nsjconf->gids) { 98 | if (!gid.is_newidmap) { 99 | return true; 100 | } 101 | } 102 | return false; 103 | } 104 | 105 | static bool setGroupsDeny(nsjconf_t* nsjconf, pid_t pid) { 106 | /* 107 | * No need to write 'deny' to /proc/pid/setgroups if our euid==0, as writing to 108 | * uid_map/gid_map will succeed anyway 109 | */ 110 | if (!nsjconf->clone_newuser || nsjconf->orig_euid == 0 || !hasGidMapSelf(nsjconf)) { 111 | return true; 112 | } 113 | 114 | char fname[PATH_MAX]; 115 | snprintf(fname, sizeof(fname), "/proc/%d/setgroups", pid); 116 | const char* const denystr = "deny"; 117 | if (!util::writeBufToFile(fname, denystr, strlen(denystr), O_WRONLY | O_CLOEXEC)) { 118 | LOG_E("util::writeBufToFile('%s', '%s') failed", fname, denystr); 119 | return false; 120 | } 121 | return true; 122 | } 123 | 124 | static bool uidMapSelf(nsjconf_t* nsjconf, pid_t pid) { 125 | std::string map; 126 | for (const auto& uid : nsjconf->uids) { 127 | if (uid.is_newidmap) { 128 | continue; 129 | } 130 | map.append(std::to_string(uid.inside_id)); 131 | map.append(" "); 132 | map.append(std::to_string(uid.outside_id)); 133 | map.append(" "); 134 | map.append(std::to_string(uid.count)); 135 | map.append("\n"); 136 | } 137 | if (map.empty()) { 138 | return true; 139 | } 140 | 141 | char fname[PATH_MAX]; 142 | snprintf(fname, sizeof(fname), "/proc/%d/uid_map", pid); 143 | LOG_D("Writing '%s' to '%s'", map.c_str(), fname); 144 | if (!util::writeBufToFile(fname, map.data(), map.length(), O_WRONLY | O_CLOEXEC)) { 145 | LOG_E("util::writeBufToFile('%s', '%s') failed", fname, map.c_str()); 146 | return false; 147 | } 148 | 149 | return true; 150 
| } 151 | 152 | static bool gidMapSelf(nsjconf_t* nsjconf, pid_t pid) { 153 | std::string map; 154 | for (const auto& gid : nsjconf->gids) { 155 | if (gid.is_newidmap) { 156 | continue; 157 | } 158 | map.append(std::to_string(gid.inside_id)); 159 | map.append(" "); 160 | map.append(std::to_string(gid.outside_id)); 161 | map.append(" "); 162 | map.append(std::to_string(gid.count)); 163 | map.append("\n"); 164 | } 165 | if (map.empty()) { 166 | return true; 167 | } 168 | 169 | char fname[PATH_MAX]; 170 | snprintf(fname, sizeof(fname), "/proc/%d/gid_map", pid); 171 | LOG_D("Writing '%s' to '%s'", map.c_str(), fname); 172 | if (!util::writeBufToFile(fname, map.data(), map.length(), O_WRONLY | O_CLOEXEC)) { 173 | LOG_E("util::writeBufToFile('%s', '%s') failed", fname, map.c_str()); 174 | return false; 175 | } 176 | 177 | return true; 178 | } 179 | 180 | /* Use newgidmap for writing the gid map */ 181 | static bool gidMapExternal(nsjconf_t* nsjconf, pid_t pid) { 182 | bool use = false; 183 | 184 | std::vector argv = {kNewGidPath, std::to_string(pid)}; 185 | for (const auto& gid : nsjconf->gids) { 186 | if (!gid.is_newidmap) { 187 | continue; 188 | } 189 | use = true; 190 | 191 | argv.push_back(std::to_string(gid.inside_id)); 192 | argv.push_back(std::to_string(gid.outside_id)); 193 | argv.push_back(std::to_string(gid.count)); 194 | } 195 | if (!use) { 196 | return true; 197 | } 198 | if (subproc::systemExe(argv, environ) != 0) { 199 | LOG_E("'%s' failed", kNewGidPath); 200 | return false; 201 | } 202 | 203 | return true; 204 | } 205 | 206 | /* Use newuidmap for writing the uid map */ 207 | static bool uidMapExternal(nsjconf_t* nsjconf, pid_t pid) { 208 | bool use = false; 209 | 210 | std::vector argv = {kNewUidPath, std::to_string(pid)}; 211 | for (const auto& uid : nsjconf->uids) { 212 | if (!uid.is_newidmap) { 213 | continue; 214 | } 215 | use = true; 216 | 217 | argv.push_back(std::to_string(uid.inside_id)); 218 | argv.push_back(std::to_string(uid.outside_id)); 219 | 
argv.push_back(std::to_string(uid.count)); 220 | } 221 | if (!use) { 222 | return true; 223 | } 224 | if (subproc::systemExe(argv, environ) != 0) { 225 | LOG_E("'%s' failed", kNewUidPath); 226 | return false; 227 | } 228 | 229 | return true; 230 | } 231 | 232 | static bool uidGidMap(nsjconf_t* nsjconf, pid_t pid) { 233 | RETURN_ON_FAILURE(gidMapSelf(nsjconf, pid)); 234 | RETURN_ON_FAILURE(gidMapExternal(nsjconf, pid)); 235 | RETURN_ON_FAILURE(uidMapSelf(nsjconf, pid)); 236 | RETURN_ON_FAILURE(uidMapExternal(nsjconf, pid)); 237 | 238 | return true; 239 | } 240 | 241 | bool initNsFromParent(nsjconf_t* nsjconf, pid_t pid) { 242 | if (!setGroupsDeny(nsjconf, pid)) { 243 | return false; 244 | } 245 | if (!nsjconf->clone_newuser) { 246 | return true; 247 | } 248 | if (!uidGidMap(nsjconf, pid)) { 249 | return false; 250 | } 251 | return true; 252 | } 253 | 254 | bool initNsFromChild(nsjconf_t* nsjconf) { 255 | if (!nsjconf->clone_newuser && nsjconf->orig_euid != 0) { 256 | return true; 257 | } 258 | 259 | /* 260 | * Make sure all capabilities are retained after the subsequent setuid/setgid, as they will 261 | * be needed for privileged operations: mounts, uts change etc. 262 | */ 263 | if (prctl(PR_SET_SECUREBITS, SECBIT_KEEP_CAPS | SECBIT_NO_SETUID_FIXUP, 0UL, 0UL, 0UL) == 264 | -1) { 265 | PLOG_E("prctl(PR_SET_SECUREBITS, SECBIT_KEEP_CAPS | SECBIT_NO_SETUID_FIXUP)"); 266 | return false; 267 | } 268 | 269 | /* 270 | * Best effort because of /proc/self/setgroups. We deny 271 | * setgroups(2) calls only if user namespaces are in use. 
272 | */ 273 | std::vector groups; 274 | std::string groupsString = "["; 275 | if (!nsjconf->clone_newuser && nsjconf->gids.size() > 1) { 276 | for (auto it = nsjconf->gids.begin() + 1; it != nsjconf->gids.end(); it++) { 277 | groups.push_back(it->inside_id); 278 | groupsString += std::to_string(it->inside_id); 279 | if (it < nsjconf->gids.end() - 1) groupsString += ", "; 280 | } 281 | } 282 | groupsString += "]"; 283 | 284 | if (!setResGid(nsjconf->gids[0].inside_id)) { 285 | PLOG_E("setresgid(%lu)", (unsigned long)nsjconf->gids[0].inside_id); 286 | return false; 287 | } 288 | 289 | LOG_D("setgroups(%zu, %s)", groups.size(), groupsString.c_str()); 290 | if (setgroups(groups.size(), groups.data()) == -1) { 291 | /* Indicate error if specific groups were requested */ 292 | if (groups.size() > 0) { 293 | PLOG_E("setgroups(%zu, %s) failed", groups.size(), groupsString.c_str()); 294 | return false; 295 | } 296 | PLOG_D("setgroups(%zu, %s) failed", groups.size(), groupsString.c_str()); 297 | } 298 | 299 | if (!setResUid(nsjconf->uids[0].inside_id)) { 300 | PLOG_E("setresuid(%lu)", (unsigned long)nsjconf->uids[0].inside_id); 301 | return false; 302 | } 303 | 304 | /* 305 | * Disable securebits again to avoid spawned programs 306 | * unexpectedly retaining capabilities after a UID/GID 307 | * change. 
308 | */ 309 | if (prctl(PR_SET_SECUREBITS, 0UL, 0UL, 0UL, 0UL) == -1) { 310 | PLOG_E("prctl(PR_SET_SECUREBITS, 0)"); 311 | return false; 312 | } 313 | 314 | return true; 315 | } 316 | 317 | static uid_t parseUid(const std::string& id) { 318 | if (id.empty()) { 319 | return getuid(); 320 | } 321 | struct passwd* pw = getpwnam(id.c_str()); 322 | if (pw != nullptr) { 323 | return pw->pw_uid; 324 | } 325 | if (util::isANumber(id.c_str())) { 326 | return (uid_t)strtoimax(id.c_str(), NULL, 0); 327 | } 328 | return (uid_t)-1; 329 | } 330 | 331 | static gid_t parseGid(const std::string& id) { 332 | if (id.empty()) { 333 | return getgid(); 334 | } 335 | struct group* gr = getgrnam(id.c_str()); 336 | if (gr != nullptr) { 337 | return gr->gr_gid; 338 | } 339 | if (util::isANumber(id.c_str())) { 340 | return (gid_t)strtoimax(id.c_str(), NULL, 0); 341 | } 342 | return (gid_t)-1; 343 | } 344 | 345 | bool parseId(nsjconf_t* nsjconf, const std::string& i_id, const std::string& o_id, size_t cnt, 346 | bool is_gid, bool is_newidmap) { 347 | if (cnt < 1) { 348 | cnt = 1; 349 | } 350 | 351 | uid_t inside_id; 352 | uid_t outside_id; 353 | 354 | if (is_gid) { 355 | inside_id = parseGid(i_id); 356 | if (inside_id == (uid_t)-1) { 357 | LOG_W("Cannot parse '%s' as GID", i_id.c_str()); 358 | return false; 359 | } 360 | outside_id = parseGid(o_id); 361 | if (outside_id == (uid_t)-1) { 362 | LOG_W("Cannot parse '%s' as GID", o_id.c_str()); 363 | return false; 364 | } 365 | } else { 366 | inside_id = parseUid(i_id); 367 | if (inside_id == (uid_t)-1) { 368 | LOG_W("Cannot parse '%s' as UID", i_id.c_str()); 369 | return false; 370 | } 371 | outside_id = parseUid(o_id); 372 | if (outside_id == (uid_t)-1) { 373 | LOG_W("Cannot parse '%s' as UID", o_id.c_str()); 374 | return false; 375 | } 376 | } 377 | 378 | idmap_t id; 379 | id.inside_id = inside_id; 380 | id.outside_id = outside_id; 381 | id.count = cnt; 382 | id.is_newidmap = is_newidmap; 383 | 384 | if (is_gid) { 385 | 
namespace user {

/*
 * Parent-side setup for the process 'pid': handles /proc/<pid>/setgroups and, when
 * nsjconf->clone_newuser is set, writes its gid_map/uid_map (directly and/or through the
 * external newgidmap/newuidmap helpers). Returns false on the first failing step.
 */
bool initNsFromParent(nsjconf_t* nsjconf, pid_t pid);

/*
 * Child-side credential switch: setresgid(), setgroups() and setresuid() using the inside
 * ids of the configured mappings. Returns true without doing anything when no user
 * namespace is used and nsjail was not started with euid 0.
 */
bool initNsFromChild(nsjconf_t* nsjconf);

/*
 * Parses the inside id 'i_id' and the outside id 'o_id' (a user/group name, a number, or
 * an empty string standing for the current real UID/GID) and appends the mapping to
 * nsjconf->gids when 'is_gid' is set, to nsjconf->uids otherwise. A 'cnt' below 1 is
 * stored as 1. Returns false when either id cannot be parsed.
 */
bool parseId(nsjconf_t* nsjconf, const std::string& i_id, const std::string& o_id, size_t cnt,
    bool is_gid, bool is_newidmap);

}  // namespace user
10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #include "util.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | 49 | #include "logs.h" 50 | #include "macros.h" 51 | 52 | namespace util { 53 | 54 | ssize_t readFromFd(int fd, void* buf, size_t len) { 55 | uint8_t* charbuf = (uint8_t*)buf; 56 | 57 | size_t readSz = 0; 58 | while (readSz < len) { 59 | ssize_t sz = TEMP_FAILURE_RETRY(read(fd, &charbuf[readSz], len - readSz)); 60 | if (sz <= 0) { 61 | break; 62 | } 63 | readSz += sz; 64 | } 65 | return readSz; 66 | } 67 | 68 | ssize_t readFromFile(const char* fname, void* buf, size_t len) { 69 | int fd = TEMP_FAILURE_RETRY(open(fname, O_RDONLY | O_CLOEXEC)); 70 | if (fd == -1) { 71 | LOG_E("open(%s, O_RDONLY|O_CLOEXEC)", QC(fname)); 72 | return -1; 73 | } 74 | ssize_t ret = readFromFd(fd, buf, len); 75 | close(fd); 76 | return ret; 77 | } 78 | 79 | bool writeToFd(int fd, const void* buf, size_t len) { 80 | const uint8_t* charbuf = (const uint8_t*)buf; 81 | 82 | size_t writtenSz = 0; 83 | while (writtenSz < len) { 84 | ssize_t sz = TEMP_FAILURE_RETRY(write(fd, &charbuf[writtenSz], len - writtenSz)); 85 | if (sz < 0) { 86 | return false; 87 | } 88 | writtenSz += sz; 89 | } 90 | return true; 91 | } 92 | 93 | bool readFromFileToStr(const char* fname, std::string* str) { 94 | std::fstream 
fs(fname, std::ios::in | std::ios::binary); 95 | if (!fs.is_open()) { 96 | PLOG_W("Couldn't open file %s", QC(fname)); 97 | return false; 98 | } 99 | 100 | str->clear(); 101 | 102 | while (fs) { 103 | char buf[4096]; 104 | fs.read(buf, sizeof(buf)); 105 | std::streamsize sz = fs.gcount(); 106 | if (sz > 0) { 107 | str->append(buf, sz); 108 | } 109 | if (fs.eof()) { 110 | return true; 111 | } 112 | if (fs.bad() || fs.fail()) { 113 | PLOG_W("Reading from %s failed", QC(fname)); 114 | return false; 115 | } 116 | } 117 | 118 | return true; 119 | } 120 | 121 | bool writeBufToFile( 122 | const char* filename, const void* buf, size_t len, int open_flags, bool log_errors) { 123 | int fd; 124 | TEMP_FAILURE_RETRY(fd = open(filename, open_flags, 0644)); 125 | if (fd == -1) { 126 | if (log_errors) { 127 | PLOG_E("Couldn't open %s for writing", QC(filename)); 128 | } 129 | return false; 130 | } 131 | 132 | if (!writeToFd(fd, buf, len)) { 133 | if (log_errors) { 134 | PLOG_E("Couldn't write '%zu' bytes to file %s (fd='%d')", len, QC(filename), 135 | fd); 136 | } 137 | close(fd); 138 | if (open_flags & O_CREAT) { 139 | unlink(filename); 140 | } 141 | return false; 142 | } 143 | 144 | LOG_D("Written '%zu' bytes to %s", len, QC(filename)); 145 | 146 | close(fd); 147 | return true; 148 | } 149 | 150 | bool createDirRecursively(const char* dir) { 151 | if (dir[0] != '/') { 152 | LOG_W("The directory path must start with '/': '%s' provided", dir); 153 | return false; 154 | } 155 | 156 | int prev_dir_fd = TEMP_FAILURE_RETRY(open("/", O_RDONLY | O_CLOEXEC | O_DIRECTORY)); 157 | if (prev_dir_fd == -1) { 158 | PLOG_W("open('/', O_RDONLY | O_CLOEXEC)"); 159 | return false; 160 | } 161 | 162 | char path[PATH_MAX]; 163 | snprintf(path, sizeof(path), "%s", dir); 164 | char* curr = path; 165 | for (;;) { 166 | while (*curr == '/') { 167 | curr++; 168 | } 169 | 170 | char* next = strchr(curr, '/'); 171 | if (next == nullptr) { 172 | close(prev_dir_fd); 173 | return true; 174 | } 175 | *next = 
'\0'; 176 | 177 | if (mkdirat(prev_dir_fd, curr, 0755) == -1 && errno != EEXIST) { 178 | PLOG_W("mkdir(%s, 0755)", QC(curr)); 179 | close(prev_dir_fd); 180 | return false; 181 | } 182 | 183 | int dir_fd = TEMP_FAILURE_RETRY(openat(prev_dir_fd, curr, O_DIRECTORY | O_CLOEXEC)); 184 | if (dir_fd == -1) { 185 | PLOG_W("openat('%d', %s, O_DIRECTORY | O_CLOEXEC)", prev_dir_fd, QC(curr)); 186 | close(prev_dir_fd); 187 | return false; 188 | } 189 | close(prev_dir_fd); 190 | prev_dir_fd = dir_fd; 191 | curr = next + 1; 192 | } 193 | } 194 | 195 | std::string* StrAppend(std::string* str, const char* format, ...) { 196 | char* strp; 197 | 198 | va_list args; 199 | va_start(args, format); 200 | int ret = vasprintf(&strp, format, args); 201 | va_end(args); 202 | 203 | if (ret == -1) { 204 | PLOG_E("Memory allocation failed during asprintf()"); 205 | str->append(" [ERROR: mem_allocation_failed] "); 206 | return str; 207 | } 208 | 209 | str->append(strp, ret); 210 | free(strp); 211 | return str; 212 | } 213 | 214 | std::string StrPrintf(const char* format, ...) 
/*
 * Returns 'str' wrapped in single quotes; embedded single quotes and backslashes are
 * escaped with a backslash (std::quoted semantics).
 */
const std::string StrQuote(const std::string& str) {
	std::ostringstream ss;
	ss << std::quoted(str, '\'');
	return ss.str();
}

/*
 * Returns true when every character of 's' is a decimal digit or the letter 'x'.
 *
 * This is a permissive pre-check, not a validator: the empty string and inputs such as
 * "x" are accepted, while signs, whitespace and the hex digits a-f are rejected.
 */
bool isANumber(const char* s) {
	for (size_t i = 0; s[i] != '\0'; i++) {
		/* isdigit() is only defined for unsigned char values and EOF */
		const unsigned char ch = (unsigned char)s[i];
		if (!isdigit(ch) && ch != 'x') {
			return false;
		}
	}
	return true;
}

/* Returns true when both views hold exactly the same characters */
bool StrEq(const std::string_view& s1, const std::string_view& s2) {
	return (s1 == s2);
}
Using gettimeofday " 268 | "fall-back"); 269 | struct timeval tv; 270 | gettimeofday(&tv, NULL); 271 | rndX = tv.tv_usec + ((uint64_t)tv.tv_sec << 32); 272 | return; 273 | } 274 | if (readFromFd(fd, (uint8_t*)&rndX, sizeof(rndX)) != sizeof(rndX)) { 275 | PLOG_F("Couldn't read '%zu' bytes from /dev/urandom", sizeof(rndX)); 276 | close(fd); 277 | } 278 | close(fd); 279 | } 280 | 281 | uint64_t rnd64(void) { 282 | pthread_once(&rndThreadOnce, rndInitThread); 283 | rndX = a * rndX + c; 284 | return rndX; 285 | } 286 | 287 | const std::string sigName(int signo) { 288 | std::string res; 289 | 290 | struct { 291 | const int signo; 292 | const char* const name; 293 | } static const sigNames[] = { 294 | NS_VALSTR_STRUCT(SIGHUP), 295 | NS_VALSTR_STRUCT(SIGINT), 296 | NS_VALSTR_STRUCT(SIGQUIT), 297 | NS_VALSTR_STRUCT(SIGILL), 298 | NS_VALSTR_STRUCT(SIGTRAP), 299 | NS_VALSTR_STRUCT(SIGABRT), 300 | NS_VALSTR_STRUCT(SIGIOT), 301 | NS_VALSTR_STRUCT(SIGBUS), 302 | NS_VALSTR_STRUCT(SIGFPE), 303 | NS_VALSTR_STRUCT(SIGKILL), 304 | NS_VALSTR_STRUCT(SIGUSR1), 305 | NS_VALSTR_STRUCT(SIGSEGV), 306 | NS_VALSTR_STRUCT(SIGUSR2), 307 | NS_VALSTR_STRUCT(SIGPIPE), 308 | NS_VALSTR_STRUCT(SIGALRM), 309 | NS_VALSTR_STRUCT(SIGTERM), 310 | NS_VALSTR_STRUCT(SIGSTKFLT), 311 | NS_VALSTR_STRUCT(SIGCHLD), 312 | NS_VALSTR_STRUCT(SIGCONT), 313 | NS_VALSTR_STRUCT(SIGSTOP), 314 | NS_VALSTR_STRUCT(SIGTSTP), 315 | NS_VALSTR_STRUCT(SIGTTIN), 316 | NS_VALSTR_STRUCT(SIGTTOU), 317 | NS_VALSTR_STRUCT(SIGURG), 318 | NS_VALSTR_STRUCT(SIGXCPU), 319 | NS_VALSTR_STRUCT(SIGXFSZ), 320 | NS_VALSTR_STRUCT(SIGVTALRM), 321 | NS_VALSTR_STRUCT(SIGPROF), 322 | NS_VALSTR_STRUCT(SIGWINCH), 323 | NS_VALSTR_STRUCT(SIGPOLL), 324 | NS_VALSTR_STRUCT(SIGPWR), 325 | NS_VALSTR_STRUCT(SIGSYS), 326 | }; 327 | 328 | for (const auto& i : sigNames) { 329 | if (signo == i.signo) { 330 | res.append(i.name); 331 | return res; 332 | } 333 | } 334 | 335 | if (signo >= SIGRTMIN) { 336 | res.append("SIG"); 337 | res.append(std::to_string(signo)); 
/*
 * Formats 't' in the local time zone as "YYYY-MM-DDTHH:MM:SS+hhmm"
 * (strftime "%FT%T%z"). Returns "[Time conv error]" if strftime() produces no output.
 */
const std::string timeToStr(time_t t) {
	char timestr[128];
	struct tm local_tm;
	localtime_r(&t, &local_tm);
	if (strftime(timestr, sizeof(timestr) - 1, "%FT%T%z", &local_tm) == 0) {
		return "[Time conv error]";
	}
	return timestr;
}

/*
 * Splits 'str' at every 'delim' and returns the pieces in order.
 *
 * Empty pieces between two delimiters are kept, a single trailing delimiter does not add
 * an empty piece, and an empty input gives an empty vector (std::getline semantics).
 */
std::vector<std::string> strSplit(const std::string str, char delim) {
	std::vector<std::string> vec;
	std::istringstream stream(str);
	for (std::string word; std::getline(stream, word, delim);) {
		vec.push_back(word);
	}
	return vec;
}
::syscall(sysno, a0, a1, a2, a3, a4, a5); 409 | } 410 | 411 | long setrlimit(int res, const struct rlimit64& newlim) { 412 | return util::syscall(__NR_prlimit64, 0, res, (uintptr_t)&newlim, (uintptr_t)nullptr); 413 | } 414 | 415 | long getrlimit(int res, struct rlimit64* curlim) { 416 | *curlim = {}; 417 | return util::syscall(__NR_prlimit64, 0, res, (uintptr_t)nullptr, (uintptr_t)curlim); 418 | } 419 | 420 | } // namespace util 421 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - useful procedures 4 | ----------------------------------------- 5 | 6 | Copyright 2016 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
namespace util {

/* Reads up to 'len' bytes, retrying on EINTR/short reads; returns the bytes read (>= 0) */
ssize_t readFromFd(int fd, void* buf, size_t len);
/* Like readFromFd() on a freshly opened 'fname'; returns -1 if the file cannot be opened */
ssize_t readFromFile(const char* fname, void* buf, size_t len);
/* Replaces '*str' with the full content of 'fname'; false on open/read failure */
bool readFromFileToStr(const char* fname, std::string* str);
/* Writes exactly 'len' bytes, retrying on EINTR/short writes; false on write error */
bool writeToFd(int fd, const void* buf, size_t len);
/*
 * Opens 'filename' with 'open_flags' and writes 'len' bytes to it. The file is unlinked
 * if the write fails and O_CREAT was given. 'log_errors' controls error logging.
 */
bool writeBufToFile(
    const char* filename, const void* buf, size_t len, int open_flags, bool log_errors = true);
/* Creates every '/'-terminated component of the absolute path 'dir' */
bool createDirRecursively(const char* dir);
/* Appends printf-style formatted text to '*str' and returns 'str' */
std::string* StrAppend(std::string* str, const char* format, ...)
    __attribute__((format(printf, 2, 3)));
/* Returns printf-style formatted text */
std::string StrPrintf(const char* format, ...) __attribute__((format(printf, 1, 2)));
/* Returns 'str' in single quotes, with embedded quotes/backslashes escaped */
const std::string StrQuote(const std::string& str);
/* True when both views hold the same characters */
bool StrEq(const std::string_view& s1, const std::string_view& s2);
/* True when 's' consists only of decimal digits and 'x' characters (empty is accepted) */
bool isANumber(const char* s);
/* Next value of the calling thread's pseudo-random generator */
uint64_t rnd64(void);
/* Printable name of a signal number */
const std::string sigName(int signo);
/* Printable name of an RLIMIT_* resource number */
const std::string rLimName(int res);
/* 't' formatted in local time as "%FT%T%z" */
const std::string timeToStr(time_t t);
/* Splits 'str' at every 'delim'; the element type is explicit because a function return
 * type cannot be deduced from a bare class template name */
std::vector<std::string> strSplit(const std::string str, char delim);
/* Forwards to the global ::syscall(); unused arguments default to 0 */
long syscall(long sysno, uintptr_t a0 = 0, uintptr_t a1 = 0, uintptr_t a2 = 0, uintptr_t a3 = 0,
    uintptr_t a4 = 0, uintptr_t a5 = 0);
/* Set / get a resource limit of the calling process through the prlimit64 syscall */
long setrlimit(int res, const struct rlimit64& newlim);
long getrlimit(int res, struct rlimit64* curlim);

}  // namespace util
19 | 20 | */ 21 | 22 | #include "uts.h" 23 | 24 | #include 25 | #include 26 | 27 | #include "logs.h" 28 | 29 | namespace uts { 30 | 31 | bool initNs(nsjconf_t* nsjconf) { 32 | if (!nsjconf->clone_newuts) { 33 | return true; 34 | } 35 | 36 | LOG_D("Setting hostname to '%s'", nsjconf->hostname.c_str()); 37 | if (sethostname(nsjconf->hostname.data(), nsjconf->hostname.length()) == -1) { 38 | PLOG_E("sethostname('%s')", nsjconf->hostname.c_str()); 39 | return false; 40 | } 41 | return true; 42 | } 43 | 44 | } // namespace uts 45 | -------------------------------------------------------------------------------- /uts.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | nsjail - CLONE_NEWUTS routines 4 | ----------------------------------------- 5 | 6 | Copyright 2014 Google Inc. All Rights Reserved. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | */ 21 | 22 | #ifndef NS_UTS_H 23 | #define NS_UTS_H 24 | 25 | #include 26 | 27 | #include "nsjail.h" 28 | 29 | namespace uts { 30 | 31 | bool initNs(nsjconf_t* nsjconf); 32 | 33 | } // namespace uts 34 | 35 | #endif /* NS_UTS_H */ 36 | --------------------------------------------------------------------------------