├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── bulk.yaml ├── examples ├── echo.rs └── runcmd.rs ├── src ├── callbacks.rs ├── caps.rs ├── child.rs ├── chroot.rs ├── config.rs ├── debug.rs ├── error.rs ├── fds.rs ├── ffi_util.rs ├── idmap.rs ├── lib.rs ├── linux.rs ├── namespace.rs ├── pipe.rs ├── run.rs ├── runtime.rs ├── status.rs ├── std_api.rs ├── stdio.rs ├── wait.rs └── zombies.rs └── vagga.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | /.vagga 4 | /tmp 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | os: linux 3 | dist: trusty 4 | language: rust 5 | 6 | cache: 7 | - cargo 8 | 9 | before_cache: 10 | - rm -r $TRAVIS_BUILD_DIR/target/debug 11 | 12 | jobs: 13 | include: 14 | - rust: stable 15 | - rust: beta 16 | - rust: nightly 17 | 18 | # deploy 19 | - stage: publish 20 | rust: stable 21 | install: true 22 | script: true 23 | 24 | deploy: 25 | - provider: script 26 | script: 'cargo publish --verbose --token=$CARGO_TOKEN' 27 | on: 28 | tags: true 29 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unshare" 3 | description = """ 4 | The low-level interface for linux namespaces (containers) 5 | """ 6 | license = "MIT/Apache-2.0" 7 | readme = "README.md" 8 | keywords = ["linux", "container", "namespace", "docker", "process"] 9 | homepage = "https://github.com/tailhook/unshare" 10 | documentation = "https://docs.rs/unshare" 11 | categories = ["os::unix-apis"] 12 | version = "0.7.0" 13 | authors = ["paul@colomiets.name"] 14 | edition = "2018" 15 | 16 | [dependencies] 17 | libc = "0.2.93" 18 | nix = "0.20.0" 19 | 20 | [dev-dependencies] 
21 | argparse = "0.2.2" 22 | rand = "0.8.3" 23 | 24 | [lib] 25 | name = "unshare" 26 | path = "src/lib.rs" 27 | 28 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2016 The unshare Developers 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Rust Unshare 2 | ============ 3 | 4 | *Status:* 90% feature-complete, works in production in [lithos][1] and powers [vagga][2] 5 | 6 | [Github](https://github.com/tailhook/unshare) | 7 | [Documentation](http://docs.rs/unshare) | 8 | [Crate](https://crates.io/crates/unshare) 9 | 10 | Unshare is a low-level library to create linux containers. 11 | 12 | It contains the following: 13 | 14 | * Process creation interface similar to `std::process::Command` 15 | * Unsharing arbitrary linux namespaces 16 | * Ability to change root (`chroot/pivot_root`), `uid`, `gid`, `gid_map` 17 | * Some signal mask handling (especially for new processes) 18 | * Forwarding file descriptors and other unixy stuff (sessions, terminals) 19 | * Setting a few important prctl flags (`PR_SET_PDEATHSIG`) 20 | * Runs both as root user and as unprivileged user 21 | 22 | Not implemented yet: 23 | 24 | * Fine grained capabilities control (currently you may change user or use 25 | user namespaces) 26 | 27 | The following is considered: 28 | 29 | * Capture input (should be, because part of ``std::process`` interface) 30 | * Pseudo tty creation for child 31 | * The `unshare` and `setns` 32 | 33 | The following is out of scope: 34 | 35 | * mounting file systems 36 | * setting up network 37 | * in-container and out of container supervision 38 | * handling child signals 39 | 40 | [1]: http://lithos.readthedocs.org 41 | [2]: http://vagga.readthedocs.org 42 | 43 | 44 | License 45 | ======= 46 | 47 | Licensed under either of 48 | 49 | * Apache License, Version
2.0, (./LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) 50 | * MIT license (./LICENSE-MIT or http://opensource.org/licenses/MIT) 51 | 52 | at your option. 53 | 54 | Contribution 55 | ------------ 56 | 57 | Unless you explicitly state otherwise, any contribution intentionally 58 | submitted for inclusion in the work by you, as defined in the Apache-2.0 59 | license, shall be dual licensed as above, without any additional terms or 60 | conditions. 61 | -------------------------------------------------------------------------------- /bulk.yaml: -------------------------------------------------------------------------------- 1 | minimum-bulk: v0.4.5 2 | 3 | versions: 4 | 5 | - file: Cargo.toml 6 | block-start: ^\[package\] 7 | block-end: ^\[.*\] 8 | regex: ^version\s*=\s*"(\S+)" 9 | -------------------------------------------------------------------------------- /examples/echo.rs: -------------------------------------------------------------------------------- 1 | extern crate unshare; 2 | 3 | use std::process::exit; 4 | 5 | 6 | fn main() { 7 | let mut cmd = unshare::Command::new("/bin/echo"); 8 | cmd.arg("hello"); 9 | cmd.arg("world!"); 10 | 11 | match cmd.status().unwrap() { 12 | // propagate signal 13 | unshare::ExitStatus::Exited(x) => exit(x as i32), 14 | unshare::ExitStatus::Signaled(x, _) => exit((128+x as i32) as i32), 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /examples/runcmd.rs: -------------------------------------------------------------------------------- 1 | extern crate unshare; 2 | extern crate argparse; 3 | extern crate libc; 4 | 5 | use std::io::{stderr, Write, Read}; 6 | use std::process::exit; 7 | use std::path::PathBuf; 8 | 9 | use unshare::Namespace; 10 | use libc::{uid_t, gid_t}; 11 | use argparse::{ArgumentParser, Store, StoreOption, Collect, StoreTrue}; 12 | use argparse::{ParseOption, PushConst}; 13 | 14 | 15 | fn main() { 16 | let mut command = "".to_string(); 17 | let 
mut args: Vec = Vec::new(); 18 | let mut alias = None::; 19 | let mut workdir = None::; 20 | let mut verbose = false; 21 | let mut escape_stdout = false; 22 | let mut uid = None::; 23 | let mut gid = None::; 24 | let mut chroot = None::; 25 | let mut namespaces = Vec::::new(); 26 | let mut groups = Vec::::new(); 27 | let mut pid_env_var = None::; 28 | { // this block limits scope of borrows by ap.refer() method 29 | let mut ap = ArgumentParser::new(); 30 | ap.set_description("Run command with changed process state"); 31 | ap.refer(&mut command) 32 | .add_argument("command", Store, "Command to run") 33 | .required(); 34 | ap.refer(&mut args) 35 | .add_argument("arg", Collect, "Arguments for the command") 36 | .required(); 37 | ap.refer(&mut workdir) 38 | .add_option(&["--work-dir"], StoreOption, " 39 | Set working directory of the command"); 40 | ap.refer(&mut verbose) 41 | .add_option(&["-v", "--verbose"], StoreTrue, " 42 | Enable verbose mode (prints command, pid, exit status)"); 43 | ap.refer(&mut escape_stdout) 44 | .add_option(&["--escape-stdout"], StoreTrue, " 45 | Read data written by the utility to stdout and print it back 46 | as a quoted string with binary data escaped"); 47 | ap.refer(&mut uid) 48 | .add_option(&["-U", "--uid"], StoreOption, " 49 | Set user id for the target process"); 50 | ap.refer(&mut gid) 51 | .add_option(&["-G", "--gid"], StoreOption, " 52 | Set group id for the target process"); 53 | ap.refer(&mut groups) 54 | .add_option(&["--add-group"], Collect, " 55 | Add supplementary group id"); 56 | ap.refer(&mut chroot) 57 | .add_option(&["--chroot"], ParseOption, " 58 | Chroot to directory before running command"); 59 | ap.refer(&mut alias) 60 | .add_option(&["--alias", "--arg0"], ParseOption, " 61 | Set alias of the command 62 | (passed as `argv[0]` to the program)"); 63 | ap.refer(&mut pid_env_var) 64 | .add_option(&["--env-var-with-pid"], ParseOption, " 65 | Add environment variable with pid") 66 | .metavar("ENV_VAR_NAME"); 67 | 
ap.refer(&mut namespaces) 68 | .add_option(&["--unshare-pid"], PushConst(Namespace::Pid), 69 | "Unshare pid namespace") 70 | .add_option(&["--unshare-net"], PushConst(Namespace::Net), 71 | "Unshare net namespace") 72 | .add_option(&["--unshare-mount"], PushConst(Namespace::Mount), 73 | "Unshare mount namespace") 74 | .add_option(&["--unshare-uts"], PushConst(Namespace::Uts), 75 | "Unshare UTS namespace") 76 | .add_option(&["--unshare-ipc"], PushConst(Namespace::Ipc), 77 | "Unshare IPC namespace") 78 | .add_option(&["--unshare-user"], PushConst(Namespace::User), 79 | "Unshare user namespace"); 80 | ap.stop_on_first_argument(true); 81 | ap.parse_args_or_exit(); 82 | } 83 | 84 | let mut cmd = unshare::Command::new(&command); 85 | cmd.args(&args[..]); 86 | alias.map(|x| cmd.arg0(x)); 87 | workdir.map(|dir| cmd.current_dir(dir)); 88 | gid.map(|gid| cmd.gid(gid)); 89 | uid.map(|uid| cmd.uid(uid)); 90 | chroot.map(|dir| cmd.chroot_dir(dir)); 91 | cmd.unshare(&namespaces); 92 | cmd.close_fds(..); 93 | if groups.len() > 0 { cmd.groups(groups); } 94 | if escape_stdout { 95 | cmd.stdout(unshare::Stdio::piped()); 96 | } 97 | if let Some(var) = pid_env_var { 98 | cmd.env_var_with_pid(var); 99 | } 100 | if verbose { 101 | // TODO(tailhook) implement display/debug in Command itself 102 | writeln!(&mut stderr(), "Command {:?}", cmd).ok(); 103 | } 104 | let mut child = match cmd.spawn() { 105 | Ok(child) => { child } 106 | Err(e) => { 107 | writeln!(&mut stderr(), "Error: {}", e).ok(); 108 | exit(127); 109 | } 110 | }; 111 | if verbose { 112 | writeln!(&mut stderr(), "Child pid {}", child.id()).ok(); 113 | } 114 | if escape_stdout { 115 | let mut buf = Vec::new(); 116 | child.stdout.take().unwrap().read_to_end(&mut buf).unwrap(); 117 | writeln!(&mut stderr(), "{:?}", 118 | String::from_utf8_lossy(&buf[..])).unwrap(); 119 | } 120 | let res = child.wait().unwrap(); 121 | if verbose { 122 | writeln!(&mut stderr(), "[pid {}] {}", child.id(), res).ok(); 123 | } 124 | 125 | } 126 | 
-------------------------------------------------------------------------------- /src/callbacks.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use crate::{Command, BoxError}; 4 | 5 | 6 | impl Command { 7 | /// Set a callback to run when child is already forked but not yet run 8 | /// 9 | /// When starting a child we sometimes need more setup from the parent, 10 | /// for example: to configure pid namespaces for the unprivileged 11 | /// process (child) by privileged process (parent). 12 | /// 13 | /// This callback runs in **parent** process after all built-in setup is 14 | /// done (setting uid namespaces). It always run before ``pre_exec`` 15 | /// callback in child. 16 | /// 17 | /// If callback returns error, process is shut down. 18 | /// 19 | /// Each invocation **replaces** callback, 20 | /// so there is only one of them can be called. 21 | /// 22 | pub fn before_unfreeze( 23 | &mut self, 24 | f: impl FnMut(u32) -> Result<(), BoxError> + 'static, 25 | ) -> &mut Self { 26 | self.before_unfreeze = Some(Box::new(f)); 27 | self 28 | } 29 | 30 | /// Set a callback to run in the child before calling exec 31 | /// 32 | /// The callback is executed right before `execve` system calls. 33 | /// All other modifications of the environment are already applied 34 | /// at this moment. It always run after ``before_unfreeze`` in parent. 35 | /// 36 | /// **Warning** this callback must not do any memory (de)allocations, 37 | /// use mutexes, otherwise process may crash or deadlock. Only bare 38 | /// syscalls are allowed (use `libc` crate). 39 | /// 40 | /// The closure is allowed to return an I/O error whose 41 | /// OS error code will be communicated back to the parent 42 | /// and returned as an error from when the spawn was requested. 43 | /// 44 | /// Note: unlike same method in stdlib, 45 | /// each invocation of this method **replaces** callback, 46 | /// so there is only one of them can be called. 
47 | pub unsafe fn pre_exec( 48 | &mut self, 49 | f: impl Fn() -> io::Result<()> + Send + Sync + 'static, 50 | ) -> &mut Self { 51 | self.pre_exec = Some(Box::new(f)); 52 | self 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/caps.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] 2 | #[allow(missing_docs, non_camel_case_types)] 3 | pub enum Capability { 4 | CAP_CHOWN = 0, 5 | CAP_DAC_OVERRIDE = 1, 6 | CAP_DAC_READ_SEARCH = 2, 7 | CAP_FOWNER = 3, 8 | CAP_FSETID = 4, 9 | CAP_KILL = 5, 10 | CAP_SETGID = 6, 11 | CAP_SETUID = 7, 12 | CAP_SETPCAP = 8, 13 | CAP_LINUX_IMMUTABLE = 9, 14 | CAP_NET_BIND_SERVICE = 10, 15 | CAP_NET_BROADCAST = 11, 16 | CAP_NET_ADMIN = 12, 17 | CAP_NET_RAW = 13, 18 | CAP_IPC_LOCK = 14, 19 | CAP_IPC_OWNER = 15, 20 | CAP_SYS_MODULE = 16, 21 | CAP_SYS_RAWIO = 17, 22 | CAP_SYS_CHROOT = 18, 23 | CAP_SYS_PTRACE = 19, 24 | CAP_SYS_PACCT = 20, 25 | CAP_SYS_ADMIN = 21, 26 | CAP_SYS_BOOT = 22, 27 | CAP_SYS_NICE = 23, 28 | CAP_SYS_RESOURCE = 24, 29 | CAP_SYS_TIME = 25, 30 | CAP_SYS_TTY_CONFIG = 26, 31 | CAP_MKNOD = 27, 32 | CAP_LEASE = 28, 33 | CAP_AUDIT_WRITE = 29, 34 | CAP_AUDIT_CONTROL = 30, 35 | CAP_SETFCAP = 31, 36 | CAP_MAC_OVERRIDE = 32, 37 | CAP_MAC_ADMIN = 33, 38 | CAP_SYSLOG = 34, 39 | CAP_WAKE_ALARM = 35, 40 | CAP_BLOCK_SUSPEND = 36, 41 | CAP_AUDIT_READ = 37, 42 | #[doc(hidden)] 43 | __NonExhaustive, 44 | } 45 | -------------------------------------------------------------------------------- /src/child.rs: -------------------------------------------------------------------------------- 1 | use std::os::unix::io::RawFd; 2 | use std::mem; 3 | use std::ptr; 4 | 5 | use libc; 6 | use nix; 7 | use libc::{c_void, c_ulong, sigset_t, size_t}; 8 | use libc::{kill, signal}; 9 | use libc::{F_GETFD, F_SETFD, F_DUPFD_CLOEXEC, FD_CLOEXEC, MNT_DETACH}; 10 | use libc::{SIG_DFL, SIG_SETMASK}; 11 | 12 | use 
crate::run::{ChildInfo, MAX_PID_LEN}; 13 | use crate::error::ErrorCode as Err; 14 | 15 | // And at this point we've reached a special time in the life of the 16 | // child. The child must now be considered hamstrung and unable to 17 | // do anything other than syscalls really. 18 | // 19 | // ESPECIALLY YOU CAN NOT DO MEMORY (DE)ALLOCATIONS 20 | // 21 | // See better explanation at: 22 | // https://github.com/rust-lang/rust/blob/c1e865c/src/libstd/sys/unix/process.rs#L202 23 | // 24 | 25 | // In particular ChildInfo is passed by refernce here to avoid 26 | // deallocating (parts of) it. 27 | pub unsafe fn child_after_clone(child: &ChildInfo) -> ! { 28 | let mut epipe = child.error_pipe; 29 | 30 | child.cfg.death_sig.as_ref().map(|&sig| { 31 | if libc::prctl(ffi::PR_SET_PDEATHSIG, sig as c_ulong, 0, 0, 0) != 0 { 32 | fail(Err::ParentDeathSignal, epipe); 33 | } 34 | }); 35 | 36 | // Now we must wait until parent set some environment for us. It's mostly 37 | // for uid_map/gid_map. But also used for attaching debugger and maybe 38 | // other things 39 | let mut wbuf = [0u8]; 40 | loop { 41 | // TODO(tailhook) put some timeout on this pipe? 42 | let rc = libc::read(child.wakeup_pipe, 43 | (&mut wbuf).as_ptr() as *mut c_void, 1); 44 | if rc == 0 { 45 | // Parent already dead presumably before we had a chance to 46 | // set PDEATHSIG, so just send signal ourself in that case 47 | if let Some(sig) = child.cfg.death_sig { 48 | kill(libc::getpid(), sig as i32); 49 | libc::_exit(127); 50 | } else { 51 | // In case we wanted to daemonize, just continue 52 | // 53 | // TODO(tailhook) not sure it's best thing to do. Maybe parent 54 | // failed to setup uid/gid map for us. Do we want to check 55 | // specific options? Or should we just always die? 
56 | break; 57 | } 58 | } else if rc < 0 { 59 | let errno = nix::errno::errno(); 60 | if errno == libc::EINTR as i32 || 61 | errno == libc::EAGAIN as i32 62 | { 63 | continue; 64 | } else { 65 | fail(Err::PipeError, errno); 66 | } 67 | } else { 68 | // Do we need to check that exactly one byte is received? 69 | break; 70 | } 71 | } 72 | 73 | // Move error pipe file descriptors in case they clobber stdio 74 | while epipe < 3 { 75 | let nerr = libc::fcntl(epipe, F_DUPFD_CLOEXEC, 3); 76 | if nerr < 0 { 77 | fail(Err::CreatePipe, epipe); 78 | } 79 | epipe = nerr; 80 | } 81 | 82 | for &(nstype, fd) in child.setns_namespaces { 83 | if libc::setns(fd, nstype.bits()) != 0 { 84 | fail(Err::SetNs, epipe); 85 | } 86 | } 87 | 88 | if !child.pid_env_vars.is_empty() { 89 | let mut buf = [0u8; MAX_PID_LEN+1]; 90 | let data = format_pid_fixed(&mut buf, libc::getpid()); 91 | for &(index, offset) in child.pid_env_vars { 92 | // we know that there are at least MAX_PID_LEN+1 bytes in buffer 93 | child.environ[index].offset(offset as isize) 94 | .copy_from(data.as_ptr() as *const libc::c_char, data.len()); 95 | } 96 | } 97 | 98 | child.pivot.as_ref().map(|piv| { 99 | if ffi::pivot_root(piv.new_root.as_ptr(), piv.put_old.as_ptr()) != 0 { 100 | fail(Err::ChangeRoot, epipe); 101 | } 102 | if libc::chdir(piv.workdir.as_ptr()) != 0 { 103 | fail(Err::ChangeRoot, epipe); 104 | } 105 | if piv.unmount_old_root { 106 | if libc::umount2(piv.old_inside.as_ptr(), MNT_DETACH) != 0 { 107 | fail(Err::ChangeRoot, epipe); 108 | } 109 | } 110 | }); 111 | 112 | child.chroot.as_ref().map(|chroot| { 113 | if libc::chroot(chroot.root.as_ptr()) != 0 { 114 | fail(Err::ChangeRoot, epipe); 115 | } 116 | if libc::chdir(chroot.workdir.as_ptr()) != 0 { 117 | fail(Err::ChangeRoot, epipe); 118 | } 119 | }); 120 | 121 | child.keep_caps.as_ref().map(|_| { 122 | // Don't use securebits because on older systems it doesn't work 123 | if libc::prctl(libc::PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0 { 124 | fail(Err::CapSet, epipe); 
125 | } 126 | }); 127 | 128 | child.cfg.gid.as_ref().map(|&gid| { 129 | if libc::setgid(gid) != 0 { 130 | fail(Err::SetUser, epipe); 131 | } 132 | }); 133 | 134 | child.cfg.supplementary_gids.as_ref().map(|groups| { 135 | if libc::setgroups(groups.len() as size_t, groups.as_ptr()) != 0 { 136 | fail(Err::SetUser, epipe); 137 | } 138 | }); 139 | 140 | child.cfg.uid.as_ref().map(|&uid| { 141 | if libc::setuid(uid) != 0 { 142 | fail(Err::SetUser, epipe); 143 | } 144 | }); 145 | 146 | child.keep_caps.as_ref().map(|caps| { 147 | let header = ffi::CapsHeader { 148 | version: ffi::CAPS_V3, 149 | pid: 0, 150 | }; 151 | let data = ffi::CapsData { 152 | effective_s0: caps[0], 153 | permitted_s0: caps[0], 154 | inheritable_s0: caps[0], 155 | effective_s1: caps[1], 156 | permitted_s1: caps[1], 157 | inheritable_s1: caps[1], 158 | }; 159 | if libc::syscall(libc::SYS_capset, &header, &data) != 0 { 160 | fail(Err::CapSet, epipe); 161 | } 162 | for idx in 0..caps.len()*32 { 163 | if caps[(idx >> 5) as usize] & (1 << (idx & 31)) != 0 { 164 | let rc = libc::prctl( 165 | libc::PR_CAP_AMBIENT, 166 | libc::PR_CAP_AMBIENT_RAISE, 167 | idx, 0, 0); 168 | if rc != 0 && nix::errno::errno() == libc::ENOTSUP { 169 | // no need to iterate if ambient caps are notsupported 170 | break; 171 | } 172 | } 173 | } 174 | }); 175 | 176 | child.cfg.work_dir.as_ref().map(|dir| { 177 | if libc::chdir(dir.as_ptr()) != 0 { 178 | fail(Err::Chdir, epipe); 179 | } 180 | }); 181 | 182 | 183 | for &(dest_fd, src_fd) in child.fds { 184 | if src_fd == dest_fd { 185 | let flags = libc::fcntl(src_fd, F_GETFD); 186 | if flags < 0 || 187 | libc::fcntl(src_fd, F_SETFD, flags & !FD_CLOEXEC) < 0 188 | { 189 | fail(Err::StdioError, epipe); 190 | } 191 | } else { 192 | if libc::dup2(src_fd, dest_fd) < 0 { 193 | fail(Err::StdioError, epipe); 194 | } 195 | } 196 | } 197 | 198 | for &(start, end) in child.close_fds { 199 | if start < end { 200 | for fd in start..end { 201 | if child.fds.iter().find(|&&(cfd, _)| cfd == 
fd).is_none() { 202 | // Close may fail with ebadf, and it's okay 203 | libc::close(fd); 204 | } 205 | } 206 | } 207 | } 208 | 209 | if child.cfg.restore_sigmask { 210 | let mut sigmask: sigset_t = mem::zeroed(); 211 | libc::sigemptyset(&mut sigmask); 212 | libc::pthread_sigmask(SIG_SETMASK, &sigmask, ptr::null_mut()); 213 | for sig in 1..32 { 214 | signal(sig, SIG_DFL); 215 | } 216 | } 217 | 218 | if let Some(callback) = child.pre_exec { 219 | if let Err(e) = callback() { 220 | fail_errno(Err::PreExec, 221 | e.raw_os_error().unwrap_or(10873289), 222 | epipe); 223 | } 224 | } 225 | 226 | libc::execve(child.filename, 227 | child.args.as_ptr(), 228 | // cancelling mutability, it should be fine 229 | child.environ.as_ptr() as *const *const libc::c_char); 230 | fail(Err::Exec, epipe); 231 | } 232 | 233 | unsafe fn fail(code: Err, output: RawFd) -> ! { 234 | fail_errno(code, nix::errno::errno(), output) 235 | } 236 | unsafe fn fail_errno(code: Err, errno: i32, output: RawFd) -> ! { 237 | let bytes = [ 238 | code as u8, 239 | (errno >> 24) as u8, 240 | (errno >> 16) as u8, 241 | (errno >> 8) as u8, 242 | (errno >> 0) as u8, 243 | // TODO(tailhook) rustc adds a special sentinel at the end of error 244 | // code. Do we really need it? Assuming our pipes are always cloexec'd. 245 | ]; 246 | // Writes less than PIPE_BUF should be atomic. It's also unclear what 247 | // to do if error happened anyway 248 | libc::write(output, bytes.as_ptr() as *const c_void, 5); 249 | libc::_exit(127); 250 | } 251 | 252 | fn format_pid_fixed<'a>(buf: &'a mut [u8], pid: libc::pid_t) -> &'a [u8] { 253 | buf[buf.len()-1] = 0; 254 | if pid == 0 { 255 | buf[buf.len()-2] = b'0'; 256 | return &buf[buf.len()-2..] 
257 | } else { 258 | let mut tmp = pid; 259 | // can't use stdlib function because that can allocate 260 | for n in (0..buf.len()-1).rev() { 261 | buf[n] = (tmp % 10) as u8 + b'0'; 262 | tmp /= 10; 263 | if tmp == 0 { 264 | return &buf[n..]; 265 | } 266 | } 267 | unreachable!("can't format pid"); 268 | }; 269 | } 270 | /// We don't use functions from nix here because they may allocate memory 271 | /// which we can't do in this module. 272 | mod ffi { 273 | use libc::{c_char, c_int}; 274 | 275 | pub const PR_SET_PDEATHSIG: c_int = 1; 276 | pub const CAPS_V3: u32 = 0x20080522; 277 | 278 | #[repr(C)] 279 | pub struct CapsHeader { 280 | pub version: u32, 281 | pub pid: i32, 282 | } 283 | 284 | #[repr(C)] 285 | pub struct CapsData { 286 | pub effective_s0: u32, 287 | pub permitted_s0: u32, 288 | pub inheritable_s0: u32, 289 | pub effective_s1: u32, 290 | pub permitted_s1: u32, 291 | pub inheritable_s1: u32, 292 | } 293 | 294 | extern { 295 | pub fn pivot_root(new_root: *const c_char, put_old: *const c_char) 296 | -> c_int; 297 | } 298 | } 299 | 300 | #[cfg(test)] 301 | mod test { 302 | use rand::{thread_rng, Rng}; 303 | use crate::run::MAX_PID_LEN; 304 | use std::ffi::CStr; 305 | use super::format_pid_fixed; 306 | 307 | fn fmt_normal(val: i32) -> String { 308 | let mut buf = [0u8; MAX_PID_LEN+1]; 309 | let slice = format_pid_fixed(&mut buf, val); 310 | return CStr::from_bytes_with_nul(slice).unwrap() 311 | .to_string_lossy().to_string(); 312 | } 313 | #[test] 314 | fn test_format() { 315 | assert_eq!(fmt_normal(0), "0"); 316 | assert_eq!(fmt_normal(1), "1"); 317 | assert_eq!(fmt_normal(7), "7"); 318 | assert_eq!(fmt_normal(79), "79"); 319 | assert_eq!(fmt_normal(254), "254"); 320 | assert_eq!(fmt_normal(1158), "1158"); 321 | assert_eq!(fmt_normal(77839), "77839"); 322 | } 323 | #[test] 324 | fn test_random() { 325 | for _ in 0..100000 { 326 | let x = thread_rng().gen(); 327 | if x < 0 { continue; } 328 | assert_eq!(fmt_normal(x), format!("{}", x)); 329 | } 330 | } 331
| } 332 | -------------------------------------------------------------------------------- /src/chroot.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | 3 | 4 | pub struct Pivot { 5 | pub new_root: CString, 6 | pub put_old: CString, 7 | pub old_inside: CString, 8 | pub workdir: CString, 9 | pub unmount_old_root: bool, 10 | } 11 | 12 | pub struct Chroot { 13 | pub root: CString, 14 | pub workdir: CString, 15 | } 16 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | use std::default::Default; 2 | use std::ffi::CString; 3 | use std::collections::HashMap; 4 | 5 | use nix::sys::signal::{Signal, SIGKILL}; 6 | use nix::sched::CloneFlags; 7 | use libc::{uid_t, gid_t}; 8 | 9 | use crate::idmap::{UidMap, GidMap}; 10 | use crate::namespace::Namespace; 11 | use crate::stdio::Closing; 12 | 13 | 14 | pub struct Config { 15 | pub death_sig: Option, 16 | pub work_dir: Option, 17 | pub uid: Option, 18 | pub gid: Option, 19 | pub supplementary_gids: Option>, 20 | pub id_maps: Option<(Vec, Vec)>, 21 | pub namespaces: CloneFlags, 22 | pub setns_namespaces: HashMap, 23 | pub restore_sigmask: bool, 24 | pub make_group_leader: bool, 25 | // TODO(tailhook) session leader 26 | } 27 | 28 | impl Default for Config { 29 | fn default() -> Config { 30 | Config { 31 | death_sig: Some(SIGKILL), 32 | work_dir: None, 33 | uid: None, 34 | gid: None, 35 | supplementary_gids: None, 36 | id_maps: None, 37 | namespaces: CloneFlags::empty(), 38 | setns_namespaces: HashMap::new(), 39 | restore_sigmask: true, 40 | make_group_leader: false, 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/debug.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self, Display}; 2 | 3 | use nix::sched::CloneFlags; 4 
| 5 | use crate::Command; 6 | 7 | 8 | /// This is a builder for various settings of how command may be printed 9 | /// 10 | /// Use `format!("{}", cmd.display(style))` to actually print a command. 11 | #[derive(Clone, Debug)] 12 | pub struct Style { 13 | cmd_only: bool, 14 | print_env: bool, 15 | show_path: bool, 16 | } 17 | 18 | /// A temporary value returned from `Command::display` for the sole purpose 19 | /// of being `Display`'ed. 20 | pub struct Printer<'a>(&'a Command, &'a Style); 21 | 22 | impl Style { 23 | /// Create a new style object that matches how `fmt::Debug` works for 24 | /// the command 25 | pub fn debug() -> Style { 26 | Style { 27 | cmd_only: false, 28 | print_env: true, 29 | show_path: true, 30 | } 31 | } 32 | /// Create a simple clean user-friendly display of the command 33 | /// 34 | /// Note: this kind of pretty-printing omits many important parts of the command 35 | /// and may be ambiguous. 36 | pub fn short() -> Style { 37 | Style { 38 | cmd_only: true, 39 | print_env: false, 40 | show_path: false, 41 | } 42 | } 43 | /// Toggle printing of environment 44 | /// 45 | /// When `false` is passed we only show `environ[12]`, i.e. a number of 46 | /// environment variables. Default is `true` for `Style::debug` 47 | /// constructor. 48 | /// 49 | /// This method does nothing when using `Style::short` constructor 50 | pub fn env(mut self, enable: bool) -> Style { 51 | self.print_env = enable; 52 | self 53 | } 54 | /// Toggle printing of full path to the executable 55 | /// 56 | /// By default we don't print full executable path in `Style::short` mode. 57 | /// 58 | /// Note: if this flag is disabled (default) we only show a name from 59 | /// `arg0`, instead of executable path. When flag is 60 | /// enabled, the `arg0` is shown alongside the executable path in 61 | /// parentheses if the values differ.
62 | /// 63 | /// This method does nothing when using `Style::debug` constructor 64 | pub fn path(mut self, enable: bool) -> Style { 65 | self.show_path = enable; 66 | self 67 | } 68 | } 69 | 70 | impl<'a> fmt::Display for Printer<'a> { 71 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 72 | let Printer(cmd, opt) = *self; 73 | 74 | if opt.cmd_only { 75 | if opt.show_path { 76 | write!(fmt, "{:?}", cmd.filename)?; 77 | if cmd.args[0] != cmd.filename { 78 | write!(fmt, " ({:?})", &cmd.args[0])?; 79 | } 80 | } else { 81 | let path = if cmd.args[0] != cmd.filename { 82 | &cmd.args[0] 83 | } else { 84 | &cmd.filename 85 | }; 86 | let last_slash = path.as_bytes().iter() 87 | .rposition(|&x| x == b'/'); 88 | if let Some(off) = last_slash { 89 | write!(fmt, "{:?}", 90 | &String::from_utf8_lossy(&path.as_bytes()[off+1..]))?; 91 | } else { 92 | write!(fmt, "{:?}", path)?; 93 | } 94 | } 95 | for arg in cmd.args[1..].iter() { 96 | write!(fmt, " {:?}", arg)?; 97 | } 98 | } else { 99 | write!(fmt, "")? 
147 | } 148 | Ok(()) 149 | } 150 | } 151 | 152 | impl Command { 153 | /// Returns the object that implements Display 154 | pub fn display<'a>(&'a self, style: &'a Style) -> Printer<'a> { 155 | Printer(self, style) 156 | } 157 | } 158 | 159 | impl fmt::Debug for Command { 160 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 161 | Printer(self, &Style::debug()).fmt(fmt) 162 | } 163 | } 164 | 165 | #[cfg(test)] 166 | mod test { 167 | use crate::{Command, Style}; 168 | 169 | #[test] 170 | fn test_debug() { 171 | let mut cmd = Command::new("/bin/hello"); 172 | cmd.env_clear(); 173 | cmd.env("A", "B"); 174 | assert_eq!(&format!("{:?}", cmd), 175 | r#""#); 176 | } 177 | 178 | #[test] 179 | fn test_comprehensive() { 180 | let mut cmd = Command::new("/bin/hello"); 181 | cmd.env_clear(); 182 | cmd.env("A", "B"); 183 | assert_eq!(&format!("{}", cmd.display(&Style::debug())), 184 | r#""#); 185 | } 186 | 187 | #[test] 188 | fn test_pretty() { 189 | let mut cmd = Command::new("/bin/hello"); 190 | cmd.env_clear(); 191 | cmd.arg("world!"); 192 | assert_eq!(&format!("{}", cmd.display(&Style::short())), 193 | r#""hello" "world!""#); 194 | } 195 | 196 | #[test] 197 | fn test_no_env() { 198 | let mut cmd = Command::new("/bin/hello"); 199 | cmd.env_clear(); 200 | cmd.env("A", "B"); 201 | assert_eq!(&format!("{}", cmd.display(&Style::debug().env(false))), 202 | r#""#); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::fmt; 3 | use crate::status::ExitStatus; 4 | 5 | use nix; 6 | 7 | 8 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 9 | pub enum ErrorCode { 10 | CreatePipe = 1, 11 | Fork = 2, 12 | Exec = 3, 13 | Chdir = 4, 14 | ParentDeathSignal = 5, 15 | PipeError = 6, 16 | StdioError = 7, 17 | SetUser = 8, 18 | ChangeRoot = 9, 19 | SetIdMap = 10, 20 | SetPGid = 11, 21 | SetNs = 12, 22 | CapSet = 13, 23 
| PreExec = 14, 24 | } 25 | 26 | /// Error runnning process 27 | /// 28 | /// This type has very large number of options and it's enum only to be 29 | /// compact. Probably you shouldn't match on the error cases but just format 30 | /// it for user into string. 31 | #[derive(Debug)] 32 | pub enum Error { 33 | /// Unknown nix error 34 | /// 35 | /// Frankly, this error should not happen when running process. We just 36 | /// keep it here in case `nix` returns this error, which should not happen. 37 | NixError, // Not sure it's possible, but it is here to convert from 38 | // nix::Error safer 39 | /// Some invalid error code received from child application 40 | UnknownError, 41 | /// Error happened when we were trying to create pipe. The pipes used for 42 | /// two purposes: (a) for the process's stdio (`Stdio::pipe()` or 43 | /// `Stdio::null()`), (b) internally to wake up child process and return 44 | /// error back to the parent. 45 | // TODO(tailhook) should create pipe be split into PipeError and StdioError 46 | CreatePipe(i32), 47 | /// Error when forking/cloning process 48 | Fork(i32), 49 | /// Error when running execve() systemcall 50 | Exec(i32), 51 | /// Error when setting working directory specified by user 52 | Chdir(i32), 53 | /// Unable to set death signal (probably signal number invalid) 54 | ParentDeathSignal(i32), 55 | /// Error reading/writing through one of the two signal pipes 56 | PipeError(i32), 57 | /// Error waiting for process (for some functions only, for example 58 | /// ``Command::status()``). It probably means someone already waited for 59 | /// the process, for example it might be other thread, or signal handler. 60 | WaitError(i32), 61 | /// Error setting up stdio for process 62 | StdioError(i32), 63 | /// Could not set supplementary groups, group id or user id for the 64 | /// process 65 | SetUser(i32), 66 | /// Error changing root, it explains `chroot`, `pivot_root` system calls 67 | /// and setting working directory inside new root. 
Also includes unmounting 68 | /// old file system for pivot_root case. 69 | ChangeRoot(i32), 70 | /// Error setting uid or gid map. May be either problem running 71 | /// `newuidmap`/`newgidmap` command or writing the mapping file directly 72 | SetIdMap(i32), 73 | /// Auxiliary command failed 74 | /// 75 | /// There are two auxiliary commands for now: `newuidmap` and `newgidmap`. 76 | /// They run only when uid mappings (user namespaces) are enabled. 77 | /// 78 | /// Note that failing to run the binary results in `SetIdMap(sys_errno)`; 79 | /// this error contains the status code of a command that was successfully 80 | /// spawned. 81 | AuxCommandExited(i32), 82 | /// Auxiliary command was killed by signal 83 | /// 84 | /// Similar to `AuxCommandExited` but when command was killed 85 | AuxCommandKilled(i32), 86 | /// Error when calling setpgid function 87 | SetPGid(i32), 88 | /// Error when calling setns syscall 89 | SetNs(i32), 90 | /// Error when calling capset syscall 91 | CapSet(i32), 92 | /// Before unfreeze callback error 93 | BeforeUnfreeze(Box), 94 | /// Before exec callback error 95 | PreExec(i32), 96 | } 97 | 98 | impl Error { 99 | /// Similarly to `io::Error` returns bare error code 100 | pub fn raw_os_error(&self) -> Option { 101 | use self::Error::*; 102 | match self { 103 | &UnknownError => None, 104 | &NixError => None, 105 | &CreatePipe(x) => Some(x), 106 | &Fork(x) => Some(x), 107 | &Exec(x) => Some(x), 108 | &Chdir(x) => Some(x), 109 | &ParentDeathSignal(x) => Some(x), 110 | &PipeError(x) => Some(x), 111 | &WaitError(x) => Some(x), 112 | &StdioError(x) => Some(x), 113 | &SetUser(x) => Some(x), 114 | &ChangeRoot(x) => Some(x), 115 | &SetIdMap(x) => Some(x), 116 | &AuxCommandExited(..) => None, 117 | &AuxCommandKilled(..) => None, 118 | &SetPGid(x) => Some(x), 119 | &SetNs(x) => Some(x), 120 | &CapSet(x) => Some(x), 121 | &BeforeUnfreeze(..)
=> None, 122 | &PreExec(x) => Some(x), 123 | } 124 | } 125 | } 126 | 127 | impl Error { 128 | fn title(&self) -> &'static str { 129 | use self::Error::*; 130 | match self { 131 | &UnknownError => "unexpected value received via signal pipe", 132 | &NixError => "some unknown nix error", 133 | &CreatePipe(_) => "can't create pipe", 134 | &Fork(_) => "error when forking", 135 | &Exec(_) => "error when executing", 136 | &Chdir(_) => "error when setting working directory", 137 | &ParentDeathSignal(_) => "error when death signal", 138 | &PipeError(_) => "error in signalling pipe", 139 | &WaitError(_) => "error in waiting for child", 140 | &StdioError(_) => "error setting up stdio for child", 141 | &SetUser(_) => "error setting user or groups", 142 | &ChangeRoot(_) => "error changing root directory", 143 | &SetIdMap(_) => "error setting uid/gid mappings", 144 | &AuxCommandExited(_) => "aux command exited with non-zero code", 145 | &AuxCommandKilled(_) => "aux command was killed by signal", 146 | &SetPGid(_) => "error when calling setpgid", 147 | &SetNs(_) => "error when calling setns", 148 | &CapSet(_) => "error when setting capabilities", 149 | &BeforeUnfreeze(_) => "error in before_unfreeze callback", 150 | &PreExec(_) => "error in pre_exec callback", 151 | } 152 | } 153 | } 154 | 155 | impl fmt::Display for Error { 156 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 157 | use crate::Error::*; 158 | if let Some(code) = self.raw_os_error() { 159 | let errno = nix::errno::from_i32(code); 160 | if let nix::errno::Errno::UnknownErrno = errno { 161 | // May be OS knows error name better 162 | write!(fmt, "{}: {}", self.title(), 163 | io::Error::from_raw_os_error(code)) 164 | } else { 165 | // Format similar to that of std::io::Error 166 | write!(fmt, "{}: {} (os error {})", self.title(), 167 | errno.desc(), code) 168 | } 169 | } else { 170 | match self { 171 | BeforeUnfreeze(err) => { 172 | write!(fmt, "{}: {}", self.title(), err) 173 | } 174 | _ => write!(fmt, 
"{}", self.title()), 175 | } 176 | } 177 | } 178 | } 179 | 180 | #[inline] 181 | pub fn result(code: ErrorCode, r: Result) 182 | -> Result 183 | { 184 | r.map_err(|e| e.into_error(code)) 185 | } 186 | 187 | #[inline] 188 | pub fn cmd_result(def_code: ErrorCode, r: Result) 189 | -> Result<(), Error> 190 | { 191 | match r.map_err(|e| e.into_error(def_code))? { 192 | ExitStatus::Exited(0) => Ok(()), 193 | ExitStatus::Exited(x) => Err(Error::AuxCommandExited(x as i32)), 194 | ExitStatus::Signaled(x, _) => Err(Error::AuxCommandKilled(x as i32)), 195 | } 196 | } 197 | 198 | pub trait IntoError { 199 | fn into_error(self, code: ErrorCode) -> Error; 200 | } 201 | 202 | impl IntoError for nix::Error { 203 | fn into_error(self, code: ErrorCode) -> Error { 204 | match self { 205 | nix::Error::Sys(x) => code.wrap(x as i32), 206 | _ => Error::NixError, 207 | } 208 | } 209 | } 210 | 211 | impl IntoError for io::Error { 212 | fn into_error(self, code: ErrorCode) -> Error { 213 | code.wrap(self.raw_os_error().unwrap_or(-1)) 214 | } 215 | } 216 | 217 | impl IntoError for Error { 218 | fn into_error(self, code: ErrorCode) -> Error { 219 | code.wrap(self.raw_os_error().unwrap_or(-1)) 220 | } 221 | } 222 | 223 | 224 | impl ErrorCode { 225 | pub fn wrap(self, errno: i32) -> Error { 226 | use self::ErrorCode as C; 227 | use self::Error as E; 228 | match self { 229 | C::CreatePipe => E::CreatePipe(errno), 230 | C::Fork => E::Fork(errno), 231 | C::Exec => E::Exec(errno), 232 | C::Chdir => E::Chdir(errno), 233 | C::ParentDeathSignal => E::ParentDeathSignal(errno), 234 | C::PipeError => E::PipeError(errno), 235 | C::StdioError => E::StdioError(errno), 236 | C::SetUser => E::SetUser(errno), 237 | C::ChangeRoot => E::ChangeRoot(errno), 238 | C::SetIdMap => E::SetIdMap(errno), 239 | C::SetPGid => E::SetPGid(errno), 240 | C::SetNs => E::SetNs(errno), 241 | C::CapSet => E::CapSet(errno), 242 | C::PreExec => E::PreExec(errno), 243 | } 244 | } 245 | pub fn from_i32(code: i32, errno: i32) -> Error 
{ 246 | use self::ErrorCode as C; 247 | use self::Error as E; 248 | match code { 249 | c if c == C::CreatePipe as i32 => E::CreatePipe(errno), 250 | c if c == C::Fork as i32 => E::Fork(errno), 251 | c if c == C::Exec as i32 => E::Exec(errno), 252 | c if c == C::Chdir as i32 => E::Chdir(errno), 253 | c if c == C::ParentDeathSignal as i32 254 | => E::ParentDeathSignal(errno), 255 | c if c == C::PipeError as i32 => E::PipeError(errno), 256 | c if c == C::StdioError as i32 => E::StdioError(errno), 257 | c if c == C::SetUser as i32 => E::SetUser(errno), 258 | c if c == C::ChangeRoot as i32 => E::ChangeRoot(errno), 259 | c if c == C::SetIdMap as i32 => E::SetIdMap(errno), 260 | c if c == C::SetPGid as i32 => E::SetPGid(errno), 261 | c if c == C::SetNs as i32 => E::SetNs(errno), 262 | c if c == C::CapSet as i32 => E::CapSet(errno), 263 | // no BeforeUnfreeze, because can't be in a child 264 | c if c == C::PreExec as i32 => E::PreExec(errno), 265 | _ => E::UnknownError, 266 | } 267 | } 268 | } 269 | -------------------------------------------------------------------------------- /src/fds.rs: -------------------------------------------------------------------------------- 1 | use std::mem::zeroed; 2 | use std::ops::{Range, RangeTo, RangeFrom, RangeFull}; 3 | use std::os::unix::io::RawFd; 4 | 5 | use nix::errno::errno; 6 | use libc::getrlimit; 7 | use libc::RLIMIT_NOFILE; 8 | 9 | use crate::stdio::{Fd}; 10 | use crate::Command; 11 | 12 | 13 | /// This is just a temporary enum to coerce `std::ops::Range*` variants 14 | /// into single value for convenience. Used in `close_fds` method. 
15 | pub enum AnyRange { 16 | RangeFrom(RawFd), 17 | Range(RawFd, RawFd), 18 | } 19 | 20 | 21 | impl Command { 22 | 23 | /// Configuration for any other file descriptor (panics for fds < 3; use 24 | /// stdin/stdout/stderr for them) 25 | /// 26 | /// Rust creates file descriptors with CLOEXEC flag by default, so no 27 | /// descriptors are inherited except ones specifically configured here 28 | /// (and stdio which is inherited by default) 29 | pub fn file_descriptor(&mut self, target_fd: RawFd, cfg: Fd) 30 | -> &mut Command 31 | { 32 | if target_fd <= 2 { 33 | panic!("Stdio file descriptors must be configured with respective \ 34 | methods instead of passing fd {} to `file_descritor()`", 35 | target_fd) 36 | } 37 | self.fds.insert(target_fd, cfg); 38 | self 39 | } 40 | 41 | /// Close a range of file descriptors as soon as process forks 42 | /// 43 | /// Subsequent calls to this method add additional ranges. Use `reset_fds` 44 | /// to remove all the ranges. 45 | /// 46 | /// File descriptors that are never closed: 47 | /// 48 | /// * the stdio file descriptors 49 | /// * descriptors configured using `file_descriptor`/`file_descriptor_raw` 50 | /// methods 51 | /// * internal file descriptors used for parent child notification by 52 | /// unshare crate itself (they are guaranteed to have CLOEXEC) 53 | /// 54 | /// You should avoid this method if possible and rely on CLOEXEC to 55 | /// do the work. But sometimes it's inevitable: 56 | /// 57 | /// 1. If you need to ensure closing descriptors for security reasons 58 | /// 2.
If you have some bad library out of your control which doesn't 59 | /// set CLOEXEC on owned the file descriptors 60 | /// 61 | /// Ranges obey the following rules: 62 | /// 63 | /// * Range like `..12` is transformed into `3..12` 64 | /// * Range with undefined upper bound `3..` is capped at current ulimit 65 | /// for file descriptors **at the moment of calling the method** 66 | /// * The full range `..` is an alias to `3..` 67 | /// * Multiple overlapping ranges are closed multiple times which is 68 | /// both harmless and useless 69 | /// 70 | /// # Panics 71 | /// 72 | /// Panics when can't get rlimit for range without upper bound. Should 73 | /// never happen in practice. 74 | /// 75 | /// Panics when lower range of fd is < 3 (stdio file descriptors) 76 | /// 77 | pub fn close_fds>(&mut self, range: A) 78 | -> &mut Command 79 | { 80 | self.close_fds.push(match range.into() { 81 | AnyRange::Range(x, y) => { 82 | assert!(x >= 3); 83 | (x, y) 84 | } 85 | AnyRange::RangeFrom(x) => unsafe { 86 | assert!(x >= 3); 87 | let mut rlim = zeroed(); 88 | let rc = getrlimit(RLIMIT_NOFILE, &mut rlim); 89 | if rc < 0 { 90 | panic!("Can't get rlimit: errno {}", errno()); 91 | } 92 | (x, rlim.rlim_cur as RawFd) 93 | } 94 | }); 95 | self 96 | } 97 | 98 | /// Reset file descriptor including stdio to the initial state 99 | /// 100 | /// Initial state is inherit all the stdio and do nothing to other fds. 
101 | pub fn reset_fds(&mut self) -> &mut Command { 102 | self.fds = vec![ 103 | (0, Fd::inherit()), 104 | (1, Fd::inherit()), 105 | (2, Fd::inherit()), 106 | ].into_iter().collect(); 107 | self.close_fds.clear(); 108 | self 109 | } 110 | } 111 | 112 | impl Into for Range { 113 | fn into(self) -> AnyRange { 114 | return AnyRange::Range(self.start, self.end); 115 | } 116 | } 117 | 118 | impl Into for RangeTo { 119 | fn into(self) -> AnyRange { 120 | return AnyRange::Range(3, self.end); 121 | } 122 | } 123 | 124 | impl Into for RangeFrom { 125 | fn into(self) -> AnyRange { 126 | return AnyRange::RangeFrom(self.start); 127 | } 128 | } 129 | 130 | impl Into for RangeFull { 131 | fn into(self) -> AnyRange { 132 | return AnyRange::RangeFrom(3); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/ffi_util.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::{CString, OsStr}; 2 | use std::os::unix::ffi::OsStrExt; 3 | 4 | 5 | pub trait ToCString { 6 | fn to_cstring(&self) -> CString; 7 | } 8 | 9 | impl> ToCString for T { 10 | fn to_cstring(&self) -> CString { 11 | CString::new(self.as_ref().as_bytes()) 12 | .unwrap() 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/idmap.rs: -------------------------------------------------------------------------------- 1 | use libc::{uid_t, gid_t}; 2 | 3 | 4 | /// Entry (row) in the uid map 5 | #[derive(Clone, Copy, PartialEq, Eq, Debug)] 6 | pub struct UidMap { 7 | /// First uid inside the guest namespace 8 | pub inside_uid: uid_t, 9 | /// First uid in external (host) namespace 10 | pub outside_uid: uid_t, 11 | /// Number of uids that this entry allows starting from inside/outside uid 12 | pub count: uid_t, 13 | } 14 | 15 | /// Entry (row) in the gid map 16 | #[derive(Clone, Copy, PartialEq, Eq, Debug)] 17 | pub struct GidMap { 18 | /// First gid inside the guest namespace 
19 | pub inside_gid: gid_t, 20 | /// First gid in external (host) namespace 21 | pub outside_gid: gid_t, 22 | /// Number of gids that this entry allows starting from inside/outside gid 23 | pub count: gid_t, 24 | } 25 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! The `Command` has mostly the same API as `std::process::Command` except where 2 | //! a difference is absolutely needed. 3 | //! 4 | //! In addition `Command` contains methods to configure linux namespaces, 5 | //! chroots and more linux stuff. 6 | //! 7 | //! We have diverged from ``std::process::Command`` in the following 8 | //! major things: 9 | //! 10 | //! 1. Error handling. Since sometimes we have long chains of system calls 11 | //! involved, we need to give user some way to find out which call failed 12 | //! with an error, so `io::Error` is not an option. We have 13 | //! ``error::Error`` class which describes the error as precisely as 14 | //! possible 15 | //! 16 | //! 2. We set ``PDEATHSIG`` to ``SIGKILL`` by default. I.e. child process will 17 | //! die when parent is dead. This is what you want most of the time. If you 18 | //! want to allow child process to daemonize explicitly call the 19 | //! ``allow_daemonize`` method (but look at documentation of 20 | //! ``Command::set_parent_death_signal`` first). 21 | //! 22 | //! 3. We don't search for `program` in `PATH`. It's hard to do right in all 23 | //! cases of `chroot`, `pivot_root`, user and mount namespaces. So we expect 24 | //! it's easier to do for your specific container setup. 25 | //! 26 | //! Anyway this is a low-level interface. You may want to use some higher level 27 | //! abstraction which mounts filesystems, sets up networking and monitors processes. 28 | //!
29 | #![warn(missing_docs)] 30 | extern crate libc; 31 | extern crate nix; 32 | #[cfg(test)] extern crate rand; 33 | 34 | mod caps; 35 | mod namespace; 36 | mod idmap; 37 | mod chroot; 38 | mod ffi_util; 39 | mod std_api; 40 | mod config; 41 | mod error; 42 | mod pipe; 43 | mod child; 44 | mod callbacks; 45 | mod linux; 46 | mod fds; 47 | mod run; 48 | mod status; 49 | mod wait; 50 | mod stdio; 51 | mod debug; 52 | mod zombies; 53 | 54 | pub use crate::error::Error; 55 | pub use crate::status::ExitStatus; 56 | pub use crate::stdio::{Stdio, Fd}; 57 | pub use crate::pipe::{PipeReader, PipeWriter}; 58 | pub use crate::namespace::{Namespace}; 59 | pub use crate::idmap::{UidMap, GidMap}; 60 | pub use crate::zombies::{reap_zombies, child_events, ChildEvent}; 61 | pub use nix::sys::signal::Signal; 62 | pub use crate::debug::{Style, Printer}; 63 | pub use crate::caps::{Capability}; 64 | 65 | use std::ffi::{CString, OsString}; 66 | use std::path::PathBuf; 67 | use std::os::unix::io::RawFd; 68 | use std::collections::{HashMap, HashSet}; 69 | use std::io; 70 | 71 | use crate::pipe::PipeHolder; 72 | 73 | use libc::{pid_t}; 74 | 75 | type BoxError = Box; 76 | 77 | /// Main class for running processes. Works in the spirit of builder pattern. 
78 | pub struct Command { 79 | filename: CString, 80 | args: Vec, 81 | environ: Option>, 82 | config: config::Config, 83 | fds: HashMap, 84 | close_fds: Vec<(RawFd, RawFd)>, 85 | chroot_dir: Option, 86 | pivot_root: Option<(PathBuf, PathBuf, bool)>, 87 | id_map_commands: Option<(PathBuf, PathBuf)>, 88 | pid_env_vars: HashSet, 89 | keep_caps: Option<[u32; 2]>, 90 | before_unfreeze: Option Result<(), BoxError>>>, 91 | pre_exec: Option Result<(), io::Error>>>, 92 | } 93 | 94 | /// The reference to the running child 95 | #[derive(Debug)] 96 | pub struct Child { 97 | pid: pid_t, 98 | status: Option, 99 | fds: HashMap, 100 | /// Stdin of a child if it is a pipe 101 | pub stdin: Option, 102 | /// Stdout of a child if it is a pipe 103 | pub stdout: Option, 104 | /// Stderr of a child if it is a pipe 105 | pub stderr: Option, 106 | } 107 | -------------------------------------------------------------------------------- /src/linux.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::OsStr; 2 | use std::io; 3 | use std::os::unix::io::AsRawFd; 4 | use std::path::Path; 5 | 6 | use nix::sys::signal::{Signal}; 7 | 8 | use crate::ffi_util::ToCString; 9 | use crate::{Command, Namespace}; 10 | use crate::idmap::{UidMap, GidMap}; 11 | use crate::stdio::dup_file_cloexec; 12 | use crate::namespace::to_clone_flag; 13 | use crate::caps::Capability; 14 | 15 | 16 | impl Command { 17 | 18 | /// Allow child process to daemonize. By default we run equivalent of 19 | /// `set_parent_death_signal(SIGKILL)`. See the `set_parent_death_signal` 20 | /// for better explanation. 21 | pub fn allow_daemonize(&mut self) -> &mut Command { 22 | self.config.death_sig = None; 23 | self 24 | } 25 | 26 | /// Set a signal that is sent to a process when it's parent is dead. 27 | /// This is by default set to `SIGKILL`. And you should keep it that way 28 | /// unless you know what you are doing. 
29 | /// 30 | /// Particularly you should consider the following choices: 31 | /// 32 | /// 1. Instead of setting ``PDEATHSIG`` to some other signal, send signal 33 | /// yourself and wait until child gracefully finishes. 34 | /// 35 | /// 2. Instead of daemonizing use ``systemd``/``upstart``/whatever system 36 | /// init script to run your service 37 | /// 38 | /// Another issue with this option is that it works only with immediate 39 | /// child. To better control all descendant processes you may need the 40 | /// following: 41 | /// 42 | /// 1. The `prctl(PR_SET_CHILD_SUBREAPER..)` in parent which allows to 43 | /// "catch" descendant processes. 44 | /// 45 | /// 2. The pid namespaces 46 | /// 47 | /// The former is out of scope of this library. The latter works by 48 | /// ``cmd.unshare(Namespace::Pid)``, but you may need to setup mount points 49 | /// and other important things (which are out of scope too). 50 | /// 51 | /// To reset this behavior use ``allow_daemonize()``. 52 | /// 53 | pub fn set_parent_death_signal(&mut self, sig: Signal) -> &mut Command { 54 | self.config.death_sig = Some(sig); 55 | self 56 | } 57 | 58 | /// Set chroot dir. Only absolute path is supported 59 | /// 60 | /// This method has a non-standard security feature: even if current_dir 61 | /// is unspecified we set it to the directory inside the new root dir. 62 | /// see more details in the description of `Command::current_dir`. 63 | /// 64 | /// Note that if both chroot dir and pivot_root specified. The chroot dir 65 | /// is applied after pivot root. 
If chroot dir is relative it's relative 66 | /// to either suffix of the current directory with stripped off pivot dir 67 | /// or the pivot dir itself (if old workdir is not prefixed by pivot dir) 68 | /// 69 | /// # Panics 70 | /// 71 | /// If directory is not absolute 72 | pub fn chroot_dir>(&mut self, dir: P) -> &mut Command 73 | { 74 | let dir = dir.as_ref(); 75 | if !dir.is_absolute() { 76 | panic!("Chroot dir must be absolute"); 77 | } 78 | self.chroot_dir = Some(dir.to_path_buf()); 79 | 80 | self 81 | } 82 | 83 | /// Moves the root of the file system to the directory `put_old` and 84 | /// makes `new_root` the new root file system. It also optionally 85 | /// unmounts the old root at `put_old` after moving root (but `put_old` must exist 86 | /// anyway). 87 | /// 88 | /// The documentation says that `put_old` must be underneath the 89 | /// `new_root`. Currently we have a restriction that both must be absolute 90 | /// and `new_root` be a prefix of `put_old`, but we may lift it later. 91 | /// 92 | /// **Warning** if you don't unshare the mount namespace you will get 93 | /// moved filesystem root for *all processes running in that namespace* 94 | /// including parent (currently running) process itself. If you don't 95 | /// run equivalent to ``mount --make-private`` for the old root filesystem 96 | /// and set ``unmount`` to true, you may get unmounted filesystem for 97 | /// running processes too. 98 | /// 99 | /// See `man 2 pivot_root` for further details 100 | /// 101 | /// Note that if both chroot dir and pivot_root are specified, the chroot dir 102 | /// is applied after pivot root. 103 | /// 104 | /// # Panics 105 | /// 106 | /// Panics if either path is not absolute or new_root is not a prefix of 107 | /// put_old.
108 | pub fn pivot_root, B:AsRef>(&mut self, 109 | new_root: A, put_old: B, unmount: bool) 110 | -> &mut Command 111 | { 112 | let new_root = new_root.as_ref(); 113 | let put_old = put_old.as_ref(); 114 | if !new_root.is_absolute() { 115 | panic!("New root must be absolute"); 116 | }; 117 | if !put_old.is_absolute() { 118 | panic!("The `put_old` dir must be absolute"); 119 | } 120 | let mut old_cmp = put_old.components(); 121 | for (n, o) in new_root.components().zip(old_cmp.by_ref()) { 122 | if n != o { 123 | panic!("The new_root is not a prefix of put old"); 124 | } 125 | } 126 | self.pivot_root = Some((new_root.to_path_buf(), put_old.to_path_buf(), 127 | unmount)); 128 | self 129 | } 130 | 131 | /// Unshare given namespaces 132 | /// 133 | /// Note: each namespace have some consequences on how new process will 134 | /// work, some of them are described in the `Namespace` type documentation. 135 | pub fn unshare<'x>(&mut self, iter: impl IntoIterator) 136 | -> &mut Command 137 | { 138 | for ns in iter { 139 | self.config.namespaces |= to_clone_flag(*ns); 140 | } 141 | self 142 | } 143 | 144 | /// Reassociate child process with a namespace specified by a file 145 | /// descriptor 146 | /// 147 | /// `file` argument is an open file referring to a namespace 148 | /// 149 | /// 'ns' is a namespace type 150 | /// 151 | /// See `man 2 setns` for further details 152 | /// 153 | /// Note: using `unshare` and `setns` for the same namespace is meaningless. 154 | pub fn set_namespace(&mut self, file: &F, ns: Namespace) 155 | -> io::Result<&mut Command> 156 | { 157 | let fd = dup_file_cloexec(file)?; 158 | self.config.setns_namespaces.insert(ns, fd); 159 | Ok(self) 160 | } 161 | 162 | /// Sets user id and group id mappings for new process 163 | /// 164 | /// This automatically enables `User` namespace. You should also set `uid` 165 | /// and `gid` with respective methods for the new process. 
166 | /// 167 | /// Note there are basically two ways to enable id maps: 168 | /// 169 | /// 1. Write them directly 170 | /// 2. Invoke a `newuidmap`, `newgidmap` commands 171 | /// 172 | /// First option works either if current process is root or if resulting 173 | /// map only contains current user in the mapping. 174 | /// 175 | /// The library will not try to guess the behavior. By default it will 176 | /// write directly. You need to call the `set_id_map_commands` when you 177 | /// want non-default behavior. 178 | /// 179 | /// See `man 7 user_namespaces` for more info 180 | pub fn set_id_maps(&mut self, uid_map: Vec, gid_map: Vec) 181 | -> &mut Command 182 | { 183 | self.unshare(&[Namespace::User]); 184 | self.config.id_maps = Some((uid_map, gid_map)); 185 | self 186 | } 187 | 188 | /// Set path to command-line utilities for writing uid/gid maps 189 | /// 190 | /// The utilities provided my obey same interface as `newuidmap` and 191 | /// `newgidmap` from `shadow` (or sometimes `uidmap`) package. To get it 192 | /// working you usually need to setup `/etc/subuid` and `/etc/subgid` 193 | /// files. 194 | /// 195 | /// See `man 1 newuidmap`, `man 1 newgidmap` for details 196 | /// 197 | /// This method is no-op unless `set_id_maps` is called. 198 | pub fn set_id_map_commands, B: AsRef>(&mut self, 199 | newuidmap: A, newgidmap: B) 200 | -> &mut Command 201 | { 202 | self.id_map_commands = Some(( 203 | newuidmap.as_ref().to_path_buf(), 204 | newgidmap.as_ref().to_path_buf())); 205 | self 206 | } 207 | 208 | /// Keep signal mask intact after executing child, keeps also ignored 209 | /// signals 210 | /// 211 | /// By default signal mask is empty and all signals are reset to the 212 | /// `SIG_DFL` value right before `execve()` syscall. 213 | /// 214 | /// This is only useful if started process is aware of the issue and sets 215 | /// sigmasks to some reasonable value. 
When used wisely it may avoid some 216 | /// race conditions when signal is sent after child is cloned but before 217 | /// child have been able to establish it's state. 218 | pub fn keep_sigmask(&mut self) -> &mut Command { 219 | self.config.restore_sigmask = false; 220 | self 221 | } 222 | 223 | /// Set the argument zero for the process 224 | /// 225 | /// By default argument zero is same as path to the program to run. You 226 | /// may set it to a short name of the command or to something else to 227 | /// pretend there is a symlink to a program (for example to run `gzip` as 228 | /// `gunzip`). 229 | pub fn arg0>(&mut self, arg: S) -> &mut Command { 230 | self.args[0] = arg.to_cstring(); 231 | self 232 | } 233 | 234 | /// Makes child process a group leader 235 | /// 236 | /// If child process is being launched as a foreground job, 237 | /// the child process group needs to be put into the foreground on 238 | /// the controlling terminal using `tcsetpgrp`. To request status 239 | /// information from stopped child process you should call `waitpid` with 240 | /// `WUNTRACED` flag. And then check status with `WIFSTOPPED` macro. 241 | /// After giving child process group access to the controlling terminal 242 | /// you should send the SIGCONT signal to the child process group. 243 | pub fn make_group_leader(&mut self, make_group_leader: bool) -> &mut Command { 244 | self.config.make_group_leader = make_group_leader; 245 | self 246 | } 247 | 248 | /// Inserts a magic environment variable that will contain pid of spawned 249 | /// process 250 | /// 251 | /// This is usually needed to avoid accidental propagation of the 252 | /// environment variables targeted only at this specific process. 
253 | /// 254 | /// # Example 255 | /// 256 | /// This is how you can encode [systemd activation] protocol: 257 | /// 258 | /// ```rust,ignore 259 | /// cmd.env_var_with_pid("LISTEN_PID"); 260 | /// cmd.env("LISTEN_FDS", "1"); 261 | /// ``` 262 | /// 263 | /// [systemd activation]: https://www.freedesktop.org/software/systemd/man/sd_listen_fds.html 264 | pub fn env_var_with_pid(&mut self, key: K) -> &mut Command 265 | where K: AsRef, 266 | { 267 | self.init_env_map(); 268 | self.environ.as_mut().unwrap().remove(key.as_ref()); 269 | self.pid_env_vars.insert(key.as_ref().to_os_string()); 270 | self 271 | } 272 | 273 | /// Drop all capabilities, but keep only ones set by this setter 274 | /// 275 | /// This method sets three or four sets of capabilities: 276 | /// * Permitted 277 | /// * Inherited 278 | /// * Effective 279 | /// * Ambient (if supported) 280 | /// 281 | /// This works both when uid changes (from 0 to other) and when it 282 | /// isn't changed, but requires process to have all capabilities 283 | /// granted by this method. 284 | /// 285 | /// This method replaces whole capability mask on each invocation 286 | pub fn keep_caps<'x>(&mut self, 287 | caps: impl IntoIterator) 288 | { 289 | let mut buf = [0u32; 2]; 290 | for item in caps { 291 | let item = *item as u32; 292 | buf[(item >> 5) as usize] |= 1 << (item & 31); 293 | } 294 | self.keep_caps = Some(buf); 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /src/namespace.rs: -------------------------------------------------------------------------------- 1 | use nix::sched::CloneFlags; 2 | 3 | 4 | /// Namespace name to unshare 5 | /// 6 | /// See `man 7 namespaces` for more information 7 | #[derive(PartialEq, Eq, Hash, Clone, Copy)] 8 | pub enum Namespace { 9 | /// Unshare the mount namespace. It basically means that you can now mount 10 | /// and unmount folders without touching parent mount points. 
11 | /// 12 | /// But note that you also have to make all your mountpoints non-shareable 13 | /// or changes will be propagated to parent namespace anyway. 14 | /// 15 | /// This is always needed if you want `pivot_root` (but not enforced by 16 | /// library) 17 | Mount, 18 | /// Unshare the UTS namespace. This allows you to change hostname of the 19 | /// new container. 20 | Uts, 21 | /// Unshare the IPC namespace. This creates new namespace for System V IPC 22 | /// POSIX message queues and similar. 23 | Ipc, 24 | /// Unshare user namespace. This allows unprivileged user to be root 25 | /// user in new namespace and/or change mappings between real (outer) 26 | /// user namespace and the inner one. 27 | /// 28 | /// This one is required if you want to unshare any other namespace without 29 | /// root privileges (it's not enforced by kernel not the library) 30 | /// 31 | /// See `man 7 user_namespaces` for more information. 32 | User, 33 | /// Unshare pid namespace. The child process becomes PID 1 (inside 34 | /// container) with the following rough list of consequences: 35 | /// 36 | /// 1. All daemon processes are reparented to the process 37 | /// 2. All signal dispositions are set to `Ignore`. E.g. process doesn't 38 | /// get killed by `SIGINT` (Ctrl+C), unless signal handler is explicitly 39 | /// set 40 | /// 3. If the process is dead, all its children are killed by `SIGKILL` 41 | /// (i.e. can't catch the death signal) 42 | /// 43 | /// All this means that most of the time the new process having this 44 | /// namespace must be some kind of process supervisor. 45 | /// 46 | /// Also take a note that `/proc` is not automatically changed. So you 47 | /// should also unshare `Mount` namespace and mount new `/proc` inside the 48 | /// PID namespace. 
49 | /// 50 | /// See `man 7 pid_namespaces` for more information 51 | Pid, 52 | /// Unshare network namespace 53 | /// 54 | /// New namespace is empty and has no conectivity, even localhost network, 55 | /// unless some setup is done afterwards. 56 | /// 57 | /// Note that unix sockets continue to work, but "abstract unix sockets" 58 | /// are isolated as a result of this option. The availability of unix 59 | /// sockets might also mean that libc is able to resolve DNS names by using 60 | /// NSCD. You may isolate unix sockets by using any kind of filesystem 61 | /// isolation. 62 | Net, 63 | /// Cgroup namespace 64 | /// 65 | /// Creates a new namespace for CGroups. 66 | /// 67 | /// See `man 7 cgroup_namespaces` for more information 68 | Cgroup, 69 | } 70 | 71 | /// Convert namespace to a clone flag passed to syscalls 72 | // TODO(tailhook) should this method be private? 73 | pub fn to_clone_flag(ns: Namespace) -> CloneFlags { 74 | match ns { 75 | Namespace::Mount => CloneFlags::CLONE_NEWNS, 76 | Namespace::Uts => CloneFlags::CLONE_NEWUTS, 77 | Namespace::Ipc => CloneFlags::CLONE_NEWIPC, 78 | Namespace::User => CloneFlags::CLONE_NEWUSER, 79 | Namespace::Pid => CloneFlags::CLONE_NEWPID, 80 | Namespace::Net => CloneFlags::CLONE_NEWNET, 81 | Namespace::Cgroup => CloneFlags::CLONE_NEWCGROUP, 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/pipe.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::mem; 3 | use std::os::unix::io::{RawFd}; 4 | 5 | use nix::unistd::pipe2; 6 | use nix::fcntl::OFlag; 7 | use libc; 8 | use libc::{c_void, size_t}; 9 | 10 | use crate::error::{result, Error}; 11 | use crate::error::ErrorCode::CreatePipe; 12 | 13 | 14 | /// A pipe used to communicate with subprocess 15 | #[derive(Debug)] 16 | pub struct Pipe(RawFd, RawFd); 17 | 18 | /// A reading end of `Pipe` object after `Pipe::split` 19 | #[derive(Debug)] 20 | pub struct 
PipeReader(RawFd); 21 | 22 | /// A writing end of `Pipe` object after `Pipe::split` 23 | #[derive(Debug)] 24 | pub struct PipeWriter(RawFd); 25 | 26 | #[derive(Debug)] 27 | pub enum PipeHolder { 28 | Reader(PipeReader), 29 | Writer(PipeWriter), 30 | } 31 | 32 | 33 | impl Pipe { 34 | pub fn new() -> Result { 35 | let (rd, wr) = result(CreatePipe, pipe2(OFlag::O_CLOEXEC))?; 36 | Ok(Pipe(rd, wr)) 37 | } 38 | pub fn split(self) -> (PipeReader, PipeWriter) { 39 | let Pipe(rd, wr) = self; 40 | mem::forget(self); 41 | (PipeReader(rd), PipeWriter(wr)) 42 | } 43 | } 44 | 45 | impl Drop for Pipe { 46 | fn drop(&mut self) { 47 | let Pipe(x, y) = *self; 48 | unsafe { 49 | libc::close(x); 50 | libc::close(y); 51 | } 52 | } 53 | } 54 | 55 | impl PipeReader { 56 | /// Extract file descriptor from pipe reader without closing 57 | // TODO(tailhook) implement IntoRawFd here 58 | pub fn into_fd(self) -> RawFd { 59 | let PipeReader(fd) = self; 60 | mem::forget(self); 61 | return fd; 62 | } 63 | } 64 | 65 | impl PipeWriter { 66 | /// Extract file descriptor from pipe reader without closing 67 | // TODO(tailhook) implement IntoRawFd here 68 | pub fn into_fd(self) -> RawFd { 69 | let PipeWriter(fd) = self; 70 | mem::forget(self); 71 | return fd; 72 | } 73 | } 74 | 75 | impl Drop for PipeReader { 76 | fn drop(&mut self) { 77 | unsafe { libc::close(self.0) }; 78 | } 79 | } 80 | 81 | impl Drop for PipeWriter { 82 | fn drop(&mut self) { 83 | unsafe { libc::close(self.0) }; 84 | } 85 | } 86 | 87 | impl io::Read for PipeReader { 88 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 89 | let ret = unsafe { 90 | libc::read(self.0, 91 | buf.as_mut_ptr() as *mut c_void, 92 | buf.len() as size_t) 93 | }; 94 | if ret < 0 { 95 | return Err(io::Error::last_os_error()); 96 | } 97 | Ok(ret as usize) 98 | } 99 | } 100 | 101 | impl io::Write for PipeWriter { 102 | fn write(&mut self, buf: &[u8]) -> io::Result { 103 | let ret = unsafe { 104 | libc::write(self.0, 105 | buf.as_ptr() as *const c_void, 106 | 
buf.len() as size_t) 107 | }; 108 | if ret < 0 { 109 | return Err(io::Error::last_os_error()); 110 | } 111 | Ok(ret as usize) 112 | } 113 | fn flush(&mut self) -> io::Result<()> { Ok(()) } 114 | } 115 | -------------------------------------------------------------------------------- /src/run.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::env::current_dir; 3 | use std::ffi::CString; 4 | use std::fs::File; 5 | use std::io::{self, Read, Write}; 6 | use std::iter::repeat; 7 | use std::os::unix::ffi::{OsStrExt}; 8 | use std::os::unix::io::{RawFd, AsRawFd}; 9 | use std::path::{Path, PathBuf}; 10 | use std::ptr; 11 | 12 | use libc::{c_char, close}; 13 | use nix; 14 | use nix::errno::Errno::EINTR; 15 | use nix::fcntl::{fcntl, FcntlArg, open}; 16 | use nix::fcntl::OFlag; 17 | use nix::sched::{clone, CloneFlags}; 18 | use nix::sys::signal::{SIGKILL, SIGCHLD, kill}; 19 | use nix::sys::stat::Mode; 20 | use nix::sys::wait::waitpid; 21 | use nix::unistd::{setpgid, Pid}; 22 | 23 | use crate::child; 24 | use crate::config::Config; 25 | use crate::{Command, Child, ExitStatus}; 26 | use crate::error::{Error, result, cmd_result}; 27 | use crate::error::ErrorCode as Err; 28 | use crate::pipe::{Pipe, PipeReader, PipeWriter, PipeHolder}; 29 | use crate::stdio::{Fd, Closing}; 30 | use crate::chroot::{Pivot, Chroot}; 31 | use crate::ffi_util::ToCString; 32 | use crate::namespace::to_clone_flag; 33 | 34 | 35 | pub const MAX_PID_LEN: usize = 12; 36 | 37 | pub struct ChildInfo<'a> { 38 | pub filename: *const c_char, 39 | pub args: &'a [*const c_char], 40 | // this is mut because we write pid to environ 41 | pub environ: &'a [*mut c_char], 42 | pub cfg: &'a Config, 43 | pub chroot: &'a Option, 44 | pub pivot: &'a Option, 45 | pub wakeup_pipe: RawFd, 46 | pub error_pipe: RawFd, 47 | pub fds: &'a [(RawFd, RawFd)], 48 | /// This map may only be used for lookup but not for iteration! 
pub fd_lookup: &'a HashMap<RawFd, RawFd>,
    pub close_fds: &'a [(RawFd, RawFd)],
    pub setns_namespaces: &'a [(CloneFlags, RawFd)],
    pub pid_env_vars: &'a [(usize, usize)],
    pub keep_caps: &'a Option<[u32; 2]>,
    pub pre_exec: &'a Option<Box<dyn Fn() -> Result<(), io::Error>>>,
}

// NOTE(review): generic arguments in this span were lost in extraction and
// are reconstructed from how the values are used.

/// Collect pointers to the given C strings and append a terminating NULL
fn raw_with_null(arr: &Vec<CString>) -> Vec<*const c_char> {
    let mut vec = Vec::with_capacity(arr.len() + 1);
    for i in arr {
        vec.push(i.as_ptr());
    }
    vec.push(ptr::null());
    return vec;
}

/// Same as `raw_with_null`, but for mutable byte buffers
fn raw_with_null_mut(arr: &mut Vec<Vec<u8>>) -> Vec<*mut c_char> {
    let mut vec = Vec::with_capacity(arr.len() + 1);
    for i in arr {
        vec.push(i.as_mut_ptr() as *mut c_char);
    }
    vec.push(ptr::null_mut());
    return vec;
}

/// Strip the prefix `rel` from `dir`
///
/// Returns `None` if `rel` is not a component-wise prefix of `dir`.
/// Otherwise returns the remainder, re-rooted at `/` when `absolute` is set.
fn relative_to<A:AsRef<Path>, B:AsRef<Path>>(dir: A, rel: B, absolute: bool)
    -> Option<PathBuf>
{
    // `strip_prefix` also rejects a `rel` that is longer than `dir`. A
    // pairwise zip over the two component iterators stops at the shorter
    // path and would report a match with an empty remainder in that case.
    let rest = dir.as_ref().strip_prefix(rel.as_ref()).ok()?;
    if absolute {
        Some(Path::new("/").join(rest))
    } else {
        Some(rest.to_path_buf())
    }
}

fn prepare_descriptors(fds: &HashMap<RawFd, Fd>)
    -> Result<(HashMap<RawFd, RawFd>, HashMap<RawFd, PipeHolder>,
               Vec<Closing>), Error>
{
    let mut inner = HashMap::new();
    let mut outer = HashMap::new();
    let mut guards = Vec::new();
    for (&dest_fd, fdkind) in fds.iter() {
        let mut fd = match fdkind {
            &Fd::ReadPipe => {
                let (rd, wr) = Pipe::new()?.split();
                let fd = rd.into_fd();
                guards.push(Closing::new(fd));
                outer.insert(dest_fd, PipeHolder::Writer(wr));
                fd
            }
            &Fd::WritePipe => {
                let (rd, wr) = Pipe::new()?.split();
                let fd = wr.into_fd();
                guards.push(Closing::new(fd));
                outer.insert(dest_fd, PipeHolder::Reader(rd));
                fd
            }
            &Fd::ReadNull => {
                // Need to keep fd with cloexec, until we are in child
                let fd =
result(Err::CreatePipe,
                    open(Path::new("/dev/null"),
                         OFlag::O_CLOEXEC|OFlag::O_RDONLY,
                         Mode::empty()))?;
                guards.push(Closing::new(fd));
                fd
            }
            &Fd::WriteNull => {
                // Need to keep fd with cloexec, until we are in child
                let fd = result(Err::CreatePipe,
                    open(Path::new("/dev/null"),
                         OFlag::O_CLOEXEC|OFlag::O_WRONLY,
                         Mode::empty()))?;
                guards.push(Closing::new(fd));
                fd
            }
            &Fd::Inherit => {
                dest_fd
            }
            &Fd::Fd(ref x) => {
                x.as_raw_fd()
            }
        };
        // The descriptor must not clobber the descriptors that are passed to
        // a child
        while fd != dest_fd && fds.contains_key(&fd) {
            fd = result(Err::CreatePipe,
                fcntl(fd, FcntlArg::F_DUPFD_CLOEXEC(3)))?;
            guards.push(Closing::new(fd));
        }
        inner.insert(dest_fd, fd);
    }
    Ok((inner, outer, guards))
}

// NOTE(review): the `Result` type arguments on the three methods below were
// lost in extraction and are reconstructed from what the bodies return.
impl Command {
    /// Run the command and return exit status
    pub fn status(&mut self) -> Result<ExitStatus, Error> {
        // TODO(tailhook) stdin/stdout/stderr
        self.spawn()?
        .wait()
        .map_err(|e| Error::WaitError(e.raw_os_error().unwrap_or(-1)))
    }
    /// Spawn the command and return a handle that can be waited for
    pub fn spawn(&mut self) -> Result<Child, Error> {
        // TODO(tailhook) We need mutable self only for init_env_map. Probably
        // we might do this internally and don't modify Command. That would
        // be more clear and also allow to print Display command easily in
        // error handler
        self.init_env_map();
        unsafe { self.spawn_inner() }
    }

    unsafe fn spawn_inner(&mut self) -> Result<Child, Error> {
        // TODO(tailhook) add RAII for pipes
        let (wakeup_rd, wakeup) = Pipe::new()?.split();
        let (errpipe, errpipe_wr) = Pipe::new()?.split();

        let c_args = raw_with_null(&self.args);

        // Each entry is a NUL-terminated `KEY=VALUE` byte buffer.
        let mut environ: Vec<_> = self.environ.as_ref().unwrap()
            .iter().map(|(k, v)| {
                let mut pair = k[..].as_bytes().to_vec();
                pair.push(b'=');
                pair.extend(v.as_bytes());
                pair.push(0);
                pair
            }).collect();
        let mut pid_env_vars = Vec::new();
        for var_name in &self.pid_env_vars {
            let mut pair = var_name[..].as_bytes().to_vec();
            pair.push(b'=');
            // Remember which entry it is and where the value starts; the
            // value itself is zero-filled space of MAX_PID_LEN+1 bytes.
            let (index, offset) = (environ.len(), pair.len());
            pair.extend(repeat(0).take(MAX_PID_LEN+1));
            environ.push(pair);
            pid_env_vars.push((index, offset));
        }
        let c_environ: Vec<_> = raw_with_null_mut(&mut environ);

        let (int_fds, ext_fds, _guards) = prepare_descriptors(&self.fds)?;

        let pivot = self.pivot_root.as_ref().map(|&(ref new, ref old, unmnt)| {
            Pivot {
                new_root: new.to_cstring(),
                put_old: old.to_cstring(),
                old_inside: relative_to(old, new, true).unwrap().to_cstring(),
                workdir: current_dir().ok()
                    .and_then(|cur| relative_to(cur, new, true))
                    .unwrap_or(PathBuf::from("/"))
                    .to_cstring(),
                unmount_old_root: unmnt,
            }
        });

        let chroot = self.chroot_dir.as_ref().map(|dir| {
            let wrk_rel = if let Some((ref piv, _, _)) = self.pivot_root {
                piv.join(relative_to(dir, "/", false).unwrap())
            } else {
                dir.to_path_buf()
            };
            Chroot {
                root: dir.to_cstring(),
                workdir: current_dir().ok()
                    .and_then(|cur| relative_to(cur, wrk_rel, true))
.unwrap_or(PathBuf::from("/"))
                    .to_cstring()
                    ,
            }
        });

        let mut nstack = [0u8; 4096];
        // Wrapped in `Option` so the closure below can `take()` the values.
        let mut wakeup = Some(wakeup);
        let mut wakeup_rd = Some(wakeup_rd);
        let mut errpipe_wr = Some(errpipe_wr);
        let args_slice = &c_args[..];
        let environ_slice = &c_environ[..];
        // We transform all hashmaps into vectors, because iterating over
        // hash map involves closure which crashes in the child in unoptimized
        // build
        // NOTE(review): the `collect` turbofish was lost in extraction;
        // `Vec<_>` follows from the comment above and the slice fields of
        // `ChildInfo`.
        let fds = int_fds.iter().map(|(&x, &y)| (x, y)).collect::<Vec<_>>();
        let close_fds = self.close_fds.iter().cloned().collect::<Vec<_>>();
        let setns_ns = self.config.setns_namespaces.iter()
            .map(|(ns, fd)| (to_clone_flag(*ns), fd.as_raw_fd()))
            .collect::<Vec<_>>();
        let pid = result(Err::Fork, clone(Box::new(|| -> isize {
            // Note: no memory allocations/deallocations here
            close(wakeup.take().unwrap().into_fd());
            let child_info = ChildInfo {
                filename: self.filename.as_ptr(),
                args: args_slice,
                environ: environ_slice,
                cfg: &self.config,
                chroot: &chroot,
                pivot: &pivot,
                wakeup_pipe: wakeup_rd.take().unwrap().into_fd(),
                error_pipe: errpipe_wr.take().unwrap().into_fd(),
                fds: &fds,
                fd_lookup: &int_fds,
                close_fds: &close_fds,
                setns_namespaces: &setns_ns,
                pid_env_vars: &pid_env_vars,
                keep_caps: &self.keep_caps,
                pre_exec: &self.pre_exec,
            };
            child::child_after_clone(&child_info);
        }), &mut nstack[..], self.config.namespaces, Some(SIGCHLD as i32)))?;
        drop(wakeup_rd);
        drop(errpipe_wr); // close pipe so we don't wait for ourself

        if let Err(e) = self.after_start(pid, wakeup.unwrap(), errpipe) {
            // Setup failed: kill the child and reap it, retrying on EINTR.
            kill(pid, SIGKILL).ok();
            loop {
                match waitpid(pid, None) {
                    Err(nix::Error::Sys(EINTR)) => continue,
                    _ => break,
                }
            }
            return Err(e);
        }

        let mut outer_fds = ext_fds;
        Ok(Child {
            pid: pid.into(),
            status: None,
            stdin: outer_fds.remove(&0).map(|x| {
                match x {
                    PipeHolder::Writer(x) => x,
                    _ => unreachable!(),
                }}),
            stdout: outer_fds.remove(&1).map(|x| {
                match x {
                    PipeHolder::Reader(x) => x,
                    _ => unreachable!(),
                }}),
            stderr: outer_fds.remove(&2).map(|x| {
                match x {
                    PipeHolder::Reader(x) => x,
                    _ => unreachable!(),
                }}),
            fds: outer_fds,
        })
    }

    fn after_start(&mut self, pid: Pid,
        mut wakeup: PipeWriter, mut errpipe: PipeReader)
        -> Result<(), Error>
    {
        if self.config.make_group_leader {
            result(Err::SetPGid, setpgid(pid, pid))?;
        }

        if let Some(&(ref uids, ref gids)) = self.config.id_maps.as_ref() {
            if let Some(&(ref ucmd, ref gcmd)) = self.id_map_commands.as_ref()
            {
                // Helper commands get the pid followed by one
                // (inside, outside, count) triple per mapping.
                let mut cmd = Command::new(ucmd);
                cmd.arg(format!("{}", pid));
                for map in uids {
                    cmd.arg(format!("{}", map.inside_uid));
                    cmd.arg(format!("{}", map.outside_uid));
                    cmd.arg(format!("{}", map.count));
                }
                cmd_result(Err::SetIdMap, cmd.status())?;
                let mut cmd = Command::new(gcmd);
                cmd.arg(format!("{}", pid));
                for map in gids {
                    cmd.arg(format!("{}", map.inside_gid));
                    cmd.arg(format!("{}", map.outside_gid));
                    cmd.arg(format!("{}", map.count));
                }
                cmd_result(Err::SetIdMap, cmd.status())?;
            } else {
                // No helper commands: write the maps to /proc directly,
                // one "inside outside count" line per mapping.
                let mut buf = Vec::new();
                for map in uids {
                    writeln!(&mut buf, "{} {} {}",
                        map.inside_uid, map.outside_uid, map.count).unwrap();
                }
                result(Err::SetIdMap,
                    File::create(format!("/proc/{}/uid_map", pid))
                    .and_then(|mut f| f.write_all(&buf[..])))?;
                let mut buf = Vec::new();
                for map in gids {
                    writeln!(&mut buf, "{} {} {}",
                        map.inside_gid, map.outside_gid, map.count).unwrap();
                }
                result(Err::SetIdMap,
                    File::create(format!("/proc/{}/gid_map", pid))
.and_then(|mut f| f.write_all(&buf[..])))?;
            }
        }
        if let Some(ref mut callback) = self.before_unfreeze {
            callback(i32::from(pid) as u32).map_err(Error::BeforeUnfreeze)?;
        }

        // Send the wakeup byte to the child, then wait on the error pipe.
        result(Err::PipeError, wakeup.write_all(b"x"))?;
        let mut err = [0u8; 6];
        match result(Err::PipeError, errpipe.read(&mut err))? {
            0 => {}  // Process successfully execve'd or dead
            5 => {
                // One byte of error code followed by a big-endian errno.
                let code = err[0];
                let errno = ((err[1] as i32) << 24) | ((err[2] as i32) << 16) |
                    ((err[3] as i32) << 8) | (err[4] as i32);
                return Err(Err::from_i32(code as i32, errno))
            }
            _ => { return Err(Error::UnknownError); }
        }
        Ok(())
    }
}

--------------------------------------------------------------------------------
/src/runtime.rs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tailhook/unshare/6cdc15d97aca90f59d1427e01da4c461184d0fe4/src/runtime.rs

--------------------------------------------------------------------------------
/src/status.rs:
--------------------------------------------------------------------------------
use std::fmt;
use crate::{Signal};


/// The exit status of a process
///
/// Returned either by `reap_zombies()` or by `child_events()`
/// or by `Child::wait()`
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitStatus {
    /// Process exited normally with some exit code
    Exited(i8),
    /// Process was killed by a signal (bool flag is true when core is dumped)
    Signaled(Signal, /* core dumped */bool)
}

// NOTE(review): the `Option` type arguments below were lost in extraction;
// `i32` follows from the `as i32` casts in the bodies.
impl ExitStatus {
    /// Returns `true` if this exit status means successful exit
    pub fn success(&self) -> bool {
        self == &ExitStatus::Exited(0)
    }
    /// Returns exit code if the process has exited normally
    pub fn code(&self) -> Option<i32> {
        match self {
            &ExitStatus::Exited(e) => Some(e as i32),
            &ExitStatus::Signaled(_, _) => None,
        }
    }
    /// Returns signal number if the process was killed by signal
    pub fn signal(&self) -> Option<i32> {
        match self {
            &ExitStatus::Exited(_) => None,
            &ExitStatus::Signaled(sig, _) => Some(sig as i32),
        }
    }
}

impl fmt::Display for ExitStatus {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        use self::ExitStatus::*;
        match self {
            &Exited(c) => write!(fmt, "exited with code {}", c),
            &Signaled(sig, false) => {
                write!(fmt, "killed by signal {:?}[{}]",
                    sig, sig as i32)
            }
            &Signaled(sig, true) => {
                write!(fmt, "killed by signal {:?}[{}] (core dumped)",
                    sig, sig as i32)
            }
        }
    }
}

--------------------------------------------------------------------------------
/src/std_api.rs:
--------------------------------------------------------------------------------
// This file was derived from rust's own libstd/process.rs with the following
// copyright:
//
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
7 | // 8 | use std::ffi::OsStr; 9 | use std::default::Default; 10 | use std::collections::HashMap; 11 | use std::collections::HashSet; 12 | use std::env; 13 | use std::path::Path; 14 | 15 | use libc::{uid_t, gid_t}; 16 | use crate::ffi_util::ToCString; 17 | use crate::{Command, Stdio, Fd}; 18 | 19 | 20 | impl Command { 21 | /// Constructs a new `Command` for launching the program at 22 | /// path `program`, with the following default configuration: 23 | /// 24 | /// * No arguments to the program 25 | /// * Inherit the current process's environment 26 | /// * Inherit the current process's working directory 27 | /// * Inherit stdin/stdout/stderr for `spawn` or `status`, but create pipes for `output` 28 | /// 29 | /// Builder methods are provided to change these defaults and 30 | /// otherwise configure the process. 31 | pub fn new>(program: S) -> Command { 32 | Command { 33 | filename: program.to_cstring(), 34 | args: vec![program.to_cstring()], 35 | environ: None, 36 | config: Default::default(), 37 | chroot_dir: None, 38 | pivot_root: None, 39 | fds: vec![ 40 | (0, Fd::inherit()), 41 | (1, Fd::inherit()), 42 | (2, Fd::inherit()), 43 | ].into_iter().collect(), 44 | close_fds: Vec::new(), 45 | id_map_commands: None, 46 | pid_env_vars: HashSet::new(), 47 | keep_caps: None, 48 | before_unfreeze: None, 49 | pre_exec: None, 50 | } 51 | } 52 | 53 | /// Add an argument to pass to the program. 54 | pub fn arg>(&mut self, arg: S) -> &mut Command { 55 | self.args.push(arg.to_cstring()); 56 | self 57 | } 58 | 59 | /// Add multiple arguments to pass to the program. 60 | pub fn args>(&mut self, args: &[S]) -> &mut Command { 61 | self.args.extend(args.iter().map(ToCString::to_cstring)); 62 | self 63 | } 64 | 65 | // TODO(tailhook) It's only public for our run module any better way? 
66 | // TODO(tailhook) make it private 67 | #[doc(hidden)] 68 | pub fn init_env_map(&mut self) { 69 | if self.environ.is_none() { 70 | self.environ = Some(env::vars_os().collect()); 71 | } 72 | } 73 | 74 | /// Inserts or updates an environment variable mapping. 75 | pub fn env(&mut self, key: K, val: V) -> &mut Command 76 | where K: AsRef, V: AsRef 77 | { 78 | self.init_env_map(); 79 | self.environ.as_mut().unwrap().insert( 80 | key.as_ref().to_os_string(), 81 | val.as_ref().to_os_string()); 82 | self.pid_env_vars.remove(key.as_ref()); 83 | self 84 | } 85 | 86 | /// Inserts or updates multiple environment variable mappings. 87 | pub fn envs(&mut self, vars: I)-> &mut Command 88 | where I: IntoIterator, K: AsRef, V: AsRef 89 | { 90 | for (ref key, ref val) in vars { 91 | self.init_env_map(); 92 | self.environ.as_mut().unwrap().insert( 93 | key.as_ref().to_os_string(), 94 | val.as_ref().to_os_string()); 95 | self.pid_env_vars.remove(key.as_ref()); 96 | } 97 | self 98 | } 99 | 100 | /// Removes an environment variable mapping. 101 | pub fn env_remove>(&mut self, key: K) -> &mut Command { 102 | self.init_env_map(); 103 | self.environ.as_mut().unwrap().remove(key.as_ref()); 104 | self.pid_env_vars.remove(key.as_ref()); 105 | self 106 | } 107 | 108 | /// Clears the entire environment map for the child process. 109 | pub fn env_clear(&mut self) -> &mut Command { 110 | self.environ = Some(HashMap::new()); 111 | self.pid_env_vars = HashSet::new(); 112 | self 113 | } 114 | 115 | /// Sets the working directory for the child process. 116 | /// 117 | /// Note: in case of `chroot` or `pivot_root` the working directory is 118 | /// always set to something inside the new root. Algorithm is following: 119 | /// 120 | /// 1. If path is set to absolute path, current dir is this path *inside* 121 | /// the chroot 122 | /// 2. Check if chroot dir is prefix of `env::current_dir()`. If it is 123 | /// set current directory to the suffix. 
Otherwise set current directory 124 | /// to the new root dir. 125 | /// 3. If `current_dir` is specified (and relative) set working directory 126 | /// to the value (i.e. relative to the dir set in #2) 127 | /// 128 | /// The `pivot_root` is treated just the same as `chroot`. I.e. we will 129 | /// not try to set working directory inside the `old_root`, unless path 130 | /// inside is set explicitly by this method. 131 | /// 132 | /// At the end of the day, the ``cmd.current_dir(env::current_dir())`` is 133 | /// not no-op if using chroot/pivot_root. 134 | pub fn current_dir>(&mut self, dir: P) -> &mut Command 135 | { 136 | self.config.work_dir = Some(dir.as_ref().to_cstring()); 137 | self 138 | } 139 | 140 | /// Configuration for the child process's stdin handle (file descriptor 0). 141 | pub fn stdin(&mut self, cfg: Stdio) -> &mut Command { 142 | self.fds.insert(0, cfg.to_fd(false)); 143 | self 144 | } 145 | 146 | /// Configuration for the child process's stdout handle (file descriptor 1). 147 | pub fn stdout(&mut self, cfg: Stdio) -> &mut Command { 148 | self.fds.insert(1, cfg.to_fd(true)); 149 | self 150 | } 151 | 152 | /// Configuration for the child process's stderr handle (file descriptor 2). 153 | pub fn stderr(&mut self, cfg: Stdio) -> &mut Command { 154 | self.fds.insert(2, cfg.to_fd(true)); 155 | self 156 | } 157 | 158 | /// Set user id of the new process. Note that it works only for root 159 | /// process or if you also set up user namespace 160 | pub fn uid(&mut self, id: uid_t) -> &mut Command { 161 | self.config.uid = Some(id); 162 | self 163 | } 164 | 165 | /// Set primary group id of the new process. Note that it works only for 166 | /// root process or if you also set up user namespace 167 | pub fn gid(&mut self, id: gid_t) -> &mut Command { 168 | self.config.gid = Some(id); 169 | self 170 | } 171 | 172 | /// Set supplementary group ids. 
Note that it works only for root process 173 | /// or if you also set up user namespace 174 | pub fn groups(&mut self, ids: Vec) -> &mut Command { 175 | self.config.supplementary_gids = Some(ids); 176 | self 177 | } 178 | } 179 | 180 | -------------------------------------------------------------------------------- /src/stdio.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::os::unix::io::{RawFd, AsRawFd, IntoRawFd}; 3 | 4 | use nix; 5 | use nix::fcntl::{fcntl, FcntlArg}; 6 | use libc; 7 | 8 | 9 | /// An enumeration that is used to configure stdio file descritors 10 | /// 11 | /// The enumeration members might be non-stable, it's better to use 12 | /// one of the constructors to create an instance 13 | pub enum Stdio { 14 | /// This fd will use pipe to/from the appliation 15 | Pipe, 16 | /// This fd will be inherited from the parent application 17 | Inherit, 18 | /// This fd will open /dev/null in read or write mode 19 | Null, 20 | /// This is fd passed by application (and closed by `unshare`) 21 | Fd(Closing), 22 | } 23 | 24 | /// An enumeration that is used to configure non-stdio file descriptors. It 25 | /// differs from stdio one because we must differentiate from readable and 26 | /// writable file descriptors for things open by the library 27 | /// 28 | /// The enumeration members might be non-stable, it's better to use 29 | /// one of the constructors to create an instance 30 | // TODO(tailhook) should this object be hidden? 
31 | pub enum Fd { 32 | /// This fd is a reading end of a pipe 33 | ReadPipe, 34 | /// This fd is a writing end of a pipe 35 | WritePipe, 36 | /// This fd is inherited from parent (current) process 37 | Inherit, 38 | /// This fd is redirected from `/dev/null` 39 | ReadNull, 40 | /// This fd is redirected to `/dev/null` 41 | WriteNull, 42 | /// This is fd passed by application (and closed by `unshare`) 43 | Fd(Closing), 44 | } 45 | 46 | pub struct Closing(RawFd); 47 | 48 | pub fn dup_file_cloexec(file: &F) -> io::Result { 49 | match fcntl(file.as_raw_fd(), FcntlArg::F_DUPFD_CLOEXEC(3)) { 50 | Ok(fd) => Ok(Closing::new(fd)), 51 | Err(nix::Error::Sys(errno)) => { 52 | return Err(io::Error::from_raw_os_error(errno as i32)); 53 | } 54 | Err(nix::Error::InvalidPath) => unreachable!(), 55 | Err(nix::Error::InvalidUtf8) => unreachable!(), 56 | Err(nix::Error::UnsupportedOperation) => { 57 | return Err(io::Error::new(io::ErrorKind::Other, 58 | "nix error: unsupported operation")); 59 | } 60 | } 61 | } 62 | 63 | impl Stdio { 64 | /// Pipe is created for child process 65 | pub fn piped() -> Stdio { Stdio::Pipe } 66 | /// The child inherits file descriptor from the parent process 67 | pub fn inherit() -> Stdio { Stdio::Inherit } 68 | /// Stream is attached to `/dev/null` 69 | pub fn null() -> Stdio { Stdio::Null } 70 | /// Converts stdio definition to file descriptor definition 71 | /// (mostly needed internally) 72 | pub fn to_fd(self, write: bool) -> Fd { 73 | match (self, write) { 74 | (Stdio::Fd(x), _) => Fd::Fd(x), 75 | (Stdio::Pipe, false) => Fd::ReadPipe, 76 | (Stdio::Pipe, true) => Fd::WritePipe, 77 | (Stdio::Inherit, _) => Fd::Inherit, 78 | (Stdio::Null, false) => Fd::ReadNull, 79 | (Stdio::Null, true) => Fd::WriteNull, 80 | } 81 | } 82 | /// A simpler helper method for `from_raw_fd`, that does dup of file 83 | /// descriptor, so is actually safe to use (but can fail) 84 | pub fn dup_file(file: &F) -> io::Result { 85 | dup_file_cloexec(file).map(|f| Stdio::Fd(f)) 86 | 
} 87 | /// A simpler helper method for `from_raw_fd`, that consumes file 88 | /// 89 | /// Note: we assume that file descriptor **already has** the `CLOEXEC` 90 | /// flag. This is by default for all files opened by rust. 91 | pub fn from_file(file: F) -> Stdio { 92 | Stdio::Fd(Closing(file.into_raw_fd())) 93 | } 94 | } 95 | 96 | impl Fd { 97 | /// Create a pipe so that child can read from it 98 | pub fn piped_read() -> Fd { Fd::ReadPipe } 99 | /// Create a pipe so that child can write to it 100 | pub fn piped_write() -> Fd { Fd::WritePipe } 101 | /// Inherit the child descriptor from parent 102 | /// 103 | /// Not very useful for custom file descriptors better use `from_file()` 104 | pub fn inherit() -> Fd { Fd::Inherit } 105 | /// Create a readable pipe that always has end of file condition 106 | pub fn read_null() -> Fd { Fd::ReadNull } 107 | /// Create a writable pipe that ignores all the input 108 | pub fn write_null() -> Fd { Fd::WriteNull } 109 | /// A simpler helper method for `from_raw_fd`, that does dup of file 110 | /// descriptor, so is actually safe to use (but can fail) 111 | pub fn dup_file(file: &F) -> io::Result { 112 | dup_file_cloexec(file).map(|f| Fd::Fd(f)) 113 | } 114 | /// A simpler helper method for `from_raw_fd`, that consumes file 115 | pub fn from_file(file: F) -> Fd { 116 | Fd::Fd(Closing(file.into_raw_fd())) 117 | } 118 | } 119 | 120 | impl Closing { 121 | pub fn new(fd: RawFd) -> Closing { 122 | Closing(fd) 123 | } 124 | } 125 | 126 | impl AsRawFd for Closing { 127 | fn as_raw_fd(&self) -> RawFd { 128 | return self.0; 129 | } 130 | } 131 | 132 | impl Drop for Closing { 133 | fn drop(&mut self) { 134 | unsafe { 135 | libc::close(self.0); 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/wait.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::os::unix::io::RawFd; 3 | 4 | use nix::Error; 5 | use 
nix::unistd::Pid; 6 | use nix::sys::wait::waitpid; 7 | use nix::sys::signal::{Signal, SIGKILL, kill}; 8 | use nix::errno::Errno::EINTR; 9 | use libc::pid_t; 10 | 11 | use crate::pipe::PipeHolder; 12 | use crate::{Child, ExitStatus, PipeReader, PipeWriter}; 13 | 14 | 15 | impl Child { 16 | 17 | /// Returns pid of the process (a mirror of std method) 18 | pub fn id(&self) -> u32 { 19 | self.pid as u32 20 | } 21 | 22 | /// Returns pid of process with correct pid_t type 23 | pub fn pid(&self) -> pid_t { 24 | self.pid 25 | } 26 | 27 | /// Synchronously wait for child to complete and return exit status 28 | pub fn wait(&mut self) -> Result { 29 | if let Some(x) = self.status { 30 | return Ok(x); 31 | } 32 | let status = self._wait()?; 33 | self.status = Some(status); 34 | Ok(status) 35 | } 36 | 37 | 38 | fn _wait(&mut self) -> Result { 39 | use nix::sys::wait::WaitStatus::*; 40 | loop { 41 | match waitpid(Some(Pid::from_raw(self.pid)), None) { 42 | Ok(PtraceEvent(..)) => {} 43 | Ok(PtraceSyscall(..)) => {} 44 | Ok(Exited(x, status)) => { 45 | assert!(i32::from(x) == self.pid); 46 | return Ok(ExitStatus::Exited(status as i8)); 47 | } 48 | Ok(Signaled(x, sig, core)) => { 49 | assert!(i32::from(x) == self.pid); 50 | return Ok(ExitStatus::Signaled(sig, core)); 51 | } 52 | Ok(Stopped(_, _)) => unreachable!(), 53 | Ok(Continued(_)) => unreachable!(), 54 | Ok(StillAlive) => unreachable!(), 55 | Err(Error::Sys(EINTR)) => continue, 56 | Err(Error::InvalidPath) => unreachable!(), 57 | Err(Error::InvalidUtf8) => unreachable!(), 58 | Err(Error::UnsupportedOperation) => { 59 | return Err(io::Error::new(io::ErrorKind::Other, 60 | "nix error: unsupported operation")); 61 | } 62 | Err(Error::Sys(x)) => { 63 | return Err(io::Error::from_raw_os_error(x as i32)) 64 | } 65 | } 66 | } 67 | } 68 | 69 | /// Send arbitrary unix signal to the process 70 | pub fn signal(&self, signal: Signal) -> Result<(), io::Error> { 71 | // This prevents (somewhat not-reliable) killing some other process 72 | 
// with same pid 73 | if self.status.is_some() { 74 | return Err(io::Error::new( 75 | io::ErrorKind::InvalidInput, 76 | "invalid argument: can't kill an exited process", 77 | )) 78 | } 79 | kill(Pid::from_raw(self.pid), signal) 80 | .map_err(|e| match e { 81 | Error::Sys(x) => io::Error::from_raw_os_error(x as i32), 82 | Error::InvalidPath => unreachable!(), 83 | Error::InvalidUtf8 => unreachable!(), 84 | Error::UnsupportedOperation => { 85 | io::Error::new(io::ErrorKind::Other, 86 | "nix error: unsupported operation") 87 | } 88 | }) 89 | } 90 | 91 | /// Kill process with SIGKILL signal 92 | pub fn kill(&self) -> Result<(), io::Error> { 93 | self.signal(SIGKILL) 94 | } 95 | 96 | /// Returns pipe reader for a pipe declared with `file_descriptor()` 97 | /// 98 | /// Returns None for wrong configuration or when called twice for same 99 | /// descriptor 100 | pub fn take_pipe_reader(&mut self, fd: RawFd) -> Option { 101 | match self.fds.remove(&fd) { 102 | Some(PipeHolder::Reader(x)) => Some(x), 103 | _ => None, 104 | } 105 | } 106 | 107 | /// Returns pipe writer for a pipe declared with `file_descriptor()` 108 | /// 109 | /// Returns None for wrong configuration or when called twice for same 110 | /// descriptor 111 | pub fn take_pipe_writer(&mut self, fd: RawFd) -> Option { 112 | match self.fds.remove(&fd) { 113 | Some(PipeHolder::Writer(x)) => Some(x), 114 | _ => None, 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/zombies.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use libc::pid_t; 4 | use nix::sys::wait::{waitpid}; 5 | use nix::sys::wait::WaitPidFlag; 6 | use nix::errno::Errno::{EINTR, ECHILD}; 7 | use nix::Error; 8 | 9 | use crate::{ExitStatus, Signal}; 10 | 11 | /// A non-blocking iteration over zombie processes 12 | /// 13 | /// Use `reap_zombies()` to create one, and read docs there 14 | pub struct 
ZombieIterator(PhantomData); 15 | 16 | 17 | impl Iterator for ZombieIterator { 18 | type Item = (pid_t, ExitStatus); 19 | 20 | fn next(&mut self) -> Option<(pid_t, ExitStatus)> { 21 | use nix::sys::wait::WaitStatus::*; 22 | loop { 23 | match waitpid(None, Some(WaitPidFlag::WNOHANG)) { 24 | Ok(PtraceEvent(..)) => {} 25 | Ok(PtraceSyscall(..)) => {} 26 | Ok(Exited(pid, status)) => { 27 | return Some((pid.into(), ExitStatus::Exited(status as i8))); 28 | } 29 | Ok(Signaled(pid, sig, core)) => { 30 | return Some((pid.into(), ExitStatus::Signaled(sig, core))); 31 | } 32 | Ok(Stopped(_, _)) => continue, 33 | Ok(Continued(_)) => continue, 34 | Ok(StillAlive) => return None, 35 | Err(Error::Sys(EINTR)) => continue, 36 | Err(Error::Sys(ECHILD)) => return None, 37 | Err(e) => { 38 | panic!("Unexpected waitpid error: {:?}", e); 39 | } 40 | } 41 | } 42 | } 43 | } 44 | 45 | 46 | /// Creates iterator over zombie processes 47 | /// 48 | /// On each iteration it calls `waitpid()` and returns child pid and exit 49 | /// status if there is zombie process. The operation is non-blocking. The 50 | /// iterator is exhausted when there are no zombie process at the moment, 51 | /// 52 | /// Alternatively see a more comprehensive `child_events()` function. 53 | /// 54 | /// # Example 55 | /// 56 | /// So waiting for all processes to finish may look like this: 57 | /// 58 | /// ```ignore 59 | /// while alive.len() > 0 { 60 | /// sigwait() 61 | /// for (pid, status) in zombies() { 62 | /// alive.remove(pid); 63 | /// } 64 | /// } 65 | /// ``` 66 | /// 67 | /// # Important Notes 68 | /// 69 | /// * If you are using this function you can't reliably use `Child::wait` 70 | /// any more. 
71 | /// * If you got `SIGCHLD` you *must* exhaust this iterator until waiting for 72 | /// next signal, or you will have zombie processes around 73 | pub fn reap_zombies() -> ZombieIterator { ZombieIterator(PhantomData) } 74 | 75 | 76 | /// The event returned from `child_events()` iterator 77 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 78 | pub enum ChildEvent { 79 | /// Child is dead, similar to what returned by `reap_zombies()` 80 | Death(pid_t, ExitStatus), 81 | /// Child is stopped on a signal Signal 82 | Stop(pid_t, Signal), 83 | /// Child is continued (SIGCONT sent) 84 | Continue(pid_t), 85 | } 86 | 87 | 88 | /// A non-blocking iteration over zombies and child stops 89 | /// 90 | /// Use `child_events()` to create one, and read docs there 91 | pub struct ChildEventsIterator(PhantomData); 92 | 93 | 94 | impl Iterator for ChildEventsIterator { 95 | type Item = ChildEvent; 96 | 97 | fn next(&mut self) -> Option { 98 | use self::ChildEvent::*; 99 | use nix::sys::wait::WaitStatus::*; 100 | let flags = WaitPidFlag::WNOHANG | WaitPidFlag::WUNTRACED | 101 | WaitPidFlag::WCONTINUED; 102 | loop { 103 | match waitpid(None, Some(flags)) { 104 | Ok(PtraceEvent(..)) => {} 105 | Ok(PtraceSyscall(..)) => {} 106 | Ok(Exited(pid, status)) => { 107 | return Some(Death(pid.into(), 108 | ExitStatus::Exited(status as i8))); 109 | } 110 | Ok(Signaled(pid, sig, core)) => { 111 | return Some(Death(pid.into(), 112 | ExitStatus::Signaled(sig, core))); 113 | } 114 | Ok(Stopped(pid, sig)) => return Some(Stop(pid.into(), sig)), 115 | Ok(Continued(pid)) => return Some(Continue(pid.into())), 116 | Ok(StillAlive) => return None, 117 | Err(Error::Sys(EINTR)) => continue, 118 | Err(Error::Sys(ECHILD)) => return None, 119 | Err(e) => { 120 | panic!("Unexpected waitpid error: {:?}", e); 121 | } 122 | } 123 | } 124 | } 125 | } 126 | 127 | 128 | /// Creates iterator over child events 129 | /// 130 | /// On each iteration it calls `waitpid()` and returns one of the 131 | /// events described in 
`ChildEvent`. 132 | /// 133 | /// The operation is non-blocking. The iterator is exhausted when there are no 134 | /// zombie process at the moment. 135 | /// 136 | /// Alternatively see a simpler `reap_zombies()` function. 137 | /// 138 | /// # Example 139 | /// 140 | /// So waiting for all processes to finish may look like this: 141 | /// 142 | /// ```ignore 143 | /// while alive.len() > 0 { 144 | /// sigwait() 145 | /// for event in zombies() { 146 | /// match event { 147 | /// Death(pid, _) => alive.remove(pid), 148 | /// Stop(..) => {} 149 | /// Continue(..) => {} 150 | /// } 151 | /// } 152 | /// ``` 153 | /// 154 | /// # Important Notes 155 | /// 156 | /// * If you are using this function you can't reliably use `Child::wait` 157 | /// any more. 158 | /// * If you got `SIGCHLD` you *must* exhaust this iterator until waiting for 159 | /// next signal, or you will have zombie processes around 160 | pub fn child_events() -> ChildEventsIterator { 161 | ChildEventsIterator(PhantomData) 162 | } 163 | -------------------------------------------------------------------------------- /vagga.yaml: -------------------------------------------------------------------------------- 1 | commands: 2 | 3 | cargo: !Command 4 | description: Run any cargo command 5 | container: ubuntu 6 | run: [cargo] 7 | 8 | make: !Command 9 | description: Build the library 10 | container: ubuntu 11 | run: [cargo, build] 12 | 13 | test: !Command 14 | description: Test the library 15 | container: ubuntu 16 | run: [cargo, test] 17 | 18 | doc: !Command 19 | description: Generate docs 20 | container: ubuntu 21 | epilog: | 22 | ------------------------------------------ 23 | xdg-open target/doc/unshare/index.html 24 | run: [cargo, test] 25 | 26 | _bulk: !Command 27 | description: Run `bulk` command (for version bookkeeping) 28 | container: ubuntu 29 | run: [bulk] 30 | 31 | containers: 32 | 33 | ubuntu: 34 | setup: 35 | - !Ubuntu bionic 36 | - !Install [ca-certificates, build-essential, vim] 37 | 38 | 
- !TarInstall 39 | url: "https://static.rust-lang.org/dist/rust-1.36.0-x86_64-unknown-linux-gnu.tar.gz" 40 | script: "./install.sh --prefix=/usr \ 41 | --components=rustc,rust-std-x86_64-unknown-linux-gnu,cargo" 42 | - &bulk !Tar 43 | url: "https://github.com/tailhook/bulk/releases/download/v0.4.11/bulk-v0.4.11.tar.gz" 44 | sha256: b718bb8448e726690c94d98d004bf7575f7a429106ec26ad3faf11e0fd9a7978 45 | path: / 46 | 47 | 48 | environ: 49 | HOME: /work/target 50 | USER: pc 51 | --------------------------------------------------------------------------------