├── fuzz ├── fuzz_quote_real_shell │ ├── basic-corpus │ │ ├── cr │ │ ├── long-random │ │ ├── short-random │ │ └── long-a │ ├── Dockerfile │ ├── Cargo.toml │ ├── each-shell.sh │ └── src │ │ └── fuzz.rs ├── fuzz_quote_wordexp │ ├── build.rs │ ├── Cargo.toml │ └── src │ │ ├── wordexp_wrapper.c │ │ └── fuzz.rs ├── fuzz_targets │ ├── fuzz_quote.rs │ └── fuzz_next.rs ├── fuzz_quote_python │ ├── Cargo.toml │ └── src │ │ └── fuzz.rs ├── Cargo.toml └── Cargo.lock ├── .gitignore ├── LICENSE-APACHE ├── Cargo.toml ├── .github └── workflows │ └── test.yml ├── LICENSE-MIT ├── CHANGELOG.md ├── README.md └── src ├── lib.rs ├── quoting_warning.md └── bytes.rs /fuzz/fuzz_quote_real_shell/basic-corpus/cr: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | nocommit/ 2 | target/ 3 | artifacts/ 4 | corpus/ 5 | /Cargo.lock 6 | **/*.rs.bk 7 | .*.sw? 8 | .sw? 
9 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_real_shell/basic-corpus/long-random: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/comex/rust-shlex/HEAD/fuzz/fuzz_quote_real_shell/basic-corpus/long-random -------------------------------------------------------------------------------- /fuzz/fuzz_quote_real_shell/basic-corpus/short-random: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/comex/rust-shlex/HEAD/fuzz/fuzz_quote_real_shell/basic-corpus/short-random -------------------------------------------------------------------------------- /fuzz/fuzz_quote_real_shell/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:latest 2 | RUN apk update 3 | # coreutils and strace are not needed but convenient for debugging. 4 | RUN apk add zsh bash dash busybox strace coreutils python3 fish mksh 5 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_wordexp/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rerun-if-changed=src/wordexp_wrapper.c"); 3 | cc::Build::new() 4 | .file("src/wordexp_wrapper.c") 5 | .compile("wordexp_wrapper"); 6 | } 7 | 8 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fuzz_quote.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | use shlex::quote; 4 | 5 | fuzz_target!(|data: &[u8]| { 6 | if let Ok(s) = std::str::from_utf8(data) { 7 | quote(s); 8 | } 9 | }); 10 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fuzz_next.rs: 
-------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | use shlex::Shlex; 4 | 5 | fuzz_target!(|data: &[u8]| { 6 | if let Ok(s) = std::str::from_utf8(data) { 7 | let mut sh = Shlex::new(s); 8 | while let Some(_word) = sh.next() {} 9 | } 10 | }); 11 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_real_shell/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fuzz_quote_real_shell" 3 | version = "0.0.0" 4 | authors = ["see main rust-shlex Cargo.toml for authors"] 5 | license = "MIT OR Apache-2.0" 6 | publish = false 7 | edition = "2021" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | libfuzzer-sys = "0.4" 14 | rand = "0.8.4" 15 | bstr = "1.8.0" 16 | nu-pretty-hex = "0.87.1" 17 | 18 | [dependencies.shlex] 19 | path = "../.." 20 | 21 | [[bin]] 22 | name = "fuzz_quote_real_shell" 23 | path = "src/fuzz.rs" 24 | 25 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_wordexp/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fuzz_quote_wordexp" 3 | version = "0.0.0" 4 | authors = ["see main rust-shlex Cargo.toml for authors"] 5 | license = "MIT OR Apache-2.0" 6 | publish = false 7 | edition = "2021" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | libfuzzer-sys = "0.4" 14 | nu-pretty-hex = "0.87.1" 15 | 16 | [dependencies.shlex] 17 | path = "../.." 
18 | 19 | [build-dependencies] 20 | cc = "1.0" 21 | 22 | [[bin]] 23 | name = "fuzz_quote_wordexp" 24 | path = "src/fuzz.rs" 25 | test = false 26 | doc = false 27 | 28 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Copyright 2015 Nicholas Allegra (comex). 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_python/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fuzz_quote_python" 3 | version = "0.0.0" 4 | authors = ["see main rust-shlex Cargo.toml for authors"] 5 | license = "MIT OR Apache-2.0" 6 | publish = false 7 | edition = "2021" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | libfuzzer-sys = "0.4" 14 | nu-pretty-hex = "0.87.1" 15 | 16 | [dependencies.pyo3] 17 | version = "0.20.2" 18 | features = ["auto-initialize"] 19 | 20 | [dependencies.shlex] 21 | path = "../.." 
22 | 23 | [build-dependencies] 24 | cc = "1.0" 25 | 26 | [[bin]] 27 | name = "fuzz_quote_python" 28 | path = "src/fuzz.rs" 29 | test = false 30 | doc = false 31 | 32 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "shlex" 3 | version = "1.3.0" 4 | authors = [ 5 | "comex ", 6 | "Fenhl ", 7 | "Adrian Taylor ", 8 | "Alex Touchet ", 9 | "Daniel Parks ", 10 | "Garrett Berg ", 11 | ] 12 | license = "MIT OR Apache-2.0" 13 | repository = "https://github.com/comex/rust-shlex" 14 | description = "Split a string into shell words, like Python's shlex." 15 | categories = [ 16 | "command-line-interface", 17 | "parser-implementations" 18 | ] 19 | rust-version = "1.46.0" 20 | 21 | [features] 22 | std = [] 23 | default = ["std"] 24 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "shlex-fuzz" 3 | version = "0.0.0" 4 | authors = ["see main rust-shlex Cargo.toml for authors"] 5 | publish = false 6 | edition = "2018" 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | libfuzzer-sys = "0.4" 13 | 14 | [dependencies.shlex] 15 | path = ".." 
16 | 17 | [workspace] 18 | members = [ 19 | ".", 20 | "fuzz_quote_real_shell", 21 | "fuzz_quote_python", 22 | "fuzz_quote_wordexp", 23 | ] 24 | 25 | [[bin]] 26 | name = "fuzz_next" 27 | path = "fuzz_targets/fuzz_next.rs" 28 | test = false 29 | doc = false 30 | 31 | [[bin]] 32 | name = "fuzz_quote" 33 | path = "fuzz_targets/fuzz_quote.rs" 34 | test = false 35 | doc = false 36 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_wordexp/src/wordexp_wrapper.c: -------------------------------------------------------------------------------- 1 | #include <stddef.h> 2 | #include <wordexp.h> 3 | 4 | static _Thread_local wordexp_t we; 5 | 6 | const char *wordexp_wrapper(const char *words, char ***wordv_p, size_t *wordc_p) { 7 | int res = wordexp(words, &we, WRDE_NOCMD | WRDE_SHOWERR | WRDE_UNDEF); 8 | *wordv_p = we.we_wordv; 9 | *wordc_p = we.we_wordc; 10 | switch (res) { 11 | case 0: return NULL; 12 | case WRDE_BADCHAR: return "WRDE_BADCHAR"; 13 | case WRDE_BADVAL: return "WRDE_BADVAL"; 14 | case WRDE_CMDSUB: return "WRDE_CMDSUB"; 15 | case WRDE_NOSPACE: return "WRDE_NOSPACE"; 16 | case WRDE_SYNTAX: return "WRDE_SYNTAX"; 17 | default: return "[unknown wordexp error]"; 18 | } 19 | } 20 | 21 | void wordfree_wrapper() { 22 | wordfree(&we); 23 | } 24 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | pull_request: 5 | push: 6 | 7 | jobs: 8 | check: 9 | name: Check 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - uses: ATiltedTree/setup-rust@v1 14 | with: 15 | rust-version: stable 16 | - run: cargo check 17 | 18 | test: 19 | name: Test 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v2 23 | - uses: ATiltedTree/setup-rust@v1 24 | with: 25 | rust-version: stable 26 | - run: cargo test 27 | 28 | test_no_default_features: 29 | name: 
Test (no default features) 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@v2 33 | - uses: ATiltedTree/setup-rust@v1 34 | with: 35 | rust-version: stable 36 | - run: cargo test --no-default-features 37 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Nicholas Allegra (comex). 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Unreleased 2 | 3 | * Fixed the URL of the security advisory linked from the documentation. 
4 | 5 | # 1.3.0 6 | 7 | * Full fix for the high-severity security vulnerability [RUSTSEC-2024-0006](https://rustsec.org/advisories/RUSTSEC-2024-0006.html) a.k.a. [GHSA-r7qv-8r2h-pg27](https://github.com/comex/rust-shlex/security/advisories/GHSA-r7qv-8r2h-pg27): 8 | * Deprecates quote APIs in favor of `try_` equivalents that complain about nul bytes. 9 | * Also adds a builder API, which allows re-enabling nul bytes without using the deprecated interface, and in the future can allow other things (as discussed in quoting_warning). 10 | * Adds documentation about various security risks that remain, particularly with interactive shells. 11 | * Adds explicit MSRV of 1.46.0. 12 | 13 | # 1.2.1 14 | 15 | * Partial fix for the high-severity security vulnerability [RUSTSEC-2024-0006](https://rustsec.org/advisories/RUSTSEC-2024-0006.html) a.k.a. [GHSA-r7qv-8r2h-pg27](https://github.com/comex/rust-shlex/security/advisories/GHSA-r7qv-8r2h-pg27) without bumping MSRV: 16 | * The bytes `{` and `\xa0` are now escaped by quoting functions. 17 | 18 | # 1.2.0 19 | 20 | * Adds `bytes` module to support operating directly on byte strings. 21 | 22 | # 1.1.0 23 | 24 | * Adds the `std` feature (enabled by default). 25 | * Disabling the `std` feature makes the crate work in `#![no_std]` mode, assuming presence of the `alloc` crate. 26 | 27 | # 1.0.0 28 | 29 | * Adds the `join` convenience function. 30 | * Fixes parsing of `'\\n'` to match the behavior of bash/Zsh/Python `shlex`. The result was previously `\n`, now it is `\\n`. 31 | 32 | # 0.1.1 33 | 34 | * Adds handling of `#` comments. 35 | 36 | # 0.1.0 37 | 38 | This is the initial release. 
39 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_python/src/fuzz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | use shlex::try_join; 4 | use nu_pretty_hex::pretty_hex; 5 | 6 | use pyo3::prelude::*; 7 | 8 | fn shlex_split(words: &str) -> Result<Vec<String>, String> { 9 | Python::with_gil(|py| { 10 | Ok(py 11 | .import("shlex").unwrap() 12 | .getattr("split").unwrap() 13 | .call1((words,)) 14 | .map_err(|e| e.to_string())? 15 | .extract().unwrap()) 16 | }) 17 | } 18 | 19 | fn pretty_hex_multi<'a>(strings: impl IntoIterator<Item = &'a str>) -> String { 20 | let mut res = "[\n".to_owned(); 21 | for string in strings { 22 | res += &pretty_hex(&string); 23 | res.push('\n'); 24 | } 25 | res.push(']'); 26 | res 27 | } 28 | 29 | fuzz_target!(|unquoted: &[u8]| { 30 | // Treat the input as a list of words separated by nul chars. 31 | let Ok(unquoted) = std::str::from_utf8(unquoted) else { 32 | // ignore invalid utf-8 33 | return; 34 | }; 35 | let words: Vec<&str> = unquoted.split('\0').collect(); 36 | let quoted: String = try_join(words.iter().cloned()).unwrap(); 37 | let res = shlex_split(&quoted); 38 | 39 | match res { 40 | Ok(expanded) => { 41 | if expanded != words { 42 | panic!("original: {}\nshlex.split output:{}\nquoted:\n{}", 43 | pretty_hex_multi(words.iter().cloned()), 44 | pretty_hex_multi(expanded.iter().map(|x| &**x)), 45 | pretty_hex(&quoted)); 46 | } 47 | } 48 | Err(err) => { 49 | panic!("original: {}\nquoted:\n{}\nshlex.split error: {}", 50 | pretty_hex_multi(words.iter().cloned()), 51 | pretty_hex(&quoted), 52 | err); 53 | }, 54 | } 55 | }); 56 | 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![ci badge]][ci link] [![crates.io badge]][crates.io link] [![docs.rs badge]][docs.rs link] 2 | 3 | [crates.io badge]: 
https://img.shields.io/crates/v/shlex.svg?style=flat-square 4 | [crates.io link]: https://crates.io/crates/shlex 5 | [docs.rs badge]: https://img.shields.io/badge/docs-online-dddddd.svg?style=flat-square 6 | [docs.rs link]: https://docs.rs/shlex 7 | [ci badge]: https://img.shields.io/github/actions/workflow/status/comex/rust-shlex/test.yml?branch=master&style=flat-square 8 | [ci link]: https://github.com/comex/rust-shlex/actions 9 | 10 | Same idea as (but implementation not directly based on) the Python shlex 11 | module. However, this implementation does not support any of the Python 12 | module's customization because it makes parsing slower and is fairly useless. 13 | You only get the default settings of shlex.split, which mimic the POSIX shell: 14 | <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html> 15 | 16 | This implementation also deviates from the Python version in not treating \r 17 | specially, which I believe is more compliant. 18 | 19 | This crate can be used on either normal Rust strings, or on byte strings with 20 | the `bytes` module. The algorithms used are oblivious to UTF-8 high bytes, so 21 | internally they all work on bytes directly as a micro-optimization. 22 | 23 | Disabling the `std` feature (which is enabled by default) will allow the crate 24 | to work in `no_std` environments, where the `alloc` crate, and a global 25 | allocator, are available. 26 | 27 | # LICENSE 28 | 29 | The source code in this repository is licensed under either of 30 | - Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or 31 | https://www.apache.org/licenses/LICENSE-2.0) 32 | - MIT license ([LICENSE-MIT](LICENSE-MIT) or 33 | https://opensource.org/licenses/MIT) 34 | 35 | at your option. 36 | 37 | Unless you explicitly state otherwise, any contribution intentionally submitted 38 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall 39 | be dual licensed as above, without any additional terms or conditions. 
40 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_wordexp/src/fuzz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | use shlex::bytes::try_join; 4 | use std::ptr; 5 | use std::ffi::{c_char, CStr, CString}; 6 | use nu_pretty_hex::pretty_hex; 7 | 8 | extern "C" { 9 | // wordexp_wrapper.c 10 | fn wordexp_wrapper(words: *const c_char, wordv_p: *mut *mut *mut c_char, wordc_p: *mut usize) -> *const c_char; 11 | fn wordfree_wrapper(); 12 | } 13 | 14 | fn wordexp(words: Vec<u8>) -> Result<Vec<Vec<u8>>, String> { 15 | unsafe { 16 | let mut wordv: *mut *mut c_char = ptr::null_mut(); 17 | let mut wordc: usize = 0; 18 | let cwords = CString::new(words).unwrap(); 19 | let err = wordexp_wrapper(cwords.as_ptr(), &mut wordv, &mut wordc); 20 | if err.is_null() { 21 | // success 22 | let mut ret = Vec::new(); 23 | for i in 0..wordc { 24 | ret.push(CStr::from_ptr(*wordv.add(i)).to_bytes().to_owned()); 25 | } 26 | wordfree_wrapper(); 27 | Ok(ret) 28 | } else { 29 | Err(CStr::from_ptr(err).to_string_lossy().to_string()) 30 | } 31 | } 32 | } 33 | 34 | fn pretty_hex_multi<'a>(strings: impl IntoIterator<Item = &'a [u8]>) -> String { 35 | let mut res = "[\n".to_owned(); 36 | for string in strings { 37 | res += &pretty_hex(&string); 38 | res.push('\n'); 39 | } 40 | res.push(']'); 41 | res 42 | } 43 | 44 | fuzz_target!(|unquoted: &[u8]| { 45 | // Treat the input as a list of words separated by nul chars. 
46 | let words: Vec<&[u8]> = unquoted.split(|&c| c == b'\0').collect(); 47 | let quoted: Vec<u8> = try_join(words.iter().cloned()).unwrap(); 48 | 49 | let res = wordexp(quoted.clone()); 50 | 51 | match res { 52 | Ok(expanded) => { 53 | if expanded != words { 54 | panic!("original: {}\nwordexp output:{}\nquoted:\n{}", 55 | pretty_hex_multi(words.iter().cloned()), 56 | pretty_hex_multi(expanded.iter().map(|x| &**x)), 57 | pretty_hex(&quoted)); 58 | } 59 | } 60 | Err(err) => { 61 | #[cfg(target_os = "macos")] 62 | if quoted.contains(&b'`') { 63 | // macOS wordexp bug 64 | return; 65 | } 66 | 67 | if err == "WRDE_NOSPACE" { 68 | // Input is probably too long. 69 | return; 70 | } 71 | 72 | panic!("original: {}\nquoted:\n{}\nwordexp error: {}", 73 | pretty_hex_multi(words.iter().cloned()), 74 | pretty_hex(&quoted), 75 | err); 76 | }, 77 | } 78 | }); 79 | 80 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_real_shell/each-shell.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | # Run a command for each of several configurations. 3 | # Example: 4 | # ./each-shell.sh 'cargo fuzz run --fuzz-dir . fuzz_quote_real_shell basic-corpus/*' 5 | # ./each-shell.sh 'nohup cargo fuzz run --fuzz-dir . fuzz_quote_real_shell >&/tmp/out.$ident &' 6 | 7 | # TODO: This could be handled better. The choice of shell should probably just 8 | # be part of the fuzz input. 9 | 10 | shells=( 11 | 'zsh --no-rcs' 12 | 'bash --norc' 13 | 'dash +m' 14 | 'fish --private --no-config' 15 | 'mksh' 16 | ) 17 | 18 | running_on_linux=1 19 | if [[ `uname` == Darwin && "$FUZZ_USE_DOCKER" == 0 ]]; then 20 | running_on_linux=0 21 | fi 22 | # Add busybox unless we're running natively on macOS, since busybox doesn't run 23 | # on macOS. 24 | # (If you're on Linux but it's not installed, then too bad, install it.) 
25 | if (( running_on_linux )); then 26 | shells+=('busybox ash +m') 27 | fi 28 | # Gather existing FUZZ_* environment variables just to make it easier to copy 29 | # and paste individual commands from the debug output: 30 | already_set=$(export | grep '^FUZZ_' | tr '\n' ' ') 31 | for shell in $shells; do 32 | for interactive in '-i' '+i'; do 33 | for pty in 0 1; do 34 | for lang in C en_US.UTF-8; do 35 | ident="${shell%% *}.$interactive.pty$pty.$lang" 36 | if [[ $shell == fish* && $ident != fish.-i.pty1.* ]]; then 37 | # fish must have a pty because otherwise it buffers the 38 | # entire stdin rather than responding live; and it must 39 | # have -i because +i doesn't actually work. 40 | continue 41 | fi 42 | if [[ $ident == zsh.-i.pty0.* ]]; then 43 | # zsh in interactive mode forces the use of the tty instead of 44 | # using stdin/stdout, so we can't test it without a pty. 45 | continue 46 | fi 47 | if [[ $ident == zsh.+i.pty1.* && $running_on_linux == 0 ]]; then 48 | # Fails due to a macOS kernel bug(?). In zsh, `shingetchar` 49 | # really does not want to read past a newline. Instead of 50 | # just buffering any excess data, it uses a weird scheme where 51 | # it tries a no-op lseek on the input fd. If that succeeds, it 52 | # calls `read` with some reasonable buffer size and then, if it 53 | # read too many bytes (i.e. past a newline), it lseeks 54 | # backwards to the newline. If the no-op lseek fails, it falls 55 | # back to reading one byte at a time. On macOS, lseek on a pty 56 | # succeeds even though it does not do anything meaningful. 57 | # Pipes don't have this issue. 
58 | continue 59 | fi 60 | prefix="FUZZ_USE_PTY=$pty FUZZ_SHELL=\"env LANG=$lang $shell $interactive\" " 61 | echo ">> ${already_set}ident='$ident' $prefix $*" 62 | eval "$prefix $*" || { 63 | echo "FAIL: $ident" 64 | exit 1 65 | } 66 | done 67 | done 68 | done 69 | done 70 | echo 'all ok' 71 | -------------------------------------------------------------------------------- /fuzz/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "arbitrary" 7 | version = "1.3.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" 10 | 11 | [[package]] 12 | name = "autocfg" 13 | version = "1.1.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 16 | 17 | [[package]] 18 | name = "bitflags" 19 | version = "1.3.2" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 22 | 23 | [[package]] 24 | name = "bstr" 25 | version = "1.9.0" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" 28 | dependencies = [ 29 | "memchr", 30 | "regex-automata", 31 | "serde", 32 | ] 33 | 34 | [[package]] 35 | name = "cc" 36 | version = "1.0.83" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" 39 | dependencies = [ 40 | "jobserver", 41 | "libc", 42 | ] 43 | 44 | [[package]] 45 | name = "cfg-if" 46 | version = "1.0.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 49 | 50 | [[package]] 51 | name = "fuzz_quote_python" 52 | version = "0.0.0" 53 | dependencies = [ 54 | "cc", 55 | "libfuzzer-sys", 56 | "nu-pretty-hex", 57 | "pyo3", 58 | "shlex", 59 | ] 60 | 61 | [[package]] 62 | name = "fuzz_quote_real_shell" 63 | version = "0.0.0" 64 | dependencies = [ 65 | "bstr", 66 | "libfuzzer-sys", 67 | "nu-pretty-hex", 68 | "rand", 69 | "shlex", 70 | ] 71 | 72 | [[package]] 73 | name = "fuzz_quote_wordexp" 74 | version = "0.0.0" 75 | dependencies = [ 76 | "cc", 77 | "libfuzzer-sys", 78 | "nu-pretty-hex", 79 | "shlex", 80 | ] 81 | 82 | [[package]] 83 | name = "getrandom" 84 | version = "0.2.12" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" 87 | dependencies = [ 88 | "cfg-if", 89 | "libc", 90 | "wasi", 91 | ] 92 | 93 | [[package]] 94 | name = "heck" 95 | version = "0.4.1" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 98 | 99 | [[package]] 100 | name = "indoc" 101 | version = "2.0.4" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" 104 | 105 | [[package]] 106 | name = "jobserver" 107 | version = "0.1.27" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" 110 | dependencies = [ 111 | "libc", 112 | ] 113 | 114 | [[package]] 115 | name = "libc" 116 | version = "0.2.152" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" 119 | 120 | [[package]] 121 | name = "libfuzzer-sys" 122 | version = "0.4.7" 123 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 124 | checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7" 125 | dependencies = [ 126 | "arbitrary", 127 | "cc", 128 | "once_cell", 129 | ] 130 | 131 | [[package]] 132 | name = "lock_api" 133 | version = "0.4.11" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" 136 | dependencies = [ 137 | "autocfg", 138 | "scopeguard", 139 | ] 140 | 141 | [[package]] 142 | name = "memchr" 143 | version = "2.7.1" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" 146 | 147 | [[package]] 148 | name = "memoffset" 149 | version = "0.9.0" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" 152 | dependencies = [ 153 | "autocfg", 154 | ] 155 | 156 | [[package]] 157 | name = "nu-ansi-term" 158 | version = "0.49.0" 159 | source = "registry+https://github.com/rust-lang/crates.io-index" 160 | checksum = "c073d3c1930d0751774acf49e66653acecb416c3a54c6ec095a9b11caddb5a68" 161 | dependencies = [ 162 | "windows-sys", 163 | ] 164 | 165 | [[package]] 166 | name = "nu-pretty-hex" 167 | version = "0.87.1" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "934849ad57ec319bddad52dc0fd7cd6c6bd7e9a80e79636cbf41e3e5c29ca6e2" 170 | dependencies = [ 171 | "nu-ansi-term", 172 | ] 173 | 174 | [[package]] 175 | name = "once_cell" 176 | version = "1.19.0" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 179 | 180 | [[package]] 181 | name = "parking_lot" 182 | version = "0.12.1" 183 | source = "registry+https://github.com/rust-lang/crates.io-index" 184 | checksum = 
"3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 185 | dependencies = [ 186 | "lock_api", 187 | "parking_lot_core", 188 | ] 189 | 190 | [[package]] 191 | name = "parking_lot_core" 192 | version = "0.9.9" 193 | source = "registry+https://github.com/rust-lang/crates.io-index" 194 | checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" 195 | dependencies = [ 196 | "cfg-if", 197 | "libc", 198 | "redox_syscall", 199 | "smallvec", 200 | "windows-targets", 201 | ] 202 | 203 | [[package]] 204 | name = "ppv-lite86" 205 | version = "0.2.17" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 208 | 209 | [[package]] 210 | name = "proc-macro2" 211 | version = "1.0.76" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" 214 | dependencies = [ 215 | "unicode-ident", 216 | ] 217 | 218 | [[package]] 219 | name = "pyo3" 220 | version = "0.20.2" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "9a89dc7a5850d0e983be1ec2a463a171d20990487c3cfcd68b5363f1ee3d6fe0" 223 | dependencies = [ 224 | "cfg-if", 225 | "indoc", 226 | "libc", 227 | "memoffset", 228 | "parking_lot", 229 | "pyo3-build-config", 230 | "pyo3-ffi", 231 | "pyo3-macros", 232 | "unindent", 233 | ] 234 | 235 | [[package]] 236 | name = "pyo3-build-config" 237 | version = "0.20.2" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | checksum = "07426f0d8fe5a601f26293f300afd1a7b1ed5e78b2a705870c5f30893c5163be" 240 | dependencies = [ 241 | "once_cell", 242 | "target-lexicon", 243 | ] 244 | 245 | [[package]] 246 | name = "pyo3-ffi" 247 | version = "0.20.2" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "dbb7dec17e17766b46bca4f1a4215a85006b4c2ecde122076c562dd058da6cf1" 250 | 
dependencies = [ 251 | "libc", 252 | "pyo3-build-config", 253 | ] 254 | 255 | [[package]] 256 | name = "pyo3-macros" 257 | version = "0.20.2" 258 | source = "registry+https://github.com/rust-lang/crates.io-index" 259 | checksum = "05f738b4e40d50b5711957f142878cfa0f28e054aa0ebdfc3fd137a843f74ed3" 260 | dependencies = [ 261 | "proc-macro2", 262 | "pyo3-macros-backend", 263 | "quote", 264 | "syn", 265 | ] 266 | 267 | [[package]] 268 | name = "pyo3-macros-backend" 269 | version = "0.20.2" 270 | source = "registry+https://github.com/rust-lang/crates.io-index" 271 | checksum = "0fc910d4851847827daf9d6cdd4a823fbdaab5b8818325c5e97a86da79e8881f" 272 | dependencies = [ 273 | "heck", 274 | "proc-macro2", 275 | "quote", 276 | "syn", 277 | ] 278 | 279 | [[package]] 280 | name = "quote" 281 | version = "1.0.35" 282 | source = "registry+https://github.com/rust-lang/crates.io-index" 283 | checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" 284 | dependencies = [ 285 | "proc-macro2", 286 | ] 287 | 288 | [[package]] 289 | name = "rand" 290 | version = "0.8.5" 291 | source = "registry+https://github.com/rust-lang/crates.io-index" 292 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 293 | dependencies = [ 294 | "libc", 295 | "rand_chacha", 296 | "rand_core", 297 | ] 298 | 299 | [[package]] 300 | name = "rand_chacha" 301 | version = "0.3.1" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 304 | dependencies = [ 305 | "ppv-lite86", 306 | "rand_core", 307 | ] 308 | 309 | [[package]] 310 | name = "rand_core" 311 | version = "0.6.4" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 314 | dependencies = [ 315 | "getrandom", 316 | ] 317 | 318 | [[package]] 319 | name = "redox_syscall" 320 | version = "0.4.1" 321 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 322 | checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" 323 | dependencies = [ 324 | "bitflags", 325 | ] 326 | 327 | [[package]] 328 | name = "regex-automata" 329 | version = "0.4.3" 330 | source = "registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" 332 | 333 | [[package]] 334 | name = "scopeguard" 335 | version = "1.2.0" 336 | source = "registry+https://github.com/rust-lang/crates.io-index" 337 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 338 | 339 | [[package]] 340 | name = "serde" 341 | version = "1.0.195" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" 344 | dependencies = [ 345 | "serde_derive", 346 | ] 347 | 348 | [[package]] 349 | name = "serde_derive" 350 | version = "1.0.195" 351 | source = "registry+https://github.com/rust-lang/crates.io-index" 352 | checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" 353 | dependencies = [ 354 | "proc-macro2", 355 | "quote", 356 | "syn", 357 | ] 358 | 359 | [[package]] 360 | name = "shlex" 361 | version = "1.3.0" 362 | 363 | [[package]] 364 | name = "shlex-fuzz" 365 | version = "0.0.0" 366 | dependencies = [ 367 | "libfuzzer-sys", 368 | "shlex", 369 | ] 370 | 371 | [[package]] 372 | name = "smallvec" 373 | version = "1.12.0" 374 | source = "registry+https://github.com/rust-lang/crates.io-index" 375 | checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" 376 | 377 | [[package]] 378 | name = "syn" 379 | version = "2.0.48" 380 | source = "registry+https://github.com/rust-lang/crates.io-index" 381 | checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" 382 | dependencies = [ 383 | "proc-macro2", 384 | "quote", 385 | "unicode-ident", 386 | ] 387 
| 388 | [[package]] 389 | name = "target-lexicon" 390 | version = "0.12.13" 391 | source = "registry+https://github.com/rust-lang/crates.io-index" 392 | checksum = "69758bda2e78f098e4ccb393021a0963bb3442eac05f135c30f61b7370bbafae" 393 | 394 | [[package]] 395 | name = "unicode-ident" 396 | version = "1.0.12" 397 | source = "registry+https://github.com/rust-lang/crates.io-index" 398 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 399 | 400 | [[package]] 401 | name = "unindent" 402 | version = "0.2.3" 403 | source = "registry+https://github.com/rust-lang/crates.io-index" 404 | checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" 405 | 406 | [[package]] 407 | name = "wasi" 408 | version = "0.11.0+wasi-snapshot-preview1" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 411 | 412 | [[package]] 413 | name = "windows-sys" 414 | version = "0.48.0" 415 | source = "registry+https://github.com/rust-lang/crates.io-index" 416 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 417 | dependencies = [ 418 | "windows-targets", 419 | ] 420 | 421 | [[package]] 422 | name = "windows-targets" 423 | version = "0.48.5" 424 | source = "registry+https://github.com/rust-lang/crates.io-index" 425 | checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" 426 | dependencies = [ 427 | "windows_aarch64_gnullvm", 428 | "windows_aarch64_msvc", 429 | "windows_i686_gnu", 430 | "windows_i686_msvc", 431 | "windows_x86_64_gnu", 432 | "windows_x86_64_gnullvm", 433 | "windows_x86_64_msvc", 434 | ] 435 | 436 | [[package]] 437 | name = "windows_aarch64_gnullvm" 438 | version = "0.48.5" 439 | source = "registry+https://github.com/rust-lang/crates.io-index" 440 | checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" 441 | 442 | [[package]] 443 | name = "windows_aarch64_msvc" 
444 | version = "0.48.5" 445 | source = "registry+https://github.com/rust-lang/crates.io-index" 446 | checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" 447 | 448 | [[package]] 449 | name = "windows_i686_gnu" 450 | version = "0.48.5" 451 | source = "registry+https://github.com/rust-lang/crates.io-index" 452 | checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" 453 | 454 | [[package]] 455 | name = "windows_i686_msvc" 456 | version = "0.48.5" 457 | source = "registry+https://github.com/rust-lang/crates.io-index" 458 | checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" 459 | 460 | [[package]] 461 | name = "windows_x86_64_gnu" 462 | version = "0.48.5" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" 465 | 466 | [[package]] 467 | name = "windows_x86_64_gnullvm" 468 | version = "0.48.5" 469 | source = "registry+https://github.com/rust-lang/crates.io-index" 470 | checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" 471 | 472 | [[package]] 473 | name = "windows_x86_64_msvc" 474 | version = "0.48.5" 475 | source = "registry+https://github.com/rust-lang/crates.io-index" 476 | checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" 477 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Nicholas Allegra (comex). 2 | // Licensed under the Apache License, Version 2.0 or 3 | // the MIT license , at your option. This file may not be 4 | // copied, modified, or distributed except according to those terms. 5 | 6 | //! Parse strings like, and escape strings for, POSIX shells. 7 | //! 8 | //! Same idea as (but implementation not directly based on) the Python shlex module. 9 | //! 10 | //! 
Disabling the `std` feature (which is enabled by default) will allow the crate to work in 11 | //! `no_std` environments, where the `alloc` crate, and a global allocator, are available. 12 | //! 13 | //! ## Warning 14 | //! 15 | //! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they 16 | //! cannot be quoted portably). 17 | //! 18 | //! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or 19 | //! even scripts `source`d from interactive shells). 20 | //! 21 | //! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce 22 | //! ugly outputs (which may not be copy-pastable). 23 | //! 24 | //! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin 25 | //! of an interactive shell, you should stop, because control characters can lead to arbitrary 26 | //! command injection. 27 | //! 28 | //! For more information, and for information about more minor issues, please see [quoting_warning]. 29 | //! 30 | //! ## Compatibility 31 | //! 32 | //! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**; 33 | //! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not 34 | //! POSIX-compatible but close enough). 35 | //! 36 | //! It also aims to be compatible with Python `shlex` and C `wordexp`. 37 | 38 | #![cfg_attr(not(feature = "std"), no_std)] 39 | 40 | extern crate alloc; 41 | use alloc::vec::Vec; 42 | use alloc::borrow::Cow; 43 | use alloc::string::String; 44 | #[cfg(test)] 45 | use alloc::vec; 46 | #[cfg(test)] 47 | use alloc::borrow::ToOwned; 48 | 49 | pub mod bytes; 50 | #[cfg(all(doc, not(doctest)))] 51 | #[path = "quoting_warning.md"] 52 | pub mod quoting_warning; 53 | 54 | /// An iterator that takes an input string and splits it into the words using the same syntax as 55 | /// the POSIX shell. 56 | /// 57 | /// See [`bytes::Shlex`]. 
58 | pub struct Shlex<'a>(bytes::Shlex<'a>); 59 | 60 | impl<'a> Shlex<'a> { 61 | pub fn new(in_str: &'a str) -> Self { 62 | Self(bytes::Shlex::new(in_str.as_bytes())) 63 | } 64 | } 65 | 66 | impl<'a> Iterator for Shlex<'a> { 67 | type Item = String; 68 | fn next(&mut self) -> Option { 69 | self.0.next().map(|byte_word| { 70 | // Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8. 71 | unsafe { String::from_utf8_unchecked(byte_word) } 72 | }) 73 | } 74 | } 75 | 76 | impl<'a> core::ops::Deref for Shlex<'a> { 77 | type Target = bytes::Shlex<'a>; 78 | 79 | fn deref(&self) -> &Self::Target { 80 | &self.0 81 | } 82 | } 83 | 84 | impl<'a> core::ops::DerefMut for Shlex<'a> { 85 | fn deref_mut(&mut self) -> &mut Self::Target { 86 | &mut self.0 87 | } 88 | } 89 | 90 | /// Convenience function that consumes the whole string at once. Returns None if the input was 91 | /// erroneous. 92 | pub fn split(in_str: &str) -> Option> { 93 | let mut shl = Shlex::new(in_str); 94 | let res = shl.by_ref().collect(); 95 | if shl.had_error { None } else { Some(res) } 96 | } 97 | 98 | /// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts). 99 | /// 100 | /// By default, the only error that can be returned is [`QuoteError::Nul`]. If you call 101 | /// `allow_nul(true)`, then no errors can be returned at all. Any error variants added in the 102 | /// future will not be enabled by default; they will be enabled through corresponding non-default 103 | /// [`Quoter`] options. 104 | /// 105 | /// ...In theory. In the unlikely event that additional classes of inputs are discovered that, 106 | /// like nul bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk 107 | /// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by 108 | /// default. 
109 | #[non_exhaustive] 110 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 111 | pub enum QuoteError { 112 | /// The input contained a nul byte. In most cases, shells fundamentally [cannot handle strings 113 | /// containing nul bytes](quoting_warning#nul-bytes), no matter how they are quoted. But if 114 | /// you're sure you can handle nul bytes, you can call `allow_nul(true)` on the `Quoter` to let 115 | /// them pass through. 116 | Nul, 117 | } 118 | 119 | impl core::fmt::Display for QuoteError { 120 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 121 | match self { 122 | QuoteError::Nul => f.write_str("cannot shell-quote string containing nul byte"), 123 | } 124 | } 125 | } 126 | 127 | #[cfg(feature = "std")] 128 | impl std::error::Error for QuoteError {} 129 | 130 | /// A more configurable interface to quote strings. If you only want the default settings you can 131 | /// use the convenience functions [`try_quote`] and [`try_join`]. 132 | /// 133 | /// The bytes equivalent is [`bytes::Quoter`]. 134 | #[derive(Default, Debug, Clone)] 135 | pub struct Quoter { 136 | inner: bytes::Quoter, 137 | } 138 | 139 | impl Quoter { 140 | /// Create a new [`Quoter`] with default settings. 141 | #[inline] 142 | pub fn new() -> Self { 143 | Self::default() 144 | } 145 | 146 | /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not 147 | /// allowed and will result in an error of [`QuoteError::Nul`]. 148 | #[inline] 149 | pub fn allow_nul(mut self, allow: bool) -> Self { 150 | self.inner = self.inner.allow_nul(allow); 151 | self 152 | } 153 | 154 | /// Convenience function that consumes an iterable of words and turns it into a single string, 155 | /// quoting words when necessary. Consecutive words will be separated by a single space. 156 | pub fn join<'a, I: IntoIterator>(&self, words: I) -> Result { 157 | // Safety: given valid UTF-8, bytes::join() will always return valid UTF-8. 
158 | self.inner.join(words.into_iter().map(|s| s.as_bytes())) 159 | .map(|bytes| unsafe { String::from_utf8_unchecked(bytes) }) 160 | } 161 | 162 | /// Given a single word, return a string suitable to encode it as a shell argument. 163 | pub fn quote<'a>(&self, in_str: &'a str) -> Result, QuoteError> { 164 | Ok(match self.inner.quote(in_str.as_bytes())? { 165 | Cow::Borrowed(out) => { 166 | // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. 167 | unsafe { core::str::from_utf8_unchecked(out) }.into() 168 | } 169 | Cow::Owned(out) => { 170 | // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. 171 | unsafe { String::from_utf8_unchecked(out) }.into() 172 | } 173 | }) 174 | } 175 | } 176 | 177 | impl From for Quoter { 178 | fn from(inner: bytes::Quoter) -> Quoter { 179 | Quoter { inner } 180 | } 181 | } 182 | 183 | impl From for bytes::Quoter { 184 | fn from(quoter: Quoter) -> bytes::Quoter { 185 | quoter.inner 186 | } 187 | } 188 | 189 | /// Convenience function that consumes an iterable of words and turns it into a single string, 190 | /// quoting words when necessary. Consecutive words will be separated by a single space. 191 | /// 192 | /// Uses default settings except that nul bytes are passed through, which [may be 193 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. 194 | /// 195 | /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter). 196 | /// 197 | /// (That configuration never returns `Err`, so this function does not panic.) 198 | /// 199 | /// The bytes equivalent is [bytes::join]. 
200 | #[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")] 201 | pub fn join<'a, I: IntoIterator>(words: I) -> String { 202 | Quoter::new().allow_nul(true).join(words).unwrap() 203 | } 204 | 205 | /// Convenience function that consumes an iterable of words and turns it into a single string, 206 | /// quoting words when necessary. Consecutive words will be separated by a single space. 207 | /// 208 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. 209 | /// 210 | /// Equivalent to [`Quoter::new().join(words)`](Quoter). 211 | /// 212 | /// The bytes equivalent is [bytes::try_join]. 213 | pub fn try_join<'a, I: IntoIterator>(words: I) -> Result { 214 | Quoter::new().join(words) 215 | } 216 | 217 | /// Given a single word, return a string suitable to encode it as a shell argument. 218 | /// 219 | /// Uses default settings except that nul bytes are passed through, which [may be 220 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. 221 | /// 222 | /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter). 223 | /// 224 | /// (That configuration never returns `Err`, so this function does not panic.) 225 | /// 226 | /// The bytes equivalent is [bytes::quote]. 227 | #[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")] 228 | pub fn quote(in_str: &str) -> Cow { 229 | Quoter::new().allow_nul(true).quote(in_str).unwrap() 230 | } 231 | 232 | /// Given a single word, return a string suitable to encode it as a shell argument. 233 | /// 234 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. 235 | /// 236 | /// Equivalent to [`Quoter::new().quote(in_str)`](Quoter). 237 | /// 238 | /// (That configuration never returns `Err`, so this function does not panic.) 239 | /// 240 | /// The bytes equivalent is [bytes::try_quote]. 
241 | pub fn try_quote(in_str: &str) -> Result, QuoteError> { 242 | Quoter::new().quote(in_str) 243 | } 244 | 245 | #[cfg(test)] 246 | static SPLIT_TEST_ITEMS: &'static [(&'static str, Option<&'static [&'static str]>)] = &[ 247 | ("foo$baz", Some(&["foo$baz"])), 248 | ("foo baz", Some(&["foo", "baz"])), 249 | ("foo\"bar\"baz", Some(&["foobarbaz"])), 250 | ("foo \"bar\"baz", Some(&["foo", "barbaz"])), 251 | (" foo \nbar", Some(&["foo", "bar"])), 252 | ("foo\\\nbar", Some(&["foobar"])), 253 | ("\"foo\\\nbar\"", Some(&["foobar"])), 254 | ("'baz\\$b'", Some(&["baz\\$b"])), 255 | ("'baz\\\''", None), 256 | ("\\", None), 257 | ("\"\\", None), 258 | ("'\\", None), 259 | ("\"", None), 260 | ("'", None), 261 | ("foo #bar\nbaz", Some(&["foo", "baz"])), 262 | ("foo #bar", Some(&["foo"])), 263 | ("foo#bar", Some(&["foo#bar"])), 264 | ("foo\"#bar", None), 265 | ("'\\n'", Some(&["\\n"])), 266 | ("'\\\\n'", Some(&["\\\\n"])), 267 | ]; 268 | 269 | #[test] 270 | fn test_split() { 271 | for &(input, output) in SPLIT_TEST_ITEMS { 272 | assert_eq!(split(input), output.map(|o| o.iter().map(|&x| x.to_owned()).collect())); 273 | } 274 | } 275 | 276 | #[test] 277 | fn test_lineno() { 278 | let mut sh = Shlex::new("\nfoo\nbar"); 279 | while let Some(word) = sh.next() { 280 | if word == "bar" { 281 | assert_eq!(sh.line_no, 3); 282 | } 283 | } 284 | } 285 | 286 | #[test] 287 | #[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))] 288 | fn test_quote() { 289 | // This is a list of (unquoted, quoted) pairs. 290 | // But it's using a single long (raw) string literal with an ad-hoc format, just because it's 291 | // hard to read if we have to put the test strings through Rust escaping on top of the escaping 292 | // being tested. (Even raw string literals are noisy for short strings). 293 | // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences. 
294 | let tests = r#" 295 | <> => <''> 296 | => 297 | => <'foo bar'> 298 | <"foo bar'"> => <"\"foo bar'\""> 299 | <'foo bar'> => <"'foo bar'"> 300 | <"> => <'"'> 301 | <"'> => <"\"'"> 302 | => <'hello!world'> 303 | <'hello!world> => <"'hello"'!world'> 304 | <'hello!> => <"'hello"'!'> 305 | => <'hello ''^ world'> 306 | => 307 | => <'!world'"'"> 308 | <{a, b}> => <'{a, b}'> 309 | => <'NL'> 310 | <^> => <'^'> 311 | => 312 | => <'NLx''^'> 313 | => <'NL''^x'> 314 | => <'NL ''^x'> 315 | <{a,b}> => <'{a,b}'> 316 | => <'a,b'> 317 | 318 | <'$> => <"'"'$'> 319 | <"^> => <'"''^'> 320 | "#; 321 | let mut ok = true; 322 | for test in tests.trim().split('\n') { 323 | let parts: Vec = test 324 | .replace("NL", "\n") 325 | .split("=>") 326 | .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned()) 327 | .collect(); 328 | assert!(parts.len() == 2); 329 | let unquoted = &*parts[0]; 330 | let quoted_expected = &*parts[1]; 331 | let quoted_actual = try_quote(&parts[0]).unwrap(); 332 | if quoted_expected != quoted_actual { 333 | #[cfg(not(feature = "std"))] 334 | panic!("FAIL: for input <{}>, expected <{}>, got <{}>", 335 | unquoted, quoted_expected, quoted_actual); 336 | #[cfg(feature = "std")] 337 | println!("FAIL: for input <{}>, expected <{}>, got <{}>", 338 | unquoted, quoted_expected, quoted_actual); 339 | ok = false; 340 | } 341 | } 342 | assert!(ok); 343 | } 344 | 345 | #[test] 346 | #[allow(deprecated)] 347 | fn test_join() { 348 | assert_eq!(join(vec![]), ""); 349 | assert_eq!(join(vec![""]), "''"); 350 | assert_eq!(join(vec!["a", "b"]), "a b"); 351 | assert_eq!(join(vec!["foo bar", "baz"]), "'foo bar' baz"); 352 | } 353 | 354 | #[test] 355 | fn test_fallible() { 356 | assert_eq!(try_join(vec!["\0"]), Err(QuoteError::Nul)); 357 | assert_eq!(try_quote("\0"), Err(QuoteError::Nul)); 358 | } 359 | -------------------------------------------------------------------------------- /src/quoting_warning.md: 
-------------------------------------------------------------------------------- 1 | // vim: textwidth=99 2 | /* 3 | Meta note: This file is loaded as a .rs file by rustdoc only. 4 | */ 5 | /*! 6 | 7 | A more detailed version of the [warning at the top level](super#warning) about the `quote`/`join` 8 | family of APIs. 9 | 10 | In general, passing the output of these APIs to a shell should recover the original string(s). 11 | This page lists cases where it fails to do so. 12 | 13 | In noninteractive contexts, there are only minor issues. 'Noninteractive' includes shell scripts 14 | and `sh -c` arguments, or even scripts `source`d from interactive shells. The issues are: 15 | 16 | - [Nul bytes](#nul-bytes) 17 | 18 | - [Overlong commands](#overlong-commands) 19 | 20 | If you are writing directly to the stdin of an interactive (`-i`) shell (i.e., if you are 21 | pretending to be a terminal), or if you are writing to a cooked-mode pty (even if the other end is 22 | noninteractive), then there is a **severe** security issue: 23 | 24 | - [Control characters](#control-characters-interactive-contexts-only) 25 | 26 | Finally, there are some [solved issues](#solved-issues). 27 | 28 | # List of issues 29 | 30 | ## Nul bytes 31 | 32 | For non-interactive shells, the most problematic input is nul bytes (bytes with value 0). The 33 | non-deprecated functions all default to returning [`QuoteError::Nul`] when encountering them, but 34 | the deprecated [`quote`] and [`join`] functions leave them as-is. 35 | 36 | In Unix, nul bytes can't appear in command arguments, environment variables, or filenames. It's 37 | not a question of proper quoting; they just can't be used at all. This is a consequence of Unix's 38 | system calls all being designed around nul-terminated C strings. 39 | 40 | Shells inherit that limitation. Most of them do not accept nul bytes in strings even internally. 
41 | Even when they do, it's pretty much useless or even dangerous, since you can't pass them to 42 | external commands. 43 | 44 | In some cases, you might fail to pass the nul byte to the shell in the first place. For example, 45 | the following code uses [`join`] to tunnel a command over an SSH connection: 46 | 47 | ```rust 48 | std::process::Command::new("ssh") 49 | .arg("myhost") 50 | .arg("--") 51 | .arg(join(my_cmd_args)) 52 | ``` 53 | 54 | If any argument in `my_cmd_args` contains a nul byte, then `join(my_cmd_args)` will contain a nul 55 | byte. But `join(my_cmd_args)` is itself being passed as an argument to a command (the ssh 56 | command), and command arguments can't contain nul bytes! So this will simply result in the 57 | `Command` failing to launch. 58 | 59 | Still, there are other ways to smuggle nul bytes into a shell. How the shell reacts depends on the 60 | shell and the method of smuggling. For example, here is Bash 5.2.21 exhibiting three different 61 | behaviors: 62 | 63 | - With ANSI-C quoting, the string is truncated at the first nul byte: 64 | ```bash 65 | $ echo $'foo\0bar' | hexdump -C 66 | 00000000 66 6f 6f 0a |foo.| 67 | ``` 68 | 69 | - With command substitution, nul bytes are removed with a warning: 70 | ```bash 71 | $ echo $(printf 'foo\0bar') | hexdump -C 72 | bash: warning: command substitution: ignored null byte in input 73 | 00000000 66 6f 6f 62 61 72 0a |foobar.| 74 | ``` 75 | 76 | - When a nul byte appears directly in a shell script, it's removed with no warning: 77 | ```bash 78 | $ printf 'echo "foo\0bar"' | bash | hexdump -C 79 | 00000000 66 6f 6f 62 61 72 0a |foobar.| 80 | ``` 81 | 82 | Zsh, in contrast, actually allows nul bytes internally, in shell variables and even arguments to 83 | builtin commands. But if a variable is exported to the environment, or if an argument is used for 84 | an external command, then the child process will see it silently truncated at the first nul. 
This 85 | might actually be more dangerous, depending on the use case. 86 | 87 | ## Overlong commands 88 | 89 | If you pass a long string into a shell, several things might happen: 90 | 91 | - It might succeed, yet the shell might have trouble actually doing anything with it. For example: 92 | 93 | ```bash 94 | x=$(printf '%010000000d' 0); /bin/echo $x 95 | bash: /bin/echo: Argument list too long 96 | ``` 97 | 98 | - If you're using certain shells (e.g. Busybox Ash) *and* using a pty for communication, then the 99 | shell will impose a line length limit, ignoring all input past the limit. 100 | 101 | - If you're using a pty in cooked mode, then by default, if you write so many bytes as input that 102 | it fills the kernel's internal buffer, the kernel will simply drop those bytes, instead of 103 | blocking waiting for the shell to empty out the buffer. In other words, random bits of input can 104 | be lost, which is obviously insecure. 105 | 106 | Future versions of this crate may add an option to [`Quoter`] to check the length for you. 107 | 108 | ## Control characters (*interactive contexts only*) 109 | 110 | Control characters are the bytes from `\x00` to `\x1f`, plus `\x7f`. `\x00` (the nul byte) is 111 | discussed [above](#nul-bytes), but what about the rest? Well, many of them correspond to terminal 112 | keyboard shortcuts. For example, when you press Ctrl-A at a shell prompt, your terminal sends the 113 | byte `\x01`. The shell sees that byte and (if not configured differently) takes the standard 114 | action for Ctrl-A, which is to move the cursor to the beginning of the line. 115 | 116 | This means that it's quite dangerous to pipe bytes to an interactive shell. 
For example, here is a 117 | program that tries to tell Bash to echo an arbitrary string, 'safely': 118 | ```rust 119 | use std::process::{Command, Stdio}; 120 | use std::io::Write; 121 | 122 | let evil_string = "\x01do_something_evil; "; 123 | let quoted = shlex::try_quote(evil_string).unwrap(); 124 | println!("quoted string is {:?}", quoted); 125 | 126 | let mut bash = Command::new("bash") 127 | .arg("-i") // force interactive mode 128 | .stdin(Stdio::piped()) 129 | .spawn() 130 | .unwrap(); 131 | let stdin = bash.stdin.as_mut().unwrap(); 132 | write!(stdin, "echo {}\n", quoted).unwrap(); 133 | ``` 134 | 135 | Here's the output of the program (with irrelevant bits removed): 136 | 137 | ```text 138 | quoted string is "'\u{1}do_something_evil; '" 139 | /tmp comex$ do_something_evil; 'echo ' 140 | bash: do_something_evil: command not found 141 | bash: echo : command not found 142 | ``` 143 | 144 | Even though we quoted it, Bash still ran an arbitrary command! 145 | 146 | This is not because the quoting was insufficient, per se. In single quotes, all input is supposed 147 | to be treated as raw data until the closing single quote. And in fact, this would work fine 148 | without the `"-i"` argument. 149 | 150 | But line input is a separate stage from shell syntax parsing. After all, if you type a single 151 | quote on the keyboard, you wouldn't expect it to disable all your keyboard shortcuts. So a control 152 | character always has its designated effect, no matter if it's quoted or backslash-escaped. 153 | 154 | Also, some control characters are interpreted by the kernel tty layer instead, like CTRL-C to send 155 | SIGINT. These can be an issue even with noninteractive shells, but only if using a pty for 156 | communication, as opposed to a pipe. 157 | 158 | To be safe, you just have to avoid sending them. 159 | 160 | ### Why not just use hex escapes? 161 | 162 | In any normal programming language, this would be no big deal.
163 | 164 | Any normal language has a way to escape arbitrary characters in strings by writing out their 165 | numeric values. For example, Rust lets you write them in hexadecimal, like `"\x4f"` (or 166 | `"\u{1d546}"` for Unicode). In this way, arbitrary strings can be represented using only 'nice' 167 | simple characters. Any remotely suspicious character can be replaced with a numeric escape 168 | sequence, where the escape sequence itself consists only of alphanumeric characters and some 169 | punctuation. The result may not be the most readable[^choices], but it's quite safe from being 170 | misinterpreted or corrupted in transit. 171 | 172 | Shell is not normal. It has no numeric escape sequences. 173 | 174 | There are a few different ways to quote characters (unquoted, unquoted-with-backslash, single 175 | quotes, double quotes), but all of them involve writing the character itself. If the input 176 | contains a control character, the output must contain that same character. 177 | 178 | ### Mitigation: terminal filters 179 | 180 | In practice, automating interactive shells like in the above example is pretty uncommon these days. 181 | In most cases, the only way for a programmatically generated string to make its way to the input of 182 | an interactive shell is if a human copies and pastes it into their terminal. 183 | 184 | And many terminals detect when you paste a string containing control characters. iTerm2 strips 185 | them out; gnome-terminal replaces them with alternate characters[^gr]; Kitty outright prompts for 186 | confirmation. This mitigates the risk. 187 | 188 | But it's not perfect. Some other terminals don't implement this check or implement it incorrectly. 189 | Also, these checks tend to not filter the tab character, which could trigger tab completion. In 190 | most cases that's a non-issue, because most shells support paste bracketing, which disables tab and 191 | some other control characters[^bracketing] within pasted text. 
But in some cases paste bracketing 192 | gets disabled. 193 | 194 | ### Future possibility: ANSI-C quoting 195 | 196 | I said that shell syntax has no numeric escapes, but that only applies to *portable* shell syntax. 197 | Bash and Zsh support an obscure alternate quoting style with the syntax `$'foo'`. It's called 198 | ["ANSI-C quoting"][ansic], and inside it you can use all the escape sequences supported by C, 199 | including hex escapes: 200 | 201 | ```bash 202 | $ echo $'\x41\n\x42' 203 | A 204 | B 205 | ``` 206 | 207 | But other shells don't support it — including Dash, a popular choice for `/bin/sh`, and Busybox's 208 | Ash, frequently seen on stripped-down embedded systems. This crate's quoting functionality [tries 209 | to be compatible](crate#compatibility) with those shells, plus all other POSIX-compatible shells. 210 | That makes ANSI-C quoting a no-go. 211 | 212 | Still, future versions of this crate may provide an option to enable ANSI-C quoting, at the cost of 213 | reduced portability. 214 | 215 | ### Future possibility: printf 216 | 217 | Another option would be to invoke the `printf` command, which is required by POSIX to support octal 218 | escapes. For example, you could 'escape' the Rust string `"\x01"` into the shell syntax `"$(printf 219 | '\001')"`. The shell will execute the command `printf` with the first argument being literally a 220 | backslash followed by three digits; `printf` will output the actual byte with value 1; and the 221 | shell will substitute that back into the original command. 222 | 223 | The problem is that 'escaping' a string into a command substitution just feels too surprising. If 224 | nothing else, it only works with an actual shell; [other languages' shell parsing 225 | routines](crate#compatibility) wouldn't understand it. Neither would this crate's own parser, 226 | though that could be fixed. 227 | 228 | Future versions of this crate may provide an option to use `printf` for quoting. 
229 | 230 | ### Special note: newlines 231 | 232 | Did you know that `\r` and `\n` are control characters? They aren't as dangerous as other control 233 | characters (if quoted properly). But there's still an issue with them in interactive contexts. 234 | 235 | Namely, in some cases, interactive shells and/or the tty layer will 'helpfully' translate between 236 | different line ending conventions. The possibilities include replacing `\r` with `\n`, replacing 237 | `\n` with `\r\n`, and others. This can't result in command injection, but it's still a lossy 238 | transformation which can result in a failure to round-trip (i.e. the shell sees a different string 239 | from what was originally passed to `quote`). 240 | 241 | Numeric escapes would solve this as well. 242 | 243 | # Solved issues 244 | 245 | ## Solved: Past vulnerability (GHSA-r7qv-8r2h-pg27 / RUSTSEC-2024-XXX) 246 | 247 | Versions of this crate before 1.3.0 did not quote `{`, `}`, and `\xa0`. 248 | 249 | See: 250 | - <https://github.com/comex/rust-shlex/security/advisories/GHSA-r7qv-8r2h-pg27> 251 | - (TODO: Add Rustsec link) 252 | 253 | ## Solved: `!` and `^` 254 | 255 | There are two non-control characters which have a special meaning in interactive contexts only: `!` and 256 | `^`. Luckily, these can be escaped adequately. 257 | 258 | The `!` character triggers [history expansion][he]; the `^` character can trigger a variant of 259 | history expansion known as [Quick Substitution][qs]. Both of these characters get expanded even 260 | inside of double-quoted strings\! 261 | 262 | If we're in a double-quoted string, then we can't just escape these characters with a backslash. 263 | Only a specific set of characters can be backslash-escaped inside double quotes; the set of 264 | supported characters depends on the shell, but it often doesn't include `!` and `^`.[^escbs] 265 | Trying to backslash-escape an unsupported character produces a literal backslash: 266 | ```bash 267 | $ echo "\!" 268 | \!
269 | ``` 270 | 271 | However, these characters don't get expanded in single-quoted strings, so this crate just 272 | single-quotes them. 273 | 274 | But there's a Bash bug where `^` actually does get partially expanded in single-quoted strings: 275 | ```bash 276 | $ echo ' 277 | > ^a^b 278 | > ' 279 | 280 | !!:s^a^b 281 | ``` 282 | 283 | To work around that, this crate forces `^` to appear right after an opening single quote. For 284 | example, the string `"^` is quoted into `'"''^'` instead of `'"^'`. This restriction is overkill, 285 | since `^` is only meaningful right after a newline, but it's a sufficient restriction (after all, a 286 | `^` character can't be preceded by a newline if it's forced to be preceded by a single quote), and 287 | for now it simplifies things. 288 | 289 | ## Solved: `\xa0` 290 | 291 | The byte `\xa0` may be treated as a shell word separator, specifically on Bash on macOS when using 292 | the default UTF-8 locale, only when the input is invalid UTF-8. This crate handles the issue by 293 | always using quotes for arguments containing this byte. 294 | 295 | In fact, this crate always uses quotes for arguments containing any non-ASCII bytes. This may be 296 | changed in the future, since it's a bit unfriendly to non-English users. But for now it 297 | minimizes risk, especially considering the large number of different legacy single-byte locales 298 | someone might hypothetically be running their shell in. 299 | 300 | ### Demonstration 301 | 302 | ```bash 303 | $ echo -e 'ls a\xa0b' | bash 304 | ls: a: No such file or directory 305 | ls: b: No such file or directory 306 | ``` 307 | The normal behavior would be to output a single line, e.g.: 308 | ```bash 309 | $ echo -e 'ls a\xa0b' | bash 310 | ls: cannot access 'a'$'\240''b': No such file or directory 311 | ``` 312 | (The specific quoting in the error doesn't matter.) 
313 | 314 | ### Cause 315 | 316 | Just for fun, here's why this behavior occurs: 317 | 318 | Bash decides which bytes serve as word separators based on the libc function [`isblank`][isblank]. 319 | On macOS on UTF-8 locales, this passes for `\xa0`, corresponding to U+00A0 NO-BREAK SPACE. 320 | 321 | This is doubly unique compared to the other systems I tested (Linux/glibc, Linux/musl, and 322 | Windows/MSVC). First, the other systems don't allow bytes in the range [0x80, 0xFF] to pass 323 | isfoo functions in UTF-8 locales, even if the corresponding Unicode codepoint 324 | does pass, as determined by the wide-character equivalent function, iswfoo. 325 | Second, the other systems don't treat U+00A0 as blank (even using `iswblank`). 326 | 327 | Meanwhile, Bash checks for multi-byte sequences and forbids them from being treated as special 328 | characters, so the proper UTF-8 encoding of U+00A0, `b"\xc2\xa0"`, is not treated as a word 329 | separator. Treatment as a word separator only happens for `b"\xa0"` alone, which is illegal UTF-8. 330 | 331 | [ansic]: https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html 332 | [he]: https://www.gnu.org/software/bash/manual/html_node/History-Interaction.html 333 | [qs]: https://www.gnu.org/software/bash/manual/html_node/Event-Designators.html 334 | [isblank]: https://man7.org/linux/man-pages/man3/isblank.3p.html 335 | [nul]: #nul-bytes 336 | 337 | [^choices]: This can lead to tough choices over which 338 | characters to escape and which to leave as-is, especially when Unicode gets involved and you 339 | have to balance the risk of confusion with the benefit of properly supporting non-English 340 | languages. 341 |
342 |
343 | We don't have the luxury of those choices. 344 | 345 | [^gr]: For example, backspace (in Unicode lingo, U+0008 BACKSPACE) turns into U+2408 SYMBOL FOR BACKSPACE. 346 | 347 | [^bracketing]: It typically disables almost all handling of control characters by the shell proper, 348 | but one necessary exception is the end-of-paste sequence itself (which starts with the control 349 | character `\x1b`). In addition, paste bracketing does not suppress handling of control 350 | characters by the kernel tty layer, such as `\x03` sending SIGINT (which typically clears the 351 | currently typed command, making it dangerous in a similar way to `\x01`). 352 | 353 | [^escbs]: For example, Dash doesn't remove the backslash from `"\!"` because it simply doesn't know 354 | anything about `!` as a special character: it doesn't support history expansion. On the other 355 | end of the spectrum, Zsh supports history expansion and does remove the backslash — though only 356 | in interactive mode. Bash's behavior is weirder. It supports history expansion, and if you 357 | write `"\!"`, the backslash does prevent history expansion from occurring — but it doesn't get 358 | removed! 359 | 360 | */ 361 | 362 | // `use` declarations to make auto links work: 363 | use ::{quote, join, Shlex, Quoter, QuoteError}; 364 | 365 | // TODO: add more about copy-paste and human readability. 
366 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_real_shell/src/fuzz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | use std::sync::mpsc::{self, RecvTimeoutError}; 4 | use std::thread; 5 | use std::io::{Read, Write}; 6 | use std::cell::RefCell; 7 | use std::process::{Command, Stdio, ChildStdin}; 8 | use std::time::Duration; 9 | use std::sync::OnceLock; 10 | 11 | use rand::{distributions::Alphanumeric, Rng}; 12 | use bstr::ByteSlice; 13 | use nu_pretty_hex::pretty_hex; 14 | 15 | use shlex::bytes; 16 | 17 | #[derive(PartialEq, Debug)] 18 | enum CompatMode { 19 | Bash, 20 | Zsh, 21 | Dash, 22 | BusyboxAsh, 23 | Fish, 24 | Mksh, 25 | Other 26 | } 27 | 28 | fn env_var_or(var: &str, default: &str) -> String { 29 | match std::env::var(var) { 30 | Ok(s) => s, 31 | Err(std::env::VarError::NotPresent) => default.into(), 32 | Err(std::env::VarError::NotUnicode(_)) => panic!("unicode"), 33 | } 34 | } 35 | 36 | fn env_bool(var: &str, default: bool) -> bool { 37 | match &*env_var_or(var, "") { 38 | "" => default, 39 | "0" => false, 40 | "1" => true, 41 | _ => panic!("{} should be 0 or 1", var), 42 | } 43 | } 44 | 45 | fn env_u64(var: &str, default: u64) -> u64 { 46 | match &*env_var_or(var, "") { 47 | "" => default, 48 | x => x.parse().unwrap(), 49 | } 50 | } 51 | 52 | struct Config { 53 | fuzz_shell: String, 54 | debug: bool, 55 | use_docker: bool, 56 | use_pty: bool, 57 | cooked_pty: bool, // just for experimentation; this is expected to fail 58 | compat_mode: CompatMode, 59 | shell_is_interactive: bool, 60 | fuzz_timeout: u64, 61 | } 62 | 63 | static CONFIG: OnceLock = OnceLock::new(); 64 | impl Config { 65 | fn get() -> &'static Config { 66 | CONFIG.get_or_init(|| { 67 | let fuzz_shell = env_var_or("FUZZ_SHELL", "zsh --no-rcs"); 68 | let use_pty = env_bool("FUZZ_USE_PTY", true); 69 | let shell_is_interactive = 
env_bool("FUZZ_SHELL_IS_INTERACTIVE", { 70 | // default: guess -i/+i from the string (very crude) 71 | if fuzz_shell.contains(" -i") { 72 | true 73 | } else if fuzz_shell.contains(" +i") { 74 | false 75 | } else { 76 | use_pty 77 | } 78 | }); 79 | let compat_mode = match &*env_var_or("FUZZ_COMPAT_MODE", "") { 80 | "bash" => CompatMode::Bash, 81 | "zsh" => CompatMode::Zsh, 82 | "dash" => CompatMode::Dash, 83 | "busybox ash" => CompatMode::BusyboxAsh, 84 | "fish" => CompatMode::Fish, 85 | "mksh" => CompatMode::Mksh, 86 | "other" => CompatMode::Other, 87 | "" => { 88 | // default: guess the shell from the string (somewhat dumbly) 89 | if fuzz_shell.contains("bash") { 90 | CompatMode::Bash 91 | } else if fuzz_shell.contains("zsh") { 92 | CompatMode::Zsh 93 | } else if fuzz_shell.contains("dash") { 94 | CompatMode::Dash 95 | } else if fuzz_shell.contains("ash") { 96 | CompatMode::BusyboxAsh 97 | } else if fuzz_shell.contains("fish") { 98 | CompatMode::Fish 99 | } else if fuzz_shell.contains("mksh") { 100 | CompatMode::Mksh 101 | } else { 102 | CompatMode::Other 103 | } 104 | }, 105 | _ => panic!("invalid FUZZ_COMPAT_MODE") 106 | }; 107 | Config { 108 | debug: env_bool("FUZZ_DEBUG", false), 109 | use_docker: env_bool("FUZZ_USE_DOCKER", true), 110 | use_pty, 111 | cooked_pty: env_bool("FUZZ_COOKED_PTY", false), 112 | compat_mode, 113 | fuzz_shell, 114 | shell_is_interactive, 115 | fuzz_timeout: env_u64("FUZZ_TIMEOUT", 120), 116 | } 117 | }) 118 | } 119 | } 120 | 121 | 122 | struct Shell { 123 | stdout_receiver: mpsc::Receiver>, 124 | stdout_buf: Vec, 125 | stdin: ChildStdin, 126 | } 127 | impl Shell { 128 | fn new() -> Shell { 129 | let config = Config::get(); 130 | let mut real_shell = config.fuzz_shell.clone(); 131 | real_shell = format!("{} 2>&1", real_shell); 132 | #[cfg(target_os = "macos")] 133 | if !config.use_docker { 134 | // Provide some protection for native macOS execution. 
Not actually secure (it doesn't 135 | // block IPC) but should be good enough against _accidental_ bad commands. Probably. 136 | let sandbox_profile = r#""" 137 | (version 1) 138 | (allow default) 139 | (deny file-write*) 140 | (allow file-write-data (literal "/dev/null")) 141 | """#; 142 | real_shell = format!("sandbox-exec -p {} sh -c {}", 143 | shlex::try_quote(sandbox_profile).unwrap(), 144 | shlex::try_quote(&real_shell).unwrap()); 145 | } 146 | if config.use_pty { 147 | // Use python3 to set up a pty. Don't do it locally because then we're validating the 148 | // pty relay layer of Docker for Mac and I've had issues with it. 149 | real_shell = format!("exec python3 -c 'import sys, pty; exit(pty.spawn(sys.argv[1:]))' sh -c 'stty sane {} -echo; exec '{}", 150 | if config.cooked_pty { "cooked" } else { "raw" }, 151 | shlex::try_quote(&real_shell).unwrap()); 152 | //real_shell = format!(r#"CMD={} socat -b1 - 'EXEC:sh -c "\"eval \\\"$CMD\\\"\"",pty,sane,raw,echo=0,nonblock'"#, shlex::quote(&real_shell)); 153 | } 154 | if config.use_docker { 155 | // By default, run in a Docker container so that we don't cause random commands to be 156 | // run on the host (if quoting is buggy), or clutter up the shell history file for 157 | // interactive shells. 
158 | real_shell = format!("docker run --rm --log-opt max-size=1m -i {} $(docker build -q - < {}/Dockerfile) sh -c {}", 159 | env_var_or("FUZZ_DOCKER_ARGS", ""), 160 | shlex::try_quote(env!("CARGO_MANIFEST_DIR")).unwrap(), 161 | shlex::try_quote(&real_shell).unwrap()); 162 | } 163 | if config.debug { 164 | println!("=> {}", real_shell); 165 | } 166 | let cmd = Command::new("/bin/sh") 167 | .arg("-c") 168 | .arg(real_shell) 169 | .stdin(Stdio::piped()) 170 | .stdout(Stdio::piped()) 171 | .spawn() 172 | .expect("failed to execute shell"); 173 | let mut stdout = cmd.stdout.unwrap(); 174 | let stdin = cmd.stdin.unwrap(); 175 | let (sender, receiver) = mpsc::channel(); 176 | 177 | // Read stdout on a separate thread to avoid deadlocking on pipe buffers. 178 | thread::spawn(move || { 179 | loop { 180 | let mut buf: Vec = Vec::new(); 181 | buf.resize(128, 0u8); 182 | let size = stdout.read(&mut buf).expect("failed to read stdout"); 183 | if size == 0 { 184 | break; 185 | } 186 | buf.truncate(size); 187 | if sender.send(buf).is_err() { break; } 188 | } 189 | }); 190 | 191 | let mut this = Shell { stdout_receiver: receiver, stdout_buf: Vec::new(), stdin }; 192 | 193 | this.wait_until_responsive(); 194 | this 195 | } 196 | 197 | // Keep reading until we find `delim`; return the output without `delim`. 
198 | fn read_until_delim(&mut self, delim: &[u8], timeout: Duration) -> Result, RecvTimeoutError> { 199 | let mut pos = 0; 200 | loop { 201 | if Config::get().debug { 202 | println!("READ: {}", pretty_hex(&self.stdout_buf)); 203 | //println!(">> wanted: {}", pretty_hex(&delim)); 204 | //if self.stdout_buf.find(b"zsh: no such event").is_some() { panic!("xxx"); } 205 | } 206 | if let Some(delim_pos) = self.stdout_buf[pos..].find(delim) { 207 | let ret = self.stdout_buf[..pos + delim_pos].to_owned(); 208 | self.stdout_buf.drain(0..(pos + delim_pos + delim.len())); 209 | return Ok(ret); 210 | } 211 | pos = self.stdout_buf.len().saturating_sub(delim.len() - 1); 212 | let new_data = self.stdout_receiver.recv_timeout(timeout)?; 213 | self.stdout_buf.extend_from_slice(&new_data); 214 | } 215 | } 216 | 217 | // Write something. 218 | fn write(&mut self, text: &[u8]) { 219 | if Config::get().debug { 220 | println!("WROTE: {}", pretty_hex(&text)); 221 | } 222 | self.stdin.write_all(text).expect("failed to write to shell stdin"); 223 | self.stdin.flush().expect("failed to flush shell stdin"); // shouldn't be necessary 224 | } 225 | 226 | // Wait until the shell listens to us. Also disable history logging in case this is an 227 | // interactive shell. 
228 | fn wait_until_responsive(&mut self) { 229 | let unset_histfile: &[u8] = if let CompatMode::Fish = Config::get().compat_mode { 230 | b"" 231 | } else { 232 | b"; unset HISTFILE" 233 | }; 234 | for _ in 0..60 { 235 | let delimiter = random_alphanum(); 236 | self.write(&[ 237 | b"echo ", 238 | &delimiter[..1], 239 | b"''", 240 | &delimiter[1..], 241 | unset_histfile, 242 | b"\n", 243 | ].concat()); 244 | match self.read_until_delim(&delimiter, Duration::from_millis(500)) { 245 | Ok(_) => return, 246 | Err(RecvTimeoutError::Timeout) => (), 247 | Err(RecvTimeoutError::Disconnected) => panic!("shell exited"), 248 | } 249 | }; 250 | panic!("timeout waiting for shell to be responsive"); 251 | } 252 | } 253 | 254 | /// Return a byte string of 10 random alphanumeric characters. 255 | /// 256 | /// Used as delimiters around the stuff we actually want to quote. 257 | /// 258 | /// Using `rand` makes the fuzzer slightly less reproducible, but the specific string chosen 259 | /// shouldn't make a difference, and having it be different every time reduces the chance of false 260 | /// positive matches with interactive shells, in case the delimiter gets into shell history and 261 | /// then the shell prints it as part of some autocompletion routine. 262 | /// 263 | /// (Though in theory, unsetting HISTFILE as done above should be enough to prevent it from getting 264 | /// into shell history in the first place.) 265 | fn random_alphanum() -> Vec { 266 | rand::thread_rng() 267 | .sample_iter(&Alphanumeric) 268 | .take(10) 269 | .collect() 270 | } 271 | 272 | thread_local! { 273 | static SHELL: RefCell = RefCell::new(Shell::new()); 274 | } 275 | 276 | fuzz_target!(|unquoted: &[u8]| { 277 | let mut unquoted: Vec = unquoted.into(); 278 | { 279 | // Strip nul characters. 
280 | for byte in unquoted.iter_mut() { 281 | if *byte == 0 { 282 | *byte = b'x'; 283 | } 284 | } 285 | } 286 | let config = Config::get(); 287 | 288 | /* 289 | TODO: 290 | let length_limit = match config.compat_mode { 291 | // zsh in interactive mode gets very slow for long inputs. 292 | CompatMode::Zsh if config.shell_is_interactive => Some(1024), 293 | // busybox ash has a line length limit when reading from a pty (and we need to be 294 | // conservative since this length is pre-quoting). 295 | CompatMode::BusyboxAsh if config.use_pty => Some(256), 296 | // Otherwise no length limit. 297 | _ => None 298 | }; 299 | */ 300 | let length_limit = Some(256); 301 | 302 | if let Some(limit) = length_limit { 303 | unquoted.truncate(limit); 304 | } 305 | 306 | // Disable certain types of input for shells that can't handle them. 307 | // This is perhaps unnecessarily tightly dialed in to the quirks of specific shells, but I've 308 | // found this helpful as a way understand those shells' behavior better. 309 | 310 | // Strip control characters in pty mode because they are special there and we cannot quote them 311 | // properly while being POSIX-compatible (see crate documentation). 312 | // And bash tries to interpret them even without a pty in interactive mode. 313 | let strip_controls = config.use_pty || 314 | (config.compat_mode == CompatMode::Bash && config.shell_is_interactive); 315 | 316 | // Strip \r in cases where shells turns it into \n. 317 | // - bash: happens in interactive mode, using a pty, or both 318 | // - zsh: happens if using a pty (can't test interactive mode without pty) 319 | // - busybox ash: happens if using a pty (not in interactive mode) 320 | // - fish: actually turns \n into \r\n, but we need to strip it from input 321 | // In all cases, I verified using strace that this is happening in the shell rather than in the 322 | // kernel's tty layer. The tty layer can be configured to do things like that, but apparently 323 | // it's not the default. 
324 | let strip_crs = match config.compat_mode { 325 | CompatMode::Bash => config.use_pty || config.shell_is_interactive, 326 | CompatMode::Zsh | CompatMode::BusyboxAsh => config.use_pty, 327 | CompatMode::Fish => config.use_pty, 328 | CompatMode::Mksh => config.use_pty, 329 | _ => false 330 | }; 331 | 332 | // Ignore \r added by the shell. This assumes strip_crs is also on. 333 | let ignore_added_crs = match config.compat_mode { 334 | CompatMode::Fish => config.use_pty, 335 | _ => false 336 | }; 337 | 338 | // Strip characters with the high bit set only if the string as a whole is invalid UTF-8, 339 | // because: 340 | // - bash: sometimes strips bytes at the end that could be the beginning of a UTF-8 341 | // sequence, again if in interactive mode and/or using a pty 342 | // XXX and also valid UTF-8? 343 | // - zsh: goes through multibyte routines and will replace invalid characters with 344 | // question marks, only if interactive 345 | // - busybox ash: something similar, only if using a pty 346 | // - fish: ditto 347 | // Again, can't deal with this properly while being POSIX-compatible. (In theory we could make 348 | // them safer by quoting, so the question marks wouldn't be treated as glob characters, but the 349 | // string still wouldn't round-trip properly, so don't bother.) 
350 | let is_invalid_utf8 = std::str::from_utf8(&unquoted).is_err(); 351 | let strip_8bit = match config.compat_mode { 352 | CompatMode::Bash => config.use_pty || config.shell_is_interactive, 353 | CompatMode::Zsh => config.shell_is_interactive && is_invalid_utf8, 354 | CompatMode::BusyboxAsh | 355 | CompatMode::Fish | 356 | CompatMode::Mksh => config.use_pty && is_invalid_utf8, 357 | CompatMode::Dash | 358 | CompatMode::Other => false, 359 | }; 360 | 361 | for byte in unquoted.iter_mut() { 362 | if (strip_controls && byte.is_ascii_control() && *byte != b'\r' && *byte != b'\n') || 363 | (*byte == b'\0') || 364 | (strip_crs && *byte == b'\r') || 365 | (strip_8bit && *byte >= 0x80) { 366 | *byte = b'a' + (*byte % 26); 367 | } 368 | } 369 | 370 | //println!("len={}", unquoted.len()); 371 | 372 | // We already filtered out nul bytes so this should be successful. 373 | let quoted = bytes::try_quote(&unquoted).unwrap(); 374 | 375 | SHELL.with(|ref_shell| { 376 | let mut shell = ref_shell.borrow_mut(); 377 | // Add a random prefix and suffix to ensure we can identify the output while ignoring the shell 378 | // prompt. The prefix and suffix are alphanumeric so they don't need to be quoted. They are 379 | // placed outside the double quotes just in case any shell cares about something being the 380 | // first or last character in a double-quoted string (though it shouldn't). 381 | // Also break up the prefix and suffix so that we don't get them back from shell echo. 382 | let mut alphanum_prefix = random_alphanum(); 383 | let mut alphanum_suffix = random_alphanum(); 384 | // Add the literal string PREFIX to the end of the prefix, and SUFFIX to the start of the 385 | // suffix, to make them more recognizable. 
386 | alphanum_prefix.extend_from_slice(b"PREFIX"); 387 | alphanum_suffix.splice(0..0, *b"SUFFIX"); 388 | // Write the command: 389 | // printf %s "AAAPREFIX***SUFFIXBBB" 390 | // ^^^---------------------random prefix 391 | // ^^^------------quoted string 392 | // ^^^---random suffix 393 | let full_command = [ 394 | b"printf %s ", 395 | &alphanum_prefix[..1], 396 | b"\"\"", 397 | &alphanum_prefix[1..], 398 | "ed, 399 | &alphanum_suffix[..1], 400 | b"\"\"", 401 | &alphanum_suffix[1..], 402 | b"\n" 403 | ].concat(); 404 | shell.write(&full_command); 405 | let read_data = shell.read_until_delim(&alphanum_suffix, Duration::from_secs(config.fuzz_timeout)).unwrap(); 406 | let prefix_pos = read_data.find(&alphanum_prefix).expect("did not find prefix"); 407 | let mut read_data = &read_data[prefix_pos + alphanum_prefix.len() ..]; 408 | let buf: Vec; 409 | //println!("read back {} bytes", read_data.len()); 410 | if ignore_added_crs { 411 | buf = read_data.iter().cloned().filter(|&c| c != b'\r').collect(); 412 | read_data = &buf[..]; 413 | } 414 | if read_data != unquoted { 415 | panic!("original:\n{}\nread from shell:\n{}\nquoted:\n{}", 416 | pretty_hex(&unquoted), pretty_hex(&read_data), pretty_hex("ed)); 417 | } 418 | }) 419 | }); 420 | -------------------------------------------------------------------------------- /src/bytes.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Nicholas Allegra (comex). 2 | // Licensed under the Apache License, Version 2.0 or 3 | // the MIT license , at your option. This file may not be 4 | // copied, modified, or distributed except according to those terms. 5 | 6 | //! [`Shlex`] and friends for byte strings. 7 | //! 8 | //! This is used internally by the [outer module](crate), and may be more 9 | //! convenient if you are working with byte slices (`[u8]`) or types that are 10 | //! wrappers around bytes, such as [`OsStr`](std::ffi::OsStr): 11 | //! 12 | //! ```rust 13 | //! 
#[cfg(unix)] { 14 | //! use shlex::bytes::quote; 15 | //! use std::ffi::OsStr; 16 | //! use std::os::unix::ffi::OsStrExt; 17 | //! 18 | //! // `\x80` is invalid in UTF-8. 19 | //! let os_str = OsStr::from_bytes(b"a\x80b c"); 20 | //! assert_eq!(quote(os_str.as_bytes()), &b"'a\x80b c'"[..]); 21 | //! } 22 | //! ``` 23 | //! 24 | //! (On Windows, `OsStr` uses 16 bit wide characters so this will not work.) 25 | 26 | extern crate alloc; 27 | use alloc::vec::Vec; 28 | use alloc::borrow::Cow; 29 | #[cfg(test)] 30 | use alloc::vec; 31 | #[cfg(test)] 32 | use alloc::borrow::ToOwned; 33 | #[cfg(all(doc, not(doctest)))] 34 | use crate::{self as shlex, quoting_warning}; 35 | 36 | use super::QuoteError; 37 | 38 | /// An iterator that takes an input byte string and splits it into the words using the same syntax as 39 | /// the POSIX shell. 40 | pub struct Shlex<'a> { 41 | in_iter: core::slice::Iter<'a, u8>, 42 | /// The number of newlines read so far, plus one. 43 | pub line_no: usize, 44 | /// An input string is erroneous if it ends while inside a quotation or right after an 45 | /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that 46 | /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to 47 | /// true; best to check it after you're done iterating. 
48 | pub had_error: bool, 49 | } 50 | 51 | impl<'a> Shlex<'a> { 52 | pub fn new(in_bytes: &'a [u8]) -> Self { 53 | Shlex { 54 | in_iter: in_bytes.iter(), 55 | line_no: 1, 56 | had_error: false, 57 | } 58 | } 59 | 60 | fn parse_word(&mut self, mut ch: u8) -> Option> { 61 | let mut result: Vec = Vec::new(); 62 | loop { 63 | match ch as char { 64 | '"' => if let Err(()) = self.parse_double(&mut result) { 65 | self.had_error = true; 66 | return None; 67 | }, 68 | '\'' => if let Err(()) = self.parse_single(&mut result) { 69 | self.had_error = true; 70 | return None; 71 | }, 72 | '\\' => if let Some(ch2) = self.next_char() { 73 | if ch2 != '\n' as u8 { result.push(ch2); } 74 | } else { 75 | self.had_error = true; 76 | return None; 77 | }, 78 | ' ' | '\t' | '\n' => { break; }, 79 | _ => { result.push(ch as u8); }, 80 | } 81 | if let Some(ch2) = self.next_char() { ch = ch2; } else { break; } 82 | } 83 | Some(result) 84 | } 85 | 86 | fn parse_double(&mut self, result: &mut Vec) -> Result<(), ()> { 87 | loop { 88 | if let Some(ch2) = self.next_char() { 89 | match ch2 as char { 90 | '\\' => { 91 | if let Some(ch3) = self.next_char() { 92 | match ch3 as char { 93 | // \$ => $ 94 | '$' | '`' | '"' | '\\' => { result.push(ch3); }, 95 | // \ => nothing 96 | '\n' => {}, 97 | // \x => =x 98 | _ => { result.push('\\' as u8); result.push(ch3); } 99 | } 100 | } else { 101 | return Err(()); 102 | } 103 | }, 104 | '"' => { return Ok(()); }, 105 | _ => { result.push(ch2); }, 106 | } 107 | } else { 108 | return Err(()); 109 | } 110 | } 111 | } 112 | 113 | fn parse_single(&mut self, result: &mut Vec) -> Result<(), ()> { 114 | loop { 115 | if let Some(ch2) = self.next_char() { 116 | match ch2 as char { 117 | '\'' => { return Ok(()); }, 118 | _ => { result.push(ch2); }, 119 | } 120 | } else { 121 | return Err(()); 122 | } 123 | } 124 | } 125 | 126 | fn next_char(&mut self) -> Option { 127 | let res = self.in_iter.next().copied(); 128 | if res == Some(b'\n') { self.line_no += 1; } 129 | res 
130 | } 131 | } 132 | 133 | impl<'a> Iterator for Shlex<'a> { 134 | type Item = Vec; 135 | fn next(&mut self) -> Option { 136 | if let Some(mut ch) = self.next_char() { 137 | // skip initial whitespace 138 | loop { 139 | match ch as char { 140 | ' ' | '\t' | '\n' => {}, 141 | '#' => { 142 | while let Some(ch2) = self.next_char() { 143 | if ch2 as char == '\n' { break; } 144 | } 145 | }, 146 | _ => { break; } 147 | } 148 | if let Some(ch2) = self.next_char() { ch = ch2; } else { return None; } 149 | } 150 | self.parse_word(ch) 151 | } else { // no initial character 152 | None 153 | } 154 | } 155 | 156 | } 157 | 158 | /// Convenience function that consumes the whole byte string at once. Returns None if the input was 159 | /// erroneous. 160 | pub fn split(in_bytes: &[u8]) -> Option>> { 161 | let mut shl = Shlex::new(in_bytes); 162 | let res = shl.by_ref().collect(); 163 | if shl.had_error { None } else { Some(res) } 164 | } 165 | 166 | /// A more configurable interface to quote strings. If you only want the default settings you can 167 | /// use the convenience functions [`try_quote`] and [`try_join`]. 168 | /// 169 | /// The string equivalent is [`shlex::Quoter`]. 170 | #[derive(Default, Debug, Clone)] 171 | pub struct Quoter { 172 | allow_nul: bool, 173 | // TODO: more options 174 | } 175 | 176 | impl Quoter { 177 | /// Create a new [`Quoter`] with default settings. 178 | #[inline] 179 | pub fn new() -> Self { 180 | Self::default() 181 | } 182 | 183 | /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not 184 | /// allowed and will result in an error of [`QuoteError::Nul`]. 185 | #[inline] 186 | pub fn allow_nul(mut self, allow: bool) -> Self { 187 | self.allow_nul = allow; 188 | self 189 | } 190 | 191 | /// Convenience function that consumes an iterable of words and turns it into a single byte string, 192 | /// quoting words when necessary. Consecutive words will be separated by a single space. 
193 | pub fn join<'a, I: IntoIterator>(&self, words: I) -> Result, QuoteError> { 194 | Ok(words.into_iter() 195 | .map(|word| self.quote(word)) 196 | .collect::>, QuoteError>>()? 197 | .join(&b' ')) 198 | } 199 | 200 | /// Given a single word, return a byte string suitable to encode it as a shell argument. 201 | /// 202 | /// If given valid UTF-8, this will never produce invalid UTF-8. This is because it only 203 | /// ever inserts valid ASCII characters before or after existing ASCII characters (or 204 | /// returns two single quotes if the input was an empty string). It will never modify a 205 | /// multibyte UTF-8 character. 206 | pub fn quote<'a>(&self, mut in_bytes: &'a [u8]) -> Result, QuoteError> { 207 | if in_bytes.is_empty() { 208 | // Empty string. Special case that isn't meaningful as only part of a word. 209 | return Ok(b"''"[..].into()); 210 | } 211 | if !self.allow_nul && in_bytes.iter().any(|&b| b == b'\0') { 212 | return Err(QuoteError::Nul); 213 | } 214 | let mut out: Vec = Vec::new(); 215 | while !in_bytes.is_empty() { 216 | // Pick a quoting strategy for some prefix of the input. Normally this will cover the 217 | // entire input, but in some case we might need to divide the input into multiple chunks 218 | // that are quoted differently. 219 | let (cur_len, strategy) = quoting_strategy(in_bytes); 220 | if cur_len == in_bytes.len() && strategy == QuotingStrategy::Unquoted && out.is_empty() { 221 | // Entire string can be represented unquoted. Reuse the allocation. 222 | return Ok(in_bytes.into()); 223 | } 224 | let (cur_chunk, rest) = in_bytes.split_at(cur_len); 225 | assert!(rest.len() < in_bytes.len()); // no infinite loop 226 | in_bytes = rest; 227 | append_quoted_chunk(&mut out, cur_chunk, strategy); 228 | } 229 | Ok(out.into()) 230 | } 231 | 232 | } 233 | 234 | #[derive(PartialEq)] 235 | enum QuotingStrategy { 236 | /// No quotes and no backslash escapes. 
(If backslash escapes would be necessary, we use a 237 | /// different strategy instead.) 238 | Unquoted, 239 | /// Single quoted. 240 | SingleQuoted, 241 | /// Double quotes, potentially with backslash escapes. 242 | DoubleQuoted, 243 | // TODO: add $'xxx' and "$(printf 'xxx')" styles 244 | } 245 | 246 | /// Is this ASCII byte okay to emit unquoted? Every ASCII value is listed explicitly (there is no wildcard arm), so the compiler checks that each one has been classified. 247 | const fn unquoted_ok(c: u8) -> bool { 248 | match c as char { 249 | // Allowed characters: 250 | '+' | '-' | '.' | '/' | ':' | '@' | ']' | '_' | 251 | '0'..='9' | 'A'..='Z' | 'a'..='z' 252 | => true, 253 | 254 | // Non-allowed characters: 255 | // From POSIX https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html 256 | // "The application shall quote the following characters if they are to represent themselves:" 257 | '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | '\'' | ' ' | '\t' | '\n' | 258 | // "and the following may need to be quoted under certain circumstances[..]:" 259 | '*' | '?' | '[' | '#' | '~' | '=' | '%' | 260 | // Brace expansion. These ought to be in the POSIX list but aren't yet; 261 | // see: https://www.austingroupbugs.net/view.php?id=1193 262 | '{' | '}' | 263 | // Also quote comma, just to be safe in the extremely odd case that the user of this crate 264 | // is intentionally placing a quoted string inside a brace expansion, e.g.: 265 | // format!("echo foo{{a,b,{}}}", shlex::quote(some_str)) 266 | ',' | 267 | // '\r' is allowed in a word by all real shells I tested, but is treated as a word 268 | // separator by Python `shlex`, and might be translated to '\n' in interactive mode. 269 | '\r' | 270 | // '!' and '^' are treated specially in interactive mode; see quoting_warning. 271 | '!' | '^' | 272 | // Nul bytes and control characters. 273 | '\x00' ..= '\x1f' | '\x7f' 274 | => false, 275 | '\u{80}' ..= '\u{10ffff}' => { 276 | // This is unreachable since `unquoted_ok` is only called for 0..128.
277 | // Non-ASCII bytes are handled separately in `quoting_strategy`. 278 | // Can't call unreachable!() from `const fn` on old Rust, so recurse instead. NOTE: the call below passes the same `c`, so it would never terminate if this arm were ever actually reached. 279 | unquoted_ok(c) 280 | }, 281 | } 282 | // Note: The logic cited above for quoting comma might suggest that `..` should also be quoted 283 | // (as a special case of brace expansion). But it's not necessary. There are three cases: 284 | // 285 | // 1. The user wants comma-based brace expansion, but the untrusted string being `quote`d 286 | // contains `..`, so they get something like `{foo,bar,3..5}`. 287 | // => That's safe; both Bash and Zsh expand this to `foo bar 3..5` rather than 288 | // `foo bar 3 4 5`. The presence of commas disables sequence expression expansion. 289 | // 290 | // 2. The user wants comma-based brace expansion where the contents of the braces are a 291 | // variable number of `quote`d strings and nothing else. There happens to be exactly 292 | // one string and it contains `..`, so they get something like `{3..5}`. 293 | // => Then this will expand as a sequence expression, which is unintended. But I don't mind, 294 | // because any such code is already buggy. Suppose the untrusted string *didn't* contain 295 | // `,` or `..`, resulting in shell input like `{foo}`. Then the shell would interpret it 296 | // as the literal string `{foo}` rather than brace-expanding it into `foo`. 297 | // 298 | // 3. The user wants a sequence expression and wants to supply an untrusted string as one of 299 | // the endpoints or the increment. 300 | // => Well, that's just silly, since the endpoints can only be numbers or single letters. 301 | } 302 | 303 | /// Optimized version of `unquoted_ok`: a single bit test against a 128-bit mask precomputed at compile time. Callers must pass `c < 0x80`; the mask has no bit for larger values, and shifting a `u128` by 128 or more is an overflow. 304 | fn unquoted_ok_fast(c: u8) -> bool { 305 | const UNQUOTED_OK_MASK: u128 = { 306 | // Make a mask of all bytes in 0..<0x80 that pass: bit `c` is set iff `unquoted_ok(c)`.
307 | let mut c = 0u8; 308 | let mut mask = 0u128; 309 | while c < 0x80 { 310 | if unquoted_ok(c) { 311 | mask |= 1u128 << c; 312 | } 313 | c += 1; 314 | } 315 | mask 316 | }; 317 | ((UNQUOTED_OK_MASK >> c) & 1) != 0 318 | } 319 | 320 | /// Is this ASCII byte okay to emit in single quotes? 321 | fn single_quoted_ok(c: u8) -> bool { 322 | match c { 323 | // No single quotes in single quotes. 324 | b'\'' => false, 325 | // To work around a Bash bug, ^ is only allowed right after an opening single quote; see 326 | // quoting_warning. That one allowed position is special-cased by `quoting_strategy`, so ^ is always rejected here. 327 | b'^' => false, 328 | // Backslashes in single quotes are literal according to POSIX, but Fish treats them as an 329 | // escape character. Ban them. Fish doesn't aim to be POSIX-compatible, but we *can* 330 | // achieve Fish compatibility using double quotes, so we might as well. 331 | b'\\' => false, 332 | _ => true 333 | } 334 | } 335 | 336 | /// Is this ASCII byte okay to emit in double quotes? (`"` and `\` are accepted here because `append_quoted_chunk` backslash-escapes them.) 337 | fn double_quoted_ok(c: u8) -> bool { 338 | match c { 339 | // Work around Python `shlex` bug where parsing "\`" and "\$" doesn't strip the 340 | // backslash, even though POSIX requires it. 341 | b'`' | b'$' => false, 342 | // '!' and '^' are treated specially in interactive mode; see quoting_warning. 343 | b'!' | b'^' => false, 344 | _ => true 345 | } 346 | } 347 | 348 | /// Given an input, return a quoting strategy that can cover some prefix of the string, along with 349 | /// the size of that prefix. 350 | /// 351 | /// Precondition: input size is nonzero. (Empty strings are handled by the caller.) 352 | /// Postcondition: returned size is nonzero.
353 | #[cfg_attr(manual_codegen_check, inline(never))] 354 | fn quoting_strategy(in_bytes: &[u8]) -> (usize, QuotingStrategy) { 355 | const UNQUOTED_OK: u8 = 1; 356 | const SINGLE_QUOTED_OK: u8 = 2; 357 | const DOUBLE_QUOTED_OK: u8 = 4; 358 | 359 | let mut prev_ok = SINGLE_QUOTED_OK | DOUBLE_QUOTED_OK | UNQUOTED_OK; 360 | let mut i = 0; 361 | 362 | if in_bytes[0] == b'^' { 363 | // To work around a Bash bug, ^ is only allowed right after an opening single quote; see 364 | // quoting_warning. 365 | prev_ok = SINGLE_QUOTED_OK; 366 | i = 1; 367 | } 368 | 369 | while i < in_bytes.len() { 370 | let c = in_bytes[i]; 371 | let mut cur_ok = prev_ok; 372 | 373 | if c >= 0x80 { 374 | // Normally, non-ASCII characters shouldn't require quoting, but see quoting_warning.md 375 | // about \xa0. For now, just treat all non-ASCII characters as requiring quotes. This 376 | // also ensures things are safe in the off-chance that you're in a legacy 8-bit locale that 377 | // has additional characters satisfying `isblank`. 378 | cur_ok &= !UNQUOTED_OK; 379 | } else { 380 | if !unquoted_ok_fast(c) { 381 | cur_ok &= !UNQUOTED_OK; 382 | } 383 | if !single_quoted_ok(c){ 384 | cur_ok &= !SINGLE_QUOTED_OK; 385 | } 386 | if !double_quoted_ok(c) { 387 | cur_ok &= !DOUBLE_QUOTED_OK; 388 | } 389 | } 390 | 391 | if cur_ok == 0 { 392 | // There are no quoting strategies that would work for both the previous characters and 393 | // this one. So we have to end the chunk before this character. The caller will call 394 | // `quoting_strategy` again to handle the rest of the string. 395 | break; 396 | } 397 | 398 | prev_ok = cur_ok; 399 | i += 1; 400 | } 401 | 402 | // Pick the best allowed strategy. 
403 | let strategy = if prev_ok & UNQUOTED_OK != 0 { 404 | QuotingStrategy::Unquoted 405 | } else if prev_ok & SINGLE_QUOTED_OK != 0 { 406 | QuotingStrategy::SingleQuoted 407 | } else if prev_ok & DOUBLE_QUOTED_OK != 0 { 408 | QuotingStrategy::DoubleQuoted 409 | } else { 410 | unreachable!() 411 | }; 412 | debug_assert!(i > 0); 413 | (i, strategy) 414 | } 415 | 416 | fn append_quoted_chunk(out: &mut Vec, cur_chunk: &[u8], strategy: QuotingStrategy) { 417 | match strategy { 418 | QuotingStrategy::Unquoted => { 419 | out.extend_from_slice(cur_chunk); 420 | }, 421 | QuotingStrategy::SingleQuoted => { 422 | out.reserve(cur_chunk.len() + 2); 423 | out.push(b'\''); 424 | out.extend_from_slice(cur_chunk); 425 | out.push(b'\''); 426 | }, 427 | QuotingStrategy::DoubleQuoted => { 428 | out.reserve(cur_chunk.len() + 2); 429 | out.push(b'"'); 430 | for &c in cur_chunk.into_iter() { 431 | if let b'$' | b'`' | b'"' | b'\\' = c { 432 | // Add a preceding backslash. 433 | // Note: We shouldn't actually get here for $ and ` because they don't pass 434 | // `double_quoted_ok`. 435 | out.push(b'\\'); 436 | } 437 | // Add the character itself. 438 | out.push(c); 439 | } 440 | out.push(b'"'); 441 | }, 442 | } 443 | } 444 | 445 | /// Convenience function that consumes an iterable of words and turns it into a single byte string, 446 | /// quoting words when necessary. Consecutive words will be separated by a single space. 447 | /// 448 | /// Uses default settings except that nul bytes are passed through, which [may be 449 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. 450 | /// 451 | /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter). 452 | /// 453 | /// (That configuration never returns `Err`, so this function does not panic.) 454 | /// 455 | /// The string equivalent is [shlex::join]. 
456 | #[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")] 457 | pub fn join<'a, I: IntoIterator>(words: I) -> Vec { 458 | Quoter::new().allow_nul(true).join(words).unwrap() 459 | } 460 | 461 | /// Convenience function that consumes an iterable of words and turns it into a single byte string, 462 | /// quoting words when necessary. Consecutive words will be separated by a single space. 463 | /// 464 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. 465 | /// 466 | /// Equivalent to [`Quoter::new().join(words)`](Quoter). 467 | /// 468 | /// The string equivalent is [shlex::try_join]. 469 | pub fn try_join<'a, I: IntoIterator>(words: I) -> Result, QuoteError> { 470 | Quoter::new().join(words) 471 | } 472 | 473 | /// Given a single word, return a string suitable to encode it as a shell argument. 474 | /// 475 | /// Uses default settings except that nul bytes are passed through, which [may be 476 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. 477 | /// 478 | /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_bytes).unwrap()`](Quoter). 479 | /// 480 | /// (That configuration never returns `Err`, so this function does not panic.) 481 | /// 482 | /// The string equivalent is [shlex::quote]. 483 | #[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")] 484 | pub fn quote(in_bytes: &[u8]) -> Cow<[u8]> { 485 | Quoter::new().allow_nul(true).quote(in_bytes).unwrap() 486 | } 487 | 488 | /// Given a single word, return a string suitable to encode it as a shell argument. 489 | /// 490 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. 491 | /// 492 | /// Equivalent to [`Quoter::new().quote(in_bytes)`](Quoter). 493 | /// 494 | /// (That configuration never returns `Err`, so this function does not panic.) 495 | /// 496 | /// The string equivalent is [shlex::try_quote]. 
497 | pub fn try_quote(in_bytes: &[u8]) -> Result, QuoteError> { 498 | Quoter::new().quote(in_bytes) 499 | } 500 | 501 | #[cfg(test)] 502 | const INVALID_UTF8: &[u8] = b"\xa1"; 503 | #[cfg(test)] 504 | const INVALID_UTF8_SINGLEQUOTED: &[u8] = b"'\xa1'"; 505 | 506 | #[test] 507 | #[allow(invalid_from_utf8)] 508 | fn test_invalid_utf8() { 509 | // Check that our test string is actually invalid UTF-8. 510 | assert!(core::str::from_utf8(INVALID_UTF8).is_err()); 511 | } 512 | 513 | #[cfg(test)] 514 | static SPLIT_TEST_ITEMS: &'static [(&'static [u8], Option<&'static [&'static [u8]]>)] = &[ 515 | (b"foo$baz", Some(&[b"foo$baz"])), 516 | (b"foo baz", Some(&[b"foo", b"baz"])), 517 | (b"foo\"bar\"baz", Some(&[b"foobarbaz"])), 518 | (b"foo \"bar\"baz", Some(&[b"foo", b"barbaz"])), 519 | (b" foo \nbar", Some(&[b"foo", b"bar"])), 520 | (b"foo\\\nbar", Some(&[b"foobar"])), 521 | (b"\"foo\\\nbar\"", Some(&[b"foobar"])), 522 | (b"'baz\\$b'", Some(&[b"baz\\$b"])), 523 | (b"'baz\\\''", None), 524 | (b"\\", None), 525 | (b"\"\\", None), 526 | (b"'\\", None), 527 | (b"\"", None), 528 | (b"'", None), 529 | (b"foo #bar\nbaz", Some(&[b"foo", b"baz"])), 530 | (b"foo #bar", Some(&[b"foo"])), 531 | (b"foo#bar", Some(&[b"foo#bar"])), 532 | (b"foo\"#bar", None), 533 | (b"'\\n'", Some(&[b"\\n"])), 534 | (b"'\\\\n'", Some(&[b"\\\\n"])), 535 | (INVALID_UTF8, Some(&[INVALID_UTF8])), 536 | ]; 537 | 538 | #[test] 539 | fn test_split() { 540 | for &(input, output) in SPLIT_TEST_ITEMS { 541 | assert_eq!(split(input), output.map(|o| o.iter().map(|&x| x.to_owned()).collect())); 542 | } 543 | } 544 | 545 | #[test] 546 | fn test_lineno() { 547 | let mut sh = Shlex::new(b"\nfoo\nbar"); 548 | while let Some(word) = sh.next() { 549 | if word == b"bar" { 550 | assert_eq!(sh.line_no, 3); 551 | } 552 | } 553 | } 554 | 555 | #[test] 556 | #[allow(deprecated)] 557 | fn test_quote() { 558 | // Validate behavior with invalid UTF-8: 559 | assert_eq!(quote(INVALID_UTF8), INVALID_UTF8_SINGLEQUOTED); 560 | // 
Replicate a few tests from lib.rs. No need to replicate all of them. 561 | assert_eq!(quote(b""), &b"''"[..]); 562 | assert_eq!(quote(b"foobar"), &b"foobar"[..]); 563 | assert_eq!(quote(b"foo bar"), &b"'foo bar'"[..]); 564 | assert_eq!(quote(b"'\""), &b"\"'\\\"\""[..]); 565 | assert_eq!(quote(b""), &b"''"[..]); 566 | } 567 | 568 | #[test] 569 | #[allow(deprecated)] 570 | fn test_join() { 571 | // Validate behavior with invalid UTF-8: 572 | assert_eq!(join(vec![INVALID_UTF8]), INVALID_UTF8_SINGLEQUOTED); 573 | // Replicate a few tests from lib.rs. No need to replicate all of them. 574 | assert_eq!(join(vec![]), &b""[..]); 575 | assert_eq!(join(vec![&b""[..]]), b"''"); 576 | } 577 | -------------------------------------------------------------------------------- /fuzz/fuzz_quote_real_shell/basic-corpus/long-a: -------------------------------------------------------------------------------- 1 |  --------------------------------------------------------------------------------