├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md └── src └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /Cargo.lock 2 | /target 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Jimmy Cuadra "] 3 | categories = ["command-line-interface"] 4 | description = "Manipulate strings according to the word parsing rules of the UNIX Bourne shell." 5 | documentation = "https://docs.rs/shellwords" 6 | edition = "2018" 7 | homepage = "https://github.com/jimmycuadra/rust-shellwords" 8 | keywords = ["bash", "sh", "shell"] 9 | license = "MIT" 10 | name = "shellwords" 11 | readme = "README.md" 12 | repository = "https://github.com/jimmycuadra/rust-shellwords" 13 | version = "1.1.0" 14 | 15 | [dependencies] 16 | lazy_static = "1.4.0" 17 | regex = "1.3.9" 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Jimmy Cuadra 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # shellwords 2 | 3 | Crate **shellwords** provides utilities for parsing strings as they would be interpreted by the UNIX Bourne shell. 4 | 5 | * [shellwords](https://crates.io/crates/shellwords) on crates.io 6 | * [Documentation](https://docs.rs/shellwords) for the latest crates.io release 7 | 8 | ## Examples 9 | 10 | Split a string into a vector of words in the same way the UNIX Bourne shell does: 11 | 12 | ``` rust 13 | assert_eq!(split("here are \"two words\"").unwrap(), ["here", "are", "two words"]); 14 | ``` 15 | 16 | ## Legal 17 | 18 | shellwords is released under the MIT license. 19 | See `LICENSE` for details. 20 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Crate `shellwords` provides utilities for parsing strings as they would be interpreted by the 2 | //! UNIX Bourne shell. 3 | 4 | #[deny(missing_debug_implementations, missing_docs, warnings)] 5 | use lazy_static::lazy_static; 6 | use regex::Regex; 7 | 8 | /// Escapes a string so it will be interpreted as a single word by the UNIX Bourne shell. 9 | /// 10 | /// If the input string is empty, this function returns an empty quoted string. 
11 | /// 12 | /// # Examples 13 | /// 14 | /// ``` 15 | /// # extern crate shellwords; 16 | /// # use shellwords::escape; 17 | /// # fn main() { 18 | /// assert_eq!(escape("special's.txt"), "special\\'s.txt".to_string()); 19 | /// # } 20 | /// ``` 21 | pub fn escape(input: &str) -> String { 22 | lazy_static! { 23 | static ref ESCAPE_PATTERN: Regex = Regex::new(r"([^A-Za-z0-9_\-.,:/@\n])").unwrap(); 24 | static ref LINE_FEED: Regex = Regex::new(r"\n").unwrap(); 25 | } 26 | 27 | if input.len() == 0 { 28 | return "''".to_owned(); 29 | } 30 | 31 | let output = &ESCAPE_PATTERN.replace_all(input, "\\$1"); 32 | 33 | LINE_FEED.replace_all(output, "'\n'").to_string() 34 | } 35 | 36 | /// Builds a command line string from a list of arguments. 37 | /// 38 | /// The arguments are combined into a single string with each word separated by a space. Each 39 | /// individual word is escaped as necessary via `escape`. 40 | /// 41 | /// # Examples 42 | /// 43 | /// ``` 44 | /// # extern crate shellwords; 45 | /// # use shellwords::join; 46 | /// # fn main() { 47 | /// let args = ["There's", "a", "time", "and", "place", "for", "everything"]; 48 | /// assert_eq!(join(&args), "There\\'s a time and place for everything"); 49 | /// # } 50 | /// ``` 51 | pub fn join(args: &[&str]) -> String { 52 | let escaped: Vec = args.iter().map(|arg| escape(arg)).collect(); 53 | 54 | escaped.join(" ") 55 | } 56 | 57 | /// Splits a string into a vector of words in the same way the UNIX Bourne shell does. 58 | /// 59 | /// This function does not behave like a full command line parser. Only single quotes, double 60 | /// quotes, and backslashes are treated as metacharacters. 
Within double quoted strings, 61 | /// backslashes are only treated as metacharacters when followed by one of the following 62 | /// characters: 63 | /// 64 | /// * $ 65 | /// * ` 66 | /// * " 67 | /// * \ 68 | /// * newline 69 | /// 70 | /// # Errors 71 | /// 72 | /// If the input contains mismatched quotes (a quoted string missing a matching ending quote), 73 | /// a `MismatchedQuotes` error is returned. 74 | /// 75 | /// # Examples 76 | /// 77 | /// Quoted strings are intepreted as one word: 78 | /// 79 | /// ``` 80 | /// # extern crate shellwords; 81 | /// # use shellwords::split; 82 | /// # fn main() { 83 | /// assert_eq!(split("here are \"two words\"").unwrap(), ["here", "are", "two words"]); 84 | /// # } 85 | /// ``` 86 | /// 87 | /// The pipe character has no special meaning: 88 | /// 89 | /// ``` 90 | /// # extern crate shellwords; 91 | /// # use shellwords::split; 92 | /// # fn main() { 93 | /// assert_eq!(split("cat file.txt | less").unwrap(), ["cat", "file.txt", "|", "less"]); 94 | /// # } 95 | /// ``` 96 | /// 97 | pub fn split(input: &str) -> Result, MismatchedQuotes> { 98 | lazy_static! 
{ 99 | static ref MAIN_PATTERN: Regex = Regex::new( 100 | r#"(?m:\s*(?:([^\s\\'"]+)|'([^']*)'|"((?:[^"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?)"# 101 | ) 102 | .unwrap(); 103 | static ref ESCAPE_PATTERN: Regex = Regex::new(r#"\\(.)"#).unwrap(); 104 | static ref METACHAR_PATTERN: Regex = Regex::new(r#"\\([$`"\\\n])"#).unwrap(); 105 | } 106 | 107 | let mut words = Vec::new(); 108 | let mut field = String::new(); 109 | 110 | for capture in MAIN_PATTERN.captures_iter(input) { 111 | if let Some(word) = capture.get(1) { 112 | field.push_str(word.as_str()); 113 | } else if let Some(single_quoted_word) = capture.get(2) { 114 | field.push_str(single_quoted_word.as_str()); 115 | } else if let Some(double_quoted_word) = capture.get(3) { 116 | field.push_str(&METACHAR_PATTERN.replace_all(double_quoted_word.as_str(), "$1")); 117 | } else if let Some(escape) = capture.get(4) { 118 | field.push_str(&ESCAPE_PATTERN.replace_all(escape.as_str(), "$1")); 119 | } else if capture.get(5).is_some() { 120 | return Err(MismatchedQuotes); 121 | } 122 | 123 | if capture.get(6).is_some() { 124 | words.push(field); 125 | field = String::new(); 126 | } 127 | } 128 | 129 | Ok(words) 130 | } 131 | 132 | /// An error when splitting a string with mismatched quotes. 
///
/// This is a unit struct: it carries no detail about where the unmatched quote occurred.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct MismatchedQuotes;

impl std::fmt::Display for MismatchedQuotes {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("Mismatched quotes")
    }
}

impl std::error::Error for MismatchedQuotes {}

#[cfg(test)]
mod tests {
    use super::{escape, join, split, MismatchedQuotes};

    // ---- split: well-formed input ----

    #[test]
    fn nothing_special() {
        let words = split("a b c d").unwrap();
        assert_eq!(words, ["a", "b", "c", "d"]);
    }

    #[test]
    fn quoted_strings() {
        let words = split("a \"b b\" a").unwrap();
        assert_eq!(words, ["a", "b b", "a"]);
    }

    #[test]
    fn escaped_double_quotes() {
        let words = split("a \"\\\"b\\\" c\" d").unwrap();
        assert_eq!(words, ["a", "\"b\" c", "d"]);
    }

    #[test]
    fn escaped_single_quotes() {
        let words = split("a \"'b' c\" d").unwrap();
        assert_eq!(words, ["a", "'b' c", "d"]);
    }

    #[test]
    fn escaped_spaces() {
        let words = split("a b\\ c d").unwrap();
        assert_eq!(words, ["a", "b c", "d"]);
    }

    #[test]
    fn trailing_whitespace() {
        let words = split("a b c d ").unwrap();
        assert_eq!(words, ["a", "b", "c", "d"]);
    }

    #[test]
    fn percent_signs() {
        let words = split("abc '%foo bar%'").unwrap();
        assert_eq!(words, ["abc", "%foo bar%"]);
    }

    // ---- split: unbalanced quotes ----

    #[test]
    fn bad_double_quotes() {
        assert_eq!(split("a \"b c d e"), Err(MismatchedQuotes));
    }

    #[test]
    fn bad_single_quotes() {
        assert_eq!(split("a 'b c d e"), Err(MismatchedQuotes));
    }

    #[test]
    fn bad_quotes() {
        assert_eq!(split("one '\"\"\""), Err(MismatchedQuotes));
    }

    // ---- escape / join ----

    #[test]
    fn empty_escape() {
        assert_eq!(escape(""), "''");
    }

    #[test]
    fn full_escape() {
        assert_eq!(escape("foo '\"' bar"), "foo\\ \\'\\\"\\'\\ bar");
    }

    #[test]
    fn escape_multibyte() {
        assert_eq!(escape("あい"), "\\あ\\い");
    }

    #[test]
    fn escape_and_join_whitespace() {
        // Empty, single, doubled and mixed whitespace tokens; the empty token is repeated on
        // purpose so it appears at the start, middle and end of the joined command line.
        let tokens = [
            "",
            " ",
            "  ",
            "\n",
            "\n\n",
            "\t",
            "\t\t",
            "",
            " \n\t",
            "",
        ];

        // Each token must survive an escape -> split round trip on its own...
        for &token in &tokens {
            let round_tripped = split(&escape(token)).unwrap();
            assert_eq!(vec![token], round_tripped);
        }

        // ...and all of them together must survive a join -> split round trip.
        let command_line = join(&tokens);
        assert_eq!(split(&command_line).unwrap(), tokens);
    }
}