├── .github ├── setup_wasmtime.sh └── workflows │ └── ci.yaml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── DESIGN.md ├── LICENSE ├── README.md ├── clippy.toml ├── examples ├── cargo.rs ├── hello.rs ├── nonstandard.rs ├── pico_test_app.rs └── posixly_correct.rs ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ └── fuzz_target_1.rs └── src └── lib.rs /.github/setup_wasmtime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | if test -z "$RUNNER_OS"; then 5 | echo "Should only run in CI!" 6 | exit 1 7 | fi 8 | 9 | url=https://github.com/bytecodealliance/wasmtime/releases/download/v31.0.0/wasmtime-v31.0.0-x86_64-linux.tar.xz 10 | 11 | cd /tmp 12 | curl -L "$url" | tar Jx 13 | mv wasmtime-*/wasmtime . 14 | 15 | mkdir -p ~/.cargo 16 | >> ~/.cargo/config.toml cat <<'EOF' 17 | [target.'cfg(target_os = "wasi")'] 18 | runner = "/tmp/wasmtime" 19 | EOF 20 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: Test 8 | strategy: 9 | matrix: 10 | os: [ubuntu-latest, windows-latest] 11 | runs-on: ${{ matrix.os }} 12 | steps: 13 | - uses: actions/checkout@v2 14 | - run: rustup toolchain add 1.31 1.45 stable nightly 15 | - run: cargo +1.31 test --lib # MSRV (examples don't compile) 16 | - run: cargo +1.45 test # First version where all examples compile 17 | - run: cargo +stable test 18 | - run: cargo +nightly test 19 | 20 | test_wasi: 21 | name: Test (WASI) 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v2 25 | - run: .github/setup_wasmtime.sh 26 | - run: rustup toolchain add 1.45 stable nightly 27 | - run: rustup target add wasm32-wasi --toolchain 1.45 28 | - run: rustup target add wasm32-wasip1 wasm32-wasip2 --toolchain stable 29 | - run: rustup target add 
wasm32-wasip1 wasm32-wasip2 --toolchain nightly 30 | - run: cargo +1.45 test --target wasm32-wasi 31 | - run: cargo +stable test --target wasm32-wasip1 32 | - run: cargo +stable test --target wasm32-wasip2 33 | - run: cargo +nightly test --target wasm32-wasip1 34 | - run: cargo +nightly test --target wasm32-wasip2 35 | 36 | fuzz: 37 | name: Fuzz 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v2 41 | - run: rustup toolchain add nightly 42 | - run: cargo install cargo-fuzz 43 | # Two minutes is a very short time to fuzz, but we have a very small state space. 44 | # I sometimes fuzz locally for an hour or so. 45 | - run: cargo +nightly fuzz run fuzz_target_1 -- -max_len=100000 -timeout=1 -max_total_time=120 46 | 47 | lint: 48 | name: Lint 49 | runs-on: ubuntu-latest 50 | steps: 51 | - uses: actions/checkout@v2 52 | - run: rustup target add x86_64-pc-windows-msvc wasm32-wasip2 53 | 54 | - name: rustfmt 55 | run: cargo fmt -- --check 56 | 57 | - name: clippy (Linux) 58 | run: cargo clippy -- -D warnings 59 | 60 | - name: clippy (Windows) 61 | run: cargo clippy --target x86_64-pc-windows-msvc -- -D warnings 62 | 63 | - name: clippy (WASI) 64 | run: cargo clippy --target wasm32-wasip2 -- -D warnings 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | .vscode 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.3.1 (2025-03-31) 2 | 3 | New: 4 | 5 | - Support the `wasm32-wasip2` target. 6 | 7 | Changes: 8 | 9 | - Tweaked sanitization of invalid UTF-8 in options. 10 | - Removed special handling for WASI since WASI is de facto Unicode-only. 
11 | 12 | ## 0.3.0 (2023-01-16) 13 | 14 | This release adds a new preferred way to cast `OsString` into `String` (`.string()?`) and makes raw argument processing more flexible. 15 | 16 | Almost no programs should need changes to keep working, but `.string()?` makes it easier to use lexopt with other error types like [anyhow](https://docs.rs/anyhow)'s and using it is therefore recommended. 17 | 18 | New: 19 | 20 | - Add `ValueExt::string()` as the preferred method for converting from `OsString` into `String`. Unlike [`OsString::into_string()`](https://doc.rust-lang.org/std/ffi/struct.OsString.html#method.into_string) it has a normal error type so it's compatible with catch-all error types like [`anyhow::Error`](https://docs.rs/anyhow/latest/anyhow/struct.Error.html). 21 | - `into_string()?` will stay supported for the time being. 22 | - Add `RawArgs::as_slice()` for unlimited lookahead. 23 | - Add `Parser::try_raw_args()` to get raw arguments without consuming any arguments in case of failure. 24 | - `Parser` now implements `Clone`, `Send`, and `Sync`. Its `Debug` output now shows the remaining arguments. 25 | 26 | Changes: 27 | 28 | - The input iterator is now consumed when you create a `Parser`, instead of during parsing. This breaks certain clever code that inspects the state of the iterator, but `RawArgs::as_slice()` may provide an alternative. (If you don't know what this means then you aren't affected.) 29 | - Calling `Parser::values()` no longer consumes any arguments if you don't use the iterator. 30 | - `RawArgs::peek()` now takes `&self` instead of `&mut self`. 31 | 32 | ## 0.2.1 (2022-07-10) 33 | 34 | New: 35 | 36 | - Add `Parser::raw_args()` for collecting raw unparsed arguments. ([#12](https://github.com/blyxxyz/lexopt/issues/12)) 37 | - Implement `Debug` for `ValuesIter`. 38 | 39 | Bug fixes: 40 | 41 | - Change "missing argument at end of command" error message. 
([#11](https://github.com/blyxxyz/lexopt/issues/11)) 42 | 43 | ## 0.2.0 (2021-10-23) 44 | 45 | While this release is not strictly backward-compatible it should break very few programs. 46 | 47 | New: 48 | 49 | - Add `Parser::values()` for options with multiple arguments. 50 | - Add `Parser::optional_value()` for options with optional arguments. 51 | - Add `Parser::from_iter()` to construct from an iterator that includes the binary name. ([#5](https://github.com/blyxxyz/lexopt/issues/5)) 52 | - Document how to use `Parser::value()` to collect all remaining arguments. 53 | 54 | Changes: 55 | 56 | - Support `=` as a separator for short options (as in `-o=value`). ([#18](https://github.com/blyxxyz/lexopt/issues/18)) 57 | - Sanitize the binary name if it's invalid unicode instead of ignoring it. 58 | - Make `Error::UnexpectedValue.option` a `String` instead of an `Option<String>`. 59 | 60 | Bug fixes: 61 | 62 | - Include `bin_name` in `Parser`'s `Debug` output. 63 | 64 | ## 0.1.0 (2021-07-16) 65 | Initial release. 66 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "lexopt" 3 | version = "0.3.1" 4 | authors = ["Jan Verbeek "] 5 | description = "Minimalist pedantic command line parser" 6 | keywords = ["args", "arguments", "cli", "parser", "getopt"] 7 | categories = ["command-line-interface"] 8 | repository = "https://github.com/blyxxyz/lexopt" 9 | documentation = "https://docs.rs/lexopt" 10 | license = "MIT" 11 | readme = "README.md" 12 | edition = "2018" 13 | exclude = [".github", ".gitignore"] 14 | 15 | [profile.test] 16 | # The basic_fuzz test benefits greatly from this. 
17 | opt-level = 2 18 | -------------------------------------------------------------------------------- /DESIGN.md: -------------------------------------------------------------------------------- 1 | Some rambling thoughts that don't deserve a place in the README. 2 | 3 | # Cleanness 4 | The API consists of a handful of clean simple powerful methods with no arguments and no configurability, plus some more junk to make them convenient to use. 5 | 6 | Language features are to be preferred over library features as much as possible. That way the library can stay smaller and code that uses the library is hopefully easier to understand in detail for people who aren't familiar with the library. 7 | 8 | I don't really like the `ValueExt` extension trait, but I can't think of a nicer way to parse values. In my ideal workflow you would call `.into_string()?.parse()?` to parse a value, all built-in methods. But I don't think it's possible to have an error type that can be transformed from both methods' error types, `into_string` returns `OsString` and there are annoying rules around overlapping trait implementations. The error messages would also suffer. 9 | 10 | (Update: as of 0.3.0, `ValueExt` has a `string` method as an alternative to `into_string` with a cleaner return type. In theory this opens the way to removing `From`, but I don't think `lexopt::Error` should be catch-all. There's `anyhow` for that.) 11 | 12 | Keeping the core API clean and generic means this could perhaps be used as the basis of a more complete parser. 13 | 14 | # Possible enhancements 15 | POSIX has a notion of subarguments, combining multiple values in a single option-argument by separating them with commas or spaces. This is easy enough to hand-roll for valid unicode (`.into_string()?.split(...)`) but we could provide a function that does it on `OsString`s. I can't think of a case where values may not be valid unicode but definitely don't contain commas or spaces, though. 
16 | 17 | # Language quirks 18 | Sometimes Rust is a bother. 19 | 20 | `Arg::Long` contains a borrowed string instead of an owned string because you can't match owned strings against string literals. That means `Arg` needs a lifetime, the iterator protocol cannot be used (it would also be a bad fit for other reasons), and some abstractions are hard or impossible to build. On the plus side, error messages can be slightly better. 21 | 22 | (Deref patterns would fix this, and if/when they're released I'll probably do a breaking release with a huge MSRV bump. I consider them a prerequisite for 1.0.) 23 | 24 | Arguments on Windows sometimes have to be transcoded three times: from UTF-16 to WTF-8 by `args_os`, then back to UTF-16 to parse them, then to WTF-8 again to be used. This ensures we see the original invalid code unit if there's a problem, but it's a bit sad. (Luckily it only happens very rarely.) 25 | 26 | # Errors 27 | There's not always enough information for a good error message. A plain `OsString` doesn't remember what the parser knows, like what the last option was. 28 | 29 | `ValueExt::parse` exists to include the original string in an error message and to wrap all errors inside a uniform type. It's unclear if it earns its upkeep. 30 | 31 | # Iterator backing 32 | I see three ways to store `Parser`'s internal iterator: 33 | 34 | 1. As a generic field (`Parser<I> where I: Iterator<Item = OsString>`) 35 | 2. As a trait object (`source: Box<dyn Iterator<Item = OsString> + 'static>`) 36 | 3. As a particular known type (`source: std::vec::IntoIter<OsString>`) 37 | 38 | lexopt originally used option 2 but switched to option 3. 39 | 40 | **Option 1** (generic field) is the most general and powerful but it's cumbersome and bloated. Benefits: 41 | 42 | - The parser inherits the iterator's properties. You can have a non-`'static` parser, or a parser that is or isn't thread-safe. 43 | - You can provide direct access to the original iterator. 44 | - In theory, better optimization. 
45 | 46 | Drawbacks: 47 | 48 | - Using a parser as an argument (or return value, or field) is difficult. You have to name the whole type (e.g. `Parser<std::env::ArgsOs>`), and you can't mix and match parsers created from different iterators. 49 | - Code size and compile times are bloated, particularly if you use multiple iterator types. 50 | - The benefits are pretty weak or niche. 51 | 52 | **Option 2** (trait object) doesn't have the drawbacks of option 1, but it reduces everything to a lowest common denominator: 53 | 54 | - Either the input must be `Send`/`Sync`, or the parser can't be `Send`/`Sync`. (To complicate things, `ArgsOs` is `!Send` and `!Sync` out of caution.) 55 | - `Clone` can't be implemented. (Unless you exhaust the original iterator, which requires interior mutability and has bad edge cases.) 56 | - `Debug` can't be derived. 57 | 58 | **Option 3** (known type) would mean collecting the iterator into a `Vec` when the parser is constructed and then turning that into an iterator. 59 | 60 | - The biggest benefit is that `vec::IntoIter` is a well-behaved type and everything becomes easy. It's `Sync` and `Send` and `Clone` and `Debug` and `Debug` even shows the raw arguments. 61 | - We get unlimited lookahead through `vec::IntoIter::as_slice()`. 62 | - `FromIterator` can be implemented. 63 | 64 | There are also drawbacks: 65 | 66 | - It's likely to be less efficient. But not disastrously so: `args_os()` allocates a brand-new `Vec` full of brand-new `OsString`s (each with their own allocation) before returning, and we only duplicate the `Vec` allocation. 67 | - Iterators can't be infinite or otherwise avoid loading all arguments into memory at once. 68 | - You can't use [clever tricks](https://gist.github.com/blyxxyz/06b45c82c4a4f1030a89e0289adebf09) to observe which argument is being processed. 69 | - `as_slice()` might provide an alternative, but if this is to be a proper API it has to be designed carefully. 
70 | 71 | # Configuration 72 | lexopt isn't configurable right now but maybe it should be. 73 | 74 | There are requests to **a)** [disable `=` for short options](https://github.com/blyxxyz/lexopt/issues/13) and **b)** [make `.value()` ignore arguments that look like options](https://github.com/blyxxyz/lexopt/issues/14). 75 | 76 | Especially b) is context-sensitive. An option might need to take negative numbers as values, or arbitrary filenames. That means you might want to switch the option on/off just for the duration of parsing a single option. That rules out the builder pattern of `cfg(self) -> Self`, but not `cfg(&mut self) -> &mut Self`. 77 | 78 | a) might also be context-sensitive if you e.g. want to allow it just for a particular option for backward compatibility. But this seems less likely. 79 | 80 | A footgun of `cfg(&mut self)` is that you have to remember to revert the configuration once you're done. 81 | 82 | Other possible APIs: 83 | - `parser.value_disallow_dash()` 84 | - `parser.value_cfg(Config { allow_dash: false })` 85 | - `parser.allow_dash(false).value()` (with `allow_dash() -> SomeWrapper`) 86 | 87 | I'm not really happy with any of these. 88 | 89 | # Problems in other libraries 90 | These are all defensible design choices, they're just a bad fit for some of the programs I want to write. All of them make some other kind of program easier to write. 91 | 92 | ## pico-args 93 | - Results can be erratic in edge cases: option arguments may be interpreted as options, the order in which you request options matters, and arguments may get treated as if they're next to each other if the arguments in between get parsed first. 94 | - `--` as a separator is not built in. 95 | - Arguments that are not valid unicode are not recognized as options, even if they start with a dash. 96 | - Left-over arguments are ignored by default. I prefer when the path of least resistance is strict. 
97 | - It uses `Vec::remove`, so it's potentially slow if you pass many thousands of options. (This is a bit academic, there's no problem for realistic workloads.) 98 | 99 | These make the library simpler and smaller, which is the whole point. 100 | 101 | ## clap/structopt 102 | - structopt nudges the user toward needlessly panicking on invalid unicode: even if a field has type `OsString` or `PathBuf` it'll round-trip through a unicode string and panic unless `from_os_str` is used. (I don't know if this is fixable even in theory while keeping the API ergonomic.) 103 | - Invalid unicode can cause a panic instead of a soft error. 104 | - Options with a variable number of arguments are supported, even though they're ambiguous. In structopt you need to take care not to enable this if you want an option that can occur multiple times with a single argument each time. 105 | - They're large, both in API surface and in code size. 106 | 107 | That said, it's still my first choice for complicated interfaces. 108 | 109 | (I don't know how much of this applies to clap v3+ and clap-derive.) 110 | 111 | # Minimum Supported Rust Version 112 | The current MSRV is 1.31, the first release of the 2018 edition. 113 | 114 | The blocker for moving it even earlier is non-lexical lifetimes, there's some code that won't compile without it. 115 | 116 | The `Value(arg) if foo.is_none() =>` pattern doesn't actually work until 1.39 ([`bind_by_move_pattern_guards`](https://github.com/rust-lang/rust/pull/63118)), so not all of the examples compile on the MSRV. (And one of them uses `str::strip_prefix`, which requires at least 1.45.) 117 | 118 | Even Debian oldstable packages Rust 1.41 as of writing, so it's okay to relax that if there's a reason to. 
119 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 Jan Verbeek 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lexopt 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/lexopt.svg)](https://crates.io/crates/lexopt) 4 | [![API reference](https://docs.rs/lexopt/badge.svg)](https://docs.rs/lexopt/) 5 | [![MSRV](https://img.shields.io/badge/MSRV-1.31-blue)](https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html) 6 | [![CI](https://img.shields.io/github/actions/workflow/status/blyxxyz/lexopt/ci.yaml?branch=master)](https://github.com/blyxxyz/lexopt/actions) 7 | 8 | Lexopt is an argument parser for Rust. It tries to have the simplest possible design that's still correct. It's so simple that it's a bit tedious to use. 9 | 10 | Lexopt is: 11 | - Small: one file, no dependencies, no macros. Easy to audit or vendor. 12 | - Correct: standard conventions are supported and ambiguity is avoided. Tested and fuzzed. 13 | - Pedantic: arguments are returned as [`OsString`](https://doc.rust-lang.org/std/ffi/struct.OsString.html)s, forcing you to convert them explicitly. This lets you handle badly-encoded filenames. 14 | - Imperative: options are returned as they are found, nothing is declared ahead of time. 15 | - Minimalist: only basic functionality is provided. 16 | - Unhelpful: there is no help generation and error messages often lack context. 17 | 18 | ## Example 19 | ```rust 20 | struct Args { 21 | thing: String, 22 | number: u32, 23 | shout: bool, 24 | } 25 | 26 | fn parse_args() -> Result { 27 | use lexopt::prelude::*; 28 | 29 | let mut thing = None; 30 | let mut number = 1; 31 | let mut shout = false; 32 | let mut parser = lexopt::Parser::from_env(); 33 | while let Some(arg) = parser.next()? 
{ 34 | match arg { 35 | Short('n') | Long("number") => { 36 | number = parser.value()?.parse()?; 37 | } 38 | Long("shout") => { 39 | shout = true; 40 | } 41 | Value(val) if thing.is_none() => { 42 | thing = Some(val.string()?); 43 | } 44 | Long("help") => { 45 | println!("Usage: hello [-n|--number=NUM] [--shout] THING"); 46 | std::process::exit(0); 47 | } 48 | _ => return Err(arg.unexpected()), 49 | } 50 | } 51 | 52 | Ok(Args { 53 | thing: thing.ok_or("missing argument THING")?, 54 | number, 55 | shout, 56 | }) 57 | } 58 | 59 | fn main() -> Result<(), lexopt::Error> { 60 | let args = parse_args()?; 61 | let mut message = format!("Hello {}", args.thing); 62 | if args.shout { 63 | message = message.to_uppercase(); 64 | } 65 | for _ in 0..args.number { 66 | println!("{}", message); 67 | } 68 | Ok(()) 69 | } 70 | ``` 71 | 72 | Let's walk through this: 73 | - We start parsing with `Parser::from_env()`. 74 | - We call `parser.next()` in a loop to get all the arguments until they run out. 75 | - We match on arguments. `Short` and `Long` indicate an option. 76 | - To get the value that belongs to an option (like `10` in `-n 10`) we call `parser.value()`. 77 | - This returns a standard [`OsString`](https://doc.rust-lang.org/std/ffi/struct.OsString.html). 78 | - For convenience, `use lexopt::prelude::*` adds a `.parse()` method, analogous to [the one on `&str`](https://doc.rust-lang.org/std/primitive.str.html#method.parse). 79 | - Calling `parser.value()` is how we tell `Parser` that `-n` takes a value at all. 80 | - `Value` indicates a free-standing argument. 81 | - `if thing.is_none()` is a useful pattern for positional arguments. If we already found `thing` we pass it on to another case. 82 | - It also contains an `OsString`. 83 | - The `.string()` method decodes it into a plain `String`. 84 | - If we don't know what to do with an argument we use `return Err(arg.unexpected())` to turn it into an error message. 
85 | - Strings can be promoted to errors for custom error messages. 86 | 87 | This covers most of the functionality in the library. Lexopt does very little for you. 88 | 89 | For a larger example with useful patterns, see [`examples/cargo.rs`](examples/cargo.rs). 90 | 91 | ## Command line syntax 92 | The following conventions are supported: 93 | - Short options (`-q`) 94 | - Long options (`--verbose`) 95 | - `--` to mark the end of options 96 | - `=` to separate options from values (`--option=value`, `-o=value`) 97 | - Spaces to separate options from values (`--option value`, `-o value`) 98 | - Unseparated short options (`-ovalue`) 99 | - Combined short options (`-abc` to mean `-a -b -c`) 100 | - Options with optional arguments (like GNU sed's `-i`, which can be used standalone or as `-iSUFFIX`) ([`Parser::optional_value()`](https://docs.rs/lexopt/latest/lexopt/struct.Parser.html#method.optional_value)) 101 | - Options with multiple arguments ([`Parser::values()`](https://docs.rs/lexopt/latest/lexopt/struct.Parser.html#method.values)) 102 | 103 | These are not supported out of the box: 104 | - Single-dash long options (like find's `-name`) 105 | - Abbreviated long options (GNU's getopt lets you write `--num` instead of `--number` if it can be expanded unambiguously) 106 | 107 | [`Parser::raw_args()`](https://docs.rs/lexopt/latest/lexopt/struct.Parser.html#method.raw_args) and [`Parser::try_raw_args()`](https://docs.rs/lexopt/latest/lexopt/struct.Parser.html#method.try_raw_args) provide an escape hatch for consuming the original command line. This can be used for custom syntax, like treating `-123` as a number instead of a string of options. See [`examples/nonstandard.rs`](examples/nonstandard.rs) for an example of this. 108 | 109 | ## Unicode 110 | This library supports unicode while tolerating non-unicode arguments. 111 | 112 | Short options may be unicode, but only a single codepoint (a `char`). 113 | 114 | Options can be combined with non-unicode arguments. 
That is, `--option=���` will not cause an error or mangle the value. 115 | 116 | Options themselves are patched as by [`String::from_utf8_lossy`](https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy) if they're not valid unicode. That typically means you'll raise an error later when they're not recognized. 117 | 118 | ## Why? 119 | For a particular application I was looking for a small parser that's pedantically correct. There are other compact argument parsing libraries, but I couldn't find one that handled `OsString`s and implemented all the fiddly details of the argument syntax faithfully. 120 | 121 | This library may also be useful if a lot of control is desired, like when the exact argument order matters or not all options are known ahead of time. It could be considered more of a lexer than a parser. 122 | 123 | ## Why not? 124 | This library may not be worth using if: 125 | - You don't care about non-unicode arguments 126 | - You don't care about exact compliance and correctness 127 | - You don't care about code size 128 | - You do care about great error messages 129 | - You hate boilerplate 130 | 131 | ## See also 132 | - [Collected benchmarks of argument parsing crates](https://github.com/rosetta-rs/argparse-rosetta-rs). 133 | - libc's [`getopt`](https://en.wikipedia.org/wiki/Getopt#Examples). 134 | - Plan 9's [*arg(3)* macros](https://9fans.github.io/plan9port/man/man3/arg.html). 135 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | msrv = "1.31.0" 2 | -------------------------------------------------------------------------------- /examples/cargo.rs: -------------------------------------------------------------------------------- 1 | //! A very partial unfaithful implementation of cargo's command line. 2 | //! 3 | //! This showcases some hairier patterns, like subcommands and custom value parsing. 
4 | 5 | use std::{path::PathBuf, str::FromStr}; 6 | 7 | const HELP: &str = "cargo [+toolchain] [OPTIONS] [SUBCOMMAND]"; 8 | 9 | fn main() -> Result<(), lexopt::Error> { 10 | use lexopt::prelude::*; 11 | 12 | let mut settings = GlobalSettings { 13 | toolchain: "stable".to_owned(), 14 | color: Color::Auto, 15 | offline: false, 16 | quiet: false, 17 | verbose: false, 18 | }; 19 | 20 | let mut parser = lexopt::Parser::from_env(); 21 | while let Some(arg) = parser.next()? { 22 | match arg { 23 | Long("color") => { 24 | settings.color = parser.value()?.parse()?; 25 | } 26 | Long("offline") => { 27 | settings.offline = true; 28 | } 29 | Long("quiet") => { 30 | settings.quiet = true; 31 | settings.verbose = false; 32 | } 33 | Long("verbose") => { 34 | settings.verbose = true; 35 | settings.quiet = false; 36 | } 37 | Long("help") => { 38 | println!("{}", HELP); 39 | std::process::exit(0); 40 | } 41 | Value(value) => { 42 | let value = value.string()?; 43 | match value.as_str() { 44 | value if value.starts_with('+') => { 45 | settings.toolchain = value[1..].to_owned(); 46 | } 47 | "install" => { 48 | return install(settings, parser); 49 | } 50 | value => { 51 | return Err(format!("unknown subcommand '{}'", value).into()); 52 | } 53 | } 54 | } 55 | _ => return Err(arg.unexpected()), 56 | } 57 | } 58 | 59 | println!("{}", HELP); 60 | Ok(()) 61 | } 62 | 63 | #[derive(Debug)] 64 | struct GlobalSettings { 65 | toolchain: String, 66 | color: Color, 67 | offline: bool, 68 | quiet: bool, 69 | verbose: bool, 70 | } 71 | 72 | fn install(settings: GlobalSettings, mut parser: lexopt::Parser) -> Result<(), lexopt::Error> { 73 | use lexopt::prelude::*; 74 | 75 | let mut package: Option = None; 76 | let mut root: Option = None; 77 | let mut jobs: u16 = get_no_of_cpus(); 78 | 79 | while let Some(arg) = parser.next()? 
{ 80 | match arg { 81 | Value(value) if package.is_none() => { 82 | package = Some(value.string()?); 83 | } 84 | Long("root") => { 85 | root = Some(parser.value()?.into()); 86 | } 87 | Short('j') | Long("jobs") => { 88 | jobs = parser.value()?.parse()?; 89 | } 90 | Long("help") => { 91 | println!("cargo install [OPTIONS] CRATE"); 92 | std::process::exit(0); 93 | } 94 | _ => return Err(arg.unexpected()), 95 | } 96 | } 97 | 98 | println!("Settings: {:#?}", settings); 99 | println!( 100 | "Installing {} into {:?} with {} jobs", 101 | package.ok_or("missing CRATE argument")?, 102 | root, 103 | jobs 104 | ); 105 | 106 | Ok(()) 107 | } 108 | 109 | #[derive(Debug)] 110 | enum Color { 111 | Auto, 112 | Always, 113 | Never, 114 | } 115 | 116 | // clap has a macro for this: https://docs.rs/clap/2.33.3/clap/macro.arg_enum.html 117 | // We have to do it manually. 118 | impl FromStr for Color { 119 | type Err = String; 120 | 121 | fn from_str(s: &str) -> Result { 122 | match s.to_lowercase().as_str() { 123 | "auto" => Ok(Color::Auto), 124 | "always" => Ok(Color::Always), 125 | "never" => Ok(Color::Never), 126 | _ => Err(format!( 127 | "Invalid style '{}' [pick from: auto, always, never]", 128 | s 129 | )), 130 | } 131 | } 132 | } 133 | 134 | fn get_no_of_cpus() -> u16 { 135 | 4 136 | } 137 | -------------------------------------------------------------------------------- /examples/hello.rs: -------------------------------------------------------------------------------- 1 | struct Args { 2 | thing: String, 3 | number: u32, 4 | shout: bool, 5 | } 6 | 7 | fn parse_args() -> Result { 8 | use lexopt::prelude::*; 9 | 10 | let mut thing = None; 11 | let mut number = 1; 12 | let mut shout = false; 13 | let mut parser = lexopt::Parser::from_env(); 14 | while let Some(arg) = parser.next()? 
{ 15 | match arg { 16 | Short('n') | Long("number") => { 17 | number = parser.value()?.parse()?; 18 | } 19 | Long("shout") => { 20 | shout = true; 21 | } 22 | Value(val) if thing.is_none() => { 23 | thing = Some(val.string()?); 24 | } 25 | Long("help") => { 26 | println!("Usage: hello [-n|--number=NUM] [--shout] THING"); 27 | std::process::exit(0); 28 | } 29 | _ => return Err(arg.unexpected()), 30 | } 31 | } 32 | 33 | Ok(Args { 34 | thing: thing.ok_or("missing argument THING")?, 35 | number, 36 | shout, 37 | }) 38 | } 39 | 40 | fn main() -> Result<(), lexopt::Error> { 41 | let args = parse_args()?; 42 | let mut message = format!("Hello {}", args.thing); 43 | if args.shout { 44 | message = message.to_uppercase(); 45 | } 46 | for _ in 0..args.number { 47 | println!("{}", message); 48 | } 49 | Ok(()) 50 | } 51 | -------------------------------------------------------------------------------- /examples/nonstandard.rs: -------------------------------------------------------------------------------- 1 | //! Some programs accept options with an unusual syntax. For example, tail 2 | //! accepts `-13` as an alias for `-n 13`. 3 | //! 4 | //! This program shows how to use `Parser::try_raw_args()` to handle them 5 | //! manually. 6 | //! 7 | //! (Note: actual tail implementations handle it slightly differently! This 8 | //! is just an example.) 9 | 10 | use std::path::PathBuf; 11 | 12 | fn parse_dashnum(parser: &mut lexopt::Parser) -> Option { 13 | let mut raw = parser.try_raw_args()?; 14 | let arg = raw.peek()?.to_str()?; 15 | let num = arg.strip_prefix('-')?.parse::().ok()?; 16 | raw.next(); // Consume the argument we just parsed 17 | Some(num) 18 | } 19 | 20 | fn main() -> Result<(), lexopt::Error> { 21 | use lexopt::prelude::*; 22 | 23 | let mut parser = lexopt::Parser::from_env(); 24 | loop { 25 | if let Some(num) = parse_dashnum(&mut parser) { 26 | println!("Got number {}", num); 27 | } else if let Some(arg) = parser.next()? 
{ 28 | match arg { 29 | Short('f') | Long("follow") => { 30 | println!("Got --follow"); 31 | } 32 | Short('n') | Long("number") => { 33 | let num: u64 = parser.value()?.parse()?; 34 | println!("Got number {}", num); 35 | } 36 | Value(path) => { 37 | let path = PathBuf::from(path); 38 | println!("Got file {}", path.display()); 39 | } 40 | _ => return Err(arg.unexpected()), 41 | } 42 | } else { 43 | break; 44 | } 45 | } 46 | 47 | Ok(()) 48 | } 49 | -------------------------------------------------------------------------------- /examples/pico_test_app.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | //! pico-args implements this program for a number of argument parsers: 3 | //! https://github.com/RazrFalcon/pico-args/tree/master/test-apps 4 | //! 5 | //! It's a nice reference point. 6 | //! 7 | //! I release this implementation into the public domain. Feel free to copy. 8 | 9 | const HELP: &str = "\ 10 | USAGE: app [OPTIONS] --number NUMBER INPUT 11 | 12 | OPTIONS: 13 | --number NUMBER Set a number (required) 14 | --opt-number NUMBER Set an optional number 15 | --width WIDTH Set a width (non-zero, default 10) 16 | 17 | ARGS: 18 | Input file 19 | "; 20 | 21 | #[derive(Debug)] 22 | struct AppArgs { 23 | number: u32, 24 | opt_number: Option, 25 | width: u32, 26 | input: std::path::PathBuf, 27 | } 28 | 29 | fn parse_width(s: &str) -> Result { 30 | let w = s.parse().map_err(|_| "not a number")?; 31 | if w != 0 { 32 | Ok(w) 33 | } else { 34 | Err("width must be positive".to_string()) 35 | } 36 | } 37 | 38 | fn main() { 39 | let args = match parse_args() { 40 | Ok(args) => args, 41 | Err(err) => { 42 | eprintln!("Error: {}.", err); 43 | std::process::exit(1); 44 | } 45 | }; 46 | println!("{:#?}", args); 47 | } 48 | 49 | fn parse_args() -> Result { 50 | use lexopt::prelude::*; 51 | 52 | let mut number = None; 53 | let mut opt_number = None; 54 | let mut width = 10; 55 | let mut input = None; 56 | 57 | let mut 
parser = lexopt::Parser::from_env(); 58 | while let Some(arg) = parser.next()? { 59 | match arg { 60 | Short('h') | Long("help") => { 61 | print!("{}", HELP); 62 | std::process::exit(0); 63 | } 64 | Long("number") => number = Some(parser.value()?.parse()?), 65 | Long("opt-number") => opt_number = Some(parser.value()?.parse()?), 66 | Long("width") => width = parser.value()?.parse_with(parse_width)?, 67 | Value(path) if input.is_none() => input = Some(path.into()), 68 | _ => return Err(arg.unexpected()), 69 | } 70 | } 71 | Ok(AppArgs { 72 | number: number.ok_or("missing required option --number")?, 73 | opt_number, 74 | width, 75 | input: input.ok_or("missing required argument INPUT")?, 76 | }) 77 | } 78 | -------------------------------------------------------------------------------- /examples/posixly_correct.rs: -------------------------------------------------------------------------------- 1 | //! POSIX [recommends](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap12.html#tag_12_02) 2 | //! that no more options are parsed after the first positional argument. 3 | //! The other arguments are then all treated as positional arguments. 4 | //! 5 | //! lexopt can be used like this. After seeing the first positional argument 6 | //! (`Arg::Value`), call `Parser::raw_args`. 7 | //! 8 | //! The most logical thing to then do is often to collect the values 9 | //! into a `Vec`. This is shown below. 10 | //! 11 | //! Note that most modern software doesn't follow POSIX's rule and allows 12 | //! options anywhere (as long as they come before "--"). 13 | 14 | fn main() -> Result<(), lexopt::Error> { 15 | use lexopt::prelude::*; 16 | 17 | let mut parser = lexopt::Parser::from_env(); 18 | let mut free = Vec::new(); 19 | while let Some(arg) = parser.next()? 
{ 20 | match arg { 21 | Short('n') | Long("number") => { 22 | let num: u16 = parser.value()?.parse()?; 23 | println!("Got number {}", num); 24 | } 25 | Long("shout") => { 26 | println!("Got --shout"); 27 | } 28 | Value(val) => { 29 | free.push(val); 30 | free.extend(parser.raw_args()?); 31 | } 32 | _ => return Err(arg.unexpected()), 33 | } 34 | } 35 | println!("Got free args {:?}", free); 36 | Ok(()) 37 | } 38 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "lexopt-fuzz" 4 | version = "0.0.0" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | edition = "2018" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | libfuzzer-sys = "0.4" 14 | 15 | [dependencies.lexopt] 16 | path = ".." 17 | 18 | # Prevent this from interfering with workspaces 19 | [workspace] 20 | members = ["."] 21 | 22 | [[bin]] 23 | name = "fuzz_target_1" 24 | path = "fuzz_targets/fuzz_target_1.rs" 25 | test = false 26 | doc = false 27 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fuzz_target_1.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | use std::convert::TryInto; 4 | use std::ffi::OsString; 5 | use std::os::unix::ffi::{OsStrExt, OsStringExt}; 6 | 7 | // We check some basic invariants but mainly make sure that it 8 | // doesn't panic or hang. 
9 | fuzz_target!(|data: &[u8]| { 10 | let mut data = data; 11 | let mut decisions; 12 | if data.len() > 8 { 13 | // Decide which method to call, 64 / 4 = 16 times 14 | decisions = u64::from_le_bytes(data[..8].try_into().unwrap()); 15 | data = &data[8..]; 16 | } else { 17 | decisions = 0; 18 | } 19 | let data: Vec<_> = data 20 | // Arguments can't contain null bytes (on Unix) so it's a 21 | // reasonable separator 22 | .split(|&x| x == b'\0') 23 | .map(Into::into) 24 | .map(OsString::from_vec) 25 | .collect(); 26 | let mut p = lexopt::Parser::from_args(data); 27 | loop { 28 | // 0 -> Parser::next() 29 | // 1 -> Parser::value() 30 | // 2 -> Parser::values() 31 | // 3 -> Parser::optional_value() 32 | match decisions % 4 { 33 | 0 => match p.next() { 34 | Err(_) => assert_finished_arg(&mut p), 35 | Ok(Some(_)) => (), 36 | Ok(None) => break, 37 | }, 38 | 1 => match p.value() { 39 | Ok(_) => assert_finished_arg(&mut p), 40 | Err(_) => break, 41 | }, 42 | 2 => match p.values() { 43 | Ok(iter) => { 44 | assert!(iter.count() > 0); 45 | assert_finished_arg(&mut p); 46 | } 47 | Err(_) => { 48 | // Either the command line ran out, or the next argument is an option 49 | if let Some(next) = p.try_raw_args().unwrap().as_slice().first() { 50 | let arg = next.as_bytes(); 51 | assert!(arg.starts_with(b"-")); 52 | assert_ne!(arg, b"-"); 53 | } 54 | } 55 | }, 56 | 3 => { 57 | let could_get_raw = p.try_raw_args().is_some(); 58 | let had_optional = p.optional_value().is_some(); 59 | assert_ne!(could_get_raw, had_optional); 60 | assert_finished_arg(&mut p); 61 | } 62 | _ => unreachable!(), 63 | } 64 | decisions /= 4; 65 | // This should be safe to call all the time 66 | let _ = p.try_raw_args(); 67 | } 68 | assert_eq!(p.try_raw_args().unwrap().as_slice().len(), 0); 69 | assert!(matches!(p.next(), Ok(None))); 70 | assert!(matches!(p.next(), Ok(None))); 71 | assert!(matches!(p.next(), Ok(None))); 72 | }); 73 | 74 | fn assert_finished_arg(parser: &mut lexopt::Parser) { 75 | 
assert!(parser.try_raw_args().is_some()); 76 | // These methods can mutate Parser so we maybe shouldn't call them here 77 | // in case they happen to repair the state. 78 | // assert!(parser.raw_args().is_ok()); 79 | // assert!(parser.optional_value().is_none()); 80 | } 81 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A pathologically simple command line argument parser. 2 | //! 3 | //! Most argument parsers are declarative: you tell them what to parse, 4 | //! and they do it. 5 | //! 6 | //! This one provides you with a stream of options and values and lets you 7 | //! figure out the rest. 8 | //! 9 | //! ## Example 10 | //! ```no_run 11 | //! struct Args { 12 | //! thing: String, 13 | //! number: u32, 14 | //! shout: bool, 15 | //! } 16 | //! 17 | //! fn parse_args() -> Result { 18 | //! use lexopt::prelude::*; 19 | //! 20 | //! let mut thing = None; 21 | //! let mut number = 1; 22 | //! let mut shout = false; 23 | //! let mut parser = lexopt::Parser::from_env(); 24 | //! while let Some(arg) = parser.next()? { 25 | //! match arg { 26 | //! Short('n') | Long("number") => { 27 | //! number = parser.value()?.parse()?; 28 | //! } 29 | //! Long("shout") => { 30 | //! shout = true; 31 | //! } 32 | //! Value(val) if thing.is_none() => { 33 | //! thing = Some(val.string()?); 34 | //! } 35 | //! Long("help") => { 36 | //! println!("Usage: hello [-n|--number=NUM] [--shout] THING"); 37 | //! std::process::exit(0); 38 | //! } 39 | //! _ => return Err(arg.unexpected()), 40 | //! } 41 | //! } 42 | //! 43 | //! Ok(Args { 44 | //! thing: thing.ok_or("missing argument THING")?, 45 | //! number, 46 | //! shout, 47 | //! }) 48 | //! } 49 | //! 50 | //! fn main() -> Result<(), lexopt::Error> { 51 | //! let args = parse_args()?; 52 | //! let mut message = format!("Hello {}", args.thing); 53 | //! if args.shout { 54 | //! 
message = message.to_uppercase(); 55 | //! } 56 | //! for _ in 0..args.number { 57 | //! println!("{}", message); 58 | //! } 59 | //! Ok(()) 60 | //! } 61 | //! ``` 62 | //! Let's walk through this: 63 | //! - We start parsing with [`Parser::from_env`]. 64 | //! - We call [`parser.next()`][Parser::next] in a loop to get all the arguments until they run out. 65 | //! - We match on arguments. [`Short`][Arg::Short] and [`Long`][Arg::Long] indicate an option. 66 | //! - To get the value that belongs to an option (like `10` in `-n 10`) we call [`parser.value()`][Parser::value]. 67 | //! - This returns a standard [`OsString`]. 68 | //! - For convenience, [`use lexopt::prelude::*`][prelude] adds a [`.parse()`][ValueExt::parse] method, analogous to [`str::parse`]. 69 | //! - Calling `parser.value()` is how we tell `Parser` that `-n` takes a value at all. 70 | //! - `Value` indicates a free-standing argument. 71 | //! - `if thing.is_none()` is a useful pattern for positional arguments. If we already found `thing` we pass it on to another case. 72 | //! - It also contains an `OsString`. 73 | //! - The [`.string()`][ValueExt::string] method decodes it into a plain `String`. 74 | //! - If we don't know what to do with an argument we use [`return Err(arg.unexpected())`][Arg::unexpected] to turn it into an error message. 75 | //! - Strings can be promoted to errors for custom error messages. 
76 | 77 | #![forbid(unsafe_code)] 78 | #![warn(missing_docs, missing_debug_implementations, elided_lifetimes_in_paths)] 79 | #![allow(clippy::should_implement_trait)] 80 | 81 | use std::{ 82 | ffi::{OsStr, OsString}, 83 | fmt::Display, 84 | mem::replace, 85 | str::{FromStr, Utf8Error}, 86 | }; 87 | 88 | #[cfg(unix)] 89 | use std::os::unix::ffi::{OsStrExt, OsStringExt}; 90 | #[cfg(windows)] 91 | use std::os::windows::ffi::{OsStrExt, OsStringExt}; 92 | 93 | type InnerIter = std::vec::IntoIter; 94 | 95 | fn make_iter(iter: impl Iterator) -> InnerIter { 96 | iter.collect::>().into_iter() 97 | } 98 | 99 | /// A parser for command line arguments. 100 | #[derive(Debug, Clone)] 101 | pub struct Parser { 102 | source: InnerIter, 103 | state: State, 104 | /// The last option we emitted. 105 | last_option: LastOption, 106 | /// The name of the command (argv\[0\]). 107 | bin_name: Option, 108 | } 109 | 110 | #[derive(Debug, Clone)] 111 | enum State { 112 | /// Nothing interesting is going on. 113 | None, 114 | /// We have a value left over from --option=value. 115 | PendingValue(OsString), 116 | /// We're in the middle of -abc. 117 | /// 118 | /// On Windows and other non-UTF8-OsString platforms this Vec should 119 | /// only ever contain valid UTF-8 (and could instead be a String). 120 | Shorts(Vec, usize), 121 | #[cfg(windows)] 122 | /// We're in the middle of -ab� on Windows (invalid UTF-16). 123 | ShortsU16(Vec, usize), 124 | /// We saw -- and know no more options are coming. 125 | FinishedOpts, 126 | } 127 | 128 | /// We use this to keep track of the last emitted option, for error messages when 129 | /// an expected value is not found. 130 | /// 131 | /// We also use this as storage for long options so we can hand out &str 132 | /// (because String doesn't support pattern matching). 
133 | #[derive(Debug, Clone)] 134 | enum LastOption { 135 | None, 136 | Short(char), 137 | Long(String), 138 | } 139 | 140 | /// A command line argument found by [`Parser`], either an option or a positional argument. 141 | #[derive(Debug, Clone, PartialEq, Eq)] 142 | pub enum Arg<'a> { 143 | /// A short option, e.g. `Short('q')` for `-q`. 144 | Short(char), 145 | /// A long option, e.g. `Long("verbose")` for `--verbose`. (The dashes are not included.) 146 | Long(&'a str), 147 | /// A positional argument, e.g. `/dev/null`. 148 | Value(OsString), 149 | } 150 | 151 | impl Parser { 152 | /// Get the next option or positional argument. 153 | /// 154 | /// A return value of `Ok(None)` means the command line has been exhausted. 155 | /// 156 | /// Options that are not valid unicode are transformed with replacement 157 | /// characters as by [`String::from_utf8_lossy`]. 158 | /// 159 | /// # Errors 160 | /// 161 | /// [`Error::UnexpectedValue`] is returned if the last option had a 162 | /// value that hasn't been consumed, as in `--option=value` or `-o=value`. 163 | /// 164 | /// It's possible to continue parsing after an error (but this is rarely useful). 165 | pub fn next(&mut self) -> Result>, Error> { 166 | match self.state { 167 | State::PendingValue(ref mut value) => { 168 | // Last time we got `--long=value`, and `value` hasn't been used. 169 | let value = replace(value, OsString::new()); 170 | self.state = State::None; 171 | return Err(Error::UnexpectedValue { 172 | option: self 173 | .format_last_option() 174 | .expect("Should only have pending value after long option"), 175 | value, 176 | }); 177 | } 178 | State::Shorts(ref arg, ref mut pos) => { 179 | // We're somewhere inside a -abc chain. Because we're in .next(), 180 | // not .value(), we can assume that the next character is another option. 181 | match first_codepoint(&arg[*pos..]) { 182 | Ok(None) => { 183 | self.state = State::None; 184 | } 185 | // If we find "-=[...]" we interpret it as an option. 
186 | // If we find "-o=..." then there's an unexpected value. 187 | // ('-=' as an option exists, see https://linux.die.net/man/1/a2ps.) 188 | // clap always interprets it as a short flag in this case, but 189 | // that feels sloppy. 190 | Ok(Some('=')) if *pos > 1 => { 191 | return Err(Error::UnexpectedValue { 192 | option: self.format_last_option().unwrap(), 193 | value: self.optional_value().unwrap(), 194 | }); 195 | } 196 | Ok(Some(ch)) => { 197 | *pos += ch.len_utf8(); 198 | self.last_option = LastOption::Short(ch); 199 | return Ok(Some(Arg::Short(ch))); 200 | } 201 | Err(err) => { 202 | // Advancing may allow recovery. 203 | // This is a little iffy, there might be more bad unicode next. 204 | match err.error_len() { 205 | Some(len) => *pos += len, 206 | None => *pos = arg.len(), 207 | } 208 | self.last_option = LastOption::Short('�'); 209 | return Ok(Some(Arg::Short('�'))); 210 | } 211 | } 212 | } 213 | #[cfg(windows)] 214 | State::ShortsU16(ref arg, ref mut pos) => match first_utf16_codepoint(&arg[*pos..]) { 215 | Ok(None) => { 216 | self.state = State::None; 217 | } 218 | Ok(Some('=')) if *pos > 1 => { 219 | return Err(Error::UnexpectedValue { 220 | option: self.format_last_option().unwrap(), 221 | value: self.optional_value().unwrap(), 222 | }); 223 | } 224 | Ok(Some(ch)) => { 225 | *pos += ch.len_utf16(); 226 | self.last_option = LastOption::Short(ch); 227 | return Ok(Some(Arg::Short(ch))); 228 | } 229 | Err(_) => { 230 | *pos += 1; 231 | self.last_option = LastOption::Short('�'); 232 | return Ok(Some(Arg::Short('�'))); 233 | } 234 | }, 235 | State::FinishedOpts => { 236 | return Ok(self.source.next().map(Arg::Value)); 237 | } 238 | State::None => (), 239 | } 240 | 241 | match self.state { 242 | State::None => (), 243 | ref state => panic!("unexpected state {:?}", state), 244 | } 245 | 246 | let arg = match self.source.next() { 247 | Some(arg) => arg, 248 | None => return Ok(None), 249 | }; 250 | 251 | if arg == "--" { 252 | self.state = 
State::FinishedOpts; 253 | return self.next(); 254 | } 255 | 256 | #[cfg(unix)] 257 | { 258 | // Fast solution for platforms where OsStrings are just UTF-8-ish bytes 259 | let mut arg = arg.into_vec(); 260 | if arg.starts_with(b"--") { 261 | // Long options have two forms: --option and --option=value. 262 | if let Some(ind) = arg.iter().position(|&b| b == b'=') { 263 | // The value can be an OsString... 264 | self.state = State::PendingValue(OsString::from_vec(arg[ind + 1..].into())); 265 | arg.truncate(ind); 266 | } 267 | // ...but the option has to be a string. 268 | // String::from_utf8_lossy().into_owned() would work, but its 269 | // return type is Cow: if the original was valid a borrowed 270 | // version is returned, and then into_owned() does an 271 | // unnecessary copy. 272 | // By trying String::from_utf8 first we avoid that copy if arg 273 | // is already UTF-8 (which is most of the time). 274 | // reqwest does a similar maneuver more efficiently with unsafe: 275 | // https://github.com/seanmonstar/reqwest/blob/e6a1a09f0904e06de4ff1317278798c4ed28af66/src/async_impl/response.rs#L194 276 | let option = match String::from_utf8(arg) { 277 | Ok(text) => text, 278 | Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(), 279 | }; 280 | Ok(Some(self.set_long(option))) 281 | } else if arg.len() > 1 && arg[0] == b'-' { 282 | self.state = State::Shorts(arg, 1); 283 | self.next() 284 | } else { 285 | Ok(Some(Arg::Value(OsString::from_vec(arg)))) 286 | } 287 | } 288 | 289 | #[cfg(not(unix))] 290 | { 291 | // Platforms where looking inside an OsString is harder 292 | 293 | #[cfg(windows)] 294 | { 295 | // Fast path for Windows 296 | let mut bytes = arg.encode_wide(); 297 | const DASH: u16 = b'-' as u16; 298 | match (bytes.next(), bytes.next()) { 299 | (Some(DASH), Some(_)) => { 300 | // This is an option, we'll have to do more work. 301 | // (We already checked for "--" earlier.) 302 | } 303 | _ => { 304 | // Just a value, return early. 
305 | return Ok(Some(Arg::Value(arg))); 306 | } 307 | } 308 | } 309 | 310 | let mut arg = match arg.into_string() { 311 | Ok(arg) => arg, 312 | Err(arg) => { 313 | // The argument is not valid unicode. 314 | // If it's an option we'll have to do something nasty, 315 | // otherwise we can return it as-is. 316 | 317 | #[cfg(windows)] 318 | { 319 | // On Windows we can only get here if this is an option, otherwise 320 | // we return earlier. 321 | // Unlike on Unix, we can't efficiently process invalid unicode. 322 | // Semantically it's UTF-16, but internally it's WTF-8 (a superset of UTF-8). 323 | // So we only process the raw version here, when we know we really have to. 324 | let mut arg: Vec = arg.encode_wide().collect(); 325 | const DASH: u16 = b'-' as u16; 326 | const EQ: u16 = b'=' as u16; 327 | if arg.starts_with(&[DASH, DASH]) { 328 | if let Some(ind) = arg.iter().position(|&u| u == EQ) { 329 | self.state = 330 | State::PendingValue(OsString::from_wide(&arg[ind + 1..])); 331 | arg.truncate(ind); 332 | } 333 | let long = self.set_long(String::from_utf16_lossy(&arg)); 334 | return Ok(Some(long)); 335 | } else { 336 | assert!(arg.len() > 1); 337 | assert_eq!(arg[0], DASH); 338 | self.state = State::ShortsU16(arg, 1); 339 | return self.next(); 340 | } 341 | }; 342 | 343 | #[cfg(not(windows))] 344 | { 345 | // Other platforms. 346 | 347 | // Most of them have their own `OsStrExt` but that doesn't mean 348 | // they actually have invalid strings. WASI for example exports 349 | // the Unix extension trait even though only UTF-8 OS strings 350 | // are allowed. 351 | // So on WASI this code path is only reachable by creating custom 352 | // faulty OS strings, not by processing true environment args. 353 | 354 | // This allocates unconditionally, sadly. 355 | let text = arg.to_string_lossy(); 356 | if text.starts_with('-') { 357 | // Use the lossily patched version and hope for the best. 358 | // This may be incorrect behavior. 
359 | // Other options are returning an error or (as of Rust 1.74) 360 | // using the unsafe encoded_bytes API. But neither seem worth 361 | // it in this obscure corner, especially since they'd be hard 362 | // to test. 363 | // (The entire crate will most likely move to the encoded_bytes 364 | // API in the future, once it supports checked conversion.) 365 | // Please open an issue if this behavior affects you! 366 | text.into_owned() 367 | } else { 368 | // It didn't look like an option, so return it as a value. 369 | return Ok(Some(Arg::Value(arg))); 370 | } 371 | } 372 | } 373 | }; 374 | 375 | // The argument is valid unicode. This is the ideal version of the 376 | // code, the previous mess was purely to deal with invalid unicode. 377 | if arg.starts_with("--") { 378 | if let Some(ind) = arg.find('=') { 379 | self.state = State::PendingValue(arg[ind + 1..].into()); 380 | arg.truncate(ind); 381 | } 382 | Ok(Some(self.set_long(arg))) 383 | } else if arg.starts_with('-') && arg != "-" { 384 | self.state = State::Shorts(arg.into(), 1); 385 | self.next() 386 | } else { 387 | Ok(Some(Arg::Value(arg.into()))) 388 | } 389 | } 390 | } 391 | 392 | /// Get a value for an option. 393 | /// 394 | /// This function should normally be called right after seeing an option 395 | /// that expects a value, with positional arguments being collected 396 | /// using [`next()`][Parser::next]. 397 | /// 398 | /// A value is collected even if it looks like an option 399 | /// (i.e., starts with `-`). 400 | /// 401 | /// # Errors 402 | /// 403 | /// An [`Error::MissingValue`] is returned if the end of the command 404 | /// line is reached. 
405 | pub fn value(&mut self) -> Result { 406 | if let Some(value) = self.optional_value() { 407 | return Ok(value); 408 | } 409 | 410 | if let Some(value) = self.source.next() { 411 | return Ok(value); 412 | } 413 | 414 | Err(Error::MissingValue { 415 | option: self.format_last_option(), 416 | }) 417 | } 418 | 419 | /// Gather multiple values for an option. 420 | /// 421 | /// This is used for options that take multiple arguments, such as a 422 | /// `--command` flag that's invoked as `app --command echo 'Hello world'`. 423 | /// 424 | /// It will gather arguments until another option is found, or `--` is found, or 425 | /// the end of the command line is reached. This differs from `.value()`, which 426 | /// takes a value even if it looks like an option. 427 | /// 428 | /// On success the resulting iterator will yield at least one value. 429 | /// 430 | /// An equals sign (`=`) will limit this to a single value. That means `-a=b c` and 431 | /// `--opt=b c` will only yield `"b"` while `-a b c`, `-ab c` and `--opt b c` will 432 | /// yield `"b"`, `"c"`. 433 | /// 434 | /// # Errors 435 | /// If not at least one value is found then [`Error::MissingValue`] is returned. 436 | /// 437 | /// # Example 438 | /// ``` 439 | /// # fn main() -> Result<(), lexopt::Error> { 440 | /// # use lexopt::prelude::*; 441 | /// # use std::ffi::OsString; 442 | /// # use std::path::PathBuf; 443 | /// # let mut parser = lexopt::Parser::from_args(&["a", "b", "-x", "one", "two", "three", "four"]); 444 | /// let arguments: Vec = parser.values()?.collect(); 445 | /// # assert_eq!(arguments, &["a", "b"]); 446 | /// # let _ = parser.next(); 447 | /// let at_most_three_files: Vec = parser.values()?.take(3).map(Into::into).collect(); 448 | /// # assert_eq!(parser.raw_args()?.as_slice(), &["four"]); 449 | /// for value in parser.values()? { 450 | /// // ... 
451 | /// } 452 | /// # Ok(()) } 453 | /// ``` 454 | pub fn values(&mut self) -> Result, Error> { 455 | // This code is designed so that just calling .values() doesn't consume 456 | // any arguments as long as you don't use the iterator. It used to work 457 | // differently. 458 | // "--" is treated like an option and not consumed. This seems to me the 459 | // least unreasonable behavior, and it's the easiest to implement. 460 | if self.has_pending() || self.next_is_normal() { 461 | Ok(ValuesIter { 462 | took_first: false, 463 | parser: Some(self), 464 | }) 465 | } else { 466 | Err(Error::MissingValue { 467 | option: self.format_last_option(), 468 | }) 469 | } 470 | } 471 | 472 | /// Inspect an argument and consume it if it's "normal" (not an option or --). 473 | /// 474 | /// Used by [`Parser::values`]. 475 | /// 476 | /// This method should not be called while partway through processing an 477 | /// argument. 478 | fn next_if_normal(&mut self) -> Option { 479 | if self.next_is_normal() { 480 | self.source.next() 481 | } else { 482 | None 483 | } 484 | } 485 | 486 | /// Execute the check for next_if_normal(). 487 | fn next_is_normal(&self) -> bool { 488 | assert!(!self.has_pending()); 489 | let arg = match self.source.as_slice().first() { 490 | // There has to be a next argument. 491 | None => return false, 492 | Some(arg) => arg, 493 | }; 494 | if let State::FinishedOpts = self.state { 495 | // If we already found a -- then we're really not supposed to be here, 496 | // but we shouldn't treat the next argument as an option. 497 | return true; 498 | } 499 | if arg == "-" { 500 | // "-" is the one argument with a leading '-' that's allowed. 
501 | return true; 502 | } 503 | #[cfg(unix)] 504 | let lead_dash = arg.as_bytes().first() == Some(&b'-'); 505 | #[cfg(windows)] 506 | let lead_dash = arg.encode_wide().next() == Some(b'-' as u16); 507 | #[cfg(not(any(unix, windows)))] 508 | let lead_dash = arg.to_string_lossy().as_bytes().first() == Some(&b'-'); 509 | 510 | !lead_dash 511 | } 512 | 513 | /// Take raw arguments from the original command line. 514 | /// 515 | /// This returns an iterator of [`OsString`]s. Any arguments that are not 516 | /// consumed are kept, so you can continue parsing after you're done with 517 | /// the iterator. 518 | /// 519 | /// To inspect an argument without consuming it, use [`RawArgs::peek`] or 520 | /// [`RawArgs::as_slice`]. 521 | /// 522 | /// # Errors 523 | /// 524 | /// Returns an [`Error::UnexpectedValue`] if the last option had a left-over 525 | /// argument, as in `--option=value`, `-ovalue`, or if it was midway through 526 | /// an option chain, as in `-abc`. The iterator only yields whole arguments. 527 | /// To avoid this, use [`try_raw_args()`][Parser::try_raw_args]. 528 | /// 529 | /// After this error the method is guaranteed to succeed, as it consumes the 530 | /// rest of the argument. 531 | /// 532 | /// # Example 533 | /// As soon as a free-standing argument is found, consume the other arguments 534 | /// as-is, and build them into a command. 535 | /// ``` 536 | /// # fn main() -> Result<(), lexopt::Error> { 537 | /// # use lexopt::prelude::*; 538 | /// # use std::ffi::OsString; 539 | /// # use std::path::PathBuf; 540 | /// # let mut parser = lexopt::Parser::from_args(&["-x", "echo", "-n", "'Hello, world'"]); 541 | /// # while let Some(arg) = parser.next()? 
{ 542 | /// # match arg { 543 | /// Value(prog) => { 544 | /// let args: Vec<_> = parser.raw_args()?.collect(); 545 | /// let command = std::process::Command::new(prog).args(args); 546 | /// } 547 | /// # _ => (), }} Ok(()) } 548 | /// ``` 549 | pub fn raw_args(&mut self) -> Result, Error> { 550 | if let Some(value) = self.optional_value() { 551 | return Err(Error::UnexpectedValue { 552 | option: self.format_last_option().unwrap(), 553 | value, 554 | }); 555 | } 556 | 557 | Ok(RawArgs(&mut self.source)) 558 | } 559 | 560 | /// Take raw arguments from the original command line, *if* the current argument 561 | /// has finished processing. 562 | /// 563 | /// Unlike [`raw_args()`][Parser::raw_args] this does not consume any value 564 | /// in case of a left-over argument. This makes it safe to call at any time. 565 | /// 566 | /// It returns `None` exactly when [`optional_value()`][Parser::optional_value] 567 | /// would return `Some`. 568 | /// 569 | /// Note: If no arguments are left then it returns an empty iterator (not `None`). 570 | /// 571 | /// # Example 572 | /// Process arguments of the form `-123` as numbers. For a complete runnable version of 573 | /// this example, see 574 | /// [`examples/nonstandard.rs`](https://github.com/blyxxyz/lexopt/blob/e3754e6f24506afb42394602fc257b1ad9258d84/examples/nonstandard.rs). 
575 | /// ``` 576 | /// # fn main() -> Result<(), lexopt::Error> { 577 | /// # use lexopt::prelude::*; 578 | /// # use std::ffi::OsString; 579 | /// # use std::path::PathBuf; 580 | /// # let mut parser = lexopt::Parser::from_iter(&["-13"]); 581 | /// fn parse_dashnum(parser: &mut lexopt::Parser) -> Option { 582 | /// let mut raw = parser.try_raw_args()?; 583 | /// let arg = raw.peek()?.to_str()?; 584 | /// let num = arg.strip_prefix('-')?.parse::().ok()?; 585 | /// raw.next(); // Consume the argument we just parsed 586 | /// Some(num) 587 | /// } 588 | /// 589 | /// loop { 590 | /// if let Some(num) = parse_dashnum(&mut parser) { 591 | /// println!("Got number {}", num); 592 | /// } else if let Some(arg) = parser.next()? { 593 | /// match arg { 594 | /// // ... 595 | /// # _ => (), 596 | /// } 597 | /// } else { 598 | /// break; 599 | /// } 600 | /// } 601 | /// # Ok(()) } 602 | /// ``` 603 | pub fn try_raw_args(&mut self) -> Option> { 604 | if self.has_pending() { 605 | None 606 | } else { 607 | Some(RawArgs(&mut self.source)) 608 | } 609 | } 610 | 611 | /// Check whether we're halfway through an argument, or in other words, 612 | /// if [`Parser::optional_value()`] would return `Some`. 613 | fn has_pending(&self) -> bool { 614 | match self.state { 615 | State::None | State::FinishedOpts => false, 616 | State::PendingValue(_) => true, 617 | State::Shorts(ref arg, pos) => pos < arg.len(), 618 | #[cfg(windows)] 619 | State::ShortsU16(ref arg, pos) => pos < arg.len(), 620 | } 621 | } 622 | 623 | #[inline(never)] 624 | fn format_last_option(&self) -> Option { 625 | match self.last_option { 626 | LastOption::None => None, 627 | LastOption::Short(ch) => Some(format!("-{}", ch)), 628 | LastOption::Long(ref option) => Some(option.clone()), 629 | } 630 | } 631 | 632 | /// The name of the command, as in the zeroth argument of the process. 633 | /// 634 | /// This is intended for use in messages. 
If the name is not valid unicode 635 | /// it will be sanitized with replacement characters as by 636 | /// [`String::from_utf8_lossy`]. 637 | /// 638 | /// To get the current executable, use [`std::env::current_exe`]. 639 | /// 640 | /// # Example 641 | /// ``` 642 | /// let mut parser = lexopt::Parser::from_env(); 643 | /// let bin_name = parser.bin_name().unwrap_or("myapp"); 644 | /// println!("{}: Some message", bin_name); 645 | /// ``` 646 | pub fn bin_name(&self) -> Option<&str> { 647 | Some(self.bin_name.as_ref()?) 648 | } 649 | 650 | /// Get a value only if it's concatenated to an option, as in `-ovalue` or 651 | /// `--option=value` or `-o=value`, but not `-o value` or `--option value`. 652 | pub fn optional_value(&mut self) -> Option { 653 | Some(self.raw_optional_value()?.0) 654 | } 655 | 656 | /// [`Parser::optional_value`], but indicate whether the value was joined 657 | /// with an = sign. This matters for [`Parser::values`]. 658 | fn raw_optional_value(&mut self) -> Option<(OsString, bool)> { 659 | match replace(&mut self.state, State::None) { 660 | State::PendingValue(value) => Some((value, true)), 661 | State::Shorts(mut arg, mut pos) => { 662 | if pos >= arg.len() { 663 | return None; 664 | } 665 | let mut had_eq_sign = false; 666 | if arg[pos] == b'=' { 667 | // -o=value. 668 | // clap actually strips out all leading '='s, but that seems silly. 669 | // We allow `-xo=value`. Python's argparse doesn't strip the = in that case. 
670 | pos += 1; 671 | had_eq_sign = true; 672 | } 673 | arg.drain(..pos); // Reuse allocation 674 | #[cfg(unix)] 675 | { 676 | Some((OsString::from_vec(arg), had_eq_sign)) 677 | } 678 | #[cfg(not(unix))] 679 | { 680 | let arg = String::from_utf8(arg) 681 | .expect("short option args on exotic platforms must be unicode"); 682 | Some((arg.into(), had_eq_sign)) 683 | } 684 | } 685 | #[cfg(windows)] 686 | State::ShortsU16(arg, mut pos) => { 687 | if pos >= arg.len() { 688 | return None; 689 | } 690 | let mut had_eq_sign = false; 691 | if arg[pos] == b'=' as u16 { 692 | pos += 1; 693 | had_eq_sign = true; 694 | } 695 | Some((OsString::from_wide(&arg[pos..]), had_eq_sign)) 696 | } 697 | State::FinishedOpts => { 698 | // Not really supposed to be here, but it's benign and not our fault 699 | self.state = State::FinishedOpts; 700 | None 701 | } 702 | State::None => None, 703 | } 704 | } 705 | 706 | fn new(bin_name: Option, source: InnerIter) -> Parser { 707 | Parser { 708 | source, 709 | state: State::None, 710 | last_option: LastOption::None, 711 | bin_name: bin_name.map(|s| match s.into_string() { 712 | Ok(text) => text, 713 | Err(text) => text.to_string_lossy().into_owned(), 714 | }), 715 | } 716 | } 717 | 718 | /// Create a parser from the environment using [`std::env::args_os`]. 719 | /// 720 | /// This is the usual way to create a `Parser`. 721 | pub fn from_env() -> Parser { 722 | let mut source = make_iter(std::env::args_os()); 723 | Parser::new(source.next(), source) 724 | } 725 | 726 | // The collision with `FromIterator::from_iter` is a bit unfortunate. 727 | // This name is used because: 728 | // - `from_args()` was taken, and changing its behavior without changing 729 | // its signature would be evil. 730 | // - structopt also had a method by that name, so there's a precedent. 731 | // (clap_derive doesn't.) 732 | // - I couldn't think of a better one. 733 | // When this name was chosen `FromIterator` could not actually be implemented. 
734 | // It can be implemented now, but I'm not sure there's a reason to. 735 | 736 | /// Create a parser from an iterator. This is useful for testing among other things. 737 | /// 738 | /// The first item from the iterator **must** be the binary name, as from [`std::env::args_os`]. 739 | /// 740 | /// The iterator is consumed immediately. 741 | /// 742 | /// # Example 743 | /// ``` 744 | /// let mut parser = lexopt::Parser::from_iter(&["myapp", "-n", "10", "./foo.bar"]); 745 | /// ``` 746 | pub fn from_iter(args: I) -> Parser 747 | where 748 | I: IntoIterator, 749 | I::Item: Into, 750 | { 751 | let mut args = make_iter(args.into_iter().map(Into::into)); 752 | Parser::new(args.next(), args) 753 | } 754 | 755 | /// Create a parser from an iterator that does **not** include the binary name. 756 | /// 757 | /// The iterator is consumed immediately. 758 | /// 759 | /// [`bin_name()`](`Parser::bin_name`) will return `None`. Consider using 760 | /// [`Parser::from_iter`] instead. 761 | pub fn from_args(args: I) -> Parser 762 | where 763 | I: IntoIterator, 764 | I::Item: Into, 765 | { 766 | Parser::new(None, make_iter(args.into_iter().map(Into::into))) 767 | } 768 | 769 | /// Store a long option so the caller can borrow it. 770 | fn set_long(&mut self, option: String) -> Arg<'_> { 771 | self.last_option = LastOption::Long(option); 772 | match self.last_option { 773 | LastOption::Long(ref option) => Arg::Long(&option[2..]), 774 | _ => unreachable!(), 775 | } 776 | } 777 | } 778 | 779 | impl Arg<'_> { 780 | /// Convert an unexpected argument into an error. 781 | pub fn unexpected(self) -> Error { 782 | match self { 783 | Arg::Short(short) => Error::UnexpectedOption(format!("-{}", short)), 784 | Arg::Long(long) => Error::UnexpectedOption(format!("--{}", long)), 785 | Arg::Value(value) => Error::UnexpectedArgument(value), 786 | } 787 | } 788 | } 789 | 790 | /// An iterator for multiple option-arguments, returned by [`Parser::values`]. 
791 | /// 792 | /// It's guaranteed to yield at least one value. 793 | #[derive(Debug)] 794 | pub struct ValuesIter<'a> { 795 | took_first: bool, 796 | parser: Option<&'a mut Parser>, 797 | } 798 | 799 | impl Iterator for ValuesIter<'_> { 800 | type Item = OsString; 801 | 802 | fn next(&mut self) -> Option { 803 | let parser = self.parser.as_mut()?; 804 | if self.took_first { 805 | parser.next_if_normal() 806 | } else if let Some((value, had_eq_sign)) = parser.raw_optional_value() { 807 | if had_eq_sign { 808 | self.parser = None; 809 | } 810 | self.took_first = true; 811 | Some(value) 812 | } else { 813 | let value = parser 814 | .next_if_normal() 815 | .expect("ValuesIter must yield at least one value"); 816 | self.took_first = true; 817 | Some(value) 818 | } 819 | } 820 | } 821 | 822 | /// An iterator for the remaining raw arguments, returned by [`Parser::raw_args`]. 823 | #[derive(Debug)] 824 | pub struct RawArgs<'a>(&'a mut InnerIter); 825 | 826 | impl Iterator for RawArgs<'_> { 827 | type Item = OsString; 828 | 829 | fn next(&mut self) -> Option { 830 | self.0.next() 831 | } 832 | } 833 | 834 | impl RawArgs<'_> { 835 | /// Return a reference to the next() value without consuming it. 836 | /// 837 | /// An argument you peek but do not consume will still be seen by `Parser` 838 | /// if you resume parsing. 839 | /// 840 | /// See [`Iterator::peekable`], [`std::iter::Peekable::peek`]. 841 | pub fn peek(&self) -> Option<&OsStr> { 842 | Some(self.0.as_slice().first()?.as_os_str()) 843 | } 844 | 845 | /// Consume and return the next argument if a condition is true. 846 | /// 847 | /// See [`std::iter::Peekable::next_if`]. 848 | pub fn next_if(&mut self, func: impl FnOnce(&OsStr) -> bool) -> Option { 849 | match self.peek() { 850 | Some(arg) if func(arg) => self.next(), 851 | _ => None, 852 | } 853 | } 854 | 855 | /// Return the remaining arguments as a slice. 
856 | pub fn as_slice(&self) -> &[OsString] { 857 | self.0.as_slice() 858 | } 859 | } 860 | 861 | // These would make sense: 862 | // - fn RawArgs::iter(&self) 863 | // - impl IntoIterator for &RawArgs 864 | // - impl AsRef<[OsString]> for RawArgs 865 | // But they're niche and constrain future design. 866 | // Let's leave them out for now. 867 | // (Open question: should iter() return std::slice::Iter and get 868 | // an optimized .nth() and so on for free, or should it return a novel type 869 | // that yields &OsStr?) 870 | 871 | /// An error during argument parsing. 872 | /// 873 | /// This implements `From` and `From<&str>`, for easy ad-hoc error 874 | /// messages. 875 | // 876 | // This is not #[non_exhaustive] because of the MSRV. I'm hoping no more 877 | // variants will turn out to be needed: this seems reasonable, if the scope 878 | // of the library doesn't change. Worst case scenario it can be stuffed inside 879 | // Error::Custom. 880 | pub enum Error { 881 | /// An option argument was expected but was not found. 882 | MissingValue { 883 | /// The most recently emitted option. 884 | option: Option, 885 | }, 886 | 887 | /// An unexpected option was found. 888 | UnexpectedOption(String), 889 | 890 | /// A positional argument was found when none was expected. 891 | UnexpectedArgument(OsString), 892 | 893 | /// An option had a value when none was expected. 894 | UnexpectedValue { 895 | /// The option. 896 | option: String, 897 | /// The value. 898 | value: OsString, 899 | }, 900 | 901 | /// Parsing a value failed. Returned by methods on [`ValueExt`]. 902 | ParsingFailed { 903 | /// The string that failed to parse. 904 | value: String, 905 | /// The error returned while parsing. 906 | error: Box, 907 | }, 908 | 909 | /// A value was found that was not valid unicode. 910 | /// 911 | /// This can be returned by the methods on [`ValueExt`]. 912 | NonUnicodeValue(OsString), 913 | 914 | /// For custom error messages in application code. 
915 | Custom(Box), 916 | } 917 | 918 | impl Display for Error { 919 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 920 | use crate::Error::*; 921 | match self { 922 | MissingValue { option: None } => write!(f, "missing argument"), 923 | MissingValue { 924 | option: Some(option), 925 | } => { 926 | write!(f, "missing argument for option '{}'", option) 927 | } 928 | UnexpectedOption(option) => write!(f, "invalid option '{}'", option), 929 | UnexpectedArgument(value) => write!(f, "unexpected argument {:?}", value), 930 | UnexpectedValue { option, value } => { 931 | write!( 932 | f, 933 | "unexpected argument for option '{}': {:?}", 934 | option, value 935 | ) 936 | } 937 | NonUnicodeValue(value) => write!(f, "argument is invalid unicode: {:?}", value), 938 | ParsingFailed { value, error } => { 939 | write!(f, "cannot parse argument {:?}: {}", value, error) 940 | } 941 | Custom(err) => write!(f, "{}", err), 942 | } 943 | } 944 | } 945 | 946 | // This is printed when returning an error from main(), so defer to Display 947 | impl std::fmt::Debug for Error { 948 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 949 | Display::fmt(self, f) 950 | } 951 | } 952 | 953 | impl std::error::Error for Error { 954 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 955 | match self { 956 | Error::ParsingFailed { error, .. } | Error::Custom(error) => Some(error.as_ref()), 957 | _ => None, 958 | } 959 | } 960 | } 961 | 962 | impl From for Error { 963 | fn from(msg: String) -> Self { 964 | Error::Custom(msg.into()) 965 | } 966 | } 967 | 968 | impl<'a> From<&'a str> for Error { 969 | fn from(msg: &'a str) -> Self { 970 | Error::Custom(msg.into()) 971 | } 972 | } 973 | 974 | /// For [`OsString::into_string`], so it may be used with the try (`?`) operator. 975 | /// 976 | /// [`ValueExt::string`] is the new preferred method because it's compatible with 977 | /// catch-all error types like `anyhow::Error`. 
978 | impl From for Error { 979 | fn from(arg: OsString) -> Self { 980 | Error::NonUnicodeValue(arg) 981 | } 982 | } 983 | 984 | mod private { 985 | pub trait Sealed {} 986 | impl Sealed for std::ffi::OsString {} 987 | } 988 | 989 | /// An optional extension trait with methods for parsing [`OsString`]s. 990 | /// 991 | /// They may fail in two cases: 992 | /// - The value cannot be decoded because it's invalid unicode 993 | /// ([`Error::NonUnicodeValue`]) 994 | /// - The value can be decoded, but parsing fails ([`Error::ParsingFailed`]) 995 | /// 996 | /// If parsing fails the error will be wrapped in lexopt's own [`Error`] type. 997 | pub trait ValueExt: private::Sealed { 998 | /// Decode the value and parse it using [`FromStr`]. 999 | /// 1000 | /// This will fail if the value is not valid unicode or if the subsequent 1001 | /// parsing fails. 1002 | fn parse(&self) -> Result 1003 | where 1004 | T::Err: Into>; 1005 | 1006 | // TODO: move the F parameter to the end for better turbofishing. 1007 | // This is a breaking change that affects at least one real-world program. 1008 | // But the code will be better off for it, so it's worth doing in the next 1009 | // breaking release. 1010 | 1011 | /// Decode the value and parse it using a custom function. 1012 | fn parse_with(&self, func: F) -> Result 1013 | where 1014 | F: FnOnce(&str) -> Result, 1015 | E: Into>; 1016 | 1017 | // There is no parse_os_with() because I can't think of any useful 1018 | // fallible operations on an OsString. Typically you'd either decode it, 1019 | // use it as is, or do an infallible conversion to a PathBuf or such. 1020 | // 1021 | // If you have a use for parse_os_with() please open an issue with an 1022 | // example. 1023 | 1024 | /// Convert the `OsString` into a [`String`] if it's valid Unicode. 1025 | /// 1026 | /// This is like [`OsString::into_string`] but returns an 1027 | /// [`Error::NonUnicodeValue`] on error instead of the original `OsString`. 
1028 | /// This makes it easier to propagate the failure with libraries like 1029 | /// `anyhow`. 1030 | fn string(self) -> Result; 1031 | } 1032 | 1033 | impl ValueExt for OsString { 1034 | fn parse(&self) -> Result 1035 | where 1036 | T::Err: Into>, 1037 | { 1038 | self.parse_with(FromStr::from_str) 1039 | } 1040 | 1041 | fn parse_with(&self, func: F) -> Result 1042 | where 1043 | F: FnOnce(&str) -> Result, 1044 | E: Into>, 1045 | { 1046 | match self.to_str() { 1047 | Some(text) => match func(text) { 1048 | Ok(value) => Ok(value), 1049 | Err(err) => Err(Error::ParsingFailed { 1050 | value: text.to_owned(), 1051 | error: err.into(), 1052 | }), 1053 | }, 1054 | None => Err(Error::NonUnicodeValue(self.into())), 1055 | } 1056 | } 1057 | 1058 | fn string(self) -> Result { 1059 | match self.into_string() { 1060 | Ok(string) => Ok(string), 1061 | Err(raw) => Err(Error::NonUnicodeValue(raw)), 1062 | } 1063 | } 1064 | } 1065 | 1066 | /// A small prelude for processing arguments. 1067 | /// 1068 | /// It allows you to write `Short`/`Long`/`Value` without an [`Arg`] prefix 1069 | /// and adds convenience methods to [`OsString`]. 1070 | /// 1071 | /// If this is used it's best to import it inside a function, not in module 1072 | /// scope: 1073 | /// ``` 1074 | /// # struct Args; 1075 | /// fn parse_args() -> Result { 1076 | /// use lexopt::prelude::*; 1077 | /// // ... 1078 | /// # Ok(Args) 1079 | /// } 1080 | /// ``` 1081 | pub mod prelude { 1082 | pub use super::Arg::*; 1083 | pub use super::ValueExt; 1084 | } 1085 | 1086 | /// Take the first codepoint from a UTF-8 bytestring. 1087 | /// 1088 | /// The rest of the bytestring does not have to be valid unicode. 
fn first_codepoint(bytes: &[u8]) -> Result<Option<char>, Utf8Error> {
    // We only need the first 4 bytes
    let bytes = bytes.get(..4).unwrap_or(bytes);
    let text = match std::str::from_utf8(bytes) {
        Ok(text) => text,
        // The window starts with valid text and turns invalid later:
        // decode just the valid prefix.
        Err(err) if err.valid_up_to() > 0 => {
            std::str::from_utf8(&bytes[..err.valid_up_to()]).unwrap()
        }
        Err(err) => return Err(err),
    };
    // `None` only for empty input.
    Ok(text.chars().next())
}

#[cfg(windows)]
/// As before, but for UTF-16.
fn first_utf16_codepoint(units: &[u16]) -> Result<Option<char>, u16> {
    match std::char::decode_utf16(units.iter().cloned()).next() {
        Some(Ok(ch)) => Ok(Some(ch)),
        // On a decoding error, report the first code unit.
        Some(Err(_)) => Err(units[0]),
        None => Ok(None),
    }
}

#[cfg(test)]
mod tests {
    use std::error::Error as stdError;

    use super::prelude::*;
    use super::*;

    // On wasm32-wasip1 invalid OS strings can't come from the OS but
    // can be constructed by Rust code.
    // We abuse that to test some hard-to-reach codepaths.
    // On wasm32-wasip2 this trait is currently unstable.
    #[cfg(all(target_os = "wasi", target_env = "p1"))]
    use std::os::wasi::ffi::OsStringExt;

    fn parse(args: &'static str) -> Parser {
        Parser::from_args(args.split_whitespace().map(bad_string))
    }

    /// Specialized backport of matches!()
    macro_rules!
assert_matches { 1132 | ($expression: expr, $( $pattern: pat )|+) => { 1133 | match $expression { 1134 | $( $pattern )|+ => (), 1135 | _ => panic!( 1136 | "{:?} does not match {:?}", 1137 | stringify!($expression), 1138 | stringify!($( $pattern )|+) 1139 | ), 1140 | } 1141 | }; 1142 | } 1143 | 1144 | #[test] 1145 | fn test_basic() -> Result<(), Error> { 1146 | let mut p = parse("-n 10 foo - -- baz -qux"); 1147 | assert_eq!(p.next()?.unwrap(), Short('n')); 1148 | assert_eq!(p.value()?.parse::()?, 10); 1149 | assert_eq!(p.next()?.unwrap(), Value("foo".into())); 1150 | assert_eq!(p.next()?.unwrap(), Value("-".into())); 1151 | assert_eq!(p.next()?.unwrap(), Value("baz".into())); 1152 | assert_eq!(p.next()?.unwrap(), Value("-qux".into())); 1153 | assert_eq!(p.next()?, None); 1154 | assert_eq!(p.next()?, None); 1155 | assert_eq!(p.next()?, None); 1156 | Ok(()) 1157 | } 1158 | 1159 | #[test] 1160 | fn test_combined() -> Result<(), Error> { 1161 | let mut p = parse("-abc -fvalue -xfvalue"); 1162 | assert_eq!(p.next()?.unwrap(), Short('a')); 1163 | assert_eq!(p.next()?.unwrap(), Short('b')); 1164 | assert_eq!(p.next()?.unwrap(), Short('c')); 1165 | assert_eq!(p.next()?.unwrap(), Short('f')); 1166 | assert_eq!(p.value()?, "value"); 1167 | assert_eq!(p.next()?.unwrap(), Short('x')); 1168 | assert_eq!(p.next()?.unwrap(), Short('f')); 1169 | assert_eq!(p.value()?, "value"); 1170 | assert_eq!(p.next()?, None); 1171 | Ok(()) 1172 | } 1173 | 1174 | #[test] 1175 | fn test_long() -> Result<(), Error> { 1176 | let mut p = parse("--foo --bar=qux --foobar=qux=baz"); 1177 | assert_eq!(p.next()?.unwrap(), Long("foo")); 1178 | assert_eq!(p.next()?.unwrap(), Long("bar")); 1179 | assert_eq!(p.value()?, "qux"); 1180 | assert_eq!(p.next()?.unwrap(), Long("foobar")); 1181 | assert_eq!( 1182 | p.next().unwrap_err().to_string(), 1183 | r#"unexpected argument for option '--foobar': "qux=baz""# 1184 | ); 1185 | assert_eq!(p.next()?, None); 1186 | Ok(()) 1187 | } 1188 | 1189 | #[test] 1190 | fn 
test_dash_args() -> Result<(), Error> { 1191 | // "--" should indicate the end of the options 1192 | let mut p = parse("-x -- -y"); 1193 | assert_eq!(p.next()?.unwrap(), Short('x')); 1194 | assert_eq!(p.next()?.unwrap(), Value("-y".into())); 1195 | assert_eq!(p.next()?, None); 1196 | 1197 | // ...unless it's an argument of an option 1198 | let mut p = parse("-x -- -y"); 1199 | assert_eq!(p.next()?.unwrap(), Short('x')); 1200 | assert_eq!(p.value()?, "--"); 1201 | assert_eq!(p.next()?.unwrap(), Short('y')); 1202 | assert_eq!(p.next()?, None); 1203 | 1204 | // "-" is a valid value that should not be treated as an option 1205 | let mut p = parse("-x - -y"); 1206 | assert_eq!(p.next()?.unwrap(), Short('x')); 1207 | assert_eq!(p.next()?.unwrap(), Value("-".into())); 1208 | assert_eq!(p.next()?.unwrap(), Short('y')); 1209 | assert_eq!(p.next()?, None); 1210 | 1211 | // '-' is a silly and hard to use short option, but other parsers treat 1212 | // it like an option in this position 1213 | let mut p = parse("-x-y"); 1214 | assert_eq!(p.next()?.unwrap(), Short('x')); 1215 | assert_eq!(p.next()?.unwrap(), Short('-')); 1216 | assert_eq!(p.next()?.unwrap(), Short('y')); 1217 | assert_eq!(p.next()?, None); 1218 | 1219 | Ok(()) 1220 | } 1221 | 1222 | #[test] 1223 | fn test_missing_value() -> Result<(), Error> { 1224 | let mut p = parse("-o"); 1225 | assert_eq!(p.next()?.unwrap(), Short('o')); 1226 | assert_eq!( 1227 | p.value().unwrap_err().to_string(), 1228 | "missing argument for option '-o'", 1229 | ); 1230 | 1231 | let mut q = parse("--out"); 1232 | assert_eq!(q.next()?.unwrap(), Long("out")); 1233 | assert_eq!( 1234 | q.value().unwrap_err().to_string(), 1235 | "missing argument for option '--out'", 1236 | ); 1237 | 1238 | let mut r = parse(""); 1239 | assert_eq!(r.value().unwrap_err().to_string(), "missing argument"); 1240 | 1241 | Ok(()) 1242 | } 1243 | 1244 | #[test] 1245 | fn test_weird_args() -> Result<(), Error> { 1246 | let mut p = Parser::from_args(&[ 1247 | "", 
"--=", "--=3", "-", "-x", "--", "-", "-x", "--", "", "-", "-x", 1248 | ]); 1249 | assert_eq!(p.next()?.unwrap(), Value(OsString::from(""))); 1250 | 1251 | // These are weird and questionable, but this seems to be the standard 1252 | // interpretation 1253 | // GNU getopt_long and argparse complain that it could be an abbreviation 1254 | // of every single long option 1255 | // clap complains that "--" is not expected, which matches its treatment 1256 | // of unknown long options 1257 | assert_eq!(p.next()?.unwrap(), Long("")); 1258 | assert_eq!(p.value()?, OsString::from("")); 1259 | assert_eq!(p.next()?.unwrap(), Long("")); 1260 | assert_eq!(p.value()?, OsString::from("3")); 1261 | 1262 | assert_eq!(p.next()?.unwrap(), Value(OsString::from("-"))); 1263 | assert_eq!(p.next()?.unwrap(), Short('x')); 1264 | assert_eq!(p.value()?, OsString::from("--")); 1265 | assert_eq!(p.next()?.unwrap(), Value(OsString::from("-"))); 1266 | assert_eq!(p.next()?.unwrap(), Short('x')); 1267 | assert_eq!(p.next()?.unwrap(), Value(OsString::from(""))); 1268 | assert_eq!(p.next()?.unwrap(), Value(OsString::from("-"))); 1269 | assert_eq!(p.next()?.unwrap(), Value(OsString::from("-x"))); 1270 | assert_eq!(p.next()?, None); 1271 | 1272 | #[cfg(any(unix, windows, all(target_os = "wasi", target_env = "p1")))] 1273 | { 1274 | let mut q = parse("--=@"); 1275 | assert_eq!(q.next()?.unwrap(), Long("")); 1276 | assert_eq!(q.value()?, bad_output_string("@")); 1277 | assert_eq!(q.next()?, None); 1278 | } 1279 | 1280 | let mut r = parse(""); 1281 | assert_eq!(r.next()?, None); 1282 | 1283 | Ok(()) 1284 | } 1285 | 1286 | #[test] 1287 | fn test_unicode() -> Result<(), Error> { 1288 | let mut p = parse("-aµ --µ=10 µ --foo=µ"); 1289 | assert_eq!(p.next()?.unwrap(), Short('a')); 1290 | assert_eq!(p.next()?.unwrap(), Short('µ')); 1291 | assert_eq!(p.next()?.unwrap(), Long("µ")); 1292 | assert_eq!(p.value()?, "10"); 1293 | assert_eq!(p.next()?.unwrap(), Value("µ".into())); 1294 | 
assert_eq!(p.next()?.unwrap(), Long("foo")); 1295 | assert_eq!(p.value()?, "µ"); 1296 | Ok(()) 1297 | } 1298 | 1299 | #[cfg(any(unix, windows, all(target_os = "wasi", target_env = "p1")))] 1300 | #[test] 1301 | fn test_mixed_invalid() -> Result<(), Error> { 1302 | let mut p = parse("--foo=@@@"); 1303 | assert_eq!(p.next()?.unwrap(), Long("foo")); 1304 | assert_eq!(p.value()?, bad_output_string("@@@")); 1305 | 1306 | let mut q = parse("-💣@@@"); 1307 | assert_eq!(q.next()?.unwrap(), Short('💣')); 1308 | assert_eq!(q.value()?, bad_output_string("@@@")); 1309 | 1310 | let mut r = parse("-f@@@"); 1311 | assert_eq!(r.next()?.unwrap(), Short('f')); 1312 | assert_eq!(r.next()?.unwrap(), Short('�')); 1313 | assert_eq!(r.next()?.unwrap(), Short('�')); 1314 | assert_eq!(r.next()?.unwrap(), Short('�')); 1315 | assert_eq!(r.next()?, None); 1316 | 1317 | let mut s = parse("--foo=bar=@@@"); 1318 | assert_eq!(s.next()?.unwrap(), Long("foo")); 1319 | assert_eq!(s.value()?, bad_output_string("bar=@@@")); 1320 | 1321 | Ok(()) 1322 | } 1323 | 1324 | #[cfg(any(unix, windows, all(target_os = "wasi", target_env = "p1")))] 1325 | #[test] 1326 | fn test_separate_invalid() -> Result<(), Error> { 1327 | let mut p = parse("--foo @@@"); 1328 | assert_eq!(p.next()?.unwrap(), Long("foo")); 1329 | assert_eq!(p.value()?, bad_string("@@@")); 1330 | Ok(()) 1331 | } 1332 | 1333 | #[cfg(any(unix, windows, all(target_os = "wasi", target_env = "p1")))] 1334 | #[test] 1335 | fn test_invalid_long_option() -> Result<(), Error> { 1336 | let mut p = parse("--@=10"); 1337 | assert_eq!(p.next()?.unwrap(), Long("�")); 1338 | assert_eq!(p.value().unwrap(), OsString::from("10")); 1339 | assert_eq!(p.next()?, None); 1340 | 1341 | let mut q = parse("--@"); 1342 | assert_eq!(q.next()?.unwrap(), Long("�")); 1343 | assert_eq!(q.next()?, None); 1344 | 1345 | Ok(()) 1346 | } 1347 | 1348 | #[test] 1349 | fn short_opt_equals_sign() -> Result<(), Error> { 1350 | let mut p = parse("-a=b"); 1351 | 
assert_eq!(p.next()?.unwrap(), Short('a')); 1352 | assert_eq!(p.value()?, OsString::from("b")); 1353 | assert_eq!(p.next()?, None); 1354 | 1355 | let mut p = parse("-a=b"); 1356 | assert_eq!(p.next()?.unwrap(), Short('a')); 1357 | assert_eq!( 1358 | p.next().unwrap_err().to_string(), 1359 | r#"unexpected argument for option '-a': "b""# 1360 | ); 1361 | assert_eq!(p.next()?, None); 1362 | 1363 | let mut p = parse("-a="); 1364 | assert_eq!(p.next()?.unwrap(), Short('a')); 1365 | assert_eq!(p.value()?, OsString::from("")); 1366 | assert_eq!(p.next()?, None); 1367 | 1368 | let mut p = parse("-a="); 1369 | assert_eq!(p.next()?.unwrap(), Short('a')); 1370 | assert_eq!( 1371 | p.next().unwrap_err().to_string(), 1372 | r#"unexpected argument for option '-a': """# 1373 | ); 1374 | assert_eq!(p.next()?, None); 1375 | 1376 | let mut p = parse("-="); 1377 | assert_eq!(p.next()?.unwrap(), Short('=')); 1378 | assert_eq!(p.next()?, None); 1379 | 1380 | let mut p = parse("-=a"); 1381 | assert_eq!(p.next()?.unwrap(), Short('=')); 1382 | assert_eq!(p.value()?, "a"); 1383 | 1384 | Ok(()) 1385 | } 1386 | 1387 | #[cfg(any(unix, windows, all(target_os = "wasi", target_env = "p1")))] 1388 | #[test] 1389 | fn short_opt_equals_sign_invalid() -> Result<(), Error> { 1390 | let mut p = parse("-a=@"); 1391 | assert_eq!(p.next()?.unwrap(), Short('a')); 1392 | assert_eq!(p.value()?, bad_output_string("@")); 1393 | assert_eq!(p.next()?, None); 1394 | 1395 | let mut p = parse("-a=@"); 1396 | assert_eq!(p.next()?.unwrap(), Short('a')); 1397 | #[cfg(unix)] 1398 | assert_eq!( 1399 | p.next().unwrap_err().to_string(), 1400 | r#"unexpected argument for option '-a': "\xFF""# 1401 | ); 1402 | #[cfg(windows)] 1403 | assert_eq!( 1404 | p.next().unwrap_err().to_string(), 1405 | r#"unexpected argument for option '-a': "\u{d800}""# 1406 | ); 1407 | #[cfg(all(target_os = "wasi", target_env = "p1"))] 1408 | assert_eq!( 1409 | dbg!(p.next().unwrap_err().to_string()), 1410 | r#"unexpected argument for option 
'-a': "�""# 1411 | ); 1412 | assert_eq!(p.next()?, None); 1413 | 1414 | let mut p = parse("-=@"); 1415 | assert_eq!(p.next()?.unwrap(), Short('=')); 1416 | assert_eq!(p.value()?, bad_output_string("@")); 1417 | 1418 | Ok(()) 1419 | } 1420 | 1421 | #[test] 1422 | fn multi_values() -> Result<(), Error> { 1423 | for &case in &["-a b c d", "-ab c d", "-a b c d --", "--a b c d"] { 1424 | let mut p = parse(case); 1425 | p.next()?.unwrap(); 1426 | let mut iter = p.values()?; 1427 | let values: Vec<_> = iter.by_ref().collect(); 1428 | assert_eq!(values, &["b", "c", "d"]); 1429 | assert!(iter.next().is_none()); 1430 | assert!(p.next()?.is_none()); 1431 | } 1432 | 1433 | for &case in &["-a=b c", "--a=b c"] { 1434 | let mut p = parse(case); 1435 | p.next()?.unwrap(); 1436 | let mut iter = p.values()?; 1437 | let values: Vec<_> = iter.by_ref().collect(); 1438 | assert_eq!(values, &["b"]); 1439 | assert!(iter.next().is_none()); 1440 | assert_eq!(p.next()?.unwrap(), Value("c".into())); 1441 | assert!(p.next()?.is_none()); 1442 | } 1443 | 1444 | for &case in &["-a", "--a", "-a -b", "-a -- b", "-a --"] { 1445 | let mut p = parse(case); 1446 | p.next()?.unwrap(); 1447 | assert!(p.values().is_err()); 1448 | assert!(p.next().is_ok()); 1449 | assert!(p.next().unwrap().is_none()); 1450 | } 1451 | 1452 | for &case in &["-a=", "--a="] { 1453 | let mut p = parse(case); 1454 | p.next()?.unwrap(); 1455 | let mut iter = p.values()?; 1456 | let values: Vec<_> = iter.by_ref().collect(); 1457 | assert_eq!(values, &[""]); 1458 | assert!(iter.next().is_none()); 1459 | assert!(p.next()?.is_none()); 1460 | } 1461 | 1462 | // Test that .values() does not eagerly consume the first value 1463 | for &case in &["-a=b", "--a=b", "-a b"] { 1464 | let mut p = parse(case); 1465 | p.next()?.unwrap(); 1466 | assert!(p.values().is_ok()); 1467 | assert_eq!(p.value()?, "b"); 1468 | } 1469 | 1470 | { 1471 | let mut p = parse("-ab"); 1472 | p.next()?.unwrap(); 1473 | assert!(p.values().is_ok()); 1474 | 
assert_eq!(p.next()?.unwrap(), Short('b')); 1475 | } 1476 | 1477 | Ok(()) 1478 | } 1479 | 1480 | #[test] 1481 | fn raw_args() -> Result<(), Error> { 1482 | let mut p = parse("-a b c d"); 1483 | assert!(p.try_raw_args().is_some()); 1484 | assert_eq!(p.raw_args()?.collect::>(), &["-a", "b", "c", "d"]); 1485 | assert!(p.try_raw_args().is_some()); 1486 | assert!(p.next()?.is_none()); 1487 | assert!(p.try_raw_args().is_some()); 1488 | assert_eq!(p.raw_args()?.as_slice().len(), 0); 1489 | 1490 | let mut p = parse("-ab c d"); 1491 | p.next()?; 1492 | assert!(p.try_raw_args().is_none()); 1493 | assert!(p.raw_args().is_err()); 1494 | assert_eq!(p.try_raw_args().unwrap().collect::>(), &["c", "d"]); 1495 | assert!(p.next()?.is_none()); 1496 | assert_eq!(p.try_raw_args().unwrap().as_slice().len(), 0); 1497 | 1498 | let mut p = parse("-a b c d"); 1499 | assert_eq!(p.raw_args()?.take(3).collect::>(), &["-a", "b", "c"]); 1500 | assert_eq!(p.next()?, Some(Value("d".into()))); 1501 | assert!(p.next()?.is_none()); 1502 | 1503 | let mut p = parse("a"); 1504 | let mut it = p.raw_args()?; 1505 | assert_eq!(it.peek(), Some("a".as_ref())); 1506 | assert_eq!(it.next_if(|_| false), None); 1507 | assert_eq!( 1508 | it.next_if(|arg| { 1509 | assert_eq!(arg, "a"); 1510 | true 1511 | }), 1512 | Some("a".into()) 1513 | ); 1514 | assert!(p.next()?.is_none()); 1515 | 1516 | Ok(()) 1517 | } 1518 | 1519 | #[test] 1520 | fn bin_name() { 1521 | assert_eq!( 1522 | Parser::from_iter(&["foo", "bar", "baz"]).bin_name(), 1523 | Some("foo") 1524 | ); 1525 | assert_eq!(Parser::from_args(&["foo", "bar", "baz"]).bin_name(), None); 1526 | assert_eq!(Parser::from_iter(&[] as &[&str]).bin_name(), None); 1527 | assert_eq!(Parser::from_iter(&[""]).bin_name(), Some("")); 1528 | #[cfg(any(unix, windows, all(target_os = "wasi", target_env = "p1")))] 1529 | { 1530 | assert!(Parser::from_env().bin_name().is_some()); 1531 | assert_eq!( 1532 | Parser::from_iter(vec![bad_string("foo@bar")]).bin_name(), 1533 | 
Some("foo�bar") 1534 | ); 1535 | } 1536 | } 1537 | 1538 | #[test] 1539 | fn test_value_ext() -> Result<(), Error> { 1540 | let s = OsString::from("-10"); 1541 | assert_eq!(s.parse::()?, -10); 1542 | assert_eq!( 1543 | s.parse_with(|s| match s { 1544 | "-10" => Ok(0), 1545 | _ => Err("bad"), 1546 | })?, 1547 | 0, 1548 | ); 1549 | assert_eq!( 1550 | s.parse::().unwrap_err().to_string(), 1551 | r#"cannot parse argument "-10": invalid digit found in string"#, 1552 | ); 1553 | assert_eq!( 1554 | s.parse_with(|s| match s { 1555 | "11" => Ok(0_i32), 1556 | _ => Err("bad"), 1557 | }) 1558 | .unwrap_err() 1559 | .to_string(), 1560 | r#"cannot parse argument "-10": bad"#, 1561 | ); 1562 | assert_eq!(s.string()?, "-10"); 1563 | Ok(()) 1564 | } 1565 | 1566 | #[cfg(any(unix, windows, all(target_os = "wasi", target_env = "p1")))] 1567 | #[test] 1568 | fn test_value_ext_invalid() -> Result<(), Error> { 1569 | let s = bad_string("foo@"); 1570 | #[cfg(unix)] 1571 | let message = r#"argument is invalid unicode: "foo\xFF""#; 1572 | #[cfg(windows)] 1573 | let message = r#"argument is invalid unicode: "foo\u{d800}""#; 1574 | #[cfg(all(target_os = "wasi", target_env = "p1"))] 1575 | let message = r#"argument is invalid unicode: "foo\xFF""#; 1576 | assert_eq!(s.parse::().unwrap_err().to_string(), message); 1577 | assert_eq!( 1578 | s.parse_with(::from_str) 1579 | .unwrap_err() 1580 | .to_string(), 1581 | message, 1582 | ); 1583 | assert_eq!(s.clone().string().unwrap_err().to_string(), message); 1584 | assert_eq!( 1585 | Error::from(s.into_string().unwrap_err()).to_string(), 1586 | message, 1587 | ); 1588 | Ok(()) 1589 | } 1590 | 1591 | #[test] 1592 | fn test_errors() { 1593 | assert_eq!( 1594 | Arg::Short('o').unexpected().to_string(), 1595 | "invalid option '-o'", 1596 | ); 1597 | assert_eq!( 1598 | Arg::Long("opt").unexpected().to_string(), 1599 | "invalid option '--opt'", 1600 | ); 1601 | assert_eq!( 1602 | Arg::Value("foo".into()).unexpected().to_string(), 1603 | r#"unexpected 
argument "foo""#, 1604 | ); 1605 | assert_eq!( 1606 | Error::from("this is an error message").to_string(), 1607 | "this is an error message", 1608 | ); 1609 | assert_eq!( 1610 | Error::from("this is an error message".to_owned()).to_string(), 1611 | "this is an error message", 1612 | ); 1613 | assert!(Error::from("this is an error message").source().is_some()); 1614 | assert!(OsString::from("foo") 1615 | .parse::() 1616 | .unwrap_err() 1617 | .source() 1618 | .is_some()); 1619 | assert!(Arg::Short('o').unexpected().source().is_none()); 1620 | assert_eq!( 1621 | format!("{:?}", Arg::Short('o').unexpected()), 1622 | "invalid option '-o'", 1623 | ); 1624 | } 1625 | 1626 | #[test] 1627 | fn test_first_codepoint() { 1628 | assert_eq!(first_codepoint(b"foo").unwrap(), Some('f')); 1629 | assert_eq!(first_codepoint(b"").unwrap(), None); 1630 | assert_eq!(first_codepoint(b"f\xFF\xFF").unwrap(), Some('f')); 1631 | assert_eq!(first_codepoint(b"\xC2\xB5bar").unwrap(), Some('µ')); 1632 | assert_eq!(first_codepoint(b"foo\xC2\xB5").unwrap(), Some('f')); 1633 | assert_eq!( 1634 | first_codepoint(b"\xFF\xFF").unwrap_err().error_len(), 1635 | Some(1) 1636 | ); 1637 | assert_eq!(first_codepoint(b"\xC2").unwrap_err().error_len(), None); 1638 | assert_eq!(first_codepoint(b"\xC2a").unwrap_err().error_len(), Some(1)); 1639 | assert_eq!(first_codepoint(b"\xF0").unwrap_err().error_len(), None); 1640 | assert_eq!( 1641 | first_codepoint(b"\xF0\x9D\x84").unwrap_err().error_len(), 1642 | None 1643 | ); 1644 | assert_eq!( 1645 | first_codepoint(b"\xF0\x9Da").unwrap_err().error_len(), 1646 | Some(2) 1647 | ); 1648 | assert_eq!( 1649 | first_codepoint(b"\xF0\x9D\x84a").unwrap_err().error_len(), 1650 | Some(3) 1651 | ); 1652 | assert_eq!(first_codepoint(b"\xF0\x9D\x84\x9E").unwrap(), Some('𝄞')); 1653 | } 1654 | 1655 | #[cfg(unix)] 1656 | #[test] 1657 | fn test_lossy_decode() -> Result<(), Error> { 1658 | fn bparse(s: &[u8]) -> Parser { 1659 | Parser::from_args(s.split(|&b| b == b' 
').map(OsStr::from_bytes)) 1660 | } 1661 | 1662 | let mut p = bparse(b"-a\xFFc"); 1663 | assert_eq!(p.next()?.unwrap(), Short('a')); 1664 | assert_eq!(p.next()?.unwrap(), Short('�')); 1665 | assert_eq!(p.next()?.unwrap(), Short('c')); 1666 | assert_eq!(p.next()?, None); 1667 | 1668 | let mut p = bparse(b"-a\xFFc\xFF\xFF"); 1669 | assert_eq!(p.next()?.unwrap(), Short('a')); 1670 | assert_eq!(p.next()?.unwrap(), Short('�')); 1671 | assert_eq!(p.value()?, OsStr::from_bytes(b"c\xFF\xFF")); 1672 | 1673 | let mut p = bparse(b"-\xF0\x9Da"); 1674 | assert_eq!(p.next()?.unwrap(), Short('�')); 1675 | assert_eq!(p.next()?.unwrap(), Short('a')); 1676 | assert_eq!(p.next()?, None); 1677 | 1678 | let mut p = bparse(b"-\xF0\x9D"); 1679 | assert_eq!(p.next()?.unwrap(), Short('�')); 1680 | assert_eq!(p.next()?, None); 1681 | 1682 | Ok(()) 1683 | } 1684 | 1685 | /// Transform @ characters into invalid unicode. 1686 | fn bad_string(text: &str) -> OsString { 1687 | #[cfg(any(unix, all(target_os = "wasi", target_env = "p1")))] 1688 | { 1689 | let mut text = text.as_bytes().to_vec(); 1690 | for ch in &mut text { 1691 | if *ch == b'@' { 1692 | *ch = b'\xFF'; 1693 | } 1694 | } 1695 | OsString::from_vec(text) 1696 | } 1697 | #[cfg(windows)] 1698 | { 1699 | let mut out = Vec::new(); 1700 | for ch in text.chars() { 1701 | if ch == '@' { 1702 | out.push(0xD800); 1703 | } else { 1704 | let mut buf = [0; 2]; 1705 | out.extend(&*ch.encode_utf16(&mut buf)); 1706 | } 1707 | } 1708 | OsString::from_wide(&out) 1709 | } 1710 | #[cfg(not(any(unix, windows, all(target_os = "wasi", target_env = "p1"))))] 1711 | { 1712 | if text.contains('@') { 1713 | unimplemented!("Don't know how to create invalid OsStrings on this platform"); 1714 | } 1715 | text.into() 1716 | } 1717 | } 1718 | 1719 | /// [`bad_string`] for text that has been processed. 1720 | /// 1721 | /// On wasip1 we test invalid OS strings but the crate doesn't preserve them. 
1722 | /// 1723 | /// On other targets this is identical to `bad_string`. 1724 | #[allow(unused)] 1725 | fn bad_output_string(text: &str) -> OsString { 1726 | #[cfg(all(target_os = "wasi", target_env = "p1"))] 1727 | return text.replace("@", "�").into(); 1728 | #[cfg(not(all(target_os = "wasi", target_env = "p1")))] 1729 | return bad_string(text); 1730 | } 1731 | 1732 | /// Basic exhaustive testing of short combinations of "interesting" 1733 | /// arguments. They should not panic, not hang, and pass some checks. 1734 | /// 1735 | /// The advantage compared to full fuzzing is that it runs on all platforms 1736 | /// and together with the other tests. cargo-fuzz doesn't work on Windows 1737 | /// and requires a special incantation. 1738 | /// 1739 | /// A disadvantage is that it's still limited by arguments I could think of 1740 | /// and only does very short sequences. Another is that it's bad at 1741 | /// reporting failure, though the println!() helps. 1742 | /// 1743 | /// This test takes a while to run. 
1744 | #[test] 1745 | fn basic_fuzz() { 1746 | #[cfg(any(windows, unix, all(target_os = "wasi", target_env = "p1")))] 1747 | const VOCABULARY: &[&str] = &[ 1748 | "", "-", "--", "---", "a", "-a", "-aa", "@", "-@", "-a@", "-@a", "--a", "--@", "--a=a", 1749 | "--a=", "--a=@", "--@=a", "--=", "--=@", "--=a", "-@@", "-a=a", "-a=", "-=", "-a-", 1750 | ]; 1751 | #[cfg(not(any(windows, unix, all(target_os = "wasi", target_env = "p1"))))] 1752 | const VOCABULARY: &[&str] = &[ 1753 | "", "-", "--", "---", "a", "-a", "-aa", "--a", "--a=a", "--a=", "--=", "--=a", "-a=a", 1754 | "-a=", "-=", "-a-", 1755 | ]; 1756 | exhaust(Parser::new(None, Vec::new().into_iter()), 0); 1757 | let vocabulary: Vec = VOCABULARY.iter().map(|&s| bad_string(s)).collect(); 1758 | let mut permutations = vec![vec![]]; 1759 | for _ in 0..3 { 1760 | let mut new = Vec::new(); 1761 | for old in permutations { 1762 | for word in &vocabulary { 1763 | let mut extended = old.clone(); 1764 | extended.push(word); 1765 | new.push(extended); 1766 | } 1767 | } 1768 | permutations = new; 1769 | for permutation in &permutations { 1770 | println!("{:?}", permutation); 1771 | let p = Parser::from_args(permutation); 1772 | exhaust(p, 0); 1773 | } 1774 | } 1775 | } 1776 | 1777 | /// Run many sequences of methods on a Parser. 
///
/// Each method under test runs on its own clone of `parser` (only the last
/// probe, `values()`, uses `parser` itself). The state left behind is checked
/// and, where there is more to explore, the advanced parser is fed back into
/// this function with `depth + 1`. More than 100 nested calls count as a hang.
fn exhaust(mut parser: Parser, depth: u16) {
    assert!(depth <= 100, "Stuck in loop");

    // has_pending() == optional_value().is_some()
    if parser.has_pending() {
        {
            // The asserts expect `try_raw_args()` to keep refusing while a
            // value is pending, and one failed `raw_args()` to clear the way
            // for the calls after it.
            let mut raw_probe = parser.clone();
            assert!(raw_probe.try_raw_args().is_none());
            assert!(raw_probe.try_raw_args().is_none());
            assert!(raw_probe.raw_args().is_err());
            // Recovery possible
            assert!(raw_probe.raw_args().is_ok());
            assert!(raw_probe.try_raw_args().is_some());
        }

        {
            let mut value_probe = parser.clone();
            assert!(value_probe.optional_value().is_some());
            exhaust(value_probe, depth + 1);
        }
    } else {
        // Snapshot taken before the calls below so their effect can be checked.
        let state_before = parser.state.clone();
        let remaining_before = parser.source.as_slice().len();
        assert!(parser.optional_value().is_none());
        assert!(parser.raw_args().is_ok());
        assert!(parser.try_raw_args().is_some());
        // Verify state transitions
        match state_before {
            State::FinishedOpts => assert_matches!(parser.state, State::FinishedOpts),
            State::None | State::PendingValue(_) => {
                assert_matches!(parser.state, State::None);
            }
            State::Shorts(arg, pos) => {
                assert_eq!(pos, arg.len());
                assert_matches!(parser.state, State::None);
            }
            #[cfg(windows)]
            State::ShortsU16(arg, pos) => {
                assert_eq!(pos, arg.len());
                assert_matches!(parser.state, State::None);
            }
        }
        // No arguments were consumed
        assert_eq!(parser.source.as_slice().len(), remaining_before);
    }

    {
        // Probe `next()`: only `Ok(None)` ends this branch; any other result
        // (errors included) is explored further.
        let mut next_probe = parser.clone();
        match next_probe.next() {
            Ok(None) => {
                assert_matches!(next_probe.state, State::None | State::FinishedOpts);
                assert_eq!(next_probe.source.as_slice().len(), 0);
            }
            _ => exhaust(next_probe, depth + 1),
        }
    }

    {
        // Probe `value()`: an error is only expected once no arguments remain.
        let mut value_probe = parser.clone();
        match value_probe.value() {
            Ok(_) => {
                assert_matches!(value_probe.state, State::None | State::FinishedOpts);
                exhaust(value_probe, depth + 1);
            }
            Err(_) => {
                assert_matches!(value_probe.state, State::None | State::FinishedOpts);
                assert_eq!(value_probe.source.as_slice().len(), 0);
            }
        }
    }

    {
        // Probe `values()`: nothing runs afterwards, so this one works on
        // `parser` directly instead of a clone.
        match parser.values() {
            Ok(iter) => {
                assert!(iter.count() > 0);
                exhaust(parser, depth + 1);
            }
            Err(_) => (),
        }
    }
}
} // closes the enclosing scope opened earlier in the file (presumably `mod tests`)