├── .gitignore ├── .github ├── ISSUE_TEMPLATE │ ├── something-else.md │ ├── feature_request.md │ └── bug_report.md ├── PULL_REQUEST_TEMPLATE │ ├── something-else.md │ ├── bug_fix.md │ └── feature.md └── workflows │ └── ci.yml ├── tests ├── no_use_import.rs └── tests.rs ├── Cargo.toml ├── CHANGELOG.md ├── README.md ├── CODE_OF_CONDUCT.md ├── LICENSE └── src └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | .vscode 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/something-else.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Something Else 3 | about: Just give me a text box! 4 | --- 5 | 6 | 7 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/something-else.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Something Else 3 | about: Just give me a text box! 4 | 5 | --- 6 | 7 | 8 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/bug_fix.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug fix 3 | about: A bug squashed. 4 | 5 | --- 6 | 7 | **Related bugs:** 8 | This bug fix closes issue #???. 9 | 10 | **Description of problem:** 11 | Describe what was causing the related issue to happen. 12 | 13 | **Description of solution:** 14 | Describe the rationale behind the fix. 15 | 16 | **Checklist:** 17 | The CI will check all of these, but you'll need to have done them: 18 | 19 | * [ ] `cargo fmt -- --check` passes. 20 | * [ ] `cargo +nightly clippy` has no warnings. 21 | * [ ] `cargo test` passes. 
22 | -------------------------------------------------------------------------------- /tests/no_use_import.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 2 | 3 | use std::*; 4 | 5 | #[test] 6 | #[cfg_attr(not(feature = "failpoints"), ignore)] 7 | fn test_return() { 8 | let f = || { 9 | fail::fail_point!("return", |s: Option<String>| s 10 | .map_or(2, |s| s.parse().unwrap())); 11 | 0 12 | }; 13 | assert_eq!(f(), 0); 14 | 15 | fail::cfg("return", "return(1000)").unwrap(); 16 | assert_eq!(f(), 1000); 17 | 18 | fail::cfg("return", "return").unwrap(); 19 | assert_eq!(f(), 2); 20 | } 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Help us develop our roadmap and direction. 4 | --- 5 | 6 | **Is your feature request related to a problem? Please describe.** 7 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 8 | 9 | **Describe the solution you'd like** 10 | A clear and concise description of what you want to happen. 11 | 12 | **Describe alternatives you've considered** 13 | A clear and concise description of any alternative solutions or features you've considered. 14 | 15 | **Additional context** 16 | Add any other context or screenshots about the feature request here. 17 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature 3 | about: The dawn of a new era. 4 | 5 | --- 6 | 7 | **Related features:** 8 | This feature resolves issue #???. 9 | 10 | **Description of feature:** 11 | A short description of the feature implemented. 
12 | 13 | **Implementation:** 14 | Describe any pieces of the implementation that deserve further explanation. 15 | Detail any gotchas, uncertainties, or open questions about the implementation. 16 | 17 | **Checklist:** 18 | 19 | The CI will check all of these, but you'll need to have done them: 20 | 21 | * [ ] `cargo fmt -- --check` passes. 22 | * [ ] `cargo +nightly clippy` has no warnings. 23 | * [ ] `cargo test` passes. 24 | 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Found a bug? Help us squash it! 4 | --- 5 | 6 | **Describe the bug** 7 | A clear and concise description of what the bug is. 8 | 9 | **To Reproduce** 10 | Steps to reproduce the behavior: 11 | 1. Go to '...' 12 | 2. Click on '....' 13 | 3. Scroll down to '....' 14 | 4. See error 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **System information** 20 | * CPU architecture: 21 | * Distribution and kernel version: 22 | * SELinux on?: 23 | * Any other system details we should know?: 24 | 25 | **Additional context** 26 | Add any other context about the problem here. 27 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fail" 3 | version = "0.5.1" 4 | authors = ["The TiKV Project Developers"] 5 | license = "Apache-2.0" 6 | keywords = ["failpoints", "fail"] 7 | repository = "https://github.com/tikv/fail-rs" 8 | readme = "README.md" 9 | homepage = "https://github.com/tikv/fail-rs" 10 | documentation = "https://docs.rs/fail" 11 | description = "Fail points for rust." 
12 | categories = ["development-tools::testing"] 13 | edition = "2021" 14 | exclude = ["/.github/*", "/.travis.yml", "/appveyor.yml"] 15 | 16 | [dependencies] 17 | log = { version = "0.4", features = ["std"] } 18 | once_cell = "1.9.0" 19 | rand = "0.8" 20 | 21 | [features] 22 | failpoints = [] 23 | 24 | [package.metadata.docs.rs] 25 | all-features = true 26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.5.1 - 2022-10-08 2 | 3 | - Switch to 2021 edition and use once cell (#61) 4 | - Support configuring fail point in RAII style (#62) 5 | - Fix recursive macro invocation (#66) 6 | 7 | # 0.5.0 - 2021-11-04 8 | 9 | - update rand to 0.8 10 | 11 | # 0.4.0 - 2020-04-13 12 | 13 | - re-arrange macros to avoid leaking 14 | - support callback action 15 | - update rand to 0.7 16 | 17 | # 0.3.0 - 2019-07-15 18 | 19 | - fail-rs is now 2018-edition compatible! 20 | - refine documentations 21 | - add test-mutex pattern directly to the library 22 | - disable code generation by default 23 | 24 | # 0.2.1 - 2018-12-13 25 | 26 | - improve documentation 27 | 28 | # 0.2.0 - 2017-12-21 29 | 30 | - use name to identify failpoint instead of absolute module path 31 | - support listing configured rules 32 | 33 | # 0.1.0 - 2017-09-27 34 | 35 | initial release 36 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | schedule: 9 | - cron: '0 22 * * *' 10 | 11 | env: 12 | RUST_BACKTRACE: 1 13 | RUSTFLAGS: "--deny=warnings" 14 | 15 | jobs: 16 | Linux-Stable: 17 | name: Linux-Stable 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v2 21 | - run: cargo fmt -- --check 22 | - run: cargo clippy -- -D clippy::all 23 | - run: 
cargo test --all -- --nocapture 24 | - run: cargo test --all --all-features -- --nocapture 25 | - run: cargo bench --all -- --test 26 | - run: cargo bench --all --all-features -- --test 27 | 28 | Linux-Nightly: 29 | name: Linux-Nightly 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@v2 33 | - run: rustup default nightly 34 | - run: cargo test --all -- --nocapture 35 | - run: cargo test --all --all-features -- --nocapture 36 | - run: cargo bench --all -- --test 37 | - run: cargo bench --all --all-features -- --test 38 | 39 | Mac-Stable: 40 | name: Mac 41 | runs-on: macos-latest 42 | steps: 43 | - uses: actions/checkout@v2 44 | - run: cargo test --all -- --nocapture 45 | - run: cargo test --all --all-features -- --nocapture 46 | - run: cargo bench --all -- --test 47 | - run: cargo bench --all --all-features -- --test 48 | 49 | Win-Stable: 50 | name: Windows 51 | runs-on: windows-latest 52 | steps: 53 | - uses: actions/checkout@v2 54 | - run: cargo test --all -- --nocapture 55 | - run: cargo test --all --all-features -- --nocapture 56 | - run: cargo bench --all -- --test 57 | - run: cargo bench --all --all-features -- --test -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fail-rs 2 | 3 | [![CI](https://github.com/tikv/fail-rs/workflows/CI/badge.svg)](https://github.com/tikv/fail-rs/actions) 4 | [![Crates.io](https://img.shields.io/crates/v/fail.svg?maxAge=2592000)](https://crates.io/crates/fail) 5 | 6 | [Documentation](https://docs.rs/fail). 7 | 8 | A fail point implementation for Rust. 9 | 10 | Fail points are code instrumentations that allow errors and other behavior to be injected dynamically at runtime, primarily for testing purposes. Fail points are flexible and can be configured to exhibit a variety of behavior, including panics, early returns, and sleeping. 
They can be controlled both programmatically and via the environment, and can be triggered conditionally and probabilistically. 11 | 12 | This crate is inspired by FreeBSD's [failpoints](https://freebsd.org/cgi/man.cgi?query=fail). 13 | 14 | ## Usage 15 | 16 | First, add this to your `Cargo.toml`: 17 | 18 | ```toml 19 | [dependencies] 20 | fail = "0.5" 21 | ``` 22 | 23 | Now you can import the `fail_point!` macro from the `fail` crate and use it to inject dynamic failures. 24 | Fail point generation by this macro is disabled by default, and can be enabled where relevant with the `failpoints` Cargo feature. 25 | 26 | As an example, here's a simple program that uses a fail point to simulate an I/O panic: 27 | 28 | ```rust 29 | use fail::{fail_point, FailScenario}; 30 | 31 | fn do_fallible_work() { 32 | fail_point!("read-dir"); 33 | let _dir: Vec<_> = std::fs::read_dir(".").unwrap().collect(); 34 | // ... do some work on the directory ... 35 | } 36 | 37 | fn main() { 38 | let scenario = FailScenario::setup(); 39 | do_fallible_work(); 40 | scenario.teardown(); 41 | println!("done"); 42 | } 43 | ``` 44 | 45 | Here, the program calls `unwrap` on the result of `read_dir`, a function that returns a `Result`. In other words, this particular program expects this call to `read_dir` to always succeed. And in practice it almost always will, which makes the behavior of this program when `read_dir` fails difficult to test. By instrumenting the program with a fail point we can pretend that `read_dir` failed, causing the subsequent `unwrap` to panic, and allowing us to observe the program's behavior under failure conditions. 
46 | 47 | When the program is run normally it just prints "done": 48 | 49 | ```sh 50 | $ cargo run --features fail/failpoints 51 | Finished dev [unoptimized + debuginfo] target(s) in 0.01s 52 | Running `target/debug/failpointtest` 53 | done 54 | ``` 55 | 56 | But now, by setting the `FAILPOINTS` environment variable, we can see what happens if `read_dir` fails: 57 | 58 | ```sh 59 | $ FAILPOINTS=read-dir=panic cargo run --features fail/failpoints 60 | Finished dev [unoptimized + debuginfo] target(s) in 0.01s 61 | Running `target/debug/failpointtest` 62 | thread 'main' panicked at 'failpoint read-dir panic', /home/ubuntu/.cargo/registry/src/github.com-1ecc6299db9ec823/fail-0.2.0/src/lib.rs:286:25 63 | note: Run with `RUST_BACKTRACE=1` for a backtrace. 64 | ``` 65 | 66 | For further information see the [API documentation](https://docs.rs/fail). 67 | 68 | 69 | ## TODO 70 | 71 | Triggering a fail point via the HTTP API is planned but not implemented yet. 72 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at coc@pingcap.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /tests/tests.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
2 | 3 | use std::sync::atomic::{AtomicUsize, Ordering}; 4 | use std::sync::*; 5 | use std::time::*; 6 | use std::*; 7 | 8 | use fail::fail_point; 9 | 10 | #[test] 11 | fn test_off() { 12 | let f = || { 13 | fail_point!("off", |_| 2); 14 | 0 15 | }; 16 | assert_eq!(f(), 0); 17 | 18 | fail::cfg("off", "off").unwrap(); 19 | assert_eq!(f(), 0); 20 | } 21 | 22 | #[test] 23 | #[cfg_attr(not(feature = "failpoints"), ignore)] 24 | fn test_return() { 25 | let f = || { 26 | fail_point!("return", |s: Option| s 27 | .map_or(2, |s| s.parse().unwrap())); 28 | 0 29 | }; 30 | assert_eq!(f(), 0); 31 | 32 | fail::cfg("return", "return(1000)").unwrap(); 33 | assert_eq!(f(), 1000); 34 | 35 | fail::cfg("return", "return").unwrap(); 36 | assert_eq!(f(), 2); 37 | } 38 | 39 | #[test] 40 | #[cfg_attr(not(feature = "failpoints"), ignore)] 41 | fn test_sleep() { 42 | let f = || { 43 | fail_point!("sleep"); 44 | }; 45 | let timer = Instant::now(); 46 | f(); 47 | assert!(timer.elapsed() < Duration::from_millis(1000)); 48 | 49 | let timer = Instant::now(); 50 | fail::cfg("sleep", "sleep(1000)").unwrap(); 51 | f(); 52 | assert!(timer.elapsed() > Duration::from_millis(1000)); 53 | } 54 | 55 | #[test] 56 | #[should_panic] 57 | #[cfg_attr(not(feature = "failpoints"), ignore)] 58 | fn test_panic() { 59 | let f = || { 60 | fail_point!("panic"); 61 | }; 62 | fail::cfg("panic", "panic(msg)").unwrap(); 63 | f(); 64 | } 65 | 66 | #[test] 67 | #[cfg_attr(not(feature = "failpoints"), ignore)] 68 | fn test_print() { 69 | struct LogCollector(Arc>>); 70 | impl log::Log for LogCollector { 71 | fn enabled(&self, _: &log::Metadata) -> bool { 72 | true 73 | } 74 | fn log(&self, record: &log::Record) { 75 | let mut buf = self.0.lock().unwrap(); 76 | buf.push(format!("{}", record.args())); 77 | } 78 | fn flush(&self) {} 79 | } 80 | 81 | let buffer = Arc::new(Mutex::new(vec![])); 82 | let collector = LogCollector(buffer.clone()); 83 | log::set_max_level(log::LevelFilter::Info); 84 | 
log::set_boxed_logger(Box::new(collector)).unwrap(); 85 | 86 | let f = || { 87 | fail_point!("print"); 88 | }; 89 | fail::cfg("print", "print(msg)").unwrap(); 90 | f(); 91 | let msg = buffer.lock().unwrap().pop().unwrap(); 92 | assert_eq!(msg, "msg"); 93 | 94 | fail::cfg("print", "print").unwrap(); 95 | f(); 96 | let msg = buffer.lock().unwrap().pop().unwrap(); 97 | assert_eq!(msg, "failpoint print executed."); 98 | } 99 | 100 | #[test] 101 | #[cfg_attr(not(feature = "failpoints"), ignore)] 102 | fn test_pause() { 103 | let f = || { 104 | fail_point!("pause"); 105 | }; 106 | f(); 107 | 108 | fail::cfg("pause", "pause").unwrap(); 109 | let (tx, rx) = mpsc::channel(); 110 | thread::spawn(move || { 111 | // pause 112 | f(); 113 | tx.send(()).unwrap(); 114 | // woken up by new order pause, and then pause again. 115 | f(); 116 | tx.send(()).unwrap(); 117 | // woken up by remove, and then quit immediately. 118 | f(); 119 | tx.send(()).unwrap(); 120 | }); 121 | 122 | assert!(rx.recv_timeout(Duration::from_millis(500)).is_err()); 123 | fail::cfg("pause", "pause").unwrap(); 124 | rx.recv_timeout(Duration::from_millis(500)).unwrap(); 125 | 126 | assert!(rx.recv_timeout(Duration::from_millis(500)).is_err()); 127 | fail::remove("pause"); 128 | rx.recv_timeout(Duration::from_millis(500)).unwrap(); 129 | 130 | rx.recv_timeout(Duration::from_millis(500)).unwrap(); 131 | } 132 | 133 | #[test] 134 | fn test_yield() { 135 | let f = || { 136 | fail_point!("yield"); 137 | }; 138 | fail::cfg("test", "yield").unwrap(); 139 | f(); 140 | } 141 | 142 | #[test] 143 | #[cfg_attr(not(feature = "failpoints"), ignore)] 144 | fn test_callback() { 145 | let f1 = || { 146 | fail_point!("cb"); 147 | }; 148 | let f2 = || { 149 | fail_point!("cb"); 150 | }; 151 | 152 | let counter = Arc::new(AtomicUsize::new(0)); 153 | let counter2 = counter.clone(); 154 | fail::cfg_callback("cb", move || { 155 | counter2.fetch_add(1, Ordering::SeqCst); 156 | }) 157 | .unwrap(); 158 | f1(); 159 | f2(); 160 | 
assert_eq!(2, counter.load(Ordering::SeqCst)); 161 | } 162 | 163 | #[test] 164 | #[cfg_attr(not(feature = "failpoints"), ignore)] 165 | fn test_delay() { 166 | let f = || fail_point!("delay"); 167 | let timer = Instant::now(); 168 | fail::cfg("delay", "delay(1000)").unwrap(); 169 | f(); 170 | assert!(timer.elapsed() > Duration::from_millis(1000)); 171 | } 172 | 173 | #[test] 174 | #[cfg_attr(not(feature = "failpoints"), ignore)] 175 | fn test_freq_and_count() { 176 | let f = || { 177 | fail_point!("freq_and_count", |s: Option| s 178 | .map_or(2, |s| s.parse().unwrap())); 179 | 0 180 | }; 181 | fail::cfg( 182 | "freq_and_count", 183 | "50%50*return(1)->50%50*return(-1)->50*return", 184 | ) 185 | .unwrap(); 186 | let mut sum = 0; 187 | for _ in 0..5000 { 188 | let res = f(); 189 | sum += res; 190 | } 191 | assert_eq!(sum, 100); 192 | } 193 | 194 | #[test] 195 | #[cfg_attr(not(feature = "failpoints"), ignore)] 196 | fn test_condition() { 197 | let f = |_enabled| { 198 | fail_point!("condition", _enabled, |_| 2); 199 | 0 200 | }; 201 | assert_eq!(f(false), 0); 202 | 203 | fail::cfg("condition", "return").unwrap(); 204 | assert_eq!(f(false), 0); 205 | 206 | assert_eq!(f(true), 2); 207 | } 208 | 209 | #[test] 210 | fn test_list() { 211 | assert!(!fail::list().contains(&("list".to_string(), "off".to_string()))); 212 | fail::cfg("list", "off").unwrap(); 213 | assert!(fail::list().contains(&("list".to_string(), "off".to_string()))); 214 | fail::cfg("list", "return").unwrap(); 215 | assert!(fail::list().contains(&("list".to_string(), "return".to_string()))); 216 | } 217 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 TiKV Project Authors. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 2 | 3 | //! A fail point implementation for Rust. 4 | //! 5 | //! Fail points are code instrumentations that allow errors and other behavior 6 | //! to be injected dynamically at runtime, primarily for testing purposes. Fail 7 | //! points are flexible and can be configured to exhibit a variety of behavior, 8 | //! including panics, early returns, and sleeping. They can be controlled both 9 | //! programmatically and via the environment, and can be triggered 10 | //! conditionally and probabilistically. 11 | //! 12 | //! This crate is inspired by FreeBSD's 13 | //! [failpoints](https://freebsd.org/cgi/man.cgi?query=fail). 14 | //! 15 | //! ## Usage 16 | //! 17 | //! First, add this to your `Cargo.toml`: 18 | //! 19 | //! ```toml 20 | //! [dependencies] 21 | //! fail = "0.5" 22 | //! ``` 23 | //! 24 | //! Now you can import the `fail_point!` macro from the `fail` crate and use it 25 | //! 
to inject dynamic failures. 26 | //! 27 | //! As an example, here's a simple program that uses a fail point to simulate an 28 | //! I/O panic: 29 | //! 30 | //! ```rust 31 | //! use fail::{fail_point, FailScenario}; 32 | //! 33 | //! fn do_fallible_work() { 34 | //! fail_point!("read-dir"); 35 | //! let _dir: Vec<_> = std::fs::read_dir(".").unwrap().collect(); 36 | //! // ... do some work on the directory ... 37 | //! } 38 | //! 39 | //! let scenario = FailScenario::setup(); 40 | //! do_fallible_work(); 41 | //! scenario.teardown(); 42 | //! println!("done"); 43 | //! ``` 44 | //! 45 | //! Here, the program calls `unwrap` on the result of `read_dir`, a function 46 | //! that returns a `Result`. In other words, this particular program expects 47 | //! this call to `read_dir` to always succeed. And in practice it almost always 48 | //! will, which makes the behavior of this program when `read_dir` fails 49 | //! difficult to test. By instrumenting the program with a fail point we can 50 | //! pretend that `read_dir` failed, causing the subsequent `unwrap` to panic, 51 | //! and allowing us to observe the program's behavior under failure conditions. 52 | //! 53 | //! When the program is run normally it just prints "done": 54 | //! 55 | //! ```sh 56 | //! $ cargo run --features fail/failpoints 57 | //! Finished dev [unoptimized + debuginfo] target(s) in 0.01s 58 | //! Running `target/debug/failpointtest` 59 | //! done 60 | //! ``` 61 | //! 62 | //! But now, by setting the `FAILPOINTS` variable we can see what happens if the 63 | //! `read_dir` fails: 64 | //! 65 | //! ```sh 66 | //! FAILPOINTS=read-dir=panic cargo run --features fail/failpoints 67 | //! Finished dev [unoptimized + debuginfo] target(s) in 0.01s 68 | //! Running `target/debug/failpointtest` 69 | //! thread 'main' panicked at 'failpoint read-dir panic', /home/ubuntu/.cargo/registry/src/github.com-1ecc6299db9ec823/fail-0.2.0/src/lib.rs:286:25 70 | //! note: Run with `RUST_BACKTRACE=1` for a backtrace. 
71 | //! ``` 72 | //! 73 | //! ## Usage in tests 74 | //! 75 | //! The previous example triggers a fail point by modifying the `FAILPOINTS` 76 | //! environment variable. In practice, you'll often want to trigger fail points 77 | //! programmatically, in unit tests. 78 | //! Fail points are global resources, and Rust tests run in parallel, 79 | //! so tests that exercise fail points generally need to hold a lock to 80 | //! avoid interfering with each other. This is accomplished by `FailScenario`. 81 | //! 82 | //! Here's a basic pattern for writing unit tests with fail points: 83 | //! 84 | //! ```rust,no_run 85 | //! use fail::{fail_point, FailScenario}; 86 | //! 87 | //! fn do_fallible_work() { 88 | //! fail_point!("read-dir"); 89 | //! let _dir: Vec<_> = std::fs::read_dir(".").unwrap().collect(); 90 | //! // ... do some work on the directory ... 91 | //! } 92 | //! 93 | //! #[test] 94 | //! #[should_panic] 95 | //! fn test_fallible_work() { 96 | //! let scenario = FailScenario::setup(); 97 | //! fail::cfg("read-dir", "panic").unwrap(); 98 | //! 99 | //! do_fallible_work(); 100 | //! 101 | //! scenario.teardown(); 102 | //! } 103 | //! ``` 104 | //! 105 | //! Even if a test does not itself turn on any fail points, code that it runs 106 | //! could trigger a fail point that was configured by another thread. Because of 107 | //! this it is a best practice to put all fail point unit tests into their own 108 | //! binary. Here's an example of a snippet from `Cargo.toml` that creates a 109 | //! fail-point-specific test binary: 110 | //! 111 | //! ```toml 112 | //! [[test]] 113 | //! name = "failpoints" 114 | //! path = "tests/failpoints/mod.rs" 115 | //! required-features = ["fail/failpoints"] 116 | //! ``` 117 | //! 118 | //! 119 | //! ## Early return 120 | //! 121 | //! The previous examples illustrate injecting panics via fail points, but 122 | //! panics aren't the only — or even the most common — error pattern 123 | //! in Rust.
The more common type of error is propagated by `Result` return 124 | //! values, and fail points can inject those as well with "early returns". That 125 | //! is, when configuring a fail point as "return" (as opposed to "panic"), the 126 | //! fail point will immediately return from the function, optionally with a 127 | //! configurable value. 128 | //! 129 | //! The setup for early return requires a slightly different invocation of the 130 | //! `fail_point!` macro. To illustrate this, let's modify the `do_fallible_work` 131 | //! function we used earlier to return a `Result`: 132 | //! 133 | //! ```rust 134 | //! use fail::{fail_point, FailScenario}; 135 | //! use std::io; 136 | //! 137 | //! fn do_fallible_work() -> io::Result<()> { 138 | //! fail_point!("read-dir"); 139 | //! let _dir: Vec<_> = std::fs::read_dir(".")?.collect(); 140 | //! // ... do some work on the directory ... 141 | //! Ok(()) 142 | //! } 143 | //! 144 | //! fn main() -> io::Result<()> { 145 | //! let scenario = FailScenario::setup(); 146 | //! do_fallible_work()?; 147 | //! scenario.teardown(); 148 | //! println!("done"); 149 | //! Ok(()) 150 | //! } 151 | //! ``` 152 | //! 153 | //! This example has more proper Rust error handling, with no unwraps 154 | //! anywhere. Instead it uses `?` to propagate errors via the `Result` type 155 | //! return values. This is more realistic Rust code. 156 | //! 157 | //! The "read-dir" fail point though is not yet configured to support early 158 | //! return, so if we attempt to configure it to "return", we'll see an error 159 | //! like 160 | //! 161 | //! ```sh 162 | //! $ FAILPOINTS=read-dir=return cargo run --features fail/failpoints 163 | //! Finished dev [unoptimized + debuginfo] target(s) in 0.13s 164 | //! Running `target/debug/failpointtest` 165 | //! thread 'main' panicked at 'Return is not supported for the fail point "read-dir"', src/main.rs:7:5 166 | //! note: Run with `RUST_BACKTRACE=1` for a backtrace. 167 | //! ``` 168 | //! 169 | //!
This error tells us that the "read-dir" fail point is not defined correctly 170 | //! to support early return, and gives us the line number of that fail point. 171 | //! What we're missing in the fail point definition is code describing _how_ to 172 | //! return an error value, and the way we do this is by passing `fail_point!` a 173 | //! closure that returns the same type as the enclosing function. 174 | //! 175 | //! Here's a variation that does so: 176 | //! 177 | //! ```rust 178 | //! # use std::io; 179 | //! fn do_fallible_work() -> io::Result<()> { 180 | //! fail::fail_point!("read-dir", |_| { 181 | //! Err(io::Error::new(io::ErrorKind::PermissionDenied, "error")) 182 | //! }); 183 | //! let _dir: Vec<_> = std::fs::read_dir(".")?.collect(); 184 | //! // ... do some work on the directory ... 185 | //! Ok(()) 186 | //! } 187 | //! ``` 188 | //! 189 | //! And now if the "read-dir" fail point is configured to "return" we get a 190 | //! different result: 191 | //! 192 | //! ```sh 193 | //! $ FAILPOINTS=read-dir=return cargo run --features fail/failpoints 194 | //! Compiling failpointtest v0.1.0 195 | //! Finished dev [unoptimized + debuginfo] target(s) in 2.38s 196 | //! Running `target/debug/failpointtest` 197 | //! Error: Custom { kind: PermissionDenied, error: StringError("error") } 198 | //! ``` 199 | //! 200 | //! This time, `do_fallible_work` returned the error defined in our closure, 201 | //! which propagated all the way up and out of main. 202 | //! 203 | //! ## Advanced usage 204 | //! 205 | //! That's the basics of fail points: defining them with `fail_point!`, 206 | //! configuring them with `FAILPOINTS` and `fail::cfg`, and configuring them to 207 | //! panic and return early. But that's not all they can do. To learn more see 208 | //! the documentation for [`cfg`](fn.cfg.html), 209 | //! [`cfg_callback`](fn.cfg_callback.html) and 210 | //! [`fail_point!`](macro.fail_point.html). 211 | //! 212 | //! 213 | //! ## Usage considerations 214 | //!
215 | //! For most effective fail point usage, keep in mind the following: 216 | //! 217 | //! - Fail points are disabled by default and can be enabled via the `failpoints` 218 | //! feature. When failpoints are disabled, no code is generated by the macro. 219 | //! - Carefully consider complex, concurrent, non-deterministic combinations of 220 | //! fail points. Put test cases exercising fail points into their own test 221 | //! crate. 222 | //! - Fail points might have the same name, in which case they take the 223 | //! same actions. Be careful about duplicating fail point names, either within 224 | //! a single crate, or across multiple crates. 225 | 226 | #![deny(missing_docs, missing_debug_implementations)] 227 | 228 | use std::collections::HashMap; 229 | use std::env::VarError; 230 | use std::fmt::Debug; 231 | use std::str::FromStr; 232 | use std::sync::atomic::{AtomicUsize, Ordering}; 233 | use std::sync::{Arc, Condvar, Mutex, MutexGuard, RwLock, TryLockError}; 234 | use std::time::{Duration, Instant}; 235 | use std::{env, thread}; 236 | 237 | #[derive(Clone)] 238 | struct SyncCallback(Arc); 239 | 240 | impl Debug for SyncCallback { 241 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 242 | f.write_str("SyncCallback()") 243 | } 244 | } 245 | 246 | impl PartialEq for SyncCallback { 247 | fn eq(&self, other: &Self) -> bool { 248 | Arc::ptr_eq(&self.0, &other.0) 249 | } 250 | } 251 | 252 | impl SyncCallback { 253 | fn new(f: impl Fn() + Send + Sync + 'static) -> SyncCallback { 254 | SyncCallback(Arc::new(f)) 255 | } 256 | 257 | fn run(&self) { 258 | let callback = &self.0; 259 | callback(); 260 | } 261 | } 262 | 263 | /// Supported tasks. 264 | #[derive(Clone, Debug, PartialEq)] 265 | enum Task { 266 | /// Do nothing. 267 | Off, 268 | /// Return the value. 269 | Return(Option), 270 | /// Sleep for some milliseconds. 271 | Sleep(u64), 272 | /// Panic with the message. 273 | Panic(Option), 274 | /// Print the message. 
275 | Print(Option), 276 | /// Sleep until other action is set. 277 | Pause, 278 | /// Yield the CPU. 279 | Yield, 280 | /// Busy waiting for some milliseconds. 281 | Delay(u64), 282 | /// Call callback function. 283 | Callback(SyncCallback), 284 | } 285 | 286 | #[derive(Debug)] 287 | struct Action { 288 | task: Task, 289 | freq: f32, 290 | count: Option, 291 | } 292 | 293 | impl PartialEq for Action { 294 | fn eq(&self, hs: &Action) -> bool { 295 | if self.task != hs.task || self.freq != hs.freq { 296 | return false; 297 | } 298 | if let Some(ref lhs) = self.count { 299 | if let Some(ref rhs) = hs.count { 300 | return lhs.load(Ordering::Relaxed) == rhs.load(Ordering::Relaxed); 301 | } 302 | } else if hs.count.is_none() { 303 | return true; 304 | } 305 | false 306 | } 307 | } 308 | 309 | impl Action { 310 | fn new(task: Task, freq: f32, max_cnt: Option) -> Action { 311 | Action { 312 | task, 313 | freq, 314 | count: max_cnt.map(AtomicUsize::new), 315 | } 316 | } 317 | 318 | fn from_callback(f: impl Fn() + Send + Sync + 'static) -> Action { 319 | let task = Task::Callback(SyncCallback::new(f)); 320 | Action { 321 | task, 322 | freq: 1.0, 323 | count: None, 324 | } 325 | } 326 | 327 | fn get_task(&self) -> Option { 328 | use rand::Rng; 329 | 330 | if let Some(ref cnt) = self.count { 331 | let c = cnt.load(Ordering::Acquire); 332 | if c == 0 { 333 | return None; 334 | } 335 | } 336 | if self.freq < 1f32 && !rand::thread_rng().gen_bool(f64::from(self.freq)) { 337 | return None; 338 | } 339 | if let Some(ref ref_cnt) = self.count { 340 | let mut cnt = ref_cnt.load(Ordering::Acquire); 341 | loop { 342 | if cnt == 0 { 343 | return None; 344 | } 345 | let new_cnt = cnt - 1; 346 | match ref_cnt.compare_exchange_weak( 347 | cnt, 348 | new_cnt, 349 | Ordering::AcqRel, 350 | Ordering::Acquire, 351 | ) { 352 | Ok(_) => break, 353 | Err(c) => cnt = c, 354 | } 355 | } 356 | } 357 | Some(self.task.clone()) 358 | } 359 | } 360 | 361 | fn partition(s: &str, pattern: char) -> (&str, 
Option<&str>) { 362 | let mut splits = s.splitn(2, pattern); 363 | (splits.next().unwrap(), splits.next()) 364 | } 365 | 366 | impl FromStr for Action { 367 | type Err = String; 368 | 369 | /// Parse an action. 370 | /// 371 | /// `s` should be in the format `[p%][cnt*]task[(args)]`, `p%` is the frequency, 372 | /// `cnt` is the max times the action can be triggered. 373 | fn from_str(s: &str) -> Result { 374 | let mut remain = s.trim(); 375 | let mut args = None; 376 | // in case there is '%' in args, we need to parse it first. 377 | let (first, second) = partition(remain, '('); 378 | if let Some(second) = second { 379 | remain = first; 380 | if !second.ends_with(')') { 381 | return Err("parentheses do not match".to_owned()); 382 | } 383 | args = Some(&second[..second.len() - 1]); 384 | } 385 | 386 | let mut frequency = 1f32; 387 | let (first, second) = partition(remain, '%'); 388 | if let Some(second) = second { 389 | remain = second; 390 | match first.parse::() { 391 | Err(e) => return Err(format!("failed to parse frequency: {e}")), 392 | Ok(freq) => frequency = freq / 100.0, 393 | } 394 | } 395 | 396 | let mut max_cnt = None; 397 | let (first, second) = partition(remain, '*'); 398 | if let Some(second) = second { 399 | remain = second; 400 | match first.parse() { 401 | Err(e) => return Err(format!("failed to parse count: {e}")), 402 | Ok(cnt) => max_cnt = Some(cnt), 403 | } 404 | } 405 | 406 | let parse_timeout = || match args { 407 | None => Err("sleep require timeout".to_owned()), 408 | Some(timeout_str) => match timeout_str.parse() { 409 | Err(e) => Err(format!("failed to parse timeout: {e}")), 410 | Ok(timeout) => Ok(timeout), 411 | }, 412 | }; 413 | 414 | let task = match remain { 415 | "off" => Task::Off, 416 | "return" => Task::Return(args.map(str::to_owned)), 417 | "sleep" => Task::Sleep(parse_timeout()?), 418 | "panic" => Task::Panic(args.map(str::to_owned)), 419 | "print" => Task::Print(args.map(str::to_owned)), 420 | "pause" => Task::Pause, 421 | 
"yield" => Task::Yield, 422 | "delay" => Task::Delay(parse_timeout()?), 423 | _ => return Err(format!("unrecognized command {remain:?}")), 424 | }; 425 | 426 | Ok(Action::new(task, frequency, max_cnt)) 427 | } 428 | } 429 | 430 | #[allow(clippy::mutex_atomic)] 431 | #[derive(Debug)] 432 | struct FailPoint { 433 | pause: Mutex, 434 | pause_notifier: Condvar, 435 | actions: RwLock>, 436 | actions_str: RwLock, 437 | } 438 | 439 | #[allow(clippy::mutex_atomic)] 440 | impl FailPoint { 441 | fn new() -> FailPoint { 442 | FailPoint { 443 | pause: Mutex::new(false), 444 | pause_notifier: Condvar::new(), 445 | actions: RwLock::default(), 446 | actions_str: RwLock::default(), 447 | } 448 | } 449 | 450 | fn set_actions(&self, actions_str: &str, actions: Vec) { 451 | loop { 452 | // TODO: maybe busy waiting here. 453 | match self.actions.try_write() { 454 | Err(TryLockError::WouldBlock) => {} 455 | Ok(mut guard) => { 456 | *guard = actions; 457 | *self.actions_str.write().unwrap() = actions_str.to_string(); 458 | return; 459 | } 460 | Err(e) => panic!("unexpected poison: {e:?}"), 461 | } 462 | let mut guard = self.pause.lock().unwrap(); 463 | *guard = false; 464 | self.pause_notifier.notify_all(); 465 | } 466 | } 467 | 468 | #[allow(clippy::option_option)] 469 | fn eval(&self, name: &str) -> Option> { 470 | let task = { 471 | let actions = self.actions.read().unwrap(); 472 | match actions.iter().filter_map(Action::get_task).next() { 473 | Some(Task::Pause) => { 474 | let mut guard = self.pause.lock().unwrap(); 475 | *guard = true; 476 | loop { 477 | guard = self.pause_notifier.wait(guard).unwrap(); 478 | if !*guard { 479 | break; 480 | } 481 | } 482 | return None; 483 | } 484 | Some(t) => t, 485 | None => return None, 486 | } 487 | }; 488 | 489 | match task { 490 | Task::Off => {} 491 | Task::Return(s) => return Some(s), 492 | Task::Sleep(t) => thread::sleep(Duration::from_millis(t)), 493 | Task::Panic(msg) => match msg { 494 | Some(ref msg) => panic!("{msg}"), 495 | None => 
panic!("failpoint {name} panic"), 496 | }, 497 | Task::Print(msg) => match msg { 498 | Some(ref msg) => log::info!("{msg}"), 499 | None => log::info!("failpoint {name} executed."), 500 | }, 501 | Task::Pause => unreachable!(), 502 | Task::Yield => thread::yield_now(), 503 | Task::Delay(t) => { 504 | let timer = Instant::now(); 505 | let timeout = Duration::from_millis(t); 506 | while timer.elapsed() < timeout {} 507 | } 508 | Task::Callback(f) => { 509 | f.run(); 510 | } 511 | } 512 | None 513 | } 514 | } 515 | 516 | /// Registry with failpoints configuration. 517 | type Registry = HashMap>; 518 | 519 | #[derive(Debug, Default)] 520 | struct FailPointRegistry { 521 | // TODO: remove rwlock or store *mut FailPoint 522 | registry: RwLock, 523 | } 524 | 525 | use once_cell::sync::Lazy; 526 | 527 | static REGISTRY: Lazy = Lazy::new(FailPointRegistry::default); 528 | static SCENARIO: Lazy> = Lazy::new(|| Mutex::new(®ISTRY)); 529 | 530 | /// Test scenario with configured fail points. 531 | #[derive(Debug)] 532 | pub struct FailScenario<'a> { 533 | scenario_guard: MutexGuard<'a, &'static FailPointRegistry>, 534 | } 535 | 536 | impl<'a> FailScenario<'a> { 537 | /// Set up the system for a fail points scenario. 538 | /// 539 | /// Configures all fail points specified in the `FAILPOINTS` environment variable. 540 | /// It does not otherwise change any existing fail point configuration. 541 | /// 542 | /// The format of `FAILPOINTS` is `failpoint=actions;...`, where 543 | /// `failpoint` is the name of the fail point. For more information 544 | /// about fail point actions see the [`cfg`](fn.cfg.html) function and 545 | /// the [`fail_point`](macro.fail_point.html) macro. 546 | /// 547 | /// `FAILPOINTS` may configure fail points that are not actually defined. In 548 | /// this case the configuration has no effect. 
549 | /// 550 | /// This function should generally be called prior to running a test with fail 551 | /// points, and afterward paired with [`teardown`](#method.teardown). 552 | /// 553 | /// # Panics 554 | /// 555 | /// Panics if an action is not formatted correctly. 556 | pub fn setup() -> Self { 557 | // Cleanup first, in case of previous failed/panic'ed test scenarios. 558 | let scenario_guard = SCENARIO.lock().unwrap_or_else(|e| e.into_inner()); 559 | let mut registry = scenario_guard.registry.write().unwrap(); 560 | Self::cleanup(&mut registry); 561 | 562 | let failpoints = match env::var("FAILPOINTS") { 563 | Ok(s) => s, 564 | Err(VarError::NotPresent) => return Self { scenario_guard }, 565 | Err(e) => panic!("invalid failpoints: {e:?}"), 566 | }; 567 | for mut cfg in failpoints.trim().split(';') { 568 | cfg = cfg.trim(); 569 | if cfg.is_empty() { 570 | continue; 571 | } 572 | let (name, order) = partition(cfg, '='); 573 | match order { 574 | None => panic!("invalid failpoint: {cfg:?}"), 575 | Some(order) => { 576 | if let Err(e) = set(&mut registry, name.to_owned(), order) { 577 | panic!("unable to configure failpoint \"{name}\": {e}"); 578 | } 579 | } 580 | } 581 | } 582 | Self { scenario_guard } 583 | } 584 | 585 | /// Tear down the fail point system. 586 | /// 587 | /// Clears the configuration of all fail points. Any paused fail 588 | /// points will be notified before they are deactivated. 589 | /// 590 | /// This function should generally be called after running a test with fail points. 591 | /// Calling `teardown` without previously calling `setup` results in a no-op. 592 | pub fn teardown(self) { 593 | drop(self) 594 | } 595 | 596 | /// Clean all registered fail points. 597 | fn cleanup(registry: &mut std::sync::RwLockWriteGuard<'a, Registry>) { 598 | for p in registry.values() { 599 | // wake up all pause failpoint. 
600 | p.set_actions("", vec![]); 601 | } 602 | registry.clear(); 603 | } 604 | } 605 | 606 | impl Drop for FailScenario<'_> { 607 | fn drop(&mut self) { 608 | let mut registry = self.scenario_guard.registry.write().unwrap(); 609 | Self::cleanup(&mut registry) 610 | } 611 | } 612 | 613 | /// Returns whether code generation for failpoints is enabled. 614 | /// 615 | /// This function allows consumers to check (at runtime) whether the library 616 | /// was compiled with the (buildtime) `failpoints` feature, which enables 617 | /// code generation for failpoints. 618 | pub const fn has_failpoints() -> bool { 619 | cfg!(feature = "failpoints") 620 | } 621 | 622 | /// Get all registered fail points. 623 | /// 624 | /// Return a vector of `(name, actions)` pairs. 625 | pub fn list() -> Vec<(String, String)> { 626 | let registry = REGISTRY.registry.read().unwrap(); 627 | registry 628 | .iter() 629 | .map(|(name, fp)| (name.to_string(), fp.actions_str.read().unwrap().clone())) 630 | .collect() 631 | } 632 | 633 | #[doc(hidden)] 634 | pub fn eval) -> R>(name: &str, f: F) -> Option { 635 | let p = { 636 | let registry = REGISTRY.registry.read().unwrap(); 637 | match registry.get(name) { 638 | None => return None, 639 | Some(p) => p.clone(), 640 | } 641 | }; 642 | p.eval(name).map(f) 643 | } 644 | 645 | /// Configure the actions for a fail point at runtime. 646 | /// 647 | /// Each fail point can be configured with a series of actions, specified by the 648 | /// `actions` argument. The format of `actions` is `action[->action...]`. When 649 | /// multiple actions are specified, an action will be checked only when its 650 | /// former action is not triggered. 651 | /// 652 | /// The format of a single action is `[p%][cnt*]task[(arg)]`. `p%` is the 653 | /// expected probability that the action is triggered, and `cnt*` is the max 654 | /// times the action can be triggered. The supported values of `task` are: 655 | /// 656 | /// - `off`, the fail point will do nothing. 
657 | /// - `return(arg)`, return early when the fail point is triggered. `arg` is passed to `$e` 658 | /// (defined via the `fail_point!` macro) as a string. 659 | /// - `sleep(milliseconds)`, sleep for the specified time. 660 | /// - `panic(msg)`, panic with the message. 661 | /// - `print(msg)`, log the message, using the `log` crate, at the `info` level. 662 | /// - `pause`, sleep until other action is set to the fail point. 663 | /// - `yield`, yield the CPU. 664 | /// - `delay(milliseconds)`, busy waiting for the specified time. 665 | /// 666 | /// For example, `20%3*print(still alive!)->panic` means the fail point has 20% chance to print a 667 | /// message "still alive!" and 80% chance to panic. And the message will be printed at most 3 668 | /// times. 669 | /// 670 | /// The `FAILPOINTS` environment variable accepts this same syntax for its fail 671 | /// point actions. 672 | /// 673 | /// A call to `cfg` with a particular fail point name overwrites any existing actions for 674 | /// that fail point, including those set via the `FAILPOINTS` environment variable. 675 | pub fn cfg>(name: S, actions: &str) -> Result<(), String> { 676 | let mut registry = REGISTRY.registry.write().unwrap(); 677 | set(&mut registry, name.into(), actions) 678 | } 679 | 680 | /// Configure the actions for a fail point at runtime. 681 | /// 682 | /// Each fail point can be configured by a callback. The process will call this callback function 683 | /// when it meets this fail point. 684 | pub fn cfg_callback(name: S, f: F) -> Result<(), String> 685 | where 686 | S: Into, 687 | F: Fn() + Send + Sync + 'static, 688 | { 689 | let mut registry = REGISTRY.registry.write().unwrap(); 690 | let p = registry 691 | .entry(name.into()) 692 | .or_insert_with(|| Arc::new(FailPoint::new())); 693 | let action = Action::from_callback(f); 694 | let actions = vec![action]; 695 | p.set_actions("callback", actions); 696 | Ok(()) 697 | } 698 | 699 | /// Remove a fail point.
700 | /// 701 | /// If the fail point doesn't exist, nothing will happen. 702 | pub fn remove>(name: S) { 703 | let mut registry = REGISTRY.registry.write().unwrap(); 704 | if let Some(p) = registry.remove(name.as_ref()) { 705 | // wake up all pause failpoint. 706 | p.set_actions("", vec![]); 707 | } 708 | } 709 | 710 | /// Configure fail point in RAII style. 711 | #[derive(Debug)] 712 | pub struct FailGuard(String); 713 | 714 | impl Drop for FailGuard { 715 | fn drop(&mut self) { 716 | remove(&self.0); 717 | } 718 | } 719 | 720 | impl FailGuard { 721 | /// Configure the actions for a fail point during the lifetime of the returning `FailGuard`. 722 | /// 723 | /// Read documentation of [`cfg`] for more details. 724 | pub fn new>(name: S, actions: &str) -> Result { 725 | let name = name.into(); 726 | cfg(&name, actions)?; 727 | Ok(FailGuard(name)) 728 | } 729 | 730 | /// Configure the actions for a fail point during the lifetime of the returning `FailGuard`. 731 | /// 732 | /// Read documentation of [`cfg_callback`] for more details. 733 | pub fn with_callback(name: S, f: F) -> Result 734 | where 735 | S: Into, 736 | F: Fn() + Send + Sync + 'static, 737 | { 738 | let name = name.into(); 739 | cfg_callback(&name, f)?; 740 | Ok(FailGuard(name)) 741 | } 742 | } 743 | 744 | fn set( 745 | registry: &mut HashMap>, 746 | name: String, 747 | actions: &str, 748 | ) -> Result<(), String> { 749 | let actions_str = actions; 750 | // `actions` are in the format of `failpoint[->failpoint...]`. 751 | let actions = actions 752 | .split("->") 753 | .map(Action::from_str) 754 | .collect::>()?; 755 | // Please note that we can't figure out whether there is a failpoint named `name`, 756 | // so we may insert a failpoint that doesn't exist at all. 757 | let p = registry 758 | .entry(name) 759 | .or_insert_with(|| Arc::new(FailPoint::new())); 760 | p.set_actions(actions_str, actions); 761 | Ok(()) 762 | } 763 | 764 | /// Define a fail point (requires `failpoints` feature). 
765 | /// 766 | /// The `fail_point!` macro has three forms, and they all take a name as the 767 | /// first argument. The simplest form takes only a name and is suitable for 768 | /// executing most fail point behavior, including panicking, but not for early 769 | /// return or conditional execution based on a local flag. 770 | /// 771 | /// The three forms of fail points look as follows. 772 | /// 773 | /// 1. A basic fail point: 774 | /// 775 | /// ```rust 776 | /// # #[macro_use] extern crate fail; 777 | /// fn function_return_unit() { 778 | /// fail_point!("fail-point-1"); 779 | /// } 780 | /// ``` 781 | /// 782 | /// This form of fail point can be configured to panic, print, sleep, pause, etc., but 783 | /// not to return from the function early. 784 | /// 785 | /// 2. A fail point that may return early: 786 | /// 787 | /// ```rust 788 | /// # #[macro_use] extern crate fail; 789 | /// fn function_return_value() -> u64 { 790 | /// fail_point!("fail-point-2", |r| r.map_or(2, |e| e.parse().unwrap())); 791 | /// 0 792 | /// } 793 | /// ``` 794 | /// 795 | /// This form of fail point can additionally be configured to return early from 796 | /// the enclosing function. It accepts a closure, which itself accepts an 797 | /// `Option`, and is expected to transform that argument into the early 798 | /// return value. The argument string is sourced from the fail point 799 | /// configuration string. For example configuring this "fail-point-2" as 800 | /// "return(100)" will execute the fail point closure, passing it a `Some` value 801 | /// containing a `String` equal to "100"; the closure then parses it into the 802 | /// return value. 803 | /// 804 | /// 3. 
A fail point with conditional execution: 805 | /// 806 | /// ```rust 807 | /// # #[macro_use] extern crate fail; 808 | /// fn function_conditional(enable: bool) { 809 | /// fail_point!("fail-point-3", enable, |_| {}); 810 | /// } 811 | /// ``` 812 | /// 813 | /// In this final form, the second argument is a local boolean expression that 814 | /// must evaluate to `true` before the fail point is evaluated. The third 815 | /// argument is again an early-return closure. 816 | /// 817 | /// The three macro arguments (or "designators") are called `$name`, `$cond`, 818 | /// and `$e`. `$name` must be `&str`, `$cond` must be a boolean expression, 819 | /// and `$e` must be a function or closure that accepts an `Option<String>` and 820 | /// returns the same type as the enclosing function. 821 | /// 822 | /// For more examples see the [crate documentation](index.html). For more 823 | /// information about controlling fail points see the [`cfg`](fn.cfg.html) 824 | /// function. 825 | #[macro_export] 826 | #[cfg(feature = "failpoints")] 827 | macro_rules! fail_point { 828 | ($name:expr) => {{ 829 | $crate::eval($name, |_| { 830 | panic!("Return is not supported for the fail point \"{}\"", $name); 831 | }); 832 | }}; 833 | ($name:expr, $e:expr) => {{ 834 | if let Some(res) = $crate::eval($name, $e) { 835 | return res; 836 | } 837 | }}; 838 | ($name:expr, $cond:expr, $e:expr) => {{ 839 | if $cond { 840 | $crate::fail_point!($name, $e); 841 | } 842 | }}; 843 | } 844 | 845 | /// Define a fail point (disabled, see `failpoints` feature). 846 | #[macro_export] 847 | #[cfg(not(feature = "failpoints"))] 848 | macro_rules!
fail_point { 849 | ($name:expr, $e:expr) => {{}}; 850 | ($name:expr) => {{}}; 851 | ($name:expr, $cond:expr, $e:expr) => {{}}; 852 | } 853 | 854 | #[cfg(test)] 855 | mod tests { 856 | use super::*; 857 | 858 | use std::sync::*; 859 | 860 | #[test] 861 | fn test_has_failpoints() { 862 | assert_eq!(cfg!(feature = "failpoints"), has_failpoints()); 863 | } 864 | 865 | #[test] 866 | fn test_off() { 867 | let point = FailPoint::new(); 868 | point.set_actions("", vec![Action::new(Task::Off, 1.0, None)]); 869 | assert!(point.eval("test_fail_point_off").is_none()); 870 | } 871 | 872 | #[test] 873 | fn test_return() { 874 | let point = FailPoint::new(); 875 | point.set_actions("", vec![Action::new(Task::Return(None), 1.0, None)]); 876 | let res = point.eval("test_fail_point_return"); 877 | assert_eq!(res, Some(None)); 878 | 879 | let ret = Some("test".to_owned()); 880 | point.set_actions("", vec![Action::new(Task::Return(ret.clone()), 1.0, None)]); 881 | let res = point.eval("test_fail_point_return"); 882 | assert_eq!(res, Some(ret)); 883 | } 884 | 885 | #[test] 886 | fn test_sleep() { 887 | let point = FailPoint::new(); 888 | let timer = Instant::now(); 889 | point.set_actions("", vec![Action::new(Task::Sleep(1000), 1.0, None)]); 890 | assert!(point.eval("test_fail_point_sleep").is_none()); 891 | assert!(timer.elapsed() > Duration::from_millis(1000)); 892 | } 893 | 894 | #[should_panic] 895 | #[test] 896 | fn test_panic() { 897 | let point = FailPoint::new(); 898 | point.set_actions("", vec![Action::new(Task::Panic(None), 1.0, None)]); 899 | point.eval("test_fail_point_panic"); 900 | } 901 | 902 | #[test] 903 | fn test_print() { 904 | struct LogCollector(Arc>>); 905 | impl log::Log for LogCollector { 906 | fn enabled(&self, _: &log::Metadata) -> bool { 907 | true 908 | } 909 | fn log(&self, record: &log::Record) { 910 | let mut buf = self.0.lock().unwrap(); 911 | buf.push(format!("{}", record.args())); 912 | } 913 | fn flush(&self) {} 914 | } 915 | 916 | let buffer = 
Arc::new(Mutex::new(vec![])); 917 | let collector = LogCollector(buffer.clone()); 918 | log::set_max_level(log::LevelFilter::Info); 919 | log::set_boxed_logger(Box::new(collector)).unwrap(); 920 | 921 | let point = FailPoint::new(); 922 | point.set_actions("", vec![Action::new(Task::Print(None), 1.0, None)]); 923 | assert!(point.eval("test_fail_point_print").is_none()); 924 | let msg = buffer.lock().unwrap().pop().unwrap(); 925 | assert_eq!(msg, "failpoint test_fail_point_print executed."); 926 | } 927 | 928 | #[test] 929 | fn test_pause() { 930 | let point = Arc::new(FailPoint::new()); 931 | point.set_actions("", vec![Action::new(Task::Pause, 1.0, None)]); 932 | let p = point.clone(); 933 | let (tx, rx) = mpsc::channel(); 934 | thread::spawn(move || { 935 | assert_eq!(p.eval("test_fail_point_pause"), None); 936 | tx.send(()).unwrap(); 937 | }); 938 | assert!(rx.recv_timeout(Duration::from_secs(1)).is_err()); 939 | point.set_actions("", vec![Action::new(Task::Off, 1.0, None)]); 940 | rx.recv_timeout(Duration::from_secs(1)).unwrap(); 941 | } 942 | 943 | #[test] 944 | fn test_yield() { 945 | let point = FailPoint::new(); 946 | point.set_actions("", vec![Action::new(Task::Yield, 1.0, None)]); 947 | assert!(point.eval("test_fail_point_yield").is_none()); 948 | } 949 | 950 | #[test] 951 | fn test_delay() { 952 | let point = FailPoint::new(); 953 | let timer = Instant::now(); 954 | point.set_actions("", vec![Action::new(Task::Delay(1000), 1.0, None)]); 955 | assert!(point.eval("test_fail_point_delay").is_none()); 956 | assert!(timer.elapsed() > Duration::from_millis(1000)); 957 | } 958 | 959 | #[test] 960 | fn test_frequency_and_count() { 961 | let point = FailPoint::new(); 962 | point.set_actions("", vec![Action::new(Task::Return(None), 0.8, Some(500))]); 963 | let mut count = 0; 964 | let mut times = 0f64; 965 | while count < 500 { 966 | if point.eval("test_fail_point_frequency").is_some() { 967 | count += 1; 968 | } 969 | times += 1f64; 970 | } 971 | assert!(500.0 / 
0.9 < times && times < 500.0 / 0.7, "{times}"); 972 | for _ in 0..times as u64 { 973 | assert!(point.eval("test_fail_point_frequency").is_none()); 974 | } 975 | } 976 | 977 | #[test] 978 | fn test_parse() { 979 | let cases = vec![ 980 | ("return", Action::new(Task::Return(None), 1.0, None)), 981 | ( 982 | "return(64)", 983 | Action::new(Task::Return(Some("64".to_owned())), 1.0, None), 984 | ), 985 | ("5*return", Action::new(Task::Return(None), 1.0, Some(5))), 986 | ("25%return", Action::new(Task::Return(None), 0.25, None)), 987 | ( 988 | "125%2*return", 989 | Action::new(Task::Return(None), 1.25, Some(2)), 990 | ), 991 | ( 992 | "return(2%5)", 993 | Action::new(Task::Return(Some("2%5".to_owned())), 1.0, None), 994 | ), 995 | ("125%2*off", Action::new(Task::Off, 1.25, Some(2))), 996 | ( 997 | "125%2*sleep(100)", 998 | Action::new(Task::Sleep(100), 1.25, Some(2)), 999 | ), 1000 | (" 125%2*off ", Action::new(Task::Off, 1.25, Some(2))), 1001 | ("125%2*panic", Action::new(Task::Panic(None), 1.25, Some(2))), 1002 | ( 1003 | "125%2*panic(msg)", 1004 | Action::new(Task::Panic(Some("msg".to_owned())), 1.25, Some(2)), 1005 | ), 1006 | ("125%2*print", Action::new(Task::Print(None), 1.25, Some(2))), 1007 | ( 1008 | "125%2*print(msg)", 1009 | Action::new(Task::Print(Some("msg".to_owned())), 1.25, Some(2)), 1010 | ), 1011 | ("125%2*pause", Action::new(Task::Pause, 1.25, Some(2))), 1012 | ("125%2*yield", Action::new(Task::Yield, 1.25, Some(2))), 1013 | ("125%2*delay(2)", Action::new(Task::Delay(2), 1.25, Some(2))), 1014 | ]; 1015 | for (expr, exp) in cases { 1016 | let res: Action = expr.parse().unwrap(); 1017 | assert_eq!(res, exp); 1018 | } 1019 | 1020 | let fail_cases = vec![ 1021 | "delay", 1022 | "sleep", 1023 | "Return", 1024 | "ab%return", 1025 | "ab*return", 1026 | "return(msg", 1027 | "unknown", 1028 | ]; 1029 | for case in fail_cases { 1030 | assert!(case.parse::().is_err()); 1031 | } 1032 | } 1033 | 1034 | // This case should be tested as integration case, but when 
calling `teardown` other cases 1035 | // like `test_pause` maybe also affected, so it's better keep it here. 1036 | #[test] 1037 | #[cfg_attr(not(feature = "failpoints"), ignore)] 1038 | fn test_setup_and_teardown() { 1039 | let f1 = || { 1040 | fail_point!("setup_and_teardown1", |_| 1); 1041 | 0 1042 | }; 1043 | let f2 = || { 1044 | fail_point!("setup_and_teardown2", |_| 2); 1045 | 0 1046 | }; 1047 | env::set_var( 1048 | "FAILPOINTS", 1049 | "setup_and_teardown1=return;setup_and_teardown2=pause;", 1050 | ); 1051 | let scenario = FailScenario::setup(); 1052 | assert_eq!(f1(), 1); 1053 | 1054 | let (tx, rx) = mpsc::channel(); 1055 | thread::spawn(move || { 1056 | tx.send(f2()).unwrap(); 1057 | }); 1058 | assert!(rx.recv_timeout(Duration::from_millis(500)).is_err()); 1059 | 1060 | scenario.teardown(); 1061 | assert_eq!(rx.recv_timeout(Duration::from_millis(500)).unwrap(), 0); 1062 | assert_eq!(f1(), 0); 1063 | } 1064 | } 1065 | --------------------------------------------------------------------------------