├── .github ├── stale.yml ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── --ask-a-question.md │ ├── ---request-a-new-feature.md │ └── ---report-a-bug.md ├── auto_pr_team.yml ├── FUNDING.yml ├── no-response.yml └── pull_request_template.md ├── demo ├── .gitignore ├── member │ ├── true │ │ ├── README.md │ │ ├── lib.rs │ │ └── Cargo.toml │ ├── stray.rs │ └── procmacro │ │ ├── src │ │ └── lib.rs │ │ └── Cargo.toml ├── .config │ ├── topic.dic │ └── spellcheck.toml ├── src │ ├── nested │ │ ├── justone.rs │ │ ├── justtwo.rs │ │ ├── again │ │ │ ├── mod.rs │ │ │ └── code.rs │ │ ├── fragments │ │ │ ├── simple.rs │ │ │ └── enumerate.rs │ │ ├── fragments.rs │ │ └── mod.rs │ ├── main.rs │ └── lib.rs ├── Cargo.lock ├── README.md └── Cargo.toml ├── .gitignore ├── nlprule-data └── 0.6.4 │ └── en │ ├── en_rules.bin.xz │ └── en_tokenizer.bin.xz ├── src ├── errors.rs ├── config │ ├── reflow.rs │ ├── nlprules.rs │ ├── regex.rs │ ├── search_dirs.rs │ ├── iso.rs │ ├── hunspell.rs │ └── mod.rs ├── main.rs ├── checker │ ├── dummy.rs │ ├── nlprules.rs │ ├── cached.rs │ ├── dictaffix.rs │ ├── quirks.rs │ ├── mod.rs │ ├── zspell.rs │ └── spellbook.rs ├── tinhat.rs ├── lib.rs ├── action │ └── bandaid.rs └── traverse │ └── iter.rs ├── doc-chunks ├── README.md ├── src │ ├── errors.rs │ ├── testcase.rs │ ├── cluster.rs │ ├── lib.rs │ └── literalset.rs └── Cargo.toml ├── .pre-commit-hooks.yaml ├── .config ├── lingo.dic └── spellcheck.toml ├── docs ├── checkers.md ├── features.md ├── automation.md ├── remedy.md └── configuration.md ├── LICENSE-MIT ├── .vscode └── launch.json ├── cliff.toml ├── tests └── signal_handler.rs ├── hunspell-data └── en_US.aff ├── Cargo.toml ├── README.md └── LICENSE-APACHE /.github/stale.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /demo/member/true/README.md: -------------------------------------------------------------------------------- 1 | # READ ME (maybe) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode/settings.json 3 | -------------------------------------------------------------------------------- /demo/.config/topic.dic: -------------------------------------------------------------------------------- 1 | 10 2 | topic/A 3 | tkae/topic -------------------------------------------------------------------------------- /demo/src/nested/justone.rs: -------------------------------------------------------------------------------- 1 | /// Wroeng. 
2 | struct W; -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /demo/src/nested/justtwo.rs: -------------------------------------------------------------------------------- 1 | /// Alphy 2 | /// Beto 3 | struct Abc; -------------------------------------------------------------------------------- /.github/auto_pr_team.yml: -------------------------------------------------------------------------------- 1 | org: cargo-spellcheck 2 | team: contributors 3 | -------------------------------------------------------------------------------- /demo/member/stray.rs: -------------------------------------------------------------------------------- 1 | /// Nobady references this. 2 | struct Lost; 3 | -------------------------------------------------------------------------------- /demo/src/nested/again/mod.rs: -------------------------------------------------------------------------------- 1 | mod code; 2 | 3 | /// Again. 4 | struct Again; 5 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: drahnr 2 | patreon: drahnr 3 | liberapay: drahnr 4 | open_collective: bernhard-schuster 5 | -------------------------------------------------------------------------------- /demo/member/procmacro/src/lib.rs: -------------------------------------------------------------------------------- 1 | /// Foo bar baz. 2 | fn empty() { 3 | unimplemented!("and never will be") 4 | } 5 | -------------------------------------------------------------------------------- /demo/member/true/lib.rs: -------------------------------------------------------------------------------- 1 | //! Some extar crate docs. 2 | 3 | /// ZZZZzzz makes the snake. 4 | pub fn x() { 5 | 6 | } 7 | -------------------------------------------------------------------------------- /demo/src/nested/fragments/simple.rs: -------------------------------------------------------------------------------- 1 | /// First. 2 | /// Secondo. 3 | /// Thurd number one. 4 | /// Another thurd. 5 | struct Q; -------------------------------------------------------------------------------- /nlprule-data/0.6.4/en/en_rules.bin.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drahnr/cargo-spellcheck/HEAD/nlprule-data/0.6.4/en/en_rules.bin.xz -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | //! Global error usage without cluttering each file. 2 | pub use color_eyre::eyre::{bail, eyre, Error, Result, WrapErr}; 3 | -------------------------------------------------------------------------------- /nlprule-data/0.6.4/en/en_tokenizer.bin.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drahnr/cargo-spellcheck/HEAD/nlprule-data/0.6.4/en/en_tokenizer.bin.xz -------------------------------------------------------------------------------- /demo/src/nested/fragments/enumerate.rs: -------------------------------------------------------------------------------- 1 | /// Secret. 2 | /// 3 | /// Somethign secret. 
4 | enum Instrument { 5 | /// An instroment. 6 | Xylophon, 7 | } -------------------------------------------------------------------------------- /doc-chunks/README.md: -------------------------------------------------------------------------------- 1 | # doc-chunks 2 | 3 | Extract clustered documentation lines and provide 4 | a spanned and commonmark aware overlay with a 5 | span based mapping. -------------------------------------------------------------------------------- /demo/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "cargo-spellcheck-demo" 5 | version = "1.0.0" 6 | -------------------------------------------------------------------------------- /.pre-commit-hooks.yaml: -------------------------------------------------------------------------------- 1 | - id: cargo-spellcheck 2 | name: cargo-spellcheck 3 | description: Spellcheck rust files 4 | entry: cargo-spellcheck 5 | language: rust 6 | types: [rust] 7 | args: ["--code=99", "--"] 8 | -------------------------------------------------------------------------------- /demo/src/nested/again/code.rs: -------------------------------------------------------------------------------- 1 | //! 2 | //! Enclozed `codez` not checked. 3 | //! 4 | 5 | 6 | /// Do you like `wroeng` very mouch? 7 | /// Sharon stayed home from school the other day. Because she was sick. 8 | struct Coda; 9 | -------------------------------------------------------------------------------- /.github/no-response.yml: -------------------------------------------------------------------------------- 1 | daysUntilClose: 7 2 | 3 | responseRequiredLabel: needs-more-information 4 | 5 | closeComment: > 6 | Feel free to re-open once there is more information available. 7 | 8 | If you are not the original author, please create a new issue. 9 | -------------------------------------------------------------------------------- /demo/.config/spellcheck.toml: -------------------------------------------------------------------------------- 1 | [Hunspell] 2 | lang = "en_US" 3 | search_dirs = ["."] 4 | extra_dictionaries = ["topic.dic"] 5 | 6 | [Hunspell.quirks] 7 | transform_regex = ["^'([^\\s])'$", "^[0-9]+x$"] 8 | allow_concatenation = true 9 | allow_dashed = false 10 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # zilly demo proyekt 2 | 3 | A plethora of spelling mistackz inclusive. 4 | 5 |
 6 | ᐲ🠒🍉 see #104
 7 | 
8 | 9 | 'Verify #88' 10 | '"Does not pop up"' 11 | "ever again" 12 | 13 | Mojis are ok 🍈🍐🍇 -------------------------------------------------------------------------------- /demo/src/main.rs: -------------------------------------------------------------------------------- 1 | /*! Just a lil smthin smthin. */ 2 | 3 | mod lib; 4 | 5 | /* dev */ 6 | pub mod nested; 7 | 8 | /** 9 | Not so preferable doc comment, use `///` instead. 10 | */ 11 | fn main() { 12 | lib::a(); 13 | lib::b(); 14 | lib::c(); 15 | } 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--ask-a-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓ Ask a question" 3 | about: Something is not clear to you from the documentation 4 | title: '' 5 | labels: documentation, question 6 | assignees: drahnr 7 | 8 | --- 9 | 10 | ** Q: ** 11 | 12 | 13 | -------------------------------------------------------------------------------- /demo/member/procmacro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-spellcheck-demo-proc-macro" 3 | version = "1.0.0" 4 | authors = ["Bernhard Schuster "] 5 | edition = "2018" 6 | publish = false 7 | description = "Proc-macro member of the demo, with `[lib]` but no path" 8 | 9 | [lib] 10 | proc-macro = true 11 | # unspecified path 12 | 13 | [dependencies] 14 | -------------------------------------------------------------------------------- /demo/member/true/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-spellcheck-demo-inner" 3 | publish = false 4 | description = "Inner member of the demo" 5 | readme = "README.md" 6 | 7 | version.workspace = true 8 | authors.workspace = true 9 | edition.workspace = true 10 | 11 | [lib] 12 | crate-type = ["staticlib"] 13 | path = "lib.rs" 14 | bench = false 15 | test = false 16 | 17 | 18 | [dependencies] 19 | -------------------------------------------------------------------------------- /src/config/reflow.rs: -------------------------------------------------------------------------------- 1 | //! Reflow configuration. 2 | use serde::{Deserialize, Serialize}; 3 | 4 | /// Parameters for wrapping doc comments 5 | #[derive(Debug, Clone, Serialize, Deserialize)] 6 | pub struct ReflowConfig { 7 | /// Hard limit for absolute length of lines. 
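/// Defaults to 80 columns (see the `Default` impl below); the `max_line_width`
/// spelling from older configuration files is accepted as an alias as well.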
8 | #[serde(default)] 9 | #[serde(alias = "max_line_width")] 10 | pub(crate) max_line_length: usize, 11 | } 12 | 13 | impl Default for ReflowConfig { 14 | fn default() -> Self { 15 | Self { 16 | max_line_length: 80, 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use cargo_spellcheck::{action, errors::Result, run, Args}; 2 | 3 | #[allow(missing_docs)] 4 | fn main() -> Result<()> { 5 | color_eyre::install()?; 6 | let args = Args::parse(std::env::args()).unwrap_or_else(|e| e.exit()); 7 | let res = run(args); 8 | // no matter what, restore the terminal 9 | if let Err(e) = action::interactive::ScopedRaw::restore_terminal() { 10 | log::warn!("Failed to restore terminal: {e}"); 11 | } 12 | let val = res?.as_u8(); 13 | if val != 0 { 14 | std::process::exit(val as i32) 15 | } 16 | Ok(()) 17 | } 18 | -------------------------------------------------------------------------------- /demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-spellcheck-demo" 3 | version = "1.0.0" 4 | authors = ["Bernhard Schuster "] 5 | edition = "2024" 6 | publish = false 7 | description = "A silly demo with plenty of spelling misteakes for cargo-spellcheck demos and CI" 8 | readme = "README.md" 9 | 10 | [lib] 11 | crate-type = ["staticlib"] 12 | path = "src/lib.rs" 13 | bench = false 14 | test = false 15 | 16 | [[bin]] 17 | name = "vacays" 18 | path = "src/main.rs" 19 | bench = false 20 | test = false 21 | 22 | 23 | [dependencies] 24 | 25 | 26 | [workspace] 27 | members = ["member/*"] 28 | -------------------------------------------------------------------------------- /demo/src/nested/fragments.rs: -------------------------------------------------------------------------------- 1 | //! Modul levl documenatation. 2 | //! 3 | //! Details are full fo errors. 4 | 5 | mod simple; 6 | 7 | mod enumerate; 8 | 9 | // Shud be chcked now 10 | // Verify **some** _super_ *duper* [markdown](https://ahoi.io/). 11 | struct X; 12 | 13 | /* 14 | * Also check thiz one 15 | */ 16 | impl X { 17 | /// New, as in new. But also not. 18 | /// 19 | /// Half sentence for X #2. 20 | fn new() -> Self { 21 | unimplemented!() 22 | } 23 | 24 | /// Old, as in really old. 25 | /// 26 | /// But what does "old" really mean? 27 | fn old(&self) { 28 | unimplemented!() 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/config/nlprules.rs: -------------------------------------------------------------------------------- 1 | //! NlpRules checker configuration. 2 | use serde::{Deserialize, Serialize}; 3 | use std::path::PathBuf; 4 | 5 | #[derive(Deserialize, Serialize, Debug, Clone)] 6 | #[serde(deny_unknown_fields)] 7 | pub struct LanguageToolConfig { 8 | pub url: url::Url, 9 | } 10 | 11 | impl LanguageToolConfig { 12 | pub fn url(&self) -> &url::Url { 13 | &self.url 14 | } 15 | } 16 | #[derive(Deserialize, Serialize, Debug, Clone, Default)] 17 | #[serde(deny_unknown_fields)] 18 | pub struct NlpRulesConfig { 19 | /// Location to use for an initial lookup of alternate tokenizer and rules 20 | /// data. 
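/// (Note: if left unset, the LanguageTool derived artifacts bundled at build
/// time, see `nlprule-data/`, are used.)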
21 | pub override_rules: Option, 22 | pub override_tokenizer: Option, 23 | } 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---request-a-new-feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F331 Request a new feature" 3 | about: Suggest a feature you would like to see implemented 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a particular use-case?** 11 | 12 | 13 | 14 | **Describe the solution you'd like to implement/see implemented** 15 | 16 | 17 | 18 | **Describe alternatives you've considered** 19 | 20 | 21 | 22 | **Additional context** 23 | 24 | 25 | -------------------------------------------------------------------------------- /doc-chunks/src/errors.rs: -------------------------------------------------------------------------------- 1 | use crate::{Range, Span}; 2 | use indexmap::IndexMap; 3 | 4 | pub type Result = std::result::Result; 5 | 6 | #[derive(thiserror::Error, Debug)] 7 | pub enum Error { 8 | #[error(transparent)] 9 | Io(#[from] std::io::Error), 10 | 11 | #[error("Really pretty much anything")] 12 | Any, 13 | 14 | #[error("Failed to parse rust content: {0:?}")] 15 | ParserFailure(#[source] syn::Error), 16 | 17 | #[error("Failed to parse toml file")] 18 | Toml(#[from] toml::de::Error), 19 | 20 | #[error("{0}")] 21 | Span(String), 22 | 23 | #[error("BUG: Found a range {}..{} which that does not exist in its own source mapping: {:?}", .line_range.start, .line_range.end, .source_mapping)] 24 | InvalidLineRange { 25 | line_range: Range, 26 | source_mapping: IndexMap, 27 | }, 28 | } 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---report-a-bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Report a bug" 3 | about: Create a report to help us fix bugs 4 | title: '' 5 | labels: bug 6 | assignees: drahnr 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | 13 | 14 | **To Reproduce** 15 | 16 | Steps to reproduce the behaviour: 17 | 18 | 1. A file containing `...` 19 | 2. Run `cargo spellcheck ...` 20 | 3. ... 21 | 22 | **Expected behavior** 23 | 24 | 25 | 26 | **Screenshots** 27 | 28 | 30 | 31 | **Please complete the following information:** 32 | - System: 33 | - Obtained: 34 | - Version: 35 | 36 | **Additional context** 37 | 38 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | ## What does this PR accomplish? 6 | 7 | 10 | 11 | * 🩹 Bug Fix 12 | * 🦚 Feature 13 | * 📙 Documentation 14 | * 🦣 Legacy 15 | * 🪣 Misc 16 | 17 | 21 | Closes # . 
22 | 23 | ## Changes proposed by this PR: 24 | 25 | 28 | 29 | ## Notes to reviewer: 30 | 31 | 37 | 38 | 39 | ## 📜 Checklist 40 | 41 | * [ ] Works on the `./demo` sub directory 42 | * [ ] Test coverage is excellent and passes 43 | * [ ] Documentation is thorough 44 | -------------------------------------------------------------------------------- /.config/lingo.dic: -------------------------------------------------------------------------------- 1 | 100 2 | accessor/MS 3 | API/MS 4 | backend 5 | bandaid/MS 6 | Bitflag/MS 7 | C++ 8 | cargo-spellcheck/M 9 | checkable/MS 10 | clang/MS 11 | cli 12 | cmark 13 | commonmark 14 | CommonMark 15 | config 16 | Consumingly 17 | CXX 18 | dev 19 | divide/UBS 20 | Docopt 21 | ellipsize/GD 22 | emoji/MS 23 | enablement 24 | enqueue/GD 25 | enum 26 | fallbacks 27 | featureset/MS 28 | filesystem/MS 29 | hardcode/GD 30 | http/S 31 | hunspell/MS 32 | IETF 33 | iff 34 | io 35 | iterative/Y 36 | LanguageTool/MS 37 | lookups 38 | macOS 39 | metadata 40 | md 41 | multiline/S 42 | nix/MS 43 | NLP 44 | NlpRule/S 45 | pickable 46 | postfix/GD 47 | pre/MS 48 | proc_macro2/MS 49 | README 50 | recurse 51 | reflow 52 | Reflow/MS 53 | reflown 54 | reflow/MS 55 | roadmap/MS 56 | rustdoc/MS 57 | selectable 58 | spellcheck/M 59 | stateful/PY 60 | str 61 | stringly 62 | struct/MS 63 | TODO/MS 64 | TODO 65 | tokenization/MS 66 | tokenize/USXBMD 67 | tokenizer/MS 68 | toml 69 | tuple 70 | tuple/DSM 71 | undivide/UBS 72 | UTF-8 73 | whitespace/MS 74 | workspace/MS 75 | YOLO 76 | porject/MS 77 | -------------------------------------------------------------------------------- /demo/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fancy module docs are really helpful if they contain usage examples. 2 | 3 | /// Pick option a also known as door #1. 4 | pub fn a() { 5 | 6 | } 7 | 8 | 9 | #[doc = "Pick option b also known as door #2."] 10 | pub fn b() { 11 | 12 | } 13 | 14 | #[doc = r##"Pick option c also known as door #3."##] 15 | pub fn c() { 16 | 17 | } 18 | 19 | #[doc = r#"Risk not ya ting?"#] 20 | pub fn take_the_money_and_leave() { 21 | 22 | } 23 | 24 | 25 | /// Possible ways to run rustc and request various parts of LTO. 26 | /// 27 | /// Variant | Flag | Object Code | Bitcode 28 | /// -------------------|------------------------|-------------|-------- 29 | /// `Run` | `-C lto=foo` | n/a | n/a 30 | /// `Off` | `-C lto=off` | n/a | n/a 31 | /// `OnlyBitcode` | `-C linker-plugin-lto` | | ✓ 32 | /// `ObjectAndBitcode` | | ✓ | ✓ 33 | /// `OnlyObject` | `-C embed-bitcode=no` | ✓ | 34 | pub fn exploding_complexity() { 35 | 36 | } 37 | -------------------------------------------------------------------------------- /doc-chunks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "doc-chunks" 3 | version = "0.2.1" 4 | authors = ["Bernhard Schuster "] 5 | edition = "2021" 6 | rust-version = "1.57.0" 7 | repository = "https://github.com/drahnr/cargo-spellcheck.git" 8 | homepage = "https://github.com/drahnr/cargo-spellcheck" 9 | license = "MIT OR Apache-2.0" 10 | keywords = ["documentation", "chunks", "cluster"] 11 | description = "Clusters of doc comments and dev comments as coherent view." 
12 | readme = "README.md" 13 | 14 | [dependencies] 15 | console = "0.15" 16 | fs-err = "2" 17 | indexmap = { version = "2", features = ["rayon", "serde"] } 18 | itertools = "0.12" 19 | lazy_static = "1" 20 | memchr = "2" 21 | log = "0.4" 22 | proc-macro2 = { version = "1", features = ["span-locations"] } 23 | pulldown-cmark = "0.11" 24 | ra_ap_syntax = "0.0.269" 25 | rayon = "1.5" 26 | fancy-regex = "0.13" 27 | regex = "1" 28 | serde = { version = "1", features = ["derive"] } 29 | syn = { version = "2", features = ["full"] } 30 | thiserror = "1" 31 | toml = "0.8.2" 32 | 33 | [dev-dependencies] 34 | assert_matches = "1" 35 | env_logger = "0.11" 36 | -------------------------------------------------------------------------------- /docs/checkers.md: -------------------------------------------------------------------------------- 1 | # Checkers 2 | 3 | Available checker support 4 | 5 | ## Hunspell 6 | 7 | Requires a C++ compiler to compile the hunspell CXX source files which are part 8 | of `hunspell-sys` 9 | 10 | ### Fedora 30+ 11 | 12 | ```sh 13 | dnf install -y clang 14 | ``` 15 | 16 | ### Ubuntu 19.10+ 17 | 18 | ```sh 19 | apt install -y clang 20 | ``` 21 | 22 | ### Mac OS X 23 | 24 | ```sh 25 | brew install llvm 26 | ``` 27 | 28 | The environment variable `LLVM_CONFIG_PATH` needs to point to `llvm-config`, to 29 | do so: 30 | 31 | ```sh 32 | export LLVM_CONFIG_PATH=/usr/local/opt/llvm/bin/llvm-config 33 | ``` 34 | 35 | ## NlpRules 36 | 37 | When compiled with the default featureset which includes `nlprules`, the 38 | resulting binary can only be distributed under the [`LGPLv2.1`](./LICENSE-LGPL) 39 | since the `rules` and `tokenizer` definitions are extracted from `LanguageTool` 40 | (which is itself licensed under [`LGPLv2.1`](./LICENSE-LGPL)) as described by 41 | the library that is used for pulling and integrating - details are to be found 42 | under [crate `nlprule`'s 43 | README.md](https://github.com/bminixhofer/nlprule#license). 
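Which of the compiled-in backends actually runs can also be narrowed per invocation. A sketch, based on the `--checkers` flag referenced in `docs/remedy.md` (exact flag placement may differ between versions):

```sh
# Only run the dictionary based hunspell checker, skipping the nlprule grammar pass
cargo spellcheck --checkers=hunspell check
```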
44 | 45 | -------------------------------------------------------------------------------- /doc-chunks/src/testcase.rs: -------------------------------------------------------------------------------- 1 | use crate::{Span, TrimmedLiteral}; 2 | 3 | pub fn annotated_literals_raw(source: &str) -> impl Iterator + '_ { 4 | let stream = syn::parse_str::(source).expect("Must be valid rust"); 5 | stream 6 | .into_iter() 7 | .filter_map(|x| { 8 | if let proc_macro2::TokenTree::Group(group) = x { 9 | Some(group.stream().into_iter()) 10 | } else { 11 | None 12 | } 13 | }) 14 | .flatten() 15 | .filter_map(|x| { 16 | if let proc_macro2::TokenTree::Literal(literal) = x { 17 | Some(literal) 18 | } else { 19 | None 20 | } 21 | }) 22 | } 23 | 24 | pub fn annotated_literals(source: &str) -> Vec { 25 | annotated_literals_raw(source) 26 | .map(|literal| { 27 | let span = Span::from(literal.span()); 28 | TrimmedLiteral::load_from(source, span) 29 | .expect("Literals must be convertable to trimmed literals") 30 | }) 31 | .collect() 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2020 Bernhard Schuster 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /.config/spellcheck.toml: -------------------------------------------------------------------------------- 1 | [Hunspell] 2 | # lang and name of `.dic` file 3 | lang = "en_US" 4 | 5 | search_dirs = ["."] 6 | extra_dictionaries = ["lingo.dic"] 7 | 8 | skip_os_lookups = true 9 | use_builtin = true 10 | 11 | [ZSpell] 12 | # lang and name of `.dic` file 13 | lang = "en_US" 14 | 15 | search_dirs = ["."] 16 | extra_dictionaries = ["lingo.dic"] 17 | 18 | skip_os_lookups = true 19 | use_builtin = true 20 | 21 | [Spellbook] 22 | # lang and name of `.dic` file 23 | lang = "en_US" 24 | 25 | search_dirs = ["."] 26 | extra_dictionaries = ["lingo.dic"] 27 | 28 | skip_os_lookups = true 29 | use_builtin = true 30 | 31 | 32 | [Hunspell.quirks] 33 | # transforms words that are provided by the tokenizer 34 | # into word fragments based on the capture groups which are to be checked. 35 | # If no capture groups are present, the matched word is whitelisted. 
36 | transform_regex = [ 37 | "^'([^\\s])'$", 38 | "^[0-9]+x$", 39 | "^\\#[0-9]+$", 40 | "^[0-9]+$", 41 | "^.+\\+$", 42 | "\\+", 43 | ] 44 | # accepts `alphabeta` variants if the checker provides a replacement suggestion 45 | # of `alpha-beta`. 46 | allow_concatenation = true 47 | allow_dashed = true 48 | 49 | [Reflow] 50 | 51 | max_line_length = 80 52 | -------------------------------------------------------------------------------- /demo/src/nested/mod.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | mod justone; 4 | mod justtwo; 5 | mod fragments; 6 | mod again; 7 | 8 | /// Nested; 9 | struct Nest; 10 | 11 | /// Overly long statements that should be reflown since they are __very__ long and exceed the line limit. 12 | /// 13 | /// This struct has a lot of documentation but unfortunately, the lines are just too long. 14 | struct SomeLong { 15 | /// This member is interesting though since it has some indentation. These whitespaces must be kept. 16 | member: i8, 17 | #[ doc = "This member is interesting though since it has some indentation. These whitespaces must be kept."] 18 | sec: i8, 19 | #[doc=r###"And a different interesting thing 20 | because we have a random newline here?!"###] 21 | third: String, 22 | } 23 | 24 | /// A long documentation which is short enough for two lines 25 | /// but too long for one line. 26 | struct TooLong; 27 | 28 | /// And these lines are too short so they become just two lines 29 | /// instead of three, as it was 30 | /// initially. 31 | struct TooShort; 32 | 33 | #[ doc = "A long comment which we wanna reflow. So it's Saturday, are you having any plans for tonight?" ] 34 | struct Someodo; 35 | 36 | #[ doc= r#"A long comment which we wanna reflow. So it's Saturday, are you having any plans for 37 | tonight? We're gonna end up with three lines here I think."#] 38 | struct AnotherSomeodo; 39 | 40 | #[ doc= r#"A long short 41 | comment which we wanna reflow 42 | to one line."#] 43 | struct AnotherSomeodo2; 44 | -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | # Implemented Features + Roadmap 2 | 3 | * [x] Parse doc comments from arbitrary files 4 | * [x] Decent error printing 5 | * [x] `cargo-spellcheck check` 6 | * [x] Spell checking using `hunspell` 7 | * [x] Merge multiline doc comments 8 | * [x] Handle multiline and fragmented mistakes (i.e. 
for grammar) [#25](https://github.com/drahnr/cargo-spellcheck/issues/25) 9 | * [x] Grammar check using [`nlprule`](https://github.com/bminixhofer/nlprule) 10 | * [x] Follow module declarations rather than blindly recurse 11 | * [x] Be `commonmark`/`markdown` aware 12 | * [ ] Handle doc-tests with ` ```rust` as virtual files [#43](https://github.com/drahnr/cargo-spellcheck/issues/43) 13 | * [ ] Verify all types of links [#44](https://github.com/drahnr/cargo-spellcheck/issues/44) 14 | * [x] Check `README.md` files [#37](https://github.com/drahnr/cargo-spellcheck/issues/37) 15 | * [x] Improve interactive user interface with `crossterm` 16 | * [x] Ellipsize overly long statements with `...` [#42](https://github.com/drahnr/cargo-spellcheck/issues/42) 17 | * [ ] Learn topic lingo and filter false-positive-suggestions [#41](https://github.com/drahnr/cargo-spellcheck/issues/41) 18 | * [x] Handle cargo workspaces [#38](https://github.com/drahnr/cargo-spellcheck/issues/38) 19 | * [x] Re-flow doc comments [#39](https://github.com/drahnr/cargo-spellcheck/issues/39) 20 | * [x] Collect dev comments as well [#115](https://github.com/drahnr/cargo-spellcheck/issues/115) 21 | 22 | `hunspell` (dictionary based lookups) and `nlprules` (static grammar rules, 23 | derived from `languagetool`) are currently the two supported checkers. 24 | 25 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "type": "lldb", 5 | "request": "launch", 6 | "name": "Debug binary in executable 'cargo-spellcheck'", 7 | "cargo": { 8 | "args": [ 9 | "build", 10 | "--bin=cargo-spellcheck", 11 | "--package=cargo-spellcheck" 12 | ], 13 | "filter": { 14 | "name": "cargo-spellcheck", 15 | "kind": "bin" 16 | } 17 | }, 18 | "env": { 19 | "RUST_BACKTRACE":"full" 20 | }, 21 | "args": ["--", "spellcheck", "-vvvvv", "demo"], 22 | //"args": ["config", "-vvvvv", "--cfg=xxx.toml"], 23 | "cwd": "${workspaceFolder}" 24 | }, 25 | { 26 | "type": "lldb", 27 | "request": "launch", 28 | "name": "Debug unit tests in executable 'cargo-spellcheck'", 29 | "cargo": { 30 | "args": [ 31 | "test", 32 | "--no-run", 33 | "--bin=cargo-spellcheck", 34 | "--package=cargo-spellcheck", 35 | "combine_literals", 36 | "--", 37 | "--nocapture" 38 | ], 39 | "filter": { 40 | "name": "cargo-spellcheck", 41 | "kind": "bin" 42 | } 43 | }, 44 | "env": { 45 | "RUST_LOG":"cargo_spellcheck=trace", 46 | "RUST_BACKTRACE":"1" 47 | }, 48 | "args": [], 49 | "cwd": "${workspaceFolder}" 50 | } 51 | ] 52 | } 53 | -------------------------------------------------------------------------------- /docs/automation.md: -------------------------------------------------------------------------------- 1 | # Automation of `cargo-spellcheck` 2 | 3 | ## CI/CD 4 | 5 | `cargo-spellcheck` can be configured with `--code ` to return a non-zero 6 | return code if mistakes are found instead of `0`. 7 | 8 | ### GitHub Actions 9 | 10 | [Create a workflow](https://docs.github.com/en/actions/quickstart) for your project and add the following example as steps. 11 | 12 | The first step installs cargo-spellcheck on the runner. 13 | The second step loads your source code into the runner environment. 14 | The third step runs a command in a shell like you would normally do with cargo spellcheck. 15 | Specify your arguments as needed. 
16 | 17 | ```yaml 18 | - name: Install cargo-spellcheck 19 | uses: taiki-e/install-action@v2 20 | with: 21 | tool: cargo-spellcheck 22 | 23 | - uses: actions/checkout@v3 24 | 25 | - name: Run cargo-spellcheck 26 | run: cargo spellcheck --code 1 27 | ``` 28 | 29 | ### Other 30 | 31 | Install `cargo-spellcheck` via [`cargo-binstall`](https://github.com/cargo-bins/cargo-binstall) and then use it like you would locally. 32 | Alternatively you can use `cargo install cargo-spellcheck` to compile it from source. 33 | 34 | ```bash 35 | cargo binstall --no-confirm cargo-spellcheck 36 | 37 | cargo-spellcheck --code 1 38 | ``` 39 | 40 | ## Git hooks 41 | 42 | If you want to manually configure `cargo-spellcheck` to run on git commits: 43 | 44 | ```bash 45 | #!/usr/bin/env bash 46 | 47 | # Redirect output to stderr. 48 | exec 1>&2 49 | 50 | exec cargo spellcheck --code 99 $(git diff-index --cached --name-only --diff-filter=AM HEAD) 51 | ``` 52 | 53 | Alternatively you can use [`pre-commit`](https://pre-commit.com/) to manage your git commit hooks 54 | for you. This can be done by appending these lines to `.pre-commit-config.yaml` in your project: 55 | 56 | ```yaml 57 | - repo: https://github.com/drahnr/cargo-spellcheck.git 58 | rev: master 59 | - id: cargo-spellcheck 60 | 61 | ``` 62 | 63 | You will need to install the hooks running `pre-commit install-hooks` and `cargo-spellcheck` will 64 | get installed and wired up as a git commit hook for you. 65 | -------------------------------------------------------------------------------- /src/config/regex.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[derive(Debug)] 4 | pub struct WrappedRegex(pub Regex); 5 | 6 | impl Clone for WrappedRegex { 7 | fn clone(&self) -> Self { 8 | // @todo inefficient.. 
but right now this should almost never happen 9 | // @todo implement a lazy static `Arc>` 10 | Self(Regex::new(self.as_str()).unwrap()) 11 | } 12 | } 13 | 14 | impl std::ops::Deref for WrappedRegex { 15 | type Target = Regex; 16 | fn deref(&self) -> &Self::Target { 17 | &self.0 18 | } 19 | } 20 | 21 | impl std::convert::AsRef for WrappedRegex { 22 | fn as_ref(&self) -> &Regex { 23 | &self.0 24 | } 25 | } 26 | 27 | impl Serialize for WrappedRegex { 28 | fn serialize(&self, serializer: S) -> Result 29 | where 30 | S: serde::ser::Serializer, 31 | { 32 | serializer.serialize_str(self.as_str()) 33 | } 34 | } 35 | 36 | impl<'de> Deserialize<'de> for WrappedRegex { 37 | fn deserialize(deserializer: D) -> Result 38 | where 39 | D: serde::de::Deserializer<'de>, 40 | { 41 | deserializer 42 | .deserialize_any(RegexVisitor) 43 | .map(WrappedRegex::from) 44 | } 45 | } 46 | 47 | impl From for Regex { 48 | fn from(val: WrappedRegex) -> Self { 49 | val.0 50 | } 51 | } 52 | 53 | impl From for WrappedRegex { 54 | fn from(other: Regex) -> WrappedRegex { 55 | WrappedRegex(other) 56 | } 57 | } 58 | 59 | struct RegexVisitor; 60 | 61 | impl<'de> serde::de::Visitor<'de> for RegexVisitor { 62 | type Value = Regex; 63 | 64 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 65 | formatter.write_str("String with valid regex expression") 66 | } 67 | 68 | fn visit_str(self, value: &str) -> Result 69 | where 70 | E: serde::de::Error, 71 | { 72 | let re = Regex::new(value).map_err(E::custom)?; 73 | Ok(re) 74 | } 75 | 76 | fn visit_string(self, value: String) -> Result 77 | where 78 | E: serde::de::Error, 79 | { 80 | self.visit_str::(value.as_str()) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/checker/dummy.rs: -------------------------------------------------------------------------------- 1 | //! Everything is wrong, so wrong, even if it's correct. 2 | //! 3 | //! A test checker, only available for unit tests. 
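//! Every token produced by the shared tokenizer is reported as a mistake, with
//! a placeholder replacement of the form `replacement_{index}`.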
4 | 5 | // use super::tokenize; 6 | use super::{apply_tokenizer, Checker}; 7 | 8 | use crate::suggestion::{Detector, Suggestion}; 9 | use crate::util::sub_chars; 10 | use crate::{errors::*, CheckableChunk, ContentOrigin}; 11 | 12 | /// A test checker that tokenizes and marks everything as wrong 13 | pub struct DummyChecker; 14 | 15 | impl DummyChecker { 16 | pub fn new(_config: &::Config) -> Result { 17 | Ok(Self) 18 | } 19 | } 20 | 21 | impl Checker for DummyChecker { 22 | type Config = (); 23 | 24 | fn detector() -> Detector { 25 | Detector::Dummy 26 | } 27 | 28 | fn check<'a, 's>( 29 | &self, 30 | origin: &ContentOrigin, 31 | chunks: &'a [CheckableChunk], 32 | ) -> Result>> 33 | where 34 | 'a: 's, 35 | { 36 | let tokenizer = super::tokenizer::<&std::path::PathBuf>(None)?; 37 | 38 | let mut acc = Vec::with_capacity(chunks.len()); 39 | let chunk = chunks 40 | .first() 41 | .expect("DummyChecker expects at least one chunk"); 42 | let plain = chunk.erase_cmark(&Default::default()); 43 | let txt = plain.as_str(); 44 | for (index, range) in apply_tokenizer(&tokenizer, txt).enumerate() { 45 | log::trace!("****Token[{}]: >{}<", index, sub_chars(txt, range.clone())); 46 | let detector = Detector::Dummy; 47 | let range2span = plain.find_spans(range.clone()); 48 | for (range, span) in range2span { 49 | log::trace!( 50 | "Suggestion for {:?} -> {}", 51 | range, 52 | chunk.display(range.clone()) 53 | ); 54 | let replacements = vec![format!("replacement_{index}")]; 55 | let suggestion = Suggestion { 56 | detector, 57 | span, 58 | range, 59 | origin: origin.clone(), 60 | replacements, 61 | chunk, 62 | description: None, 63 | }; 64 | acc.push(suggestion); 65 | } 66 | } 67 | Ok(acc) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /cliff.toml: -------------------------------------------------------------------------------- 1 | # configuration file for git-cliff (0.1.0) 2 | 3 | [changelog] 4 | # changelog header 5 | header = """ 6 | # Changelog 7 | All notable changes to this project will be documented in this file.\n 8 | """ 9 | # template for the changelog body 10 | # https://tera.netlify.app/docs/#introduction 11 | body = """ 12 | {% if version %}\ 13 | ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} 14 | {% else %}\ 15 | ## [unreleased] 16 | {% endif %}\ 17 | {% for group, commits in commits | group_by(attribute="group") %} 18 | ### {{ group | upper_first }} 19 | {% for commit in commits %} 20 | - {% if commit.breaking %}[**breaking**] {% endif %}{{ commit.message | upper_first }}\ 21 | {% endfor %} 22 | {% endfor %}\n 23 | """ 24 | # remove the leading and trailing whitespaces from the template 25 | trim = true 26 | # changelog footer 27 | footer = """ 28 | 29 | """ 30 | 31 | [git] 32 | # parse the commits based on https://www.conventionalcommits.org 33 | conventional_commits = true 34 | # filter out the commits that are not conventional 35 | filter_unconventional = true 36 | # regex for parsing and grouping commits 37 | commit_parsers = [ 38 | { message = "^feat", group = "Features" }, 39 | { message = "^fix", group = "Bug Fixes" }, 40 | { message = "^doc", group = "Documentation" }, 41 | { message = "^perf", group = "Performance" }, 42 | { message = "^refactor", group = "Refactor" }, 43 | { message = "^style", group = "Styling" }, 44 | { message = "^test", group = "Testing" }, 45 | { message = "^chore\\(release\\): prepare for", skip = true }, 46 | { message = 
"^(chore[/:]\\s*)?(cargo\\s+)?(fmt|fix|clippy|spellcheck)", skip = true }, 47 | { message = "^[vV]?0\\.[0-9]\\.[0-9]+", skip = true }, 48 | { message = "^\\(cargo-release\\)", skip = true }, 49 | { message = "^(chore/)?rele?ase:", skip = true }, 50 | { message = "^chore", group = "Miscellaneous Tasks" }, 51 | { body = ".*security", group = "Security" }, 52 | ] 53 | # filter out the commits that are not matched by commit parsers 54 | filter_commits = false 55 | # glob pattern for matching git tags 56 | tag_pattern = "v[0-9]*" 57 | # regex for ignoring tags 58 | ignore_tags = "" 59 | # sort the tags topologically 60 | topo_order = false 61 | # sort the commits inside sections by oldest/newest order 62 | sort_commits = "oldest" 63 | -------------------------------------------------------------------------------- /tests/signal_handler.rs: -------------------------------------------------------------------------------- 1 | #![cfg(target_os = "linux")] 2 | 3 | use nix::sys::signal::*; 4 | use nix::sys::wait::*; 5 | use nix::unistd::Pid; 6 | use nix::unistd::{fork, ForkResult}; 7 | 8 | use cargo_spellcheck::{signal_handler, TinHat}; 9 | 10 | #[test] 11 | fn signal_handler_works() -> Result<(), Box> { 12 | let _ = env_logger::Builder::new() 13 | .filter_level(log::LevelFilter::Trace) 14 | .is_test(true) 15 | .try_init(); 16 | 17 | println!("Signal handler check"); 18 | 19 | const QUIT: Signal = Signal::SIGQUIT; 20 | 21 | let sigs = { 22 | let mut sigs = SigSet::empty(); 23 | sigs.add(QUIT); 24 | sigs 25 | }; 26 | 27 | // best effort unblock 28 | let _ = sigprocmask(SigmaskHow::SIG_UNBLOCK, Some(&sigs), None); 29 | let _ = pthread_sigmask(SigmaskHow::SIG_UNBLOCK, Some(&sigs), None); 30 | 31 | if let Ok(ForkResult::Parent { child, .. }) = unsafe { fork() } { 32 | println!("[parent] Wait for child"); 33 | 34 | loop { 35 | let options = WaitPidFlag::WNOHANG; 36 | match nix::sys::wait::waitpid(child, Some(options)) { 37 | Ok(WaitStatus::StillAlive) => { 38 | std::thread::sleep(std::time::Duration::from_millis(50)); 39 | continue; 40 | } 41 | Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => { 42 | assert_eq!(signal, QUIT); 43 | unreachable!("Should exit via exit. qed") 44 | } 45 | Ok(WaitStatus::Exited(_pid, _exit_code)) => { 46 | return Ok(()); 47 | } 48 | Ok(ws) => unreachable!("Unexpected wait status: {ws:?}"), 49 | Err(errno) => unreachable!("Did not expect an error: {errno:?}"), 50 | } 51 | } 52 | } else { 53 | signal_handler(|| {}); 54 | 55 | // signal while blocking signals 56 | { 57 | let hat = TinHat::on(); 58 | println!("[child] Raise signal"); 59 | 60 | kill(Pid::this(), QUIT).unwrap(); 61 | 62 | std::thread::sleep(std::time::Duration::from_millis(1)); 63 | drop(hat); 64 | } 65 | 66 | std::thread::sleep(std::time::Duration::from_secs(10_000)); 67 | unreachable!("[child] Signal handler exits before panic."); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/tinhat.rs: -------------------------------------------------------------------------------- 1 | //! Tinhat 2 | //! 3 | //! Makes sure the cosmic signals don't meddle with IO that's in progress. 4 | //! 5 | //! ``` 6 | //! # use cargo_spellcheck::TinHat; 7 | //! let th = TinHat::on(); 8 | //! // do IO 9 | //! drop(th); 10 | //! 
``` 11 | 12 | use std::sync::atomic::{AtomicBool, AtomicU16, Ordering}; 13 | 14 | #[cfg(not(target_os = "windows"))] 15 | use signal_hook::{ 16 | consts::signal::{SIGINT, SIGQUIT, SIGTERM}, 17 | iterator, 18 | }; 19 | 20 | /// Global atomic to block signal processing while a file write is currently in 21 | /// progress. 22 | static WRITE_IN_PROGRESS: AtomicU16 = AtomicU16::new(0); 23 | /// Delay if the signal handler is currently running. 24 | static SIGNAL_HANDLER_AT_WORK: AtomicBool = AtomicBool::new(false); 25 | 26 | /// Handle incoming signals. 27 | /// 28 | /// Only relevant for *-nix platforms. 29 | #[cfg(not(target_os = "windows"))] 30 | pub fn signal_handler(fx: F) 31 | where 32 | F: FnOnce() + Send + 'static, 33 | { 34 | let mut signals = 35 | iterator::Signals::new([SIGTERM, SIGINT, SIGQUIT]).expect("Failed to create Signals"); 36 | 37 | std::thread::spawn(move || { 38 | for s in signals.forever() { 39 | match s { 40 | SIGTERM | SIGINT | SIGQUIT => { 41 | SIGNAL_HANDLER_AT_WORK.store(true, Ordering::SeqCst); 42 | // Wait for potential writing to disk to be finished. 43 | while WRITE_IN_PROGRESS.load(Ordering::Acquire) > 0 { 44 | std::hint::spin_loop(); 45 | std::thread::yield_now(); 46 | } 47 | fx(); 48 | signal_hook::low_level::exit(130); 49 | } 50 | sig => log::warn!("Received unhandled signal {sig}, ignoring"), 51 | } 52 | } 53 | }); 54 | } 55 | 56 | /// Blocks (UNIX) signals. 57 | pub struct TinHat; 58 | 59 | impl TinHat { 60 | /// Put the tin hat on, and only allow signals being processed once it's 61 | /// dropped. 62 | pub fn on() -> Self { 63 | // If there is a signal handler in progress, block. 64 | while SIGNAL_HANDLER_AT_WORK.load(Ordering::Acquire) { 65 | std::hint::spin_loop(); 66 | std::thread::yield_now(); 67 | } 68 | let _ = WRITE_IN_PROGRESS.fetch_add(1, Ordering::Release); 69 | Self 70 | } 71 | } 72 | 73 | impl Drop for TinHat { 74 | fn drop(&mut self) { 75 | let _ = WRITE_IN_PROGRESS.fetch_sub(1, Ordering::Release); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /docs/remedy.md: -------------------------------------------------------------------------------- 1 | # Fixing spelling mistakes 2 | 3 | While cargo-spellcheck is good at _pointing out_ existing spellchecks, 4 | it's sometimes not obvious how to resolve them or what the correct way is 5 | to resolve them. 6 | 7 | The following covers an abstracted set of commonly encountered `cargo spellcheck` 8 | complaints and how to resolve them: 9 | 10 | ## Configuration 11 | 12 | Make sure your runs are idempotent if you run on two different systems, 13 | which is easiest achieved by using the builtin affix and dictionaries 14 | besides the topic specifc lingo dictionary that should come with your project. 15 | 16 | ```toml 17 | # .config/spellcheck.toml 18 | 19 | [Hunspell] 20 | # snip 21 | skip_os_lookups = true 22 | use_builtin = true 23 | # snip 24 | ``` 25 | 26 | --- 27 | 28 | Avoiding `nlprule` backend by passing `--checkers=hunspell` might be a good idea, 29 | since `nlprule` tends to have a few false positives. 30 | 31 | ## Examples 32 | 33 | ### Missing word variants 34 | 35 | Sometimes some word forms belong into topic specific lingo and as such should be added to 36 | the topic specific dictionary. Make use of suffix patterns such as `/S` for plural `s` and `/M` for `'s`. This will keep your dictionary to a minimum. Please check the [affix file included here](./hunspell-data/en_US.aff) or your OS' provided affix file. 
37 | [It is required to understand the slightly arcane format of `.aff` and `.dic` files.](https://www.systutorials.com/docs/linux/man/4-hunspell/#lbAE) which is also available via `man 4 hunspell`.
38 |
39 | ### Types in doc comments
40 |
41 | ```raw
42 | lib.rs : 2
43 | 858 | See [MmrLeafVersion] type documentation for more details.
44 | | ^^^^^^^^^^^^^^
45 | | Possible spelling mistake found.
46 | ```
47 |
48 | can be resolved by using
49 |
50 | ```md
51 | [`MmrLeafVersion`]
52 | ```
53 |
54 | with additional ticks.
55 |
56 | This is a general pattern for _types_ that make an appearance in the doc comments.
57 |
58 | ### Patterns
59 |
60 | In some cases it's a pattern one wants to whitelist, such as `10x` or `117x`, which can be done via
61 | the configuration adding an allowlist regex `^[0-9]+x$`.
62 |
63 |
64 | ### TODO, XXX, and FIXME
65 |
66 | Should not be present in doc comments, but only make it into developer comments, i.e. `// FIXME foo` or `/* FIXME foo */`
67 |
68 | ### markdown: autolink
69 |
70 |
71 | ```raw
72 | error: spellcheck(Hunspell)
73 | --> test.md:96
74 | |
75 | 96 | The test coverage in `lcov` can the be published to .
76 | | ^^^^^^^
77 | | - codec
78 | |
79 | | Possible spelling mistake found.
80 | ```
81 |
82 | will spellcheck all components of the url, since it is not a _valid_ autolink. Add the protocol type.
83 |
84 | ```md
85 |
86 | ```
87 |
88 | and the content will be omitted from spellchecking.
-------------------------------------------------------------------------------- /src/config/search_dirs.rs: --------------------------------------------------------------------------------
1 | use super::*;
2 |
3 | /// Obtain OS specific search directories.
4 | fn os_specific_search_dirs() -> &'static [PathBuf] {
5 | lazy_static::lazy_static! {
6 | static ref OS_SPECIFIC_LOOKUP_DIRS: Vec<PathBuf> =
7 | if cfg!(target_os = "macos") {
8 | directories::BaseDirs::new()
9 | .map(|base| vec![base.home_dir().to_owned().join("/Library/Spelling/"), PathBuf::from("/Library/Spelling/")])
10 | .unwrap_or_default()
11 | } else if cfg!(target_os = "linux") {
12 | vec![
13 | // Fedora
14 | PathBuf::from("/usr/share/myspell/"),
15 | PathBuf::from("/usr/share/hunspell/"),
16 | // Arch Linux
17 | PathBuf::from("/usr/share/myspell/dicts/"),
18 | ]
19 | } else {
20 | Vec::new()
21 | };
22 |
23 | }
24 | OS_SPECIFIC_LOOKUP_DIRS.as_slice()
25 | }
26 |
27 | /// A collection of search directories. OS specific paths are only provided in
28 | /// the iterator. 
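/// (Illustrative: `dirs.iter(true)` yields the configured paths first and then
/// chains the OS specific defaults from `os_specific_search_dirs()`, while
/// `dirs.iter(false)` yields only the configured paths.)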
29 | #[derive(Debug, Clone)] 30 | pub struct SearchDirs(pub Vec); 31 | 32 | impl Default for SearchDirs { 33 | fn default() -> Self { 34 | Self(Vec::with_capacity(8)) 35 | } 36 | } 37 | 38 | impl SearchDirs { 39 | pub fn iter(&self, extend_by_os: bool) -> impl Iterator { 40 | let chained = if extend_by_os { 41 | os_specific_search_dirs().iter() 42 | } else { 43 | [].iter() 44 | }; 45 | self.0.iter().chain(chained) 46 | } 47 | } 48 | 49 | impl std::convert::AsRef> for SearchDirs { 50 | fn as_ref(&self) -> &Vec { 51 | &self.0 52 | } 53 | } 54 | 55 | impl Serialize for SearchDirs { 56 | fn serialize(&self, serializer: S) -> Result 57 | where 58 | S: serde::ser::Serializer, 59 | { 60 | serializer.serialize_newtype_struct("SearchDirs", &self.0) 61 | } 62 | } 63 | 64 | impl<'de> Deserialize<'de> for SearchDirs { 65 | fn deserialize(deserializer: D) -> Result 66 | where 67 | D: serde::de::Deserializer<'de>, 68 | { 69 | deserializer 70 | .deserialize_newtype_struct("SearchDirs", SearchDirVisitor) 71 | .map(Into::into) 72 | } 73 | } 74 | 75 | impl From for Vec { 76 | fn from(val: SearchDirs) -> Self { 77 | val.0 78 | } 79 | } 80 | 81 | impl From> for SearchDirs { 82 | fn from(other: Vec) -> SearchDirs { 83 | SearchDirs(other) 84 | } 85 | } 86 | 87 | /// A search directory visitor, auto extending the search directory with OS 88 | /// defaults. 89 | struct SearchDirVisitor; 90 | 91 | impl<'de> serde::de::Visitor<'de> for SearchDirVisitor { 92 | type Value = Vec; 93 | 94 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 95 | formatter.write_str("Search Dir Visitors must be an optional sequence of path") 96 | } 97 | 98 | fn visit_newtype_struct(self, deserializer: D) -> Result 99 | where 100 | D: serde::de::Deserializer<'de>, 101 | { 102 | let seq = deserializer.deserialize_seq(self)?; 103 | Ok(seq) 104 | } 105 | 106 | fn visit_seq(self, mut seq: A) -> Result 107 | where 108 | A: serde::de::SeqAccess<'de>, 109 | { 110 | let mut v = Vec::with_capacity(8); 111 | while let Some(item) = seq.next_element()? { 112 | v.push(item); 113 | } 114 | Ok(v) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /hunspell-data/en_US.aff: -------------------------------------------------------------------------------- 1 | SET UTF8 2 | TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ' 3 | NOSUGGEST ! 4 | 5 | # ordinal numbers 6 | COMPOUNDMIN 1 7 | # only in compounds: 1th, 2th, 3th 8 | ONLYINCOMPOUND c 9 | # compound rules: 10 | # 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.) 11 | # 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.) 12 | COMPOUNDRULE 2 13 | COMPOUNDRULE n*1t 14 | COMPOUNDRULE n*mp 15 | WORDCHARS 0123456789' 16 | 17 | PFX A Y 1 18 | PFX A 0 re . 19 | 20 | PFX I Y 1 21 | PFX I 0 in . 22 | 23 | PFX U Y 1 24 | PFX U 0 un . 25 | 26 | PFX C Y 1 27 | PFX C 0 de . 28 | 29 | PFX E Y 1 30 | PFX E 0 dis . 31 | 32 | PFX F Y 1 33 | PFX F 0 con . 34 | 35 | PFX K Y 1 36 | PFX K 0 pro . 37 | 38 | SFX V N 2 39 | SFX V e ive e 40 | SFX V 0 ive [^e] 41 | 42 | SFX N Y 3 43 | SFX N e ion e 44 | SFX N y ication y 45 | SFX N 0 en [^ey] 46 | 47 | SFX X Y 3 48 | SFX X e ions e 49 | SFX X y ications y 50 | SFX X 0 ens [^ey] 51 | 52 | SFX H N 2 53 | SFX H y ieth y 54 | SFX H 0 th [^y] 55 | 56 | SFX Y Y 1 57 | SFX Y 0 ly . 
58 | 59 | SFX G Y 2 60 | SFX G e ing e 61 | SFX G 0 ing [^e] 62 | 63 | SFX J Y 2 64 | SFX J e ings e 65 | SFX J 0 ings [^e] 66 | 67 | SFX D Y 4 68 | SFX D 0 d e 69 | SFX D y ied [^aeiou]y 70 | SFX D 0 ed [^ey] 71 | SFX D 0 ed [aeiou]y 72 | 73 | SFX T N 4 74 | SFX T 0 st e 75 | SFX T y iest [^aeiou]y 76 | SFX T 0 est [aeiou]y 77 | SFX T 0 est [^ey] 78 | 79 | SFX R Y 4 80 | SFX R 0 r e 81 | SFX R y ier [^aeiou]y 82 | SFX R 0 er [aeiou]y 83 | SFX R 0 er [^ey] 84 | 85 | SFX Z Y 4 86 | SFX Z 0 rs e 87 | SFX Z y iers [^aeiou]y 88 | SFX Z 0 ers [aeiou]y 89 | SFX Z 0 ers [^ey] 90 | 91 | SFX S Y 4 92 | SFX S y ies [^aeiou]y 93 | SFX S 0 s [aeiou]y 94 | SFX S 0 es [sxzh] 95 | SFX S 0 s [^sxzhy] 96 | 97 | SFX P Y 3 98 | SFX P y iness [^aeiou]y 99 | SFX P 0 ness [aeiou]y 100 | SFX P 0 ness [^y] 101 | 102 | SFX M Y 1 103 | SFX M 0 's . 104 | 105 | SFX B Y 3 106 | SFX B 0 able [^aeiou] 107 | SFX B 0 able ee 108 | SFX B e able [^aeiou]e 109 | 110 | SFX L Y 1 111 | SFX L 0 ment . 112 | 113 | SFX i N 1 114 | SFX i us i us 115 | 116 | REP 90 117 | REP a ei 118 | REP ei a 119 | REP a ey 120 | REP ey a 121 | REP ai ie 122 | REP ie ai 123 | REP alot a_lot 124 | REP are air 125 | REP are ear 126 | REP are eir 127 | REP air are 128 | REP air ere 129 | REP ere air 130 | REP ere ear 131 | REP ere eir 132 | REP ear are 133 | REP ear air 134 | REP ear ere 135 | REP eir are 136 | REP eir ere 137 | REP ch te 138 | REP te ch 139 | REP ch ti 140 | REP ti ch 141 | REP ch tu 142 | REP tu ch 143 | REP ch s 144 | REP s ch 145 | REP ch k 146 | REP k ch 147 | REP f ph 148 | REP ph f 149 | REP gh f 150 | REP f gh 151 | REP i igh 152 | REP igh i 153 | REP i uy 154 | REP uy i 155 | REP i ee 156 | REP ee i 157 | REP j di 158 | REP di j 159 | REP j gg 160 | REP gg j 161 | REP j ge 162 | REP ge j 163 | REP s ti 164 | REP ti s 165 | REP s ci 166 | REP ci s 167 | REP k cc 168 | REP cc k 169 | REP k qu 170 | REP qu k 171 | REP kw qu 172 | REP o eau 173 | REP eau o 174 | REP o ew 175 | REP ew o 176 | REP oo ew 177 | REP ew oo 178 | REP ew ui 179 | REP ui ew 180 | REP oo ui 181 | REP ui oo 182 | REP ew u 183 | REP u ew 184 | REP oo u 185 | REP u oo 186 | REP u oe 187 | REP oe u 188 | REP u ieu 189 | REP ieu u 190 | REP ue ew 191 | REP ew ue 192 | REP uff ough 193 | REP oo ieu 194 | REP ieu oo 195 | REP ier ear 196 | REP ear ier 197 | REP ear air 198 | REP air ear 199 | REP w qu 200 | REP qu w 201 | REP z ss 202 | REP ss z 203 | REP shun tion 204 | REP shun sion 205 | REP shun cion 206 | REP sitted sat 207 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-spellcheck" 3 | version = "0.15.5" 4 | authors = ["Bernhard Schuster "] 5 | edition = "2021" 6 | rust-version = "1.85.0" 7 | repository = "https://github.com/drahnr/cargo-spellcheck.git" 8 | homepage = "https://github.com/drahnr/cargo-spellcheck" 9 | license = "MIT OR Apache-2.0" 10 | keywords = ["spellcheck", "spelling", "grammar"] 11 | description = "Checks all doc comments for spelling mistakes" 12 | readme = "README.md" 13 | build = "build.rs" 14 | include = [ 15 | "nlprule-data/**/*.bin.xz", 16 | "hunspell-data/*", 17 | "src/**/*.rs", 18 | "Cargo.toml", 19 | "build.rs", 20 | "/LICENSE-*", 21 | "/README.md", 22 | "tests/**/*.rs", 23 | "CHANGELOG.md", 24 | ] 25 | 26 | [workspace] 27 | members = ["./doc-chunks"] 28 | 29 | 30 | [build-dependencies] 31 | nlprule-build = { version = "=0.6.4", optional = true } 32 | # compress the 
nlprule artifacts to be under the 10 MB limit 33 | # that cargo enforces 34 | xz2 = "0.1" 35 | 36 | [dependencies] 37 | 38 | doc-chunks = { version = "0.2.1", path = "./doc-chunks" } 39 | 40 | color-eyre = "0.6" 41 | cargo_toml = "0.21" 42 | console = "0.15" 43 | crossterm = "0.27" 44 | # for the config file 45 | directories = "5" 46 | 47 | clap = { version = "4.1.8", features = ["derive", "env"] } 48 | clap_complete = "4.1.4" 49 | clap-verbosity-flag = "2.0" 50 | 51 | env_logger = "0.11" 52 | fancy-regex = "0.13" 53 | fs-err = { version = "2", features = ["io_safety"] } 54 | indexmap = { version = "2", features = ["rayon", "serde"] } 55 | itertools = "0.12" 56 | lazy_static = "1" 57 | memchr = "2" 58 | log = "0.4" 59 | num_cpus = "1.13" 60 | proc-macro2 = { version = "1", features = ["span-locations"] } 61 | pulldown-cmark = "0.10" 62 | rayon = "1.5" 63 | regex = "1.5" 64 | serde = { version = "1", features = ["derive"] } 65 | signal-hook = "0.3" 66 | syn = { version = "2", features = ["full"] } 67 | thiserror = "1" 68 | # for parsing and extracting elements from Cargo.toml 69 | toml = "0.8" 70 | glob = "0.3" 71 | # for the config file 72 | ignore = "0.4.18" 73 | tokio = { version = "1", features = ["full", "rt-multi-thread"] } 74 | futures = "0.3" 75 | 76 | uuid = { version = "1.0.0", features = ["v4"] } 77 | 78 | # config parsing, must be independent of features 79 | 80 | # TODO parse the country codes of dictionaries? 81 | iso_country = { version = "0.1", features = ["serde"] } 82 | isolang = { version = "2", features = ["serde"] } 83 | 84 | url = { version = "2", features = ["serde"] } 85 | 86 | # dictionary lookup with affixes 87 | hunspell-rs = { version = "0.4.0", optional = true } 88 | fd-lock = { version = "4", optional = true } 89 | encoding_rs = { version = "0.8.31", optional = true, features = [] } 90 | zspell = { version = "0.5.5", optional = true } 91 | spellbook = { version = "0.1", optional = true } 92 | 93 | # full grammar check, but also tokenization and disambiguation 94 | nlprule = { version = "=0.6.4", optional = true } 95 | 96 | # cache some expensive expansions 97 | xz2 = "0.1" 98 | sha2 = "0.10" 99 | bincode = "1" 100 | hex = "0.4" 101 | thousands = "0.2" 102 | 103 | [dev-dependencies] 104 | # for stripping ansi color codes 105 | console = "0.15" 106 | assert_matches = "1" 107 | maplit = "1" 108 | serde_plain = "1" 109 | nix = "0.26.2" 110 | 111 | [features] 112 | default = ["all"] 113 | 114 | # hunspell uses the segmenter provided by nlprules 115 | hunspell = [ 116 | "dep:hunspell-rs", 117 | "hunspell-rs?/bundled", 118 | "dep:fd-lock", 119 | "nlprules", 120 | "dep:encoding_rs", 121 | ] 122 | zet = ["dep:zspell"] 123 | spellbook = ["dep:spellbook"] 124 | nlprules = ["dep:nlprule", "nlprule?/regex-fancy", "dep:nlprule-build"] 125 | 126 | all = ["hunspell", "zet", "spellbook", "nlprules"] 127 | 128 | [profile.dev] 129 | build-override = { opt-level = 2 } 130 | 131 | [profile.dev.package] 132 | backtrace = { opt-level = 3 } 133 | bincode = { opt-level = 3 } 134 | xz2 = { opt-level = 3 } 135 | sha2 = { opt-level = 3 } 136 | hunspell-rs = { opt-level = 3 } 137 | nlprule = { opt-level = 3 } 138 | 139 | [profile.release] 140 | debug = true 141 | 142 | [package.metadata.spellcheck] 143 | config = ".config/spellcheck.toml" 144 | 145 | 146 | [[test]] 147 | name = "signal_handler" 148 | path = "tests/signal_handler.rs" 149 | -------------------------------------------------------------------------------- /docs/configuration.md: 
-------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ## Source 4 | 5 | There are various ways to specify the configuration. The prioritization is as 6 | follows: 7 | 8 | _Explicit_ specification: 9 | 10 | 1. Command line flags `--cfg=...`. 11 | 1. `Cargo.toml` package metadata 12 | 13 | ```toml 14 | [package.metadata.spellcheck] 15 | config = "somewhere/cfg.toml" 16 | ``` 17 | 18 | 1. `Cargo.toml` workspace metadata 19 | 20 | ```toml 21 | [workspace.metadata.spellcheck] 22 | config = "somewhere/else/cfg.toml" 23 | ``` 24 | 25 | which will fail if specified but not present on the filesystem. 26 | 27 | If none of those ways of specification is present, continue with the 28 | _implicit_ ones: 29 | 30 | 1. `Cargo.toml` metadata in the current working directory `CWD`. 31 | 1. Check the first argument's location if present, else the current working directory for `.config/spellcheck.toml`. 32 | 1. Fallback to per user configuration files: 33 | * Linux: `/home/alice/.config/cargo_spellcheck/config.toml` 34 | * Windows: `C:\Users\Alice\AppData\Roaming\cargo_spellcheck\config.toml` 35 | * macOS: `/Users/Alice/Library/Preferences/cargo_spellcheck/config.toml` 36 | 1. Use the default, builtin configuration (see `config` sub-command). 37 | 38 | Since this is rather complex, add `-vv` to your invocation to see the `info` 39 | level logs printed, which will contain the config path. 40 | ### Format 41 | 42 | ```toml 43 | # Project settings where a Cargo.toml exists and is passed 44 | # ${CARGO_MANIFEST_DIR}/.config/spellcheck.toml 45 | 46 | # Also take into account developer comments 47 | dev_comments = false 48 | 49 | # Skip the README.md file as defined in the cargo manifest 50 | skip_readme = false 51 | 52 | [Hunspell] 53 | # lang and name of `.dic` file 54 | lang = "en_US" 55 | # OS specific additives 56 | # Linux: [ /usr/share/myspell ] 57 | # Windows: [] 58 | # macOS: [ /home/alice/Libraries/hunspell, /Libraries/hunspell ] 59 | 60 | # Additional search paths, which take precedence over the default 61 | # OS specific search dirs, searched in order, defaults last 62 | # search_dirs = [] 63 | 64 | # Adds additional dictionaries, which can be specified as 65 | # absolute paths or as paths relative to the search dirs (in this order). 66 | # Relative paths are resolved relative to the configuration file 67 | # which is used. 68 | # Refer to `man 5 hunspell` 69 | # or https://www.systutorials.com/docs/linux/man/4-hunspell/#lbAE 70 | # on how to define a custom dictionary file. 71 | extra_dictionaries = [] 72 | 73 | # If set to `true`, the OS specific default search paths 74 | # are skipped and only explicitly specified ones are used. 75 | skip_os_lookups = false 76 | 77 | # Use the builtin dictionaries if none were found 78 | # in the configured lookup paths. 79 | # Usually combined with `skip_os_lookups=true` 80 | # to enforce the `builtin` usage for consistent 81 | # results across distributions and CI runs. 82 | # Setting this will still use the dictionaries 83 | # specified in `extra_dictionaries = [..]` 84 | # for topic specific lingo. 85 | use_builtin = true 86 | 87 | 88 | [Hunspell.quirks] 89 | # Transforms words that are provided by the tokenizer 90 | # into word fragments based on the capture groups which are to 91 | # be checked. 92 | # If no capture groups are present, the matched word is whitelisted.
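# Illustration (not a shipped default): with the example expressions below, a
# token such as `2x` matches `^[0-9]+x$`, which has no capture groups, so the
# whole token is accepted as-is; the quoted-word pattern captures what sits
# between the quotes, so only that captured fragment is checked.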
93 | transform_regex = ["^'([^\\s])'$", "^[0-9]+x$"] 94 | # Accepts `alphabeta` variants if the checker provides a replacement suggestion 95 | # of `alpha-beta`. 96 | allow_concatenation = true 97 | # And the counterpart, which accepts words with dashes, when the suggestion has 98 | # recommendations without the dashes. This is less common. 99 | allow_dashed = false 100 | # Check the expressions in the footnote references. By default this is turned on 101 | # to remain backwards compatible, but disabling it could be particularly useful 102 | # when one uses abbreviations instead of numbers as footnote references. For 103 | # instance by default the fragment `hello[^xyz]` would be spellchecked as 104 | # `helloxyz` which is obviously a misspelled word, but by turning this check 105 | # off, it will skip validating the reference altogether and will only check the 106 | # word `hello`. 107 | check_footnote_references = false 108 | 109 | [NlpRules] 110 | # Allows the user to override the default included 111 | # exports of LanguageTool with other custom 112 | # languages 113 | 114 | # override_rules = "/path/to/rules_binencoded.bin" 115 | # override_tokenizer = "/path/to/tokenizer_binencoded.bin" 116 | 117 | [Reflow] 118 | # Reflows doc comments to adhere to a given maximum line width limit. 119 | max_line_length = 80 120 | ``` 121 | 122 | Add `-v` multiple times to increase verbosity. 123 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cargo-spellcheck 2 | 3 | [![crates.io](https://img.shields.io/crates/v/cargo_spellcheck.svg)](https://crates.io/crates/cargo-spellcheck) 4 | [![CI](https://ci.fff.rs/api/v1/teams/main/pipelines/cargo-spellcheck/jobs/master-validate/badge)](https://ci.fff.rs/teams/main/pipelines/cargo-spellcheck/jobs/master-validate) 5 | ![commits-since](https://img.shields.io/github/commits-since/drahnr/cargo-spellcheck/latest.svg) 6 | ![Crates.io MSRV](https://img.shields.io/crates/msrv/cargo-spellcheck) 7 | 8 | Check your spelling with `hunspell` and/or `nlprule`. 9 | 10 | ## Use Cases 11 | 12 | Run `cargo spellcheck --fix` or `cargo spellcheck fix` to fix all your 13 | documentation comments in order to avoid nasty typos all over your source tree. 14 | Meant as a helper simplifying review as well as improving CI checks after a 15 | learning phase for custom/topic specific lingo. 16 | 17 | `cargo-spellcheck` is also a valuable tool to run from git commit hooks or CI/CD 18 | systems. 19 | 20 | ### Check For Spelling and/or Grammar Mistakes 21 | 22 | ```zsh 23 | cargo spellcheck check 24 | ``` 25 | 26 |
error: spellcheck
 27 |    --> src/main.rs:44
 28 |     |
 29 |  44 | Fun facets shalld cause some erroris.
 30 |     |            ^^^^^^
 31 |     | - shall or shall d
 32 |     |
33 | 34 | ### Apply Suggestions Interactively 35 | 36 | ```zsh 37 | cargo spellcheck fix 38 | ``` 39 | 40 |
error: spellcheck(Hunspell)
 41 |     --> /media/supersonic1t/projects/cargo-spellcheck/src/literalset.rs:291
 42 |      |
 43 |  291 |  Returns literl within the Err variant if not adjacent
 44 |      |          ^^^^^^
 45 | 
 46 | (13/14) Apply this suggestion [y,n,q,a,d,j,e,?]?
 47 | 
 48 |    lite
 49 |    litter
 50 |    litterer
 51 |    liter l
 52 |    liters
 53 |    literal
 54 |    liter
 55 |  » a custom replacement literal
56 | 57 | ## Installation 58 | 59 | `cargo install --locked cargo-spellcheck` 60 | 61 | The `--locked` flag is the preferred way of installing to get the tested set of 62 | dependencies. 63 | 64 | On OS X, you need to ensure that `libclang.dylib` can be found by the linker, 65 | 66 | which can be achieved by setting `DYLD_FALLBACK_LIBRARY_PATH`: 67 | 68 | ``` 69 | export DYLD_FALLBACK_LIBRARY_PATH= \ 70 | "$(xcode-select --print-path)/Toolchains/XcodeDefault.xctoolchain/usr/lib/" 71 | ``` 72 | 73 | On Linux, the file is `libclang.so`, which can be installed via: 74 | 75 | ``` 76 | apt-get install libclang-dev 77 | ``` 78 | 79 | Afterwards, you can set the variable `LIBCLANG_PATH` via: 80 | 81 | ``` 82 | export LIBCLANG_PATH=/usr/lib/llvm-14/lib/ 83 | ``` 84 | 85 | ## Completions 86 | 87 | Use `cargo spellcheck completions` for autodetection of your current shell via 88 | `$SHELL`, 89 | 90 | or 91 | 92 | `cargo spellcheck completions --shell zsh` 93 | 94 | to explicitly specify your shell type. 95 | 96 | Commonly it's used like this from your shell's `.rc*` file: 97 | 98 | `source <(cargo spellcheck completions)` 99 | 100 | Note: There is a [relevant clap issue 101 | (#3508)](https://github.com/clap-rs/clap/issues/3508) that makes this fail in 102 | some cases. 103 | 104 | ## 🎈 Contribute! 105 | 106 | Contributions are very welcome! 107 | 108 | Generally the preferred way of doing so is to comment in an issue that you 109 | would like to tackle the implementation/fix. 110 | 111 | This is usually followed by an initial PR where the implementation is then 112 | discussed and iteratively refined. No need to get it all correct 113 | the first time! 114 | 115 | ## Documentation 116 | 117 | - [Features and Roadmap](docs/features.md) 118 | - [Remedies for common issues](docs/remedy.md) 119 | - [Configuration](docs/configuration.md) 120 | - [Available Checkers](docs/checkers.md) 121 | - [Automation of `cargo-spellcheck`](docs/automation.md) 122 | -------------------------------------------------------------------------------- /src/config/iso.rs: -------------------------------------------------------------------------------- 1 | //! Abstracts the combination of language code and country code into one 2 | //! convenient type. 3 | //! 4 | //! Language code follows the 5 | //! [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format. Country code 6 | //! follows the [Alpha-2 ISO_3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) 7 | //! format. 8 | //! 9 | //! It results in a mildly adapted [IETF language 10 | //! tag](https://en.wikipedia.org/wiki/IETF_language_tag). 11 | 12 | use iso_country::Country; 13 | use isolang::Language; 14 | 15 | use std::{fmt, str::FromStr}; 16 | 17 | use serde::de::{self, Deserialize, Deserializer}; 18 | use serde::ser::Serializer; 19 | 20 | /// 5 digit language and country code as used by the dictionaries.
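///
/// Rendered by the `Display` impl as `language_COUNTRY` (e.g. the default
/// `en_US`); the serde visitor below expects exactly this five character
/// `LL_CC` shape.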
21 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 22 | pub struct Lang5 { 23 | pub lang: Language, 24 | pub country: Country, 25 | } 26 | 27 | impl PartialEq for Lang5 { 28 | fn eq(&self, other: &str) -> bool { 29 | self.to_string().as_str() == other 30 | } 31 | } 32 | 33 | impl PartialEq for Lang5 34 | where 35 | X: AsRef, 36 | { 37 | fn eq(&self, other: &X) -> bool { 38 | self.to_string().as_str() == other.as_ref() 39 | } 40 | } 41 | 42 | impl<'a> PartialEq for &'a str { 43 | fn eq(&self, other: &Lang5) -> bool { 44 | let other = other.to_string(); 45 | *self == other.as_str() 46 | } 47 | } 48 | 49 | impl PartialEq for String { 50 | fn eq(&self, other: &Lang5) -> bool { 51 | *self == other.to_string() 52 | } 53 | } 54 | 55 | impl Default for Lang5 { 56 | fn default() -> Self { 57 | Self::en_US 58 | } 59 | } 60 | 61 | impl Lang5 { 62 | #[allow(non_upper_case_globals)] 63 | pub const en_US: Lang5 = Lang5 { 64 | lang: Language::Eng, 65 | country: Country::US, 66 | }; 67 | } 68 | 69 | impl fmt::Display for Lang5 { 70 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 71 | let language = self.lang.to_639_1().unwrap_or("??"); 72 | let country = self.country; 73 | write!(f, "{language}_{country}")?; 74 | Ok(()) 75 | } 76 | } 77 | 78 | #[derive(Debug, Clone, thiserror::Error)] 79 | #[error("Wrong character, expected '_' found '{0}'")] 80 | struct Lang5SpacerError(char); 81 | 82 | #[derive(Debug, Clone, Copy, Default)] 83 | struct Lang5Visitor; 84 | 85 | impl<'de> de::Visitor<'de> for Lang5Visitor { 86 | type Value = Lang5; 87 | 88 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 89 | write!( 90 | formatter, 91 | "Expected a 5 digit lang and country code in the form of LL_CC" 92 | ) 93 | } 94 | 95 | fn visit_str(self, v: &str) -> Result 96 | where 97 | E: de::Error, 98 | { 99 | self.visit_borrowed_str::(v) 100 | } 101 | 102 | fn visit_string(self, s: String) -> Result 103 | where 104 | E: de::Error, 105 | { 106 | self.visit_borrowed_str::(s.as_str()) 107 | } 108 | 109 | fn visit_borrowed_str(self, s: &'de str) -> Result 110 | where 111 | E: serde::de::Error, 112 | { 113 | if s.len() != 5 { 114 | return Err(serde::de::Error::custom(Lang5SpacerError('l'))); 115 | } 116 | let lang = Language::from_639_1(&s[0..2]) 117 | .ok_or(Lang5SpacerError('2')) 118 | .map_err(serde::de::Error::custom)?; 119 | let c = s.chars().nth(2).unwrap(); 120 | if c != '_' { 121 | return Err(serde::de::Error::custom(Lang5SpacerError(c)))?; 122 | } 123 | let country = Country::from_str(&s[3..5]).map_err(serde::de::Error::custom)?; 124 | Ok(Lang5 { lang, country }) 125 | } 126 | } 127 | 128 | impl<'de> Deserialize<'de> for Lang5 { 129 | fn deserialize(deserializer: D) -> Result 130 | where 131 | D: Deserializer<'de>, 132 | { 133 | deserializer.deserialize_str(Lang5Visitor) 134 | } 135 | } 136 | 137 | impl serde::Serialize for Lang5 { 138 | fn serialize(&self, serializer: S) -> Result 139 | where 140 | S: Serializer, 141 | { 142 | serializer.serialize_str(self.to_string().as_str()) 143 | } 144 | } 145 | 146 | #[cfg(test)] 147 | mod tests { 148 | use super::*; 149 | use assert_matches::assert_matches; 150 | 151 | const EXPECTED: Lang5 = Lang5 { 152 | lang: Language::Deu, 153 | country: Country::AU, 154 | }; 155 | const S: &str = "de_AU"; 156 | 157 | #[test] 158 | fn iso_lang_german_austria_serde() { 159 | assert_eq!(S.to_owned(), EXPECTED.to_string()); 160 | 161 | assert_matches!(serde_plain::from_str::(S), Ok(x) => assert_eq!(EXPECTED, x)); 162 | } 163 | 164 | #[test] 165 | fn cmp_variants() { 
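        // Exercises the `PartialEq` impls above: `Lang5` compares equal to its
        // string form whether given as `&str`, `String`, or references to them.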
166 | assert!(EXPECTED == S); 167 | assert!(EXPECTED == &S); 168 | assert!(EXPECTED == S.to_owned()); 169 | assert!(EXPECTED == &S.to_owned()); 170 | assert!(&EXPECTED == S); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/checker/nlprules.rs: -------------------------------------------------------------------------------- 1 | //! An NLP-based rule checker built on `nlprule` 2 | //! 3 | //! Checks grammar, and is only supposed to check grammar. Sentence 4 | //! splitting is done in a hand-waving way. To be improved. 5 | 6 | use super::{Checker, Detector, Suggestion}; 7 | use crate::{CheckableChunk, ContentOrigin}; 8 | 9 | use crate::errors::*; 10 | 11 | use std::collections::{hash_map::Entry, HashMap}; 12 | use std::{ 13 | path::{Path, PathBuf}, 14 | sync::{Arc, Mutex}, 15 | }; 16 | 17 | use nlprule::{Rules, Tokenizer}; 18 | 19 | use lazy_static::lazy_static; 20 | 21 | lazy_static! { 22 | static ref RULES: Mutex, Arc>> = Mutex::new(HashMap::new()); 23 | } 24 | 25 | pub(crate) fn filtered_rules + Clone>( 26 | override_path: Option
, 27 | ) -> Result> { 28 | match RULES 29 | .lock() 30 | .unwrap() 31 | .entry(override_path.clone().map(|x| x.as_ref().to_path_buf())) 32 | { 33 | Entry::Occupied(occupied) => Ok(occupied.get().clone()), 34 | Entry::Vacant(empty) => { 35 | let rules = super::rules(override_path)?; 36 | let rules = rules 37 | .rules() 38 | .iter() 39 | .filter(|rule| { 40 | match rule 41 | .category_type() 42 | .map(str::to_lowercase) 43 | .as_ref() 44 | .map(|x| x as &str) 45 | { 46 | // The hunspell backend is aware of 47 | // custom lingo, which this one is not, 48 | // so there would be a lot of false 49 | // positives. 50 | Some("misspelling") => false, 51 | // Anything quotes related is not relevant 52 | // for code documentation. 53 | Some("typographical") => false, 54 | _other => true, 55 | } 56 | }) 57 | .cloned() 58 | .collect::(); 59 | 60 | let rules = Arc::new(rules); 61 | empty.insert(rules.clone()); 62 | Ok(rules) 63 | } 64 | } 65 | } 66 | 67 | pub(crate) struct NlpRulesChecker { 68 | tokenizer: Arc, 69 | rules: Arc, 70 | } 71 | 72 | impl NlpRulesChecker { 73 | pub fn new(config: &::Config) -> Result { 74 | let tokenizer = super::tokenizer(config.override_tokenizer.as_ref())?; 75 | let rules = filtered_rules(config.override_tokenizer.as_ref())?; 76 | Ok(Self { tokenizer, rules }) 77 | } 78 | } 79 | 80 | impl Checker for NlpRulesChecker { 81 | type Config = crate::config::NlpRulesConfig; 82 | 83 | fn detector() -> Detector { 84 | Detector::NlpRules 85 | } 86 | 87 | fn check<'a, 's>( 88 | &self, 89 | origin: &ContentOrigin, 90 | chunks: &'a [CheckableChunk], 91 | ) -> Result>> 92 | where 93 | 'a: 's, 94 | { 95 | let mut acc = Vec::with_capacity(chunks.len()); 96 | 97 | for chunk in chunks { 98 | acc.extend(check_chunk( 99 | origin.clone(), 100 | chunk, 101 | &self.tokenizer, 102 | &self.rules, 103 | )); 104 | } 105 | 106 | Ok(acc) 107 | } 108 | } 109 | 110 | /// Check the plain text contained in chunk, which can be one or more sentences. 
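///
/// The chunk's markdown is first erased to plain text via `erase_cmark`,
/// `nlprule` suggestions are collected for that text, and their character
/// ranges are then mapped back onto source spans with `find_spans`.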
111 | fn check_chunk<'a>( 112 | origin: ContentOrigin, 113 | chunk: &'a CheckableChunk, 114 | tokenizer: &Tokenizer, 115 | rules: &Rules, 116 | ) -> Vec> { 117 | // TODO We should control which parts need to be ignored of the markdown 118 | // entities, however the `NlpRulesConfig`, which is the only configuration 119 | // we receive in the constructor does not contain the same quirks (or in 120 | // fact any other similar settings) as the Hunspell one, so we cannot obtain 121 | // this setting, therefore we fallback to default 122 | let plain = chunk.erase_cmark(&Default::default()); 123 | log::trace!("{plain:?}"); 124 | let txt = plain.as_str(); 125 | 126 | let mut acc = Vec::with_capacity(32); 127 | 128 | let nlpfixes = rules.suggest(txt, tokenizer); 129 | if nlpfixes.is_empty() { 130 | return Vec::new(); 131 | } 132 | 133 | 'nlp: for fix in nlpfixes { 134 | let message = fix.message(); 135 | let replacements = fix.replacements(); 136 | let start = fix.span().char().start; 137 | let end = fix.span().char().end; 138 | if start > end { 139 | log::debug!("BUG: crate nlprule yielded a negative range {:?} for chunk in {}, please file a bug", start..end, &origin); 140 | continue 'nlp; 141 | } 142 | let range = start..end; 143 | acc.extend( 144 | plain 145 | .find_spans(range) 146 | .into_iter() 147 | .map(|(range, span)| Suggestion { 148 | detector: Detector::NlpRules, 149 | range, 150 | span, 151 | origin: origin.clone(), 152 | replacements: replacements.to_vec(), 153 | chunk, 154 | description: Some(message.to_owned()), 155 | }), 156 | ); 157 | } 158 | 159 | acc 160 | } 161 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(dead_code)] 2 | #![deny(missing_docs)] 3 | // #![deny(unused_crate_dependencies)] 4 | #![allow(clippy::non_ascii_literal)] 5 | // be explicit about certain offsets and how they are constructed 6 | #![allow(clippy::identity_op)] 7 | // in small cli projects, this is ok for now 8 | #![allow(clippy::wildcard_imports)] 9 | // personal strong preference for `from_iter` 10 | #![allow(clippy::from_iter_instead_of_collect)] 11 | #![allow(clippy::new_without_default)] 12 | #![allow(clippy::items_after_statements)] 13 | // Prevent the stray dbg! macros 14 | #![cfg_attr(not(test), deny(clippy::dbg_macro))] 15 | #![cfg_attr(test, allow(clippy::dbg_macro))] 16 | 17 | //! cargo-spellcheck 18 | //! 19 | //! A syntax tree based doc comment and common mark spell checker. 20 | 21 | pub use doc_chunks as documentation; 22 | #[cfg(test)] 23 | pub(crate) use doc_chunks::{chyrp_up, fluff_up}; 24 | 25 | pub mod action; 26 | mod checker; 27 | mod config; 28 | pub mod errors; 29 | mod reflow; 30 | mod suggestion; 31 | mod tinhat; 32 | mod traverse; 33 | 34 | pub use self::action::*; 35 | pub use self::config::args::*; 36 | pub use self::config::{Config, HunspellConfig, LanguageToolConfig}; 37 | pub use self::documentation::span::*; 38 | pub use self::documentation::util::*; 39 | pub use self::documentation::{ 40 | util, CheckableChunk, Clusters, CommentVariant, CommentVariantCategory, ContentOrigin, 41 | Documentation, PlainOverlay, Range, 42 | }; 43 | pub use self::suggestion::*; 44 | pub use self::tinhat::*; 45 | 46 | use self::errors::{bail, Result}; 47 | 48 | use std::io::Write; 49 | 50 | #[cfg(target_os = "windows")] 51 | use signal_hook as _; 52 | 53 | use checker::Checker; 54 | 55 | /// A simple exit code representation. 
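/// (`Success` maps to process exit status `0`, `Signal` to `130`; see `as_u8`.)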
56 | /// 57 | /// `Custom` can be specified by the user, others map to their UNIX equivalents 58 | /// where available. 59 | #[derive(Debug, Clone, Copy, Eq, PartialEq)] 60 | pub enum ExitCode { 61 | /// Regular termination and does not imply anything in regards to spelling 62 | /// mistakes found or not. 63 | Success, 64 | /// Terminate requested by a *nix signal. 65 | Signal, 66 | /// A custom exit code, as specified with `--code=`. 67 | Custom(u8), 68 | // Failure is already default for `Err(_)` 69 | } 70 | 71 | impl ExitCode { 72 | /// Convert `ExitCode` to primitive. 73 | pub fn as_u8(&self) -> u8 { 74 | match *self { 75 | Self::Success => 0u8, 76 | Self::Signal => 130u8, 77 | Self::Custom(code) => code, 78 | } 79 | } 80 | } 81 | 82 | /// The inner main. 83 | pub fn run(args: Args) -> Result { 84 | let _ = ::rayon::ThreadPoolBuilder::new() 85 | .num_threads(args.job_count()) 86 | .build_global(); 87 | 88 | env_logger::Builder::from_env(env_logger::Env::new().filter_or("CARGO_SPELLCHECK", "warn")) 89 | .filter_level(args.verbosity()) 90 | .filter_module("nlprule", log::LevelFilter::Error) 91 | .filter_module("mio", log::LevelFilter::Error) 92 | .init(); 93 | 94 | #[cfg(not(target_os = "windows"))] 95 | signal_handler(move || { 96 | if let Err(e) = action::interactive::ScopedRaw::restore_terminal() { 97 | log::warn!("Failed to restore terminal: {e}"); 98 | } 99 | }); 100 | 101 | let (unified, config) = match &args.command { 102 | Some(Sub::Completions { shell }) => { 103 | let sink = &mut std::io::stdout(); 104 | generate_completions(*shell, sink); 105 | let _ = sink.flush(); 106 | return Ok(ExitCode::Success); 107 | } 108 | _ => args.unified()?, 109 | }; 110 | 111 | match unified { 112 | // must unify first, for the proper paths 113 | UnifiedArgs::Config { 114 | dest_config, 115 | checker_filter_set, 116 | } => { 117 | log::trace!("Configuration chore"); 118 | let mut config = Config::full(); 119 | Args::checker_selection_override( 120 | checker_filter_set.as_ref().map(AsRef::as_ref), 121 | &mut config, 122 | )?; 123 | 124 | match dest_config { 125 | ConfigWriteDestination::Stdout => { 126 | println!("{}", config.to_toml()?); 127 | return Ok(ExitCode::Success); 128 | } 129 | ConfigWriteDestination::File { overwrite, path } => { 130 | if path.exists() && !overwrite { 131 | bail!( 132 | "Attempting to overwrite {} requires `--force`.", 133 | path.display() 134 | ); 135 | } 136 | 137 | log::info!("Writing configuration file to {}", path.display()); 138 | config.write_values_to_path(path)?; 139 | } 140 | } 141 | Ok(ExitCode::Success) 142 | } 143 | UnifiedArgs::Operate { 144 | action, 145 | paths, 146 | recursive, 147 | skip_readme, 148 | config_path, 149 | dev_comments, 150 | exit_code_override, 151 | } => { 152 | log::debug!("Executing: {action:?} with {config:?} from {config_path:?}"); 153 | 154 | let documents = 155 | traverse::extract(paths, recursive, skip_readme, dev_comments, &config)?; 156 | 157 | let rt = tokio::runtime::Runtime::new()?; 158 | let finish = rt.block_on(async move { action.run(documents, config).await })?; 159 | 160 | match finish { 161 | Finish::Success | Finish::MistakeCount(0) => Ok(ExitCode::Success), 162 | Finish::MistakeCount(_n) => Ok(ExitCode::Custom(exit_code_override)), 163 | Finish::Abort => Ok(ExitCode::Signal), 164 | } 165 | } 166 | } 167 | } 168 | 169 | #[cfg(test)] 170 | mod tests; 171 | -------------------------------------------------------------------------------- /src/checker/cached.rs: 
-------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | 3 | use hex::ToHex; 4 | use serde::de::DeserializeOwned; 5 | use serde::ser::Serialize; 6 | use sha2::Digest; 7 | use std::io::Seek; 8 | use std::path::Path; 9 | use std::time::{Duration, Instant}; 10 | 11 | #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] 12 | struct CacheEntry { 13 | what: String, 14 | val: T, 15 | } 16 | 17 | pub struct CachedValue { 18 | /// Time it took to.. 19 | /// load the value from disk if it was there. 20 | pub fetch: Option, 21 | /// Updating the disk cache 22 | pub update: Option, 23 | /// Create a new one if needed 24 | pub creation: Option, 25 | /// The accumulated duration, 26 | pub total: Duration, 27 | /// The actual value. 28 | pub value: T, 29 | } 30 | 31 | pub struct Cached { 32 | cache_file: fd_lock::RwLock, 33 | // What to cache. 34 | what: String, 35 | _phantom: std::marker::PhantomData, 36 | } 37 | 38 | impl<'a, T> Cached 39 | where 40 | T: Serialize + DeserializeOwned, 41 | { 42 | /// Create a new `Cached` instance, to create a expanded version of something that's identified by `what`. 43 | pub fn new(what: impl AsRef, cache_dir: impl AsRef) -> Result { 44 | let what = what.as_ref(); 45 | let what_digest = sha2::Sha256::digest(what.as_bytes()); 46 | let cache_dir = cache_dir.as_ref(); 47 | fs_err::create_dir_all(cache_dir)?; 48 | let cache_file = cache_dir.join(what_digest.as_slice().encode_hex::()); 49 | let cache_file = fs_err::OpenOptions::new() 50 | .create(true) 51 | .read(true) 52 | .write(true) 53 | .open(cache_file)?; 54 | Ok(Self { 55 | cache_file: fd_lock::RwLock::new(cache_file), 56 | what: what.to_owned(), 57 | _phantom: std::marker::PhantomData, 58 | }) 59 | } 60 | 61 | pub fn fetch_or_update( 62 | &mut self, 63 | create: impl FnOnce(&str) -> Result, 64 | ) -> Result> { 65 | let total_start = Instant::now(); 66 | match self.fetch() { 67 | Ok(Some(value)) => { 68 | let elapsed = total_start.elapsed(); 69 | Ok(CachedValue { 70 | value, 71 | fetch: Some(elapsed), 72 | update: None, 73 | creation: None, 74 | total: elapsed, 75 | }) 76 | } 77 | Ok(None) => { 78 | let fetch = Some(total_start.elapsed()); 79 | 80 | let creation_start = Instant::now(); 81 | let value = create(self.what.as_str())?; 82 | let creation = Some(creation_start.elapsed()); 83 | 84 | let update_start = Instant::now(); 85 | if let Err(err) = self.update(&value) { 86 | log::warn!("Failed to write value to cached: {err:?}"); 87 | } 88 | let update = Some(update_start.elapsed()); 89 | let total = total_start.elapsed(); 90 | Ok(CachedValue { 91 | value, 92 | fetch, 93 | update, 94 | creation, 95 | total, 96 | }) 97 | } 98 | Err(err) => { 99 | log::warn!("Overriding existing value that failed to load: {err:?}"); 100 | 101 | let fetch = Some(total_start.elapsed()); 102 | 103 | let creation_start = Instant::now(); 104 | let value = create(self.what.as_str())?; 105 | let creation = Some(creation_start.elapsed()); 106 | 107 | let update_start = Instant::now(); 108 | if let Err(err) = self.update(&value) { 109 | log::warn!("Failed to update cached: {err:?}"); 110 | } 111 | let update = Some(update_start.elapsed()); 112 | let total = total_start.elapsed(); 113 | Ok(CachedValue { 114 | value, 115 | fetch, 116 | update, 117 | creation, 118 | total, 119 | }) 120 | } 121 | } 122 | } 123 | pub fn fetch(&mut self) -> Result> { 124 | let guard = self.cache_file.read()?; 125 | let buf = std::io::BufReader::new(guard.file()); 126 | // let buf = 
xz2::bufread::XzDecoder::new(buf); 127 | match bincode::deserialize_from(buf) { 128 | Ok(CacheEntry { what, val }) => { 129 | if what == self.what { 130 | log::debug!("Cached value with matching what \"{what}\""); 131 | Ok(Some(val)) 132 | } else { 133 | log::warn!( 134 | "Cached value what \"{}\" does not match expect what \"{}\", removing", 135 | what, 136 | self.what 137 | ); 138 | Ok(None) 139 | } 140 | } 141 | Err(e) => { 142 | log::warn!("Failed to load cached value: {e:?}"); 143 | Ok(None) 144 | } 145 | } 146 | } 147 | 148 | pub fn update(&mut self, val: &T) -> Result<()> { 149 | let mut write_guard = self.cache_file.write()?; 150 | 151 | let entry = CacheEntry { 152 | what: self.what.clone(), 153 | val, 154 | }; 155 | let encoded: Vec = bincode::serialize(&entry).unwrap(); 156 | let mut encoded = &encoded[..]; 157 | // let mut compressed = xz2::bufread::XzEncoder::new(&mut encoded, 6); 158 | 159 | // effectively truncate, but without losing the lock 160 | let file = write_guard.file_mut(); 161 | file.rewind()?; 162 | std::io::copy(&mut encoded, file)?; 163 | let loco = file.stream_position()?; 164 | file.set_len(loco)?; 165 | Ok(()) 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/checker/dictaffix.rs: -------------------------------------------------------------------------------- 1 | use super::hunspell::cache_builtin; 2 | use super::Result; 3 | use crate::config::{Lang5, SearchDirs}; 4 | use color_eyre::eyre::{bail, eyre, WrapErr}; 5 | use fs_err as fs; 6 | use itertools::Itertools; 7 | use std::io; 8 | use std::io::BufRead; 9 | use std::path::{Path, PathBuf}; 10 | 11 | pub(crate) struct DicAff { 12 | pub(crate) dic: String, 13 | pub(crate) aff: String, 14 | } 15 | 16 | impl DicAff { 17 | pub(crate) fn load( 18 | extra_dictionaries: &[std::path::PathBuf], 19 | search_dirs: &SearchDirs, 20 | lang: Lang5, 21 | use_builtin: bool, 22 | skip_os_lookups: bool, 23 | ) -> Result { 24 | let lang = lang.to_string(); 25 | let lang = lang.as_str(); 26 | 27 | // lookup paths are really just an attempt to provide a dictionary, so be more forgiving 28 | // when encountering errors here 29 | let (dic, aff): (PathBuf, PathBuf) = search_dirs.iter(!skip_os_lookups) 30 | .filter(|search_dir| { 31 | let keep = search_dir.is_dir(); 32 | if !keep { 33 | // search_dir also contains the default paths, so just silently ignore these 34 | log::debug!( 35 | target: "affdic", 36 | "Dictionary search path is not a directory {}", 37 | search_dir.display() 38 | ); 39 | } else { 40 | log::debug!( 41 | target: "affdic", 42 | "Found dictionary search path {}", 43 | search_dir.display() 44 | ); 45 | } 46 | keep 47 | }) 48 | .find_map(|search_dir| { 49 | let dic = search_dir.join(lang).with_extension("dic"); 50 | if !dic.is_file() { 51 | log::debug!( 52 | target: "affdic", 53 | "Dictionary path dervied from search dir is not a file {}", 54 | dic.display() 55 | ); 56 | return None; 57 | } 58 | let aff = search_dir.join(lang).with_extension("aff"); 59 | if !aff.is_file() { 60 | log::debug!( 61 | target: "affdic", 62 | "Affixes path dervied from search dir is not a file {}", 63 | aff.display() 64 | ); 65 | return None; 66 | } 67 | log::debug!("Using dic {} and aff {}", dic.display(), aff.display()); 68 | Some((dic, aff)) 69 | }) 70 | .ok_or_else(|| { 71 | eyre!("Failed to find any {lang}.dic / {lang}.aff in any search dir or no search provided", 72 | lang = lang) 73 | }) 74 | .or_else(|e| { 75 | if use_builtin { 76 | Ok(cache_builtin()?) 
77 | } else { 78 | Err(e) 79 | } 80 | })?; 81 | 82 | let dic = fs_err::read_to_string(&dic)?; 83 | let aff = fs_err::read_to_string(&aff)?; 84 | 85 | // We need to combine multiple dictionaries into one 86 | // since we want suffix support rather than plain word lists 87 | let mut dic_acc = dic; 88 | 89 | // suggestion must contain the word itself if it is valid extra dictionary 90 | // be more strict about the extra dictionaries, they have to exist 91 | log::info!(target: "dicaff", "Adding {} extra dictionaries", extra_dictionaries.len()); 92 | 93 | for extra_dic_path in extra_dictionaries { 94 | log::debug!(target: "affdic", "Adding extra dictionary {}", extra_dic_path.display()); 95 | // after calling `sanitize_paths` 96 | // the ought to be all absolutes 97 | assert!(extra_dic_path.is_absolute()); 98 | let extra_dic = fs::read_to_string(extra_dic_path)?; 99 | is_valid_hunspell_dic(&mut extra_dic.as_bytes())?; 100 | log::trace!(target: "affdic", "Adding extra dict to main dict: {}", extra_dic.trim().lines().count() - 1); 101 | dic_acc.push('\n'); 102 | // trim the initil number 103 | dic_acc.push_str( 104 | extra_dic 105 | .trim() 106 | .split_once("\n") 107 | .expect("It's a valid dictionary. qed") 108 | .1, 109 | ); 110 | } 111 | 112 | // sort them, just in case 113 | let mut counter = 0; 114 | let dic = dic_acc 115 | .lines() 116 | .inspect(|_line| counter += 1) 117 | .sorted() 118 | .unique() 119 | .join("\n"); 120 | let counter = counter.to_string(); 121 | let dic = counter + "\n" + dic.trim(); 122 | 123 | log::trace!(target: "affdic", "Total dictionary entries are: {}", dic.trim().lines().count() - 1); 124 | 125 | Ok(Self { dic, aff }) 126 | } 127 | } 128 | 129 | /// Check if provided path has valid dictionary format. 130 | /// 131 | /// This is a YOLO check. 132 | pub(crate) fn is_valid_hunspell_dic_path(path: impl AsRef) -> Result<()> { 133 | let reader = io::BufReader::new(fs::File::open(path.as_ref())?); 134 | is_valid_hunspell_dic(reader) 135 | } 136 | 137 | /// Check a reader for correct hunspell format. 138 | pub(crate) fn is_valid_hunspell_dic(reader: impl BufRead) -> Result<()> { 139 | let mut iter = reader.lines().enumerate(); 140 | if let Some((_lineno, first)) = iter.next() { 141 | let first = first?; 142 | let _ = first.parse::().wrap_err_with(|| { 143 | eyre!("First line of extra dictionary must a number, but is: >{first}<") 144 | })?; 145 | } 146 | // Just check the first 10 lines, don't waste much time here 147 | // the first two are the most important ones. 148 | for (lineno, line) in iter.take(10) { 149 | // All lines after must be format x. 150 | if let Ok(num) = line?.parse::() { 151 | bail!("Line {lineno} of extra dictionary must not be a number, but is: >{num}<",) 152 | }; 153 | } 154 | Ok(()) 155 | } 156 | -------------------------------------------------------------------------------- /src/action/bandaid.rs: -------------------------------------------------------------------------------- 1 | //! A mistake bandaid. 2 | //! 3 | //! A `BandAid` covers the mistake with a suggested replacement, as picked by 4 | //! the user. 5 | 6 | use crate::documentation::Span; 7 | 8 | /// A chosen suggestion for a certain span 9 | #[derive(Debug, Clone, PartialEq, Eq)] 10 | pub struct BandAid { 11 | /// `String` replaces the content covered by `Span` 12 | pub content: String, 13 | /// range which will be replaced 14 | pub span: Span, 15 | } 16 | 17 | impl BandAid { 18 | /// Check if the bandaid covers `line` which is 1 indexed. 
19 | pub fn covers_line(&self, line: usize) -> bool { 20 | self.span.covers_line(line) 21 | } 22 | } 23 | 24 | impl From<(String, &Span)> for BandAid { 25 | fn from((replacement, span): (String, &Span)) -> Self { 26 | Self { 27 | content: replacement, 28 | span: *span, 29 | } 30 | } 31 | } 32 | 33 | #[cfg(test)] 34 | pub(crate) mod tests { 35 | 36 | use crate::util::load_span_from; 37 | 38 | use crate::{LineColumn, Span}; 39 | 40 | #[test] 41 | fn span_helper_integrity() { 42 | const SOURCE: &str = r#"0 43 | abcde 44 | f 45 | g 46 | hijk 47 | l 48 | "#; 49 | 50 | struct TestSet { 51 | span: Span, 52 | expected: &'static str, 53 | } 54 | 55 | const SETS: &[TestSet] = &[ 56 | TestSet { 57 | span: Span { 58 | start: LineColumn { 59 | line: 1usize, 60 | column: 0, 61 | }, 62 | end: LineColumn { 63 | line: 1usize, 64 | column: 0, 65 | }, 66 | }, 67 | expected: "0", 68 | }, 69 | TestSet { 70 | span: Span { 71 | start: LineColumn { 72 | line: 2usize, 73 | column: 2, 74 | }, 75 | end: LineColumn { 76 | line: 2usize, 77 | column: 4, 78 | }, 79 | }, 80 | expected: "cde", 81 | }, 82 | TestSet { 83 | span: Span { 84 | start: LineColumn { 85 | line: 5usize, 86 | column: 0, 87 | }, 88 | end: LineColumn { 89 | line: 5usize, 90 | column: 1, 91 | }, 92 | }, 93 | expected: "hi", 94 | }, 95 | ]; 96 | 97 | for item in SETS { 98 | assert_eq!( 99 | load_span_from(SOURCE.as_bytes(), item.span).unwrap(), 100 | item.expected.to_string() 101 | ); 102 | } 103 | } 104 | 105 | #[test] 106 | fn try_from_string_works() { 107 | const TEST: &str = include_str!("../../demo/src/main.rs"); 108 | 109 | const EXPECTED: &[Span] = &[ 110 | Span { 111 | start: LineColumn { line: 1, column: 4 }, 112 | end: LineColumn { line: 1, column: 7 }, 113 | }, 114 | Span { 115 | start: LineColumn { line: 1, column: 9 }, 116 | end: LineColumn { line: 1, column: 9 }, 117 | }, 118 | Span { 119 | start: LineColumn { 120 | line: 1, 121 | column: 11, 122 | }, 123 | end: LineColumn { 124 | line: 1, 125 | column: 13, 126 | }, 127 | }, 128 | Span { 129 | start: LineColumn { 130 | line: 1, 131 | column: 15, 132 | }, 133 | end: LineColumn { 134 | line: 1, 135 | column: 20, 136 | }, 137 | }, 138 | Span { 139 | start: LineColumn { 140 | line: 1, 141 | column: 22, 142 | }, 143 | end: LineColumn { 144 | line: 1, 145 | column: 27, 146 | }, 147 | }, 148 | Span { 149 | start: LineColumn { 150 | line: 1, 151 | column: 28, 152 | }, 153 | end: LineColumn { 154 | line: 1, 155 | column: 28, 156 | }, 157 | }, 158 | ]; 159 | 160 | crate::checker::tests::extraction_test_body(TEST, EXPECTED); 161 | } 162 | 163 | #[test] 164 | fn try_from_raw_string_works() { 165 | const TEST: &str = include_str!("../../demo/src/lib.rs"); 166 | let fn_with_doc = TEST 167 | .lines() 168 | .skip(18) 169 | .take(4) 170 | .fold(String::new(), |acc, line| acc + line); 171 | 172 | const EXPECTED: &[Span] = &[ 173 | Span { 174 | start: LineColumn { 175 | line: 1, 176 | column: 11, 177 | }, 178 | end: LineColumn { 179 | line: 1, 180 | column: 14, 181 | }, 182 | }, 183 | Span { 184 | start: LineColumn { 185 | line: 1, 186 | column: 16, 187 | }, 188 | end: LineColumn { 189 | line: 1, 190 | column: 18, 191 | }, 192 | }, 193 | Span { 194 | start: LineColumn { 195 | line: 1, 196 | column: 20, 197 | }, 198 | end: LineColumn { 199 | line: 1, 200 | column: 21, 201 | }, 202 | }, 203 | Span { 204 | start: LineColumn { 205 | line: 1, 206 | column: 23, 207 | }, 208 | end: LineColumn { 209 | line: 1, 210 | column: 26, 211 | }, 212 | }, 213 | Span { 214 | start: LineColumn { 215 | line: 1, 216 | column: 27, 
217 | }, 218 | end: LineColumn { 219 | line: 1, 220 | column: 27, 221 | }, 222 | }, 223 | ]; 224 | 225 | crate::checker::tests::extraction_test_body(dbg!(fn_with_doc.as_str()), EXPECTED); 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /doc-chunks/src/cluster.rs: -------------------------------------------------------------------------------- 1 | //! Cluster `proc_macro2::Literal`s into `LiteralSets` 2 | 3 | use syn::spanned::Spanned; 4 | use syn::LitStr; 5 | use syn::Macro; 6 | use syn::Token; 7 | 8 | use super::{LiteralSet, TokenTree, TrimmedLiteral}; 9 | use crate::developer::extract_developer_comments; 10 | 11 | use crate::errors::*; 12 | use crate::Span; 13 | 14 | mod kw { 15 | syn::custom_keyword!(doc); 16 | } 17 | 18 | enum DocContent { 19 | LitStr(LitStr), 20 | Macro(Macro), 21 | } 22 | impl DocContent { 23 | fn span(&self) -> proc_macro2::Span { 24 | match self { 25 | Self::LitStr(inner) => inner.span(), 26 | Self::Macro(inner) => inner.span(), 27 | } 28 | } 29 | } 30 | 31 | struct DocComment { 32 | #[allow(dead_code)] 33 | doc: kw::doc, 34 | #[allow(dead_code)] 35 | eq_token: Token![=], 36 | content: DocContent, 37 | } 38 | 39 | impl syn::parse::Parse for DocComment { 40 | fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result { 41 | let doc = input.parse::()?; 42 | let eq_token: Token![=] = input.parse()?; 43 | 44 | let lookahead = input.lookahead1(); 45 | let content = if lookahead.peek(LitStr) { 46 | input.parse().map(DocContent::LitStr)? 47 | } else { 48 | input.parse().map(DocContent::Macro)? 49 | }; 50 | Ok(Self { 51 | doc, 52 | eq_token, 53 | content, 54 | }) 55 | } 56 | } 57 | 58 | /// Cluster comments together, such they appear as continuous text blocks. 59 | #[derive(Debug)] 60 | pub struct Clusters { 61 | pub(crate) set: Vec, 62 | } 63 | 64 | impl Clusters { 65 | /// Only works if the file is processed line by line, otherwise requires a 66 | /// adjacency list. 67 | fn process_literal(&mut self, source: &str, comment: DocComment) -> Result<()> { 68 | let span = Span::from(comment.content.span()); 69 | let trimmed_literal = match comment.content { 70 | DocContent::LitStr(_s) => TrimmedLiteral::load_from(source, span)?, 71 | DocContent::Macro(_) => { 72 | TrimmedLiteral::new_empty(source, span, crate::CommentVariant::MacroDocEqMacro) 73 | } 74 | }; 75 | if let Some(cls) = self.set.last_mut() { 76 | if let Err(trimmed_literal) = cls.add_adjacent(trimmed_literal) { 77 | log::trace!(target: "documentation", 78 | "appending, but failed to append: {trimmed_literal:?} to set {cls:?}", 79 | ); 80 | self.set.push(LiteralSet::from(trimmed_literal)) 81 | } else { 82 | log::trace!("successfully appended to existing: {cls:?} to set"); 83 | } 84 | } else { 85 | self.set.push(LiteralSet::from(trimmed_literal)); 86 | } 87 | Ok(()) 88 | } 89 | 90 | /// Helper function to parse a stream and associate the found literals. 
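///
/// Recurses into nested token groups and feeds every `#[doc = ...]` content it
/// finds to `process_literal`, so adjacent doc lines are clustered into the
/// same `LiteralSet`.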
91 | pub fn parse_token_tree( 92 | &mut self, 93 | source: &str, 94 | stream: proc_macro2::TokenStream, 95 | ) -> Result<()> { 96 | let iter = stream.into_iter(); 97 | for tree in iter { 98 | if let TokenTree::Group(group) = tree { 99 | if let Ok(comment) = syn::parse2::(group.stream()) { 100 | if let Err(e) = self.process_literal(source, comment) { 101 | log::error!("BUG: Failed to guarantee literal content/span integrity: {e}"); 102 | continue; 103 | } 104 | } else { 105 | self.parse_token_tree(source, group.stream())?; 106 | } 107 | }; 108 | } 109 | Ok(()) 110 | } 111 | 112 | /// From the given source text, extracts developer comments to `LiteralSet`s 113 | /// and adds them to this `Clusters` 114 | fn parse_developer_comments(&mut self, source: &str) { 115 | let developer_comments = extract_developer_comments(source); 116 | self.set.extend(developer_comments); 117 | } 118 | 119 | /// Sort the `LiteralSet`s in this `Cluster` by start line descending, to 120 | /// ensure that the comments higher up in the source file appear first to 121 | /// the user 122 | fn ensure_sorted(&mut self) { 123 | self.set.sort_by(|ls1, ls2| ls1.coverage.cmp(&ls2.coverage)); 124 | } 125 | 126 | /// Load clusters from a `&str`. Optionally loads developer comments as 127 | /// well. 128 | pub fn load_from_str(source: &str, doc_comments: bool, dev_comments: bool) -> Result { 129 | let mut chunk = Self { 130 | set: Vec::with_capacity(64), 131 | }; 132 | if doc_comments { 133 | let stream = 134 | syn::parse_str::(source).map_err(Error::ParserFailure)?; 135 | chunk.parse_token_tree(source, stream)?; 136 | } 137 | if dev_comments { 138 | chunk.parse_developer_comments(source); 139 | } 140 | chunk.ensure_sorted(); 141 | Ok(chunk) 142 | } 143 | } 144 | 145 | #[cfg(test)] 146 | mod tests { 147 | use super::*; 148 | 149 | #[test] 150 | fn doc_comment_parse() { 151 | let _ = syn::parse_str::(r########"doc=foo!(bar!(xxx))"########).unwrap(); 152 | let _ = syn::parse_str::(r########"doc="s""########).unwrap(); 153 | let _ = syn::parse_str::(r########"doc=r#"s"#"########).unwrap(); 154 | let _ = syn::parse_str::(r########"doc=r##"s"##"########).unwrap(); 155 | let _ = syn::parse_str::(r########"doc=r###"s"###"########).unwrap(); 156 | let _ = syn::parse_str::(r########"doc=r####"s"####"########).unwrap(); 157 | } 158 | 159 | #[test] 160 | fn create_cluster() { 161 | static CONTENT: &str = r#####" 162 | mod mm_mm { 163 | 164 | /// A 165 | #[doc=foo!(B)] 166 | /// C 167 | #[doc=r##"D"##] 168 | struct X; 169 | 170 | } 171 | "#####; 172 | let clusters = Clusters::load_from_str(CONTENT, true, true).unwrap(); 173 | assert_eq!(clusters.set.len(), 1); 174 | dbg!(&clusters.set[0]); 175 | } 176 | 177 | #[test] 178 | fn space_in_code_block_does_not_break_cluster() { 179 | static CONTENT: &str = r#####" 180 | // ```c 181 | // hugloboi 182 | // 183 | // fucksteufelswuid 184 | // ``` 185 | struct DefinitelyNotZ; 186 | "#####; 187 | let clusters = Clusters::load_from_str(CONTENT, true, true).unwrap(); 188 | assert_eq!(clusters.set.len(), 1); 189 | dbg!(&clusters.set[0]); 190 | } 191 | 192 | #[test] 193 | fn polite() { 194 | static CONTENT: &str = r#####" 195 | // Hello Sir 196 | // 197 | // How are you doing today? 
198 | struct VeryWellThanks; 199 | "#####; 200 | let clusters = Clusters::load_from_str(CONTENT, true, true).unwrap(); 201 | assert_eq!(clusters.set.len(), 1); 202 | dbg!(&clusters.set[0]); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/config/hunspell.rs: -------------------------------------------------------------------------------- 1 | //! Hunspell checker configuration. 2 | 3 | use super::{Lang5, SearchDirs, WrappedRegex}; 4 | use std::path::{Path, PathBuf}; 5 | 6 | use crate::errors::*; 7 | 8 | use serde::{Deserialize, Serialize}; 9 | 10 | const fn yes() -> bool { 11 | true 12 | } 13 | 14 | #[derive(Deserialize, Serialize, Debug, Clone)] 15 | pub struct Quirks { 16 | /// A regular expression, whose capture groups will be checked, instead of 17 | /// the initial token. Only the first one that matches will be used to split 18 | /// the word. 19 | #[serde(default)] 20 | pub transform_regex: Vec, 21 | /// Allow concatenated words instead of dashed connection. Note that this 22 | /// only applies, if one of the suggested replacements has an item that is 23 | /// equivalent except for addition dashes (`-`). 24 | #[serde(default)] 25 | pub allow_concatenation: bool, 26 | /// The counterpart of `allow_concatenation`. Accepts words which have 27 | /// replacement suggestions that contain additional dashes. 28 | #[serde(default)] 29 | pub allow_dashes: bool, 30 | /// Treats sequences of emojis as OK. 31 | #[serde(default = "yes")] 32 | pub allow_emojis: bool, 33 | /// Check the expressions in the footnote references. By default this is 34 | /// turned on to remain backwards compatible but disabling it could be 35 | /// particularly useful when one uses abbreviations instead of numbers as 36 | /// footnote references. For instance by default the fragment `hello[^xyz]` 37 | /// would be spellchecked as `helloxyz` which is obviously a misspelled 38 | /// word, but by turning this check off, it will skip validating the 39 | /// reference altogether and will only check the word `hello`. 40 | #[serde(default = "yes")] 41 | pub check_footnote_references: bool, 42 | } 43 | 44 | impl Default for Quirks { 45 | fn default() -> Self { 46 | Self { 47 | transform_regex: Vec::new(), 48 | allow_concatenation: false, 49 | allow_dashes: false, 50 | allow_emojis: true, 51 | check_footnote_references: true, 52 | } 53 | } 54 | } 55 | 56 | impl Quirks { 57 | pub(crate) const fn allow_concatenated(&self) -> bool { 58 | self.allow_concatenation 59 | } 60 | 61 | pub(crate) const fn allow_dashed(&self) -> bool { 62 | self.allow_dashes 63 | } 64 | 65 | pub(crate) const fn allow_emojis(&self) -> bool { 66 | self.allow_emojis 67 | } 68 | 69 | pub(crate) fn transform_regex(&self) -> &[WrappedRegex] { 70 | &self.transform_regex 71 | } 72 | 73 | pub(crate) fn check_footnote_references(&self) -> bool { 74 | self.check_footnote_references 75 | } 76 | } 77 | 78 | fn default_tokenization_splitchars() -> String { 79 | "\",;:.!?#(){}[]|/_-‒'`&@§¶…".to_owned() 80 | } 81 | 82 | pub type ZetConfig = HunspellConfig; 83 | pub type SpellbookConfig = HunspellConfig; 84 | 85 | #[derive(Deserialize, Serialize, Debug, Clone)] 86 | #[serde(deny_unknown_fields)] 87 | pub struct HunspellConfig { 88 | /// The language we want to check against, used as the dictionary and 89 | /// affixes file name. 90 | #[serde(default)] 91 | pub lang: Lang5, 92 | /// Additional search directories for `.dic` and `.aff` files. 
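///
/// Searched in order and ahead of the OS specific defaults; relative entries
/// are made absolute against the provided base path (typically the
/// configuration file's directory) by `sanitize_paths`.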
93 | // must be option so it can be omitted in the config 94 | #[serde(default)] 95 | pub search_dirs: SearchDirs, 96 | 97 | /// Avoid the OS provided dictionaries and only use the builtin ones, 98 | /// besides those defined in `extra_dictionaries`. 99 | #[serde(default)] 100 | pub skip_os_lookups: bool, 101 | 102 | /// Use the builtin dictionaries as last resort. Usually combined with 103 | /// `skip_os_lookups=true` to enforce the `builtin` usage. Does not prevent 104 | /// the usage of `extra_dictionaries`. 105 | #[serde(default)] 106 | pub use_builtin: bool, 107 | 108 | #[serde(default = "default_tokenization_splitchars")] 109 | pub tokenization_splitchars: String, 110 | 111 | /// Additional dictionaries for topic specific lingo. 112 | #[serde(default)] 113 | pub extra_dictionaries: Vec, 114 | /// Additional quirks besides dictionary lookups. 115 | #[serde(default)] 116 | pub quirks: Quirks, 117 | } 118 | 119 | impl Default for HunspellConfig { 120 | fn default() -> Self { 121 | Self { 122 | lang: Lang5::en_US, 123 | search_dirs: SearchDirs::default(), 124 | extra_dictionaries: Vec::default(), 125 | quirks: Quirks::default(), 126 | tokenization_splitchars: default_tokenization_splitchars(), 127 | skip_os_lookups: false, 128 | use_builtin: true, 129 | } 130 | } 131 | } 132 | 133 | impl HunspellConfig { 134 | pub fn lang(&self) -> Lang5 { 135 | self.lang 136 | } 137 | 138 | pub fn search_dirs(&self) -> impl Iterator { 139 | self.search_dirs.iter(!self.skip_os_lookups) 140 | } 141 | 142 | pub fn extra_dictionaries(&self) -> impl Iterator { 143 | self.extra_dictionaries.iter() 144 | } 145 | 146 | pub fn sanitize_paths(&mut self, base: &Path) -> Result<()> { 147 | self.search_dirs = self 148 | .search_dirs 149 | .iter(!self.skip_os_lookups) 150 | .filter_map(|search_dir| { 151 | let abspath = if !search_dir.is_absolute() { 152 | base.join(search_dir) 153 | } else { 154 | search_dir.to_owned() 155 | }; 156 | 157 | abspath.canonicalize().ok().inspect(|abspath| { 158 | log::trace!( 159 | "Sanitized ({} + {}) -> {}", 160 | base.display(), 161 | search_dir.display(), 162 | abspath.display() 163 | ); 164 | }) 165 | }) 166 | .collect::>() 167 | .into(); 168 | 169 | // convert all extra dictionaries to absolute paths 170 | 171 | 'o: for extra_dic in self.extra_dictionaries.iter_mut() { 172 | for search_dir in 173 | self.search_dirs 174 | .iter(!self.skip_os_lookups) 175 | .filter_map(|search_dir| { 176 | if !extra_dic.is_absolute() { 177 | base.join(search_dir).canonicalize().ok() 178 | } else { 179 | Some(search_dir.to_owned()) 180 | } 181 | }) 182 | { 183 | let abspath = if !extra_dic.is_absolute() { 184 | search_dir.join(&extra_dic) 185 | } else { 186 | continue 'o; 187 | }; 188 | if let Ok(abspath) = abspath.canonicalize() { 189 | if abspath.is_file() { 190 | *extra_dic = abspath; 191 | continue 'o; 192 | } 193 | } else { 194 | log::debug!("Failed to canonicalize {}", abspath.display()); 195 | } 196 | } 197 | bail!( 198 | "Could not find extra dictionary {} in any of the search paths", 199 | extra_dic.display() 200 | ); 201 | } 202 | 203 | Ok(()) 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /src/traverse/iter.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use crate::Documentation; 3 | 4 | use fs_err as fs; 5 | 6 | use std::path::{Path, PathBuf}; 7 | 8 | /// An iterator traversing module hierarchies yielding paths 9 | #[derive(Debug, Clone)] 10 | pub struct TraverseModulesIter { 
11 | /// state for enqueuing child files and the depth at which they are found 12 | queue: VecDeque<(PathBuf, usize)>, 13 | /// zero limits to the provided path, if it is a directory, all children are 14 | /// collected 15 | max_depth: usize, 16 | } 17 | 18 | impl Default for TraverseModulesIter { 19 | fn default() -> Self { 20 | Self { 21 | max_depth: usize::MAX, 22 | queue: VecDeque::with_capacity(128), 23 | } 24 | } 25 | } 26 | 27 | impl TraverseModulesIter { 28 | fn add_initial_path
(&mut self, path: P, level: usize) -> Result<()> 29 | where 30 | P: AsRef, 31 | { 32 | let path = path.as_ref(); 33 | let path = fs::canonicalize(path)?; 34 | let meta = fs::metadata(&path)?; 35 | if meta.is_file() { 36 | self.queue.push_front((path, level)); 37 | } else if meta.is_dir() { 38 | ignore::WalkBuilder::new(path) 39 | .git_ignore(true) 40 | .max_depth(1.into()) 41 | .same_file_system(true) 42 | .skip_stdout(true) 43 | .build() 44 | .filter_map(|entry| { 45 | entry 46 | .ok() 47 | .filter(|entry| entry.file_type().map(|ft| ft.is_file()).unwrap_or(false)) 48 | .map(|x| x.path().to_owned()) 49 | }) 50 | .filter(|path: &PathBuf| { 51 | path.to_str() 52 | .map(|x| x.to_owned()) 53 | .filter(|path| path.ends_with(".rs")) 54 | .is_some() 55 | }) 56 | .try_for_each::<_, Result<()>>(|path| { 57 | log::trace!("🌱 using path {} as seed recursion dir", path.display()); 58 | self.queue.push_front((path, level)); 59 | Ok(()) 60 | })?; 61 | } 62 | Ok(()) 63 | } 64 | 65 | #[allow(unused)] 66 | pub fn with_multi(entries: I) -> Result 67 | where 68 | P: AsRef, 69 | J: Iterator, 70 | I: IntoIterator, 71 | { 72 | let mut me = Self::default(); 73 | for path in entries.into_iter() { 74 | me.add_initial_path(path, 0)?; 75 | } 76 | Ok(me) 77 | } 78 | 79 | pub fn with_depth_limit>(path: P, max_depth: usize) -> Result { 80 | let mut me = Self { 81 | max_depth, 82 | ..Default::default() 83 | }; 84 | me.add_initial_path(path, 0)?; 85 | Ok(me) 86 | } 87 | 88 | /// Create a new path with (almost) infinite depth bounds 89 | #[allow(unused)] 90 | pub fn new>(path: P) -> Result { 91 | Self::with_depth_limit(path, usize::MAX) 92 | } 93 | 94 | pub fn collect_modules(&mut self, path: &Path, level: usize) -> Result<()> { 95 | if path.is_file() { 96 | log::trace!("🥞 collecting mods declared in file {}", path.display()); 97 | self.queue.extend( 98 | extract_modules_from_file(path)? 99 | .into_iter() 100 | .map(|item| (item, level)), 101 | ); 102 | } else { 103 | log::warn!("🥞 Only dealing with files, dropping {}", path.display()); 104 | } 105 | Ok(()) 106 | } 107 | } 108 | 109 | impl Iterator for TraverseModulesIter { 110 | type Item = PathBuf; 111 | fn next(&mut self) -> Option { 112 | if let Some((path, level)) = self.queue.pop_front() { 113 | if level < self.max_depth { 114 | // ignore the error here, there is nothing we can do really 115 | // TODO potentially consider returning a result covering this 116 | let _ = self.collect_modules(path.as_path(), level + 1); 117 | } 118 | Some(path) 119 | } else { 120 | None 121 | } 122 | } 123 | } 124 | 125 | /// traverse path with a depth limit, if the path is a directory all its 126 | /// children will be collected instead 127 | // TODO should not read the documentation, that is out of scope. 128 | // TODO should not have knowledge of `dev_comments`. 129 | pub(crate) fn traverse( 130 | path: &Path, 131 | doc_comments: bool, 132 | dev_comments: bool, 133 | ) -> Result> { 134 | traverse_with_depth_limit(path, usize::MAX, doc_comments, dev_comments) 135 | } 136 | 137 | /// traverse path with a depth limit, if the path is a directory all its 138 | /// children will be collected as depth 0 instead 139 | pub(crate) fn traverse_with_depth_limit( 140 | path: &Path, 141 | max_depth: usize, 142 | doc_comments: bool, 143 | dev_comments: bool, 144 | ) -> Result> { 145 | let it = TraverseModulesIter::with_depth_limit(path, max_depth)? 
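        // Lazily read each discovered module file and parse its documentation,
        // dropping files that cannot be read or that yield no documentation at all.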
146 | .filter_map(move |path: PathBuf| -> Option { 147 | fs::read_to_string(&path).ok().map(|content| { 148 | Documentation::load_from_str( 149 | ContentOrigin::RustSourceFile(path), 150 | content.as_str(), 151 | doc_comments, 152 | dev_comments, 153 | ) 154 | }) 155 | }) 156 | .filter(|documentation| !documentation.is_empty()); 157 | Ok(it) 158 | } 159 | 160 | #[cfg(test)] 161 | mod tests { 162 | use super::*; 163 | 164 | fn demo_dir() -> PathBuf { 165 | manifest_dir().join("demo") 166 | } 167 | 168 | #[test] 169 | fn traverse_main_rs() { 170 | let _ = env_logger::builder() 171 | .filter_level(log::LevelFilter::Trace) 172 | .is_test(true) 173 | .try_init(); 174 | let manifest_path = demo_dir().join("src/main.rs"); 175 | 176 | let expect = indexmap::indexset! { 177 | "src/main.rs", 178 | "src/lib.rs", 179 | "src/nested/mod.rs", 180 | "src/nested/justone.rs", 181 | "src/nested/justtwo.rs", 182 | "src/nested/again/mod.rs", 183 | "src/nested/again/code.rs", 184 | "src/nested/fragments.rs", 185 | "src/nested/fragments/enumerate.rs", 186 | "src/nested/fragments/simple.rs", 187 | } 188 | .into_iter() 189 | .map(|sub| demo_dir().join(sub)) 190 | .collect::>(); 191 | 192 | let found = TraverseModulesIter::new(manifest_path.as_path()) 193 | .expect("Must succeed to traverse file tree.") 194 | .into_iter() 195 | .collect::>(); 196 | 197 | let unexpected_files: Vec<_> = dbg!(&found) 198 | .iter() 199 | .filter(|found_path| !expect.contains(*found_path)) 200 | .collect(); 201 | assert_eq!(Vec::<&PathBuf>::new(), unexpected_files); 202 | 203 | let missing_files: Vec<_> = expect 204 | .iter() 205 | .filter(|expected_path| !found.contains(expected_path)) 206 | .collect(); 207 | assert_eq!(Vec::<&PathBuf>::new(), missing_files); 208 | 209 | assert_eq!(found.len(), expect.len()); 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/checker/quirks.rs: -------------------------------------------------------------------------------- 1 | //! A set of quirks, not necessarily specific to a checker 2 | 3 | use crate::Range; 4 | use fancy_regex::Regex; 5 | 6 | /// Returns `true` iff the replacements contains a variant of `word` without 7 | /// dashes. 8 | pub(crate) fn replacements_contain_dashless>(word: &str, replacements: &[T]) -> bool { 9 | let dashless = word.chars().filter(|c| *c != '-').collect::(); 10 | // if the word does not contain any dashes, skip the replacement iterations 11 | if dashless == word { 12 | return false; 13 | } 14 | replacements 15 | .iter() 16 | .map(|s| s.as_ref()) 17 | .any(|x| x == &dashless) 18 | } 19 | 20 | /// Returns `true` iff the replacements contains a variant of `word` with 21 | /// additional dashes. 22 | pub(crate) fn replacements_contain_dashed>(word: &str, replacements: &[T]) -> bool { 23 | // before doing lots of work, check if the word itself contains a dash, if so 24 | // the below logic cannot yield and positive results 25 | if word.chars().any(|c| c == '-') { 26 | return false; 27 | } 28 | 29 | replacements 30 | .iter() 31 | .map(|s| s.as_ref()) 32 | // avoid lots of string iterations in find 33 | .filter(|s| s.as_bytes().first() == word.as_bytes().first()) 34 | .any(|s| itertools::equal(s.chars().filter(|c| *c != '-'), word.chars())) 35 | } 36 | 37 | /// Transformed word with information on the transformation outcome. 38 | #[derive(Debug, Eq, PartialEq)] 39 | pub(crate) enum Transformed<'i> { 40 | /// A allow-listed chunk 41 | Whitelisted((Range, &'i str)), 42 | /// A set of word-fragments to be checked. 
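    /// Produced when a transform regex matched with capture groups; each
    /// captured fragment carries its own sub-range and is checked separately.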
43 | Fragments(Vec<(Range, &'i str)>), 44 | /// A word to be checked. Equiv to no match. 45 | Atomic((Range, &'i str)), 46 | } 47 | 48 | /// Transforms a word into a set of fragment-ranges and associated str slices. 49 | pub(crate) fn transform<'i, R: AsRef>( 50 | transform_regex: &[R], 51 | word: &'i str, 52 | range: Range, 53 | ) -> Transformed<'i> { 54 | let mut q = std::collections::VecDeque::<(Range, &'_ str)>::with_capacity(32); 55 | let mut words = Vec::with_capacity(16); 56 | let mut whitelisted = 0usize; 57 | q.push_back((range.clone(), word)); 58 | while let Some((range, word)) = q.pop_front() { 59 | // work on a fragment now 60 | match transform_inner(transform_regex, word, range.clone()) { 61 | // we try to recursively match the fragments with the regex expr until they become atomic words or whitelisted 62 | Transformed::Fragments(v) => q.extend(v), 63 | Transformed::Atomic(word) => words.push(word), 64 | Transformed::Whitelisted(_) => whitelisted += 1, 65 | } 66 | } 67 | 68 | // no match found at all, this word is "atomic" and will be checked as is 69 | if whitelisted == 0usize { 70 | // empty means nothing, one word with the same range means we only found the initial provided word 71 | if words.is_empty() || (words.len() == 1 && words[0].0.len() == word.len()) { 72 | return Transformed::Atomic((range, word)); 73 | } 74 | } 75 | 76 | if !words.is_empty() { 77 | // collect all the words as fragments again (they actually really are) 78 | Transformed::Fragments(words) 79 | } else { 80 | // if there are no words to be checked, everything is whitelisted 81 | Transformed::Whitelisted((range, word)) 82 | } 83 | } 84 | 85 | /// Inner loop transform 86 | /// 87 | /// Returns `Some(vec![..])` if any captures were found. 88 | fn transform_inner<'i, R: AsRef>( 89 | transform_regex: &[R], 90 | word: &'i str, 91 | range: Range, 92 | ) -> Transformed<'i> { 93 | for regex in transform_regex.iter().map(AsRef::as_ref) { 94 | match regex.captures(word) { 95 | Ok(Some(captures)) => { 96 | // first one is always the full match 97 | if captures.len() == 1 { 98 | // means match, but no captures, 99 | // which is equiv to an implicit whitelist 100 | return Transformed::Whitelisted((range, word)); 101 | } 102 | let intermediate = captures 103 | .iter() 104 | .skip(1) 105 | .flatten() 106 | .map(|m| { 107 | let intra_word_range = m.start()..m.end(); 108 | log::trace!(target:"quirks", 109 | "Found capture for word >{}<, with match >{}< and capture >{}< at {:?}", 110 | captures.get(0).unwrap().as_str(), 111 | word, 112 | m.as_str(), 113 | &intra_word_range 114 | ); 115 | let offset = word 116 | .char_indices() 117 | .take_while(|(byte_pos, _)| m.start() > *byte_pos) 118 | .count(); 119 | let range = Range { 120 | start: range.start + offset, 121 | end: range.start + offset + m.as_str().chars().count(), 122 | }; 123 | (range, &word[intra_word_range]) 124 | }) 125 | .collect::>(); 126 | 127 | return Transformed::Fragments(intermediate); 128 | } 129 | Ok(None) => { 130 | // no regex match, try the next regex 131 | continue; 132 | } 133 | Err(e) => { 134 | log::warn!(target:"quirks", "Matching regex >{}< errored: {}", regex.as_str(), e); 135 | break; 136 | } 137 | } 138 | } 139 | // nothing matched, check the entire word instead 140 | Transformed::Atomic((range, word)) 141 | } 142 | 143 | #[cfg(test)] 144 | mod tests { 145 | use super::*; 146 | use crate::config::WrappedRegex; 147 | use env_logger; 148 | 149 | #[test] 150 | fn dashed() { 151 | let _ = env_logger::builder() 152 | .is_test(true) 153 | 
.filter(None, log::LevelFilter::Trace) 154 | .try_init(); 155 | 156 | const REPLACEMENTS: &'static [&'static str] = &["fffff", "qqq", "z", "zeta-ray"]; 157 | const WORD: &str = "zetaray"; 158 | assert!(replacements_contain_dashed(WORD, REPLACEMENTS)); 159 | } 160 | 161 | #[test] 162 | fn dashless() { 163 | let _ = env_logger::builder() 164 | .is_test(true) 165 | .filter(None, log::LevelFilter::Trace) 166 | .try_init(); 167 | 168 | const WORD: &str = "zeta-ray"; 169 | const REPLACEMENTS: &'static [&'static str] = &["fffff", "qqq", "z", "zetaray"]; 170 | assert!(replacements_contain_dashless(WORD, REPLACEMENTS)); 171 | } 172 | 173 | #[test] 174 | fn transformer() { 175 | let _ = env_logger::builder() 176 | .is_test(true) 177 | .filter(None, log::LevelFilter::Trace) 178 | .try_init(); 179 | 180 | let re = vec![ 181 | WrappedRegex::from(Regex::new("^[0-9]+x$").unwrap()), //whitelist 182 | WrappedRegex::from(Regex::new(r#"^'([^\s]+)'$"#).unwrap()), 183 | WrappedRegex::from(Regex::new("(Alpha)(beta)").unwrap()), 184 | ]; 185 | 186 | let words = ["2x", r#"''so-to-speak''"#, "Alphabeta", "Nothing"]; 187 | 188 | // whitelist 189 | assert_eq!( 190 | transform(re.as_slice(), words[0], 10..24), 191 | Transformed::Whitelisted((10..24, words[0])) 192 | ); 193 | 194 | // single quoted, recursive 2x 195 | assert_eq!( 196 | transform(re.as_slice(), words[1], 10..25), 197 | Transformed::Fragments(vec![(12..23, &words[1][2..13])]) 198 | ); 199 | 200 | // multi capture 201 | assert_eq!( 202 | transform(re.as_slice(), words[2], 10..19), 203 | Transformed::Fragments(vec![(10..15, &words[2][0..5]), (15..19, &words[2][5..9]),]) 204 | ); 205 | 206 | // no match 207 | assert_eq!( 208 | transform(re.as_slice(), words[3], 10..17), 209 | Transformed::Atomic((10..17, words[3])) 210 | ); 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | -------------------------------------------------------------------------------- /doc-chunks/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Doc Chunks 2 | //! 3 | //! `Documentation` is a representation of one or multiple documents. 4 | //! 5 | //! A `literal` is a token provided by `proc_macro2` or `ra_ap_syntax` crate, which is then converted by 6 | //! means of `TrimmedLiteral` using `Cluster`ing into a `CheckableChunk` (mostly 7 | //! named just `chunk`). 8 | //! 9 | //! 
`CheckableChunk`s can consist of multiple fragments, where each fragment can 10 | //! span multiple lines, yet each fragment is covering a consecutive `Span` in 11 | //! the origin content. Each fragment also has a direct mapping to the 12 | //! `CheckableChunk` internal string representation. 13 | //! 14 | //! And `Documentation` holds one or many `CheckableChunks` per file path. 15 | 16 | #![deny(unused_crate_dependencies)] 17 | 18 | // contains test helpers 19 | pub mod span; 20 | pub mod testcase; 21 | pub use self::span::Span; 22 | pub use proc_macro2::LineColumn; 23 | 24 | pub mod util; 25 | use self::util::{load_span_from, sub_char_range}; 26 | 27 | use indexmap::IndexMap; 28 | use proc_macro2::TokenTree; 29 | use rayon::prelude::*; 30 | use serde::Deserialize; 31 | use std::path::PathBuf; 32 | use toml::Spanned; 33 | 34 | /// Range based on `usize`, simplification. 35 | pub type Range = core::ops::Range; 36 | 37 | /// Apply an offset to `start` and `end` members, equaling a shift of the range. 38 | pub fn apply_offset(range: &mut Range, offset: usize) { 39 | range.start = range.start.saturating_add(offset); 40 | range.end = range.end.saturating_add(offset); 41 | } 42 | 43 | pub mod chunk; 44 | pub mod cluster; 45 | mod developer; 46 | pub mod errors; 47 | pub mod literal; 48 | pub mod literalset; 49 | pub mod markdown; 50 | 51 | pub use chunk::*; 52 | pub use cluster::*; 53 | pub use errors::*; 54 | pub use literal::*; 55 | pub use literalset::*; 56 | pub use markdown::*; 57 | 58 | /// Collection of all the documentation entries across the project 59 | #[derive(Debug, Clone)] 60 | pub struct Documentation { 61 | /// Mapping of a path to documentation literals 62 | index: IndexMap>, 63 | } 64 | 65 | impl Default for Documentation { 66 | fn default() -> Self { 67 | Self::new() 68 | } 69 | } 70 | 71 | impl Documentation { 72 | /// Create a new and empty doc. 73 | pub fn new() -> Self { 74 | Self { 75 | index: IndexMap::with_capacity(64), 76 | } 77 | } 78 | 79 | /// Check if a particular key is contained. 80 | pub fn contains_key(&self, key: &ContentOrigin) -> bool { 81 | self.index.contains_key(key) 82 | } 83 | 84 | /// Check if the document contains any checkable items. 85 | #[inline(always)] 86 | pub fn is_empty(&self) -> bool { 87 | self.index.is_empty() 88 | } 89 | 90 | /// Borrowing iterator across content origins and associated sets of chunks. 91 | #[inline(always)] 92 | pub fn iter(&self) -> impl Iterator)> { 93 | self.index.iter() 94 | } 95 | 96 | /// Borrowing iterator across content origins and associated sets of chunks. 97 | pub fn par_iter(&self) -> impl ParallelIterator)> { 98 | self.index.par_iter() 99 | } 100 | 101 | /// Consuming iterator across content origins and associated sets of chunks. 102 | pub fn into_par_iter( 103 | self, 104 | ) -> impl ParallelIterator)> { 105 | self.index.into_par_iter() 106 | } 107 | 108 | /// Extend `self` by joining in other `Documentation`s. 109 | pub fn extend(&mut self, other: I) 110 | where 111 | I: IntoIterator), IntoIter = J>, 112 | J: Iterator)>, 113 | { 114 | other 115 | .into_iter() 116 | .for_each(|(origin, chunks): (_, Vec)| { 117 | self.add_inner(origin, chunks); 118 | }); 119 | } 120 | 121 | /// Adds a set of `CheckableChunk`s to the documentation to be checked. 
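// Sketch (not part of the crate): `add_inner` below merges rather than
// overwrites, so feeding the same `ContentOrigin` twice accumulates chunks
// under a single entry. Assuming `origin`, `a` and `b` were built elsewhere:
//
//     let mut docs = Documentation::new();
//     docs.add_inner(origin.clone(), a); // creates the entry
//     docs.add_inner(origin.clone(), b); // appends to the same entry
//     assert_eq!(docs.entry_count(), 1); // still a single origin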
122 | pub fn add_inner(&mut self, origin: ContentOrigin, mut chunks: Vec) { 123 | self.index 124 | .entry(origin) 125 | .and_modify(|acc: &mut Vec| { 126 | acc.append(&mut chunks); 127 | }) 128 | .or_insert_with(|| chunks); 129 | // Ok(()) TODO make this failable 130 | } 131 | 132 | /// Adds a rust content str to the documentation. 133 | pub fn add_rust( 134 | &mut self, 135 | origin: ContentOrigin, 136 | content: &str, 137 | doc_comments: bool, 138 | dev_comments: bool, 139 | ) -> Result<()> { 140 | let cluster = Clusters::load_from_str(content, doc_comments, dev_comments)?; 141 | 142 | let chunks = Vec::::from(cluster); 143 | self.add_inner(origin, chunks); 144 | Ok(()) 145 | } 146 | 147 | /// Adds a content string to the documentation sourced from the 148 | /// `description` field in a `Cargo.toml` manifest. 149 | pub fn add_cargo_manifest_description( 150 | &mut self, 151 | path: PathBuf, 152 | manifest_content: &str, 153 | ) -> Result<()> { 154 | fn extract_range_of_description(manifest_content: &str) -> Result { 155 | #[derive(Deserialize, Debug)] 156 | struct Manifest { 157 | package: Spanned, 158 | } 159 | 160 | #[derive(Deserialize, Debug)] 161 | struct Package { 162 | description: Spanned, 163 | } 164 | 165 | let value: Manifest = toml::from_str(manifest_content)?; 166 | let d = value.package.into_inner().description; 167 | let range = d.span(); 168 | Ok(range) 169 | } 170 | 171 | let mut range = extract_range_of_description(manifest_content)?; 172 | let description = sub_char_range(manifest_content, range.clone()); 173 | 174 | // Attention: `description` does include `\"\"\"` as well as `\\\n`, the latter is not a big issue, 175 | // but the trailing start and end delimiters are. 176 | // TODO: split into multiple on `\\\n` and create multiple range/span mappings. 177 | let description = if range.len() > 6 { 178 | if description.starts_with("\"\"\"") { 179 | range.start += 3; 180 | range.end -= 3; 181 | assert!(!range.is_empty()); 182 | } 183 | dbg!(&description[3..(description.len()) - 3]) 184 | } else { 185 | description 186 | }; 187 | 188 | fn convert_range_to_span(content: &str, range: Range) -> Option { 189 | let mut line = 0_usize; 190 | let mut column = 0_usize; 191 | let mut prev = '\n'; 192 | let mut start = None; 193 | for (offset, c) in content.chars().enumerate() { 194 | if prev == '\n' { 195 | column = 0; 196 | line += 1; 197 | } 198 | prev = c; 199 | 200 | if offset == range.start { 201 | start = Some(LineColumn { line, column }); 202 | continue; 203 | } 204 | // take care of inclusivity 205 | if offset + 1 == range.end { 206 | let end = LineColumn { line, column }; 207 | return Some(Span { 208 | start: start.unwrap(), 209 | end, 210 | }); 211 | } 212 | column += 1; 213 | } 214 | None 215 | } 216 | 217 | let span = convert_range_to_span(manifest_content, range.clone()).expect( 218 | "Description is part of the manifest since it was parsed from the same source. qed", 219 | ); 220 | let origin = ContentOrigin::CargoManifestDescription(path); 221 | let source_mapping = dbg!(indexmap::indexmap! { 222 | range => span 223 | }); 224 | self.add_inner( 225 | origin, 226 | vec![CheckableChunk::from_str( 227 | description, 228 | source_mapping, 229 | CommentVariant::TomlEntry, 230 | )], 231 | ); 232 | Ok(()) 233 | } 234 | 235 | /// Adds a common mark content str to the documentation. 
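// Sketch (not part of the crate): `add_commonmark` below maps the whole file
// into a single chunk whose span starts at line 1, column 0 and ends on the
// last line with an inclusive column of `chars - 1`. For the two-line input
// "# Title\nSecond line" that works out to:
//
//     start = LineColumn { line: 1, column: 0 }
//     end   = LineColumn { line: 2, column: 10 }  // "Second line" has 11 chars
//     source_mapping = { 0..19 => Span { start, end } }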
236 | pub fn add_commonmark(&mut self, origin: ContentOrigin, content: &str) -> Result<()> { 237 | // extract the full content span and range 238 | let start = LineColumn { line: 1, column: 0 }; 239 | let end = content 240 | .lines() 241 | .enumerate() 242 | .last() 243 | .map(|(idx, linecontent)| (idx + 1, linecontent)) 244 | .map(|(linenumber, linecontent)| LineColumn { 245 | line: linenumber, 246 | column: linecontent.chars().count().saturating_sub(1), 247 | }) 248 | .ok_or_else(|| { 249 | Error::Span( 250 | "Common mark / markdown file does not contain a single line".to_string(), 251 | ) 252 | })?; 253 | 254 | let span = Span { start, end }; 255 | let source_mapping = indexmap::indexmap! { 256 | 0..content.chars().count() => span 257 | }; 258 | self.add_inner( 259 | origin, 260 | vec![CheckableChunk::from_str( 261 | content, 262 | source_mapping, 263 | CommentVariant::CommonMark, 264 | )], 265 | ); 266 | Ok(()) 267 | } 268 | 269 | /// Obtain the set of chunks for a particular origin. 270 | #[inline(always)] 271 | pub fn get(&self, origin: &ContentOrigin) -> Option<&[CheckableChunk]> { 272 | self.index.get(origin).map(AsRef::as_ref) 273 | } 274 | 275 | /// Count the number of origins. 276 | #[inline(always)] 277 | pub fn entry_count(&self) -> usize { 278 | self.index.len() 279 | } 280 | 281 | /// Load a document from a single string with a defined origin. 282 | pub fn load_from_str( 283 | origin: ContentOrigin, 284 | content: &str, 285 | doc_comments: bool, 286 | dev_comments: bool, 287 | ) -> Self { 288 | let mut docs = Documentation::new(); 289 | 290 | match origin.clone() { 291 | ContentOrigin::RustDocTest(_path, span) => { 292 | if let Ok(excerpt) = load_span_from(&mut content.as_bytes(), span) { 293 | docs.add_rust(origin.clone(), excerpt.as_str(), doc_comments, dev_comments) 294 | } else { 295 | // TODO 296 | Ok(()) 297 | } 298 | } 299 | origin @ ContentOrigin::RustSourceFile(_) => { 300 | docs.add_rust(origin, content, doc_comments, dev_comments) 301 | } 302 | ContentOrigin::CargoManifestDescription(path) => { 303 | docs.add_cargo_manifest_description(path, content) 304 | } 305 | origin @ ContentOrigin::CommonMarkFile(_) => docs.add_commonmark(origin, content), 306 | origin @ ContentOrigin::TestEntityRust => { 307 | docs.add_rust(origin, content, doc_comments, dev_comments) 308 | } 309 | origin @ ContentOrigin::TestEntityCommonMark => docs.add_commonmark(origin, content), 310 | } 311 | .unwrap_or_else(move |e| { 312 | log::warn!( 313 | "BUG: Failed to load content from {origin} (dev_comments={dev_comments:?}): {e:?}", 314 | ); 315 | }); 316 | docs 317 | } 318 | 319 | pub fn len(&self) -> usize { 320 | self.index.len() 321 | } 322 | } 323 | 324 | impl IntoIterator for Documentation { 325 | type Item = (ContentOrigin, Vec); 326 | type IntoIter = indexmap::map::IntoIter>; 327 | 328 | fn into_iter(self) -> Self::IntoIter { 329 | self.index.into_iter() 330 | } 331 | } 332 | -------------------------------------------------------------------------------- /src/checker/mod.rs: -------------------------------------------------------------------------------- 1 | //! Checker 2 | //! 3 | //! Trait to handle additional trackers. Contains also helpers to avoid 4 | //! re-implementing generic algorithms again and again, i.e. tokenization. 
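// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the crate: the smallest possible `Checker`
// implementation, to show the trait surface defined further down in this
// module. `NoopChecker` is a hypothetical type that never reports anything.
// ---------------------------------------------------------------------------
#[allow(dead_code)]
struct NoopChecker;

impl Checker for NoopChecker {
    type Config = ();

    fn detector() -> Detector {
        // A real checker returns its own `Detector` variant here.
        unimplemented!("illustrative sketch only")
    }

    fn check<'a, 's>(
        &self,
        _origin: &ContentOrigin,
        _chunks: &'a [CheckableChunk],
    ) -> Result<Vec<Suggestion<'s>>>
    where
        'a: 's,
    {
        // A real checker tokenizes each chunk and pushes `Suggestion`s; this
        // sketch reports nothing.
        Ok(Vec::new())
    }
}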
5 | 6 | use crate::{CheckableChunk, Config, ContentOrigin, Detector, Suggestion}; 7 | 8 | use crate::errors::*; 9 | 10 | mod cached; 11 | use self::cached::Cached; 12 | 13 | use std::collections::HashSet; 14 | 15 | mod tokenize; 16 | 17 | #[cfg(feature = "hunspell")] 18 | pub(crate) use self::hunspell::HunspellChecker; 19 | #[cfg(feature = "nlprules")] 20 | pub(crate) use self::nlprules::NlpRulesChecker; 21 | #[cfg(feature = "spellbook")] 22 | pub(crate) use self::spellbook::SpellbookChecker; 23 | pub(crate) use self::tokenize::*; 24 | #[cfg(feature = "zet")] 25 | pub(crate) use self::zspell::ZetChecker; 26 | 27 | #[cfg(feature = "hunspell")] 28 | mod hunspell; 29 | 30 | #[cfg(feature = "zet")] 31 | mod zspell; 32 | 33 | #[cfg(feature = "spellbook")] 34 | mod spellbook; 35 | 36 | #[cfg(feature = "nlprules")] 37 | mod nlprules; 38 | 39 | mod dictaffix; 40 | 41 | #[cfg(any(feature = "spellbook", feature = "zet", feature = "hunspell"))] 42 | mod quirks; 43 | 44 | /// Implementation for a checker 45 | pub trait Checker { 46 | type Config; 47 | 48 | fn detector() -> Detector; 49 | 50 | fn check<'a, 's>( 51 | &self, 52 | origin: &ContentOrigin, 53 | chunks: &'a [CheckableChunk], 54 | ) -> Result>> 55 | where 56 | 'a: 's; 57 | } 58 | 59 | /// Check a full document for violations using the tools we have. 60 | /// 61 | /// Only configured checkers are used. 62 | pub struct Checkers { 63 | hunspell: Option, 64 | #[cfg(feature = "zet")] 65 | zet: Option, 66 | #[cfg(feature = "spellbook")] 67 | spellbook: Option, 68 | nlprules: Option, 69 | } 70 | 71 | impl Checkers { 72 | pub fn new(config: Config) -> Result { 73 | macro_rules! create_checker { 74 | ($feature:literal, $checker:ty, $config:expr, $checker_config:expr) => { 75 | if !cfg!(feature = $feature) { 76 | log::debug!("Feature {} is disabled by compilation.", $feature); 77 | None 78 | } else { 79 | let config = $config; 80 | let detector = <$checker>::detector(); 81 | if config.is_enabled(detector) { 82 | log::debug!("Enabling {} checks.", detector); 83 | Some(<$checker>::new($checker_config.unwrap())?) 
84 | } else { 85 | log::debug!("Checker {detector} is disabled by configuration."); 86 | None 87 | } 88 | } 89 | }; 90 | } 91 | 92 | let hunspell = create_checker!( 93 | "hunspell", 94 | HunspellChecker, 95 | &config, 96 | config.hunspell.as_ref() 97 | ); 98 | #[cfg(feature = "zet")] 99 | let zet = create_checker!("zet", ZetChecker, &config, config.zet.as_ref()); 100 | #[cfg(feature = "spellbook")] 101 | let spellbook = create_checker!( 102 | "spellbook", 103 | SpellbookChecker, 104 | &config, 105 | config.spellbook.as_ref() 106 | ); 107 | let nlprules = create_checker!( 108 | "nlprules", 109 | NlpRulesChecker, 110 | &config, 111 | config.nlprules.as_ref() 112 | ); 113 | Ok(Self { 114 | hunspell, 115 | #[cfg(feature = "zet")] 116 | zet, 117 | #[cfg(feature = "spellbook")] 118 | spellbook, 119 | nlprules, 120 | }) 121 | } 122 | } 123 | 124 | impl Checker for Checkers { 125 | type Config = Config; 126 | 127 | fn detector() -> Detector { 128 | unreachable!() 129 | } 130 | 131 | fn check<'a, 's>( 132 | &self, 133 | origin: &ContentOrigin, 134 | chunks: &'a [CheckableChunk], 135 | ) -> Result>> 136 | where 137 | 'a: 's, 138 | { 139 | let mut collective = HashSet::>::new(); 140 | if let Some(ref hunspell) = self.hunspell { 141 | collective.extend(hunspell.check(origin, chunks)?); 142 | } 143 | #[cfg(feature = "zet")] 144 | if let Some(ref zet) = self.zet { 145 | collective.extend(zet.check(origin, chunks)?); 146 | } 147 | #[cfg(feature = "spellbook")] 148 | if let Some(ref spellbook) = self.spellbook { 149 | collective.extend(spellbook.check(origin, chunks)?); 150 | } 151 | if let Some(ref nlprule) = self.nlprules { 152 | collective.extend(nlprule.check(origin, chunks)?); 153 | } 154 | 155 | let mut suggestions: Vec> = Vec::from_iter(collective); 156 | suggestions.sort(); 157 | if suggestions.is_empty() { 158 | return Ok(suggestions); 159 | } 160 | 161 | // Iterate through suggestions and identify overlapping ones. 162 | let suggestions = Vec::from_iter(suggestions.clone().into_iter().enumerate().filter_map( 163 | |(idx, cur)| { 164 | if idx == 0 || !cur.is_overlapped(&suggestions[idx - 1]) { 165 | Some(cur) 166 | } else { 167 | None 168 | } 169 | }, 170 | )); 171 | 172 | Ok(suggestions) 173 | } 174 | } 175 | 176 | #[cfg(test)] 177 | pub mod dummy; 178 | 179 | #[cfg(test)] 180 | pub mod tests { 181 | use super::*; 182 | use crate::load_span_from; 183 | use crate::ContentOrigin; 184 | use crate::Documentation; 185 | use crate::LineColumn; 186 | use crate::Range; 187 | use crate::Span; 188 | use std::path::PathBuf; 189 | 190 | use crate::fluff_up; 191 | 192 | const TEXT: &str = "With markdown removed, for sure."; 193 | lazy_static::lazy_static! 
{ 194 | static ref TOKENS: Vec<&'static str> = vec![ 195 | "With", 196 | "markdown", 197 | "removed", 198 | ",", 199 | "for", 200 | "sure", 201 | ".", 202 | ]; 203 | } 204 | 205 | #[test] 206 | fn tokens() { 207 | let tokenizer = tokenizer::<&PathBuf>(None).unwrap(); 208 | let ranges: Vec = dbg!(apply_tokenizer(&tokenizer, TEXT).collect()); 209 | for (range, expect) in ranges.into_iter().zip(TOKENS.iter()) { 210 | assert_eq!(&&TEXT[range], expect); 211 | } 212 | } 213 | 214 | pub fn extraction_test_body(content: &str, expected_spans: &[Span]) { 215 | let _ = env_logger::builder() 216 | .filter(None, log::LevelFilter::Trace) 217 | .is_test(true) 218 | .try_init(); 219 | let doc_comments = true; 220 | let dev_comments = false; 221 | let docs = Documentation::load_from_str( 222 | ContentOrigin::TestEntityRust, 223 | content, 224 | doc_comments, 225 | dev_comments, 226 | ); 227 | let (origin, chunks) = docs.into_iter().next().expect("Contains exactly one file"); 228 | let suggestions = dummy::DummyChecker 229 | .check(&origin, &chunks[..]) 230 | .expect("Dummy extraction must never fail"); 231 | 232 | // with a known number of suggestions 233 | assert_eq!(suggestions.len(), expected_spans.len()); 234 | 235 | for (index, (suggestion, expected_span)) in 236 | suggestions.iter().zip(expected_spans.iter()).enumerate() 237 | { 238 | assert_eq!( 239 | suggestion.replacements, 240 | vec![format!("replacement_{index}")], 241 | "found vs expected replacement" 242 | ); 243 | let extracts = load_span_from(&mut content.as_bytes(), suggestion.span).unwrap(); 244 | let expected_extracts = 245 | load_span_from(&mut content.as_bytes(), *expected_span).unwrap(); 246 | assert_eq!( 247 | (suggestion.span, extracts), 248 | (*expected_span, expected_extracts), 249 | "found vs expected span" 250 | ); 251 | } 252 | } 253 | 254 | #[test] 255 | fn extract_suggestions_simple() { 256 | const SIMPLE: &str = fluff_up!("two literals"); 257 | 258 | /// keep in mind, `Span` bounds are inclusive, unlike Ranges, where 259 | /// `range.end` is _exclusive_ 260 | const EXPECTED_SPANS: &[Span] = &[ 261 | Span { 262 | start: LineColumn { line: 1, column: 4 }, 263 | end: LineColumn { line: 1, column: 6 }, 264 | }, 265 | Span { 266 | start: LineColumn { line: 1, column: 8 }, 267 | end: LineColumn { 268 | line: 1, 269 | column: 15, 270 | }, 271 | }, 272 | ]; 273 | extraction_test_body(dbg!(SIMPLE), EXPECTED_SPANS); 274 | } 275 | 276 | #[test] 277 | fn extract_suggestions_left_aligned() { 278 | const SIMPLE: &str = fluff_up!("two literals "); 279 | 280 | /// keep in mind, `Span` bounds are inclusive, unlike Ranges, where 281 | /// `range.end` is _exclusive_ 282 | const EXPECTED_SPANS: &[Span] = &[ 283 | Span { 284 | start: LineColumn { line: 1, column: 4 }, 285 | end: LineColumn { line: 1, column: 6 }, 286 | }, 287 | Span { 288 | start: LineColumn { line: 1, column: 9 }, 289 | end: LineColumn { 290 | line: 1, 291 | column: 16, 292 | }, 293 | }, 294 | ]; 295 | extraction_test_body(dbg!(SIMPLE), EXPECTED_SPANS); 296 | } 297 | 298 | #[test] 299 | fn extract_suggestions_3spaces() { 300 | const SIMPLE: &str = fluff_up!(" third testcase "); 301 | 302 | /// keep in mind, `Span` bounds are inclusive, unlike Ranges, where 303 | /// `range.end` is _exclusive_ 304 | const EXPECTED_SPANS: &[Span] = &[ 305 | Span { 306 | start: LineColumn { line: 1, column: 6 }, 307 | end: LineColumn { 308 | line: 1, 309 | column: 10, 310 | }, 311 | }, 312 | Span { 313 | start: LineColumn { 314 | line: 1, 315 | column: 13, 316 | }, 317 | end: LineColumn { 318 | 
line: 1, 319 | column: 20, 320 | }, 321 | }, 322 | ]; 323 | extraction_test_body(dbg!(SIMPLE), EXPECTED_SPANS); 324 | } 325 | 326 | #[test] 327 | fn checker_discrepancies() { 328 | let _ = env_logger::Builder::new() 329 | .default_format() 330 | .filter_level(log::LevelFilter::Debug) 331 | .filter(Some("dicaff"), log::LevelFilter::Trace) 332 | .is_test(true) 333 | .try_init(); 334 | 335 | let x = r###" 336 | /// With all patches applied. 337 | /// 338 | /// No line in need of a reflow. 339 | /// 340 | /// `Patch`s foo. 341 | /// 342 | /// I am a TODO where TODO is in the extra dictionary. 343 | struct X; 344 | "###; 345 | 346 | let mut doc = Documentation::new(); 347 | doc.add_rust(ContentOrigin::TestEntityRust, x, true, false) 348 | .unwrap(); 349 | 350 | let config = Config::default(); 351 | assert!(config.is_enabled(Detector::Hunspell)); 352 | assert!(config.is_enabled(Detector::Spellbook)); 353 | assert!(config.is_enabled(Detector::ZSpell)); 354 | let cs = Checkers::new(config).unwrap(); 355 | 356 | let (origin, ccs) = doc.iter().next().unwrap(); 357 | dbg!(&ccs); 358 | let assert_cmp = |a: &[Suggestion<'_>], b: &[Suggestion<'_>]| { 359 | assert_eq!(a.len(), b.len()); 360 | for (a, b) in a.iter().zip(b.iter()) { 361 | assert_eq!(a.range, b.range); 362 | assert_eq!(a.chunk, b.chunk); 363 | } 364 | }; 365 | 366 | let hun = dbg!(cs.hunspell.unwrap().check(origin, ccs)).unwrap(); 367 | let book = dbg!(cs.spellbook.unwrap().check(origin, ccs)).unwrap(); 368 | let z = dbg!(cs.zet.unwrap().check(origin, ccs)).unwrap(); 369 | assert_cmp(&hun, &z); 370 | assert_cmp(&z, &book); 371 | } 372 | } 373 | -------------------------------------------------------------------------------- /src/checker/zspell.rs: -------------------------------------------------------------------------------- 1 | //! A dictionary check with affixes, backed by `libhunspell` 2 | //! 3 | //! Does not check grammar, but tokenizes the documentation chunk, and checks 4 | //! the individual tokens against the dictionary using the defined affixes. Can 5 | //! handle multiple dictionaries. 
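// Illustrative sketch, not part of this module: the zspell flow used below
// reduces to building a `Dictionary` from affix + word-list strings and then
// checking single words. The one-line `.aff`/`.dic` literals are placeholder
// assumptions; the real contents come from `DicAff::load`.
#[allow(dead_code)]
fn zspell_roundtrip_sketch() -> crate::errors::Result<()> {
    let aff = "SET UTF-8\n";
    let dic = "1\nspellcheck\n";
    let dict = zspell::builder().config_str(aff).dict_str(dic).build()?;
    assert!(dict.check_word("spellcheck"));
    assert!(!dict.check_word("spellchek"));
    Ok(())
}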
6 | 7 | use super::{apply_tokenizer, Checker, Detector, Suggestion}; 8 | 9 | use crate::checker::dictaffix::DicAff; 10 | use crate::config::WrappedRegex; 11 | use crate::documentation::{CheckableChunk, ContentOrigin, PlainOverlay}; 12 | use crate::util::sub_chars; 13 | use crate::Range; 14 | 15 | use nlprule::Tokenizer; 16 | 17 | use std::path::PathBuf; 18 | use std::sync::Arc; 19 | 20 | use doc_chunks::Ignores; 21 | 22 | use crate::errors::*; 23 | 24 | use super::quirks::{ 25 | replacements_contain_dashed, replacements_contain_dashless, transform, Transformed, 26 | }; 27 | 28 | use super::hunspell::consists_of_vulgar_fractions_or_emojis; 29 | 30 | #[derive(Clone)] 31 | pub struct ZetCheckerInner { 32 | zspell: zspell::Dictionary, 33 | transform_regex: Vec, 34 | allow_concatenated: bool, 35 | allow_dashed: bool, 36 | allow_emojis: bool, 37 | check_footnote_references: bool, 38 | ignorelist: String, 39 | } 40 | 41 | impl ZetCheckerInner { 42 | fn new(config: &::Config) -> Result { 43 | // TODO allow override 44 | let ( 45 | transform_regex, 46 | allow_concatenated, 47 | allow_dashed, 48 | allow_emojis, 49 | check_footnote_references, 50 | ) = { 51 | let quirks = &config.quirks; 52 | ( 53 | quirks.transform_regex().to_vec(), 54 | quirks.allow_concatenated(), 55 | quirks.allow_dashed(), 56 | quirks.allow_emojis(), 57 | quirks.check_footnote_references(), 58 | ) 59 | }; 60 | // FIXME rename the config option 61 | let ignorelist = config.tokenization_splitchars.clone(); 62 | // without these, a lot of those would be flagged as mistakes. 63 | debug_assert!(ignorelist.contains(',')); 64 | debug_assert!(ignorelist.contains('.')); 65 | debug_assert!(ignorelist.contains(';')); 66 | debug_assert!(ignorelist.contains('!')); 67 | debug_assert!(ignorelist.contains('?')); 68 | 69 | let DicAff { dic, aff } = DicAff::load( 70 | &config.extra_dictionaries[..], 71 | &config.search_dirs, 72 | config.lang(), 73 | config.use_builtin, 74 | config.skip_os_lookups, 75 | )?; 76 | 77 | let aff = aff.replace("UTF8", "UTF-8"); 78 | let zet = zspell::builder().config_str(&aff).dict_str(&dic).build()?; 79 | 80 | log::debug!("Dictionary setup completed successfully."); 81 | Ok(Self { 82 | zspell: zet, 83 | transform_regex, 84 | allow_concatenated, 85 | allow_dashed, 86 | allow_emojis, 87 | check_footnote_references, 88 | ignorelist, 89 | }) 90 | } 91 | } 92 | 93 | #[derive(Clone)] 94 | pub struct ZetChecker(pub Arc, pub Arc); 95 | 96 | impl std::ops::Deref for ZetChecker { 97 | type Target = ZetCheckerInner; 98 | fn deref(&self) -> &Self::Target { 99 | self.0.deref() 100 | } 101 | } 102 | 103 | impl ZetChecker { 104 | pub fn new(config: &::Config) -> Result { 105 | let tokenizer = super::tokenizer::<&PathBuf>(None)?; 106 | let inner = ZetCheckerInner::new(config)?; 107 | let hunspell = Arc::new(inner); 108 | Ok(ZetChecker(hunspell, tokenizer)) 109 | } 110 | } 111 | 112 | impl Checker for ZetChecker { 113 | type Config = crate::config::ZetConfig; 114 | 115 | fn detector() -> Detector { 116 | Detector::ZSpell 117 | } 118 | 119 | fn check<'a, 's>( 120 | &self, 121 | origin: &ContentOrigin, 122 | chunks: &'a [CheckableChunk], 123 | ) -> Result>> 124 | where 125 | 'a: 's, 126 | { 127 | let mut acc = Vec::with_capacity(chunks.len()); 128 | 129 | for chunk in chunks { 130 | let plain = chunk.erase_cmark(&Ignores { 131 | footnote_references: !self.0.check_footnote_references, 132 | }); 133 | log::trace!("{plain:?}"); 134 | let txt = plain.as_str(); 135 | 136 | 'tokenization: for range in apply_tokenizer(&self.1, txt) { 137 | let 
word = sub_chars(txt, range.clone()); 138 | if range.len() == 1 139 | && word 140 | .chars() 141 | .next() 142 | .filter(|c| self.ignorelist.contains(*c)) 143 | .is_some() 144 | { 145 | continue 'tokenization; 146 | } 147 | if self.transform_regex.is_empty() { 148 | obtain_suggestions( 149 | &plain, 150 | chunk, 151 | &self.zspell, 152 | origin, 153 | word, 154 | range, 155 | self.allow_concatenated, 156 | self.allow_dashed, 157 | self.allow_emojis, 158 | &mut acc, 159 | ) 160 | } else { 161 | match transform(&self.transform_regex[..], word.as_str(), range.clone()) { 162 | Transformed::Fragments(word_fragments) => { 163 | for (range, word_fragment) in word_fragments { 164 | obtain_suggestions( 165 | &plain, 166 | chunk, 167 | &self.zspell, 168 | origin, 169 | word_fragment.to_owned(), 170 | range, 171 | self.allow_concatenated, 172 | self.allow_dashed, 173 | self.allow_emojis, 174 | &mut acc, 175 | ); 176 | } 177 | } 178 | Transformed::Atomic((range, word)) => { 179 | obtain_suggestions( 180 | &plain, 181 | chunk, 182 | &self.zspell, 183 | origin, 184 | word.to_owned(), 185 | range, 186 | self.allow_concatenated, 187 | self.allow_dashed, 188 | self.allow_emojis, 189 | &mut acc, 190 | ); 191 | } 192 | Transformed::Whitelisted(_) => {} 193 | } 194 | } 195 | } 196 | } 197 | Ok(acc) 198 | } 199 | } 200 | 201 | fn obtain_suggestions<'s>( 202 | plain: &PlainOverlay, 203 | chunk: &'s CheckableChunk, 204 | zspell: &zspell::Dictionary, 205 | origin: &ContentOrigin, 206 | word: String, 207 | range: Range, 208 | allow_concatenated: bool, 209 | allow_dashed: bool, 210 | allow_emojis: bool, 211 | acc: &mut Vec>, 212 | ) { 213 | log::trace!("Checking {word} in {range:?}.."); 214 | 215 | match zspell.check_word(&word) { 216 | false => { 217 | log::trace!("No match for word (plain range: {range:?}): >{word}<"); 218 | // get rid of single character suggestions 219 | let replacements = vec![]; 220 | // single char suggestions tend to be useless 221 | 222 | log::debug!(target: "zspell", "{word} --{{suggest}}--> {replacements:?}"); 223 | 224 | // strings made of vulgar fraction or emoji 225 | if allow_emojis && consists_of_vulgar_fractions_or_emojis(&word) { 226 | log::trace!(target: "quirks", "Found emoji or vulgar fraction character, treating {word} as ok"); 227 | return; 228 | } 229 | 230 | if allow_concatenated && replacements_contain_dashless(&word, replacements.as_slice()) { 231 | log::trace!(target: "quirks", "Found dashless word in replacement suggestions, treating {word} as ok"); 232 | return; 233 | } 234 | if allow_dashed && replacements_contain_dashed(&word, replacements.as_slice()) { 235 | log::trace!(target: "quirks", "Found dashed word in replacement suggestions, treating {word} as ok"); 236 | return; 237 | } 238 | for (range, span) in plain.find_spans(range.clone()) { 239 | acc.push(Suggestion { 240 | detector: Detector::ZSpell, 241 | range, 242 | span, 243 | origin: origin.clone(), 244 | replacements: replacements.clone(), 245 | chunk, 246 | description: Some("Possible spelling mistake found.".to_owned()), 247 | }) 248 | } 249 | } 250 | true => { 251 | log::trace!("Found a match for word (plain range: {range:?}): >{word}<",); 252 | } 253 | } 254 | } 255 | 256 | #[cfg(test)] 257 | mod tests { 258 | use crate::checker::dictaffix::is_valid_hunspell_dic; 259 | 260 | use super::*; 261 | 262 | #[test] 263 | fn hunspell_dic_format() { 264 | const GOOD: &str = "2 265 | whitespazes 266 | catsndogs 267 | "; 268 | const BAD_1: &str = "foo 269 | 12349 270 | bar 271 | "; 272 | const BAD_2: &str = "2 273 | 
12349 274 | bar 275 | "; 276 | const BAD_3: &str = "foo 277 | xxx 278 | bar 279 | "; 280 | assert!(is_valid_hunspell_dic(&mut GOOD.as_bytes()).is_ok()); 281 | assert!(is_valid_hunspell_dic(&mut BAD_1.as_bytes()).is_err()); 282 | assert!(is_valid_hunspell_dic(&mut BAD_2.as_bytes()).is_err()); 283 | assert!(is_valid_hunspell_dic(&mut BAD_3.as_bytes()).is_err()); 284 | } 285 | 286 | macro_rules! parametrized_vulgar_fraction_or_emoji { 287 | ($($name:ident: $value:expr,)*) => { 288 | $( 289 | #[test] 290 | fn $name() { 291 | let (input, expected) = $value; 292 | assert_eq!(expected, consists_of_vulgar_fractions_or_emojis(input)); 293 | } 294 | )* 295 | } 296 | } 297 | 298 | parametrized_vulgar_fraction_or_emoji! { 299 | empty: ("", false), 300 | emojis: ("🐍🤗🦀", true), 301 | contains_emojis: ("🦀acean", false), 302 | contains_only_unicode: ("⅔⅔⅔↉↉↉", true), 303 | contains_emojis_and_unicodes: ("🐍🤗⅒🦀⅔¾", true), 304 | no_emojis: ("no emoji string", false), 305 | is_number: ("123", true), 306 | is_latin_letter: ("a", false), 307 | vulgar_fraction_one_quarter_and_emojis: ("¼🤗🦀", true), 308 | emojis_and_vulgar_fraction_one_half: ("🤗🦀½", true), 309 | emojis_and_vulgar_fraction_three_quarters: ("🤗🦀¾", true), 310 | emojis_and_vulgar_fraction_one_seventh: ("🤗🦀⅐", true), 311 | emojis_and_vulgar_fraction_one_ninth: ("🤗🦀⅑", true), 312 | emojis_and_vulgar_fraction_one_tenth: ("🤗🦀⅒", true), 313 | emojis_and_vulgar_fraction_one_third: ("🤗🦀⅓", true), 314 | emojis_and_vulgar_fraction_two_thirds: ("🤗🦀⅔", true), 315 | emojis_and_vulgar_fraction_one_fifth: ("🤗🦀⅕", true), 316 | emojis_and_vulgar_fraction_two_fifth: ("🤗🦀⅖", true), 317 | emojis_and_vulgar_fraction_three_fifths: ("🤗🦀⅗", true), 318 | emojis_and_vulgar_fraction_four_fifths: ( "🐍⅘", true), 319 | emojis_and_vulgar_fraction_one_sixth: ("🐍⅙", true), 320 | emojis_and_vulgar_fraction_five_sixths: ("🐍⅚", true), 321 | emojis_and_vulgar_fraction_one_eighth: ("🦀🐍⅛", true), 322 | emojis_and_vulgar_fraction_three_eighths: ("🦀🐍⅜", true), 323 | emojis_and_vulgar_fraction_five_eights: ("🦀🐍⅝", true), 324 | emojis_and_vulgar_fraction_five_eighths: ("🦀🐍⅝", true), 325 | emojis_and_vulgar_fraction_seven_eighths: ("🦀🐍⅞", true), 326 | emojis_and_vulgar_fraction_zero_thirds: ("🦀🐍↉", true), 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /src/checker/spellbook.rs: -------------------------------------------------------------------------------- 1 | //! A dictionary check with affixes, backed by `libhunspell` 2 | //! 3 | //! Does not check grammar, but tokenizes the documentation chunk, and checks 4 | //! the individual tokens against the dictionary using the defined affixes. Can 5 | //! handle multiple dictionaries. 
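// Illustrative sketch, not part of this module: `spellbook` follows the same
// two-step pattern used below, namely constructing a `Dictionary` from the
// raw `.aff`/`.dic` strings and calling `check` per word. The literal
// dictionary contents are placeholder assumptions.
#[allow(dead_code)]
fn spellbook_roundtrip_sketch() -> crate::errors::Result<()> {
    let aff = "SET UTF-8\n";
    let dic = "1\nspellcheck\n";
    let dict = ::spellbook::Dictionary::new(aff, dic)
        .map_err(|e| eyre!("Failed to parse dictionary: {e}"))?;
    assert!(dict.check("spellcheck"));
    Ok(())
}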
6 | 7 | use super::{apply_tokenizer, Checker, Detector, Suggestion}; 8 | 9 | use crate::checker::dictaffix::DicAff; 10 | use crate::config::WrappedRegex; 11 | use crate::documentation::{CheckableChunk, ContentOrigin, PlainOverlay}; 12 | use crate::util::sub_chars; 13 | use crate::Range; 14 | 15 | use nlprule::Tokenizer; 16 | 17 | use std::path::PathBuf; 18 | use std::sync::Arc; 19 | 20 | use doc_chunks::Ignores; 21 | 22 | use crate::errors::*; 23 | 24 | use super::quirks::{ 25 | replacements_contain_dashed, replacements_contain_dashless, transform, Transformed, 26 | }; 27 | 28 | use super::hunspell::consists_of_vulgar_fractions_or_emojis; 29 | 30 | #[derive(Clone)] 31 | pub struct SpellbookCheckerInner { 32 | spellbook: ::spellbook::Dictionary, 33 | transform_regex: Vec, 34 | allow_concatenated: bool, 35 | allow_dashed: bool, 36 | allow_emojis: bool, 37 | check_footnote_references: bool, 38 | ignorelist: String, 39 | } 40 | 41 | impl SpellbookCheckerInner { 42 | fn new(config: &::Config) -> Result { 43 | // TODO allow override 44 | let ( 45 | transform_regex, 46 | allow_concatenated, 47 | allow_dashed, 48 | allow_emojis, 49 | check_footnote_references, 50 | ) = { 51 | let quirks = &config.quirks; 52 | ( 53 | quirks.transform_regex().to_vec(), 54 | quirks.allow_concatenated(), 55 | quirks.allow_dashed(), 56 | quirks.allow_emojis(), 57 | quirks.check_footnote_references(), 58 | ) 59 | }; 60 | // FIXME rename the config option 61 | let ignorelist = config.tokenization_splitchars.clone(); 62 | // without these, a lot of those would be flagged as mistakes. 63 | debug_assert!(ignorelist.contains(',')); 64 | debug_assert!(ignorelist.contains('.')); 65 | debug_assert!(ignorelist.contains(';')); 66 | debug_assert!(ignorelist.contains('!')); 67 | debug_assert!(ignorelist.contains('?')); 68 | 69 | let DicAff { dic, aff } = DicAff::load( 70 | &config.extra_dictionaries[..], 71 | &config.search_dirs, 72 | config.lang(), 73 | config.use_builtin, 74 | config.skip_os_lookups, 75 | )?; 76 | 77 | let spellbook = ::spellbook::Dictionary::new(&aff, &dic) 78 | .map_err(|e| eyre!("Failed to parse dictionary: {e}"))?; 79 | 80 | log::debug!("Dictionary setup completed successfully."); 81 | Ok(Self { 82 | spellbook, 83 | transform_regex, 84 | allow_concatenated, 85 | allow_dashed, 86 | allow_emojis, 87 | check_footnote_references, 88 | ignorelist, 89 | }) 90 | } 91 | } 92 | 93 | #[derive(Clone)] 94 | pub struct SpellbookChecker(pub Arc, pub Arc); 95 | 96 | impl std::ops::Deref for SpellbookChecker { 97 | type Target = SpellbookCheckerInner; 98 | fn deref(&self) -> &Self::Target { 99 | self.0.deref() 100 | } 101 | } 102 | 103 | impl SpellbookChecker { 104 | pub fn new(config: &::Config) -> Result { 105 | let tokenizer = super::tokenizer::<&PathBuf>(None)?; 106 | let inner = SpellbookCheckerInner::new(config)?; 107 | let hunspell = Arc::new(inner); 108 | Ok(SpellbookChecker(hunspell, tokenizer)) 109 | } 110 | } 111 | 112 | impl Checker for SpellbookChecker { 113 | type Config = crate::config::SpellbookConfig; 114 | 115 | fn detector() -> Detector { 116 | Detector::Spellbook 117 | } 118 | 119 | fn check<'a, 's>( 120 | &self, 121 | origin: &ContentOrigin, 122 | chunks: &'a [CheckableChunk], 123 | ) -> Result>> 124 | where 125 | 'a: 's, 126 | { 127 | let mut acc = Vec::with_capacity(chunks.len()); 128 | 129 | for chunk in chunks { 130 | let plain = chunk.erase_cmark(&Ignores { 131 | footnote_references: !self.0.check_footnote_references, 132 | }); 133 | log::trace!("{plain:?}"); 134 | let txt = plain.as_str(); 135 | 
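            // What follows mirrors the hunspell/zspell checkers: tokenize the
            // markdown-erased text, skip single-character tokens that are mere
            // split characters, then either check the token directly or first
            // split it into fragments via the configured `transform_regex`
            // quirks before looking each fragment up in the dictionary.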
136 | 'tokenization: for range in apply_tokenizer(&self.1, txt) { 137 | let word = sub_chars(txt, range.clone()); 138 | if range.len() == 1 139 | && word 140 | .chars() 141 | .next() 142 | .filter(|c| self.ignorelist.contains(*c)) 143 | .is_some() 144 | { 145 | continue 'tokenization; 146 | } 147 | if self.transform_regex.is_empty() { 148 | obtain_suggestions( 149 | &plain, 150 | chunk, 151 | &self.spellbook, 152 | origin, 153 | word, 154 | range, 155 | self.allow_concatenated, 156 | self.allow_dashed, 157 | self.allow_emojis, 158 | &mut acc, 159 | ) 160 | } else { 161 | match transform(&self.transform_regex[..], word.as_str(), range.clone()) { 162 | Transformed::Fragments(word_fragments) => { 163 | for (range, word_fragment) in word_fragments { 164 | obtain_suggestions( 165 | &plain, 166 | chunk, 167 | &self.spellbook, 168 | origin, 169 | word_fragment.to_owned(), 170 | range, 171 | self.allow_concatenated, 172 | self.allow_dashed, 173 | self.allow_emojis, 174 | &mut acc, 175 | ); 176 | } 177 | } 178 | Transformed::Atomic((range, word)) => { 179 | obtain_suggestions( 180 | &plain, 181 | chunk, 182 | &self.spellbook, 183 | origin, 184 | word.to_owned(), 185 | range, 186 | self.allow_concatenated, 187 | self.allow_dashed, 188 | self.allow_emojis, 189 | &mut acc, 190 | ); 191 | } 192 | Transformed::Whitelisted(_) => {} 193 | } 194 | } 195 | } 196 | } 197 | Ok(acc) 198 | } 199 | } 200 | 201 | fn obtain_suggestions<'s>( 202 | plain: &PlainOverlay, 203 | chunk: &'s CheckableChunk, 204 | dictionary: &::spellbook::Dictionary, 205 | origin: &ContentOrigin, 206 | word: String, 207 | range: Range, 208 | allow_concatenated: bool, 209 | allow_dashed: bool, 210 | allow_emojis: bool, 211 | acc: &mut Vec>, 212 | ) { 213 | log::trace!("Checking {word} in {range:?}.."); 214 | 215 | match dictionary.check(&word) { 216 | false => { 217 | log::trace!(target: "spellbook", "No match for word (plain range: {range:?}): >{word}<"); 218 | // get rid of single character suggestions 219 | let replacements = vec![]; 220 | // single char suggestions tend to be useless 221 | 222 | log::debug!(target: "spellbook", "{word} --{{suggest}}--> {replacements:?}"); 223 | 224 | // strings made of vulgar fraction or emoji 225 | if allow_emojis && consists_of_vulgar_fractions_or_emojis(&word) { 226 | log::trace!(target: "quirks", "Found emoji or vulgar fraction character, treating {word} as ok"); 227 | return; 228 | } 229 | 230 | if allow_concatenated && replacements_contain_dashless(&word, replacements.as_slice()) { 231 | log::trace!(target: "quirks", "Found dashless word in replacement suggestions, treating {word} as ok"); 232 | return; 233 | } 234 | if allow_dashed && replacements_contain_dashed(&word, replacements.as_slice()) { 235 | log::trace!(target: "quirks", "Found dashed word in replacement suggestions, treating {word} as ok"); 236 | return; 237 | } 238 | for (range, span) in plain.find_spans(range.clone()) { 239 | acc.push(Suggestion { 240 | detector: Detector::Spellbook, 241 | range, 242 | span, 243 | origin: origin.clone(), 244 | replacements: replacements.clone(), 245 | chunk, 246 | description: Some("Possible spelling mistake found.".to_owned()), 247 | }) 248 | } 249 | } 250 | true => { 251 | log::trace!(target: "spellbook", "Found a match for word (plain range: {range:?}): >{word}<",); 252 | } 253 | } 254 | } 255 | 256 | #[cfg(test)] 257 | mod tests { 258 | use crate::checker::dictaffix::is_valid_hunspell_dic; 259 | 260 | use super::*; 261 | 262 | #[test] 263 | fn hunspell_dic_format() { 264 | const GOOD: &str = "2 
265 | whitespazes 266 | catsndogs 267 | "; 268 | const BAD_1: &str = "foo 269 | 12349 270 | bar 271 | "; 272 | const BAD_2: &str = "2 273 | 12349 274 | bar 275 | "; 276 | const BAD_3: &str = "foo 277 | xxx 278 | bar 279 | "; 280 | assert!(is_valid_hunspell_dic(&mut GOOD.as_bytes()).is_ok()); 281 | assert!(is_valid_hunspell_dic(&mut BAD_1.as_bytes()).is_err()); 282 | assert!(is_valid_hunspell_dic(&mut BAD_2.as_bytes()).is_err()); 283 | assert!(is_valid_hunspell_dic(&mut BAD_3.as_bytes()).is_err()); 284 | } 285 | 286 | macro_rules! parametrized_vulgar_fraction_or_emoji { 287 | ($($name:ident: $value:expr,)*) => { 288 | $( 289 | #[test] 290 | fn $name() { 291 | let (input, expected) = $value; 292 | assert_eq!(expected, consists_of_vulgar_fractions_or_emojis(input)); 293 | } 294 | )* 295 | } 296 | } 297 | 298 | parametrized_vulgar_fraction_or_emoji! { 299 | empty: ("", false), 300 | emojis: ("🐍🤗🦀", true), 301 | contains_emojis: ("🦀acean", false), 302 | contains_only_unicode: ("⅔⅔⅔↉↉↉", true), 303 | contains_emojis_and_unicodes: ("🐍🤗⅒🦀⅔¾", true), 304 | no_emojis: ("no emoji string", false), 305 | is_number: ("123", true), 306 | is_latin_letter: ("a", false), 307 | vulgar_fraction_one_quarter_and_emojis: ("¼🤗🦀", true), 308 | emojis_and_vulgar_fraction_one_half: ("🤗🦀½", true), 309 | emojis_and_vulgar_fraction_three_quarters: ("🤗🦀¾", true), 310 | emojis_and_vulgar_fraction_one_seventh: ("🤗🦀⅐", true), 311 | emojis_and_vulgar_fraction_one_ninth: ("🤗🦀⅑", true), 312 | emojis_and_vulgar_fraction_one_tenth: ("🤗🦀⅒", true), 313 | emojis_and_vulgar_fraction_one_third: ("🤗🦀⅓", true), 314 | emojis_and_vulgar_fraction_two_thirds: ("🤗🦀⅔", true), 315 | emojis_and_vulgar_fraction_one_fifth: ("🤗🦀⅕", true), 316 | emojis_and_vulgar_fraction_two_fifth: ("🤗🦀⅖", true), 317 | emojis_and_vulgar_fraction_three_fifths: ("🤗🦀⅗", true), 318 | emojis_and_vulgar_fraction_four_fifths: ( "🐍⅘", true), 319 | emojis_and_vulgar_fraction_one_sixth: ("🐍⅙", true), 320 | emojis_and_vulgar_fraction_five_sixths: ("🐍⅚", true), 321 | emojis_and_vulgar_fraction_one_eighth: ("🦀🐍⅛", true), 322 | emojis_and_vulgar_fraction_three_eighths: ("🦀🐍⅜", true), 323 | emojis_and_vulgar_fraction_five_eights: ("🦀🐍⅝", true), 324 | emojis_and_vulgar_fraction_five_eighths: ("🦀🐍⅝", true), 325 | emojis_and_vulgar_fraction_seven_eighths: ("🦀🐍⅞", true), 326 | emojis_and_vulgar_fraction_zero_thirds: ("🦀🐍↉", true), 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /src/config/mod.rs: -------------------------------------------------------------------------------- 1 | //! Configure cargo-spellcheck 2 | //! 3 | //! Supports `Hunspell` and `LanguageTool` scopes. 4 | //! 5 | //! A default configuration will be generated in the default location by 6 | //! default. Default. Default default default. 7 | 8 | // TODO pendeng refactor, avoid spending time on documenting the status quo. 
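// An illustrative configuration file (sketch): section names follow the serde
// aliases declared on `Config` below; the keys inside each section live on the
// corresponding `*Config` structs and are omitted here.
//
//     dev_comments = false
//     skip_readme = false
//
//     [Hunspell]
//     # extra dictionaries, search dirs, quirks, ...
//
//     [NlpRules]
//     # optional overrides for the tokenizer/rules data
//
//     [Reflow]
//     # line length limit for the reflow helper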
9 | #![allow(missing_docs)]
10 | 
11 | pub mod args;
12 | 
13 | mod regex;
14 | pub use self::regex::*;
15 | 
16 | mod reflow;
17 | pub use self::reflow::*;
18 | 
19 | mod hunspell;
20 | pub use self::hunspell::*;
21 | 
22 | mod nlprules;
23 | pub use self::nlprules::*;
24 | 
25 | mod search_dirs;
26 | pub use search_dirs::*;
27 | 
28 | mod iso;
29 | pub use iso::*;
30 | 
31 | use crate::errors::*;
32 | use crate::Detector;
33 | use fancy_regex::Regex;
34 | 
35 | use fs_err as fs;
36 | use serde::{Deserialize, Serialize};
37 | use std::convert::AsRef;
38 | use std::fmt;
39 | use std::io::Read;
40 | use std::path::{Path, PathBuf};
41 | 
42 | #[derive(Deserialize, Serialize, Debug, Clone)]
43 | #[serde(deny_unknown_fields)]
44 | pub struct Config {
45 |     // Options that modify the inputs being picked up.
46 |     #[serde(default)]
47 |     #[serde(alias = "dev-comments")]
48 |     #[serde(alias = "devcomments")]
49 |     pub dev_comments: bool,
50 | 
51 |     #[serde(default)]
52 |     #[serde(alias = "skip-readme")]
53 |     #[serde(alias = "skipreadme")]
54 |     pub skip_readme: bool,
55 | 
56 |     #[serde(alias = "Hunspell")]
57 |     #[serde(default = "default_hunspell")]
58 |     pub hunspell: Option<HunspellConfig>,
59 | 
60 |     #[serde(alias = "ZSpell")]
61 |     #[serde(default = "default_zspell")]
62 |     pub zet: Option<ZetConfig>,
63 | 
64 |     #[serde(alias = "Spellbook")]
65 |     #[serde(alias = "book")]
66 |     #[serde(default = "default_spellbook")]
67 |     pub spellbook: Option<SpellbookConfig>,
68 | 
69 |     #[serde(alias = "Nlp")]
70 |     #[serde(alias = "NLP")]
71 |     #[serde(alias = "nlp")]
72 |     #[serde(alias = "NLP")]
73 |     #[serde(alias = "NlpRules")]
74 |     #[serde(default = "default_nlprules")]
75 |     pub nlprules: Option<NlpRulesConfig>,
76 | 
77 |     #[serde(alias = "ReFlow")]
78 |     #[serde(alias = "Reflow")]
79 |     pub reflow: Option<ReflowConfig>,
80 | }
81 | 
82 | impl Config {
83 |     const QUALIFIER: &'static str = "rs";
84 |     const ORGANIZATION: &'static str = "fff";
85 |     const APPLICATION: &'static str = "cargo_spellcheck";
86 | 
87 |     /// Sanitize all relative paths to absolute paths in relation to `base`.
88 |     fn sanitize_paths(&mut self, base: &Path) -> Result<()> {
89 |         if let Some(ref mut hunspell) = self.hunspell {
90 |             hunspell.sanitize_paths(base)?;
91 |         }
92 |         if let Some(ref mut zspell) = self.zet {
93 |             zspell.sanitize_paths(base)?;
94 |         }
95 |         if let Some(ref mut spellbook) = self.spellbook {
96 |             spellbook.sanitize_paths(base)?;
97 |         }
98 |         Ok(())
99 |     }
100 | 
101 |     pub fn parse<S: AsRef<str>>(s: S) -> Result<Self> {
102 |         Ok(toml::from_str(s.as_ref())?)
103 |     }
104 | 
105 |     pub fn load_from<P: AsRef<Path>>(path: P) -> Result<Option<Self>> {
106 |         let (contents, path) = match Self::load_content(path) {
107 |             Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
108 |                 return Ok(None);
109 |             }
110 |             Err(e) => bail!(e),
111 |             Ok(contents) => contents,
112 |         };
113 |         Self::parse(&contents)
114 |             .wrap_err_with(|| {
115 |                 eyre!(
116 |                     "Syntax of a given config file({}) is broken",
117 |                     path.display()
118 |                 )
119 |             })
120 |             .and_then(|mut cfg| {
121 |                 if let Some(base) = path.parent() {
122 |                     cfg.sanitize_paths(base)?;
123 |                 }
124 |                 Ok(Some(cfg))
125 |             })
126 |     }
127 | 
128 |     pub fn load_content<P: AsRef<Path>>(path: P) -> std::io::Result<(String, PathBuf)> {
129 |         let path = path.as_ref().canonicalize()?;
130 |         let mut file = fs::File::open(&path)?;
131 | 
132 |         let mut contents = String::with_capacity(1024);
133 |         file.read_to_string(&mut contents)?;
134 |         Ok((contents, path))
135 |     }
136 | 
137 |     pub fn load() -> Result<Option<Self>> {
138 |         if let Some(base) = directories::BaseDirs::new() {
139 |             Self::load_from(
140 |                 base.config_dir()
141 |                     .join("cargo_spellcheck")
142 |                     .join("config.toml"),
143 |             )
144 |         } else {
145 |             bail!("No idea where your config directory is located. XDG compliance would be nice.")
146 |         }
147 |     }
148 | 
149 |     pub fn to_toml(&self) -> Result<String> {
150 |         toml::to_string(self).wrap_err_with(|| eyre!("Failed to convert to toml"))
151 |     }
152 | 
153 |     pub fn write_values_to<W: std::io::Write>(&self, mut writer: W) -> Result<Self> {
154 |         let s = self.to_toml()?;
155 |         writer.write_all(s.as_bytes())?;
156 |         Ok(self.clone())
157 |     }
158 | 
159 |     pub fn write_values_to_path<P: AsRef<Path>>(&self, path: P) -> Result<Self> {
160 |         let path = path.as_ref();
161 | 
162 |         if let Some(path) = path.parent() {
163 |             fs::create_dir_all(path).wrap_err_with(|| {
164 |                 eyre!("Failed to create config parent dirs {}", path.display())
165 |             })?;
166 |         }
167 | 
168 |         let file = fs::OpenOptions::new()
169 |             .create(true)
170 |             .write(true)
171 |             .truncate(true)
172 |             .open(path)
173 |             .wrap_err_with(|| eyre!("Failed to write default values to {}", path.display()))?;
174 | 
175 |         let writer = std::io::BufWriter::new(file);
176 | 
177 |         self.write_values_to(writer)
178 |             .wrap_err_with(|| eyre!("Failed to write default config to {}", path.display()))
179 |     }
180 | 
181 |     pub fn write_values_to_default_path(&self) -> Result<Self> {
182 |         let path = Self::default_path()?;
183 |         self.write_values_to_path(path)
184 |     }
185 | 
186 |     pub fn write_default_values_to<P: AsRef<Path>>(path: P) -> Result<Self> {
187 |         Self::default().write_values_to_path(path)
188 |     }
189 | 
190 |     pub fn default_path() -> Result<PathBuf> {
191 |         if let Some(base) =
192 |             directories::ProjectDirs::from(Self::QUALIFIER, Self::ORGANIZATION, Self::APPLICATION)
193 |         {
194 |             Ok(base.config_dir().join("config.toml"))
195 |         } else {
196 |             bail!("No idea where your config directory is located. `$HOME` must be set.")
197 |         }
198 |     }
199 | 
200 |     /// Obtain a project specific config file.
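    ///
    /// This resolves to `<manifest_dir>/.config/spellcheck.toml` and fails if
    /// that file is missing, as the body below shows. Illustrative sketch only;
    /// the manifest directory and the import path are assumptions made for the
    /// example.
    ///
    /// ```ignore
    /// // Hypothetical import path and manifest directory, for illustration only.
    /// use cargo_spellcheck::config::Config;
    ///
    /// let path = Config::project_config("path/to/my-crate").expect("project config exists");
    /// assert!(path.ends_with("spellcheck.toml"));
    /// let cfg = Config::load_from(&path).expect("readable").expect("file was found, so it parses");
    /// ```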
201 |     pub fn project_config(manifest_dir: impl AsRef<Path>) -> Result<PathBuf> {
202 |         let path = manifest_dir
203 |             .as_ref()
204 |             .to_owned()
205 |             .join(".config")
206 |             .join("spellcheck.toml");
207 | 
208 |         let path = path.canonicalize()?;
209 | 
210 |         if path.is_file() {
211 |             Ok(path)
212 |         } else {
213 |             bail!(
214 |                 "Local project dir config {} does not exist or is not a file.",
215 |                 path.display()
216 |             )
217 |         }
218 |     }
219 | 
220 |     pub fn write_default_values() -> Result<Self> {
221 |         let d = Self::default_path()?;
222 |         Self::write_default_values_to(d.join("config.toml"))
223 |     }
224 | 
225 |     pub fn is_enabled(&self, detector: Detector) -> bool {
226 |         match detector {
227 |             Detector::Hunspell => self.hunspell.is_some(),
228 |             Detector::ZSpell => self.zet.is_some(),
229 |             Detector::Spellbook => self.spellbook.is_some(),
230 |             Detector::NlpRules => self.nlprules.is_some(),
231 |             Detector::Reflow => self.reflow.is_some(),
232 |             #[cfg(test)]
233 |             Detector::Dummy => true,
234 |         }
235 |     }
236 | 
237 |     pub fn full() -> Self {
238 |         Default::default()
239 |     }
240 | }
241 | 
242 | fn default_nlprules() -> Option<NlpRulesConfig> {
243 |     if cfg!(feature = "nlprules") {
244 |         Some(NlpRulesConfig::default())
245 |     } else {
246 |         log::warn!("Cannot enable nlprules, since it wasn't compiled with `nlprules` as checker");
247 |         None
248 |     }
249 | }
250 | 
251 | fn default_hunspell() -> Option<HunspellConfig> {
252 |     Some(HunspellConfig::default())
253 | }
254 | fn default_zspell() -> Option<ZetConfig> {
255 |     Some(ZetConfig::default())
256 | }
257 | fn default_spellbook() -> Option<SpellbookConfig> {
258 |     Some(SpellbookConfig::default())
259 | }
260 | 
261 | impl Default for Config {
262 |     fn default() -> Self {
263 |         Self {
264 |             dev_comments: false,
265 |             skip_readme: false,
266 |             hunspell: default_hunspell(),
267 |             zet: default_zspell(),
268 |             spellbook: default_spellbook(),
269 |             nlprules: default_nlprules(),
270 |             reflow: Some(ReflowConfig::default()),
271 |         }
272 |     }
273 | }
274 | 
275 | #[cfg(test)]
276 | mod tests {
277 |     use super::*;
278 |     use assert_matches::assert_matches;
279 | 
280 |     #[test]
281 |     fn can_serialize_to_toml() {
282 |         let config = dbg!(Config::full());
283 |         assert_matches!(config.to_toml(), Ok(_s));
284 |     }
285 | 
286 |     #[test]
287 |     fn project_config_works() {
288 |         let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
289 |             .join(".config")
290 |             .join("spellcheck.toml");
291 |         assert_matches!(Config::load_from(&path), Ok(_));
292 |     }
293 | 
294 |     #[test]
295 |     fn all() {
296 |         let _ = Config::parse(
297 |             r#"
298 | dev_comments = true
299 | skip-readme = true
300 | 
301 | [Hunspell]
302 | lang = "en_US"
303 | search_dirs = ["/usr/lib64/hunspell"]
304 | extra_dictionaries = ["/home/bernhard/test.dic"]
305 | "#,
306 |         )
307 |         .unwrap();
308 |     }
309 | 
310 |     #[test]
311 |     fn empty() {
312 |         assert!(Config::parse(
313 |             r#"
314 | "#,
315 |         )
316 |         .is_ok());
317 |     }
318 |     #[test]
319 |     fn partial_1() {
320 |         let _cfg = Config::parse(
321 |             r#"
322 | [hunspell]
323 | lang = "en_GB"
324 | search_dirs = ["/usr/lib64/hunspell"]
325 | extra_dictionaries = ["/home/bernhard/test.dic"]
326 | "#,
327 |         )
328 |         .unwrap();
329 |     }
330 | 
331 |     #[test]
332 |     fn partial_3() {
333 |         let cfg = Config::parse(
334 |             r#"
335 | [Hunspell]
336 | lang = "de_AT"
337 | search_dirs = ["/usr/lib64/hunspell"]
338 | extra_dictionaries = ["/home/bernhard/test.dic"]
339 | "#,
340 |         )
341 |         .unwrap();
342 |         let _hunspell = cfg.hunspell.expect("Must contain hunspell cfg");
343 |     }
344 | 
345 |     #[test]
346 |     fn partial_4() {
347 |         let cfg = Config::parse(
348 |             r#"
349 | [Hunspell]
350 | lang = "en_US"
lang = "en_US" 351 | "#, 352 | ) 353 | .unwrap(); 354 | let _hunspell = cfg.hunspell.expect("Must contain hunspell cfg"); 355 | } 356 | 357 | #[test] 358 | fn partial_5() { 359 | assert!(Config::parse( 360 | r#" 361 | [hUNspell] 362 | lang = "en_US" 363 | "#, 364 | ) 365 | .is_err()); 366 | } 367 | 368 | #[test] 369 | fn partial_6() { 370 | let cfg = Config::parse( 371 | r#" 372 | [hunspell] 373 | "#, 374 | ) 375 | .unwrap(); 376 | let _hunspell = cfg.hunspell.expect("Must contain hunspell cfg"); 377 | } 378 | 379 | #[test] 380 | fn partial_7() { 381 | let cfg = Config::parse( 382 | r#" 383 | [Hunspell.quirks] 384 | allow_concatenation = true 385 | allow_dashes = true 386 | transform_regex = ["^'([^\\s])'$", "^[0-9]+x$"] 387 | "#, 388 | ) 389 | .unwrap(); 390 | let _hunspell = cfg.hunspell.expect("Must contain hunspell cfg"); 391 | } 392 | 393 | #[test] 394 | fn partial_8() { 395 | let cfg = Config::parse( 396 | r#" 397 | [Hunspell] 398 | search_dirs = ["/search/1", "/search/2"] 399 | skip_os_lookups = true 400 | "#, 401 | ) 402 | .unwrap(); 403 | 404 | let hunspell: HunspellConfig = cfg.hunspell.expect("Must contain hunspell cfg"); 405 | assert!(hunspell.skip_os_lookups); 406 | 407 | let search_dirs = hunspell.search_dirs; 408 | let search_dirs2: Vec<_> = search_dirs.as_ref().clone(); 409 | assert!(!search_dirs2.is_empty()); 410 | 411 | assert_eq!(search_dirs.iter(false).count(), 2); 412 | 413 | #[cfg(target_os = "linux")] 414 | assert_eq!(search_dirs.iter(true).count(), 5); 415 | 416 | #[cfg(target_os = "windows")] 417 | assert_eq!(search_dirs.iter(true).count(), 2); 418 | 419 | #[cfg(target_os = "macos")] 420 | assert!(search_dirs.iter(true).count() >= 3); 421 | } 422 | 423 | #[test] 424 | fn partial_9() { 425 | let cfg = Config::parse( 426 | r#" 427 | [Reflow] 428 | max_line_length = 42 429 | "#, 430 | ) 431 | .unwrap(); 432 | assert_eq!( 433 | cfg.reflow.expect("Must contain reflow cfg").max_line_length, 434 | 42 435 | ); 436 | } 437 | } 438 | -------------------------------------------------------------------------------- /doc-chunks/src/literalset.rs: -------------------------------------------------------------------------------- 1 | pub use super::{TrimmedLiteral, TrimmedLiteralDisplay}; 2 | 3 | use crate::{CheckableChunk, CommentVariant, Range}; 4 | 5 | use std::fmt; 6 | 7 | /// A set of consecutive literals. 8 | /// 9 | /// Provides means to render them as a code block 10 | #[derive(Clone, Default, Debug, Hash, PartialEq, Eq)] 11 | pub struct LiteralSet { 12 | /// consecutive set of literals mapped by line number 13 | literals: Vec, 14 | /// lines spanned (start, end) inclusive 15 | pub coverage: (usize, usize), 16 | /// Track what kind of comment the literals are 17 | variant: CommentVariant, 18 | } 19 | 20 | impl LiteralSet { 21 | /// Initiate a new set based on the first literal 22 | pub fn from(literal: TrimmedLiteral) -> Self { 23 | Self { 24 | coverage: (literal.span().start.line, literal.span().end.line), 25 | variant: literal.variant(), 26 | literals: vec![literal], 27 | } 28 | } 29 | 30 | /// Add a literal to a literal set, if the previous lines literal already 31 | /// exists. 
32 | /// 33 | /// Returns literal within the Err variant if not adjacent 34 | pub fn add_adjacent(&mut self, literal: TrimmedLiteral) -> Result<(), TrimmedLiteral> { 35 | if literal.variant().category() != self.variant.category() { 36 | log::debug!( 37 | "Adjacent literal is not the same comment variant: {:?} vs {:?}", 38 | literal.variant().category(), 39 | self.variant.category() 40 | ); 41 | return Err(literal); 42 | } 43 | let previous_line = literal.span().end.line; 44 | if previous_line == self.coverage.1 + 1 { 45 | self.coverage.1 += 1; 46 | self.literals.push(literal); 47 | return Ok(()); 48 | } 49 | 50 | let next_line = literal.span().start.line; 51 | if next_line + 1 == self.coverage.0 { 52 | self.literals.push(literal); 53 | self.coverage.1 -= 1; 54 | return Ok(()); 55 | } 56 | 57 | Err(literal) 58 | } 59 | 60 | /// The set of trimmed literals that is covered. 61 | pub fn literals(&self) -> Vec<&TrimmedLiteral> { 62 | self.literals.iter().by_ref().collect() 63 | } 64 | 65 | /// The number of literals inside this set. 66 | pub fn len(&self) -> usize { 67 | self.literals.len() 68 | } 69 | 70 | /// Convert to a checkable chunk. 71 | /// 72 | /// Creates the map from content ranges to source spans. 73 | pub fn into_chunk(self) -> crate::CheckableChunk { 74 | let n = self.len(); 75 | let mut source_mapping = indexmap::IndexMap::with_capacity(n); 76 | let mut content = String::with_capacity(n * 120); 77 | if n > 0 { 78 | // cursor operates on characters 79 | let mut cursor = 0usize; 80 | // for use with `Range` 81 | let mut start; // inclusive 82 | let mut end; // exclusive 83 | let mut it = self.literals.iter(); 84 | let mut next = it.next(); 85 | while let Some(literal) = next { 86 | start = cursor; 87 | cursor += literal.len_in_chars(); 88 | end = cursor; 89 | 90 | let span = literal.span(); 91 | let range = Range { start, end }; 92 | 93 | // TODO this does not hold anymore for `#[doc=foo!(..)]`. 94 | // TODO where the span is covering `foo!()`, but the 95 | // TODO rendered length is 0. 96 | if literal.variant() != CommentVariant::MacroDocEqMacro { 97 | if let Some(span_len) = span.one_line_len() { 98 | assert_eq!(range.len(), span_len); 99 | } 100 | } 101 | // keep zero length values too, to guarantee continuity 102 | source_mapping.insert(range, span); 103 | content.push_str(literal.as_str()); 104 | // the newline is _not_ covered by a span, after all it's inserted by us! 105 | next = it.next(); 106 | if next.is_some() { 107 | // for the last, skip the newline 108 | content.push('\n'); 109 | cursor += 1; 110 | } 111 | } 112 | } 113 | // all literals in a set have the same variant, so lets take the first one 114 | let variant = if let Some(literal) = self.literals.first() { 115 | literal.variant() 116 | } else { 117 | crate::CommentVariant::Unknown 118 | }; 119 | CheckableChunk::from_string(content, source_mapping, variant) 120 | } 121 | } 122 | 123 | impl<'s> fmt::Display for LiteralSet { 124 | fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { 125 | let n = self.len(); 126 | if n > 0 { 127 | for literal in self.literals.iter().take(n - 1) { 128 | writeln!(formatter, "{}", literal.as_str())?; 129 | } 130 | if let Some(literal) = self.literals.last() { 131 | write!(formatter, "{}", literal.as_str())?; 132 | } 133 | } 134 | Ok(()) 135 | } 136 | } 137 | /// A debug helper to print concatenated length of all items. 138 | #[macro_export] 139 | macro_rules! chyrp_dbg { 140 | ($first:literal $(, $( $line:literal ),+ )? $(,)? $(@ $prefix:literal)? 
) => { 141 | dbg!(concat!($first $( $(, "\n", $line )+ )?).len()); 142 | dbg!(concat!($first $( $(, "\n", $line )+ )?)); 143 | } 144 | } 145 | 146 | /// A helper macro creating valid doc string using the macro syntax 147 | /// `#[doc=r#"..."#]`. 148 | /// 149 | /// Example: 150 | /// 151 | /// ```rust 152 | /// # use doc_chunks::chyrp_up; 153 | /// let x = chyrp_up!(["some", "thing"]); 154 | /// let y = r##"#[doc=r#"some 155 | /// thing"#] 156 | /// struct ChyrpChyrp;"##; 157 | /// 158 | /// assert_eq!(x,y); 159 | /// ``` 160 | #[macro_export] 161 | macro_rules! chyrp_up { 162 | ([ $( $line:literal ),+ $(,)? ] $(@ $prefix:literal)? ) => { 163 | chyrp_up!( $( $line ),+ $(@ $prefix)? ) 164 | }; 165 | ($first:literal $(, $( $line:literal ),+ )? $(,)? $(@ $prefix:literal)? ) => { 166 | concat!($( $prefix ,)? r##"#[doc=r#""##, $first $( $(, "\n", $line )+ )?, r##""#]"##, "\n", "struct ChyrpChyrp;") 167 | }; 168 | } 169 | 170 | /// A helper macro creating valid doc string using the macro syntax 171 | /// `/// ...`. 172 | /// 173 | /// Example: 174 | /// 175 | /// ```rust 176 | /// # use doc_chunks::fluff_up; 177 | /// let x = fluff_up!(["some", "thing"]); 178 | /// let y = r#"/// some 179 | /// /// thing 180 | /// struct Fluff;"#; 181 | /// 182 | /// assert_eq!(x,y); 183 | /// ``` 184 | #[macro_export] 185 | macro_rules! fluff_up { 186 | ([ $( $line:literal ),+ $(,)?] $( @ $prefix:literal)?) => { 187 | fluff_up!($( $line ),+ $(@ $prefix)?) 188 | }; 189 | ($($line:literal ),+ $(,)? ) => { 190 | fluff_up!($( $line ),+ @ "") 191 | }; 192 | ($($line:literal ),+ $(,)? @ $prefix:literal ) => { 193 | concat!("" $(, $prefix, "/// ", $line, "\n")+ , "struct Fluff;") 194 | }; 195 | } 196 | 197 | pub mod testhelper { 198 | use super::*; 199 | use crate::testcase::annotated_literals; 200 | 201 | pub fn gen_literal_set(source: &str) -> LiteralSet { 202 | let literals = dbg!(annotated_literals(dbg!(source))); 203 | 204 | let mut iter = dbg!(literals).into_iter(); 205 | let literal = iter 206 | .next() 207 | .expect("Must have at least one item in laterals"); 208 | let mut cls = LiteralSet::from(literal); 209 | 210 | for literal in iter { 211 | assert!(cls.add_adjacent(literal).is_ok()); 212 | } 213 | dbg!(cls) 214 | } 215 | } 216 | 217 | #[cfg(test)] 218 | mod tests { 219 | use super::*; 220 | 221 | use super::testhelper::gen_literal_set; 222 | use crate::util::load_span_from; 223 | use crate::util::sub_chars; 224 | 225 | #[test] 226 | fn fluff_one() { 227 | const RAW: &str = fluff_up!(["a"]); 228 | const EXPECT: &str = r#"/// a 229 | struct Fluff;"#; 230 | assert_eq!(RAW, EXPECT); 231 | } 232 | 233 | #[test] 234 | fn fluff_multi() { 235 | const RAW: &str = fluff_up!(["a", "b", "c"]); 236 | const EXPECT: &str = r#"/// a 237 | /// b 238 | /// c 239 | struct Fluff;"#; 240 | assert_eq!(RAW, EXPECT); 241 | } 242 | 243 | // range within the literalset content string 244 | const EXMALIBU_RANGE_START: usize = 9; 245 | const EXMALIBU_RANGE_END: usize = EXMALIBU_RANGE_START + 8; 246 | const EXMALIBU_RANGE: Range = EXMALIBU_RANGE_START..EXMALIBU_RANGE_END; 247 | const RAW: &str = r#"/// Another exmalibu verification pass. 248 | /// 🚤w🌴x🌋y🍈z🍉0 249 | /// ♫ Boats float, ♫♫ don't they? ♫ 250 | struct Vikings; 251 | "#; 252 | 253 | const EXMALIBU_CHUNK_STR: &str = r#" Another exmalibu verification pass. 254 | 🚤w🌴x🌋y🍈z🍉0 255 | ♫ Boats float, ♫♫ don't they? 
♫"#; 256 | 257 | #[test] 258 | fn combine_literals() { 259 | let _ = env_logger::builder() 260 | .is_test(true) 261 | .filter(None, log::LevelFilter::Trace) 262 | .try_init(); 263 | 264 | let cls = gen_literal_set(RAW); 265 | 266 | assert_eq!(cls.len(), 3); 267 | assert_eq!(cls.to_string(), EXMALIBU_CHUNK_STR.to_owned()); 268 | } 269 | 270 | #[test] 271 | fn coverage() { 272 | let _ = env_logger::builder() 273 | .is_test(true) 274 | .filter(None, log::LevelFilter::Trace) 275 | .try_init(); 276 | 277 | let literal_set = gen_literal_set(RAW); 278 | let chunk: CheckableChunk = literal_set.into_chunk(); 279 | let map_range_to_span = chunk.find_spans(EXMALIBU_RANGE); 280 | let (_range, _span) = map_range_to_span 281 | .first() 282 | .expect("Must be at least one literal"); 283 | 284 | let range_for_raw_str = Range { 285 | start: EXMALIBU_RANGE_START, 286 | end: EXMALIBU_RANGE_END, 287 | }; 288 | 289 | // check test integrity 290 | assert_eq!("exmalibu", &EXMALIBU_CHUNK_STR[EXMALIBU_RANGE]); 291 | 292 | // check actual result 293 | assert_eq!( 294 | &EXMALIBU_CHUNK_STR[EXMALIBU_RANGE], 295 | &chunk.as_str()[range_for_raw_str.clone()] 296 | ); 297 | } 298 | 299 | macro_rules! test_raw { 300 | ($test: ident, [ $($txt: literal),+ $(,)? ]; $range: expr, $expected: literal) => { 301 | #[test] 302 | fn $test() { 303 | test_raw!([$($txt),+] ; $range, $expected); 304 | } 305 | }; 306 | 307 | ([$($txt:literal),+ $(,)?]; $range: expr, $expected: literal) => { 308 | let _ = env_logger::builder() 309 | .filter(None, log::LevelFilter::Trace) 310 | .is_test(true) 311 | .try_init(); 312 | 313 | let range: Range = $range; 314 | 315 | const RAW: &str = fluff_up!($( $txt),+); 316 | const START: usize = 3; // skip `///` which is the span we get from the literal 317 | let _end: usize = START $( + $txt.len())+; 318 | let literal_set = gen_literal_set(dbg!(RAW)); 319 | 320 | 321 | let chunk: CheckableChunk = dbg!(literal_set.into_chunk()); 322 | let map_range_to_span = chunk.find_spans(range.clone()); 323 | 324 | let mut iter = dbg!(map_range_to_span).into_iter(); 325 | let (range, _span) = iter.next().expect("Must be at least one literal"); 326 | 327 | // the range for raw str contains an offset of 3 when used with `///` 328 | let range_for_raw_str = Range { 329 | start: range.start + START, 330 | end: range.end + START, 331 | }; 332 | 333 | assert_eq!(&RAW[range_for_raw_str.clone()], &chunk.as_str()[range], "Testing range extract vs stringified chunk for integrity"); 334 | assert_eq!(&RAW[range_for_raw_str], $expected, "Testing range extract vs expected"); 335 | }; 336 | } 337 | 338 | #[test] 339 | fn first_line_extract_0() { 340 | test_raw!(["livelyness", "yyy"] ; 2..6, "ivel"); 341 | } 342 | 343 | #[test] 344 | fn first_line_extract_1() { 345 | test_raw!(["+ 12 + x0"] ; 9..10, "0"); 346 | } 347 | 348 | #[test] 349 | fn literal_set_into_chunk() { 350 | let _ = env_logger::builder() 351 | .filter(None, log::LevelFilter::Trace) 352 | .is_test(true) 353 | .try_init(); 354 | 355 | let literal_set = dbg!(gen_literal_set(RAW)); 356 | 357 | let chunk = dbg!(literal_set.clone().into_chunk()); 358 | let it = literal_set.literals(); 359 | 360 | for (range, span, s) in itertools::cons_tuples(chunk.iter().zip(it)) { 361 | if range.len() == 0 { 362 | continue; 363 | } 364 | assert_eq!( 365 | load_span_from(RAW.as_bytes(), span.clone()).expect("Span extraction must work"), 366 | sub_chars(chunk.as_str(), range.clone()) 367 | ); 368 | 369 | let r: Range = span.to_content_range(&chunk).expect("Should work"); 370 | // the range 
for raw str contains an offset of 3 when used with `///` 371 | assert_eq!( 372 | sub_chars(chunk.as_str(), range.clone()), 373 | s.as_str().to_owned() 374 | ); 375 | assert_eq!(&r, range); 376 | } 377 | } 378 | } 379 | --------------------------------------------------------------------------------