├── .github ├── stale.yml ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── --ask-a-question.md │ ├── ---request-a-new-feature.md │ └── ---report-a-bug.md ├── auto_pr_team.yml ├── FUNDING.yml ├── no-response.yml └── pull_request_template.md ├── demo ├── .gitignore ├── member │ ├── true │ │ ├── README.md │ │ ├── lib.rs │ │ └── Cargo.toml │ ├── stray.rs │ └── procmacro │ │ ├── src │ │ └── lib.rs │ │ └── Cargo.toml ├── .config │ ├── topic.dic │ └── spellcheck.toml ├── src │ ├── nested │ │ ├── justone.rs │ │ ├── justtwo.rs │ │ ├── again │ │ │ ├── mod.rs │ │ │ └── code.rs │ │ ├── fragments │ │ │ ├── simple.rs │ │ │ └── enumerate.rs │ │ ├── fragments.rs │ │ └── mod.rs │ ├── main.rs │ └── lib.rs ├── Cargo.lock ├── README.md └── Cargo.toml ├── .gitignore ├── nlprule-data └── 0.6.4 │ └── en │ ├── en_rules.bin.xz │ └── en_tokenizer.bin.xz ├── src ├── errors.rs ├── config │ ├── reflow.rs │ ├── nlprules.rs │ ├── regex.rs │ ├── search_dirs.rs │ ├── iso.rs │ ├── hunspell.rs │ └── mod.rs ├── main.rs ├── checker │ ├── dummy.rs │ ├── nlprules.rs │ ├── cached.rs │ ├── dictaffix.rs │ ├── quirks.rs │ ├── mod.rs │ ├── zspell.rs │ └── spellbook.rs ├── tinhat.rs ├── lib.rs ├── action │ └── bandaid.rs └── traverse │ └── iter.rs ├── doc-chunks ├── README.md ├── src │ ├── errors.rs │ ├── testcase.rs │ ├── cluster.rs │ ├── lib.rs │ └── literalset.rs └── Cargo.toml ├── .pre-commit-hooks.yaml ├── .config ├── lingo.dic └── spellcheck.toml ├── docs ├── checkers.md ├── features.md ├── automation.md ├── remedy.md └── configuration.md ├── LICENSE-MIT ├── .vscode └── launch.json ├── cliff.toml ├── tests └── signal_handler.rs ├── hunspell-data └── en_US.aff ├── Cargo.toml ├── README.md └── LICENSE-APACHE /.github/stale.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /demo/member/true/README.md: -------------------------------------------------------------------------------- 1 | # READ ME (maybe) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode/settings.json 3 | -------------------------------------------------------------------------------- /demo/.config/topic.dic: -------------------------------------------------------------------------------- 1 | 10 2 | topic/A 3 | tkae/topic -------------------------------------------------------------------------------- /demo/src/nested/justone.rs: -------------------------------------------------------------------------------- 1 | /// Wroeng. 
2 | struct W; -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /demo/src/nested/justtwo.rs: -------------------------------------------------------------------------------- 1 | /// Alphy 2 | /// Beto 3 | struct Abc; -------------------------------------------------------------------------------- /.github/auto_pr_team.yml: -------------------------------------------------------------------------------- 1 | org: cargo-spellcheck 2 | team: contributors 3 | -------------------------------------------------------------------------------- /demo/member/stray.rs: -------------------------------------------------------------------------------- 1 | /// Nobady references this. 2 | struct Lost; 3 | -------------------------------------------------------------------------------- /demo/src/nested/again/mod.rs: -------------------------------------------------------------------------------- 1 | mod code; 2 | 3 | /// Again. 4 | struct Again; 5 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: drahnr 2 | patreon: drahnr 3 | liberapay: drahnr 4 | open_collective: bernhard-schuster 5 | -------------------------------------------------------------------------------- /demo/member/procmacro/src/lib.rs: -------------------------------------------------------------------------------- 1 | /// Foo bar baz. 2 | fn empty() { 3 | unimplemented!("and never will be") 4 | } 5 | -------------------------------------------------------------------------------- /demo/member/true/lib.rs: -------------------------------------------------------------------------------- 1 | //! Some extar crate docs. 2 | 3 | /// ZZZZzzz makes the snake. 4 | pub fn x() { 5 | 6 | } 7 | -------------------------------------------------------------------------------- /demo/src/nested/fragments/simple.rs: -------------------------------------------------------------------------------- 1 | /// First. 2 | /// Secondo. 3 | /// Thurd number one. 4 | /// Another thurd. 5 | struct Q; -------------------------------------------------------------------------------- /nlprule-data/0.6.4/en/en_rules.bin.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drahnr/cargo-spellcheck/HEAD/nlprule-data/0.6.4/en/en_rules.bin.xz -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | //! Global error usage without cluttering each file. 2 | pub use color_eyre::eyre::{bail, eyre, Error, Result, WrapErr}; 3 | -------------------------------------------------------------------------------- /nlprule-data/0.6.4/en/en_tokenizer.bin.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drahnr/cargo-spellcheck/HEAD/nlprule-data/0.6.4/en/en_tokenizer.bin.xz -------------------------------------------------------------------------------- /demo/src/nested/fragments/enumerate.rs: -------------------------------------------------------------------------------- 1 | /// Secret. 2 | /// 3 | /// Somethign secret. 
4 | enum Instrument { 5 | /// An instroment. 6 | Xylophon, 7 | } -------------------------------------------------------------------------------- /doc-chunks/README.md: -------------------------------------------------------------------------------- 1 | # doc-chunks 2 | 3 | Extract clustered documentation lines and provide 4 | a spanned and commonmark aware overlay with a 5 | span based mapping. -------------------------------------------------------------------------------- /demo/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "cargo-spellcheck-demo" 5 | version = "1.0.0" 6 | -------------------------------------------------------------------------------- /.pre-commit-hooks.yaml: -------------------------------------------------------------------------------- 1 | - id: cargo-spellcheck 2 | name: cargo-spellcheck 3 | description: Spellcheck rust files 4 | entry: cargo-spellcheck 5 | language: rust 6 | types: [rust] 7 | args: ["--code=99", "--"] 8 | -------------------------------------------------------------------------------- /demo/src/nested/again/code.rs: -------------------------------------------------------------------------------- 1 | //! 2 | //! Enclozed `codez` not checked. 3 | //! 4 | 5 | 6 | /// Do you like `wroeng` very mouch? 7 | /// Sharon stayed home from school the other day. Because she was sick. 8 | struct Coda; 9 | -------------------------------------------------------------------------------- /.github/no-response.yml: -------------------------------------------------------------------------------- 1 | daysUntilClose: 7 2 | 3 | responseRequiredLabel: needs-more-information 4 | 5 | closeComment: > 6 | Feel free to re-open once there is more information available. 7 | 8 | If you are not the original author, please create a new issue. 9 | -------------------------------------------------------------------------------- /demo/.config/spellcheck.toml: -------------------------------------------------------------------------------- 1 | [Hunspell] 2 | lang = "en_US" 3 | search_dirs = ["."] 4 | extra_dictionaries = ["topic.dic"] 5 | 6 | [Hunspell.quirks] 7 | transform_regex = ["^'([^\\s])'$", "^[0-9]+x$"] 8 | allow_concatenation = true 9 | allow_dashed = false 10 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # zilly demo proyekt 2 | 3 | A plethora of spelling mistackz inclusive. 4 | 5 |
 6 | ᐲ🠒🍉 see #104
 7 | 
8 | 9 | 'Verify #88' 10 | '"Does not pop up"' 11 | "ever again" 12 | 13 | Mojis are ok 🍈🍐🍇 -------------------------------------------------------------------------------- /demo/src/main.rs: -------------------------------------------------------------------------------- 1 | /*! Just a lil smthin smthin. */ 2 | 3 | mod lib; 4 | 5 | /* dev */ 6 | pub mod nested; 7 | 8 | /** 9 | Not so preferable doc comment, use `///` instead. 10 | */ 11 | fn main() { 12 | lib::a(); 13 | lib::b(); 14 | lib::c(); 15 | } 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--ask-a-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓ Ask a question" 3 | about: Something is not clear to you from the documentation 4 | title: '' 5 | labels: documentation, question 6 | assignees: drahnr 7 | 8 | --- 9 | 10 | ** Q: ** 11 | 12 | 13 | -------------------------------------------------------------------------------- /demo/member/procmacro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-spellcheck-demo-proc-macro" 3 | version = "1.0.0" 4 | authors = ["Bernhard Schuster "] 5 | edition = "2018" 6 | publish = false 7 | description = "Proc-macro member of the demo, with `[lib]` but no path" 8 | 9 | [lib] 10 | proc-macro = true 11 | # unspecified path 12 | 13 | [dependencies] 14 | -------------------------------------------------------------------------------- /demo/member/true/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-spellcheck-demo-inner" 3 | publish = false 4 | description = "Inner member of the demo" 5 | readme = "README.md" 6 | 7 | version.workspace = true 8 | authors.workspace = true 9 | edition.workspace = true 10 | 11 | [lib] 12 | crate-type = ["staticlib"] 13 | path = "lib.rs" 14 | bench = false 15 | test = false 16 | 17 | 18 | [dependencies] 19 | -------------------------------------------------------------------------------- /src/config/reflow.rs: -------------------------------------------------------------------------------- 1 | //! Reflow configuration. 2 | use serde::{Deserialize, Serialize}; 3 | 4 | /// Parameters for wrapping doc comments 5 | #[derive(Debug, Clone, Serialize, Deserialize)] 6 | pub struct ReflowConfig { 7 | /// Hard limit for absolute length of lines. 
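/// Defaults to 80 columns (see the `Default` impl below); the `max_line_width`
/// spelling from older configuration files is accepted as an alias as well.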
8 | #[serde(default)] 9 | #[serde(alias = "max_line_width")] 10 | pub(crate) max_line_length: usize, 11 | } 12 | 13 | impl Default for ReflowConfig { 14 | fn default() -> Self { 15 | Self { 16 | max_line_length: 80, 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use cargo_spellcheck::{action, errors::Result, run, Args}; 2 | 3 | #[allow(missing_docs)] 4 | fn main() -> Result<()> { 5 | color_eyre::install()?; 6 | let args = Args::parse(std::env::args()).unwrap_or_else(|e| e.exit()); 7 | let res = run(args); 8 | // no matter what, restore the terminal 9 | if let Err(e) = action::interactive::ScopedRaw::restore_terminal() { 10 | log::warn!("Failed to restore terminal: {e}"); 11 | } 12 | let val = res?.as_u8(); 13 | if val != 0 { 14 | std::process::exit(val as i32) 15 | } 16 | Ok(()) 17 | } 18 | -------------------------------------------------------------------------------- /demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-spellcheck-demo" 3 | version = "1.0.0" 4 | authors = ["Bernhard Schuster "] 5 | edition = "2024" 6 | publish = false 7 | description = "A silly demo with plenty of spelling misteakes for cargo-spellcheck demos and CI" 8 | readme = "README.md" 9 | 10 | [lib] 11 | crate-type = ["staticlib"] 12 | path = "src/lib.rs" 13 | bench = false 14 | test = false 15 | 16 | [[bin]] 17 | name = "vacays" 18 | path = "src/main.rs" 19 | bench = false 20 | test = false 21 | 22 | 23 | [dependencies] 24 | 25 | 26 | [workspace] 27 | members = ["member/*"] 28 | -------------------------------------------------------------------------------- /demo/src/nested/fragments.rs: -------------------------------------------------------------------------------- 1 | //! Modul levl documenatation. 2 | //! 3 | //! Details are full fo errors. 4 | 5 | mod simple; 6 | 7 | mod enumerate; 8 | 9 | // Shud be chcked now 10 | // Verify **some** _super_ *duper* [markdown](https://ahoi.io/). 11 | struct X; 12 | 13 | /* 14 | * Also check thiz one 15 | */ 16 | impl X { 17 | /// New, as in new. But also not. 18 | /// 19 | /// Half sentence for X #2. 20 | fn new() -> Self { 21 | unimplemented!() 22 | } 23 | 24 | /// Old, as in really old. 25 | /// 26 | /// But what does "old" really mean? 27 | fn old(&self) { 28 | unimplemented!() 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/config/nlprules.rs: -------------------------------------------------------------------------------- 1 | //! NlpRules checker configuration. 2 | use serde::{Deserialize, Serialize}; 3 | use std::path::PathBuf; 4 | 5 | #[derive(Deserialize, Serialize, Debug, Clone)] 6 | #[serde(deny_unknown_fields)] 7 | pub struct LanguageToolConfig { 8 | pub url: url::Url, 9 | } 10 | 11 | impl LanguageToolConfig { 12 | pub fn url(&self) -> &url::Url { 13 | &self.url 14 | } 15 | } 16 | #[derive(Deserialize, Serialize, Debug, Clone, Default)] 17 | #[serde(deny_unknown_fields)] 18 | pub struct NlpRulesConfig { 19 | /// Location to use for an initial lookup of alternate tokenizer and rules 20 | /// data. 
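/// (Note: if left unset, the LanguageTool derived artifacts bundled at build
/// time, see `nlprule-data/`, are used.)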
21 | pub override_rules: Option, 22 | pub override_tokenizer: Option, 23 | } 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---request-a-new-feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F331 Request a new feature" 3 | about: Suggest a feature you would like to see implemented 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a particular use-case?** 11 | 12 | 13 | 14 | **Describe the solution you'd like to implement/see implemented** 15 | 16 | 17 | 18 | **Describe alternatives you've considered** 19 | 20 | 21 | 22 | **Additional context** 23 | 24 | 25 | -------------------------------------------------------------------------------- /doc-chunks/src/errors.rs: -------------------------------------------------------------------------------- 1 | use crate::{Range, Span}; 2 | use indexmap::IndexMap; 3 | 4 | pub type Result = std::result::Result; 5 | 6 | #[derive(thiserror::Error, Debug)] 7 | pub enum Error { 8 | #[error(transparent)] 9 | Io(#[from] std::io::Error), 10 | 11 | #[error("Really pretty much anything")] 12 | Any, 13 | 14 | #[error("Failed to parse rust content: {0:?}")] 15 | ParserFailure(#[source] syn::Error), 16 | 17 | #[error("Failed to parse toml file")] 18 | Toml(#[from] toml::de::Error), 19 | 20 | #[error("{0}")] 21 | Span(String), 22 | 23 | #[error("BUG: Found a range {}..{} which that does not exist in its own source mapping: {:?}", .line_range.start, .line_range.end, .source_mapping)] 24 | InvalidLineRange { 25 | line_range: Range, 26 | source_mapping: IndexMap, 27 | }, 28 | } 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---report-a-bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Report a bug" 3 | about: Create a report to help us fix bugs 4 | title: '' 5 | labels: bug 6 | assignees: drahnr 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | 13 | 14 | **To Reproduce** 15 | 16 | Steps to reproduce the behaviour: 17 | 18 | 1. A file containing `...` 19 | 2. Run `cargo spellcheck ...` 20 | 3. ... 21 | 22 | **Expected behavior** 23 | 24 | 25 | 26 | **Screenshots** 27 | 28 | 30 | 31 | **Please complete the following information:** 32 | - System: 33 | - Obtained: 34 | - Version: 35 | 36 | **Additional context** 37 | 38 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | ## What does this PR accomplish? 6 | 7 | 10 | 11 | * 🩹 Bug Fix 12 | * 🦚 Feature 13 | * 📙 Documentation 14 | * 🦣 Legacy 15 | * 🪣 Misc 16 | 17 | 21 | Closes # . 
22 | 23 | ## Changes proposed by this PR: 24 | 25 | 28 | 29 | ## Notes to reviewer: 30 | 31 | 37 | 38 | 39 | ## 📜 Checklist 40 | 41 | * [ ] Works on the `./demo` sub directory 42 | * [ ] Test coverage is excellent and passes 43 | * [ ] Documentation is thorough 44 | -------------------------------------------------------------------------------- /.config/lingo.dic: -------------------------------------------------------------------------------- 1 | 100 2 | accessor/MS 3 | API/MS 4 | backend 5 | bandaid/MS 6 | Bitflag/MS 7 | C++ 8 | cargo-spellcheck/M 9 | checkable/MS 10 | clang/MS 11 | cli 12 | cmark 13 | commonmark 14 | CommonMark 15 | config 16 | Consumingly 17 | CXX 18 | dev 19 | divide/UBS 20 | Docopt 21 | ellipsize/GD 22 | emoji/MS 23 | enablement 24 | enqueue/GD 25 | enum 26 | fallbacks 27 | featureset/MS 28 | filesystem/MS 29 | hardcode/GD 30 | http/S 31 | hunspell/MS 32 | IETF 33 | iff 34 | io 35 | iterative/Y 36 | LanguageTool/MS 37 | lookups 38 | macOS 39 | metadata 40 | md 41 | multiline/S 42 | nix/MS 43 | NLP 44 | NlpRule/S 45 | pickable 46 | postfix/GD 47 | pre/MS 48 | proc_macro2/MS 49 | README 50 | recurse 51 | reflow 52 | Reflow/MS 53 | reflown 54 | reflow/MS 55 | roadmap/MS 56 | rustdoc/MS 57 | selectable 58 | spellcheck/M 59 | stateful/PY 60 | str 61 | stringly 62 | struct/MS 63 | TODO/MS 64 | TODO 65 | tokenization/MS 66 | tokenize/USXBMD 67 | tokenizer/MS 68 | toml 69 | tuple 70 | tuple/DSM 71 | undivide/UBS 72 | UTF-8 73 | whitespace/MS 74 | workspace/MS 75 | YOLO 76 | porject/MS 77 | -------------------------------------------------------------------------------- /demo/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fancy module docs are really helpful if they contain usage examples. 2 | 3 | /// Pick option a also known as door #1. 4 | pub fn a() { 5 | 6 | } 7 | 8 | 9 | #[doc = "Pick option b also known as door #2."] 10 | pub fn b() { 11 | 12 | } 13 | 14 | #[doc = r##"Pick option c also known as door #3."##] 15 | pub fn c() { 16 | 17 | } 18 | 19 | #[doc = r#"Risk not ya ting?"#] 20 | pub fn take_the_money_and_leave() { 21 | 22 | } 23 | 24 | 25 | /// Possible ways to run rustc and request various parts of LTO. 26 | /// 27 | /// Variant | Flag | Object Code | Bitcode 28 | /// -------------------|------------------------|-------------|-------- 29 | /// `Run` | `-C lto=foo` | n/a | n/a 30 | /// `Off` | `-C lto=off` | n/a | n/a 31 | /// `OnlyBitcode` | `-C linker-plugin-lto` | | ✓ 32 | /// `ObjectAndBitcode` | | ✓ | ✓ 33 | /// `OnlyObject` | `-C embed-bitcode=no` | ✓ | 34 | pub fn exploding_complexity() { 35 | 36 | } 37 | -------------------------------------------------------------------------------- /doc-chunks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "doc-chunks" 3 | version = "0.2.1" 4 | authors = ["Bernhard Schuster "] 5 | edition = "2021" 6 | rust-version = "1.57.0" 7 | repository = "https://github.com/drahnr/cargo-spellcheck.git" 8 | homepage = "https://github.com/drahnr/cargo-spellcheck" 9 | license = "MIT OR Apache-2.0" 10 | keywords = ["documentation", "chunks", "cluster"] 11 | description = "Clusters of doc comments and dev comments as coherent view." 
12 | readme = "README.md" 13 | 14 | [dependencies] 15 | console = "0.15" 16 | fs-err = "2" 17 | indexmap = { version = "2", features = ["rayon", "serde"] } 18 | itertools = "0.12" 19 | lazy_static = "1" 20 | memchr = "2" 21 | log = "0.4" 22 | proc-macro2 = { version = "1", features = ["span-locations"] } 23 | pulldown-cmark = "0.11" 24 | ra_ap_syntax = "0.0.269" 25 | rayon = "1.5" 26 | fancy-regex = "0.13" 27 | regex = "1" 28 | serde = { version = "1", features = ["derive"] } 29 | syn = { version = "2", features = ["full"] } 30 | thiserror = "1" 31 | toml = "0.8.2" 32 | 33 | [dev-dependencies] 34 | assert_matches = "1" 35 | env_logger = "0.11" 36 | -------------------------------------------------------------------------------- /docs/checkers.md: -------------------------------------------------------------------------------- 1 | # Checkers 2 | 3 | Available checker support 4 | 5 | ## Hunspell 6 | 7 | Requires a C++ compiler to compile the hunspell CXX source files which are part 8 | of `hunspell-sys` 9 | 10 | ### Fedora 30+ 11 | 12 | ```sh 13 | dnf install -y clang 14 | ``` 15 | 16 | ### Ubuntu 19.10+ 17 | 18 | ```sh 19 | apt install -y clang 20 | ``` 21 | 22 | ### Mac OS X 23 | 24 | ```sh 25 | brew install llvm 26 | ``` 27 | 28 | The environment variable `LLVM_CONFIG_PATH` needs to point to `llvm-config`, to 29 | do so: 30 | 31 | ```sh 32 | export LLVM_CONFIG_PATH=/usr/local/opt/llvm/bin/llvm-config 33 | ``` 34 | 35 | ## NlpRules 36 | 37 | When compiled with the default featureset which includes `nlprules`, the 38 | resulting binary can only be distributed under the [`LGPLv2.1`](./LICENSE-LGPL) 39 | since the `rules` and `tokenizer` definitions are extracted from `LanguageTool` 40 | (which is itself licensed under [`LGPLv2.1`](./LICENSE-LGPL)) as described by 41 | the library that is used for pulling and integrating - details are to be found 42 | under [crate `nlprule`'s 43 | README.md](https://github.com/bminixhofer/nlprule#license). 
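Which of the compiled-in backends actually runs can also be narrowed per invocation. A sketch, based on the `--checkers` flag referenced in `docs/remedy.md` (exact flag placement may differ between versions):

```sh
# Only run the dictionary based hunspell checker, skipping the nlprule grammar pass
cargo spellcheck --checkers=hunspell check
```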
44 | 45 | -------------------------------------------------------------------------------- /doc-chunks/src/testcase.rs: -------------------------------------------------------------------------------- 1 | use crate::{Span, TrimmedLiteral}; 2 | 3 | pub fn annotated_literals_raw(source: &str) -> impl Iterator + '_ { 4 | let stream = syn::parse_str::(source).expect("Must be valid rust"); 5 | stream 6 | .into_iter() 7 | .filter_map(|x| { 8 | if let proc_macro2::TokenTree::Group(group) = x { 9 | Some(group.stream().into_iter()) 10 | } else { 11 | None 12 | } 13 | }) 14 | .flatten() 15 | .filter_map(|x| { 16 | if let proc_macro2::TokenTree::Literal(literal) = x { 17 | Some(literal) 18 | } else { 19 | None 20 | } 21 | }) 22 | } 23 | 24 | pub fn annotated_literals(source: &str) -> Vec { 25 | annotated_literals_raw(source) 26 | .map(|literal| { 27 | let span = Span::from(literal.span()); 28 | TrimmedLiteral::load_from(source, span) 29 | .expect("Literals must be convertable to trimmed literals") 30 | }) 31 | .collect() 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2020 Bernhard Schuster 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /.config/spellcheck.toml: -------------------------------------------------------------------------------- 1 | [Hunspell] 2 | # lang and name of `.dic` file 3 | lang = "en_US" 4 | 5 | search_dirs = ["."] 6 | extra_dictionaries = ["lingo.dic"] 7 | 8 | skip_os_lookups = true 9 | use_builtin = true 10 | 11 | [ZSpell] 12 | # lang and name of `.dic` file 13 | lang = "en_US" 14 | 15 | search_dirs = ["."] 16 | extra_dictionaries = ["lingo.dic"] 17 | 18 | skip_os_lookups = true 19 | use_builtin = true 20 | 21 | [Spellbook] 22 | # lang and name of `.dic` file 23 | lang = "en_US" 24 | 25 | search_dirs = ["."] 26 | extra_dictionaries = ["lingo.dic"] 27 | 28 | skip_os_lookups = true 29 | use_builtin = true 30 | 31 | 32 | [Hunspell.quirks] 33 | # transforms words that are provided by the tokenizer 34 | # into word fragments based on the capture groups which are to be checked. 35 | # If no capture groups are present, the matched word is whitelisted. 
36 | transform_regex = [ 37 | "^'([^\\s])'$", 38 | "^[0-9]+x$", 39 | "^\\#[0-9]+$", 40 | "^[0-9]+$", 41 | "^.+\\+$", 42 | "\\+", 43 | ] 44 | # accepts `alphabeta` variants if the checker provides a replacement suggestion 45 | # of `alpha-beta`. 46 | allow_concatenation = true 47 | allow_dashed = true 48 | 49 | [Reflow] 50 | 51 | max_line_length = 80 52 | -------------------------------------------------------------------------------- /demo/src/nested/mod.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | mod justone; 4 | mod justtwo; 5 | mod fragments; 6 | mod again; 7 | 8 | /// Nested; 9 | struct Nest; 10 | 11 | /// Overly long statements that should be reflown since they are __very__ long and exceed the line limit. 12 | /// 13 | /// This struct has a lot of documentation but unfortunately, the lines are just too long. 14 | struct SomeLong { 15 | /// This member is interesting though since it has some indentation. These whitespaces must be kept. 16 | member: i8, 17 | #[ doc = "This member is interesting though since it has some indentation. These whitespaces must be kept."] 18 | sec: i8, 19 | #[doc=r###"And a different interesting thing 20 | because we have a random newline here?!"###] 21 | third: String, 22 | } 23 | 24 | /// A long documentation which is short enough for two lines 25 | /// but too long for one line. 26 | struct TooLong; 27 | 28 | /// And these lines are too short so they become just two lines 29 | /// instead of three, as it was 30 | /// initially. 31 | struct TooShort; 32 | 33 | #[ doc = "A long comment which we wanna reflow. So it's Saturday, are you having any plans for tonight?" ] 34 | struct Someodo; 35 | 36 | #[ doc= r#"A long comment which we wanna reflow. So it's Saturday, are you having any plans for 37 | tonight? We're gonna end up with three lines here I think."#] 38 | struct AnotherSomeodo; 39 | 40 | #[ doc= r#"A long short 41 | comment which we wanna reflow 42 | to one line."#] 43 | struct AnotherSomeodo2; 44 | -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | # Implemented Features + Roadmap 2 | 3 | * [x] Parse doc comments from arbitrary files 4 | * [x] Decent error printing 5 | * [x] `cargo-spellcheck check` 6 | * [x] Spell checking using `hunspell` 7 | * [x] Merge multiline doc comments 8 | * [x] Handle multiline and fragmented mistakes (i.e. 
for grammar) [#25](https://github.com/drahnr/cargo-spellcheck/issues/25) 9 | * [x] Grammar check using [`nlprule`](https://github.com/bminixhofer/nlprule) 10 | * [x] Follow module declarations rather than blindly recurse 11 | * [x] Be `commonmark`/`markdown` aware 12 | * [ ] Handle doc-tests with ` ```rust` as virtual files [#43](https://github.com/drahnr/cargo-spellcheck/issues/43) 13 | * [ ] Verify all types of links [#44](https://github.com/drahnr/cargo-spellcheck/issues/44) 14 | * [x] Check `README.md` files [#37](https://github.com/drahnr/cargo-spellcheck/issues/37) 15 | * [x] Improve interactive user interface with `crossterm` 16 | * [x] Ellipsize overly long statements with `...` [#42](https://github.com/drahnr/cargo-spellcheck/issues/42) 17 | * [ ] Learn topic lingo and filter false-positive-suggestions [#41](https://github.com/drahnr/cargo-spellcheck/issues/41) 18 | * [x] Handle cargo workspaces [#38](https://github.com/drahnr/cargo-spellcheck/issues/38) 19 | * [x] Re-flow doc comments [#39](https://github.com/drahnr/cargo-spellcheck/issues/39) 20 | * [x] Collect dev comments as well [#115](https://github.com/drahnr/cargo-spellcheck/issues/115) 21 | 22 | `hunspell` (dictionary based lookups) and `nlprules` (static grammar rules, 23 | derived from `languagetool`) are currently the two supported checkers. 24 | 25 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "type": "lldb", 5 | "request": "launch", 6 | "name": "Debug binary in executable 'cargo-spellcheck'", 7 | "cargo": { 8 | "args": [ 9 | "build", 10 | "--bin=cargo-spellcheck", 11 | "--package=cargo-spellcheck" 12 | ], 13 | "filter": { 14 | "name": "cargo-spellcheck", 15 | "kind": "bin" 16 | } 17 | }, 18 | "env": { 19 | "RUST_BACKTRACE":"full" 20 | }, 21 | "args": ["--", "spellcheck", "-vvvvv", "demo"], 22 | //"args": ["config", "-vvvvv", "--cfg=xxx.toml"], 23 | "cwd": "${workspaceFolder}" 24 | }, 25 | { 26 | "type": "lldb", 27 | "request": "launch", 28 | "name": "Debug unit tests in executable 'cargo-spellcheck'", 29 | "cargo": { 30 | "args": [ 31 | "test", 32 | "--no-run", 33 | "--bin=cargo-spellcheck", 34 | "--package=cargo-spellcheck", 35 | "combine_literals", 36 | "--", 37 | "--nocapture" 38 | ], 39 | "filter": { 40 | "name": "cargo-spellcheck", 41 | "kind": "bin" 42 | } 43 | }, 44 | "env": { 45 | "RUST_LOG":"cargo_spellcheck=trace", 46 | "RUST_BACKTRACE":"1" 47 | }, 48 | "args": [], 49 | "cwd": "${workspaceFolder}" 50 | } 51 | ] 52 | } 53 | -------------------------------------------------------------------------------- /docs/automation.md: -------------------------------------------------------------------------------- 1 | # Automation of `cargo-spellcheck` 2 | 3 | ## CI/CD 4 | 5 | `cargo-spellcheck` can be configured with `--code ` to return a non-zero 6 | return code if mistakes are found instead of `0`. 7 | 8 | ### GitHub Actions 9 | 10 | [Create a workflow](https://docs.github.com/en/actions/quickstart) for your project and add the following example as steps. 11 | 12 | The first step installs cargo-spellcheck on the runner. 13 | The second step loads your source code into the runner environment. 14 | The third step runs a command in a shell like you would normally do with cargo spellcheck. 15 | Specify your arguments as needed. 
16 | 17 | ```yaml 18 | - name: Install cargo-spellcheck 19 | uses: taiki-e/install-action@v2 20 | with: 21 | tool: cargo-spellcheck 22 | 23 | - uses: actions/checkout@v3 24 | 25 | - name: Run cargo-spellcheck 26 | run: cargo spellcheck --code 1 27 | ``` 28 | 29 | ### Other 30 | 31 | Install `cargo-spellcheck` via [`cargo-binstall`](https://github.com/cargo-bins/cargo-binstall) and then use it like you would locally. 32 | Alternatively you can use `cargo install cargo-spellcheck` to compile it from source. 33 | 34 | ```bash 35 | cargo binstall --no-confirm cargo-spellcheck 36 | 37 | cargo-spellcheck --code 1 38 | ``` 39 | 40 | ## Git hooks 41 | 42 | If you want to manually configure `cargo-spellcheck` to run on git commits: 43 | 44 | ```bash 45 | #!/usr/bin/env bash 46 | 47 | # Redirect output to stderr. 48 | exec 1>&2 49 | 50 | exec cargo spellcheck --code 99 $(git diff-index --cached --name-only --diff-filter=AM HEAD) 51 | ``` 52 | 53 | Alternatively you can use [`pre-commit`](https://pre-commit.com/) to manage your git commit hooks 54 | for you. This can be done by appending these lines to `.pre-commit-config.yaml` in your project: 55 | 56 | ```yaml 57 | - repo: https://github.com/drahnr/cargo-spellcheck.git 58 | rev: master 59 | - id: cargo-spellcheck 60 | 61 | ``` 62 | 63 | You will need to install the hooks running `pre-commit install-hooks` and `cargo-spellcheck` will 64 | get installed and wired up as a git commit hook for you. 65 | -------------------------------------------------------------------------------- /src/config/regex.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[derive(Debug)] 4 | pub struct WrappedRegex(pub Regex); 5 | 6 | impl Clone for WrappedRegex { 7 | fn clone(&self) -> Self { 8 | // @todo inefficient.. 
but right now this should almost never happen 9 | // @todo implement a lazy static `Arc>` 10 | Self(Regex::new(self.as_str()).unwrap()) 11 | } 12 | } 13 | 14 | impl std::ops::Deref for WrappedRegex { 15 | type Target = Regex; 16 | fn deref(&self) -> &Self::Target { 17 | &self.0 18 | } 19 | } 20 | 21 | impl std::convert::AsRef for WrappedRegex { 22 | fn as_ref(&self) -> &Regex { 23 | &self.0 24 | } 25 | } 26 | 27 | impl Serialize for WrappedRegex { 28 | fn serialize(&self, serializer: S) -> Result 29 | where 30 | S: serde::ser::Serializer, 31 | { 32 | serializer.serialize_str(self.as_str()) 33 | } 34 | } 35 | 36 | impl<'de> Deserialize<'de> for WrappedRegex { 37 | fn deserialize(deserializer: D) -> Result 38 | where 39 | D: serde::de::Deserializer<'de>, 40 | { 41 | deserializer 42 | .deserialize_any(RegexVisitor) 43 | .map(WrappedRegex::from) 44 | } 45 | } 46 | 47 | impl From for Regex { 48 | fn from(val: WrappedRegex) -> Self { 49 | val.0 50 | } 51 | } 52 | 53 | impl From for WrappedRegex { 54 | fn from(other: Regex) -> WrappedRegex { 55 | WrappedRegex(other) 56 | } 57 | } 58 | 59 | struct RegexVisitor; 60 | 61 | impl<'de> serde::de::Visitor<'de> for RegexVisitor { 62 | type Value = Regex; 63 | 64 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 65 | formatter.write_str("String with valid regex expression") 66 | } 67 | 68 | fn visit_str(self, value: &str) -> Result 69 | where 70 | E: serde::de::Error, 71 | { 72 | let re = Regex::new(value).map_err(E::custom)?; 73 | Ok(re) 74 | } 75 | 76 | fn visit_string(self, value: String) -> Result 77 | where 78 | E: serde::de::Error, 79 | { 80 | self.visit_str::(value.as_str()) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/checker/dummy.rs: -------------------------------------------------------------------------------- 1 | //! Everything is wrong, so wrong, even if it's correct. 2 | //! 3 | //! A test checker, only available for unit tests. 
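//! Every token produced by the shared tokenizer is reported as a mistake, with
//! a placeholder replacement of the form `replacement_{index}`.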
4 | 5 | // use super::tokenize; 6 | use super::{apply_tokenizer, Checker}; 7 | 8 | use crate::suggestion::{Detector, Suggestion}; 9 | use crate::util::sub_chars; 10 | use crate::{errors::*, CheckableChunk, ContentOrigin}; 11 | 12 | /// A test checker that tokenizes and marks everything as wrong 13 | pub struct DummyChecker; 14 | 15 | impl DummyChecker { 16 | pub fn new(_config: &::Config) -> Result { 17 | Ok(Self) 18 | } 19 | } 20 | 21 | impl Checker for DummyChecker { 22 | type Config = (); 23 | 24 | fn detector() -> Detector { 25 | Detector::Dummy 26 | } 27 | 28 | fn check<'a, 's>( 29 | &self, 30 | origin: &ContentOrigin, 31 | chunks: &'a [CheckableChunk], 32 | ) -> Result>> 33 | where 34 | 'a: 's, 35 | { 36 | let tokenizer = super::tokenizer::<&std::path::PathBuf>(None)?; 37 | 38 | let mut acc = Vec::with_capacity(chunks.len()); 39 | let chunk = chunks 40 | .first() 41 | .expect("DummyChecker expects at least one chunk"); 42 | let plain = chunk.erase_cmark(&Default::default()); 43 | let txt = plain.as_str(); 44 | for (index, range) in apply_tokenizer(&tokenizer, txt).enumerate() { 45 | log::trace!("****Token[{}]: >{}<", index, sub_chars(txt, range.clone())); 46 | let detector = Detector::Dummy; 47 | let range2span = plain.find_spans(range.clone()); 48 | for (range, span) in range2span { 49 | log::trace!( 50 | "Suggestion for {:?} -> {}", 51 | range, 52 | chunk.display(range.clone()) 53 | ); 54 | let replacements = vec![format!("replacement_{index}")]; 55 | let suggestion = Suggestion { 56 | detector, 57 | span, 58 | range, 59 | origin: origin.clone(), 60 | replacements, 61 | chunk, 62 | description: None, 63 | }; 64 | acc.push(suggestion); 65 | } 66 | } 67 | Ok(acc) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /cliff.toml: -------------------------------------------------------------------------------- 1 | # configuration file for git-cliff (0.1.0) 2 | 3 | [changelog] 4 | # changelog header 5 | header = """ 6 | # Changelog 7 | All notable changes to this project will be documented in this file.\n 8 | """ 9 | # template for the changelog body 10 | # https://tera.netlify.app/docs/#introduction 11 | body = """ 12 | {% if version %}\ 13 | ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} 14 | {% else %}\ 15 | ## [unreleased] 16 | {% endif %}\ 17 | {% for group, commits in commits | group_by(attribute="group") %} 18 | ### {{ group | upper_first }} 19 | {% for commit in commits %} 20 | - {% if commit.breaking %}[**breaking**] {% endif %}{{ commit.message | upper_first }}\ 21 | {% endfor %} 22 | {% endfor %}\n 23 | """ 24 | # remove the leading and trailing whitespaces from the template 25 | trim = true 26 | # changelog footer 27 | footer = """ 28 | 29 | """ 30 | 31 | [git] 32 | # parse the commits based on https://www.conventionalcommits.org 33 | conventional_commits = true 34 | # filter out the commits that are not conventional 35 | filter_unconventional = true 36 | # regex for parsing and grouping commits 37 | commit_parsers = [ 38 | { message = "^feat", group = "Features" }, 39 | { message = "^fix", group = "Bug Fixes" }, 40 | { message = "^doc", group = "Documentation" }, 41 | { message = "^perf", group = "Performance" }, 42 | { message = "^refactor", group = "Refactor" }, 43 | { message = "^style", group = "Styling" }, 44 | { message = "^test", group = "Testing" }, 45 | { message = "^chore\\(release\\): prepare for", skip = true }, 46 | { message = 
"^(chore[/:]\\s*)?(cargo\\s+)?(fmt|fix|clippy|spellcheck)", skip = true }, 47 | { message = "^[vV]?0\\.[0-9]\\.[0-9]+", skip = true }, 48 | { message = "^\\(cargo-release\\)", skip = true }, 49 | { message = "^(chore/)?rele?ase:", skip = true }, 50 | { message = "^chore", group = "Miscellaneous Tasks" }, 51 | { body = ".*security", group = "Security" }, 52 | ] 53 | # filter out the commits that are not matched by commit parsers 54 | filter_commits = false 55 | # glob pattern for matching git tags 56 | tag_pattern = "v[0-9]*" 57 | # regex for ignoring tags 58 | ignore_tags = "" 59 | # sort the tags topologically 60 | topo_order = false 61 | # sort the commits inside sections by oldest/newest order 62 | sort_commits = "oldest" 63 | -------------------------------------------------------------------------------- /tests/signal_handler.rs: -------------------------------------------------------------------------------- 1 | #![cfg(target_os = "linux")] 2 | 3 | use nix::sys::signal::*; 4 | use nix::sys::wait::*; 5 | use nix::unistd::Pid; 6 | use nix::unistd::{fork, ForkResult}; 7 | 8 | use cargo_spellcheck::{signal_handler, TinHat}; 9 | 10 | #[test] 11 | fn signal_handler_works() -> Result<(), Box> { 12 | let _ = env_logger::Builder::new() 13 | .filter_level(log::LevelFilter::Trace) 14 | .is_test(true) 15 | .try_init(); 16 | 17 | println!("Signal handler check"); 18 | 19 | const QUIT: Signal = Signal::SIGQUIT; 20 | 21 | let sigs = { 22 | let mut sigs = SigSet::empty(); 23 | sigs.add(QUIT); 24 | sigs 25 | }; 26 | 27 | // best effort unblock 28 | let _ = sigprocmask(SigmaskHow::SIG_UNBLOCK, Some(&sigs), None); 29 | let _ = pthread_sigmask(SigmaskHow::SIG_UNBLOCK, Some(&sigs), None); 30 | 31 | if let Ok(ForkResult::Parent { child, .. }) = unsafe { fork() } { 32 | println!("[parent] Wait for child"); 33 | 34 | loop { 35 | let options = WaitPidFlag::WNOHANG; 36 | match nix::sys::wait::waitpid(child, Some(options)) { 37 | Ok(WaitStatus::StillAlive) => { 38 | std::thread::sleep(std::time::Duration::from_millis(50)); 39 | continue; 40 | } 41 | Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => { 42 | assert_eq!(signal, QUIT); 43 | unreachable!("Should exit via exit. qed") 44 | } 45 | Ok(WaitStatus::Exited(_pid, _exit_code)) => { 46 | return Ok(()); 47 | } 48 | Ok(ws) => unreachable!("Unexpected wait status: {ws:?}"), 49 | Err(errno) => unreachable!("Did not expect an error: {errno:?}"), 50 | } 51 | } 52 | } else { 53 | signal_handler(|| {}); 54 | 55 | // signal while blocking signals 56 | { 57 | let hat = TinHat::on(); 58 | println!("[child] Raise signal"); 59 | 60 | kill(Pid::this(), QUIT).unwrap(); 61 | 62 | std::thread::sleep(std::time::Duration::from_millis(1)); 63 | drop(hat); 64 | } 65 | 66 | std::thread::sleep(std::time::Duration::from_secs(10_000)); 67 | unreachable!("[child] Signal handler exits before panic."); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/tinhat.rs: -------------------------------------------------------------------------------- 1 | //! Tinhat 2 | //! 3 | //! Makes sure the cosmic signals don't meddle with IO that's in progress. 4 | //! 5 | //! ``` 6 | //! # use cargo_spellcheck::TinHat; 7 | //! let th = TinHat::on(); 8 | //! // do IO 9 | //! drop(th); 10 | //! 
``` 11 | 12 | use std::sync::atomic::{AtomicBool, AtomicU16, Ordering}; 13 | 14 | #[cfg(not(target_os = "windows"))] 15 | use signal_hook::{ 16 | consts::signal::{SIGINT, SIGQUIT, SIGTERM}, 17 | iterator, 18 | }; 19 | 20 | /// Global atomic to block signal processing while a file write is currently in 21 | /// progress. 22 | static WRITE_IN_PROGRESS: AtomicU16 = AtomicU16::new(0); 23 | /// Delay if the signal handler is currently running. 24 | static SIGNAL_HANDLER_AT_WORK: AtomicBool = AtomicBool::new(false); 25 | 26 | /// Handle incoming signals. 27 | /// 28 | /// Only relevant for *-nix platforms. 29 | #[cfg(not(target_os = "windows"))] 30 | pub fn signal_handler(fx: F) 31 | where 32 | F: FnOnce() + Send + 'static, 33 | { 34 | let mut signals = 35 | iterator::Signals::new([SIGTERM, SIGINT, SIGQUIT]).expect("Failed to create Signals"); 36 | 37 | std::thread::spawn(move || { 38 | for s in signals.forever() { 39 | match s { 40 | SIGTERM | SIGINT | SIGQUIT => { 41 | SIGNAL_HANDLER_AT_WORK.store(true, Ordering::SeqCst); 42 | // Wait for potential writing to disk to be finished. 43 | while WRITE_IN_PROGRESS.load(Ordering::Acquire) > 0 { 44 | std::hint::spin_loop(); 45 | std::thread::yield_now(); 46 | } 47 | fx(); 48 | signal_hook::low_level::exit(130); 49 | } 50 | sig => log::warn!("Received unhandled signal {sig}, ignoring"), 51 | } 52 | } 53 | }); 54 | } 55 | 56 | /// Blocks (UNIX) signals. 57 | pub struct TinHat; 58 | 59 | impl TinHat { 60 | /// Put the tin hat on, and only allow signals being processed once it's 61 | /// dropped. 62 | pub fn on() -> Self { 63 | // If there is a signal handler in progress, block. 64 | while SIGNAL_HANDLER_AT_WORK.load(Ordering::Acquire) { 65 | std::hint::spin_loop(); 66 | std::thread::yield_now(); 67 | } 68 | let _ = WRITE_IN_PROGRESS.fetch_add(1, Ordering::Release); 69 | Self 70 | } 71 | } 72 | 73 | impl Drop for TinHat { 74 | fn drop(&mut self) { 75 | let _ = WRITE_IN_PROGRESS.fetch_sub(1, Ordering::Release); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /docs/remedy.md: -------------------------------------------------------------------------------- 1 | # Fixing spelling mistakes 2 | 3 | While cargo-spellcheck is good at _pointing out_ existing spellchecks, 4 | it's sometimes not obvious how to resolve them or what the correct way is 5 | to resolve them. 6 | 7 | The following covers an abstracted set of commonly encountered `cargo spellcheck` 8 | complaints and how to resolve them: 9 | 10 | ## Configuration 11 | 12 | Make sure your runs are idempotent if you run on two different systems, 13 | which is easiest achieved by using the builtin affix and dictionaries 14 | besides the topic specifc lingo dictionary that should come with your project. 15 | 16 | ```toml 17 | # .config/spellcheck.toml 18 | 19 | [Hunspell] 20 | # snip 21 | skip_os_lookups = true 22 | use_builtin = true 23 | # snip 24 | ``` 25 | 26 | --- 27 | 28 | Avoiding `nlprule` backend by passing `--checkers=hunspell` might be a good idea, 29 | since `nlprule` tends to have a few false positives. 30 | 31 | ## Examples 32 | 33 | ### Missing word variants 34 | 35 | Sometimes some word forms belong into topic specific lingo and as such should be added to 36 | the topic specific dictionary. Make use of suffix patterns such as `/S` for plural `s` and `/M` for `'s`. This will keep your dictionary to a minimum. Please check the [affix file included here](./hunspell-data/en_US.aff) or your OS' provided affix file. 
37 | [It is required to understand the slightly arcane format of `.aff` and `.dic` files.](https://www.systutorials.com/docs/linux/man/4-hunspell/#lbAE) which is also available via `man 4 hunspell`.
38 |
39 | ### Types in doc comments
40 |
41 | ```raw
42 | lib.rs : 2
43 | 858 | See [MmrLeafVersion] type documentation for more details.
44 | | ^^^^^^^^^^^^^^
45 | | Possible spelling mistake found.
46 | ```
47 |
48 | can be resolved by using
49 |
50 | ```md
51 | [`MmrLeafVersion`]
52 | ```
53 |
54 | with additional ticks.
55 |
56 | This is a general pattern for _types_ that make an appearance in the doc comments.
57 |
58 | ### Patterns
59 |
60 | In some cases it's a pattern one wants to whitelist, such as `10x` or `117x`, which can be done via
61 | the configuration adding an allowlist regex `^[0-9]+x$`.
62 |
63 |
64 | ### TODO, XXX, and FIXME
65 |
66 | Should not be present in doc comments, but only make it into developer comments, i.e. `// FIXME foo` or `/* FIXME foo */`
67 |
68 | ### markdown: autolink
69 |
70 |
71 | ```raw
72 | error: spellcheck(Hunspell)
73 | --> test.md:96
74 | |
75 | 96 | The test coverage in `lcov` can the be published to .
76 | | ^^^^^^^
77 | | - codec
78 | |
79 | | Possible spelling mistake found.
80 | ```
81 |
82 | will spellcheck all components of the url, since it is not a _valid_ autolink. Add the protocol type.
83 |
84 | ```md
85 |
86 | ```
87 |
88 | and the content will be omitted from spellchecking.
-------------------------------------------------------------------------------- /src/config/search_dirs.rs: --------------------------------------------------------------------------------
1 | use super::*;
2 |
3 | /// Obtain OS specific search directories.
4 | fn os_specific_search_dirs() -> &'static [PathBuf] {
5 | lazy_static::lazy_static! {
6 | static ref OS_SPECIFIC_LOOKUP_DIRS: Vec<PathBuf> =
7 | if cfg!(target_os = "macos") {
8 | directories::BaseDirs::new()
9 | .map(|base| vec![base.home_dir().to_owned().join("/Library/Spelling/"), PathBuf::from("/Library/Spelling/")])
10 | .unwrap_or_default()
11 | } else if cfg!(target_os = "linux") {
12 | vec![
13 | // Fedora
14 | PathBuf::from("/usr/share/myspell/"),
15 | PathBuf::from("/usr/share/hunspell/"),
16 | // Arch Linux
17 | PathBuf::from("/usr/share/myspell/dicts/"),
18 | ]
19 | } else {
20 | Vec::new()
21 | };
22 |
23 | }
24 | OS_SPECIFIC_LOOKUP_DIRS.as_slice()
25 | }
26 |
27 | /// A collection of search directories. OS specific paths are only provided in
28 | /// the iterator. 
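/// (Illustrative: `dirs.iter(true)` yields the configured paths first and then
/// chains the OS specific defaults from `os_specific_search_dirs()`, while
/// `dirs.iter(false)` yields only the configured paths.)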
29 | #[derive(Debug, Clone)] 30 | pub struct SearchDirs(pub Vec); 31 | 32 | impl Default for SearchDirs { 33 | fn default() -> Self { 34 | Self(Vec::with_capacity(8)) 35 | } 36 | } 37 | 38 | impl SearchDirs { 39 | pub fn iter(&self, extend_by_os: bool) -> impl Iterator { 40 | let chained = if extend_by_os { 41 | os_specific_search_dirs().iter() 42 | } else { 43 | [].iter() 44 | }; 45 | self.0.iter().chain(chained) 46 | } 47 | } 48 | 49 | impl std::convert::AsRef> for SearchDirs { 50 | fn as_ref(&self) -> &Vec { 51 | &self.0 52 | } 53 | } 54 | 55 | impl Serialize for SearchDirs { 56 | fn serialize(&self, serializer: S) -> Result 57 | where 58 | S: serde::ser::Serializer, 59 | { 60 | serializer.serialize_newtype_struct("SearchDirs", &self.0) 61 | } 62 | } 63 | 64 | impl<'de> Deserialize<'de> for SearchDirs { 65 | fn deserialize(deserializer: D) -> Result 66 | where 67 | D: serde::de::Deserializer<'de>, 68 | { 69 | deserializer 70 | .deserialize_newtype_struct("SearchDirs", SearchDirVisitor) 71 | .map(Into::into) 72 | } 73 | } 74 | 75 | impl From for Vec { 76 | fn from(val: SearchDirs) -> Self { 77 | val.0 78 | } 79 | } 80 | 81 | impl From> for SearchDirs { 82 | fn from(other: Vec) -> SearchDirs { 83 | SearchDirs(other) 84 | } 85 | } 86 | 87 | /// A search directory visitor, auto extending the search directory with OS 88 | /// defaults. 89 | struct SearchDirVisitor; 90 | 91 | impl<'de> serde::de::Visitor<'de> for SearchDirVisitor { 92 | type Value = Vec; 93 | 94 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 95 | formatter.write_str("Search Dir Visitors must be an optional sequence of path") 96 | } 97 | 98 | fn visit_newtype_struct(self, deserializer: D) -> Result 99 | where 100 | D: serde::de::Deserializer<'de>, 101 | { 102 | let seq = deserializer.deserialize_seq(self)?; 103 | Ok(seq) 104 | } 105 | 106 | fn visit_seq(self, mut seq: A) -> Result 107 | where 108 | A: serde::de::SeqAccess<'de>, 109 | { 110 | let mut v = Vec::with_capacity(8); 111 | while let Some(item) = seq.next_element()? { 112 | v.push(item); 113 | } 114 | Ok(v) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /hunspell-data/en_US.aff: -------------------------------------------------------------------------------- 1 | SET UTF8 2 | TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ' 3 | NOSUGGEST ! 4 | 5 | # ordinal numbers 6 | COMPOUNDMIN 1 7 | # only in compounds: 1th, 2th, 3th 8 | ONLYINCOMPOUND c 9 | # compound rules: 10 | # 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.) 11 | # 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.) 12 | COMPOUNDRULE 2 13 | COMPOUNDRULE n*1t 14 | COMPOUNDRULE n*mp 15 | WORDCHARS 0123456789' 16 | 17 | PFX A Y 1 18 | PFX A 0 re . 19 | 20 | PFX I Y 1 21 | PFX I 0 in . 22 | 23 | PFX U Y 1 24 | PFX U 0 un . 25 | 26 | PFX C Y 1 27 | PFX C 0 de . 28 | 29 | PFX E Y 1 30 | PFX E 0 dis . 31 | 32 | PFX F Y 1 33 | PFX F 0 con . 34 | 35 | PFX K Y 1 36 | PFX K 0 pro . 37 | 38 | SFX V N 2 39 | SFX V e ive e 40 | SFX V 0 ive [^e] 41 | 42 | SFX N Y 3 43 | SFX N e ion e 44 | SFX N y ication y 45 | SFX N 0 en [^ey] 46 | 47 | SFX X Y 3 48 | SFX X e ions e 49 | SFX X y ications y 50 | SFX X 0 ens [^ey] 51 | 52 | SFX H N 2 53 | SFX H y ieth y 54 | SFX H 0 th [^y] 55 | 56 | SFX Y Y 1 57 | SFX Y 0 ly . 
58 | 59 | SFX G Y 2 60 | SFX G e ing e 61 | SFX G 0 ing [^e] 62 | 63 | SFX J Y 2 64 | SFX J e ings e 65 | SFX J 0 ings [^e] 66 | 67 | SFX D Y 4 68 | SFX D 0 d e 69 | SFX D y ied [^aeiou]y 70 | SFX D 0 ed [^ey] 71 | SFX D 0 ed [aeiou]y 72 | 73 | SFX T N 4 74 | SFX T 0 st e 75 | SFX T y iest [^aeiou]y 76 | SFX T 0 est [aeiou]y 77 | SFX T 0 est [^ey] 78 | 79 | SFX R Y 4 80 | SFX R 0 r e 81 | SFX R y ier [^aeiou]y 82 | SFX R 0 er [aeiou]y 83 | SFX R 0 er [^ey] 84 | 85 | SFX Z Y 4 86 | SFX Z 0 rs e 87 | SFX Z y iers [^aeiou]y 88 | SFX Z 0 ers [aeiou]y 89 | SFX Z 0 ers [^ey] 90 | 91 | SFX S Y 4 92 | SFX S y ies [^aeiou]y 93 | SFX S 0 s [aeiou]y 94 | SFX S 0 es [sxzh] 95 | SFX S 0 s [^sxzhy] 96 | 97 | SFX P Y 3 98 | SFX P y iness [^aeiou]y 99 | SFX P 0 ness [aeiou]y 100 | SFX P 0 ness [^y] 101 | 102 | SFX M Y 1 103 | SFX M 0 's . 104 | 105 | SFX B Y 3 106 | SFX B 0 able [^aeiou] 107 | SFX B 0 able ee 108 | SFX B e able [^aeiou]e 109 | 110 | SFX L Y 1 111 | SFX L 0 ment . 112 | 113 | SFX i N 1 114 | SFX i us i us 115 | 116 | REP 90 117 | REP a ei 118 | REP ei a 119 | REP a ey 120 | REP ey a 121 | REP ai ie 122 | REP ie ai 123 | REP alot a_lot 124 | REP are air 125 | REP are ear 126 | REP are eir 127 | REP air are 128 | REP air ere 129 | REP ere air 130 | REP ere ear 131 | REP ere eir 132 | REP ear are 133 | REP ear air 134 | REP ear ere 135 | REP eir are 136 | REP eir ere 137 | REP ch te 138 | REP te ch 139 | REP ch ti 140 | REP ti ch 141 | REP ch tu 142 | REP tu ch 143 | REP ch s 144 | REP s ch 145 | REP ch k 146 | REP k ch 147 | REP f ph 148 | REP ph f 149 | REP gh f 150 | REP f gh 151 | REP i igh 152 | REP igh i 153 | REP i uy 154 | REP uy i 155 | REP i ee 156 | REP ee i 157 | REP j di 158 | REP di j 159 | REP j gg 160 | REP gg j 161 | REP j ge 162 | REP ge j 163 | REP s ti 164 | REP ti s 165 | REP s ci 166 | REP ci s 167 | REP k cc 168 | REP cc k 169 | REP k qu 170 | REP qu k 171 | REP kw qu 172 | REP o eau 173 | REP eau o 174 | REP o ew 175 | REP ew o 176 | REP oo ew 177 | REP ew oo 178 | REP ew ui 179 | REP ui ew 180 | REP oo ui 181 | REP ui oo 182 | REP ew u 183 | REP u ew 184 | REP oo u 185 | REP u oo 186 | REP u oe 187 | REP oe u 188 | REP u ieu 189 | REP ieu u 190 | REP ue ew 191 | REP ew ue 192 | REP uff ough 193 | REP oo ieu 194 | REP ieu oo 195 | REP ier ear 196 | REP ear ier 197 | REP ear air 198 | REP air ear 199 | REP w qu 200 | REP qu w 201 | REP z ss 202 | REP ss z 203 | REP shun tion 204 | REP shun sion 205 | REP shun cion 206 | REP sitted sat 207 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-spellcheck" 3 | version = "0.15.5" 4 | authors = ["Bernhard Schuster "] 5 | edition = "2021" 6 | rust-version = "1.85.0" 7 | repository = "https://github.com/drahnr/cargo-spellcheck.git" 8 | homepage = "https://github.com/drahnr/cargo-spellcheck" 9 | license = "MIT OR Apache-2.0" 10 | keywords = ["spellcheck", "spelling", "grammar"] 11 | description = "Checks all doc comments for spelling mistakes" 12 | readme = "README.md" 13 | build = "build.rs" 14 | include = [ 15 | "nlprule-data/**/*.bin.xz", 16 | "hunspell-data/*", 17 | "src/**/*.rs", 18 | "Cargo.toml", 19 | "build.rs", 20 | "/LICENSE-*", 21 | "/README.md", 22 | "tests/**/*.rs", 23 | "CHANGELOG.md", 24 | ] 25 | 26 | [workspace] 27 | members = ["./doc-chunks"] 28 | 29 | 30 | [build-dependencies] 31 | nlprule-build = { version = "=0.6.4", optional = true } 32 | # compress the 
nlprule artifacts to be under the 10 MB limit 33 | # that cargo enforces 34 | xz2 = "0.1" 35 | 36 | [dependencies] 37 | 38 | doc-chunks = { version = "0.2.1", path = "./doc-chunks" } 39 | 40 | color-eyre = "0.6" 41 | cargo_toml = "0.21" 42 | console = "0.15" 43 | crossterm = "0.27" 44 | # for the config file 45 | directories = "5" 46 | 47 | clap = { version = "4.1.8", features = ["derive", "env"] } 48 | clap_complete = "4.1.4" 49 | clap-verbosity-flag = "2.0" 50 | 51 | env_logger = "0.11" 52 | fancy-regex = "0.13" 53 | fs-err = { version = "2", features = ["io_safety"] } 54 | indexmap = { version = "2", features = ["rayon", "serde"] } 55 | itertools = "0.12" 56 | lazy_static = "1" 57 | memchr = "2" 58 | log = "0.4" 59 | num_cpus = "1.13" 60 | proc-macro2 = { version = "1", features = ["span-locations"] } 61 | pulldown-cmark = "0.10" 62 | rayon = "1.5" 63 | regex = "1.5" 64 | serde = { version = "1", features = ["derive"] } 65 | signal-hook = "0.3" 66 | syn = { version = "2", features = ["full"] } 67 | thiserror = "1" 68 | # for parsing and extracting elements from Cargo.toml 69 | toml = "0.8" 70 | glob = "0.3" 71 | # for the config file 72 | ignore = "0.4.18" 73 | tokio = { version = "1", features = ["full", "rt-multi-thread"] } 74 | futures = "0.3" 75 | 76 | uuid = { version = "1.0.0", features = ["v4"] } 77 | 78 | # config parsing, must be independent of features 79 | 80 | # TODO parse the country codes of dictionaries? 81 | iso_country = { version = "0.1", features = ["serde"] } 82 | isolang = { version = "2", features = ["serde"] } 83 | 84 | url = { version = "2", features = ["serde"] } 85 | 86 | # dictionary lookup with affixes 87 | hunspell-rs = { version = "0.4.0", optional = true } 88 | fd-lock = { version = "4", optional = true } 89 | encoding_rs = { version = "0.8.31", optional = true, features = [] } 90 | zspell = { version = "0.5.5", optional = true } 91 | spellbook = { version = "0.1", optional = true } 92 | 93 | # full grammar check, but also tokenization and disambiguation 94 | nlprule = { version = "=0.6.4", optional = true } 95 | 96 | # cache some expensive expansions 97 | xz2 = "0.1" 98 | sha2 = "0.10" 99 | bincode = "1" 100 | hex = "0.4" 101 | thousands = "0.2" 102 | 103 | [dev-dependencies] 104 | # for stripping ansi color codes 105 | console = "0.15" 106 | assert_matches = "1" 107 | maplit = "1" 108 | serde_plain = "1" 109 | nix = "0.26.2" 110 | 111 | [features] 112 | default = ["all"] 113 | 114 | # hunspell uses the segmenter provided by nlprules 115 | hunspell = [ 116 | "dep:hunspell-rs", 117 | "hunspell-rs?/bundled", 118 | "dep:fd-lock", 119 | "nlprules", 120 | "dep:encoding_rs", 121 | ] 122 | zet = ["dep:zspell"] 123 | spellbook = ["dep:spellbook"] 124 | nlprules = ["dep:nlprule", "nlprule?/regex-fancy", "dep:nlprule-build"] 125 | 126 | all = ["hunspell", "zet", "spellbook", "nlprules"] 127 | 128 | [profile.dev] 129 | build-override = { opt-level = 2 } 130 | 131 | [profile.dev.package] 132 | backtrace = { opt-level = 3 } 133 | bincode = { opt-level = 3 } 134 | xz2 = { opt-level = 3 } 135 | sha2 = { opt-level = 3 } 136 | hunspell-rs = { opt-level = 3 } 137 | nlprule = { opt-level = 3 } 138 | 139 | [profile.release] 140 | debug = true 141 | 142 | [package.metadata.spellcheck] 143 | config = ".config/spellcheck.toml" 144 | 145 | 146 | [[test]] 147 | name = "signal_handler" 148 | path = "tests/signal_handler.rs" 149 | -------------------------------------------------------------------------------- /docs/configuration.md: 
-------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ## Source 4 | 5 | There are various ways to specify the configuration. The prioritization is as 6 | follows: 7 | 8 | _Explicit_ specification: 9 | 10 | 1. Command line flags `--cfg=...`. 11 | 1. `Cargo.toml` package metadata 12 | 13 | ```toml 14 | [package.metadata.spellcheck] 15 | config = "somewhere/cfg.toml" 16 | ``` 17 | 18 | 1. `Cargo.toml` workspace metadata 19 | 20 | ```toml 21 | [workspace.metadata.spellcheck] 22 | config = "somewhere/else/cfg.toml" 23 | ``` 24 | 25 | which will fail if specified but not present on the filesystem. 26 | 27 | If none of those ways of specification is present, continue with the 28 | _implicit_ ones: 29 | 30 | 1. `Cargo.toml` metadata in the current working directory `CWD`. 31 | 1. Check the first argument's location if present, else the current working directory for `.config/spellcheck.toml`. 32 | 1. Fallback to per user configuration files: 33 | * Linux: `/home/alice/.config/cargo_spellcheck/config.toml` 34 | * Windows: `C:\Users\Alice\AppData\Roaming\cargo_spellcheck\config.toml` 35 | * macOS: `/Users/Alice/Library/Preferences/cargo_spellcheck/config.toml` 36 | 1. Use the default, builtin configuration (see `config` sub-command). 37 | 38 | Since this is rather complex, add `-vv` to your invocation to see the `info` 39 | level logs printed, which will contain the config path. 40 | ### Format 41 | 42 | ```toml 43 | # Project settings where a Cargo.toml exists and is passed 44 | # ${CARGO_MANIFEST_DIR}/.config/spellcheck.toml 45 | 46 | # Also take into account developer comments 47 | dev_comments = false 48 | 49 | # Skip the README.md file as defined in the cargo manifest 50 | skip_readme = false 51 | 52 | [Hunspell] 53 | # lang and name of `.dic` file 54 | lang = "en_US" 55 | # OS specific additives 56 | # Linux: [ /usr/share/myspell ] 57 | # Windows: [] 58 | # macOS: [ /home/alice/Libraries/hunspell, /Libraries/hunspell ] 59 | 60 | # Additional search paths, which take precedence over the default 61 | # OS specific search dirs, searched in order, defaults last 62 | # search_dirs = [] 63 | 64 | # Adds additional dictionaries, which can be specified as 65 | # absolute paths or as paths relative to the search dirs (in this order). 66 | # Relative paths are resolved relative to the configuration file 67 | # which is used. 68 | # Refer to `man 5 hunspell` 69 | # or https://www.systutorials.com/docs/linux/man/4-hunspell/#lbAE 70 | # on how to define a custom dictionary file. 71 | extra_dictionaries = [] 72 | 73 | # If set to `true`, the OS specific default search paths 74 | # are skipped and only explicitly specified ones are used. 75 | skip_os_lookups = false 76 | 77 | # Use the builtin dictionaries if none were found 78 | # in the configured lookup paths. 79 | # Usually combined with `skip_os_lookups=true` 80 | # to enforce the `builtin` usage for consistent 81 | # results across distributions and CI runs. 82 | # Setting this will still use the dictionaries 83 | # specified in `extra_dictionaries = [..]` 84 | # for topic specific lingo. 85 | use_builtin = true 86 | 87 | 88 | [Hunspell.quirks] 89 | # Transforms words that are provided by the tokenizer 90 | # into word fragments based on the capture groups which are to 91 | # be checked. 92 | # If no capture groups are present, the matched word is whitelisted.
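# Illustration (not a shipped default): with the example expressions below, a
# token such as `2x` matches `^[0-9]+x$`, which has no capture groups, so the
# whole token is accepted as-is; the quoted-word pattern captures what sits
# between the quotes, so only that captured fragment is checked.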
93 | transform_regex = ["^'([^\\s])'$", "^[0-9]+x$"] 94 | # Accepts `alphabeta` variants if the checker provides a replacement suggestion 95 | # of `alpha-beta`. 96 | allow_concatenation = true 97 | # And the counterpart, which accepts words with dashes, when the suggestion has 98 | # recommendations without the dashes. This is less common. 99 | allow_dashed = false 100 | # Check the expressions in the footnote references. By default this is turned on 101 | # to remain backwards compatible, but disabling it could be particularly useful 102 | # when one uses abbreviations instead of numbers as footnote references. For 103 | # instance by default the fragment `hello[^xyz]` would be spellchecked as 104 | # `helloxyz` which is obviously a misspelled word, but by turning this check 105 | # off, it will skip validating the reference altogether and will only check the 106 | # word `hello`. 107 | check_footnote_references = false 108 | 109 | [NlpRules] 110 | # Allows the user to override the default included 111 | # exports of LanguageTool with other custom 112 | # languages 113 | 114 | # override_rules = "/path/to/rules_binencoded.bin" 115 | # override_tokenizer = "/path/to/tokenizer_binencoded.bin" 116 | 117 | [Reflow] 118 | # Reflows doc comments to adhere to a given maximum line width limit. 119 | max_line_length = 80 120 | ``` 121 | 122 | Add `-v` multiple times to increase verbosity. 123 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cargo-spellcheck 2 | 3 | [![crates.io](https://img.shields.io/crates/v/cargo_spellcheck.svg)](https://crates.io/crates/cargo-spellcheck) 4 | [![CI](https://ci.fff.rs/api/v1/teams/main/pipelines/cargo-spellcheck/jobs/master-validate/badge)](https://ci.fff.rs/teams/main/pipelines/cargo-spellcheck/jobs/master-validate) 5 | ![commits-since](https://img.shields.io/github/commits-since/drahnr/cargo-spellcheck/latest.svg) 6 | ![Crates.io MSRV](https://img.shields.io/crates/msrv/cargo-spellcheck) 7 | 8 | Check your spelling with `hunspell` and/or `nlprule`. 9 | 10 | ## Use Cases 11 | 12 | Run `cargo spellcheck --fix` or `cargo spellcheck fix` to fix all your 13 | documentation comments in order to avoid nasty typos all over your source tree. 14 | Meant as a helper simplifying review as well as improving CI checks after a 15 | learning phase for custom/topic specific lingo. 16 | 17 | `cargo-spellcheck` is also a valuable tool to run from git commit hooks or CI/CD 18 | systems. 19 | 20 | ### Check For Spelling and/or Grammar Mistakes 21 | 22 | ```zsh 23 | cargo spellcheck check 24 | ``` 25 | 26 |
error: spellcheck
 27 |    --> src/main.rs:44
 28 |     |
 29 |  44 | Fun facets shalld cause some erroris.
 30 |     |            ^^^^^^
 31 |     | - shall or shall d
 32 |     |
33 | 34 | ### Apply Suggestions Interactively 35 | 36 | ```zsh 37 | cargo spellcheck fix 38 | ``` 39 | 40 |
error: spellcheck(Hunspell)
 41 |     --> /media/supersonic1t/projects/cargo-spellcheck/src/literalset.rs:291
 42 |      |
 43 |  291 |  Returns literl within the Err variant if not adjacent
 44 |      |          ^^^^^^
 45 | 
 46 | (13/14) Apply this suggestion [y,n,q,a,d,j,e,?]?
 47 | 
 48 |    lite
 49 |    litter
 50 |    litterer
 51 |    liter l
 52 |    liters
 53 |    literal
 54 |    liter
 55 |  » a custom replacement literal
56 | 57 | ## Installation 58 | 59 | `cargo install --locked cargo-spellcheck` 60 | 61 | The `--locked` flag is the preferred way of installing to get the tested set of 62 | dependencies. 63 | 64 | On OS X, you need to ensure that `libclang.dylib` can be found by the linker, 65 | 66 | which can be achieved by setting `DYLD_FALLBACK_LIBRARY_PATH`: 67 | 68 | ``` 69 | export DYLD_FALLBACK_LIBRARY_PATH= \ 70 | "$(xcode-select --print-path)/Toolchains/XcodeDefault.xctoolchain/usr/lib/" 71 | ``` 72 | 73 | On Linux, the file is `libclang.so`, which can be installed via: 74 | 75 | ``` 76 | apt-get install libclang-dev 77 | ``` 78 | 79 | Afterwards, you can set the variable `LIBCLANG_PATH` via: 80 | 81 | ``` 82 | export LIBCLANG_PATH=/usr/lib/llvm-14/lib/ 83 | ``` 84 | 85 | ## Completions 86 | 87 | Use `cargo spellcheck completions` for autodetection of your current shell via 88 | `$SHELL`, 89 | 90 | or 91 | 92 | `cargo spellcheck completions --shell zsh` 93 | 94 | to explicitly specify your shell type. 95 | 96 | Commonly it's used like this from your shell's `.rc*` file: 97 | 98 | `source <(cargo spellcheck completions)` 99 | 100 | Note: There is a [relevant clap issue 101 | (#3508)](https://github.com/clap-rs/clap/issues/3508) that makes this fail in 102 | some cases. 103 | 104 | ## 🎈 Contribute! 105 | 106 | Contributions are very welcome! 107 | 108 | Generally the preferred way of doing so is to comment in an issue that you 109 | would like to tackle the implementation/fix. 110 | 111 | This is usually followed by an initial PR where the implementation is then 112 | discussed and iteratively refined. No need to get it all correct 113 | the first time! 114 | 115 | ## Documentation 116 | 117 | - [Features and Roadmap](docs/features.md) 118 | - [Remedies for common issues](docs/remedy.md) 119 | - [Configuration](docs/configuration.md) 120 | - [Available Checkers](docs/checkers.md) 121 | - [Automation of `cargo-spellcheck`](docs/automation.md) 122 | -------------------------------------------------------------------------------- /src/config/iso.rs: -------------------------------------------------------------------------------- 1 | //! Abstracts the combination of language code and country code into one 2 | //! convenient type. 3 | //! 4 | //! Language code follows the 5 | //! [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format. Country code 6 | //! follows the [Alpha-2 ISO_3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) 7 | //! format. 8 | //! 9 | //! It results in a mildly adapted [IETF language 10 | //! tag](https://en.wikipedia.org/wiki/IETF_language_tag). 11 | 12 | use iso_country::Country; 13 | use isolang::Language; 14 | 15 | use std::{fmt, str::FromStr}; 16 | 17 | use serde::de::{self, Deserialize, Deserializer}; 18 | use serde::ser::Serializer; 19 | 20 | /// 5 digit language and country code as used by the dictionaries.
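///
/// Rendered by the `Display` impl as `language_COUNTRY` (e.g. the default
/// `en_US`); the serde visitor below expects exactly this five character
/// `LL_CC` shape.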
21 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 22 | pub struct Lang5 { 23 | pub lang: Language, 24 | pub country: Country, 25 | } 26 | 27 | impl PartialEq for Lang5 { 28 | fn eq(&self, other: &str) -> bool { 29 | self.to_string().as_str() == other 30 | } 31 | } 32 | 33 | impl PartialEq for Lang5 34 | where 35 | X: AsRef, 36 | { 37 | fn eq(&self, other: &X) -> bool { 38 | self.to_string().as_str() == other.as_ref() 39 | } 40 | } 41 | 42 | impl<'a> PartialEq for &'a str { 43 | fn eq(&self, other: &Lang5) -> bool { 44 | let other = other.to_string(); 45 | *self == other.as_str() 46 | } 47 | } 48 | 49 | impl PartialEq for String { 50 | fn eq(&self, other: &Lang5) -> bool { 51 | *self == other.to_string() 52 | } 53 | } 54 | 55 | impl Default for Lang5 { 56 | fn default() -> Self { 57 | Self::en_US 58 | } 59 | } 60 | 61 | impl Lang5 { 62 | #[allow(non_upper_case_globals)] 63 | pub const en_US: Lang5 = Lang5 { 64 | lang: Language::Eng, 65 | country: Country::US, 66 | }; 67 | } 68 | 69 | impl fmt::Display for Lang5 { 70 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 71 | let language = self.lang.to_639_1().unwrap_or("??"); 72 | let country = self.country; 73 | write!(f, "{language}_{country}")?; 74 | Ok(()) 75 | } 76 | } 77 | 78 | #[derive(Debug, Clone, thiserror::Error)] 79 | #[error("Wrong character, expected '_' found '{0}'")] 80 | struct Lang5SpacerError(char); 81 | 82 | #[derive(Debug, Clone, Copy, Default)] 83 | struct Lang5Visitor; 84 | 85 | impl<'de> de::Visitor<'de> for Lang5Visitor { 86 | type Value = Lang5; 87 | 88 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 89 | write!( 90 | formatter, 91 | "Expected a 5 digit lang and country code in the form of LL_CC" 92 | ) 93 | } 94 | 95 | fn visit_str(self, v: &str) -> Result 96 | where 97 | E: de::Error, 98 | { 99 | self.visit_borrowed_str::(v) 100 | } 101 | 102 | fn visit_string(self, s: String) -> Result 103 | where 104 | E: de::Error, 105 | { 106 | self.visit_borrowed_str::(s.as_str()) 107 | } 108 | 109 | fn visit_borrowed_str(self, s: &'de str) -> Result 110 | where 111 | E: serde::de::Error, 112 | { 113 | if s.len() != 5 { 114 | return Err(serde::de::Error::custom(Lang5SpacerError('l'))); 115 | } 116 | let lang = Language::from_639_1(&s[0..2]) 117 | .ok_or(Lang5SpacerError('2')) 118 | .map_err(serde::de::Error::custom)?; 119 | let c = s.chars().nth(2).unwrap(); 120 | if c != '_' { 121 | return Err(serde::de::Error::custom(Lang5SpacerError(c)))?; 122 | } 123 | let country = Country::from_str(&s[3..5]).map_err(serde::de::Error::custom)?; 124 | Ok(Lang5 { lang, country }) 125 | } 126 | } 127 | 128 | impl<'de> Deserialize<'de> for Lang5 { 129 | fn deserialize(deserializer: D) -> Result 130 | where 131 | D: Deserializer<'de>, 132 | { 133 | deserializer.deserialize_str(Lang5Visitor) 134 | } 135 | } 136 | 137 | impl serde::Serialize for Lang5 { 138 | fn serialize(&self, serializer: S) -> Result 139 | where 140 | S: Serializer, 141 | { 142 | serializer.serialize_str(self.to_string().as_str()) 143 | } 144 | } 145 | 146 | #[cfg(test)] 147 | mod tests { 148 | use super::*; 149 | use assert_matches::assert_matches; 150 | 151 | const EXPECTED: Lang5 = Lang5 { 152 | lang: Language::Deu, 153 | country: Country::AU, 154 | }; 155 | const S: &str = "de_AU"; 156 | 157 | #[test] 158 | fn iso_lang_german_austria_serde() { 159 | assert_eq!(S.to_owned(), EXPECTED.to_string()); 160 | 161 | assert_matches!(serde_plain::from_str::(S), Ok(x) => assert_eq!(EXPECTED, x)); 162 | } 163 | 164 | #[test] 165 | fn cmp_variants() { 
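        // Exercises the `PartialEq` impls above: `Lang5` compares equal to its
        // string form whether given as `&str`, `String`, or references to them.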
166 | assert!(EXPECTED == S); 167 | assert!(EXPECTED == &S); 168 | assert!(EXPECTED == S.to_owned()); 169 | assert!(EXPECTED == &S.to_owned()); 170 | assert!(&EXPECTED == S); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/checker/nlprules.rs: -------------------------------------------------------------------------------- 1 | //! An NLP-based rule checker built on `nlprule` 2 | //! 3 | //! Checks grammar, and is only supposed to check grammar. Sentence 4 | //! splitting is done in a hand-waving way. To be improved. 5 | 6 | use super::{Checker, Detector, Suggestion}; 7 | use crate::{CheckableChunk, ContentOrigin}; 8 | 9 | use crate::errors::*; 10 | 11 | use std::collections::{hash_map::Entry, HashMap}; 12 | use std::{ 13 | path::{Path, PathBuf}, 14 | sync::{Arc, Mutex}, 15 | }; 16 | 17 | use nlprule::{Rules, Tokenizer}; 18 | 19 | use lazy_static::lazy_static; 20 | 21 | lazy_static! { 22 | static ref RULES: Mutex, Arc>> = Mutex::new(HashMap::new()); 23 | } 24 | 25 | pub(crate) fn filtered_rules + Clone>( 26 | override_path: Option
, 27 | ) -> Result> { 28 | match RULES 29 | .lock() 30 | .unwrap() 31 | .entry(override_path.clone().map(|x| x.as_ref().to_path_buf())) 32 | { 33 | Entry::Occupied(occupied) => Ok(occupied.get().clone()), 34 | Entry::Vacant(empty) => { 35 | let rules = super::rules(override_path)?; 36 | let rules = rules 37 | .rules() 38 | .iter() 39 | .filter(|rule| { 40 | match rule 41 | .category_type() 42 | .map(str::to_lowercase) 43 | .as_ref() 44 | .map(|x| x as &str) 45 | { 46 | // The hunspell backend is aware of 47 | // custom lingo, which this one is not, 48 | // so there would be a lot of false 49 | // positives. 50 | Some("misspelling") => false, 51 | // Anything quotes related is not relevant 52 | // for code documentation. 53 | Some("typographical") => false, 54 | _other => true, 55 | } 56 | }) 57 | .cloned() 58 | .collect::(); 59 | 60 | let rules = Arc::new(rules); 61 | empty.insert(rules.clone()); 62 | Ok(rules) 63 | } 64 | } 65 | } 66 | 67 | pub(crate) struct NlpRulesChecker { 68 | tokenizer: Arc, 69 | rules: Arc, 70 | } 71 | 72 | impl NlpRulesChecker { 73 | pub fn new(config: &::Config) -> Result { 74 | let tokenizer = super::tokenizer(config.override_tokenizer.as_ref())?; 75 | let rules = filtered_rules(config.override_tokenizer.as_ref())?; 76 | Ok(Self { tokenizer, rules }) 77 | } 78 | } 79 | 80 | impl Checker for NlpRulesChecker { 81 | type Config = crate::config::NlpRulesConfig; 82 | 83 | fn detector() -> Detector { 84 | Detector::NlpRules 85 | } 86 | 87 | fn check<'a, 's>( 88 | &self, 89 | origin: &ContentOrigin, 90 | chunks: &'a [CheckableChunk], 91 | ) -> Result>> 92 | where 93 | 'a: 's, 94 | { 95 | let mut acc = Vec::with_capacity(chunks.len()); 96 | 97 | for chunk in chunks { 98 | acc.extend(check_chunk( 99 | origin.clone(), 100 | chunk, 101 | &self.tokenizer, 102 | &self.rules, 103 | )); 104 | } 105 | 106 | Ok(acc) 107 | } 108 | } 109 | 110 | /// Check the plain text contained in chunk, which can be one or more sentences. 
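///
/// The chunk's markdown is first erased to plain text via `erase_cmark`,
/// `nlprule` suggestions are collected for that text, and their character
/// ranges are then mapped back onto source spans with `find_spans`.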
111 | fn check_chunk<'a>( 112 | origin: ContentOrigin, 113 | chunk: &'a CheckableChunk, 114 | tokenizer: &Tokenizer, 115 | rules: &Rules, 116 | ) -> Vec> { 117 | // TODO We should control which parts need to be ignored of the markdown 118 | // entities, however the `NlpRulesConfig`, which is the only configuration 119 | // we receive in the constructor does not contain the same quirks (or in 120 | // fact any other similar settings) as the Hunspell one, so we cannot obtain 121 | // this setting, therefore we fallback to default 122 | let plain = chunk.erase_cmark(&Default::default()); 123 | log::trace!("{plain:?}"); 124 | let txt = plain.as_str(); 125 | 126 | let mut acc = Vec::with_capacity(32); 127 | 128 | let nlpfixes = rules.suggest(txt, tokenizer); 129 | if nlpfixes.is_empty() { 130 | return Vec::new(); 131 | } 132 | 133 | 'nlp: for fix in nlpfixes { 134 | let message = fix.message(); 135 | let replacements = fix.replacements(); 136 | let start = fix.span().char().start; 137 | let end = fix.span().char().end; 138 | if start > end { 139 | log::debug!("BUG: crate nlprule yielded a negative range {:?} for chunk in {}, please file a bug", start..end, &origin); 140 | continue 'nlp; 141 | } 142 | let range = start..end; 143 | acc.extend( 144 | plain 145 | .find_spans(range) 146 | .into_iter() 147 | .map(|(range, span)| Suggestion { 148 | detector: Detector::NlpRules, 149 | range, 150 | span, 151 | origin: origin.clone(), 152 | replacements: replacements.to_vec(), 153 | chunk, 154 | description: Some(message.to_owned()), 155 | }), 156 | ); 157 | } 158 | 159 | acc 160 | } 161 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(dead_code)] 2 | #![deny(missing_docs)] 3 | // #![deny(unused_crate_dependencies)] 4 | #![allow(clippy::non_ascii_literal)] 5 | // be explicit about certain offsets and how they are constructed 6 | #![allow(clippy::identity_op)] 7 | // in small cli projects, this is ok for now 8 | #![allow(clippy::wildcard_imports)] 9 | // personal strong preference for `from_iter` 10 | #![allow(clippy::from_iter_instead_of_collect)] 11 | #![allow(clippy::new_without_default)] 12 | #![allow(clippy::items_after_statements)] 13 | // Prevent the stray dbg! macros 14 | #![cfg_attr(not(test), deny(clippy::dbg_macro))] 15 | #![cfg_attr(test, allow(clippy::dbg_macro))] 16 | 17 | //! cargo-spellcheck 18 | //! 19 | //! A syntax tree based doc comment and common mark spell checker. 20 | 21 | pub use doc_chunks as documentation; 22 | #[cfg(test)] 23 | pub(crate) use doc_chunks::{chyrp_up, fluff_up}; 24 | 25 | pub mod action; 26 | mod checker; 27 | mod config; 28 | pub mod errors; 29 | mod reflow; 30 | mod suggestion; 31 | mod tinhat; 32 | mod traverse; 33 | 34 | pub use self::action::*; 35 | pub use self::config::args::*; 36 | pub use self::config::{Config, HunspellConfig, LanguageToolConfig}; 37 | pub use self::documentation::span::*; 38 | pub use self::documentation::util::*; 39 | pub use self::documentation::{ 40 | util, CheckableChunk, Clusters, CommentVariant, CommentVariantCategory, ContentOrigin, 41 | Documentation, PlainOverlay, Range, 42 | }; 43 | pub use self::suggestion::*; 44 | pub use self::tinhat::*; 45 | 46 | use self::errors::{bail, Result}; 47 | 48 | use std::io::Write; 49 | 50 | #[cfg(target_os = "windows")] 51 | use signal_hook as _; 52 | 53 | use checker::Checker; 54 | 55 | /// A simple exit code representation. 
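/// (`Success` maps to process exit status `0`, `Signal` to `130`; see `as_u8`.)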
56 | /// 57 | /// `Custom` can be specified by the user, others map to their UNIX equivalents 58 | /// where available. 59 | #[derive(Debug, Clone, Copy, Eq, PartialEq)] 60 | pub enum ExitCode { 61 | /// Regular termination and does not imply anything in regards to spelling 62 | /// mistakes found or not. 63 | Success, 64 | /// Terminate requested by a *nix signal. 65 | Signal, 66 | /// A custom exit code, as specified with `--code=`. 67 | Custom(u8), 68 | // Failure is already default for `Err(_)` 69 | } 70 | 71 | impl ExitCode { 72 | /// Convert `ExitCode` to primitive. 73 | pub fn as_u8(&self) -> u8 { 74 | match *self { 75 | Self::Success => 0u8, 76 | Self::Signal => 130u8, 77 | Self::Custom(code) => code, 78 | } 79 | } 80 | } 81 | 82 | /// The inner main. 83 | pub fn run(args: Args) -> Result { 84 | let _ = ::rayon::ThreadPoolBuilder::new() 85 | .num_threads(args.job_count()) 86 | .build_global(); 87 | 88 | env_logger::Builder::from_env(env_logger::Env::new().filter_or("CARGO_SPELLCHECK", "warn")) 89 | .filter_level(args.verbosity()) 90 | .filter_module("nlprule", log::LevelFilter::Error) 91 | .filter_module("mio", log::LevelFilter::Error) 92 | .init(); 93 | 94 | #[cfg(not(target_os = "windows"))] 95 | signal_handler(move || { 96 | if let Err(e) = action::interactive::ScopedRaw::restore_terminal() { 97 | log::warn!("Failed to restore terminal: {e}"); 98 | } 99 | }); 100 | 101 | let (unified, config) = match &args.command { 102 | Some(Sub::Completions { shell }) => { 103 | let sink = &mut std::io::stdout(); 104 | generate_completions(*shell, sink); 105 | let _ = sink.flush(); 106 | return Ok(ExitCode::Success); 107 | } 108 | _ => args.unified()?, 109 | }; 110 | 111 | match unified { 112 | // must unify first, for the proper paths 113 | UnifiedArgs::Config { 114 | dest_config, 115 | checker_filter_set, 116 | } => { 117 | log::trace!("Configuration chore"); 118 | let mut config = Config::full(); 119 | Args::checker_selection_override( 120 | checker_filter_set.as_ref().map(AsRef::as_ref), 121 | &mut config, 122 | )?; 123 | 124 | match dest_config { 125 | ConfigWriteDestination::Stdout => { 126 | println!("{}", config.to_toml()?); 127 | return Ok(ExitCode::Success); 128 | } 129 | ConfigWriteDestination::File { overwrite, path } => { 130 | if path.exists() && !overwrite { 131 | bail!( 132 | "Attempting to overwrite {} requires `--force`.", 133 | path.display() 134 | ); 135 | } 136 | 137 | log::info!("Writing configuration file to {}", path.display()); 138 | config.write_values_to_path(path)?; 139 | } 140 | } 141 | Ok(ExitCode::Success) 142 | } 143 | UnifiedArgs::Operate { 144 | action, 145 | paths, 146 | recursive, 147 | skip_readme, 148 | config_path, 149 | dev_comments, 150 | exit_code_override, 151 | } => { 152 | log::debug!("Executing: {action:?} with {config:?} from {config_path:?}"); 153 | 154 | let documents = 155 | traverse::extract(paths, recursive, skip_readme, dev_comments, &config)?; 156 | 157 | let rt = tokio::runtime::Runtime::new()?; 158 | let finish = rt.block_on(async move { action.run(documents, config).await })?; 159 | 160 | match finish { 161 | Finish::Success | Finish::MistakeCount(0) => Ok(ExitCode::Success), 162 | Finish::MistakeCount(_n) => Ok(ExitCode::Custom(exit_code_override)), 163 | Finish::Abort => Ok(ExitCode::Signal), 164 | } 165 | } 166 | } 167 | } 168 | 169 | #[cfg(test)] 170 | mod tests; 171 | -------------------------------------------------------------------------------- /src/checker/cached.rs: 
-------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | 3 | use hex::ToHex; 4 | use serde::de::DeserializeOwned; 5 | use serde::ser::Serialize; 6 | use sha2::Digest; 7 | use std::io::Seek; 8 | use std::path::Path; 9 | use std::time::{Duration, Instant}; 10 | 11 | #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] 12 | struct CacheEntry { 13 | what: String, 14 | val: T, 15 | } 16 | 17 | pub struct CachedValue { 18 | /// Time it took to.. 19 | /// load the value from disk if it was there. 20 | pub fetch: Option, 21 | /// Updating the disk cache 22 | pub update: Option, 23 | /// Create a new one if needed 24 | pub creation: Option, 25 | /// The accumulated duration, 26 | pub total: Duration, 27 | /// The actual value. 28 | pub value: T, 29 | } 30 | 31 | pub struct Cached { 32 | cache_file: fd_lock::RwLock, 33 | // What to cache. 34 | what: String, 35 | _phantom: std::marker::PhantomData, 36 | } 37 | 38 | impl<'a, T> Cached 39 | where 40 | T: Serialize + DeserializeOwned, 41 | { 42 | /// Create a new `Cached` instance, to create a expanded version of something that's identified by `what`. 43 | pub fn new(what: impl AsRef, cache_dir: impl AsRef) -> Result { 44 | let what = what.as_ref(); 45 | let what_digest = sha2::Sha256::digest(what.as_bytes()); 46 | let cache_dir = cache_dir.as_ref(); 47 | fs_err::create_dir_all(cache_dir)?; 48 | let cache_file = cache_dir.join(what_digest.as_slice().encode_hex::()); 49 | let cache_file = fs_err::OpenOptions::new() 50 | .create(true) 51 | .read(true) 52 | .write(true) 53 | .open(cache_file)?; 54 | Ok(Self { 55 | cache_file: fd_lock::RwLock::new(cache_file), 56 | what: what.to_owned(), 57 | _phantom: std::marker::PhantomData, 58 | }) 59 | } 60 | 61 | pub fn fetch_or_update( 62 | &mut self, 63 | create: impl FnOnce(&str) -> Result, 64 | ) -> Result> { 65 | let total_start = Instant::now(); 66 | match self.fetch() { 67 | Ok(Some(value)) => { 68 | let elapsed = total_start.elapsed(); 69 | Ok(CachedValue { 70 | value, 71 | fetch: Some(elapsed), 72 | update: None, 73 | creation: None, 74 | total: elapsed, 75 | }) 76 | } 77 | Ok(None) => { 78 | let fetch = Some(total_start.elapsed()); 79 | 80 | let creation_start = Instant::now(); 81 | let value = create(self.what.as_str())?; 82 | let creation = Some(creation_start.elapsed()); 83 | 84 | let update_start = Instant::now(); 85 | if let Err(err) = self.update(&value) { 86 | log::warn!("Failed to write value to cached: {err:?}"); 87 | } 88 | let update = Some(update_start.elapsed()); 89 | let total = total_start.elapsed(); 90 | Ok(CachedValue { 91 | value, 92 | fetch, 93 | update, 94 | creation, 95 | total, 96 | }) 97 | } 98 | Err(err) => { 99 | log::warn!("Overriding existing value that failed to load: {err:?}"); 100 | 101 | let fetch = Some(total_start.elapsed()); 102 | 103 | let creation_start = Instant::now(); 104 | let value = create(self.what.as_str())?; 105 | let creation = Some(creation_start.elapsed()); 106 | 107 | let update_start = Instant::now(); 108 | if let Err(err) = self.update(&value) { 109 | log::warn!("Failed to update cached: {err:?}"); 110 | } 111 | let update = Some(update_start.elapsed()); 112 | let total = total_start.elapsed(); 113 | Ok(CachedValue { 114 | value, 115 | fetch, 116 | update, 117 | creation, 118 | total, 119 | }) 120 | } 121 | } 122 | } 123 | pub fn fetch(&mut self) -> Result> { 124 | let guard = self.cache_file.read()?; 125 | let buf = std::io::BufReader::new(guard.file()); 126 | // let buf = 
xz2::bufread::XzDecoder::new(buf); 127 | match bincode::deserialize_from(buf) { 128 | Ok(CacheEntry { what, val }) => { 129 | if what == self.what { 130 | log::debug!("Cached value with matching what \"{what}\""); 131 | Ok(Some(val)) 132 | } else { 133 | log::warn!( 134 | "Cached value what \"{}\" does not match expect what \"{}\", removing", 135 | what, 136 | self.what 137 | ); 138 | Ok(None) 139 | } 140 | } 141 | Err(e) => { 142 | log::warn!("Failed to load cached value: {e:?}"); 143 | Ok(None) 144 | } 145 | } 146 | } 147 | 148 | pub fn update(&mut self, val: &T) -> Result<()> { 149 | let mut write_guard = self.cache_file.write()?; 150 | 151 | let entry = CacheEntry { 152 | what: self.what.clone(), 153 | val, 154 | }; 155 | let encoded: Vec = bincode::serialize(&entry).unwrap(); 156 | let mut encoded = &encoded[..]; 157 | // let mut compressed = xz2::bufread::XzEncoder::new(&mut encoded, 6); 158 | 159 | // effectively truncate, but without losing the lock 160 | let file = write_guard.file_mut(); 161 | file.rewind()?; 162 | std::io::copy(&mut encoded, file)?; 163 | let loco = file.stream_position()?; 164 | file.set_len(loco)?; 165 | Ok(()) 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/checker/dictaffix.rs: -------------------------------------------------------------------------------- 1 | use super::hunspell::cache_builtin; 2 | use super::Result; 3 | use crate::config::{Lang5, SearchDirs}; 4 | use color_eyre::eyre::{bail, eyre, WrapErr}; 5 | use fs_err as fs; 6 | use itertools::Itertools; 7 | use std::io; 8 | use std::io::BufRead; 9 | use std::path::{Path, PathBuf}; 10 | 11 | pub(crate) struct DicAff { 12 | pub(crate) dic: String, 13 | pub(crate) aff: String, 14 | } 15 | 16 | impl DicAff { 17 | pub(crate) fn load( 18 | extra_dictionaries: &[std::path::PathBuf], 19 | search_dirs: &SearchDirs, 20 | lang: Lang5, 21 | use_builtin: bool, 22 | skip_os_lookups: bool, 23 | ) -> Result { 24 | let lang = lang.to_string(); 25 | let lang = lang.as_str(); 26 | 27 | // lookup paths are really just an attempt to provide a dictionary, so be more forgiving 28 | // when encountering errors here 29 | let (dic, aff): (PathBuf, PathBuf) = search_dirs.iter(!skip_os_lookups) 30 | .filter(|search_dir| { 31 | let keep = search_dir.is_dir(); 32 | if !keep { 33 | // search_dir also contains the default paths, so just silently ignore these 34 | log::debug!( 35 | target: "affdic", 36 | "Dictionary search path is not a directory {}", 37 | search_dir.display() 38 | ); 39 | } else { 40 | log::debug!( 41 | target: "affdic", 42 | "Found dictionary search path {}", 43 | search_dir.display() 44 | ); 45 | } 46 | keep 47 | }) 48 | .find_map(|search_dir| { 49 | let dic = search_dir.join(lang).with_extension("dic"); 50 | if !dic.is_file() { 51 | log::debug!( 52 | target: "affdic", 53 | "Dictionary path dervied from search dir is not a file {}", 54 | dic.display() 55 | ); 56 | return None; 57 | } 58 | let aff = search_dir.join(lang).with_extension("aff"); 59 | if !aff.is_file() { 60 | log::debug!( 61 | target: "affdic", 62 | "Affixes path dervied from search dir is not a file {}", 63 | aff.display() 64 | ); 65 | return None; 66 | } 67 | log::debug!("Using dic {} and aff {}", dic.display(), aff.display()); 68 | Some((dic, aff)) 69 | }) 70 | .ok_or_else(|| { 71 | eyre!("Failed to find any {lang}.dic / {lang}.aff in any search dir or no search provided", 72 | lang = lang) 73 | }) 74 | .or_else(|e| { 75 | if use_builtin { 76 | Ok(cache_builtin()?) 
77 | } else { 78 | Err(e) 79 | } 80 | })?; 81 | 82 | let dic = fs_err::read_to_string(&dic)?; 83 | let aff = fs_err::read_to_string(&aff)?; 84 | 85 | // We need to combine multiple dictionaries into one 86 | // since we want suffix support rather than plain word lists 87 | let mut dic_acc = dic; 88 | 89 | // suggestion must contain the word itself if it is valid extra dictionary 90 | // be more strict about the extra dictionaries, they have to exist 91 | log::info!(target: "dicaff", "Adding {} extra dictionaries", extra_dictionaries.len()); 92 | 93 | for extra_dic_path in extra_dictionaries { 94 | log::debug!(target: "affdic", "Adding extra dictionary {}", extra_dic_path.display()); 95 | // after calling `sanitize_paths` 96 | // the ought to be all absolutes 97 | assert!(extra_dic_path.is_absolute()); 98 | let extra_dic = fs::read_to_string(extra_dic_path)?; 99 | is_valid_hunspell_dic(&mut extra_dic.as_bytes())?; 100 | log::trace!(target: "affdic", "Adding extra dict to main dict: {}", extra_dic.trim().lines().count() - 1); 101 | dic_acc.push('\n'); 102 | // trim the initil number 103 | dic_acc.push_str( 104 | extra_dic 105 | .trim() 106 | .split_once("\n") 107 | .expect("It's a valid dictionary. qed") 108 | .1, 109 | ); 110 | } 111 | 112 | // sort them, just in case 113 | let mut counter = 0; 114 | let dic = dic_acc 115 | .lines() 116 | .inspect(|_line| counter += 1) 117 | .sorted() 118 | .unique() 119 | .join("\n"); 120 | let counter = counter.to_string(); 121 | let dic = counter + "\n" + dic.trim(); 122 | 123 | log::trace!(target: "affdic", "Total dictionary entries are: {}", dic.trim().lines().count() - 1); 124 | 125 | Ok(Self { dic, aff }) 126 | } 127 | } 128 | 129 | /// Check if provided path has valid dictionary format. 130 | /// 131 | /// This is a YOLO check. 132 | pub(crate) fn is_valid_hunspell_dic_path(path: impl AsRef) -> Result<()> { 133 | let reader = io::BufReader::new(fs::File::open(path.as_ref())?); 134 | is_valid_hunspell_dic(reader) 135 | } 136 | 137 | /// Check a reader for correct hunspell format. 138 | pub(crate) fn is_valid_hunspell_dic(reader: impl BufRead) -> Result<()> { 139 | let mut iter = reader.lines().enumerate(); 140 | if let Some((_lineno, first)) = iter.next() { 141 | let first = first?; 142 | let _ = first.parse::().wrap_err_with(|| { 143 | eyre!("First line of extra dictionary must a number, but is: >{first}<") 144 | })?; 145 | } 146 | // Just check the first 10 lines, don't waste much time here 147 | // the first two are the most important ones. 148 | for (lineno, line) in iter.take(10) { 149 | // All lines after must be format x. 150 | if let Ok(num) = line?.parse::() { 151 | bail!("Line {lineno} of extra dictionary must not be a number, but is: >{num}<",) 152 | }; 153 | } 154 | Ok(()) 155 | } 156 | -------------------------------------------------------------------------------- /src/action/bandaid.rs: -------------------------------------------------------------------------------- 1 | //! A mistake bandaid. 2 | //! 3 | //! A `BandAid` covers the mistake with a suggested replacement, as picked by 4 | //! the user. 5 | 6 | use crate::documentation::Span; 7 | 8 | /// A chosen suggestion for a certain span 9 | #[derive(Debug, Clone, PartialEq, Eq)] 10 | pub struct BandAid { 11 | /// `String` replaces the content covered by `Span` 12 | pub content: String, 13 | /// range which will be replaced 14 | pub span: Span, 15 | } 16 | 17 | impl BandAid { 18 | /// Check if the bandaid covers `line` which is 1 indexed. 
19 | pub fn covers_line(&self, line: usize) -> bool { 20 | self.span.covers_line(line) 21 | } 22 | } 23 | 24 | impl From<(String, &Span)> for BandAid { 25 | fn from((replacement, span): (String, &Span)) -> Self { 26 | Self { 27 | content: replacement, 28 | span: *span, 29 | } 30 | } 31 | } 32 | 33 | #[cfg(test)] 34 | pub(crate) mod tests { 35 | 36 | use crate::util::load_span_from; 37 | 38 | use crate::{LineColumn, Span}; 39 | 40 | #[test] 41 | fn span_helper_integrity() { 42 | const SOURCE: &str = r#"0 43 | abcde 44 | f 45 | g 46 | hijk 47 | l 48 | "#; 49 | 50 | struct TestSet { 51 | span: Span, 52 | expected: &'static str, 53 | } 54 | 55 | const SETS: &[TestSet] = &[ 56 | TestSet { 57 | span: Span { 58 | start: LineColumn { 59 | line: 1usize, 60 | column: 0, 61 | }, 62 | end: LineColumn { 63 | line: 1usize, 64 | column: 0, 65 | }, 66 | }, 67 | expected: "0", 68 | }, 69 | TestSet { 70 | span: Span { 71 | start: LineColumn { 72 | line: 2usize, 73 | column: 2, 74 | }, 75 | end: LineColumn { 76 | line: 2usize, 77 | column: 4, 78 | }, 79 | }, 80 | expected: "cde", 81 | }, 82 | TestSet { 83 | span: Span { 84 | start: LineColumn { 85 | line: 5usize, 86 | column: 0, 87 | }, 88 | end: LineColumn { 89 | line: 5usize, 90 | column: 1, 91 | }, 92 | }, 93 | expected: "hi", 94 | }, 95 | ]; 96 | 97 | for item in SETS { 98 | assert_eq!( 99 | load_span_from(SOURCE.as_bytes(), item.span).unwrap(), 100 | item.expected.to_string() 101 | ); 102 | } 103 | } 104 | 105 | #[test] 106 | fn try_from_string_works() { 107 | const TEST: &str = include_str!("../../demo/src/main.rs"); 108 | 109 | const EXPECTED: &[Span] = &[ 110 | Span { 111 | start: LineColumn { line: 1, column: 4 }, 112 | end: LineColumn { line: 1, column: 7 }, 113 | }, 114 | Span { 115 | start: LineColumn { line: 1, column: 9 }, 116 | end: LineColumn { line: 1, column: 9 }, 117 | }, 118 | Span { 119 | start: LineColumn { 120 | line: 1, 121 | column: 11, 122 | }, 123 | end: LineColumn { 124 | line: 1, 125 | column: 13, 126 | }, 127 | }, 128 | Span { 129 | start: LineColumn { 130 | line: 1, 131 | column: 15, 132 | }, 133 | end: LineColumn { 134 | line: 1, 135 | column: 20, 136 | }, 137 | }, 138 | Span { 139 | start: LineColumn { 140 | line: 1, 141 | column: 22, 142 | }, 143 | end: LineColumn { 144 | line: 1, 145 | column: 27, 146 | }, 147 | }, 148 | Span { 149 | start: LineColumn { 150 | line: 1, 151 | column: 28, 152 | }, 153 | end: LineColumn { 154 | line: 1, 155 | column: 28, 156 | }, 157 | }, 158 | ]; 159 | 160 | crate::checker::tests::extraction_test_body(TEST, EXPECTED); 161 | } 162 | 163 | #[test] 164 | fn try_from_raw_string_works() { 165 | const TEST: &str = include_str!("../../demo/src/lib.rs"); 166 | let fn_with_doc = TEST 167 | .lines() 168 | .skip(18) 169 | .take(4) 170 | .fold(String::new(), |acc, line| acc + line); 171 | 172 | const EXPECTED: &[Span] = &[ 173 | Span { 174 | start: LineColumn { 175 | line: 1, 176 | column: 11, 177 | }, 178 | end: LineColumn { 179 | line: 1, 180 | column: 14, 181 | }, 182 | }, 183 | Span { 184 | start: LineColumn { 185 | line: 1, 186 | column: 16, 187 | }, 188 | end: LineColumn { 189 | line: 1, 190 | column: 18, 191 | }, 192 | }, 193 | Span { 194 | start: LineColumn { 195 | line: 1, 196 | column: 20, 197 | }, 198 | end: LineColumn { 199 | line: 1, 200 | column: 21, 201 | }, 202 | }, 203 | Span { 204 | start: LineColumn { 205 | line: 1, 206 | column: 23, 207 | }, 208 | end: LineColumn { 209 | line: 1, 210 | column: 26, 211 | }, 212 | }, 213 | Span { 214 | start: LineColumn { 215 | line: 1, 216 | column: 27, 
217 | }, 218 | end: LineColumn { 219 | line: 1, 220 | column: 27, 221 | }, 222 | }, 223 | ]; 224 | 225 | crate::checker::tests::extraction_test_body(dbg!(fn_with_doc.as_str()), EXPECTED); 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /doc-chunks/src/cluster.rs: -------------------------------------------------------------------------------- 1 | //! Cluster `proc_macro2::Literal`s into `LiteralSets` 2 | 3 | use syn::spanned::Spanned; 4 | use syn::LitStr; 5 | use syn::Macro; 6 | use syn::Token; 7 | 8 | use super::{LiteralSet, TokenTree, TrimmedLiteral}; 9 | use crate::developer::extract_developer_comments; 10 | 11 | use crate::errors::*; 12 | use crate::Span; 13 | 14 | mod kw { 15 | syn::custom_keyword!(doc); 16 | } 17 | 18 | enum DocContent { 19 | LitStr(LitStr), 20 | Macro(Macro), 21 | } 22 | impl DocContent { 23 | fn span(&self) -> proc_macro2::Span { 24 | match self { 25 | Self::LitStr(inner) => inner.span(), 26 | Self::Macro(inner) => inner.span(), 27 | } 28 | } 29 | } 30 | 31 | struct DocComment { 32 | #[allow(dead_code)] 33 | doc: kw::doc, 34 | #[allow(dead_code)] 35 | eq_token: Token![=], 36 | content: DocContent, 37 | } 38 | 39 | impl syn::parse::Parse for DocComment { 40 | fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result { 41 | let doc = input.parse::()?; 42 | let eq_token: Token![=] = input.parse()?; 43 | 44 | let lookahead = input.lookahead1(); 45 | let content = if lookahead.peek(LitStr) { 46 | input.parse().map(DocContent::LitStr)? 47 | } else { 48 | input.parse().map(DocContent::Macro)? 49 | }; 50 | Ok(Self { 51 | doc, 52 | eq_token, 53 | content, 54 | }) 55 | } 56 | } 57 | 58 | /// Cluster comments together, such they appear as continuous text blocks. 59 | #[derive(Debug)] 60 | pub struct Clusters { 61 | pub(crate) set: Vec, 62 | } 63 | 64 | impl Clusters { 65 | /// Only works if the file is processed line by line, otherwise requires a 66 | /// adjacency list. 67 | fn process_literal(&mut self, source: &str, comment: DocComment) -> Result<()> { 68 | let span = Span::from(comment.content.span()); 69 | let trimmed_literal = match comment.content { 70 | DocContent::LitStr(_s) => TrimmedLiteral::load_from(source, span)?, 71 | DocContent::Macro(_) => { 72 | TrimmedLiteral::new_empty(source, span, crate::CommentVariant::MacroDocEqMacro) 73 | } 74 | }; 75 | if let Some(cls) = self.set.last_mut() { 76 | if let Err(trimmed_literal) = cls.add_adjacent(trimmed_literal) { 77 | log::trace!(target: "documentation", 78 | "appending, but failed to append: {trimmed_literal:?} to set {cls:?}", 79 | ); 80 | self.set.push(LiteralSet::from(trimmed_literal)) 81 | } else { 82 | log::trace!("successfully appended to existing: {cls:?} to set"); 83 | } 84 | } else { 85 | self.set.push(LiteralSet::from(trimmed_literal)); 86 | } 87 | Ok(()) 88 | } 89 | 90 | /// Helper function to parse a stream and associate the found literals. 
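///
/// Recurses into nested token groups and feeds every `#[doc = ...]` content it
/// finds to `process_literal`, so adjacent doc lines are clustered into the
/// same `LiteralSet`.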
91 | pub fn parse_token_tree( 92 | &mut self, 93 | source: &str, 94 | stream: proc_macro2::TokenStream, 95 | ) -> Result<()> { 96 | let iter = stream.into_iter(); 97 | for tree in iter { 98 | if let TokenTree::Group(group) = tree { 99 | if let Ok(comment) = syn::parse2::(group.stream()) { 100 | if let Err(e) = self.process_literal(source, comment) { 101 | log::error!("BUG: Failed to guarantee literal content/span integrity: {e}"); 102 | continue; 103 | } 104 | } else { 105 | self.parse_token_tree(source, group.stream())?; 106 | } 107 | }; 108 | } 109 | Ok(()) 110 | } 111 | 112 | /// From the given source text, extracts developer comments to `LiteralSet`s 113 | /// and adds them to this `Clusters` 114 | fn parse_developer_comments(&mut self, source: &str) { 115 | let developer_comments = extract_developer_comments(source); 116 | self.set.extend(developer_comments); 117 | } 118 | 119 | /// Sort the `LiteralSet`s in this `Cluster` by start line descending, to 120 | /// ensure that the comments higher up in the source file appear first to 121 | /// the user 122 | fn ensure_sorted(&mut self) { 123 | self.set.sort_by(|ls1, ls2| ls1.coverage.cmp(&ls2.coverage)); 124 | } 125 | 126 | /// Load clusters from a `&str`. Optionally loads developer comments as 127 | /// well. 128 | pub fn load_from_str(source: &str, doc_comments: bool, dev_comments: bool) -> Result { 129 | let mut chunk = Self { 130 | set: Vec::with_capacity(64), 131 | }; 132 | if doc_comments { 133 | let stream = 134 | syn::parse_str::(source).map_err(Error::ParserFailure)?; 135 | chunk.parse_token_tree(source, stream)?; 136 | } 137 | if dev_comments { 138 | chunk.parse_developer_comments(source); 139 | } 140 | chunk.ensure_sorted(); 141 | Ok(chunk) 142 | } 143 | } 144 | 145 | #[cfg(test)] 146 | mod tests { 147 | use super::*; 148 | 149 | #[test] 150 | fn doc_comment_parse() { 151 | let _ = syn::parse_str::(r########"doc=foo!(bar!(xxx))"########).unwrap(); 152 | let _ = syn::parse_str::(r########"doc="s""########).unwrap(); 153 | let _ = syn::parse_str::(r########"doc=r#"s"#"########).unwrap(); 154 | let _ = syn::parse_str::(r########"doc=r##"s"##"########).unwrap(); 155 | let _ = syn::parse_str::(r########"doc=r###"s"###"########).unwrap(); 156 | let _ = syn::parse_str::(r########"doc=r####"s"####"########).unwrap(); 157 | } 158 | 159 | #[test] 160 | fn create_cluster() { 161 | static CONTENT: &str = r#####" 162 | mod mm_mm { 163 | 164 | /// A 165 | #[doc=foo!(B)] 166 | /// C 167 | #[doc=r##"D"##] 168 | struct X; 169 | 170 | } 171 | "#####; 172 | let clusters = Clusters::load_from_str(CONTENT, true, true).unwrap(); 173 | assert_eq!(clusters.set.len(), 1); 174 | dbg!(&clusters.set[0]); 175 | } 176 | 177 | #[test] 178 | fn space_in_code_block_does_not_break_cluster() { 179 | static CONTENT: &str = r#####" 180 | // ```c 181 | // hugloboi 182 | // 183 | // fucksteufelswuid 184 | // ``` 185 | struct DefinitelyNotZ; 186 | "#####; 187 | let clusters = Clusters::load_from_str(CONTENT, true, true).unwrap(); 188 | assert_eq!(clusters.set.len(), 1); 189 | dbg!(&clusters.set[0]); 190 | } 191 | 192 | #[test] 193 | fn polite() { 194 | static CONTENT: &str = r#####" 195 | // Hello Sir 196 | // 197 | // How are you doing today? 
198 | struct VeryWellThanks; 199 | "#####; 200 | let clusters = Clusters::load_from_str(CONTENT, true, true).unwrap(); 201 | assert_eq!(clusters.set.len(), 1); 202 | dbg!(&clusters.set[0]); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/config/hunspell.rs: -------------------------------------------------------------------------------- 1 | //! Hunspell checker configuration. 2 | 3 | use super::{Lang5, SearchDirs, WrappedRegex}; 4 | use std::path::{Path, PathBuf}; 5 | 6 | use crate::errors::*; 7 | 8 | use serde::{Deserialize, Serialize}; 9 | 10 | const fn yes() -> bool { 11 | true 12 | } 13 | 14 | #[derive(Deserialize, Serialize, Debug, Clone)] 15 | pub struct Quirks { 16 | /// A regular expression, whose capture groups will be checked, instead of 17 | /// the initial token. Only the first one that matches will be used to split 18 | /// the word. 19 | #[serde(default)] 20 | pub transform_regex: Vec, 21 | /// Allow concatenated words instead of dashed connection. Note that this 22 | /// only applies, if one of the suggested replacements has an item that is 23 | /// equivalent except for addition dashes (`-`). 24 | #[serde(default)] 25 | pub allow_concatenation: bool, 26 | /// The counterpart of `allow_concatenation`. Accepts words which have 27 | /// replacement suggestions that contain additional dashes. 28 | #[serde(default)] 29 | pub allow_dashes: bool, 30 | /// Treats sequences of emojis as OK. 31 | #[serde(default = "yes")] 32 | pub allow_emojis: bool, 33 | /// Check the expressions in the footnote references. By default this is 34 | /// turned on to remain backwards compatible but disabling it could be 35 | /// particularly useful when one uses abbreviations instead of numbers as 36 | /// footnote references. For instance by default the fragment `hello[^xyz]` 37 | /// would be spellchecked as `helloxyz` which is obviously a misspelled 38 | /// word, but by turning this check off, it will skip validating the 39 | /// reference altogether and will only check the word `hello`. 40 | #[serde(default = "yes")] 41 | pub check_footnote_references: bool, 42 | } 43 | 44 | impl Default for Quirks { 45 | fn default() -> Self { 46 | Self { 47 | transform_regex: Vec::new(), 48 | allow_concatenation: false, 49 | allow_dashes: false, 50 | allow_emojis: true, 51 | check_footnote_references: true, 52 | } 53 | } 54 | } 55 | 56 | impl Quirks { 57 | pub(crate) const fn allow_concatenated(&self) -> bool { 58 | self.allow_concatenation 59 | } 60 | 61 | pub(crate) const fn allow_dashed(&self) -> bool { 62 | self.allow_dashes 63 | } 64 | 65 | pub(crate) const fn allow_emojis(&self) -> bool { 66 | self.allow_emojis 67 | } 68 | 69 | pub(crate) fn transform_regex(&self) -> &[WrappedRegex] { 70 | &self.transform_regex 71 | } 72 | 73 | pub(crate) fn check_footnote_references(&self) -> bool { 74 | self.check_footnote_references 75 | } 76 | } 77 | 78 | fn default_tokenization_splitchars() -> String { 79 | "\",;:.!?#(){}[]|/_-‒'`&@§¶…".to_owned() 80 | } 81 | 82 | pub type ZetConfig = HunspellConfig; 83 | pub type SpellbookConfig = HunspellConfig; 84 | 85 | #[derive(Deserialize, Serialize, Debug, Clone)] 86 | #[serde(deny_unknown_fields)] 87 | pub struct HunspellConfig { 88 | /// The language we want to check against, used as the dictionary and 89 | /// affixes file name. 90 | #[serde(default)] 91 | pub lang: Lang5, 92 | /// Additional search directories for `.dic` and `.aff` files. 
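///
/// Searched in order and ahead of the OS specific defaults; relative entries
/// are made absolute against the provided base path (typically the
/// configuration file's directory) by `sanitize_paths`.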
93 | // must be option so it can be omitted in the config 94 | #[serde(default)] 95 | pub search_dirs: SearchDirs, 96 | 97 | /// Avoid the OS provided dictionaries and only use the builtin ones, 98 | /// besides those defined in `extra_dictionaries`. 99 | #[serde(default)] 100 | pub skip_os_lookups: bool, 101 | 102 | /// Use the builtin dictionaries as last resort. Usually combined with 103 | /// `skip_os_lookups=true` to enforce the `builtin` usage. Does not prevent 104 | /// the usage of `extra_dictionaries`. 105 | #[serde(default)] 106 | pub use_builtin: bool, 107 | 108 | #[serde(default = "default_tokenization_splitchars")] 109 | pub tokenization_splitchars: String, 110 | 111 | /// Additional dictionaries for topic specific lingo. 112 | #[serde(default)] 113 | pub extra_dictionaries: Vec, 114 | /// Additional quirks besides dictionary lookups. 115 | #[serde(default)] 116 | pub quirks: Quirks, 117 | } 118 | 119 | impl Default for HunspellConfig { 120 | fn default() -> Self { 121 | Self { 122 | lang: Lang5::en_US, 123 | search_dirs: SearchDirs::default(), 124 | extra_dictionaries: Vec::default(), 125 | quirks: Quirks::default(), 126 | tokenization_splitchars: default_tokenization_splitchars(), 127 | skip_os_lookups: false, 128 | use_builtin: true, 129 | } 130 | } 131 | } 132 | 133 | impl HunspellConfig { 134 | pub fn lang(&self) -> Lang5 { 135 | self.lang 136 | } 137 | 138 | pub fn search_dirs(&self) -> impl Iterator { 139 | self.search_dirs.iter(!self.skip_os_lookups) 140 | } 141 | 142 | pub fn extra_dictionaries(&self) -> impl Iterator { 143 | self.extra_dictionaries.iter() 144 | } 145 | 146 | pub fn sanitize_paths(&mut self, base: &Path) -> Result<()> { 147 | self.search_dirs = self 148 | .search_dirs 149 | .iter(!self.skip_os_lookups) 150 | .filter_map(|search_dir| { 151 | let abspath = if !search_dir.is_absolute() { 152 | base.join(search_dir) 153 | } else { 154 | search_dir.to_owned() 155 | }; 156 | 157 | abspath.canonicalize().ok().inspect(|abspath| { 158 | log::trace!( 159 | "Sanitized ({} + {}) -> {}", 160 | base.display(), 161 | search_dir.display(), 162 | abspath.display() 163 | ); 164 | }) 165 | }) 166 | .collect::>() 167 | .into(); 168 | 169 | // convert all extra dictionaries to absolute paths 170 | 171 | 'o: for extra_dic in self.extra_dictionaries.iter_mut() { 172 | for search_dir in 173 | self.search_dirs 174 | .iter(!self.skip_os_lookups) 175 | .filter_map(|search_dir| { 176 | if !extra_dic.is_absolute() { 177 | base.join(search_dir).canonicalize().ok() 178 | } else { 179 | Some(search_dir.to_owned()) 180 | } 181 | }) 182 | { 183 | let abspath = if !extra_dic.is_absolute() { 184 | search_dir.join(&extra_dic) 185 | } else { 186 | continue 'o; 187 | }; 188 | if let Ok(abspath) = abspath.canonicalize() { 189 | if abspath.is_file() { 190 | *extra_dic = abspath; 191 | continue 'o; 192 | } 193 | } else { 194 | log::debug!("Failed to canonicalize {}", abspath.display()); 195 | } 196 | } 197 | bail!( 198 | "Could not find extra dictionary {} in any of the search paths", 199 | extra_dic.display() 200 | ); 201 | } 202 | 203 | Ok(()) 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /src/traverse/iter.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use crate::Documentation; 3 | 4 | use fs_err as fs; 5 | 6 | use std::path::{Path, PathBuf}; 7 | 8 | /// An iterator traversing module hierarchies yielding paths 9 | #[derive(Debug, Clone)] 10 | pub struct TraverseModulesIter { 
11 | /// state for enqueuing child files and the depth at which they are found 12 | queue: VecDeque<(PathBuf, usize)>, 13 | /// zero limits to the provided path, if it is a directory, all children are 14 | /// collected 15 | max_depth: usize, 16 | } 17 | 18 | impl Default for TraverseModulesIter { 19 | fn default() -> Self { 20 | Self { 21 | max_depth: usize::MAX, 22 | queue: VecDeque::with_capacity(128), 23 | } 24 | } 25 | } 26 | 27 | impl TraverseModulesIter { 28 | fn add_initial_path
(&mut self, path: P, level: usize) -> Result<()> 29 | where 30 | P: AsRef, 31 | { 32 | let path = path.as_ref(); 33 | let path = fs::canonicalize(path)?; 34 | let meta = fs::metadata(&path)?; 35 | if meta.is_file() { 36 | self.queue.push_front((path, level)); 37 | } else if meta.is_dir() { 38 | ignore::WalkBuilder::new(path) 39 | .git_ignore(true) 40 | .max_depth(1.into()) 41 | .same_file_system(true) 42 | .skip_stdout(true) 43 | .build() 44 | .filter_map(|entry| { 45 | entry 46 | .ok() 47 | .filter(|entry| entry.file_type().map(|ft| ft.is_file()).unwrap_or(false)) 48 | .map(|x| x.path().to_owned()) 49 | }) 50 | .filter(|path: &PathBuf| { 51 | path.to_str() 52 | .map(|x| x.to_owned()) 53 | .filter(|path| path.ends_with(".rs")) 54 | .is_some() 55 | }) 56 | .try_for_each::<_, Result<()>>(|path| { 57 | log::trace!("🌱 using path {} as seed recursion dir", path.display()); 58 | self.queue.push_front((path, level)); 59 | Ok(()) 60 | })?; 61 | } 62 | Ok(()) 63 | } 64 | 65 | #[allow(unused)] 66 | pub fn with_multi(entries: I) -> Result 67 | where 68 | P: AsRef, 69 | J: Iterator, 70 | I: IntoIterator, 71 | { 72 | let mut me = Self::default(); 73 | for path in entries.into_iter() { 74 | me.add_initial_path(path, 0)?; 75 | } 76 | Ok(me) 77 | } 78 | 79 | pub fn with_depth_limit>(path: P, max_depth: usize) -> Result { 80 | let mut me = Self { 81 | max_depth, 82 | ..Default::default() 83 | }; 84 | me.add_initial_path(path, 0)?; 85 | Ok(me) 86 | } 87 | 88 | /// Create a new path with (almost) infinite depth bounds 89 | #[allow(unused)] 90 | pub fn new>(path: P) -> Result { 91 | Self::with_depth_limit(path, usize::MAX) 92 | } 93 | 94 | pub fn collect_modules(&mut self, path: &Path, level: usize) -> Result<()> { 95 | if path.is_file() { 96 | log::trace!("🥞 collecting mods declared in file {}", path.display()); 97 | self.queue.extend( 98 | extract_modules_from_file(path)? 99 | .into_iter() 100 | .map(|item| (item, level)), 101 | ); 102 | } else { 103 | log::warn!("🥞 Only dealing with files, dropping {}", path.display()); 104 | } 105 | Ok(()) 106 | } 107 | } 108 | 109 | impl Iterator for TraverseModulesIter { 110 | type Item = PathBuf; 111 | fn next(&mut self) -> Option { 112 | if let Some((path, level)) = self.queue.pop_front() { 113 | if level < self.max_depth { 114 | // ignore the error here, there is nothing we can do really 115 | // TODO potentially consider returning a result covering this 116 | let _ = self.collect_modules(path.as_path(), level + 1); 117 | } 118 | Some(path) 119 | } else { 120 | None 121 | } 122 | } 123 | } 124 | 125 | /// traverse path with a depth limit, if the path is a directory all its 126 | /// children will be collected instead 127 | // TODO should not read the documentation, that is out of scope. 128 | // TODO should not have knowledge of `dev_comments`. 129 | pub(crate) fn traverse( 130 | path: &Path, 131 | doc_comments: bool, 132 | dev_comments: bool, 133 | ) -> Result> { 134 | traverse_with_depth_limit(path, usize::MAX, doc_comments, dev_comments) 135 | } 136 | 137 | /// traverse path with a depth limit, if the path is a directory all its 138 | /// children will be collected as depth 0 instead 139 | pub(crate) fn traverse_with_depth_limit( 140 | path: &Path, 141 | max_depth: usize, 142 | doc_comments: bool, 143 | dev_comments: bool, 144 | ) -> Result> { 145 | let it = TraverseModulesIter::with_depth_limit(path, max_depth)? 
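        // Lazily read each discovered module file and parse its documentation,
        // dropping files that cannot be read or that yield no documentation at all.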
146 | .filter_map(move |path: PathBuf| -> Option { 147 | fs::read_to_string(&path).ok().map(|content| { 148 | Documentation::load_from_str( 149 | ContentOrigin::RustSourceFile(path), 150 | content.as_str(), 151 | doc_comments, 152 | dev_comments, 153 | ) 154 | }) 155 | }) 156 | .filter(|documentation| !documentation.is_empty()); 157 | Ok(it) 158 | } 159 | 160 | #[cfg(test)] 161 | mod tests { 162 | use super::*; 163 | 164 | fn demo_dir() -> PathBuf { 165 | manifest_dir().join("demo") 166 | } 167 | 168 | #[test] 169 | fn traverse_main_rs() { 170 | let _ = env_logger::builder() 171 | .filter_level(log::LevelFilter::Trace) 172 | .is_test(true) 173 | .try_init(); 174 | let manifest_path = demo_dir().join("src/main.rs"); 175 | 176 | let expect = indexmap::indexset! { 177 | "src/main.rs", 178 | "src/lib.rs", 179 | "src/nested/mod.rs", 180 | "src/nested/justone.rs", 181 | "src/nested/justtwo.rs", 182 | "src/nested/again/mod.rs", 183 | "src/nested/again/code.rs", 184 | "src/nested/fragments.rs", 185 | "src/nested/fragments/enumerate.rs", 186 | "src/nested/fragments/simple.rs", 187 | } 188 | .into_iter() 189 | .map(|sub| demo_dir().join(sub)) 190 | .collect::>(); 191 | 192 | let found = TraverseModulesIter::new(manifest_path.as_path()) 193 | .expect("Must succeed to traverse file tree.") 194 | .into_iter() 195 | .collect::>(); 196 | 197 | let unexpected_files: Vec<_> = dbg!(&found) 198 | .iter() 199 | .filter(|found_path| !expect.contains(*found_path)) 200 | .collect(); 201 | assert_eq!(Vec::<&PathBuf>::new(), unexpected_files); 202 | 203 | let missing_files: Vec<_> = expect 204 | .iter() 205 | .filter(|expected_path| !found.contains(expected_path)) 206 | .collect(); 207 | assert_eq!(Vec::<&PathBuf>::new(), missing_files); 208 | 209 | assert_eq!(found.len(), expect.len()); 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/checker/quirks.rs: -------------------------------------------------------------------------------- 1 | //! A set of quirks, not necessarily specific to a checker 2 | 3 | use crate::Range; 4 | use fancy_regex::Regex; 5 | 6 | /// Returns `true` iff the replacements contains a variant of `word` without 7 | /// dashes. 8 | pub(crate) fn replacements_contain_dashless>(word: &str, replacements: &[T]) -> bool { 9 | let dashless = word.chars().filter(|c| *c != '-').collect::(); 10 | // if the word does not contain any dashes, skip the replacement iterations 11 | if dashless == word { 12 | return false; 13 | } 14 | replacements 15 | .iter() 16 | .map(|s| s.as_ref()) 17 | .any(|x| x == &dashless) 18 | } 19 | 20 | /// Returns `true` iff the replacements contains a variant of `word` with 21 | /// additional dashes. 22 | pub(crate) fn replacements_contain_dashed>(word: &str, replacements: &[T]) -> bool { 23 | // before doing lots of work, check if the word itself contains a dash, if so 24 | // the below logic cannot yield and positive results 25 | if word.chars().any(|c| c == '-') { 26 | return false; 27 | } 28 | 29 | replacements 30 | .iter() 31 | .map(|s| s.as_ref()) 32 | // avoid lots of string iterations in find 33 | .filter(|s| s.as_bytes().first() == word.as_bytes().first()) 34 | .any(|s| itertools::equal(s.chars().filter(|c| *c != '-'), word.chars())) 35 | } 36 | 37 | /// Transformed word with information on the transformation outcome. 38 | #[derive(Debug, Eq, PartialEq)] 39 | pub(crate) enum Transformed<'i> { 40 | /// A allow-listed chunk 41 | Whitelisted((Range, &'i str)), 42 | /// A set of word-fragments to be checked. 
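    /// Produced when a transform regex matched with capture groups; each
    /// captured fragment carries its own sub-range and is checked separately.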
43 | Fragments(Vec<(Range, &'i str)>), 44 | /// A word to be checked. Equiv to no match. 45 | Atomic((Range, &'i str)), 46 | } 47 | 48 | /// Transforms a word into a set of fragment-ranges and associated str slices. 49 | pub(crate) fn transform<'i, R: AsRef>( 50 | transform_regex: &[R], 51 | word: &'i str, 52 | range: Range, 53 | ) -> Transformed<'i> { 54 | let mut q = std::collections::VecDeque::<(Range, &'_ str)>::with_capacity(32); 55 | let mut words = Vec::with_capacity(16); 56 | let mut whitelisted = 0usize; 57 | q.push_back((range.clone(), word)); 58 | while let Some((range, word)) = q.pop_front() { 59 | // work on a fragment now 60 | match transform_inner(transform_regex, word, range.clone()) { 61 | // we try to recursively match the fragments with the regex expr until they become atomic words or whitelisted 62 | Transformed::Fragments(v) => q.extend(v), 63 | Transformed::Atomic(word) => words.push(word), 64 | Transformed::Whitelisted(_) => whitelisted += 1, 65 | } 66 | } 67 | 68 | // no match found at all, this word is "atomic" and will be checked as is 69 | if whitelisted == 0usize { 70 | // empty means nothing, one word with the same range means we only found the initial provided word 71 | if words.is_empty() || (words.len() == 1 && words[0].0.len() == word.len()) { 72 | return Transformed::Atomic((range, word)); 73 | } 74 | } 75 | 76 | if !words.is_empty() { 77 | // collect all the words as fragments again (they actually really are) 78 | Transformed::Fragments(words) 79 | } else { 80 | // if there are no words to be checked, everything is whitelisted 81 | Transformed::Whitelisted((range, word)) 82 | } 83 | } 84 | 85 | /// Inner loop transform 86 | /// 87 | /// Returns `Some(vec![..])` if any captures were found. 88 | fn transform_inner<'i, R: AsRef>( 89 | transform_regex: &[R], 90 | word: &'i str, 91 | range: Range, 92 | ) -> Transformed<'i> { 93 | for regex in transform_regex.iter().map(AsRef::as_ref) { 94 | match regex.captures(word) { 95 | Ok(Some(captures)) => { 96 | // first one is always the full match 97 | if captures.len() == 1 { 98 | // means match, but no captures, 99 | // which is equiv to an implicit whitelist 100 | return Transformed::Whitelisted((range, word)); 101 | } 102 | let intermediate = captures 103 | .iter() 104 | .skip(1) 105 | .flatten() 106 | .map(|m| { 107 | let intra_word_range = m.start()..m.end(); 108 | log::trace!(target:"quirks", 109 | "Found capture for word >{}<, with match >{}< and capture >{}< at {:?}", 110 | captures.get(0).unwrap().as_str(), 111 | word, 112 | m.as_str(), 113 | &intra_word_range 114 | ); 115 | let offset = word 116 | .char_indices() 117 | .take_while(|(byte_pos, _)| m.start() > *byte_pos) 118 | .count(); 119 | let range = Range { 120 | start: range.start + offset, 121 | end: range.start + offset + m.as_str().chars().count(), 122 | }; 123 | (range, &word[intra_word_range]) 124 | }) 125 | .collect::>(); 126 | 127 | return Transformed::Fragments(intermediate); 128 | } 129 | Ok(None) => { 130 | // no regex match, try the next regex 131 | continue; 132 | } 133 | Err(e) => { 134 | log::warn!(target:"quirks", "Matching regex >{}< errored: {}", regex.as_str(), e); 135 | break; 136 | } 137 | } 138 | } 139 | // nothing matched, check the entire word instead 140 | Transformed::Atomic((range, word)) 141 | } 142 | 143 | #[cfg(test)] 144 | mod tests { 145 | use super::*; 146 | use crate::config::WrappedRegex; 147 | use env_logger; 148 | 149 | #[test] 150 | fn dashed() { 151 | let _ = env_logger::builder() 152 | .is_test(true) 153 | 
.filter(None, log::LevelFilter::Trace) 154 | .try_init(); 155 | 156 | const REPLACEMENTS: &'static [&'static str] = &["fffff", "qqq", "z", "zeta-ray"]; 157 | const WORD: &str = "zetaray"; 158 | assert!(replacements_contain_dashed(WORD, REPLACEMENTS)); 159 | } 160 | 161 | #[test] 162 | fn dashless() { 163 | let _ = env_logger::builder() 164 | .is_test(true) 165 | .filter(None, log::LevelFilter::Trace) 166 | .try_init(); 167 | 168 | const WORD: &str = "zeta-ray"; 169 | const REPLACEMENTS: &'static [&'static str] = &["fffff", "qqq", "z", "zetaray"]; 170 | assert!(replacements_contain_dashless(WORD, REPLACEMENTS)); 171 | } 172 | 173 | #[test] 174 | fn transformer() { 175 | let _ = env_logger::builder() 176 | .is_test(true) 177 | .filter(None, log::LevelFilter::Trace) 178 | .try_init(); 179 | 180 | let re = vec![ 181 | WrappedRegex::from(Regex::new("^[0-9]+x$").unwrap()), //whitelist 182 | WrappedRegex::from(Regex::new(r#"^'([^\s]+)'$"#).unwrap()), 183 | WrappedRegex::from(Regex::new("(Alpha)(beta)").unwrap()), 184 | ]; 185 | 186 | let words = ["2x", r#"''so-to-speak''"#, "Alphabeta", "Nothing"]; 187 | 188 | // whitelist 189 | assert_eq!( 190 | transform(re.as_slice(), words[0], 10..24), 191 | Transformed::Whitelisted((10..24, words[0])) 192 | ); 193 | 194 | // single quoted, recursive 2x 195 | assert_eq!( 196 | transform(re.as_slice(), words[1], 10..25), 197 | Transformed::Fragments(vec![(12..23, &words[1][2..13])]) 198 | ); 199 | 200 | // multi capture 201 | assert_eq!( 202 | transform(re.as_slice(), words[2], 10..19), 203 | Transformed::Fragments(vec![(10..15, &words[2][0..5]), (15..19, &words[2][5..9]),]) 204 | ); 205 | 206 | // no match 207 | assert_eq!( 208 | transform(re.as_slice(), words[3], 10..17), 209 | Transformed::Atomic((10..17, words[3])) 210 | ); 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | -------------------------------------------------------------------------------- /doc-chunks/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Doc Chunks 2 | //! 3 | //! `Documentation` is a representation of one or multiple documents. 4 | //! 5 | //! A `literal` is a token provided by `proc_macro2` or `ra_ap_syntax` crate, which is then converted by 6 | //! means of `TrimmedLiteral` using `Cluster`ing into a `CheckableChunk` (mostly 7 | //! named just `chunk`). 8 | //! 9 | //! 
`CheckableChunk`s can consist of multiple fragments, where each fragment can 10 | //! span multiple lines, yet each fragment is covering a consecutive `Span` in 11 | //! the origin content. Each fragment also has a direct mapping to the 12 | //! `CheckableChunk` internal string representation. 13 | //! 14 | //! And `Documentation` holds one or many `CheckableChunks` per file path. 15 | 16 | #![deny(unused_crate_dependencies)] 17 | 18 | // contains test helpers 19 | pub mod span; 20 | pub mod testcase; 21 | pub use self::span::Span; 22 | pub use proc_macro2::LineColumn; 23 | 24 | pub mod util; 25 | use self::util::{load_span_from, sub_char_range}; 26 | 27 | use indexmap::IndexMap; 28 | use proc_macro2::TokenTree; 29 | use rayon::prelude::*; 30 | use serde::Deserialize; 31 | use std::path::PathBuf; 32 | use toml::Spanned; 33 | 34 | /// Range based on `usize`, simplification. 35 | pub type Range = core::ops::Range; 36 | 37 | /// Apply an offset to `start` and `end` members, equaling a shift of the range. 38 | pub fn apply_offset(range: &mut Range, offset: usize) { 39 | range.start = range.start.saturating_add(offset); 40 | range.end = range.end.saturating_add(offset); 41 | } 42 | 43 | pub mod chunk; 44 | pub mod cluster; 45 | mod developer; 46 | pub mod errors; 47 | pub mod literal; 48 | pub mod literalset; 49 | pub mod markdown; 50 | 51 | pub use chunk::*; 52 | pub use cluster::*; 53 | pub use errors::*; 54 | pub use literal::*; 55 | pub use literalset::*; 56 | pub use markdown::*; 57 | 58 | /// Collection of all the documentation entries across the project 59 | #[derive(Debug, Clone)] 60 | pub struct Documentation { 61 | /// Mapping of a path to documentation literals 62 | index: IndexMap>, 63 | } 64 | 65 | impl Default for Documentation { 66 | fn default() -> Self { 67 | Self::new() 68 | } 69 | } 70 | 71 | impl Documentation { 72 | /// Create a new and empty doc. 73 | pub fn new() -> Self { 74 | Self { 75 | index: IndexMap::with_capacity(64), 76 | } 77 | } 78 | 79 | /// Check if a particular key is contained. 80 | pub fn contains_key(&self, key: &ContentOrigin) -> bool { 81 | self.index.contains_key(key) 82 | } 83 | 84 | /// Check if the document contains any checkable items. 85 | #[inline(always)] 86 | pub fn is_empty(&self) -> bool { 87 | self.index.is_empty() 88 | } 89 | 90 | /// Borrowing iterator across content origins and associated sets of chunks. 91 | #[inline(always)] 92 | pub fn iter(&self) -> impl Iterator)> { 93 | self.index.iter() 94 | } 95 | 96 | /// Borrowing iterator across content origins and associated sets of chunks. 97 | pub fn par_iter(&self) -> impl ParallelIterator)> { 98 | self.index.par_iter() 99 | } 100 | 101 | /// Consuming iterator across content origins and associated sets of chunks. 102 | pub fn into_par_iter( 103 | self, 104 | ) -> impl ParallelIterator)> { 105 | self.index.into_par_iter() 106 | } 107 | 108 | /// Extend `self` by joining in other `Documentation`s. 109 | pub fn extend(&mut self, other: I) 110 | where 111 | I: IntoIterator), IntoIter = J>, 112 | J: Iterator)>, 113 | { 114 | other 115 | .into_iter() 116 | .for_each(|(origin, chunks): (_, Vec)| { 117 | self.add_inner(origin, chunks); 118 | }); 119 | } 120 | 121 | /// Adds a set of `CheckableChunk`s to the documentation to be checked. 
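// Sketch (not part of the crate): `add_inner` below merges rather than
// overwrites, so feeding the same `ContentOrigin` twice accumulates chunks
// under a single entry. Assuming `origin`, `a` and `b` were built elsewhere:
//
//     let mut docs = Documentation::new();
//     docs.add_inner(origin.clone(), a); // creates the entry
//     docs.add_inner(origin.clone(), b); // appends to the same entry
//     assert_eq!(docs.entry_count(), 1); // still a single origin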
122 | pub fn add_inner(&mut self, origin: ContentOrigin, mut chunks: Vec) { 123 | self.index 124 | .entry(origin) 125 | .and_modify(|acc: &mut Vec| { 126 | acc.append(&mut chunks); 127 | }) 128 | .or_insert_with(|| chunks); 129 | // Ok(()) TODO make this failable 130 | } 131 | 132 | /// Adds a rust content str to the documentation. 133 | pub fn add_rust( 134 | &mut self, 135 | origin: ContentOrigin, 136 | content: &str, 137 | doc_comments: bool, 138 | dev_comments: bool, 139 | ) -> Result<()> { 140 | let cluster = Clusters::load_from_str(content, doc_comments, dev_comments)?; 141 | 142 | let chunks = Vec::::from(cluster); 143 | self.add_inner(origin, chunks); 144 | Ok(()) 145 | } 146 | 147 | /// Adds a content string to the documentation sourced from the 148 | /// `description` field in a `Cargo.toml` manifest. 149 | pub fn add_cargo_manifest_description( 150 | &mut self, 151 | path: PathBuf, 152 | manifest_content: &str, 153 | ) -> Result<()> { 154 | fn extract_range_of_description(manifest_content: &str) -> Result { 155 | #[derive(Deserialize, Debug)] 156 | struct Manifest { 157 | package: Spanned, 158 | } 159 | 160 | #[derive(Deserialize, Debug)] 161 | struct Package { 162 | description: Spanned, 163 | } 164 | 165 | let value: Manifest = toml::from_str(manifest_content)?; 166 | let d = value.package.into_inner().description; 167 | let range = d.span(); 168 | Ok(range) 169 | } 170 | 171 | let mut range = extract_range_of_description(manifest_content)?; 172 | let description = sub_char_range(manifest_content, range.clone()); 173 | 174 | // Attention: `description` does include `\"\"\"` as well as `\\\n`, the latter is not a big issue, 175 | // but the trailing start and end delimiters are. 176 | // TODO: split into multiple on `\\\n` and create multiple range/span mappings. 177 | let description = if range.len() > 6 { 178 | if description.starts_with("\"\"\"") { 179 | range.start += 3; 180 | range.end -= 3; 181 | assert!(!range.is_empty()); 182 | } 183 | dbg!(&description[3..(description.len()) - 3]) 184 | } else { 185 | description 186 | }; 187 | 188 | fn convert_range_to_span(content: &str, range: Range) -> Option { 189 | let mut line = 0_usize; 190 | let mut column = 0_usize; 191 | let mut prev = '\n'; 192 | let mut start = None; 193 | for (offset, c) in content.chars().enumerate() { 194 | if prev == '\n' { 195 | column = 0; 196 | line += 1; 197 | } 198 | prev = c; 199 | 200 | if offset == range.start { 201 | start = Some(LineColumn { line, column }); 202 | continue; 203 | } 204 | // take care of inclusivity 205 | if offset + 1 == range.end { 206 | let end = LineColumn { line, column }; 207 | return Some(Span { 208 | start: start.unwrap(), 209 | end, 210 | }); 211 | } 212 | column += 1; 213 | } 214 | None 215 | } 216 | 217 | let span = convert_range_to_span(manifest_content, range.clone()).expect( 218 | "Description is part of the manifest since it was parsed from the same source. qed", 219 | ); 220 | let origin = ContentOrigin::CargoManifestDescription(path); 221 | let source_mapping = dbg!(indexmap::indexmap! { 222 | range => span 223 | }); 224 | self.add_inner( 225 | origin, 226 | vec![CheckableChunk::from_str( 227 | description, 228 | source_mapping, 229 | CommentVariant::TomlEntry, 230 | )], 231 | ); 232 | Ok(()) 233 | } 234 | 235 | /// Adds a common mark content str to the documentation. 
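// Sketch (not part of the crate): `add_commonmark` below maps the whole file
// into a single chunk whose span starts at line 1, column 0 and ends on the
// last line with an inclusive column of `chars - 1`. For the two-line input
// "# Title\nSecond line" that works out to:
//
//     start = LineColumn { line: 1, column: 0 }
//     end   = LineColumn { line: 2, column: 10 }  // "Second line" has 11 chars
//     source_mapping = { 0..19 => Span { start, end } }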
236 | pub fn add_commonmark(&mut self, origin: ContentOrigin, content: &str) -> Result<()> { 237 | // extract the full content span and range 238 | let start = LineColumn { line: 1, column: 0 }; 239 | let end = content 240 | .lines() 241 | .enumerate() 242 | .last() 243 | .map(|(idx, linecontent)| (idx + 1, linecontent)) 244 | .map(|(linenumber, linecontent)| LineColumn { 245 | line: linenumber, 246 | column: linecontent.chars().count().saturating_sub(1), 247 | }) 248 | .ok_or_else(|| { 249 | Error::Span( 250 | "Common mark / markdown file does not contain a single line".to_string(), 251 | ) 252 | })?; 253 | 254 | let span = Span { start, end }; 255 | let source_mapping = indexmap::indexmap! { 256 | 0..content.chars().count() => span 257 | }; 258 | self.add_inner( 259 | origin, 260 | vec![CheckableChunk::from_str( 261 | content, 262 | source_mapping, 263 | CommentVariant::CommonMark, 264 | )], 265 | ); 266 | Ok(()) 267 | } 268 | 269 | /// Obtain the set of chunks for a particular origin. 270 | #[inline(always)] 271 | pub fn get(&self, origin: &ContentOrigin) -> Option<&[CheckableChunk]> { 272 | self.index.get(origin).map(AsRef::as_ref) 273 | } 274 | 275 | /// Count the number of origins. 276 | #[inline(always)] 277 | pub fn entry_count(&self) -> usize { 278 | self.index.len() 279 | } 280 | 281 | /// Load a document from a single string with a defined origin. 282 | pub fn load_from_str( 283 | origin: ContentOrigin, 284 | content: &str, 285 | doc_comments: bool, 286 | dev_comments: bool, 287 | ) -> Self { 288 | let mut docs = Documentation::new(); 289 | 290 | match origin.clone() { 291 | ContentOrigin::RustDocTest(_path, span) => { 292 | if let Ok(excerpt) = load_span_from(&mut content.as_bytes(), span) { 293 | docs.add_rust(origin.clone(), excerpt.as_str(), doc_comments, dev_comments) 294 | } else { 295 | // TODO 296 | Ok(()) 297 | } 298 | } 299 | origin @ ContentOrigin::RustSourceFile(_) => { 300 | docs.add_rust(origin, content, doc_comments, dev_comments) 301 | } 302 | ContentOrigin::CargoManifestDescription(path) => { 303 | docs.add_cargo_manifest_description(path, content) 304 | } 305 | origin @ ContentOrigin::CommonMarkFile(_) => docs.add_commonmark(origin, content), 306 | origin @ ContentOrigin::TestEntityRust => { 307 | docs.add_rust(origin, content, doc_comments, dev_comments) 308 | } 309 | origin @ ContentOrigin::TestEntityCommonMark => docs.add_commonmark(origin, content), 310 | } 311 | .unwrap_or_else(move |e| { 312 | log::warn!( 313 | "BUG: Failed to load content from {origin} (dev_comments={dev_comments:?}): {e:?}", 314 | ); 315 | }); 316 | docs 317 | } 318 | 319 | pub fn len(&self) -> usize { 320 | self.index.len() 321 | } 322 | } 323 | 324 | impl IntoIterator for Documentation { 325 | type Item = (ContentOrigin, Vec); 326 | type IntoIter = indexmap::map::IntoIter>; 327 | 328 | fn into_iter(self) -> Self::IntoIter { 329 | self.index.into_iter() 330 | } 331 | } 332 | -------------------------------------------------------------------------------- /src/checker/mod.rs: -------------------------------------------------------------------------------- 1 | //! Checker 2 | //! 3 | //! Trait to handle additional trackers. Contains also helpers to avoid 4 | //! re-implementing generic algorithms again and again, i.e. tokenization. 
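// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the crate: the smallest possible `Checker`
// implementation, to show the trait surface defined further down in this
// module. `NoopChecker` is a hypothetical type that never reports anything.
// ---------------------------------------------------------------------------
#[allow(dead_code)]
struct NoopChecker;

impl Checker for NoopChecker {
    type Config = ();

    fn detector() -> Detector {
        // A real checker returns its own `Detector` variant here.
        unimplemented!("illustrative sketch only")
    }

    fn check<'a, 's>(
        &self,
        _origin: &ContentOrigin,
        _chunks: &'a [CheckableChunk],
    ) -> Result<Vec<Suggestion<'s>>>
    where
        'a: 's,
    {
        // A real checker tokenizes each chunk and pushes `Suggestion`s; this
        // sketch reports nothing.
        Ok(Vec::new())
    }
}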
5 | 6 | use crate::{CheckableChunk, Config, ContentOrigin, Detector, Suggestion}; 7 | 8 | use crate::errors::*; 9 | 10 | mod cached; 11 | use self::cached::Cached; 12 | 13 | use std::collections::HashSet; 14 | 15 | mod tokenize; 16 | 17 | #[cfg(feature = "hunspell")] 18 | pub(crate) use self::hunspell::HunspellChecker; 19 | #[cfg(feature = "nlprules")] 20 | pub(crate) use self::nlprules::NlpRulesChecker; 21 | #[cfg(feature = "spellbook")] 22 | pub(crate) use self::spellbook::SpellbookChecker; 23 | pub(crate) use self::tokenize::*; 24 | #[cfg(feature = "zet")] 25 | pub(crate) use self::zspell::ZetChecker; 26 | 27 | #[cfg(feature = "hunspell")] 28 | mod hunspell; 29 | 30 | #[cfg(feature = "zet")] 31 | mod zspell; 32 | 33 | #[cfg(feature = "spellbook")] 34 | mod spellbook; 35 | 36 | #[cfg(feature = "nlprules")] 37 | mod nlprules; 38 | 39 | mod dictaffix; 40 | 41 | #[cfg(any(feature = "spellbook", feature = "zet", feature = "hunspell"))] 42 | mod quirks; 43 | 44 | /// Implementation for a checker 45 | pub trait Checker { 46 | type Config; 47 | 48 | fn detector() -> Detector; 49 | 50 | fn check<'a, 's>( 51 | &self, 52 | origin: &ContentOrigin, 53 | chunks: &'a [CheckableChunk], 54 | ) -> Result>> 55 | where 56 | 'a: 's; 57 | } 58 | 59 | /// Check a full document for violations using the tools we have. 60 | /// 61 | /// Only configured checkers are used. 62 | pub struct Checkers { 63 | hunspell: Option, 64 | #[cfg(feature = "zet")] 65 | zet: Option, 66 | #[cfg(feature = "spellbook")] 67 | spellbook: Option, 68 | nlprules: Option, 69 | } 70 | 71 | impl Checkers { 72 | pub fn new(config: Config) -> Result { 73 | macro_rules! create_checker { 74 | ($feature:literal, $checker:ty, $config:expr, $checker_config:expr) => { 75 | if !cfg!(feature = $feature) { 76 | log::debug!("Feature {} is disabled by compilation.", $feature); 77 | None 78 | } else { 79 | let config = $config; 80 | let detector = <$checker>::detector(); 81 | if config.is_enabled(detector) { 82 | log::debug!("Enabling {} checks.", detector); 83 | Some(<$checker>::new($checker_config.unwrap())?) 
84 | } else { 85 | log::debug!("Checker {detector} is disabled by configuration."); 86 | None 87 | } 88 | } 89 | }; 90 | } 91 | 92 | let hunspell = create_checker!( 93 | "hunspell", 94 | HunspellChecker, 95 | &config, 96 | config.hunspell.as_ref() 97 | ); 98 | #[cfg(feature = "zet")] 99 | let zet = create_checker!("zet", ZetChecker, &config, config.zet.as_ref()); 100 | #[cfg(feature = "spellbook")] 101 | let spellbook = create_checker!( 102 | "spellbook", 103 | SpellbookChecker, 104 | &config, 105 | config.spellbook.as_ref() 106 | ); 107 | let nlprules = create_checker!( 108 | "nlprules", 109 | NlpRulesChecker, 110 | &config, 111 | config.nlprules.as_ref() 112 | ); 113 | Ok(Self { 114 | hunspell, 115 | #[cfg(feature = "zet")] 116 | zet, 117 | #[cfg(feature = "spellbook")] 118 | spellbook, 119 | nlprules, 120 | }) 121 | } 122 | } 123 | 124 | impl Checker for Checkers { 125 | type Config = Config; 126 | 127 | fn detector() -> Detector { 128 | unreachable!() 129 | } 130 | 131 | fn check<'a, 's>( 132 | &self, 133 | origin: &ContentOrigin, 134 | chunks: &'a [CheckableChunk], 135 | ) -> Result>> 136 | where 137 | 'a: 's, 138 | { 139 | let mut collective = HashSet::>::new(); 140 | if let Some(ref hunspell) = self.hunspell { 141 | collective.extend(hunspell.check(origin, chunks)?); 142 | } 143 | #[cfg(feature = "zet")] 144 | if let Some(ref zet) = self.zet { 145 | collective.extend(zet.check(origin, chunks)?); 146 | } 147 | #[cfg(feature = "spellbook")] 148 | if let Some(ref spellbook) = self.spellbook { 149 | collective.extend(spellbook.check(origin, chunks)?); 150 | } 151 | if let Some(ref nlprule) = self.nlprules { 152 | collective.extend(nlprule.check(origin, chunks)?); 153 | } 154 | 155 | let mut suggestions: Vec> = Vec::from_iter(collective); 156 | suggestions.sort(); 157 | if suggestions.is_empty() { 158 | return Ok(suggestions); 159 | } 160 | 161 | // Iterate through suggestions and identify overlapping ones. 162 | let suggestions = Vec::from_iter(suggestions.clone().into_iter().enumerate().filter_map( 163 | |(idx, cur)| { 164 | if idx == 0 || !cur.is_overlapped(&suggestions[idx - 1]) { 165 | Some(cur) 166 | } else { 167 | None 168 | } 169 | }, 170 | )); 171 | 172 | Ok(suggestions) 173 | } 174 | } 175 | 176 | #[cfg(test)] 177 | pub mod dummy; 178 | 179 | #[cfg(test)] 180 | pub mod tests { 181 | use super::*; 182 | use crate::load_span_from; 183 | use crate::ContentOrigin; 184 | use crate::Documentation; 185 | use crate::LineColumn; 186 | use crate::Range; 187 | use crate::Span; 188 | use std::path::PathBuf; 189 | 190 | use crate::fluff_up; 191 | 192 | const TEXT: &str = "With markdown removed, for sure."; 193 | lazy_static::lazy_static! 
{ 194 | static ref TOKENS: Vec<&'static str> = vec![ 195 | "With", 196 | "markdown", 197 | "removed", 198 | ",", 199 | "for", 200 | "sure", 201 | ".", 202 | ]; 203 | } 204 | 205 | #[test] 206 | fn tokens() { 207 | let tokenizer = tokenizer::<&PathBuf>(None).unwrap(); 208 | let ranges: Vec = dbg!(apply_tokenizer(&tokenizer, TEXT).collect()); 209 | for (range, expect) in ranges.into_iter().zip(TOKENS.iter()) { 210 | assert_eq!(&&TEXT[range], expect); 211 | } 212 | } 213 | 214 | pub fn extraction_test_body(content: &str, expected_spans: &[Span]) { 215 | let _ = env_logger::builder() 216 | .filter(None, log::LevelFilter::Trace) 217 | .is_test(true) 218 | .try_init(); 219 | let doc_comments = true; 220 | let dev_comments = false; 221 | let docs = Documentation::load_from_str( 222 | ContentOrigin::TestEntityRust, 223 | content, 224 | doc_comments, 225 | dev_comments, 226 | ); 227 | let (origin, chunks) = docs.into_iter().next().expect("Contains exactly one file"); 228 | let suggestions = dummy::DummyChecker 229 | .check(&origin, &chunks[..]) 230 | .expect("Dummy extraction must never fail"); 231 | 232 | // with a known number of suggestions 233 | assert_eq!(suggestions.len(), expected_spans.len()); 234 | 235 | for (index, (suggestion, expected_span)) in 236 | suggestions.iter().zip(expected_spans.iter()).enumerate() 237 | { 238 | assert_eq!( 239 | suggestion.replacements, 240 | vec![format!("replacement_{index}")], 241 | "found vs expected replacement" 242 | ); 243 | let extracts = load_span_from(&mut content.as_bytes(), suggestion.span).unwrap(); 244 | let expected_extracts = 245 | load_span_from(&mut content.as_bytes(), *expected_span).unwrap(); 246 | assert_eq!( 247 | (suggestion.span, extracts), 248 | (*expected_span, expected_extracts), 249 | "found vs expected span" 250 | ); 251 | } 252 | } 253 | 254 | #[test] 255 | fn extract_suggestions_simple() { 256 | const SIMPLE: &str = fluff_up!("two literals"); 257 | 258 | /// keep in mind, `Span` bounds are inclusive, unlike Ranges, where 259 | /// `range.end` is _exclusive_ 260 | const EXPECTED_SPANS: &[Span] = &[ 261 | Span { 262 | start: LineColumn { line: 1, column: 4 }, 263 | end: LineColumn { line: 1, column: 6 }, 264 | }, 265 | Span { 266 | start: LineColumn { line: 1, column: 8 }, 267 | end: LineColumn { 268 | line: 1, 269 | column: 15, 270 | }, 271 | }, 272 | ]; 273 | extraction_test_body(dbg!(SIMPLE), EXPECTED_SPANS); 274 | } 275 | 276 | #[test] 277 | fn extract_suggestions_left_aligned() { 278 | const SIMPLE: &str = fluff_up!("two literals "); 279 | 280 | /// keep in mind, `Span` bounds are inclusive, unlike Ranges, where 281 | /// `range.end` is _exclusive_ 282 | const EXPECTED_SPANS: &[Span] = &[ 283 | Span { 284 | start: LineColumn { line: 1, column: 4 }, 285 | end: LineColumn { line: 1, column: 6 }, 286 | }, 287 | Span { 288 | start: LineColumn { line: 1, column: 9 }, 289 | end: LineColumn { 290 | line: 1, 291 | column: 16, 292 | }, 293 | }, 294 | ]; 295 | extraction_test_body(dbg!(SIMPLE), EXPECTED_SPANS); 296 | } 297 | 298 | #[test] 299 | fn extract_suggestions_3spaces() { 300 | const SIMPLE: &str = fluff_up!(" third testcase "); 301 | 302 | /// keep in mind, `Span` bounds are inclusive, unlike Ranges, where 303 | /// `range.end` is _exclusive_ 304 | const EXPECTED_SPANS: &[Span] = &[ 305 | Span { 306 | start: LineColumn { line: 1, column: 6 }, 307 | end: LineColumn { 308 | line: 1, 309 | column: 10, 310 | }, 311 | }, 312 | Span { 313 | start: LineColumn { 314 | line: 1, 315 | column: 13, 316 | }, 317 | end: LineColumn { 318 | 
line: 1, 319 | column: 20, 320 | }, 321 | }, 322 | ]; 323 | extraction_test_body(dbg!(SIMPLE), EXPECTED_SPANS); 324 | } 325 | 326 | #[test] 327 | fn checker_discrepancies() { 328 | let _ = env_logger::Builder::new() 329 | .default_format() 330 | .filter_level(log::LevelFilter::Debug) 331 | .filter(Some("dicaff"), log::LevelFilter::Trace) 332 | .is_test(true) 333 | .try_init(); 334 | 335 | let x = r###" 336 | /// With all patches applied. 337 | /// 338 | /// No line in need of a reflow. 339 | /// 340 | /// `Patch`s foo. 341 | /// 342 | /// I am a TODO where TODO is in the extra dictionary. 343 | struct X; 344 | "###; 345 | 346 | let mut doc = Documentation::new(); 347 | doc.add_rust(ContentOrigin::TestEntityRust, x, true, false) 348 | .unwrap(); 349 | 350 | let config = Config::default(); 351 | assert!(config.is_enabled(Detector::Hunspell)); 352 | assert!(config.is_enabled(Detector::Spellbook)); 353 | assert!(config.is_enabled(Detector::ZSpell)); 354 | let cs = Checkers::new(config).unwrap(); 355 | 356 | let (origin, ccs) = doc.iter().next().unwrap(); 357 | dbg!(&ccs); 358 | let assert_cmp = |a: &[Suggestion<'_>], b: &[Suggestion<'_>]| { 359 | assert_eq!(a.len(), b.len()); 360 | for (a, b) in a.iter().zip(b.iter()) { 361 | assert_eq!(a.range, b.range); 362 | assert_eq!(a.chunk, b.chunk); 363 | } 364 | }; 365 | 366 | let hun = dbg!(cs.hunspell.unwrap().check(origin, ccs)).unwrap(); 367 | let book = dbg!(cs.spellbook.unwrap().check(origin, ccs)).unwrap(); 368 | let z = dbg!(cs.zet.unwrap().check(origin, ccs)).unwrap(); 369 | assert_cmp(&hun, &z); 370 | assert_cmp(&z, &book); 371 | } 372 | } 373 | -------------------------------------------------------------------------------- /src/checker/zspell.rs: -------------------------------------------------------------------------------- 1 | //! A dictionary check with affixes, backed by `libhunspell` 2 | //! 3 | //! Does not check grammar, but tokenizes the documentation chunk, and checks 4 | //! the individual tokens against the dictionary using the defined affixes. Can 5 | //! handle multiple dictionaries. 
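// Illustrative sketch, not part of this module: the zspell flow used below
// reduces to building a `Dictionary` from affix + word-list strings and then
// checking single words. The one-line `.aff`/`.dic` literals are placeholder
// assumptions; the real contents come from `DicAff::load`.
#[allow(dead_code)]
fn zspell_roundtrip_sketch() -> crate::errors::Result<()> {
    let aff = "SET UTF-8\n";
    let dic = "1\nspellcheck\n";
    let dict = zspell::builder().config_str(aff).dict_str(dic).build()?;
    assert!(dict.check_word("spellcheck"));
    assert!(!dict.check_word("spellchek"));
    Ok(())
}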
6 | 7 | use super::{apply_tokenizer, Checker, Detector, Suggestion}; 8 | 9 | use crate::checker::dictaffix::DicAff; 10 | use crate::config::WrappedRegex; 11 | use crate::documentation::{CheckableChunk, ContentOrigin, PlainOverlay}; 12 | use crate::util::sub_chars; 13 | use crate::Range; 14 | 15 | use nlprule::Tokenizer; 16 | 17 | use std::path::PathBuf; 18 | use std::sync::Arc; 19 | 20 | use doc_chunks::Ignores; 21 | 22 | use crate::errors::*; 23 | 24 | use super::quirks::{ 25 | replacements_contain_dashed, replacements_contain_dashless, transform, Transformed, 26 | }; 27 | 28 | use super::hunspell::consists_of_vulgar_fractions_or_emojis; 29 | 30 | #[derive(Clone)] 31 | pub struct ZetCheckerInner { 32 | zspell: zspell::Dictionary, 33 | transform_regex: Vec, 34 | allow_concatenated: bool, 35 | allow_dashed: bool, 36 | allow_emojis: bool, 37 | check_footnote_references: bool, 38 | ignorelist: String, 39 | } 40 | 41 | impl ZetCheckerInner { 42 | fn new(config: &::Config) -> Result { 43 | // TODO allow override 44 | let ( 45 | transform_regex, 46 | allow_concatenated, 47 | allow_dashed, 48 | allow_emojis, 49 | check_footnote_references, 50 | ) = { 51 | let quirks = &config.quirks; 52 | ( 53 | quirks.transform_regex().to_vec(), 54 | quirks.allow_concatenated(), 55 | quirks.allow_dashed(), 56 | quirks.allow_emojis(), 57 | quirks.check_footnote_references(), 58 | ) 59 | }; 60 | // FIXME rename the config option 61 | let ignorelist = config.tokenization_splitchars.clone(); 62 | // without these, a lot of those would be flagged as mistakes. 63 | debug_assert!(ignorelist.contains(',')); 64 | debug_assert!(ignorelist.contains('.')); 65 | debug_assert!(ignorelist.contains(';')); 66 | debug_assert!(ignorelist.contains('!')); 67 | debug_assert!(ignorelist.contains('?')); 68 | 69 | let DicAff { dic, aff } = DicAff::load( 70 | &config.extra_dictionaries[..], 71 | &config.search_dirs, 72 | config.lang(), 73 | config.use_builtin, 74 | config.skip_os_lookups, 75 | )?; 76 | 77 | let aff = aff.replace("UTF8", "UTF-8"); 78 | let zet = zspell::builder().config_str(&aff).dict_str(&dic).build()?; 79 | 80 | log::debug!("Dictionary setup completed successfully."); 81 | Ok(Self { 82 | zspell: zet, 83 | transform_regex, 84 | allow_concatenated, 85 | allow_dashed, 86 | allow_emojis, 87 | check_footnote_references, 88 | ignorelist, 89 | }) 90 | } 91 | } 92 | 93 | #[derive(Clone)] 94 | pub struct ZetChecker(pub Arc, pub Arc); 95 | 96 | impl std::ops::Deref for ZetChecker { 97 | type Target = ZetCheckerInner; 98 | fn deref(&self) -> &Self::Target { 99 | self.0.deref() 100 | } 101 | } 102 | 103 | impl ZetChecker { 104 | pub fn new(config: &::Config) -> Result { 105 | let tokenizer = super::tokenizer::<&PathBuf>(None)?; 106 | let inner = ZetCheckerInner::new(config)?; 107 | let hunspell = Arc::new(inner); 108 | Ok(ZetChecker(hunspell, tokenizer)) 109 | } 110 | } 111 | 112 | impl Checker for ZetChecker { 113 | type Config = crate::config::ZetConfig; 114 | 115 | fn detector() -> Detector { 116 | Detector::ZSpell 117 | } 118 | 119 | fn check<'a, 's>( 120 | &self, 121 | origin: &ContentOrigin, 122 | chunks: &'a [CheckableChunk], 123 | ) -> Result>> 124 | where 125 | 'a: 's, 126 | { 127 | let mut acc = Vec::with_capacity(chunks.len()); 128 | 129 | for chunk in chunks { 130 | let plain = chunk.erase_cmark(&Ignores { 131 | footnote_references: !self.0.check_footnote_references, 132 | }); 133 | log::trace!("{plain:?}"); 134 | let txt = plain.as_str(); 135 | 136 | 'tokenization: for range in apply_tokenizer(&self.1, txt) { 137 | let 
word = sub_chars(txt, range.clone()); 138 | if range.len() == 1 139 | && word 140 | .chars() 141 | .next() 142 | .filter(|c| self.ignorelist.contains(*c)) 143 | .is_some() 144 | { 145 | continue 'tokenization; 146 | } 147 | if self.transform_regex.is_empty() { 148 | obtain_suggestions( 149 | &plain, 150 | chunk, 151 | &self.zspell, 152 | origin, 153 | word, 154 | range, 155 | self.allow_concatenated, 156 | self.allow_dashed, 157 | self.allow_emojis, 158 | &mut acc, 159 | ) 160 | } else { 161 | match transform(&self.transform_regex[..], word.as_str(), range.clone()) { 162 | Transformed::Fragments(word_fragments) => { 163 | for (range, word_fragment) in word_fragments { 164 | obtain_suggestions( 165 | &plain, 166 | chunk, 167 | &self.zspell, 168 | origin, 169 | word_fragment.to_owned(), 170 | range, 171 | self.allow_concatenated, 172 | self.allow_dashed, 173 | self.allow_emojis, 174 | &mut acc, 175 | ); 176 | } 177 | } 178 | Transformed::Atomic((range, word)) => { 179 | obtain_suggestions( 180 | &plain, 181 | chunk, 182 | &self.zspell, 183 | origin, 184 | word.to_owned(), 185 | range, 186 | self.allow_concatenated, 187 | self.allow_dashed, 188 | self.allow_emojis, 189 | &mut acc, 190 | ); 191 | } 192 | Transformed::Whitelisted(_) => {} 193 | } 194 | } 195 | } 196 | } 197 | Ok(acc) 198 | } 199 | } 200 | 201 | fn obtain_suggestions<'s>( 202 | plain: &PlainOverlay, 203 | chunk: &'s CheckableChunk, 204 | zspell: &zspell::Dictionary, 205 | origin: &ContentOrigin, 206 | word: String, 207 | range: Range, 208 | allow_concatenated: bool, 209 | allow_dashed: bool, 210 | allow_emojis: bool, 211 | acc: &mut Vec>, 212 | ) { 213 | log::trace!("Checking {word} in {range:?}.."); 214 | 215 | match zspell.check_word(&word) { 216 | false => { 217 | log::trace!("No match for word (plain range: {range:?}): >{word}<"); 218 | // get rid of single character suggestions 219 | let replacements = vec![]; 220 | // single char suggestions tend to be useless 221 | 222 | log::debug!(target: "zspell", "{word} --{{suggest}}--> {replacements:?}"); 223 | 224 | // strings made of vulgar fraction or emoji 225 | if allow_emojis && consists_of_vulgar_fractions_or_emojis(&word) { 226 | log::trace!(target: "quirks", "Found emoji or vulgar fraction character, treating {word} as ok"); 227 | return; 228 | } 229 | 230 | if allow_concatenated && replacements_contain_dashless(&word, replacements.as_slice()) { 231 | log::trace!(target: "quirks", "Found dashless word in replacement suggestions, treating {word} as ok"); 232 | return; 233 | } 234 | if allow_dashed && replacements_contain_dashed(&word, replacements.as_slice()) { 235 | log::trace!(target: "quirks", "Found dashed word in replacement suggestions, treating {word} as ok"); 236 | return; 237 | } 238 | for (range, span) in plain.find_spans(range.clone()) { 239 | acc.push(Suggestion { 240 | detector: Detector::ZSpell, 241 | range, 242 | span, 243 | origin: origin.clone(), 244 | replacements: replacements.clone(), 245 | chunk, 246 | description: Some("Possible spelling mistake found.".to_owned()), 247 | }) 248 | } 249 | } 250 | true => { 251 | log::trace!("Found a match for word (plain range: {range:?}): >{word}<",); 252 | } 253 | } 254 | } 255 | 256 | #[cfg(test)] 257 | mod tests { 258 | use crate::checker::dictaffix::is_valid_hunspell_dic; 259 | 260 | use super::*; 261 | 262 | #[test] 263 | fn hunspell_dic_format() { 264 | const GOOD: &str = "2 265 | whitespazes 266 | catsndogs 267 | "; 268 | const BAD_1: &str = "foo 269 | 12349 270 | bar 271 | "; 272 | const BAD_2: &str = "2 273 | 
12349 274 | bar 275 | "; 276 | const BAD_3: &str = "foo 277 | xxx 278 | bar 279 | "; 280 | assert!(is_valid_hunspell_dic(&mut GOOD.as_bytes()).is_ok()); 281 | assert!(is_valid_hunspell_dic(&mut BAD_1.as_bytes()).is_err()); 282 | assert!(is_valid_hunspell_dic(&mut BAD_2.as_bytes()).is_err()); 283 | assert!(is_valid_hunspell_dic(&mut BAD_3.as_bytes()).is_err()); 284 | } 285 | 286 | macro_rules! parametrized_vulgar_fraction_or_emoji { 287 | ($($name:ident: $value:expr,)*) => { 288 | $( 289 | #[test] 290 | fn $name() { 291 | let (input, expected) = $value; 292 | assert_eq!(expected, consists_of_vulgar_fractions_or_emojis(input)); 293 | } 294 | )* 295 | } 296 | } 297 | 298 | parametrized_vulgar_fraction_or_emoji! { 299 | empty: ("", false), 300 | emojis: ("🐍🤗🦀", true), 301 | contains_emojis: ("🦀acean", false), 302 | contains_only_unicode: ("⅔⅔⅔↉↉↉", true), 303 | contains_emojis_and_unicodes: ("🐍🤗⅒🦀⅔¾", true), 304 | no_emojis: ("no emoji string", false), 305 | is_number: ("123", true), 306 | is_latin_letter: ("a", false), 307 | vulgar_fraction_one_quarter_and_emojis: ("¼🤗🦀", true), 308 | emojis_and_vulgar_fraction_one_half: ("🤗🦀½", true), 309 | emojis_and_vulgar_fraction_three_quarters: ("🤗🦀¾", true), 310 | emojis_and_vulgar_fraction_one_seventh: ("🤗🦀⅐", true), 311 | emojis_and_vulgar_fraction_one_ninth: ("🤗🦀⅑", true), 312 | emojis_and_vulgar_fraction_one_tenth: ("🤗🦀⅒", true), 313 | emojis_and_vulgar_fraction_one_third: ("🤗🦀⅓", true), 314 | emojis_and_vulgar_fraction_two_thirds: ("🤗🦀⅔", true), 315 | emojis_and_vulgar_fraction_one_fifth: ("🤗🦀⅕", true), 316 | emojis_and_vulgar_fraction_two_fifth: ("🤗🦀⅖", true), 317 | emojis_and_vulgar_fraction_three_fifths: ("🤗🦀⅗", true), 318 | emojis_and_vulgar_fraction_four_fifths: ( "🐍⅘", true), 319 | emojis_and_vulgar_fraction_one_sixth: ("🐍⅙", true), 320 | emojis_and_vulgar_fraction_five_sixths: ("🐍⅚", true), 321 | emojis_and_vulgar_fraction_one_eighth: ("🦀🐍⅛", true), 322 | emojis_and_vulgar_fraction_three_eighths: ("🦀🐍⅜", true), 323 | emojis_and_vulgar_fraction_five_eights: ("🦀🐍⅝", true), 324 | emojis_and_vulgar_fraction_five_eighths: ("🦀🐍⅝", true), 325 | emojis_and_vulgar_fraction_seven_eighths: ("🦀🐍⅞", true), 326 | emojis_and_vulgar_fraction_zero_thirds: ("🦀🐍↉", true), 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /src/checker/spellbook.rs: -------------------------------------------------------------------------------- 1 | //! A dictionary check with affixes, backed by `libhunspell` 2 | //! 3 | //! Does not check grammar, but tokenizes the documentation chunk, and checks 4 | //! the individual tokens against the dictionary using the defined affixes. Can 5 | //! handle multiple dictionaries. 
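// Illustrative sketch, not part of this module: `spellbook` follows the same
// two-step pattern used below, namely constructing a `Dictionary` from the
// raw `.aff`/`.dic` strings and calling `check` per word. The literal
// dictionary contents are placeholder assumptions.
#[allow(dead_code)]
fn spellbook_roundtrip_sketch() -> crate::errors::Result<()> {
    let aff = "SET UTF-8\n";
    let dic = "1\nspellcheck\n";
    let dict = ::spellbook::Dictionary::new(aff, dic)
        .map_err(|e| eyre!("Failed to parse dictionary: {e}"))?;
    assert!(dict.check("spellcheck"));
    Ok(())
}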
6 | 7 | use super::{apply_tokenizer, Checker, Detector, Suggestion}; 8 | 9 | use crate::checker::dictaffix::DicAff; 10 | use crate::config::WrappedRegex; 11 | use crate::documentation::{CheckableChunk, ContentOrigin, PlainOverlay}; 12 | use crate::util::sub_chars; 13 | use crate::Range; 14 | 15 | use nlprule::Tokenizer; 16 | 17 | use std::path::PathBuf; 18 | use std::sync::Arc; 19 | 20 | use doc_chunks::Ignores; 21 | 22 | use crate::errors::*; 23 | 24 | use super::quirks::{ 25 | replacements_contain_dashed, replacements_contain_dashless, transform, Transformed, 26 | }; 27 | 28 | use super::hunspell::consists_of_vulgar_fractions_or_emojis; 29 | 30 | #[derive(Clone)] 31 | pub struct SpellbookCheckerInner { 32 | spellbook: ::spellbook::Dictionary, 33 | transform_regex: Vec, 34 | allow_concatenated: bool, 35 | allow_dashed: bool, 36 | allow_emojis: bool, 37 | check_footnote_references: bool, 38 | ignorelist: String, 39 | } 40 | 41 | impl SpellbookCheckerInner { 42 | fn new(config: &::Config) -> Result { 43 | // TODO allow override 44 | let ( 45 | transform_regex, 46 | allow_concatenated, 47 | allow_dashed, 48 | allow_emojis, 49 | check_footnote_references, 50 | ) = { 51 | let quirks = &config.quirks; 52 | ( 53 | quirks.transform_regex().to_vec(), 54 | quirks.allow_concatenated(), 55 | quirks.allow_dashed(), 56 | quirks.allow_emojis(), 57 | quirks.check_footnote_references(), 58 | ) 59 | }; 60 | // FIXME rename the config option 61 | let ignorelist = config.tokenization_splitchars.clone(); 62 | // without these, a lot of those would be flagged as mistakes. 63 | debug_assert!(ignorelist.contains(',')); 64 | debug_assert!(ignorelist.contains('.')); 65 | debug_assert!(ignorelist.contains(';')); 66 | debug_assert!(ignorelist.contains('!')); 67 | debug_assert!(ignorelist.contains('?')); 68 | 69 | let DicAff { dic, aff } = DicAff::load( 70 | &config.extra_dictionaries[..], 71 | &config.search_dirs, 72 | config.lang(), 73 | config.use_builtin, 74 | config.skip_os_lookups, 75 | )?; 76 | 77 | let spellbook = ::spellbook::Dictionary::new(&aff, &dic) 78 | .map_err(|e| eyre!("Failed to parse dictionary: {e}"))?; 79 | 80 | log::debug!("Dictionary setup completed successfully."); 81 | Ok(Self { 82 | spellbook, 83 | transform_regex, 84 | allow_concatenated, 85 | allow_dashed, 86 | allow_emojis, 87 | check_footnote_references, 88 | ignorelist, 89 | }) 90 | } 91 | } 92 | 93 | #[derive(Clone)] 94 | pub struct SpellbookChecker(pub Arc, pub Arc); 95 | 96 | impl std::ops::Deref for SpellbookChecker { 97 | type Target = SpellbookCheckerInner; 98 | fn deref(&self) -> &Self::Target { 99 | self.0.deref() 100 | } 101 | } 102 | 103 | impl SpellbookChecker { 104 | pub fn new(config: &::Config) -> Result { 105 | let tokenizer = super::tokenizer::<&PathBuf>(None)?; 106 | let inner = SpellbookCheckerInner::new(config)?; 107 | let hunspell = Arc::new(inner); 108 | Ok(SpellbookChecker(hunspell, tokenizer)) 109 | } 110 | } 111 | 112 | impl Checker for SpellbookChecker { 113 | type Config = crate::config::SpellbookConfig; 114 | 115 | fn detector() -> Detector { 116 | Detector::Spellbook 117 | } 118 | 119 | fn check<'a, 's>( 120 | &self, 121 | origin: &ContentOrigin, 122 | chunks: &'a [CheckableChunk], 123 | ) -> Result>> 124 | where 125 | 'a: 's, 126 | { 127 | let mut acc = Vec::with_capacity(chunks.len()); 128 | 129 | for chunk in chunks { 130 | let plain = chunk.erase_cmark(&Ignores { 131 | footnote_references: !self.0.check_footnote_references, 132 | }); 133 | log::trace!("{plain:?}"); 134 | let txt = plain.as_str(); 135 | 
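            // What follows mirrors the hunspell/zspell checkers: tokenize the
            // markdown-erased text, skip single-character tokens that are mere
            // split characters, then either check the token directly or first
            // split it into fragments via the configured `transform_regex`
            // quirks before looking each fragment up in the dictionary.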
136 | 'tokenization: for range in apply_tokenizer(&self.1, txt) { 137 | let word = sub_chars(txt, range.clone()); 138 | if range.len() == 1 139 | && word 140 | .chars() 141 | .next() 142 | .filter(|c| self.ignorelist.contains(*c)) 143 | .is_some() 144 | { 145 | continue 'tokenization; 146 | } 147 | if self.transform_regex.is_empty() { 148 | obtain_suggestions( 149 | &plain, 150 | chunk, 151 | &self.spellbook, 152 | origin, 153 | word, 154 | range, 155 | self.allow_concatenated, 156 | self.allow_dashed, 157 | self.allow_emojis, 158 | &mut acc, 159 | ) 160 | } else { 161 | match transform(&self.transform_regex[..], word.as_str(), range.clone()) { 162 | Transformed::Fragments(word_fragments) => { 163 | for (range, word_fragment) in word_fragments { 164 | obtain_suggestions( 165 | &plain, 166 | chunk, 167 | &self.spellbook, 168 | origin, 169 | word_fragment.to_owned(), 170 | range, 171 | self.allow_concatenated, 172 | self.allow_dashed, 173 | self.allow_emojis, 174 | &mut acc, 175 | ); 176 | } 177 | } 178 | Transformed::Atomic((range, word)) => { 179 | obtain_suggestions( 180 | &plain, 181 | chunk, 182 | &self.spellbook, 183 | origin, 184 | word.to_owned(), 185 | range, 186 | self.allow_concatenated, 187 | self.allow_dashed, 188 | self.allow_emojis, 189 | &mut acc, 190 | ); 191 | } 192 | Transformed::Whitelisted(_) => {} 193 | } 194 | } 195 | } 196 | } 197 | Ok(acc) 198 | } 199 | } 200 | 201 | fn obtain_suggestions<'s>( 202 | plain: &PlainOverlay, 203 | chunk: &'s CheckableChunk, 204 | dictionary: &::spellbook::Dictionary, 205 | origin: &ContentOrigin, 206 | word: String, 207 | range: Range, 208 | allow_concatenated: bool, 209 | allow_dashed: bool, 210 | allow_emojis: bool, 211 | acc: &mut Vec>, 212 | ) { 213 | log::trace!("Checking {word} in {range:?}.."); 214 | 215 | match dictionary.check(&word) { 216 | false => { 217 | log::trace!(target: "spellbook", "No match for word (plain range: {range:?}): >{word}<"); 218 | // get rid of single character suggestions 219 | let replacements = vec![]; 220 | // single char suggestions tend to be useless 221 | 222 | log::debug!(target: "spellbook", "{word} --{{suggest}}--> {replacements:?}"); 223 | 224 | // strings made of vulgar fraction or emoji 225 | if allow_emojis && consists_of_vulgar_fractions_or_emojis(&word) { 226 | log::trace!(target: "quirks", "Found emoji or vulgar fraction character, treating {word} as ok"); 227 | return; 228 | } 229 | 230 | if allow_concatenated && replacements_contain_dashless(&word, replacements.as_slice()) { 231 | log::trace!(target: "quirks", "Found dashless word in replacement suggestions, treating {word} as ok"); 232 | return; 233 | } 234 | if allow_dashed && replacements_contain_dashed(&word, replacements.as_slice()) { 235 | log::trace!(target: "quirks", "Found dashed word in replacement suggestions, treating {word} as ok"); 236 | return; 237 | } 238 | for (range, span) in plain.find_spans(range.clone()) { 239 | acc.push(Suggestion { 240 | detector: Detector::Spellbook, 241 | range, 242 | span, 243 | origin: origin.clone(), 244 | replacements: replacements.clone(), 245 | chunk, 246 | description: Some("Possible spelling mistake found.".to_owned()), 247 | }) 248 | } 249 | } 250 | true => { 251 | log::trace!(target: "spellbook", "Found a match for word (plain range: {range:?}): >{word}<",); 252 | } 253 | } 254 | } 255 | 256 | #[cfg(test)] 257 | mod tests { 258 | use crate::checker::dictaffix::is_valid_hunspell_dic; 259 | 260 | use super::*; 261 | 262 | #[test] 263 | fn hunspell_dic_format() { 264 | const GOOD: &str = "2 
265 | whitespazes 266 | catsndogs 267 | "; 268 | const BAD_1: &str = "foo 269 | 12349 270 | bar 271 | "; 272 | const BAD_2: &str = "2 273 | 12349 274 | bar 275 | "; 276 | const BAD_3: &str = "foo 277 | xxx 278 | bar 279 | "; 280 | assert!(is_valid_hunspell_dic(&mut GOOD.as_bytes()).is_ok()); 281 | assert!(is_valid_hunspell_dic(&mut BAD_1.as_bytes()).is_err()); 282 | assert!(is_valid_hunspell_dic(&mut BAD_2.as_bytes()).is_err()); 283 | assert!(is_valid_hunspell_dic(&mut BAD_3.as_bytes()).is_err()); 284 | } 285 | 286 | macro_rules! parametrized_vulgar_fraction_or_emoji { 287 | ($($name:ident: $value:expr,)*) => { 288 | $( 289 | #[test] 290 | fn $name() { 291 | let (input, expected) = $value; 292 | assert_eq!(expected, consists_of_vulgar_fractions_or_emojis(input)); 293 | } 294 | )* 295 | } 296 | } 297 | 298 | parametrized_vulgar_fraction_or_emoji! { 299 | empty: ("", false), 300 | emojis: ("🐍🤗🦀", true), 301 | contains_emojis: ("🦀acean", false), 302 | contains_only_unicode: ("⅔⅔⅔↉↉↉", true), 303 | contains_emojis_and_unicodes: ("🐍🤗⅒🦀⅔¾", true), 304 | no_emojis: ("no emoji string", false), 305 | is_number: ("123", true), 306 | is_latin_letter: ("a", false), 307 | vulgar_fraction_one_quarter_and_emojis: ("¼🤗🦀", true), 308 | emojis_and_vulgar_fraction_one_half: ("🤗🦀½", true), 309 | emojis_and_vulgar_fraction_three_quarters: ("🤗🦀¾", true), 310 | emojis_and_vulgar_fraction_one_seventh: ("🤗🦀⅐", true), 311 | emojis_and_vulgar_fraction_one_ninth: ("🤗🦀⅑", true), 312 | emojis_and_vulgar_fraction_one_tenth: ("🤗🦀⅒", true), 313 | emojis_and_vulgar_fraction_one_third: ("🤗🦀⅓", true), 314 | emojis_and_vulgar_fraction_two_thirds: ("🤗🦀⅔", true), 315 | emojis_and_vulgar_fraction_one_fifth: ("🤗🦀⅕", true), 316 | emojis_and_vulgar_fraction_two_fifth: ("🤗🦀⅖", true), 317 | emojis_and_vulgar_fraction_three_fifths: ("🤗🦀⅗", true), 318 | emojis_and_vulgar_fraction_four_fifths: ( "🐍⅘", true), 319 | emojis_and_vulgar_fraction_one_sixth: ("🐍⅙", true), 320 | emojis_and_vulgar_fraction_five_sixths: ("🐍⅚", true), 321 | emojis_and_vulgar_fraction_one_eighth: ("🦀🐍⅛", true), 322 | emojis_and_vulgar_fraction_three_eighths: ("🦀🐍⅜", true), 323 | emojis_and_vulgar_fraction_five_eights: ("🦀🐍⅝", true), 324 | emojis_and_vulgar_fraction_five_eighths: ("🦀🐍⅝", true), 325 | emojis_and_vulgar_fraction_seven_eighths: ("🦀🐍⅞", true), 326 | emojis_and_vulgar_fraction_zero_thirds: ("🦀🐍↉", true), 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /src/config/mod.rs: -------------------------------------------------------------------------------- 1 | //! Configure cargo-spellcheck 2 | //! 3 | //! Supports `Hunspell` and `LanguageTool` scopes. 4 | //! 5 | //! A default configuration will be generated in the default location by 6 | //! default. Default. Default default default. 7 | 8 | // TODO pendeng refactor, avoid spending time on documenting the status quo. 
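// An illustrative configuration file (sketch): section names follow the serde
// aliases declared on `Config` below; the keys inside each section live on the
// corresponding `*Config` structs and are omitted here.
//
//     dev_comments = false
//     skip_readme = false
//
//     [Hunspell]
//     # extra dictionaries, search dirs, quirks, ...
//
//     [NlpRules]
//     # optional overrides for the tokenizer/rules data
//
//     [Reflow]
//     # line length limit for the reflow helper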
9 | #![allow(missing_docs)]
10 | 
11 | pub mod args;
12 | 
13 | mod regex;
14 | pub use self::regex::*;
15 | 
16 | mod reflow;
17 | pub use self::reflow::*;
18 | 
19 | mod hunspell;
20 | pub use self::hunspell::*;
21 | 
22 | mod nlprules;
23 | pub use self::nlprules::*;
24 | 
25 | mod search_dirs;
26 | pub use search_dirs::*;
27 | 
28 | mod iso;
29 | pub use iso::*;
30 | 
31 | use crate::errors::*;
32 | use crate::Detector;
33 | use fancy_regex::Regex;
34 | 
35 | use fs_err as fs;
36 | use serde::{Deserialize, Serialize};
37 | use std::convert::AsRef;
38 | use std::fmt;
39 | use std::io::Read;
40 | use std::path::{Path, PathBuf};
41 | 
42 | #[derive(Deserialize, Serialize, Debug, Clone)]
43 | #[serde(deny_unknown_fields)]
44 | pub struct Config {
45 |     // Options that modify the inputs being picked up.
46 |     #[serde(default)]
47 |     #[serde(alias = "dev-comments")]
48 |     #[serde(alias = "devcomments")]
49 |     pub dev_comments: bool,
50 | 
51 |     #[serde(default)]
52 |     #[serde(alias = "skip-readme")]
53 |     #[serde(alias = "skipreadme")]
54 |     pub skip_readme: bool,
55 | 
56 |     #[serde(alias = "Hunspell")]
57 |     #[serde(default = "default_hunspell")]
58 |     pub hunspell: Option<HunspellConfig>,
59 | 
60 |     #[serde(alias = "ZSpell")]
61 |     #[serde(default = "default_zspell")]
62 |     pub zet: Option<ZetConfig>,
63 | 
64 |     #[serde(alias = "Spellbook")]
65 |     #[serde(alias = "book")]
66 |     #[serde(default = "default_spellbook")]
67 |     pub spellbook: Option<SpellbookConfig>,
68 | 
69 |     #[serde(alias = "Nlp")]
70 |     #[serde(alias = "NLP")]
71 |     #[serde(alias = "nlp")]
72 |     #[serde(alias = "NLP")]
73 |     #[serde(alias = "NlpRules")]
74 |     #[serde(default = "default_nlprules")]
75 |     pub nlprules: Option<NlpRulesConfig>,
76 | 
77 |     #[serde(alias = "ReFlow")]
78 |     #[serde(alias = "Reflow")]
79 |     pub reflow: Option<ReflowConfig>,
80 | }
81 | 
82 | impl Config {
83 |     const QUALIFIER: &'static str = "rs";
84 |     const ORGANIZATION: &'static str = "fff";
85 |     const APPLICATION: &'static str = "cargo_spellcheck";
86 | 
87 |     /// Sanitize all relative paths to absolute paths in relation to `base`.
88 |     fn sanitize_paths(&mut self, base: &Path) -> Result<()> {
89 |         if let Some(ref mut hunspell) = self.hunspell {
90 |             hunspell.sanitize_paths(base)?;
91 |         }
92 |         if let Some(ref mut zspell) = self.zet {
93 |             zspell.sanitize_paths(base)?;
94 |         }
95 |         if let Some(ref mut spellbook) = self.spellbook {
96 |             spellbook.sanitize_paths(base)?;
97 |         }
98 |         Ok(())
99 |     }
100 | 
101 |     pub fn parse<S: AsRef<str>>(s: S) -> Result<Self> {
102 |         Ok(toml::from_str(s.as_ref())?)
103 |     }
104 | 
105 |     pub fn load_from<P: AsRef<Path>>(path: P) -> Result<Option<Self>> {
106 |         let (contents, path) = match Self::load_content(path) {
107 |             Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
108 |                 return Ok(None);
109 |             }
110 |             Err(e) => bail!(e),
111 |             Ok(contents) => contents,
112 |         };
113 |         Self::parse(&contents)
114 |             .wrap_err_with(|| {
115 |                 eyre!(
116 |                     "Syntax of a given config file({}) is broken",
117 |                     path.display()
118 |                 )
119 |             })
120 |             .and_then(|mut cfg| {
121 |                 if let Some(base) = path.parent() {
122 |                     cfg.sanitize_paths(base)?;
123 |                 }
124 |                 Ok(Some(cfg))
125 |             })
126 |     }
127 | 
128 |     pub fn load_content<P: AsRef<Path>>(path: P) -> std::io::Result<(String, PathBuf)> {
129 |         let path = path.as_ref().canonicalize()?;
130 |         let mut file = fs::File::open(&path)?;
131 | 
132 |         let mut contents = String::with_capacity(1024);
133 |         file.read_to_string(&mut contents)?;
134 |         Ok((contents, path))
135 |     }
136 | 
137 |     pub fn load() -> Result<Option<Self>> {
138 |         if let Some(base) = directories::BaseDirs::new() {
139 |             Self::load_from(
140 |                 base.config_dir()
141 |                     .join("cargo_spellcheck")
142 |                     .join("config.toml"),
143 |             )
144 |         } else {
145 |             bail!("No idea where your config directory is located. XDG compliance would be nice.")
146 |         }
147 |     }
148 | 
149 |     pub fn to_toml(&self) -> Result<String> {
150 |         toml::to_string(self).wrap_err_with(|| eyre!("Failed to convert to toml"))
151 |     }
152 | 
153 |     pub fn write_values_to<W: std::io::Write>(&self, mut writer: W) -> Result<Self> {
154 |         let s = self.to_toml()?;
155 |         writer.write_all(s.as_bytes())?;
156 |         Ok(self.clone())
157 |     }
158 | 
159 |     pub fn write_values_to_path<P: AsRef<Path>>(&self, path: P) -> Result<Self> {
160 |         let path = path.as_ref();
161 | 
162 |         if let Some(path) = path.parent() {
163 |             fs::create_dir_all(path).wrap_err_with(|| {
164 |                 eyre!("Failed to create config parent dirs {}", path.display())
165 |             })?;
166 |         }
167 | 
168 |         let file = fs::OpenOptions::new()
169 |             .create(true)
170 |             .write(true)
171 |             .truncate(true)
172 |             .open(path)
173 |             .wrap_err_with(|| eyre!("Failed to write default values to {}", path.display()))?;
174 | 
175 |         let writer = std::io::BufWriter::new(file);
176 | 
177 |         self.write_values_to(writer)
178 |             .wrap_err_with(|| eyre!("Failed to write default config to {}", path.display()))
179 |     }
180 | 
181 |     pub fn write_values_to_default_path(&self) -> Result<Self> {
182 |         let path = Self::default_path()?;
183 |         self.write_values_to_path(path)
184 |     }
185 | 
186 |     pub fn write_default_values_to<P: AsRef<Path>>(path: P) -> Result<Self> {
187 |         Self::default().write_values_to_path(path)
188 |     }
189 | 
190 |     pub fn default_path() -> Result<PathBuf> {
191 |         if let Some(base) =
192 |             directories::ProjectDirs::from(Self::QUALIFIER, Self::ORGANIZATION, Self::APPLICATION)
193 |         {
194 |             Ok(base.config_dir().join("config.toml"))
195 |         } else {
196 |             bail!("No idea where your config directory is located. `$HOME` must be set.")
197 |         }
198 |     }
199 | 
200 |     /// Obtain a project specific config file.
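    ///
    /// This resolves to `<manifest_dir>/.config/spellcheck.toml` and fails if
    /// that file is missing, as the body below shows. Illustrative sketch only;
    /// the manifest directory and the import path are assumptions made for the
    /// example.
    ///
    /// ```ignore
    /// // Hypothetical import path and manifest directory, for illustration only.
    /// use cargo_spellcheck::config::Config;
    ///
    /// let path = Config::project_config("path/to/my-crate").expect("project config exists");
    /// assert!(path.ends_with("spellcheck.toml"));
    /// let cfg = Config::load_from(&path).expect("readable").expect("file was found, so it parses");
    /// ```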
201 |     pub fn project_config(manifest_dir: impl AsRef<Path>) -> Result<PathBuf> {
202 |         let path = manifest_dir
203 |             .as_ref()
204 |             .to_owned()
205 |             .join(".config")
206 |             .join("spellcheck.toml");
207 | 
208 |         let path = path.canonicalize()?;
209 | 
210 |         if path.is_file() {
211 |             Ok(path)
212 |         } else {
213 |             bail!(
214 |                 "Local project dir config {} does not exist or is not a file.",
215 |                 path.display()
216 |             )
217 |         }
218 |     }
219 | 
220 |     pub fn write_default_values() -> Result<Self> {
221 |         let d = Self::default_path()?;
222 |         Self::write_default_values_to(d.join("config.toml"))
223 |     }
224 | 
225 |     pub fn is_enabled(&self, detector: Detector) -> bool {
226 |         match detector {
227 |             Detector::Hunspell => self.hunspell.is_some(),
228 |             Detector::ZSpell => self.zet.is_some(),
229 |             Detector::Spellbook => self.spellbook.is_some(),
230 |             Detector::NlpRules => self.nlprules.is_some(),
231 |             Detector::Reflow => self.reflow.is_some(),
232 |             #[cfg(test)]
233 |             Detector::Dummy => true,
234 |         }
235 |     }
236 | 
237 |     pub fn full() -> Self {
238 |         Default::default()
239 |     }
240 | }
241 | 
242 | fn default_nlprules() -> Option<NlpRulesConfig> {
243 |     if cfg!(feature = "nlprules") {
244 |         Some(NlpRulesConfig::default())
245 |     } else {
246 |         log::warn!("Cannot enable nlprules, since it wasn't compiled with `nlprules` as checker");
247 |         None
248 |     }
249 | }
250 | 
251 | fn default_hunspell() -> Option<HunspellConfig> {
252 |     Some(HunspellConfig::default())
253 | }
254 | fn default_zspell() -> Option<ZetConfig> {
255 |     Some(ZetConfig::default())
256 | }
257 | fn default_spellbook() -> Option<SpellbookConfig> {
258 |     Some(SpellbookConfig::default())
259 | }
260 | 
261 | impl Default for Config {
262 |     fn default() -> Self {
263 |         Self {
264 |             dev_comments: false,
265 |             skip_readme: false,
266 |             hunspell: default_hunspell(),
267 |             zet: default_zspell(),
268 |             spellbook: default_spellbook(),
269 |             nlprules: default_nlprules(),
270 |             reflow: Some(ReflowConfig::default()),
271 |         }
272 |     }
273 | }
274 | 
275 | #[cfg(test)]
276 | mod tests {
277 |     use super::*;
278 |     use assert_matches::assert_matches;
279 | 
280 |     #[test]
281 |     fn can_serialize_to_toml() {
282 |         let config = dbg!(Config::full());
283 |         assert_matches!(config.to_toml(), Ok(_s));
284 |     }
285 | 
286 |     #[test]
287 |     fn project_config_works() {
288 |         let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
289 |             .join(".config")
290 |             .join("spellcheck.toml");
291 |         assert_matches!(Config::load_from(&path), Ok(_));
292 |     }
293 | 
294 |     #[test]
295 |     fn all() {
296 |         let _ = Config::parse(
297 |             r#"
298 | dev_comments = true
299 | skip-readme = true
300 | 
301 | [Hunspell]
302 | lang = "en_US"
303 | search_dirs = ["/usr/lib64/hunspell"]
304 | extra_dictionaries = ["/home/bernhard/test.dic"]
305 | "#,
306 |         )
307 |         .unwrap();
308 |     }
309 | 
310 |     #[test]
311 |     fn empty() {
312 |         assert!(Config::parse(
313 |             r#"
314 | "#,
315 |         )
316 |         .is_ok());
317 |     }
318 |     #[test]
319 |     fn partial_1() {
320 |         let _cfg = Config::parse(
321 |             r#"
322 | [hunspell]
323 | lang = "en_GB"
324 | search_dirs = ["/usr/lib64/hunspell"]
325 | extra_dictionaries = ["/home/bernhard/test.dic"]
326 | "#,
327 |         )
328 |         .unwrap();
329 |     }
330 | 
331 |     #[test]
332 |     fn partial_3() {
333 |         let cfg = Config::parse(
334 |             r#"
335 | [Hunspell]
336 | lang = "de_AT"
337 | search_dirs = ["/usr/lib64/hunspell"]
338 | extra_dictionaries = ["/home/bernhard/test.dic"]
339 | "#,
340 |         )
341 |         .unwrap();
342 |         let _hunspell = cfg.hunspell.expect("Must contain hunspell cfg");
343 |     }
344 | 
345 |     #[test]
346 |     fn partial_4() {
347 |         let cfg = Config::parse(
348 |             r#"
349 | [Hunspell]
350 | lang = "en_US"
lang = "en_US" 351 | "#, 352 | ) 353 | .unwrap(); 354 | let _hunspell = cfg.hunspell.expect("Must contain hunspell cfg"); 355 | } 356 | 357 | #[test] 358 | fn partial_5() { 359 | assert!(Config::parse( 360 | r#" 361 | [hUNspell] 362 | lang = "en_US" 363 | "#, 364 | ) 365 | .is_err()); 366 | } 367 | 368 | #[test] 369 | fn partial_6() { 370 | let cfg = Config::parse( 371 | r#" 372 | [hunspell] 373 | "#, 374 | ) 375 | .unwrap(); 376 | let _hunspell = cfg.hunspell.expect("Must contain hunspell cfg"); 377 | } 378 | 379 | #[test] 380 | fn partial_7() { 381 | let cfg = Config::parse( 382 | r#" 383 | [Hunspell.quirks] 384 | allow_concatenation = true 385 | allow_dashes = true 386 | transform_regex = ["^'([^\\s])'$", "^[0-9]+x$"] 387 | "#, 388 | ) 389 | .unwrap(); 390 | let _hunspell = cfg.hunspell.expect("Must contain hunspell cfg"); 391 | } 392 | 393 | #[test] 394 | fn partial_8() { 395 | let cfg = Config::parse( 396 | r#" 397 | [Hunspell] 398 | search_dirs = ["/search/1", "/search/2"] 399 | skip_os_lookups = true 400 | "#, 401 | ) 402 | .unwrap(); 403 | 404 | let hunspell: HunspellConfig = cfg.hunspell.expect("Must contain hunspell cfg"); 405 | assert!(hunspell.skip_os_lookups); 406 | 407 | let search_dirs = hunspell.search_dirs; 408 | let search_dirs2: Vec<_> = search_dirs.as_ref().clone(); 409 | assert!(!search_dirs2.is_empty()); 410 | 411 | assert_eq!(search_dirs.iter(false).count(), 2); 412 | 413 | #[cfg(target_os = "linux")] 414 | assert_eq!(search_dirs.iter(true).count(), 5); 415 | 416 | #[cfg(target_os = "windows")] 417 | assert_eq!(search_dirs.iter(true).count(), 2); 418 | 419 | #[cfg(target_os = "macos")] 420 | assert!(search_dirs.iter(true).count() >= 3); 421 | } 422 | 423 | #[test] 424 | fn partial_9() { 425 | let cfg = Config::parse( 426 | r#" 427 | [Reflow] 428 | max_line_length = 42 429 | "#, 430 | ) 431 | .unwrap(); 432 | assert_eq!( 433 | cfg.reflow.expect("Must contain reflow cfg").max_line_length, 434 | 42 435 | ); 436 | } 437 | } 438 | -------------------------------------------------------------------------------- /doc-chunks/src/literalset.rs: -------------------------------------------------------------------------------- 1 | pub use super::{TrimmedLiteral, TrimmedLiteralDisplay}; 2 | 3 | use crate::{CheckableChunk, CommentVariant, Range}; 4 | 5 | use std::fmt; 6 | 7 | /// A set of consecutive literals. 8 | /// 9 | /// Provides means to render them as a code block 10 | #[derive(Clone, Default, Debug, Hash, PartialEq, Eq)] 11 | pub struct LiteralSet { 12 | /// consecutive set of literals mapped by line number 13 | literals: Vec, 14 | /// lines spanned (start, end) inclusive 15 | pub coverage: (usize, usize), 16 | /// Track what kind of comment the literals are 17 | variant: CommentVariant, 18 | } 19 | 20 | impl LiteralSet { 21 | /// Initiate a new set based on the first literal 22 | pub fn from(literal: TrimmedLiteral) -> Self { 23 | Self { 24 | coverage: (literal.span().start.line, literal.span().end.line), 25 | variant: literal.variant(), 26 | literals: vec![literal], 27 | } 28 | } 29 | 30 | /// Add a literal to a literal set, if the previous lines literal already 31 | /// exists. 
32 | /// 33 | /// Returns literal within the Err variant if not adjacent 34 | pub fn add_adjacent(&mut self, literal: TrimmedLiteral) -> Result<(), TrimmedLiteral> { 35 | if literal.variant().category() != self.variant.category() { 36 | log::debug!( 37 | "Adjacent literal is not the same comment variant: {:?} vs {:?}", 38 | literal.variant().category(), 39 | self.variant.category() 40 | ); 41 | return Err(literal); 42 | } 43 | let previous_line = literal.span().end.line; 44 | if previous_line == self.coverage.1 + 1 { 45 | self.coverage.1 += 1; 46 | self.literals.push(literal); 47 | return Ok(()); 48 | } 49 | 50 | let next_line = literal.span().start.line; 51 | if next_line + 1 == self.coverage.0 { 52 | self.literals.push(literal); 53 | self.coverage.1 -= 1; 54 | return Ok(()); 55 | } 56 | 57 | Err(literal) 58 | } 59 | 60 | /// The set of trimmed literals that is covered. 61 | pub fn literals(&self) -> Vec<&TrimmedLiteral> { 62 | self.literals.iter().by_ref().collect() 63 | } 64 | 65 | /// The number of literals inside this set. 66 | pub fn len(&self) -> usize { 67 | self.literals.len() 68 | } 69 | 70 | /// Convert to a checkable chunk. 71 | /// 72 | /// Creates the map from content ranges to source spans. 73 | pub fn into_chunk(self) -> crate::CheckableChunk { 74 | let n = self.len(); 75 | let mut source_mapping = indexmap::IndexMap::with_capacity(n); 76 | let mut content = String::with_capacity(n * 120); 77 | if n > 0 { 78 | // cursor operates on characters 79 | let mut cursor = 0usize; 80 | // for use with `Range` 81 | let mut start; // inclusive 82 | let mut end; // exclusive 83 | let mut it = self.literals.iter(); 84 | let mut next = it.next(); 85 | while let Some(literal) = next { 86 | start = cursor; 87 | cursor += literal.len_in_chars(); 88 | end = cursor; 89 | 90 | let span = literal.span(); 91 | let range = Range { start, end }; 92 | 93 | // TODO this does not hold anymore for `#[doc=foo!(..)]`. 94 | // TODO where the span is covering `foo!()`, but the 95 | // TODO rendered length is 0. 96 | if literal.variant() != CommentVariant::MacroDocEqMacro { 97 | if let Some(span_len) = span.one_line_len() { 98 | assert_eq!(range.len(), span_len); 99 | } 100 | } 101 | // keep zero length values too, to guarantee continuity 102 | source_mapping.insert(range, span); 103 | content.push_str(literal.as_str()); 104 | // the newline is _not_ covered by a span, after all it's inserted by us! 105 | next = it.next(); 106 | if next.is_some() { 107 | // for the last, skip the newline 108 | content.push('\n'); 109 | cursor += 1; 110 | } 111 | } 112 | } 113 | // all literals in a set have the same variant, so lets take the first one 114 | let variant = if let Some(literal) = self.literals.first() { 115 | literal.variant() 116 | } else { 117 | crate::CommentVariant::Unknown 118 | }; 119 | CheckableChunk::from_string(content, source_mapping, variant) 120 | } 121 | } 122 | 123 | impl<'s> fmt::Display for LiteralSet { 124 | fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { 125 | let n = self.len(); 126 | if n > 0 { 127 | for literal in self.literals.iter().take(n - 1) { 128 | writeln!(formatter, "{}", literal.as_str())?; 129 | } 130 | if let Some(literal) = self.literals.last() { 131 | write!(formatter, "{}", literal.as_str())?; 132 | } 133 | } 134 | Ok(()) 135 | } 136 | } 137 | /// A debug helper to print concatenated length of all items. 138 | #[macro_export] 139 | macro_rules! chyrp_dbg { 140 | ($first:literal $(, $( $line:literal ),+ )? $(,)? $(@ $prefix:literal)? 
) => { 141 | dbg!(concat!($first $( $(, "\n", $line )+ )?).len()); 142 | dbg!(concat!($first $( $(, "\n", $line )+ )?)); 143 | } 144 | } 145 | 146 | /// A helper macro creating valid doc string using the macro syntax 147 | /// `#[doc=r#"..."#]`. 148 | /// 149 | /// Example: 150 | /// 151 | /// ```rust 152 | /// # use doc_chunks::chyrp_up; 153 | /// let x = chyrp_up!(["some", "thing"]); 154 | /// let y = r##"#[doc=r#"some 155 | /// thing"#] 156 | /// struct ChyrpChyrp;"##; 157 | /// 158 | /// assert_eq!(x,y); 159 | /// ``` 160 | #[macro_export] 161 | macro_rules! chyrp_up { 162 | ([ $( $line:literal ),+ $(,)? ] $(@ $prefix:literal)? ) => { 163 | chyrp_up!( $( $line ),+ $(@ $prefix)? ) 164 | }; 165 | ($first:literal $(, $( $line:literal ),+ )? $(,)? $(@ $prefix:literal)? ) => { 166 | concat!($( $prefix ,)? r##"#[doc=r#""##, $first $( $(, "\n", $line )+ )?, r##""#]"##, "\n", "struct ChyrpChyrp;") 167 | }; 168 | } 169 | 170 | /// A helper macro creating valid doc string using the macro syntax 171 | /// `/// ...`. 172 | /// 173 | /// Example: 174 | /// 175 | /// ```rust 176 | /// # use doc_chunks::fluff_up; 177 | /// let x = fluff_up!(["some", "thing"]); 178 | /// let y = r#"/// some 179 | /// /// thing 180 | /// struct Fluff;"#; 181 | /// 182 | /// assert_eq!(x,y); 183 | /// ``` 184 | #[macro_export] 185 | macro_rules! fluff_up { 186 | ([ $( $line:literal ),+ $(,)?] $( @ $prefix:literal)?) => { 187 | fluff_up!($( $line ),+ $(@ $prefix)?) 188 | }; 189 | ($($line:literal ),+ $(,)? ) => { 190 | fluff_up!($( $line ),+ @ "") 191 | }; 192 | ($($line:literal ),+ $(,)? @ $prefix:literal ) => { 193 | concat!("" $(, $prefix, "/// ", $line, "\n")+ , "struct Fluff;") 194 | }; 195 | } 196 | 197 | pub mod testhelper { 198 | use super::*; 199 | use crate::testcase::annotated_literals; 200 | 201 | pub fn gen_literal_set(source: &str) -> LiteralSet { 202 | let literals = dbg!(annotated_literals(dbg!(source))); 203 | 204 | let mut iter = dbg!(literals).into_iter(); 205 | let literal = iter 206 | .next() 207 | .expect("Must have at least one item in laterals"); 208 | let mut cls = LiteralSet::from(literal); 209 | 210 | for literal in iter { 211 | assert!(cls.add_adjacent(literal).is_ok()); 212 | } 213 | dbg!(cls) 214 | } 215 | } 216 | 217 | #[cfg(test)] 218 | mod tests { 219 | use super::*; 220 | 221 | use super::testhelper::gen_literal_set; 222 | use crate::util::load_span_from; 223 | use crate::util::sub_chars; 224 | 225 | #[test] 226 | fn fluff_one() { 227 | const RAW: &str = fluff_up!(["a"]); 228 | const EXPECT: &str = r#"/// a 229 | struct Fluff;"#; 230 | assert_eq!(RAW, EXPECT); 231 | } 232 | 233 | #[test] 234 | fn fluff_multi() { 235 | const RAW: &str = fluff_up!(["a", "b", "c"]); 236 | const EXPECT: &str = r#"/// a 237 | /// b 238 | /// c 239 | struct Fluff;"#; 240 | assert_eq!(RAW, EXPECT); 241 | } 242 | 243 | // range within the literalset content string 244 | const EXMALIBU_RANGE_START: usize = 9; 245 | const EXMALIBU_RANGE_END: usize = EXMALIBU_RANGE_START + 8; 246 | const EXMALIBU_RANGE: Range = EXMALIBU_RANGE_START..EXMALIBU_RANGE_END; 247 | const RAW: &str = r#"/// Another exmalibu verification pass. 248 | /// 🚤w🌴x🌋y🍈z🍉0 249 | /// ♫ Boats float, ♫♫ don't they? ♫ 250 | struct Vikings; 251 | "#; 252 | 253 | const EXMALIBU_CHUNK_STR: &str = r#" Another exmalibu verification pass. 254 | 🚤w🌴x🌋y🍈z🍉0 255 | ♫ Boats float, ♫♫ don't they? 
♫"#; 256 | 257 | #[test] 258 | fn combine_literals() { 259 | let _ = env_logger::builder() 260 | .is_test(true) 261 | .filter(None, log::LevelFilter::Trace) 262 | .try_init(); 263 | 264 | let cls = gen_literal_set(RAW); 265 | 266 | assert_eq!(cls.len(), 3); 267 | assert_eq!(cls.to_string(), EXMALIBU_CHUNK_STR.to_owned()); 268 | } 269 | 270 | #[test] 271 | fn coverage() { 272 | let _ = env_logger::builder() 273 | .is_test(true) 274 | .filter(None, log::LevelFilter::Trace) 275 | .try_init(); 276 | 277 | let literal_set = gen_literal_set(RAW); 278 | let chunk: CheckableChunk = literal_set.into_chunk(); 279 | let map_range_to_span = chunk.find_spans(EXMALIBU_RANGE); 280 | let (_range, _span) = map_range_to_span 281 | .first() 282 | .expect("Must be at least one literal"); 283 | 284 | let range_for_raw_str = Range { 285 | start: EXMALIBU_RANGE_START, 286 | end: EXMALIBU_RANGE_END, 287 | }; 288 | 289 | // check test integrity 290 | assert_eq!("exmalibu", &EXMALIBU_CHUNK_STR[EXMALIBU_RANGE]); 291 | 292 | // check actual result 293 | assert_eq!( 294 | &EXMALIBU_CHUNK_STR[EXMALIBU_RANGE], 295 | &chunk.as_str()[range_for_raw_str.clone()] 296 | ); 297 | } 298 | 299 | macro_rules! test_raw { 300 | ($test: ident, [ $($txt: literal),+ $(,)? ]; $range: expr, $expected: literal) => { 301 | #[test] 302 | fn $test() { 303 | test_raw!([$($txt),+] ; $range, $expected); 304 | } 305 | }; 306 | 307 | ([$($txt:literal),+ $(,)?]; $range: expr, $expected: literal) => { 308 | let _ = env_logger::builder() 309 | .filter(None, log::LevelFilter::Trace) 310 | .is_test(true) 311 | .try_init(); 312 | 313 | let range: Range = $range; 314 | 315 | const RAW: &str = fluff_up!($( $txt),+); 316 | const START: usize = 3; // skip `///` which is the span we get from the literal 317 | let _end: usize = START $( + $txt.len())+; 318 | let literal_set = gen_literal_set(dbg!(RAW)); 319 | 320 | 321 | let chunk: CheckableChunk = dbg!(literal_set.into_chunk()); 322 | let map_range_to_span = chunk.find_spans(range.clone()); 323 | 324 | let mut iter = dbg!(map_range_to_span).into_iter(); 325 | let (range, _span) = iter.next().expect("Must be at least one literal"); 326 | 327 | // the range for raw str contains an offset of 3 when used with `///` 328 | let range_for_raw_str = Range { 329 | start: range.start + START, 330 | end: range.end + START, 331 | }; 332 | 333 | assert_eq!(&RAW[range_for_raw_str.clone()], &chunk.as_str()[range], "Testing range extract vs stringified chunk for integrity"); 334 | assert_eq!(&RAW[range_for_raw_str], $expected, "Testing range extract vs expected"); 335 | }; 336 | } 337 | 338 | #[test] 339 | fn first_line_extract_0() { 340 | test_raw!(["livelyness", "yyy"] ; 2..6, "ivel"); 341 | } 342 | 343 | #[test] 344 | fn first_line_extract_1() { 345 | test_raw!(["+ 12 + x0"] ; 9..10, "0"); 346 | } 347 | 348 | #[test] 349 | fn literal_set_into_chunk() { 350 | let _ = env_logger::builder() 351 | .filter(None, log::LevelFilter::Trace) 352 | .is_test(true) 353 | .try_init(); 354 | 355 | let literal_set = dbg!(gen_literal_set(RAW)); 356 | 357 | let chunk = dbg!(literal_set.clone().into_chunk()); 358 | let it = literal_set.literals(); 359 | 360 | for (range, span, s) in itertools::cons_tuples(chunk.iter().zip(it)) { 361 | if range.len() == 0 { 362 | continue; 363 | } 364 | assert_eq!( 365 | load_span_from(RAW.as_bytes(), span.clone()).expect("Span extraction must work"), 366 | sub_chars(chunk.as_str(), range.clone()) 367 | ); 368 | 369 | let r: Range = span.to_content_range(&chunk).expect("Should work"); 370 | // the range 
for raw str contains an offset of 3 when used with `///` 371 | assert_eq!( 372 | sub_chars(chunk.as_str(), range.clone()), 373 | s.as_str().to_owned() 374 | ); 375 | assert_eq!(&r, range); 376 | } 377 | } 378 | } 379 | --------------------------------------------------------------------------------