├── .github ├── stale.yml ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── --ask-a-question.md │ ├── ---request-a-new-feature.md │ └── ---report-a-bug.md ├── auto_pr_team.yml ├── FUNDING.yml ├── no-response.yml └── pull_request_template.md ├── demo ├── .gitignore ├── member │ ├── true │ │ ├── README.md │ │ ├── lib.rs │ │ └── Cargo.toml │ ├── stray.rs │ └── procmacro │ │ ├── src │ │ └── lib.rs │ │ └── Cargo.toml ├── .config │ ├── topic.dic │ └── spellcheck.toml ├── src │ ├── nested │ │ ├── justone.rs │ │ ├── justtwo.rs │ │ ├── again │ │ │ ├── mod.rs │ │ │ └── code.rs │ │ ├── fragments │ │ │ ├── simple.rs │ │ │ └── enumerate.rs │ │ ├── fragments.rs │ │ └── mod.rs │ ├── main.rs │ └── lib.rs ├── Cargo.lock ├── README.md └── Cargo.toml ├── .gitignore ├── nlprule-data └── 0.6.4 │ └── en │ ├── en_rules.bin.xz │ └── en_tokenizer.bin.xz ├── src ├── errors.rs ├── config │ ├── reflow.rs │ ├── nlprules.rs │ ├── regex.rs │ ├── search_dirs.rs │ ├── iso.rs │ ├── hunspell.rs │ └── mod.rs ├── main.rs ├── checker │ ├── dummy.rs │ ├── nlprules.rs │ ├── cached.rs │ ├── dictaffix.rs │ ├── quirks.rs │ ├── mod.rs │ ├── zspell.rs │ └── spellbook.rs ├── tinhat.rs ├── lib.rs ├── action │ └── bandaid.rs └── traverse │ └── iter.rs ├── doc-chunks ├── README.md ├── src │ ├── errors.rs │ ├── testcase.rs │ ├── cluster.rs │ ├── lib.rs │ └── literalset.rs └── Cargo.toml ├── .pre-commit-hooks.yaml ├── .config ├── lingo.dic └── spellcheck.toml ├── docs ├── checkers.md ├── features.md ├── automation.md ├── remedy.md └── configuration.md ├── LICENSE-MIT ├── .vscode └── launch.json ├── cliff.toml ├── tests └── signal_handler.rs ├── hunspell-data └── en_US.aff ├── Cargo.toml ├── README.md └── LICENSE-APACHE /.github/stale.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 
-------------------------------------------------------------------------------- /demo/member/true/README.md: -------------------------------------------------------------------------------- 1 | # READ ME (maybe) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode/settings.json 3 | -------------------------------------------------------------------------------- /demo/.config/topic.dic: -------------------------------------------------------------------------------- 1 | 10 2 | topic/A 3 | tkae/topic -------------------------------------------------------------------------------- /demo/src/nested/justone.rs: -------------------------------------------------------------------------------- 1 | /// Wroeng. 2 | struct W; -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /demo/src/nested/justtwo.rs: -------------------------------------------------------------------------------- 1 | /// Alphy 2 | /// Beto 3 | struct Abc; -------------------------------------------------------------------------------- /.github/auto_pr_team.yml: -------------------------------------------------------------------------------- 1 | org: cargo-spellcheck 2 | team: contributors 3 | -------------------------------------------------------------------------------- /demo/member/stray.rs: -------------------------------------------------------------------------------- 1 | /// Nobady references this. 
2 | struct Lost; 3 | -------------------------------------------------------------------------------- /demo/src/nested/again/mod.rs: -------------------------------------------------------------------------------- 1 | mod code; 2 | 3 | /// Again. 4 | struct Again; 5 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: drahnr 2 | patreon: drahnr 3 | liberapay: drahnr 4 | open_collective: bernhard-schuster 5 | -------------------------------------------------------------------------------- /demo/member/procmacro/src/lib.rs: -------------------------------------------------------------------------------- 1 | /// Foo bar baz. 2 | fn empty() { 3 | unimplemented!("and never will be") 4 | } 5 | -------------------------------------------------------------------------------- /demo/member/true/lib.rs: -------------------------------------------------------------------------------- 1 | //! Some extar crate docs. 2 | 3 | /// ZZZZzzz makes the snake. 4 | pub fn x() { 5 | 6 | } 7 | -------------------------------------------------------------------------------- /demo/src/nested/fragments/simple.rs: -------------------------------------------------------------------------------- 1 | /// First. 2 | /// Secondo. 3 | /// Thurd number one. 4 | /// Another thurd. 5 | struct Q; -------------------------------------------------------------------------------- /nlprule-data/0.6.4/en/en_rules.bin.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drahnr/cargo-spellcheck/HEAD/nlprule-data/0.6.4/en/en_rules.bin.xz -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | //! Global error usage without cluttering each file. 
2 | pub use color_eyre::eyre::{bail, eyre, Error, Result, WrapErr}; 3 | -------------------------------------------------------------------------------- /nlprule-data/0.6.4/en/en_tokenizer.bin.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drahnr/cargo-spellcheck/HEAD/nlprule-data/0.6.4/en/en_tokenizer.bin.xz -------------------------------------------------------------------------------- /demo/src/nested/fragments/enumerate.rs: -------------------------------------------------------------------------------- 1 | /// Secret. 2 | /// 3 | /// Somethign secret. 4 | enum Instrument { 5 | /// An instroment. 6 | Xylophon, 7 | } -------------------------------------------------------------------------------- /doc-chunks/README.md: -------------------------------------------------------------------------------- 1 | # doc-chunks 2 | 3 | Extract clustered documentation lines and provide 4 | a spanned and commonmark aware overlay with a 5 | span based mapping. -------------------------------------------------------------------------------- /demo/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "cargo-spellcheck-demo" 5 | version = "1.0.0" 6 | -------------------------------------------------------------------------------- /.pre-commit-hooks.yaml: -------------------------------------------------------------------------------- 1 | - id: cargo-spellcheck 2 | name: cargo-spellcheck 3 | description: Spellcheck rust files 4 | entry: cargo-spellcheck 5 | language: rust 6 | types: [rust] 7 | args: ["--code=99", "--"] 8 | -------------------------------------------------------------------------------- /demo/src/nested/again/code.rs: -------------------------------------------------------------------------------- 1 | //! 2 | //! 
Enclozed `codez` not checked. 3 | //! 4 | 5 | 6 | /// Do you like `wroeng` very mouch? 7 | /// Sharon stayed home from school the other day. Because she was sick. 8 | struct Coda; 9 | -------------------------------------------------------------------------------- /.github/no-response.yml: -------------------------------------------------------------------------------- 1 | daysUntilClose: 7 2 | 3 | responseRequiredLabel: needs-more-information 4 | 5 | closeComment: > 6 | Feel free to re-open once there is more information available. 7 | 8 | If you are not the original author, please create a new issue. 9 | -------------------------------------------------------------------------------- /demo/.config/spellcheck.toml: -------------------------------------------------------------------------------- 1 | [Hunspell] 2 | lang = "en_US" 3 | search_dirs = ["."] 4 | extra_dictionaries = ["topic.dic"] 5 | 6 | [Hunspell.quirks] 7 | transform_regex = ["^'([^\\s])'$", "^[0-9]+x$"] 8 | allow_concatenation = true 9 | allow_dashed = false 10 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # zilly demo proyekt 2 | 3 | A plethora of spelling mistackz inclusive. 4 | 5 |
6 | ᐲ🠒🍉 see #104
7 |
8 |
9 | 'Verify #88'
10 | '"Does not pop up"'
11 | "ever again"
12 |
13 | Mojis are ok 🍈🍐🍇
--------------------------------------------------------------------------------
/demo/src/main.rs:
--------------------------------------------------------------------------------
1 | /*! Just a lil smthin smthin. */
2 |
3 | mod lib;
4 |
5 | /* dev */
6 | pub mod nested;
7 |
8 | /**
9 | Not so preferable doc comment, use `///` instead.
10 | */
11 | fn main() {
12 | lib::a();
13 | lib::b();
14 | lib::c();
15 | }
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/--ask-a-question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "❓ Ask a question"
3 | about: Something is not clear to you from the documentation
4 | title: ''
5 | labels: documentation, question
6 | assignees: drahnr
7 |
8 | ---
9 |
10 | **Q:**
11 |
12 |
13 |
--------------------------------------------------------------------------------
/demo/member/procmacro/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cargo-spellcheck-demo-proc-macro"
3 | version = "1.0.0"
4 | authors = ["Bernhard Schuster ` to return a non-zero
6 | return code if mistakes are found instead of `0`.
7 |
8 | ### GitHub Actions
9 |
10 | [Create a workflow](https://docs.github.com/en/actions/quickstart) for your project and add the following example as steps.
11 |
12 | The first step installs cargo-spellcheck on the runner.
13 | The second step loads your source code into the runner environment.
14 | The third step runs a command in a shell like you would normally do with cargo spellcheck.
15 | Specify your arguments as needed.
16 |
17 | ```yaml
18 | - name: Install cargo-spellcheck
19 | uses: taiki-e/install-action@v2
20 | with:
21 | tool: cargo-spellcheck
22 |
23 | - uses: actions/checkout@v3
24 |
25 | - name: Run cargo-spellcheck
26 | run: cargo spellcheck --code 1
27 | ```
28 |
29 | ### Other
30 |
31 | Install `cargo-spellcheck` via [`cargo-binstall`](https://github.com/cargo-bins/cargo-binstall) and then use it like you would locally.
32 | Alternatively you can use `cargo install cargo-spellcheck` to compile it from source.
33 |
34 | ```bash
35 | cargo binstall --no-confirm cargo-spellcheck
36 |
37 | cargo-spellcheck --code 1
38 | ```
39 |
40 | ## Git hooks
41 |
42 | If you want to manually configure `cargo-spellcheck` to run on git commits:
43 |
44 | ```bash
45 | #!/usr/bin/env bash
46 |
47 | # Redirect output to stderr.
48 | exec 1>&2
49 |
50 | exec cargo spellcheck --code 99 $(git diff-index --cached --name-only --diff-filter=AM HEAD)
51 | ```
52 |
53 | Alternatively you can use [`pre-commit`](https://pre-commit.com/) to manage your git commit hooks
54 | for you. This can be done by appending these lines to `.pre-commit-config.yaml` in your project:
55 |
56 | ```yaml
57 | - repo: https://github.com/drahnr/cargo-spellcheck.git
58 | rev: master
59 | - id: cargo-spellcheck
60 |
61 | ```
62 |
63 | You will need to install the hooks running `pre-commit install-hooks` and `cargo-spellcheck` will
64 | get installed and wired up as a git commit hook for you.
65 |
--------------------------------------------------------------------------------
/src/config/regex.rs:
--------------------------------------------------------------------------------
1 | use super::*;
2 |
3 | #[derive(Debug)]
4 | pub struct WrappedRegex(pub Regex);
5 |
6 | impl Clone for WrappedRegex {
7 | fn clone(&self) -> Self {
8 | // @todo inefficient.. but right now this should almost never happen
9 | // @todo implement a lazy static `Arc>`
10 | Self(Regex::new(self.as_str()).unwrap())
11 | }
12 | }
13 |
14 | impl std::ops::Deref for WrappedRegex {
15 | type Target = Regex;
16 | fn deref(&self) -> &Self::Target {
17 | &self.0
18 | }
19 | }
20 |
21 | impl std::convert::AsRef for WrappedRegex {
22 | fn as_ref(&self) -> &Regex {
23 | &self.0
24 | }
25 | }
26 |
27 | impl Serialize for WrappedRegex {
28 | fn serialize(&self, serializer: S) -> Result
29 | where
30 | S: serde::ser::Serializer,
31 | {
32 | serializer.serialize_str(self.as_str())
33 | }
34 | }
35 |
36 | impl<'de> Deserialize<'de> for WrappedRegex {
37 | fn deserialize(deserializer: D) -> Result
38 | where
39 | D: serde::de::Deserializer<'de>,
40 | {
41 | deserializer
42 | .deserialize_any(RegexVisitor)
43 | .map(WrappedRegex::from)
44 | }
45 | }
46 |
47 | impl From for Regex {
48 | fn from(val: WrappedRegex) -> Self {
49 | val.0
50 | }
51 | }
52 |
53 | impl From for WrappedRegex {
54 | fn from(other: Regex) -> WrappedRegex {
55 | WrappedRegex(other)
56 | }
57 | }
58 |
59 | struct RegexVisitor;
60 |
61 | impl<'de> serde::de::Visitor<'de> for RegexVisitor {
62 | type Value = Regex;
63 |
64 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
65 | formatter.write_str("String with valid regex expression")
66 | }
67 |
68 | fn visit_str(self, value: &str) -> Result
69 | where
70 | E: serde::de::Error,
71 | {
72 | let re = Regex::new(value).map_err(E::custom)?;
73 | Ok(re)
74 | }
75 |
76 | fn visit_string(self, value: String) -> Result
77 | where
78 | E: serde::de::Error,
79 | {
80 | self.visit_str::(value.as_str())
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/src/checker/dummy.rs:
--------------------------------------------------------------------------------
1 | //! Everything is wrong, so wrong, even if it's correct.
2 | //!
3 | //! A test checker, only available for unit tests.
4 |
5 | // use super::tokenize;
6 | use super::{apply_tokenizer, Checker};
7 |
8 | use crate::suggestion::{Detector, Suggestion};
9 | use crate::util::sub_chars;
10 | use crate::{errors::*, CheckableChunk, ContentOrigin};
11 |
12 | /// A test checker that tokenizes and marks everything as wrong
13 | pub struct DummyChecker;
14 |
15 | impl DummyChecker {
16 | pub fn new(_config: &::Config) -> Result {
17 | Ok(Self)
18 | }
19 | }
20 |
21 | impl Checker for DummyChecker {
22 | type Config = ();
23 |
24 | fn detector() -> Detector {
25 | Detector::Dummy
26 | }
27 |
28 | fn check<'a, 's>(
29 | &self,
30 | origin: &ContentOrigin,
31 | chunks: &'a [CheckableChunk],
32 | ) -> Result>>
33 | where
34 | 'a: 's,
35 | {
36 | let tokenizer = super::tokenizer::<&std::path::PathBuf>(None)?;
37 |
38 | let mut acc = Vec::with_capacity(chunks.len());
39 | let chunk = chunks
40 | .first()
41 | .expect("DummyChecker expects at least one chunk");
42 | let plain = chunk.erase_cmark(&Default::default());
43 | let txt = plain.as_str();
44 | for (index, range) in apply_tokenizer(&tokenizer, txt).enumerate() {
45 | log::trace!("****Token[{}]: >{}<", index, sub_chars(txt, range.clone()));
46 | let detector = Detector::Dummy;
47 | let range2span = plain.find_spans(range.clone());
48 | for (range, span) in range2span {
49 | log::trace!(
50 | "Suggestion for {:?} -> {}",
51 | range,
52 | chunk.display(range.clone())
53 | );
54 | let replacements = vec![format!("replacement_{index}")];
55 | let suggestion = Suggestion {
56 | detector,
57 | span,
58 | range,
59 | origin: origin.clone(),
60 | replacements,
61 | chunk,
62 | description: None,
63 | };
64 | acc.push(suggestion);
65 | }
66 | }
67 | Ok(acc)
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/cliff.toml:
--------------------------------------------------------------------------------
1 | # configuration file for git-cliff (0.1.0)
2 |
3 | [changelog]
4 | # changelog header
5 | header = """
6 | # Changelog
7 | All notable changes to this project will be documented in this file.\n
8 | """
9 | # template for the changelog body
10 | # https://tera.netlify.app/docs/#introduction
11 | body = """
12 | {% if version %}\
13 | ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
14 | {% else %}\
15 | ## [unreleased]
16 | {% endif %}\
17 | {% for group, commits in commits | group_by(attribute="group") %}
18 | ### {{ group | upper_first }}
19 | {% for commit in commits %}
20 | - {% if commit.breaking %}[**breaking**] {% endif %}{{ commit.message | upper_first }}\
21 | {% endfor %}
22 | {% endfor %}\n
23 | """
24 | # remove the leading and trailing whitespaces from the template
25 | trim = true
26 | # changelog footer
27 | footer = """
28 |
29 | """
30 |
31 | [git]
32 | # parse the commits based on https://www.conventionalcommits.org
33 | conventional_commits = true
34 | # filter out the commits that are not conventional
35 | filter_unconventional = true
36 | # regex for parsing and grouping commits
37 | commit_parsers = [
38 | { message = "^feat", group = "Features" },
39 | { message = "^fix", group = "Bug Fixes" },
40 | { message = "^doc", group = "Documentation" },
41 | { message = "^perf", group = "Performance" },
42 | { message = "^refactor", group = "Refactor" },
43 | { message = "^style", group = "Styling" },
44 | { message = "^test", group = "Testing" },
45 | { message = "^chore\\(release\\): prepare for", skip = true },
46 | { message = "^(chore[/:]\\s*)?(cargo\\s+)?(fmt|fix|clippy|spellcheck)", skip = true },
47 | { message = "^[vV]?0\\.[0-9]\\.[0-9]+", skip = true },
48 | { message = "^\\(cargo-release\\)", skip = true },
49 | { message = "^(chore/)?rele?ase:", skip = true },
50 | { message = "^chore", group = "Miscellaneous Tasks" },
51 | { body = ".*security", group = "Security" },
52 | ]
53 | # filter out the commits that are not matched by commit parsers
54 | filter_commits = false
55 | # glob pattern for matching git tags
56 | tag_pattern = "v[0-9]*"
57 | # regex for ignoring tags
58 | ignore_tags = ""
59 | # sort the tags topologically
60 | topo_order = false
61 | # sort the commits inside sections by oldest/newest order
62 | sort_commits = "oldest"
63 |
--------------------------------------------------------------------------------
/tests/signal_handler.rs:
--------------------------------------------------------------------------------
1 | #![cfg(target_os = "linux")]
2 |
3 | use nix::sys::signal::*;
4 | use nix::sys::wait::*;
5 | use nix::unistd::Pid;
6 | use nix::unistd::{fork, ForkResult};
7 |
8 | use cargo_spellcheck::{signal_handler, TinHat};
9 |
10 | #[test]
11 | fn signal_handler_works() -> Result<(), Box> {
12 | let _ = env_logger::Builder::new()
13 | .filter_level(log::LevelFilter::Trace)
14 | .is_test(true)
15 | .try_init();
16 |
17 | println!("Signal handler check");
18 |
19 | const QUIT: Signal = Signal::SIGQUIT;
20 |
21 | let sigs = {
22 | let mut sigs = SigSet::empty();
23 | sigs.add(QUIT);
24 | sigs
25 | };
26 |
27 | // best effort unblock
28 | let _ = sigprocmask(SigmaskHow::SIG_UNBLOCK, Some(&sigs), None);
29 | let _ = pthread_sigmask(SigmaskHow::SIG_UNBLOCK, Some(&sigs), None);
30 |
31 | if let Ok(ForkResult::Parent { child, .. }) = unsafe { fork() } {
32 | println!("[parent] Wait for child");
33 |
34 | loop {
35 | let options = WaitPidFlag::WNOHANG;
36 | match nix::sys::wait::waitpid(child, Some(options)) {
37 | Ok(WaitStatus::StillAlive) => {
38 | std::thread::sleep(std::time::Duration::from_millis(50));
39 | continue;
40 | }
41 | Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => {
42 | assert_eq!(signal, QUIT);
43 | unreachable!("Should exit via exit. qed")
44 | }
45 | Ok(WaitStatus::Exited(_pid, _exit_code)) => {
46 | return Ok(());
47 | }
48 | Ok(ws) => unreachable!("Unexpected wait status: {ws:?}"),
49 | Err(errno) => unreachable!("Did not expect an error: {errno:?}"),
50 | }
51 | }
52 | } else {
53 | signal_handler(|| {});
54 |
55 | // signal while blocking signals
56 | {
57 | let hat = TinHat::on();
58 | println!("[child] Raise signal");
59 |
60 | kill(Pid::this(), QUIT).unwrap();
61 |
62 | std::thread::sleep(std::time::Duration::from_millis(1));
63 | drop(hat);
64 | }
65 |
66 | std::thread::sleep(std::time::Duration::from_secs(10_000));
67 | unreachable!("[child] Signal handler exits before panic.");
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/tinhat.rs:
--------------------------------------------------------------------------------
1 | //! Tinhat
2 | //!
3 | //! Makes sure the cosmic signals don't meddle with IO that's in progress.
4 | //!
5 | //! ```
6 | //! # use cargo_spellcheck::TinHat;
7 | //! let th = TinHat::on();
8 | //! // do IO
9 | //! drop(th);
10 | //! ```
11 |
12 | use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
13 |
14 | #[cfg(not(target_os = "windows"))]
15 | use signal_hook::{
16 | consts::signal::{SIGINT, SIGQUIT, SIGTERM},
17 | iterator,
18 | };
19 |
20 | /// Global atomic to block signal processing while a file write is currently in
21 | /// progress.
22 | static WRITE_IN_PROGRESS: AtomicU16 = AtomicU16::new(0);
23 | /// Delay if the signal handler is currently running.
24 | static SIGNAL_HANDLER_AT_WORK: AtomicBool = AtomicBool::new(false);
25 |
26 | /// Handle incoming signals.
27 | ///
28 | /// Only relevant for *-nix platforms.
29 | #[cfg(not(target_os = "windows"))]
30 | pub fn signal_handler(fx: F)
31 | where
32 | F: FnOnce() + Send + 'static,
33 | {
34 | let mut signals =
35 | iterator::Signals::new([SIGTERM, SIGINT, SIGQUIT]).expect("Failed to create Signals");
36 |
37 | std::thread::spawn(move || {
38 | for s in signals.forever() {
39 | match s {
40 | SIGTERM | SIGINT | SIGQUIT => {
41 | SIGNAL_HANDLER_AT_WORK.store(true, Ordering::SeqCst);
42 | // Wait for potential writing to disk to be finished.
43 | while WRITE_IN_PROGRESS.load(Ordering::Acquire) > 0 {
44 | std::hint::spin_loop();
45 | std::thread::yield_now();
46 | }
47 | fx();
48 | signal_hook::low_level::exit(130);
49 | }
50 | sig => log::warn!("Received unhandled signal {sig}, ignoring"),
51 | }
52 | }
53 | });
54 | }
55 |
56 | /// Blocks (UNIX) signals.
57 | pub struct TinHat;
58 |
59 | impl TinHat {
60 | /// Put the tin hat on, and only allow signals being processed once it's
61 | /// dropped.
62 | pub fn on() -> Self {
63 | // If there is a signal handler in progress, block.
64 | while SIGNAL_HANDLER_AT_WORK.load(Ordering::Acquire) {
65 | std::hint::spin_loop();
66 | std::thread::yield_now();
67 | }
68 | let _ = WRITE_IN_PROGRESS.fetch_add(1, Ordering::Release);
69 | Self
70 | }
71 | }
72 |
73 | impl Drop for TinHat {
74 | fn drop(&mut self) {
75 | let _ = WRITE_IN_PROGRESS.fetch_sub(1, Ordering::Release);
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/docs/remedy.md:
--------------------------------------------------------------------------------
1 | # Fixing spelling mistakes
2 |
3 | While cargo-spellcheck is good at _pointing out_ existing spelling mistakes,
4 | it's sometimes not obvious how to resolve them or what the correct way is
5 | to resolve them.
6 |
7 | The following covers an abstracted set of commonly encountered `cargo spellcheck`
8 | complaints and how to resolve them:
9 |
10 | ## Configuration
11 |
12 | Make sure your runs are idempotent if you run on two different systems,
13 | which is easiest achieved by using the builtin affix and dictionaries
14 | besides the topic specific lingo dictionary that should come with your project.
15 |
16 | ```toml
17 | # .config/spellcheck.toml
18 |
19 | [Hunspell]
20 | # snip
21 | skip_os_lookups = true
22 | use_builtin = true
23 | # snip
24 | ```
25 |
26 | ---
27 |
28 | Avoiding `nlprule` backend by passing `--checkers=hunspell` might be a good idea,
29 | since `nlprule` tends to have a few false positives.
30 |
31 | ## Examples
32 |
33 | ### Missing word variants
34 |
35 | Sometimes some word forms belong into topic specific lingo and as such should be added to
36 | the topic specific dictionary. Make use of suffix patterns such as `/S` for plural `s` and `/M` for `'s`. This will keep your dictionary to a minimum. Please check the [affix file included here](./hunspell-data/en_US.aff) or your OS' provided affix file.
37 | [It is required to understand the slightly arcane format of `.aff` and `.dic` files.](https://www.systutorials.com/docs/linux/man/4-hunspell/#lbAE) which is also available via `man 4 hunspell`.
38 |
39 | ### Types in doc comments
40 |
41 | ```raw
42 | lib.rs : 2
43 | 858 | See [MmrLeafVersion] type documentation for more details.
44 | | ^^^^^^^^^^^^^^
45 | | Possible spelling mistake found.
46 | ```
47 |
48 | can be resolved by using
49 |
50 | ```md
51 | [`MmrLeafVersion`]
52 | ```
53 |
54 | with additional ticks.
55 |
56 | This is a general pattern for _types_ that make an appearance in the doc comments.
57 |
58 | ### Patterns
59 |
60 | In some cases it's a pattern one wants to whitelist, such as `10x` or `117x`, which can be done via
61 | the configuration by adding an allowlist regex `^[0-9]+x$`.
62 |
63 |
64 | ### TODO, XXX, and FIXME
65 |
66 | Should not be present in doc comments, but only make it into developer comments, i.e. `// FIXME foo` or `/* FIXME foo */`
67 |
68 | ### markdown: autolink
69 |
70 |
71 | ```raw
72 | error: spellcheck(Hunspell)
73 | --> test.md:96
74 | |
75 | 96 | The test coverage in `lcov` can the be published to .
76 | | ^^^^^^^
77 | | - codec
78 | |
79 | | Possible spelling mistake found.
80 | ```
81 |
82 | will spellcheck all components of the url, since it is not a _valid_ autolink. Add the protocol type.
83 |
84 | ```md
85 |
86 | ```
87 |
88 | and the content will be omitted from spellchecking.
89 |
--------------------------------------------------------------------------------
/src/config/search_dirs.rs:
--------------------------------------------------------------------------------
1 | use super::*;
2 |
3 | /// Obtain OS specific search directories.
4 | fn os_specific_search_dirs() -> &'static [PathBuf] {
5 | lazy_static::lazy_static! {
6 | static ref OS_SPECIFIC_LOOKUP_DIRS: Vec =
7 | if cfg!(target_os = "macos") {
8 | directories::BaseDirs::new()
9 | .map(|base| vec![base.home_dir().to_owned().join("/Library/Spelling/"), PathBuf::from("/Library/Spelling/")])
10 | .unwrap_or_default()
11 | } else if cfg!(target_os = "linux") {
12 | vec![
13 | // Fedora
14 | PathBuf::from("/usr/share/myspell/"),
15 | PathBuf::from("/usr/share/hunspell/"),
16 | // Arch Linux
17 | PathBuf::from("/usr/share/myspell/dicts/"),
18 | ]
19 | } else {
20 | Vec::new()
21 | };
22 |
23 | }
24 | OS_SPECIFIC_LOOKUP_DIRS.as_slice()
25 | }
26 |
27 | /// A collection of search directories. OS specific paths are only provided in
28 | /// the iterator.
29 | #[derive(Debug, Clone)]
30 | pub struct SearchDirs(pub Vec);
31 |
32 | impl Default for SearchDirs {
33 | fn default() -> Self {
34 | Self(Vec::with_capacity(8))
35 | }
36 | }
37 |
38 | impl SearchDirs {
39 | pub fn iter(&self, extend_by_os: bool) -> impl Iterator- {
40 | let chained = if extend_by_os {
41 | os_specific_search_dirs().iter()
42 | } else {
43 | [].iter()
44 | };
45 | self.0.iter().chain(chained)
46 | }
47 | }
48 |
49 | impl std::convert::AsRef
> for SearchDirs {
50 | fn as_ref(&self) -> &Vec {
51 | &self.0
52 | }
53 | }
54 |
55 | impl Serialize for SearchDirs {
56 | fn serialize(&self, serializer: S) -> Result
57 | where
58 | S: serde::ser::Serializer,
59 | {
60 | serializer.serialize_newtype_struct("SearchDirs", &self.0)
61 | }
62 | }
63 |
64 | impl<'de> Deserialize<'de> for SearchDirs {
65 | fn deserialize(deserializer: D) -> Result
66 | where
67 | D: serde::de::Deserializer<'de>,
68 | {
69 | deserializer
70 | .deserialize_newtype_struct("SearchDirs", SearchDirVisitor)
71 | .map(Into::into)
72 | }
73 | }
74 |
75 | impl From for Vec {
76 | fn from(val: SearchDirs) -> Self {
77 | val.0
78 | }
79 | }
80 |
81 | impl From> for SearchDirs {
82 | fn from(other: Vec) -> SearchDirs {
83 | SearchDirs(other)
84 | }
85 | }
86 |
87 | /// A search directory visitor, auto extending the search directory with OS
88 | /// defaults.
89 | struct SearchDirVisitor;
90 |
91 | impl<'de> serde::de::Visitor<'de> for SearchDirVisitor {
92 | type Value = Vec;
93 |
94 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
95 | formatter.write_str("Search Dir Visitors must be an optional sequence of path")
96 | }
97 |
98 | fn visit_newtype_struct(self, deserializer: D) -> Result
99 | where
100 | D: serde::de::Deserializer<'de>,
101 | {
102 | let seq = deserializer.deserialize_seq(self)?;
103 | Ok(seq)
104 | }
105 |
106 | fn visit_seq(self, mut seq: A) -> Result
107 | where
108 | A: serde::de::SeqAccess<'de>,
109 | {
110 | let mut v = Vec::with_capacity(8);
111 | while let Some(item) = seq.next_element()? {
112 | v.push(item);
113 | }
114 | Ok(v)
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/hunspell-data/en_US.aff:
--------------------------------------------------------------------------------
1 | SET UTF8
2 | TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
3 | NOSUGGEST !
4 |
5 | # ordinal numbers
6 | COMPOUNDMIN 1
7 | # only in compounds: 1th, 2th, 3th
8 | ONLYINCOMPOUND c
9 | # compound rules:
10 | # 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
11 | # 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
12 | COMPOUNDRULE 2
13 | COMPOUNDRULE n*1t
14 | COMPOUNDRULE n*mp
15 | WORDCHARS 0123456789'
16 |
17 | PFX A Y 1
18 | PFX A 0 re .
19 |
20 | PFX I Y 1
21 | PFX I 0 in .
22 |
23 | PFX U Y 1
24 | PFX U 0 un .
25 |
26 | PFX C Y 1
27 | PFX C 0 de .
28 |
29 | PFX E Y 1
30 | PFX E 0 dis .
31 |
32 | PFX F Y 1
33 | PFX F 0 con .
34 |
35 | PFX K Y 1
36 | PFX K 0 pro .
37 |
38 | SFX V N 2
39 | SFX V e ive e
40 | SFX V 0 ive [^e]
41 |
42 | SFX N Y 3
43 | SFX N e ion e
44 | SFX N y ication y
45 | SFX N 0 en [^ey]
46 |
47 | SFX X Y 3
48 | SFX X e ions e
49 | SFX X y ications y
50 | SFX X 0 ens [^ey]
51 |
52 | SFX H N 2
53 | SFX H y ieth y
54 | SFX H 0 th [^y]
55 |
56 | SFX Y Y 1
57 | SFX Y 0 ly .
58 |
59 | SFX G Y 2
60 | SFX G e ing e
61 | SFX G 0 ing [^e]
62 |
63 | SFX J Y 2
64 | SFX J e ings e
65 | SFX J 0 ings [^e]
66 |
67 | SFX D Y 4
68 | SFX D 0 d e
69 | SFX D y ied [^aeiou]y
70 | SFX D 0 ed [^ey]
71 | SFX D 0 ed [aeiou]y
72 |
73 | SFX T N 4
74 | SFX T 0 st e
75 | SFX T y iest [^aeiou]y
76 | SFX T 0 est [aeiou]y
77 | SFX T 0 est [^ey]
78 |
79 | SFX R Y 4
80 | SFX R 0 r e
81 | SFX R y ier [^aeiou]y
82 | SFX R 0 er [aeiou]y
83 | SFX R 0 er [^ey]
84 |
85 | SFX Z Y 4
86 | SFX Z 0 rs e
87 | SFX Z y iers [^aeiou]y
88 | SFX Z 0 ers [aeiou]y
89 | SFX Z 0 ers [^ey]
90 |
91 | SFX S Y 4
92 | SFX S y ies [^aeiou]y
93 | SFX S 0 s [aeiou]y
94 | SFX S 0 es [sxzh]
95 | SFX S 0 s [^sxzhy]
96 |
97 | SFX P Y 3
98 | SFX P y iness [^aeiou]y
99 | SFX P 0 ness [aeiou]y
100 | SFX P 0 ness [^y]
101 |
102 | SFX M Y 1
103 | SFX M 0 's .
104 |
105 | SFX B Y 3
106 | SFX B 0 able [^aeiou]
107 | SFX B 0 able ee
108 | SFX B e able [^aeiou]e
109 |
110 | SFX L Y 1
111 | SFX L 0 ment .
112 |
113 | SFX i N 1
114 | SFX i us i us
115 |
116 | REP 90
117 | REP a ei
118 | REP ei a
119 | REP a ey
120 | REP ey a
121 | REP ai ie
122 | REP ie ai
123 | REP alot a_lot
124 | REP are air
125 | REP are ear
126 | REP are eir
127 | REP air are
128 | REP air ere
129 | REP ere air
130 | REP ere ear
131 | REP ere eir
132 | REP ear are
133 | REP ear air
134 | REP ear ere
135 | REP eir are
136 | REP eir ere
137 | REP ch te
138 | REP te ch
139 | REP ch ti
140 | REP ti ch
141 | REP ch tu
142 | REP tu ch
143 | REP ch s
144 | REP s ch
145 | REP ch k
146 | REP k ch
147 | REP f ph
148 | REP ph f
149 | REP gh f
150 | REP f gh
151 | REP i igh
152 | REP igh i
153 | REP i uy
154 | REP uy i
155 | REP i ee
156 | REP ee i
157 | REP j di
158 | REP di j
159 | REP j gg
160 | REP gg j
161 | REP j ge
162 | REP ge j
163 | REP s ti
164 | REP ti s
165 | REP s ci
166 | REP ci s
167 | REP k cc
168 | REP cc k
169 | REP k qu
170 | REP qu k
171 | REP kw qu
172 | REP o eau
173 | REP eau o
174 | REP o ew
175 | REP ew o
176 | REP oo ew
177 | REP ew oo
178 | REP ew ui
179 | REP ui ew
180 | REP oo ui
181 | REP ui oo
182 | REP ew u
183 | REP u ew
184 | REP oo u
185 | REP u oo
186 | REP u oe
187 | REP oe u
188 | REP u ieu
189 | REP ieu u
190 | REP ue ew
191 | REP ew ue
192 | REP uff ough
193 | REP oo ieu
194 | REP ieu oo
195 | REP ier ear
196 | REP ear ier
197 | REP ear air
198 | REP air ear
199 | REP w qu
200 | REP qu w
201 | REP z ss
202 | REP ss z
203 | REP shun tion
204 | REP shun sion
205 | REP shun cion
206 | REP sitted sat
207 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cargo-spellcheck"
3 | version = "0.15.5"
4 | authors = ["Bernhard Schuster "]
5 | edition = "2021"
6 | rust-version = "1.85.0"
7 | repository = "https://github.com/drahnr/cargo-spellcheck.git"
8 | homepage = "https://github.com/drahnr/cargo-spellcheck"
9 | license = "MIT OR Apache-2.0"
10 | keywords = ["spellcheck", "spelling", "grammar"]
11 | description = "Checks all doc comments for spelling mistakes"
12 | readme = "README.md"
13 | build = "build.rs"
14 | include = [
15 | "nlprule-data/**/*.bin.xz",
16 | "hunspell-data/*",
17 | "src/**/*.rs",
18 | "Cargo.toml",
19 | "build.rs",
20 | "/LICENSE-*",
21 | "/README.md",
22 | "tests/**/*.rs",
23 | "CHANGELOG.md",
24 | ]
25 |
26 | [workspace]
27 | members = ["./doc-chunks"]
28 |
29 |
30 | [build-dependencies]
31 | nlprule-build = { version = "=0.6.4", optional = true }
32 | # compress the nlprule artifacts to be under the 10 MB limit
33 | # that cargo enforces
34 | xz2 = "0.1"
35 |
36 | [dependencies]
37 |
38 | doc-chunks = { version = "0.2.1", path = "./doc-chunks" }
39 |
40 | color-eyre = "0.6"
41 | cargo_toml = "0.21"
42 | console = "0.15"
43 | crossterm = "0.27"
44 | # for the config file
45 | directories = "5"
46 |
47 | clap = { version = "4.1.8", features = ["derive", "env"] }
48 | clap_complete = "4.1.4"
49 | clap-verbosity-flag = "2.0"
50 |
51 | env_logger = "0.11"
52 | fancy-regex = "0.13"
53 | fs-err = { version = "2", features = ["io_safety"] }
54 | indexmap = { version = "2", features = ["rayon", "serde"] }
55 | itertools = "0.12"
56 | lazy_static = "1"
57 | memchr = "2"
58 | log = "0.4"
59 | num_cpus = "1.13"
60 | proc-macro2 = { version = "1", features = ["span-locations"] }
61 | pulldown-cmark = "0.10"
62 | rayon = "1.5"
63 | regex = "1.5"
64 | serde = { version = "1", features = ["derive"] }
65 | signal-hook = "0.3"
66 | syn = { version = "2", features = ["full"] }
67 | thiserror = "1"
68 | # for parsing and extracting elements from Cargo.toml
69 | toml = "0.8"
70 | glob = "0.3"
71 | # for the config file
72 | ignore = "0.4.18"
73 | tokio = { version = "1", features = ["full", "rt-multi-thread"] }
74 | futures = "0.3"
75 |
76 | uuid = { version = "1.0.0", features = ["v4"] }
77 |
78 | # config parsing, must be independent of features
79 |
80 | # TODO parse the country codes of dictionaries?
81 | iso_country = { version = "0.1", features = ["serde"] }
82 | isolang = { version = "2", features = ["serde"] }
83 |
84 | url = { version = "2", features = ["serde"] }
85 |
86 | # dictionary lookup with affixes
87 | hunspell-rs = { version = "0.4.0", optional = true }
88 | fd-lock = { version = "4", optional = true }
89 | encoding_rs = { version = "0.8.31", optional = true, features = [] }
90 | zspell = { version = "0.5.5", optional = true }
91 | spellbook = { version = "0.1", optional = true }
92 |
93 | # full grammar check, but also tokenization and disambiguation
94 | nlprule = { version = "=0.6.4", optional = true }
95 |
96 | # cache some expensive expansions
97 | xz2 = "0.1"
98 | sha2 = "0.10"
99 | bincode = "1"
100 | hex = "0.4"
101 | thousands = "0.2"
102 |
103 | [dev-dependencies]
104 | # for stripping ansi color codes
105 | console = "0.15"
106 | assert_matches = "1"
107 | maplit = "1"
108 | serde_plain = "1"
109 | nix = "0.26.2"
110 |
111 | [features]
112 | default = ["all"]
113 |
114 | # hunspell uses the segmenter provided by nlprules
115 | hunspell = [
116 | "dep:hunspell-rs",
117 | "hunspell-rs?/bundled",
118 | "dep:fd-lock",
119 | "nlprules",
120 | "dep:encoding_rs",
121 | ]
122 | zet = ["dep:zspell"]
123 | spellbook = ["dep:spellbook"]
124 | nlprules = ["dep:nlprule", "nlprule?/regex-fancy", "dep:nlprule-build"]
125 |
126 | all = ["hunspell", "zet", "spellbook", "nlprules"]
127 |
128 | [profile.dev]
129 | build-override = { opt-level = 2 }
130 |
131 | [profile.dev.package]
132 | backtrace = { opt-level = 3 }
133 | bincode = { opt-level = 3 }
134 | xz2 = { opt-level = 3 }
135 | sha2 = { opt-level = 3 }
136 | hunspell-rs = { opt-level = 3 }
137 | nlprule = { opt-level = 3 }
138 |
139 | [profile.release]
140 | debug = true
141 |
142 | [package.metadata.spellcheck]
143 | config = ".config/spellcheck.toml"
144 |
145 |
146 | [[test]]
147 | name = "signal_handler"
148 | path = "tests/signal_handler.rs"
149 |
--------------------------------------------------------------------------------
/docs/configuration.md:
--------------------------------------------------------------------------------
1 | # Configuration
2 |
3 | ## Source
4 |
5 | There are various ways to specify the configuration. The prioritization is as
6 | follows:
7 |
8 | _Explicit_ specification:
9 |
10 | 1. Command line flags `--cfg=...`.
11 | 1. `Cargo.toml` package metadata
12 |
13 | ```toml
14 | [package.metadata.spellcheck]
15 | config = "somewhere/cfg.toml"
16 | ```
17 |
18 | 1. `Cargo.toml` workspace metadata
19 |
20 | ```toml
21 | [workspace.metadata.spellcheck]
22 | config = "somewhere/else/cfg.toml"
23 | ```
24 |
25 | which will fail if specified and not existent on the filesystem.
26 |
27 | If neither of those ways of specification is present, continue with the
28 | _implicit_.
29 |
30 | 1. `Cargo.toml` metadata in the current working directory `CWD`.
31 | 1. Check the first arguments location if present, else the current working directory for `.config/spellcheck.toml`.
32 | 1. Fallback to per user configuration files:
33 | * Linux: `/home/alice/.config/cargo_spellcheck/config.toml`
34 | * Windows: `C:\Users\Alice\AppData\Roaming\cargo_spellcheck\config.toml`
35 | * macOS: `/Users/Alice/Library/Preferences/cargo_spellcheck/config.toml`
36 | 1. Use the default, builtin configuration (see `config` sub-command).
37 |
38 | Since this is rather complex, add `-vv` to your invocation to see the `info`
39 | level logs printed, which will contain the config path.
40 | ### Format
41 |
42 | ```toml
43 | # Project settings where a Cargo.toml exists and is passed
44 | # ${CARGO_MANIFEST_DIR}/.config/spellcheck.toml
45 |
46 | # Also take into account developer comments
47 | dev_comments = false
48 |
49 | # Skip the README.md file as defined in the cargo manifest
50 | skip_readme = false
51 |
52 | [Hunspell]
53 | # lang and name of `.dic` file
54 | lang = "en_US"
55 | # OS specific additives
56 | # Linux: [ /usr/share/myspell ]
57 | # Windows: []
58 | # macOS [ /home/alice/Libraries/hunspell, /Libraries/hunspell ]
59 |
60 | # Additional search paths, which take precedence over the default
61 | # os specific search dirs, searched in order, defaults last
62 | # search_dirs = []
63 |
64 | # Adds additional dictionaries, can be specified as
65 | # absolute paths or relative in the search dirs (in this order).
66 | # Relative paths are resolved relative to the configuration file
67 | # which is used.
68 | # Refer to `man 5 hunspell`
69 | # or https://www.systutorials.com/docs/linux/man/4-hunspell/#lbAE
70 | # on how to define a custom dictionary file.
71 | extra_dictionaries = []
72 |
73 | # If set to `true`, the OS specific default search paths
74 | # are skipped and only explicitly specified ones are used.
75 | skip_os_lookups = false
76 |
77 | # Use the builtin dictionaries if none were found in
78 | # in the configured lookup paths.
79 | # Usually combined with `skip_os_lookups=true`
80 | # to enforce the `builtin` usage for consistent
81 | # results across distributions and CI runs.
82 | # Setting this will still use the dictionaries
83 | # specified in `extra_dictionaries = [..]`
84 | # for topic specific lingo.
85 | use_builtin = true
86 |
87 |
88 | [Hunspell.quirks]
89 | # Transforms words that are provided by the tokenizer
90 | # into word fragments based on the capture groups which are to
91 | # be checked.
92 | # If no capture groups are present, the matched word is whitelisted.
93 | transform_regex = ["^'([^\\s])'$", "^[0-9]+x$"]
94 | # Accepts `alphabeta` variants if the checker provides a replacement suggestion
95 | # of `alpha-beta`.
96 | allow_concatenation = true
97 | # And the counterpart, which accepts words with dashes, when the suggestion has
98 | # recommendations without the dashes. This is less common.
99 | allow_dashed = false
100 | # Check the expressions in the footnote references. By default this is turned on
101 | # to remain backwards compatible but disabling it could be particularly useful
102 | # when one uses abbreviations instead of numbers as footnote references. For
103 | # instance by default the fragment `hello[^xyz]` would be spellchecked as
104 | # `helloxyz` which is obviously a misspelled word, but by turning this check
105 | # off, it will skip validating the reference altogether and will only check the
106 | # word `hello`.
107 | check_footnote_references = false
108 |
109 | [NlpRules]
110 | # Allows the user to override the default included
111 | # exports of LanguageTool, with other custom
112 | # languages
113 |
114 | # override_rules = "/path/to/rules_binencoded.bin"
115 | # override_tokenizer = "/path/to/tokenizer_binencoded.bin"
116 |
117 | [Reflow]
118 | # Reflows doc comments to adhere to a given maximum line width limit.
119 | max_line_length = 80
120 | ```
121 |
122 | Add `-v` (multiple times) to increase verbosity.
123 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cargo-spellcheck
2 |
3 | [](https://crates.io/crates/cargo-spellcheck)
4 | [](https://ci.fff.rs/teams/main/pipelines/cargo-spellcheck/jobs/master-validate)
5 | 
6 | 
7 |
8 | Check your spelling with `hunspell` and/or `nlprule`.
9 |
10 | ## Use Cases
11 |
12 | Run `cargo spellcheck --fix` or `cargo spellcheck fix` to fix all your
13 | documentation comments in order to avoid nasty typos all over your source tree.
14 | Meant as a helper simplifying review as well as improving CI checks after a
15 | learning phase for custom/topic specific lingo.
16 |
17 | `cargo-spellcheck` is also a valuable tool to run from git commit hooks or CI/CD
18 | systems.
19 |
20 | ### Check For Spelling and/or Grammar Mistakes
21 |
22 | ```zsh
23 | cargo spellcheck check
24 | ```
25 |
26 | error: spellcheck
27 | --> src/main.rs:44
28 | |
29 | 44 | Fun facets shalld cause some erroris.
30 | | ^^^^^^
31 | | - shall or shall d
32 | |
33 |
34 | ### Apply Suggestions Interactively
35 |
36 | ```zsh
37 | cargo spellcheck fix
38 | ```
39 |
40 | error: spellcheck(Hunspell)
41 | --> /media/supersonic1t/projects/cargo-spellcheck/src/literalset.rs:291
42 | |
43 | 291 | Returns literl within the Err variant if not adjacent
44 | | ^^^^^^
45 |
46 | (13/14) Apply this suggestion [y,n,q,a,d,j,e,?]?
47 |
48 | lite
49 | litter
50 | litterer
51 | liter l
52 | liters
53 | literal
54 | liter
55 | » a custom replacement literal
56 |
57 | ## Installation
58 |
59 | `cargo install --locked cargo-spellcheck`
60 |
61 | The `--locked` flag is the preferred way of installing to get the tested set of
62 | dependencies.
63 |
64 | on OS X, you need to ensure that `libclang.dylib` can be found by the linker
65 |
66 | which can be achieved by setting `DYLD_FALLBACK_LIBRARY_PATH`:
67 |
68 | ```
69 | export DYLD_FALLBACK_LIBRARY_PATH= \
70 | "$(xcode-select --print-path)/Toolchains/XcodeDefault.xctoolchain/usr/lib/"
71 | ```
72 |
73 | In Linux, the file is `libclang.so` which can be installed via:
74 |
75 | ```
76 | apt-get install libclang-dev
77 | ```
78 |
79 | Afterwards, you can set the variable `LIBCLANG_PATH` via:
80 |
81 | ```
82 | export LIBCLANG_PATH=/usr/lib/llvm-14/lib/
83 | ```
84 |
85 | ## Completions
86 |
87 | `cargo spellcheck completions` for autodetection of your current shell via
88 | `$SHELL`,
89 |
90 | or
91 |
92 | `cargo spellcheck completions --shell zsh`
93 |
94 | to explicitly specify your shell type.
95 |
96 | Commonly it's used like this from your shell's `.rc*` file:
97 |
98 | `source <(cargo spellcheck completions)`
99 |
100 | Note: There is a [relevant clap issue
101 | (#3508)](https://github.com/clap-rs/clap/issues/3508) that makes this fail in
102 | some cases.
103 |
104 | ## 🎈 Contribute!
105 |
106 | Contributions are very welcome!
107 |
108 | Generally the preferred way of doing so, is to comment in an issue that you
109 | would like to tackle the implementation/fix.
110 |
111 | This is usually followed by an initial PR where the implementation is then
112 | discussed and iteratively refined. No need to get it all correct
113 | the first time!
114 |
115 | ## Documentation
116 |
117 | - [Features and Roadmap](docs/features.md)
118 | - [Remedies for common issues](docs/remedy.md)
119 | - [Configuration](docs/configuration.md)
120 | - [Available Checkers](docs/checkers.md)
121 | - [Automation of `cargo-spellcheck`](docs/automation.md)
122 |
--------------------------------------------------------------------------------
/src/config/iso.rs:
--------------------------------------------------------------------------------
1 | //! Abstracts the combination of language code and country code into one
2 | //! convenient type.
3 | //!
4 | //! Language code follows the
5 | //! [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format. Country code
6 | //! follows the [Alpha-2 ISO_3166-1](https://en.wikipedia.org/wiki/ISO_3166-1)
7 | //! format.
8 | //!
9 | //! It results in a mildly adapted [IETF language
10 | //! tag](https://en.wikipedia.org/wiki/IETF_language_tag).
11 |
12 | use iso_country::Country;
13 | use isolang::Language;
14 |
15 | use std::{fmt, str::FromStr};
16 |
17 | use serde::de::{self, Deserialize, Deserializer};
18 | use serde::ser::Serializer;
19 |
20 | /// 5 digit language and country code as used by the dictionaries.
21 | #[derive(Debug, Clone, Copy, PartialEq, Eq)]
22 | pub struct Lang5 {
23 | pub lang: Language,
24 | pub country: Country,
25 | }
26 |
27 | impl PartialEq for Lang5 {
28 | fn eq(&self, other: &str) -> bool {
29 | self.to_string().as_str() == other
30 | }
31 | }
32 |
33 | impl PartialEq for Lang5
34 | where
35 | X: AsRef,
36 | {
37 | fn eq(&self, other: &X) -> bool {
38 | self.to_string().as_str() == other.as_ref()
39 | }
40 | }
41 |
42 | impl<'a> PartialEq for &'a str {
43 | fn eq(&self, other: &Lang5) -> bool {
44 | let other = other.to_string();
45 | *self == other.as_str()
46 | }
47 | }
48 |
49 | impl PartialEq for String {
50 | fn eq(&self, other: &Lang5) -> bool {
51 | *self == other.to_string()
52 | }
53 | }
54 |
55 | impl Default for Lang5 {
56 | fn default() -> Self {
57 | Self::en_US
58 | }
59 | }
60 |
61 | impl Lang5 {
62 | #[allow(non_upper_case_globals)]
63 | pub const en_US: Lang5 = Lang5 {
64 | lang: Language::Eng,
65 | country: Country::US,
66 | };
67 | }
68 |
69 | impl fmt::Display for Lang5 {
70 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71 | let language = self.lang.to_639_1().unwrap_or("??");
72 | let country = self.country;
73 | write!(f, "{language}_{country}")?;
74 | Ok(())
75 | }
76 | }
77 |
78 | #[derive(Debug, Clone, thiserror::Error)]
79 | #[error("Wrong character, expected '_' found '{0}'")]
80 | struct Lang5SpacerError(char);
81 |
82 | #[derive(Debug, Clone, Copy, Default)]
83 | struct Lang5Visitor;
84 |
85 | impl<'de> de::Visitor<'de> for Lang5Visitor {
86 | type Value = Lang5;
87 |
88 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
89 | write!(
90 | formatter,
91 | "Expected a 5 digit lang and country code in the form of LL_CC"
92 | )
93 | }
94 |
95 | fn visit_str(self, v: &str) -> Result
96 | where
97 | E: de::Error,
98 | {
99 | self.visit_borrowed_str::(v)
100 | }
101 |
102 | fn visit_string(self, s: String) -> Result
103 | where
104 | E: de::Error,
105 | {
106 | self.visit_borrowed_str::(s.as_str())
107 | }
108 |
109 | fn visit_borrowed_str(self, s: &'de str) -> Result
110 | where
111 | E: serde::de::Error,
112 | {
113 | if s.len() != 5 {
114 | return Err(serde::de::Error::custom(Lang5SpacerError('l')));
115 | }
116 | let lang = Language::from_639_1(&s[0..2])
117 | .ok_or(Lang5SpacerError('2'))
118 | .map_err(serde::de::Error::custom)?;
119 | let c = s.chars().nth(2).unwrap();
120 | if c != '_' {
121 | return Err(serde::de::Error::custom(Lang5SpacerError(c)))?;
122 | }
123 | let country = Country::from_str(&s[3..5]).map_err(serde::de::Error::custom)?;
124 | Ok(Lang5 { lang, country })
125 | }
126 | }
127 |
128 | impl<'de> Deserialize<'de> for Lang5 {
129 | fn deserialize(deserializer: D) -> Result
130 | where
131 | D: Deserializer<'de>,
132 | {
133 | deserializer.deserialize_str(Lang5Visitor)
134 | }
135 | }
136 |
137 | impl serde::Serialize for Lang5 {
138 | fn serialize(&self, serializer: S) -> Result
139 | where
140 | S: Serializer,
141 | {
142 | serializer.serialize_str(self.to_string().as_str())
143 | }
144 | }
145 |
146 | #[cfg(test)]
147 | mod tests {
148 | use super::*;
149 | use assert_matches::assert_matches;
150 |
151 | const EXPECTED: Lang5 = Lang5 {
152 | lang: Language::Deu,
153 | country: Country::AU,
154 | };
155 | const S: &str = "de_AU";
156 |
157 | #[test]
158 | fn iso_lang_german_austria_serde() {
159 | assert_eq!(S.to_owned(), EXPECTED.to_string());
160 |
161 | assert_matches!(serde_plain::from_str::(S), Ok(x) => assert_eq!(EXPECTED, x));
162 | }
163 |
164 | #[test]
165 | fn cmp_variants() {
166 | assert!(EXPECTED == S);
167 | assert!(EXPECTED == &S);
168 | assert!(EXPECTED == S.to_owned());
169 | assert!(EXPECTED == &S.to_owned());
170 | assert!(&EXPECTED == S);
171 | }
172 | }
173 |
--------------------------------------------------------------------------------
/src/checker/nlprules.rs:
--------------------------------------------------------------------------------
1 | //! An NLP-based rule checker based on `nlprule`
2 | //!
3 | //! Does check grammar, and is supposed to only check for grammar. Sentence
4 | //! splitting is done in a hand-waving way. To be improved.
5 |
6 | use super::{Checker, Detector, Suggestion};
7 | use crate::{CheckableChunk, ContentOrigin};
8 |
9 | use crate::errors::*;
10 |
11 | use std::collections::{hash_map::Entry, HashMap};
12 | use std::{
13 | path::{Path, PathBuf},
14 | sync::{Arc, Mutex},
15 | };
16 |
17 | use nlprule::{Rules, Tokenizer};
18 |
19 | use lazy_static::lazy_static;
20 |
21 | lazy_static! {
22 | static ref RULES: Mutex, Arc>> = Mutex::new(HashMap::new());
23 | }
24 |
25 | pub(crate) fn filtered_rules + Clone>(
26 | override_path: Option,
27 | ) -> Result> {
28 | match RULES
29 | .lock()
30 | .unwrap()
31 | .entry(override_path.clone().map(|x| x.as_ref().to_path_buf()))
32 | {
33 | Entry::Occupied(occupied) => Ok(occupied.get().clone()),
34 | Entry::Vacant(empty) => {
35 | let rules = super::rules(override_path)?;
36 | let rules = rules
37 | .rules()
38 | .iter()
39 | .filter(|rule| {
40 | match rule
41 | .category_type()
42 | .map(str::to_lowercase)
43 | .as_ref()
44 | .map(|x| x as &str)
45 | {
46 | // The hunspell backend is aware of
47 | // custom lingo, which this one is not,
48 | // so there would be a lot of false
49 | // positives.
50 | Some("misspelling") => false,
51 | // Anything quotes related is not relevant
52 | // for code documentation.
53 | Some("typographical") => false,
54 | _other => true,
55 | }
56 | })
57 | .cloned()
58 | .collect::();
59 |
60 | let rules = Arc::new(rules);
61 | empty.insert(rules.clone());
62 | Ok(rules)
63 | }
64 | }
65 | }
66 |
67 | pub(crate) struct NlpRulesChecker {
68 | tokenizer: Arc,
69 | rules: Arc,
70 | }
71 |
72 | impl NlpRulesChecker {
73 | pub fn new(config: &::Config) -> Result {
74 | let tokenizer = super::tokenizer(config.override_tokenizer.as_ref())?;
75 | let rules = filtered_rules(config.override_tokenizer.as_ref())?;
76 | Ok(Self { tokenizer, rules })
77 | }
78 | }
79 |
80 | impl Checker for NlpRulesChecker {
81 | type Config = crate::config::NlpRulesConfig;
82 |
83 | fn detector() -> Detector {
84 | Detector::NlpRules
85 | }
86 |
87 | fn check<'a, 's>(
88 | &self,
89 | origin: &ContentOrigin,
90 | chunks: &'a [CheckableChunk],
91 | ) -> Result>>
92 | where
93 | 'a: 's,
94 | {
95 | let mut acc = Vec::with_capacity(chunks.len());
96 |
97 | for chunk in chunks {
98 | acc.extend(check_chunk(
99 | origin.clone(),
100 | chunk,
101 | &self.tokenizer,
102 | &self.rules,
103 | ));
104 | }
105 |
106 | Ok(acc)
107 | }
108 | }
109 |
110 | /// Check the plain text contained in chunk, which can be one or more sentences.
111 | fn check_chunk<'a>(
112 | origin: ContentOrigin,
113 | chunk: &'a CheckableChunk,
114 | tokenizer: &Tokenizer,
115 | rules: &Rules,
116 | ) -> Vec> {
117 | // TODO We should control which parts need to be ignored of the markdown
118 | // entities, however the `NlpRulesConfig`, which is the only configuration
119 | // we receive in the constructor does not contain the same quirks (or in
120 | // fact any other similar settings) as the Hunspell one, so we cannot obtain
121 | // this setting, therefore we fallback to default
122 | let plain = chunk.erase_cmark(&Default::default());
123 | log::trace!("{plain:?}");
124 | let txt = plain.as_str();
125 |
126 | let mut acc = Vec::with_capacity(32);
127 |
128 | let nlpfixes = rules.suggest(txt, tokenizer);
129 | if nlpfixes.is_empty() {
130 | return Vec::new();
131 | }
132 |
133 | 'nlp: for fix in nlpfixes {
134 | let message = fix.message();
135 | let replacements = fix.replacements();
136 | let start = fix.span().char().start;
137 | let end = fix.span().char().end;
138 | if start > end {
139 | log::debug!("BUG: crate nlprule yielded a negative range {:?} for chunk in {}, please file a bug", start..end, &origin);
140 | continue 'nlp;
141 | }
142 | let range = start..end;
143 | acc.extend(
144 | plain
145 | .find_spans(range)
146 | .into_iter()
147 | .map(|(range, span)| Suggestion {
148 | detector: Detector::NlpRules,
149 | range,
150 | span,
151 | origin: origin.clone(),
152 | replacements: replacements.to_vec(),
153 | chunk,
154 | description: Some(message.to_owned()),
155 | }),
156 | );
157 | }
158 |
159 | acc
160 | }
161 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![deny(dead_code)]
2 | #![deny(missing_docs)]
3 | // #![deny(unused_crate_dependencies)]
4 | #![allow(clippy::non_ascii_literal)]
5 | // be explicit about certain offsets and how they are constructed
6 | #![allow(clippy::identity_op)]
7 | // in small cli projects, this is ok for now
8 | #![allow(clippy::wildcard_imports)]
9 | // personal strong preference for `from_iter`
10 | #![allow(clippy::from_iter_instead_of_collect)]
11 | #![allow(clippy::new_without_default)]
12 | #![allow(clippy::items_after_statements)]
13 | // Prevent the stray dbg! macros
14 | #![cfg_attr(not(test), deny(clippy::dbg_macro))]
15 | #![cfg_attr(test, allow(clippy::dbg_macro))]
16 |
17 | //! cargo-spellcheck
18 | //!
19 | //! A syntax tree based doc comment and common mark spell checker.
20 |
21 | pub use doc_chunks as documentation;
22 | #[cfg(test)]
23 | pub(crate) use doc_chunks::{chyrp_up, fluff_up};
24 |
25 | pub mod action;
26 | mod checker;
27 | mod config;
28 | pub mod errors;
29 | mod reflow;
30 | mod suggestion;
31 | mod tinhat;
32 | mod traverse;
33 |
34 | pub use self::action::*;
35 | pub use self::config::args::*;
36 | pub use self::config::{Config, HunspellConfig, LanguageToolConfig};
37 | pub use self::documentation::span::*;
38 | pub use self::documentation::util::*;
39 | pub use self::documentation::{
40 | util, CheckableChunk, Clusters, CommentVariant, CommentVariantCategory, ContentOrigin,
41 | Documentation, PlainOverlay, Range,
42 | };
43 | pub use self::suggestion::*;
44 | pub use self::tinhat::*;
45 |
46 | use self::errors::{bail, Result};
47 |
48 | use std::io::Write;
49 |
50 | #[cfg(target_os = "windows")]
51 | use signal_hook as _;
52 |
53 | use checker::Checker;
54 |
55 | /// A simple exit code representation.
56 | ///
57 | /// `Custom` can be specified by the user, others map to their UNIX equivalents
58 | /// where available.
59 | #[derive(Debug, Clone, Copy, Eq, PartialEq)]
60 | pub enum ExitCode {
61 | /// Regular termination and does not imply anything in regards to spelling
62 | /// mistakes found or not.
63 | Success,
64 | /// Terminate requested by a *nix signal.
65 | Signal,
66 | /// A custom exit code, as specified with `--code=`.
67 | Custom(u8),
68 | // Failure is already default for `Err(_)`
69 | }
70 |
71 | impl ExitCode {
72 | /// Convert `ExitCode` to primitive.
73 | pub fn as_u8(&self) -> u8 {
74 | match *self {
75 | Self::Success => 0u8,
76 | Self::Signal => 130u8,
77 | Self::Custom(code) => code,
78 | }
79 | }
80 | }
81 |
82 | /// The inner main.
83 | pub fn run(args: Args) -> Result {
84 | let _ = ::rayon::ThreadPoolBuilder::new()
85 | .num_threads(args.job_count())
86 | .build_global();
87 |
88 | env_logger::Builder::from_env(env_logger::Env::new().filter_or("CARGO_SPELLCHECK", "warn"))
89 | .filter_level(args.verbosity())
90 | .filter_module("nlprule", log::LevelFilter::Error)
91 | .filter_module("mio", log::LevelFilter::Error)
92 | .init();
93 |
94 | #[cfg(not(target_os = "windows"))]
95 | signal_handler(move || {
96 | if let Err(e) = action::interactive::ScopedRaw::restore_terminal() {
97 | log::warn!("Failed to restore terminal: {e}");
98 | }
99 | });
100 |
101 | let (unified, config) = match &args.command {
102 | Some(Sub::Completions { shell }) => {
103 | let sink = &mut std::io::stdout();
104 | generate_completions(*shell, sink);
105 | let _ = sink.flush();
106 | return Ok(ExitCode::Success);
107 | }
108 | _ => args.unified()?,
109 | };
110 |
111 | match unified {
112 | // must unify first, for the proper paths
113 | UnifiedArgs::Config {
114 | dest_config,
115 | checker_filter_set,
116 | } => {
117 | log::trace!("Configuration chore");
118 | let mut config = Config::full();
119 | Args::checker_selection_override(
120 | checker_filter_set.as_ref().map(AsRef::as_ref),
121 | &mut config,
122 | )?;
123 |
124 | match dest_config {
125 | ConfigWriteDestination::Stdout => {
126 | println!("{}", config.to_toml()?);
127 | return Ok(ExitCode::Success);
128 | }
129 | ConfigWriteDestination::File { overwrite, path } => {
130 | if path.exists() && !overwrite {
131 | bail!(
132 | "Attempting to overwrite {} requires `--force`.",
133 | path.display()
134 | );
135 | }
136 |
137 | log::info!("Writing configuration file to {}", path.display());
138 | config.write_values_to_path(path)?;
139 | }
140 | }
141 | Ok(ExitCode::Success)
142 | }
143 | UnifiedArgs::Operate {
144 | action,
145 | paths,
146 | recursive,
147 | skip_readme,
148 | config_path,
149 | dev_comments,
150 | exit_code_override,
151 | } => {
152 | log::debug!("Executing: {action:?} with {config:?} from {config_path:?}");
153 |
154 | let documents =
155 | traverse::extract(paths, recursive, skip_readme, dev_comments, &config)?;
156 |
157 | let rt = tokio::runtime::Runtime::new()?;
158 | let finish = rt.block_on(async move { action.run(documents, config).await })?;
159 |
160 | match finish {
161 | Finish::Success | Finish::MistakeCount(0) => Ok(ExitCode::Success),
162 | Finish::MistakeCount(_n) => Ok(ExitCode::Custom(exit_code_override)),
163 | Finish::Abort => Ok(ExitCode::Signal),
164 | }
165 | }
166 | }
167 | }
168 |
169 | #[cfg(test)]
170 | mod tests;
171 |
--------------------------------------------------------------------------------
/src/checker/cached.rs:
--------------------------------------------------------------------------------
1 | use crate::errors::*;
2 |
3 | use hex::ToHex;
4 | use serde::de::DeserializeOwned;
5 | use serde::ser::Serialize;
6 | use sha2::Digest;
7 | use std::io::Seek;
8 | use std::path::Path;
9 | use std::time::{Duration, Instant};
10 |
11 | #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
12 | struct CacheEntry {
13 | what: String,
14 | val: T,
15 | }
16 |
17 | pub struct CachedValue {
18 | /// Time it took to..
19 | /// load the value from disk if it was there.
20 | pub fetch: Option,
21 | /// Updating the disk cache
22 | pub update: Option,
23 | /// Create a new one if needed
24 | pub creation: Option,
25 | /// The accumulated duration,
26 | pub total: Duration,
27 | /// The actual value.
28 | pub value: T,
29 | }
30 |
31 | pub struct Cached {
32 | cache_file: fd_lock::RwLock,
33 | // What to cache.
34 | what: String,
35 | _phantom: std::marker::PhantomData,
36 | }
37 |
38 | impl<'a, T> Cached
39 | where
40 | T: Serialize + DeserializeOwned,
41 | {
42 | /// Create a new `Cached` instance, to create a expanded version of something that's identified by `what`.
43 | pub fn new(what: impl AsRef, cache_dir: impl AsRef) -> Result {
44 | let what = what.as_ref();
45 | let what_digest = sha2::Sha256::digest(what.as_bytes());
46 | let cache_dir = cache_dir.as_ref();
47 | fs_err::create_dir_all(cache_dir)?;
48 | let cache_file = cache_dir.join(what_digest.as_slice().encode_hex::());
49 | let cache_file = fs_err::OpenOptions::new()
50 | .create(true)
51 | .read(true)
52 | .write(true)
53 | .open(cache_file)?;
54 | Ok(Self {
55 | cache_file: fd_lock::RwLock::new(cache_file),
56 | what: what.to_owned(),
57 | _phantom: std::marker::PhantomData,
58 | })
59 | }
60 |
61 | pub fn fetch_or_update(
62 | &mut self,
63 | create: impl FnOnce(&str) -> Result,
64 | ) -> Result> {
65 | let total_start = Instant::now();
66 | match self.fetch() {
67 | Ok(Some(value)) => {
68 | let elapsed = total_start.elapsed();
69 | Ok(CachedValue {
70 | value,
71 | fetch: Some(elapsed),
72 | update: None,
73 | creation: None,
74 | total: elapsed,
75 | })
76 | }
77 | Ok(None) => {
78 | let fetch = Some(total_start.elapsed());
79 |
80 | let creation_start = Instant::now();
81 | let value = create(self.what.as_str())?;
82 | let creation = Some(creation_start.elapsed());
83 |
84 | let update_start = Instant::now();
85 | if let Err(err) = self.update(&value) {
86 | log::warn!("Failed to write value to cached: {err:?}");
87 | }
88 | let update = Some(update_start.elapsed());
89 | let total = total_start.elapsed();
90 | Ok(CachedValue {
91 | value,
92 | fetch,
93 | update,
94 | creation,
95 | total,
96 | })
97 | }
98 | Err(err) => {
99 | log::warn!("Overriding existing value that failed to load: {err:?}");
100 |
101 | let fetch = Some(total_start.elapsed());
102 |
103 | let creation_start = Instant::now();
104 | let value = create(self.what.as_str())?;
105 | let creation = Some(creation_start.elapsed());
106 |
107 | let update_start = Instant::now();
108 | if let Err(err) = self.update(&value) {
109 | log::warn!("Failed to update cached: {err:?}");
110 | }
111 | let update = Some(update_start.elapsed());
112 | let total = total_start.elapsed();
113 | Ok(CachedValue {
114 | value,
115 | fetch,
116 | update,
117 | creation,
118 | total,
119 | })
120 | }
121 | }
122 | }
123 | pub fn fetch(&mut self) -> Result