├── src ├── lang │ ├── .gitkeep │ └── ac ├── indent.rs ├── diff.rs ├── ignore.rs ├── lang.rs ├── frontmatter.rs ├── features.rs ├── linebreak.rs ├── logging.rs ├── ranges.rs ├── wrap.rs ├── detect.rs ├── call.rs ├── fs.rs ├── main.rs ├── cfg.rs └── parse.rs ├── .gitignore ├── .mdslw.toml ├── Cargo.toml ├── .github └── workflows │ └── ci.yml ├── Makefile ├── README.md └── LICENCE /src/lang/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /dist 3 | /.coverage.* 4 | /.envrc 5 | -------------------------------------------------------------------------------- /.mdslw.toml: -------------------------------------------------------------------------------- 1 | case = "ignore" 2 | end-markers = "?!:." 3 | features = "format-block-quotes,collate-link-defs,outsource-inline-links" 4 | ignores = "" 5 | lang = "ac" 6 | max-width = 80 7 | suppressions = "" 8 | upstream = "" 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mdslw" 3 | version = "0.16.1" 4 | edition = "2021" 5 | 6 | [profile.release] 7 | # Optimize release binaries. 8 | strip = true 9 | lto = true 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | anyhow = { version = "1", features = ["std", "backtrace"] } 15 | clap = { version = "4", features = ["env", "derive"] } 16 | clap_complete = "4" 17 | ignore = "0.4" 18 | include_dir = "0.7" 19 | log = { version = "0.4", features = ["std"] } 20 | pulldown-cmark = { version = "0.13", default-features = false } 21 | rayon = "1" 22 | similar = "2" 23 | tempfile = "3" 24 | serde = { version = "1", features = ["derive"] } 25 | toml = { version = "0.9", default-features = false, features = ["parse", "display", "serde"] } 26 | 27 | [build-dependencies] 28 | reqwest = {version = "0.12", features = ["default", "json", "blocking"]} 29 | serde_json = { version = "1" } 30 | -------------------------------------------------------------------------------- /src/indent.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | 18 | pub fn build_indent(num: usize) -> String { 19 | (0..num).map(|_| ' ').collect::() 20 | } 21 | 22 | #[cfg(test)] 23 | mod test { 24 | use super::*; 25 | 26 | #[test] 27 | fn can_build_indents() { 28 | let three = build_indent(3); 29 | assert_eq!(three, String::from(" ")); 30 | 31 | let four = build_indent(4); 32 | assert_eq!(four, String::from(" ")); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/lang/ac: -------------------------------------------------------------------------------- 1 | AA. 2 | AB. 3 | Abs. 4 | A.D. 5 | Adj. 6 | Adv. 7 | Alt. 8 | a.m. 9 | A.M. 10 | Approx. 11 | A.S. 12 | Aug. 13 | btw. 14 | Btw. 15 | B.V. 16 | Capt. 17 | C.F. 18 | cf. 19 | Cf. 20 | CF. 21 | C.O.D. 22 | Comm. 23 | Conn. 24 | Cont. 25 | D.A. 26 | D.C. 27 | DC. 28 | Dec. 29 | Dept. 30 | Dr. 31 | DR. 32 | e.g. 33 | E.g. 34 | E.G. 35 | Est. 36 | etc. 37 | Etc. 38 | ETC. 39 | Feb. 40 | Fn. 41 | Fri. 42 | Gb. 43 | Hon.B.A. 44 | Hz. 45 | I.D. 46 | i.e. 47 | I.e. 48 | I.E. 49 | I.T. 50 | Jan. 51 | J.B. 52 | J.D. 53 | J.K. 54 | Jun. 55 | Kb. 56 | K.R. 57 | L.A. 58 | Lev. 59 | lib. 60 | Lib. 61 | L.P. 62 | Lt. 63 | Lt.Cdr. 64 | Maj. 65 | Mar. 66 | Mart. 67 | Mb. 68 | Md. 69 | Mgr. 70 | M.I.T. 71 | M.R. 72 | Mr. 73 | MR. 74 | Mrs. 75 | Ms. 76 | M.T. 77 | Mt. 78 | Nov. 79 | nr. 80 | Nr. 81 | num. 82 | Num. 83 | N.V. 84 | N.Y. 85 | PC. 86 | Ph.D. 87 | Phys. 88 | P.M. 89 | P.O. 90 | pp. 91 | PP. 92 | Prof. 93 | P.V. 94 | Pvt. 95 | Rep. 96 | Rev. 97 | R.L. 98 | R.T. 99 | S.A. 100 | S.A.R. 101 | S.E. 102 | Sep. 103 | Sept. 104 | Sgt. 105 | S.p.A. 106 | Sq. 107 | U.S. 108 | U.S.A. 109 | U.S.C. 110 | vs. 111 | VS. 112 | Yr. 113 | 114 | -------------------------------------------------------------------------------- /src/diff.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */
17 | 
18 | use std::path::Path;
19 | 
20 | use similar::{udiff::unified_diff, Algorithm};
21 | 
22 | const CONTEXT: usize = 4;
23 | 
24 | pub enum Algo {
25 |     Myers,
26 |     Patience,
27 |     Lcs,
28 | }
29 | 
30 | impl Algo {
31 |     fn to_internal(&self) -> Algorithm {
32 |         match self {
33 |             Self::Myers => Algorithm::Myers,
34 |             Self::Patience => Algorithm::Patience,
35 |             Self::Lcs => Algorithm::Lcs,
36 |         }
37 |     }
38 | 
39 |     pub fn generate(&self, new: &str, org: &str, filename: &Path) -> String {
40 |         let original = format!("original:{}", filename.to_string_lossy());
41 |         let processed = format!("processed:{}", filename.to_string_lossy());
42 |         let names = (original.as_ref(), processed.as_ref());
43 |         unified_diff(self.to_internal(), org, new, CONTEXT, Some(names))
44 |     }
45 | }
46 | 
--------------------------------------------------------------------------------
/src/ignore.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | 
18 | const IGNORE_START: &str = "mdslw-ignore-start";
19 | const IGNORE_END: &str = "mdslw-ignore-end";
20 | 
21 | const PRETTIER_IGNORE_START: &str = "prettier-ignore-start";
22 | const PRETTIER_IGNORE_END: &str = "prettier-ignore-end";
23 | 
24 | fn is_html_comment(s: &str) -> bool {
25 |     s.starts_with("<!--") && (s.ends_with("-->") || s.ends_with("-->\n"))
26 | }
27 | 
28 | pub struct IgnoreByHtmlComment {
29 |     ignore: bool,
30 | }
31 | 
32 | impl IgnoreByHtmlComment {
33 |     pub fn new() -> Self {
34 |         Self { ignore: false }
35 |     }
36 | 
37 |     /// Determine whether the HTML that is processed is a comment and whether it modifies the
38 |     /// ignore behaviour.
39 |     pub fn process_html(&mut self, s: &str) {
40 |         if is_html_comment(s) {
41 |             if s.contains(IGNORE_START) || s.contains(PRETTIER_IGNORE_START) {
42 |                 log::debug!("detected ignore start directive");
43 |                 self.ignore = true
44 |             }
45 |             if s.contains(IGNORE_END) || s.contains(PRETTIER_IGNORE_END) {
46 |                 log::debug!("detected ignore stop directive");
47 |                 self.ignore = false
48 |             }
49 |         }
50 |     }
51 | 
52 |     pub fn should_be_ignored(&self) -> bool {
53 |         self.ignore
54 |     }
55 | }
56 | 
--------------------------------------------------------------------------------
/src/lang.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use anyhow::{Error, Result}; 19 | use include_dir::{include_dir, Dir}; 20 | 21 | static LANG_FILES_DIR: Dir<'_> = include_dir!("$MDSLW_LANG_DIR"); 22 | 23 | pub fn keep_word_list(lang_names: &str) -> Result { 24 | let mut errors = vec![]; 25 | 26 | let keep_words = lang_names 27 | .split_terminator(',') 28 | .flat_map(|el| el.split_whitespace()) 29 | .filter_map(|el| { 30 | if el == "none" { 31 | Some(String::new()) 32 | } else if let Some(content) = LANG_FILES_DIR 33 | .get_file(el) 34 | .and_then(|el| el.contents_utf8()) 35 | { 36 | log::debug!("loaded keep word list for language '{}'", el); 37 | Some(content.to_string()) 38 | } else { 39 | errors.push(el); 40 | None 41 | } 42 | }) 43 | .collect::(); 44 | 45 | if errors.is_empty() { 46 | Ok(keep_words) 47 | } else { 48 | Err(Error::msg(format!( 49 | "unknown or unsupported languages: {}", 50 | errors.join(", ") 51 | ))) 52 | } 53 | } 54 | 55 | #[cfg(test)] 56 | mod test { 57 | use super::*; 58 | 59 | #[test] 60 | fn nothing_disables_words() -> Result<()> { 61 | let list = keep_word_list("")?; 62 | assert_eq!(list, String::new()); 63 | Ok(()) 64 | } 65 | 66 | #[test] 67 | fn none_disables_words() -> Result<()> { 68 | let list = keep_word_list("none")?; 69 | assert_eq!(list, String::new()); 70 | Ok(()) 71 | } 72 | 73 | #[test] 74 | fn some_langs_are_supported() -> Result<()> { 75 | let langs = "de en es fr it"; 76 | let list = keep_word_list(langs)?; 77 | assert_ne!(list, String::new()); 78 | Ok(()) 79 | } 80 | 81 | #[test] 82 | fn unsupported_langs() { 83 | let langs = "unsupported"; 84 | let list = keep_word_list(langs); 85 | assert!(list.is_err()); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/frontmatter.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | 18 | const FRONTMATTER_SEPARATOR: &str = "---\n"; 19 | 20 | pub fn extract_frontmatter(text: &str) -> String { 21 | let mut lines = text.split_inclusive('\n'); 22 | let first = lines.next(); 23 | if Some(FRONTMATTER_SEPARATOR) != first { 24 | log::debug!("no frontmatter starting delimiter detected"); 25 | String::new() 26 | } else { 27 | let mut matter_len = FRONTMATTER_SEPARATOR.len(); 28 | let mut found_end_sep = false; 29 | lines 30 | .take_while(|line| { 31 | let do_continue = !found_end_sep; 32 | found_end_sep |= line == &FRONTMATTER_SEPARATOR; 33 | do_continue 34 | }) 35 | .for_each(|line| matter_len += line.len()); 36 | if !found_end_sep { 37 | // There was no frontmatter since we did not find the end separator. 38 | log::debug!("no frontmatter ending delimiter detected"); 39 | String::new() 40 | } else { 41 | log::debug!("found {} bytes of frontmatter", matter_len); 42 | // There was indeed frontmatter. This slicing operation can never error out sinc we did 43 | // extract the frontmatter from the text. 44 | let matter = &text[..matter_len]; 45 | matter.to_owned() 46 | } 47 | } 48 | } 49 | 50 | #[cfg(test)] 51 | mod test { 52 | use super::*; 53 | 54 | const FRONTMATTER_FOR_TEST: &str = "---\nsome text\nasdf: ---\nmultiple: lines\n---\n"; 55 | 56 | #[test] 57 | fn extracting_frontmatter() { 58 | let matter = extract_frontmatter(FRONTMATTER_FOR_TEST); 59 | 60 | assert_eq!(matter, FRONTMATTER_FOR_TEST.to_string()); 61 | } 62 | 63 | #[test] 64 | fn splitting_frontmatter_with_rest() { 65 | let matter = extract_frontmatter(&format!("{}some\nmore\ntext\n", FRONTMATTER_FOR_TEST)); 66 | 67 | assert_eq!(matter, FRONTMATTER_FOR_TEST.to_string()); 68 | } 69 | 70 | #[test] 71 | fn frontmatter_has_to_start_text() { 72 | let text = format!("something\n{}", FRONTMATTER_FOR_TEST); 73 | let matter = extract_frontmatter(&text); 74 | 75 | assert_eq!(matter, String::new()); 76 | } 77 | 78 | #[test] 79 | fn frontmatter_has_to_have_ending_separator() { 80 | let text = FRONTMATTER_FOR_TEST[..FRONTMATTER_FOR_TEST.len() - 1].to_string(); 81 | let matter = extract_frontmatter(&text); 82 | 83 | assert_eq!(matter, String::new()); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/features.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | 18 | use anyhow::{Error, Result}; 19 | 20 | use crate::detect::BreakCfg; 21 | use crate::parse::ParseCfg; 22 | 23 | #[derive(Debug, PartialEq)] 24 | pub struct FeatureCfg { 25 | pub keep_spaces_in_links: bool, 26 | pub format_block_quotes: bool, 27 | pub collate_link_defs: bool, 28 | pub outsource_inline_links: bool, 29 | pub break_cfg: BreakCfg, 30 | pub parse_cfg: ParseCfg, 31 | } 32 | 33 | impl Default for FeatureCfg { 34 | fn default() -> Self { 35 | FeatureCfg { 36 | keep_spaces_in_links: false, 37 | format_block_quotes: false, 38 | collate_link_defs: false, 39 | outsource_inline_links: false, 40 | parse_cfg: ParseCfg { 41 | keep_linebreaks: false, 42 | }, 43 | break_cfg: BreakCfg { 44 | keep_linebreaks: false, 45 | }, 46 | } 47 | } 48 | } 49 | 50 | impl std::str::FromStr for FeatureCfg { 51 | type Err = Error; 52 | 53 | fn from_str(s: &str) -> Result { 54 | let mut cfg = Self::default(); 55 | let mut errors = vec![]; 56 | 57 | // Parse all possible features and toggle them as desired. 58 | for feature in s 59 | .split_terminator(',') 60 | .flat_map(|el| el.split_whitespace()) 61 | .map(|el| el.trim()) 62 | .filter(|el| !el.is_empty()) 63 | { 64 | match feature { 65 | "keep-spaces-in-links" => cfg.keep_spaces_in_links = true, 66 | "format-block-quotes" => cfg.format_block_quotes = true, 67 | "collate-link-defs" => cfg.collate_link_defs = true, 68 | "outsource-inline-links" => cfg.outsource_inline_links = true, 69 | "keep-linebreaks" => { 70 | cfg.parse_cfg.keep_linebreaks = true; 71 | cfg.break_cfg.keep_linebreaks = true; 72 | } 73 | // Do not accept any other entry. 74 | _ => errors.push(feature), 75 | } 76 | } 77 | 78 | if errors.is_empty() { 79 | log::debug!("loaded features: {:?}", cfg); 80 | Ok(cfg) 81 | } else { 82 | Err(Error::msg(format!( 83 | "unknown features: {}", 84 | errors.join(", ") 85 | ))) 86 | } 87 | } 88 | } 89 | 90 | #[cfg(test)] 91 | mod test { 92 | use super::*; 93 | #[test] 94 | fn swapping_all_features_and_disregard_whitspace() -> Result<()> { 95 | let default = FeatureCfg::default(); 96 | let swapped = FeatureCfg { 97 | keep_spaces_in_links: !default.keep_spaces_in_links, 98 | format_block_quotes: !default.format_block_quotes, 99 | collate_link_defs: !default.collate_link_defs, 100 | outsource_inline_links: !default.outsource_inline_links, 101 | parse_cfg: ParseCfg { 102 | keep_linebreaks: !default.parse_cfg.keep_linebreaks, 103 | }, 104 | break_cfg: BreakCfg { 105 | keep_linebreaks: !default.break_cfg.keep_linebreaks, 106 | }, 107 | }; 108 | 109 | let parsed = 110 | "keep-spaces-in-links , keep-linebreaks ,format-block-quotes, collate-link-defs,outsource-inline-links" 111 | .parse::()?; 112 | 113 | assert_eq!(parsed, swapped); 114 | Ok(()) 115 | } 116 | 117 | #[test] 118 | fn failure_to_parse() -> Result<()> { 119 | let parsed = "unknown".parse::(); 120 | assert!(parsed.is_err()); 121 | Ok(()) 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/linebreak.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use std::collections::HashSet; 19 | 20 | use crate::detect::{BreakDetector, WhitespaceDetector}; 21 | 22 | pub fn insert_linebreaks_after_sentence_ends(text: &str, detector: &BreakDetector) -> String { 23 | let merged = normalise_linebreaks(text, &detector.whitespace); 24 | let sentence_ends = find_sentence_ends(&merged, detector); 25 | 26 | merged 27 | .chars() 28 | .enumerate() 29 | .filter_map(|(idx, el)| { 30 | if sentence_ends.contains(&Char::Skip(idx)) { 31 | None 32 | } else if sentence_ends.contains(&Char::Split(idx)) { 33 | Some(format!("\n{}", el)) 34 | } else { 35 | Some(format!("{}", el)) 36 | } 37 | }) 38 | .collect::() 39 | } 40 | 41 | /// Replace all linebreaks by spaces unless they have been escaped by a non-breaking space. 42 | fn normalise_linebreaks(text: &str, detector: &WhitespaceDetector) -> String { 43 | let mut last_was_nbsp = false; 44 | text.chars() 45 | .map(|el| { 46 | let replacement = if el != '\n' || last_was_nbsp { el } else { ' ' }; 47 | last_was_nbsp = detector.is_nbsp(&el); 48 | replacement 49 | }) 50 | .collect::() 51 | } 52 | 53 | #[derive(Eq, Hash, PartialEq, Debug)] 54 | enum Char { 55 | Skip(usize), 56 | Split(usize), 57 | } 58 | 59 | fn find_sentence_ends(text: &str, detector: &BreakDetector) -> HashSet { 60 | let as_chars = text.chars().collect::>(); 61 | 62 | as_chars 63 | .iter() 64 | .enumerate() 65 | .filter_map(|(idx, ch)| { 66 | let next = as_chars.get(idx + 1); 67 | 68 | if detector.is_breaking_marker(ch, next) 69 | && !detector.ends_with_keep_word(&as_chars, &idx) 70 | { 71 | Some([Char::Skip(idx + 1), Char::Split(idx + 2)]) 72 | } else { 73 | None 74 | } 75 | }) 76 | .flatten() 77 | .collect::>() 78 | } 79 | 80 | #[cfg(test)] 81 | mod test { 82 | use super::*; 83 | use crate::detect::BreakCfg; 84 | 85 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg { 86 | keep_linebreaks: false, 87 | }; 88 | 89 | #[test] 90 | fn finding_sentence_ends() { 91 | let text = "words that. are. followed by. periods. period."; 92 | let detector = BreakDetector::new("are. by.", "", false, ".", CFG_FOR_TESTS); 93 | 94 | let ends = find_sentence_ends(text, &detector); 95 | 96 | // We never detect a sentence at and the end of the text. 97 | let expected = vec![ 98 | Char::Skip(11), 99 | Char::Split(12), 100 | Char::Skip(38), 101 | Char::Split(39), 102 | ] 103 | .into_iter() 104 | .collect::>(); 105 | 106 | assert_eq!(expected, ends); 107 | } 108 | 109 | #[test] 110 | fn normalising_linebreaks() { 111 | // All whitespace, including tabs, is merged into single spaces. 112 | let text = " \n text with lots\n \nof white \n space  "; 113 | let expected = " text with lots  \nof white \n space  "; 114 | 115 | let merged = normalise_linebreaks(text, &WhitespaceDetector::default()); 116 | 117 | assert_eq!(expected, merged); 118 | } 119 | 120 | #[test] 121 | fn inserting_linebreaks_between_sentences() { 122 | let text = "words that. are. followed by. periods. period."; 123 | let detector = BreakDetector::new("are. 
by.", "", false, ".", CFG_FOR_TESTS); 124 | 125 | let broken = insert_linebreaks_after_sentence_ends(text, &detector); 126 | 127 | // We never detect a sentence at and the end of the text. 128 | let expected = "words that.\nare. followed by. periods.\nperiod."; 129 | 130 | assert_eq!(expected, broken); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: "ci" 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: main 7 | 8 | # Ensure there is only ever one workflow of this kind running at a time. 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} 11 | cancel-in-progress: true 12 | 13 | permissions: 14 | contents: write 15 | 16 | jobs: 17 | ci: 18 | runs-on: ubuntu-latest 19 | # Avoid very long running jobs. 20 | timeout-minutes: 30 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 0 27 | 28 | - name: Remove possible compilation remnants 29 | run: rm -rf ./target ./dist 30 | 31 | - name: Install dependencies 32 | run: | 33 | sudo apt-get update 34 | sudo apt-get install -yqq bash curl make git jq 35 | 36 | - name: Install Rust 37 | run: | 38 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > install_rust.sh 39 | sh install_rust.sh -y 40 | echo "$HOME/.cargo/bin" >> "${GITHUB_PATH}" 41 | 42 | - name: Build binary 43 | run: make build-dev 44 | # Treat all warnings as errors. 45 | env: 46 | RUSTFLAGS: "-Dwarnings" 47 | 48 | - name: Lint 49 | run: make lint 50 | 51 | - name: Run tests 52 | run: make test 53 | 54 | - name: Check coverage 55 | run: make coverage 56 | 57 | # Skip coverage upload for now because something goes wrong. TODO: Debug. 58 | # - uses: actions/upload-artifact@v4 59 | # if: always() 60 | # with: 61 | # name: coverage 62 | # path: | 63 | # .coverage.html 64 | # .coverage.json 65 | # if-no-files-found: error 66 | # retention-days: 7 67 | 68 | macos-release: 69 | runs-on: macos-latest 70 | needs: [ci] 71 | # Avoid very long running jobs. 72 | timeout-minutes: 20 73 | 74 | steps: 75 | - name: Checkout repository 76 | uses: actions/checkout@v4 77 | with: 78 | fetch-depth: 0 79 | 80 | - name: Remove possible compilation remnants 81 | run: rm -rf ./target ./dist 82 | 83 | - name: Add targets 84 | run: | 85 | rustup update 86 | rustup target add x86_64-apple-darwin 87 | rustup target add aarch64-apple-darwin 88 | 89 | - name: Build binaries 90 | run: | 91 | mkdir ./dist 92 | cargo build --release --target=aarch64-apple-darwin 93 | cp target/aarch64-apple-darwin/release/mdslw ./dist/mdslw_aarch64-apple-darwin 94 | cargo build --release --target=x86_64-apple-darwin 95 | cp target/x86_64-apple-darwin/release/mdslw ./dist/mdslw_x86_64-apple-darwin 96 | 97 | - uses: actions/upload-artifact@v4 98 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 99 | with: 100 | name: macos-release 101 | path: ./dist/ 102 | if-no-files-found: error 103 | retention-days: 1 104 | 105 | release: 106 | runs-on: ubuntu-latest 107 | needs: [ci, macos-release] 108 | # Avoid very long running jobs. 
109 | timeout-minutes: 30 110 | 111 | steps: 112 | - name: Checkout repository 113 | uses: actions/checkout@v4 114 | with: 115 | fetch-depth: 0 116 | 117 | - name: Remove possible compilation remnants 118 | run: rm -rf ./target ./dist 119 | 120 | - name: Install dependencies 121 | run: | 122 | sudo apt-get update 123 | sudo apt-get install -yqq bash curl make git jq 124 | 125 | - name: Install Rust 126 | run: | 127 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > install_rust.sh 128 | sh install_rust.sh -y 129 | echo "$HOME/.cargo/bin" >> "${GITHUB_PATH}" 130 | 131 | - name: Install cross-compilation dependencies 132 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 133 | run: | 134 | # For ARM Linux. 135 | sudo apt-get install -yqq gcc-arm-linux-gnueabihf 136 | # For Windows. 137 | sudo apt-get install -yqq mingw-w64 138 | 139 | - name: Install toolchains 140 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 141 | run: make install-toolchains 142 | 143 | - name: Build all release binaries apart from MacOS 144 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 145 | run: | 146 | echo '[target.armv7-unknown-linux-gnueabihf]' >> ~/.cargo/config 147 | echo 'linker = "arm-linux-gnueabihf-gcc"' >> ~/.cargo/config 148 | 149 | make build-prod-all 150 | 151 | - name: Copy release binaries 152 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 153 | run: make copy-relese-binaries 154 | 155 | - name: Retrieve MacOS binaries 156 | uses: actions/download-artifact@v4 157 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 158 | with: 159 | name: macos-release 160 | path: ./dist/ 161 | 162 | - name: List release binaries 163 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 164 | run: ls -l ./dist/* 165 | 166 | - name: Make release 167 | if: ${{ startsWith(github.ref, 'refs/tags/') }} 168 | uses: softprops/action-gh-release@v1 169 | with: 170 | files: | 171 | dist/mdslw_x86_64-unknown-linux-musl 172 | dist/mdslw_armv7-unknown-linux-gnueabihf 173 | dist/mdslw_x86_64-apple-darwin 174 | dist/mdslw_aarch64-apple-darwin 175 | dist/mdslw_x86_64-pc-windows-gnu.exe 176 | -------------------------------------------------------------------------------- /src/logging.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use std::time; 19 | 20 | use log::{Level, Log, Metadata, Record}; 21 | 22 | /// Execute a trace log while lazily evaluating the expressions whose values shall be logged. This 23 | /// macro takes a string literal, followed by expressions that will be evaluated lazily. 24 | #[macro_export] 25 | macro_rules! 
trace_log { 26 | ($fmt_str:literal, $($exprs:expr),*) => { 27 | if log::log_enabled!(log::Level::Trace) { 28 | log::trace!($fmt_str, $($exprs),*); 29 | } 30 | }; 31 | } 32 | 33 | pub fn init_logging(level: u8) -> Result<(), log::SetLoggerError> { 34 | log::set_boxed_logger(Box::new(Logger::new(level))) 35 | .map(|()| log::set_max_level(log::LevelFilter::Trace)) 36 | } 37 | 38 | const SELF_MODULE_NAME: &str = env!("CARGO_PKG_NAME"); 39 | 40 | struct Logger { 41 | starttime: time::Instant, 42 | level: Level, 43 | module_name: String, 44 | module_prefix: String, 45 | } 46 | 47 | impl Logger { 48 | pub fn new(log_level: u8) -> Self { 49 | let level = match log_level { 50 | 0 => Level::Warn, 51 | 1 => Level::Info, 52 | 2 => Level::Debug, 53 | _ => Level::Trace, 54 | }; 55 | Self { 56 | level, 57 | starttime: time::Instant::now(), 58 | module_name: SELF_MODULE_NAME.to_string(), 59 | module_prefix: format!("{}::", SELF_MODULE_NAME), 60 | } 61 | } 62 | } 63 | 64 | impl Log for Logger { 65 | fn enabled(&self, metadata: &Metadata) -> bool { 66 | metadata.level() <= self.level 67 | } 68 | 69 | fn log(&self, record: &Record) { 70 | if let Some(msg) = self.format_message(record) { 71 | eprintln!("{}", msg); 72 | } 73 | } 74 | 75 | fn flush(&self) {} 76 | } 77 | 78 | impl Logger { 79 | fn format_log_location(&self, record: &Record) -> String { 80 | let module = record.module_path_static().unwrap_or(""); 81 | 82 | if module == self.module_name || module.starts_with(&self.module_prefix) { 83 | let file = record.file_static().unwrap_or(""); 84 | let line = record.line().unwrap_or(0); 85 | format!("{}:{}:{}", module, file, line) 86 | } else { 87 | module.to_owned() 88 | } 89 | } 90 | 91 | fn format_message(&self, record: &Record) -> Option { 92 | if self.enabled(record.metadata()) { 93 | let elapsed = self.starttime.elapsed(); 94 | let elapsed_secs = elapsed.as_secs(); 95 | let elapsed_millis = elapsed.subsec_millis(); 96 | let thread_idx = rayon::current_thread_index() 97 | .map(|el| format!("@{}", el)) 98 | .unwrap_or_default(); 99 | 100 | Some(format!( 101 | "{}{}: {}s{}ms {}: {}", 102 | record.level(), 103 | thread_idx, 104 | elapsed_secs, 105 | elapsed_millis, 106 | self.format_log_location(record), 107 | record.args() 108 | )) 109 | } else { 110 | None 111 | } 112 | } 113 | } 114 | 115 | #[cfg(test)] 116 | mod test { 117 | use super::*; 118 | use anyhow::{Error, Result}; 119 | 120 | #[test] 121 | fn new_logger() { 122 | let logger0 = Logger::new(0); 123 | assert_eq!(logger0.level, Level::Warn); 124 | 125 | let logger1 = Logger::new(1); 126 | assert_eq!(logger1.level, Level::Info); 127 | 128 | let logger2 = Logger::new(2); 129 | assert_eq!(logger2.level, Level::Debug); 130 | 131 | let logger3 = Logger::new(3); 132 | assert_eq!(logger3.level, Level::Trace); 133 | } 134 | 135 | #[test] 136 | fn logger_enabled() { 137 | let logger = Logger::new(0); 138 | assert_eq!(logger.level, Level::Warn); 139 | 140 | let metadata_err = Metadata::builder().level(Level::Error).build(); 141 | let metadata_debug = Metadata::builder().level(Level::Debug).build(); 142 | 143 | assert!(logger.enabled(&metadata_err)); 144 | assert!(!logger.enabled(&metadata_debug)); 145 | } 146 | 147 | #[test] 148 | fn logging_a_message_from_own_module() -> Result<()> { 149 | let args = format_args!("some thing"); 150 | let metadata = Metadata::builder().level(Level::Error).build(); 151 | let record = Record::builder() 152 | .metadata(metadata) 153 | .module_path_static(Some("mdslw::test")) 154 | .file_static(Some("test_file")) 155 | 
.args(args) 156 | .build(); 157 | 158 | let logger = Logger::new(0); 159 | let msg = logger 160 | .format_message(&record) 161 | .ok_or(Error::msg("cannot build message"))?; 162 | 163 | // Check beginning and end because the test might take longer than 1ms, which would fail 164 | // it. 165 | assert!(msg.starts_with("ERROR: 0s"), "incorrect start: {}", msg); 166 | assert!( 167 | msg.ends_with("ms mdslw::test:test_file:0: some thing"), 168 | "incorrect end: {}", 169 | msg 170 | ); 171 | 172 | Ok(()) 173 | } 174 | 175 | #[test] 176 | fn logging_a_message_from_another_module() -> Result<()> { 177 | let args = format_args!("some thing"); 178 | let metadata = Metadata::builder().level(Level::Error).build(); 179 | let record = Record::builder() 180 | .metadata(metadata) 181 | .module_path_static(Some("some::other::module")) 182 | .file_static(Some("test_file")) 183 | .args(args) 184 | .build(); 185 | 186 | let logger = Logger::new(0); 187 | let msg = logger 188 | .format_message(&record) 189 | .ok_or(Error::msg("cannot build message"))?; 190 | 191 | // Check beginning and end because the test might take longer than 1ms, which would fail 192 | // it. 193 | assert!(msg.starts_with("ERROR: 0s"), "incorrect start: {}", msg); 194 | assert!( 195 | msg.ends_with("ms some::other::module: some thing"), 196 | "incorrect end: {}", 197 | msg 198 | ); 199 | 200 | Ok(()) 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /src/ranges.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use crate::parse::CharRange; 19 | use crate::trace_log; 20 | 21 | #[derive(Debug, PartialEq)] 22 | pub enum WrapType { 23 | Indent(usize), 24 | Verbatim, 25 | } 26 | 27 | #[derive(Debug, PartialEq)] 28 | /// TextRange describes a range of characters in a document including whether they shall be 29 | /// repeated verbatim or not. It also contains the number of spaces of indent to use when wrapping 30 | /// the contained text. 31 | pub struct TextRange { 32 | pub wrap: WrapType, 33 | pub range: CharRange, 34 | } 35 | 36 | /// The first arguments contains those ranges in the document that shall be wrapped. Every 37 | /// character in the document that is not inside such a range will be taken verbatim. This also 38 | /// determines the starting indent in spaces for every range that shall be wrapped. 39 | pub fn fill_markdown_ranges(wrap_ranges: Vec, text: &str) -> Vec { 40 | let mut last_end = 0; 41 | 42 | let lines = line_ranges(text); 43 | 44 | wrap_ranges 45 | .into_iter() 46 | // Append an element that points at the end of the document to ensure that we always add 47 | // the last ranges in the document because we always add a verbatim range before the 48 | // non-verbatim range. 
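        // For example, for a document of length 10 with a single wrap range 2..5, the
        // sentinel range 10..10 makes the loop below still emit the trailing verbatim
        // range 5..10; the empty sentinel itself is filtered out further down.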
49 | .chain([CharRange { 50 | start: text.len(), 51 | end: text.len(), 52 | }]) 53 | .flat_map(|el| { 54 | let verbatim = TextRange { 55 | wrap: WrapType::Verbatim, 56 | range: CharRange { 57 | start: last_end, 58 | end: el.start, 59 | }, 60 | }; 61 | last_end = el.end; 62 | 63 | let wrap_line_start = find_line_start(el.start, &lines).unwrap_or(el.start); 64 | let wrap = TextRange { 65 | wrap: WrapType::Indent(el.start - wrap_line_start), 66 | range: el, 67 | }; 68 | [verbatim, wrap] 69 | }) 70 | .filter(|el| !el.range.is_empty()) 71 | .map(|el| { 72 | if let WrapType::Indent(indent) = el.wrap { 73 | trace_log!( 74 | "formattable text with {} spaces indent: {}", 75 | indent, 76 | text[el.range.clone()].replace('\n', "\\n") 77 | ); 78 | } else { 79 | trace_log!( 80 | "verbatim text: {}", 81 | text[el.range.clone()].replace('\n', "\\n") 82 | ); 83 | } 84 | el 85 | }) 86 | .collect::>() 87 | } 88 | 89 | /// Determine character ranges for each line in the document. 90 | fn line_ranges(text: &str) -> Vec { 91 | let mut start = 0; 92 | 93 | text.split_inclusive('\n') 94 | .map(|el| { 95 | let end = start + el.len(); 96 | let range = CharRange { start, end }; 97 | start = end; 98 | range 99 | }) 100 | .collect::>() 101 | } 102 | 103 | /// Find the start of the line that "point" is in. 104 | fn find_line_start(point: usize, line_ranges: &[CharRange]) -> Option { 105 | line_ranges 106 | .iter() 107 | .find(|el| el.contains(&point)) 108 | .map(|el| el.start) 109 | } 110 | 111 | #[cfg(test)] 112 | mod test { 113 | use super::*; 114 | 115 | #[test] 116 | fn finding_line_start() { 117 | let ranges = vec![ 118 | CharRange { start: 0, end: 10 }, 119 | CharRange { start: 10, end: 12 }, 120 | CharRange { start: 22, end: 31 }, 121 | CharRange { start: 31, end: 33 }, 122 | ]; 123 | 124 | for (point, expected) in [ 125 | (5, Some(0)), 126 | (10, Some(10)), 127 | (15, None), 128 | (22, Some(22)), 129 | (28, Some(22)), 130 | (30, Some(22)), 131 | (31, Some(31)), 132 | (35, None), 133 | ] { 134 | let start = find_line_start(point, &ranges); 135 | assert_eq!(expected, start); 136 | } 137 | } 138 | 139 | #[test] 140 | fn getting_line_ranges() { 141 | let text = r#" 142 | text 143 | more text 144 | 145 | even more text 146 | "#; 147 | let ranges = line_ranges(text); 148 | let expected = vec![ 149 | CharRange { start: 0, end: 1 }, 150 | CharRange { start: 1, end: 6 }, 151 | CharRange { start: 6, end: 16 }, 152 | CharRange { start: 16, end: 17 }, 153 | CharRange { start: 17, end: 32 }, 154 | ]; 155 | assert_eq!(expected, ranges); 156 | } 157 | 158 | #[test] 159 | fn filling_ranges() { 160 | let text = r#" 161 | text 162 | more text 163 | 164 | even more text 165 | "#; 166 | let wrap_ranges = vec![ 167 | CharRange { start: 1, end: 6 }, 168 | CharRange { start: 22, end: 26 }, 169 | CharRange { start: 31, end: 32 }, 170 | ]; 171 | let filled = fill_markdown_ranges(wrap_ranges, text); 172 | 173 | let expected = vec![ 174 | TextRange { 175 | wrap: WrapType::Verbatim, 176 | range: CharRange { start: 0, end: 1 }, 177 | }, 178 | TextRange { 179 | wrap: WrapType::Indent(0), 180 | range: CharRange { start: 1, end: 6 }, 181 | }, 182 | TextRange { 183 | wrap: WrapType::Verbatim, 184 | range: CharRange { start: 6, end: 22 }, 185 | }, 186 | TextRange { 187 | wrap: WrapType::Indent(5), 188 | range: CharRange { start: 22, end: 26 }, 189 | }, 190 | TextRange { 191 | wrap: WrapType::Verbatim, 192 | range: CharRange { start: 26, end: 31 }, 193 | }, 194 | TextRange { 195 | wrap: WrapType::Indent(14), 196 | range: CharRange { start: 31, 
end: 32 }, 197 | }, 198 | ]; 199 | 200 | assert_eq!(expected.len(), filled.len()); 201 | for (v1, v2) in expected.into_iter().zip(filled) { 202 | assert_eq!(v1, v2); 203 | } 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash -euo pipefail 2 | 3 | SRC := $(shell find src -name "*.rs") 4 | TARGET_DEV := target/debug/mdslw 5 | TARGET_PROD := target/x86_64-unknown-linux-musl/release/mdslw 6 | 7 | default: build-dev 8 | 9 | build-dev: $(TARGET_DEV) 10 | 11 | $(TARGET_DEV): Cargo.lock Cargo.toml $(SRC) 12 | cargo build 13 | 14 | .PHONY: install-toolchains 15 | install-toolchains: 16 | rustup target add x86_64-unknown-linux-musl 17 | rustup target add armv7-unknown-linux-gnueabihf 18 | rustup target add x86_64-pc-windows-gnu 19 | 20 | build-prod: $(TARGET_PROD) 21 | 22 | # Build prod for the dev system. 23 | $(TARGET_PROD): Cargo.lock Cargo.toml $(SRC) 24 | RUSTFLAGS='-Dwarnings -C link-arg=-s -C relocation-model=static' \ 25 | cargo build -j "$$(nproc --all)" --release --target=x86_64-unknown-linux-musl 26 | 27 | .PHONY: build-prod-all 28 | build-prod-all: 29 | echo ==== x86_64-unknown-linux-musl ==== 30 | $(MAKE) --always-make build-prod 31 | echo ==== armv7-unknown-linux-gnueabihf ==== 32 | RUSTFLAGS='-Dwarnings -C link-arg=-s' \ 33 | cargo build -j "$$(nproc --all)" --release --target=armv7-unknown-linux-gnueabihf 34 | echo ==== x86_64-pc-windows-gnu ==== 35 | RUSTFLAGS='-Dwarnings -C link-arg=-s' \ 36 | cargo build -j "$$(nproc --all)" --release --target x86_64-pc-windows-gnu 37 | 38 | .PHONY: copy-relese-binaries 39 | copy-relese-binaries: 40 | rm -rf ./dist 41 | mkdir -p ./dist 42 | cp target/x86_64-unknown-linux-musl/release/mdslw ./dist/mdslw_x86_64-unknown-linux-musl 43 | cp target/armv7-unknown-linux-gnueabihf/release/mdslw ./dist/mdslw_armv7-unknown-linux-gnueabihf 44 | cp target/x86_64-pc-windows-gnu/release/mdslw.exe ./dist/mdslw_x86_64-pc-windows-gnu.exe 45 | 46 | .PHONY: test 47 | test: 48 | RUSTFLAGS="-Dwarnings" cargo test 49 | $(MAKE) test-features test-langs test-default-config assert-version-tag test-envs-match-flags 50 | 51 | FEATURES := $(shell grep "/// {n} \* [a-z-]* => " src/cfg.rs | awk '{print $$4}' | tr '\n' ',' | sed 's/,$$//') 52 | 53 | .PHONY: test-features 54 | test-features: 55 | [[ -n "$(FEATURES)" ]] 56 | RUSTFLAGS="-Dwarnings" cargo run -- --features="$(FEATURES)" <<< "markdown" 57 | 58 | .PHONY: assert-version-tag 59 | assert-version-tag: 60 | # Extract tag and compare it to the version known by mdslw. When not run on a 61 | # tag, this target checks that the version known by the tool is not identical 62 | # to any existing tag. When run on a tag, it checks that the version known is 63 | # identical to the current tag. 
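# For example, with version = "0.16.1" in Cargo.toml, a checkout of the tag v0.16.1
# passes the first branch, while on any other commit the check fails if an existing
# tag name equals the version reported by the tool.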
64 | echo >&2 "Tags: $$(git tag --list | tr '\n' ' ')" 65 | version=$$(RUSTFLAGS="-Dwarnings" cargo run -- --version | awk '{print $$2'}) && \ 66 | echo >&2 "Version: $${version}" && \ 67 | tag=$$(git describe --exact-match --tags | sed 's/^v//' || :) && \ 68 | if [[ -n "$${tag}" ]]; then \ 69 | if [[ "$${tag}" != "$${version}" ]]; then \ 70 | echo >&2 "Version tag $${tag} does not match tool version $${version}."; \ 71 | exit 1; \ 72 | fi; \ 73 | else \ 74 | tags=$$(git tag --list) && match= && \ 75 | for t in $${tags}; do \ 76 | if [[ "$${version}" == "$$t" ]]; then match="$$t"; fi; \ 77 | done && \ 78 | if [[ -n "$${match-}" ]]; then \ 79 | echo >&2 "Found an existing matching git version tag: $$match"; \ 80 | exit 1; \ 81 | fi; \ 82 | fi 83 | 84 | .PHONY: test-envs-match-flags 85 | test-envs-match-flags: 86 | flags=($$(cargo run -- --help | grep -E "^ +-" | grep -E -o -- "--[0-9a-zA-Z-]+" | grep -vE -- '--(help|verbose|version)' | sort -fu)) && \ 87 | envs=($$(cargo run -- --help | grep -o '\[env: [^=]*=' | sed 's/^\[env: //;s/=$$//' | sort -fu)) && \ 88 | echo FLAGS: "$${flags[@]}" && echo ENVS: "$${envs[@]}" && \ 89 | [[ "$${#flags[@]}" == "$${#envs[@]}" ]] && \ 90 | for idx in "$${!flags[@]}"; do \ 91 | flag="$${flags[$${idx}]}" && env="$${envs[$${idx}]}" && \ 92 | if [[ -n "$$(tr -d '[:upper:]_' <<< $$env)" || -n "$$(tr -d '[:lower:]-' <<< $$flag)" ]]; then \ 93 | echo >&2 "Malformed env or flag: $${env} || $${flag}"; exit 1; \ 94 | fi; \ 95 | if [[ "mdslw_$$(sed 's/^__//' <<< $${flag//-/_})" != "$${env,,}" ]]; then \ 96 | echo >&2 "Env/flag mismatch: $${env} != $${flag}"; exit 1; \ 97 | fi; \ 98 | done 99 | 100 | .PHONY: lint 101 | lint: 102 | rustup component add clippy 103 | RUSTFLAGS="-Dwarnings" cargo check --all-features --all-targets 104 | RUSTFLAGS="-Dwarnings" cargo clippy --all-features --all-targets --no-deps 105 | 106 | # Extract languages requested by the code to keep them in sync. 107 | LANGS := $(shell grep -o '/// Supported languages are:\( *[a-z][a-z]\)* *' ./src/cfg.rs | awk -F: '{print $$2}' | tr -s '[:space:]') 108 | 109 | .PHONY: test-langs 110 | test-langs: 111 | [[ -n "$(LANGS)" ]] 112 | RUSTFLAGS="-Dwarnings" cargo run -- --lang="$(LANGS) ac" <<< "markdown" 113 | 114 | .PHONY: test-default-config 115 | test-default-config: 116 | from_readme=$$( \ 117 | state=0; while read -r line; do \ 118 | if [[ "$${line}" == "" ]]; then state=0; fi; \ 119 | if [[ "$${state}" -eq 1 ]]; then echo "$${line}"; fi; \ 120 | if [[ "$${line}" == "" ]]; then state=1; fi; \ 121 | done < README.md | sed '/^$$/d' | grep -v '^```'\ 122 | ) && \ 123 | from_tool=$$(RUSTFLAGS="-Dwarnings" cargo run -- --default-config) && \ 124 | [[ "$${from_tool}" == "$${from_readme}" ]] 125 | 126 | COVERAGE := .coverage.html 127 | PROFRAW := .coverage.profraw 128 | PROFDATA := .coverage.profdata 129 | COVERAGE_JSON := .coverage.json 130 | RUSTC_ROOT := $(shell rustc --print sysroot) 131 | LLVM_PROFILE_FILE := $(PROFRAW) 132 | export LLVM_PROFILE_FILE 133 | MIN_COV_PERCENT := 80 134 | 135 | .PHONY: coverage 136 | coverage: 137 | rm -f "$(COVERAGE)" "$(PROFRAW)" "$(PROFDATA)" 138 | # Install dependencies 139 | rustup component add llvm-tools 140 | cargo install rustfilt 141 | # Build stand-alone test executable. 142 | RUSTFLAGS="-C instrument-coverage=all" \ 143 | cargo build --tests 144 | # Find and run executable to generate coverage report. 
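# The steps below run the instrumented test binary (which writes $(PROFRAW)), merge the
# raw profile with llvm-profdata, and then use llvm-cov to render the HTML report as well
# as the JSON export that the coverage threshold check at the end reads.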
145 | exe=$$( \ 146 | find target/debug/deps/ -executable -name "mdslw-*" \ 147 | | xargs ls -t | head -n1 \ 148 | ) && \ 149 | prof_exe=$$(find $(RUSTC_ROOT) -executable -name "llvm-profdata" | head -n1) && \ 150 | cov_exe=$$(find $(RUSTC_ROOT) -executable -name "llvm-cov" | head -n1) && \ 151 | "$${exe}" && \ 152 | "$${prof_exe}" merge \ 153 | -sparse "$(PROFRAW)" -o "$(PROFDATA)" && \ 154 | "$${cov_exe}" show \ 155 | -Xdemangler=rustfilt "$${exe}" \ 156 | --format=html \ 157 | --instr-profile="$(PROFDATA)" \ 158 | --show-line-counts-or-regions \ 159 | --show-instantiations \ 160 | --show-branches=count \ 161 | --sources "$$(readlink -e src)" \ 162 | > "$(COVERAGE)" && \ 163 | if [[ -t 1 ]]; then xdg-open "$(COVERAGE)"; fi && \ 164 | "$${cov_exe}" export \ 165 | -Xdemangler=rustfilt "$${exe}" \ 166 | --format=text \ 167 | --instr-profile="$(PROFDATA)" \ 168 | --sources "$$(readlink -e src)" \ 169 | > "$(COVERAGE_JSON)" 170 | echo "Per-file coverage:" && \ 171 | jq -r ".data[].files[] | [.summary.lines.percent, .filename] | @csv" \ 172 | < "$(COVERAGE_JSON)" \ 173 | | sort -t, -k 2 \ 174 | | sed "s;$${PWD};.;" \ 175 | | awk -F, '{printf("%.2f%% => %s\n", $$1, $$2)}' 176 | jq -r ".data[].totals.lines.percent" \ 177 | < "$(COVERAGE_JSON)" \ 178 | | awk '{if ($$1<$(MIN_COV_PERCENT)) \ 179 | {printf("coverage low: %.2f%%<$(MIN_COV_PERCENT)%%\n", $$1); exit(1)} \ 180 | else{printf("coverage OK: %.2f%%\n", $$1)} \ 181 | }' >&2 182 | -------------------------------------------------------------------------------- /src/wrap.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */
17 | 
18 | use crate::detect::{BreakDetector, WhitespaceDetector};
19 | use crate::indent::build_indent;
20 | use crate::linebreak::insert_linebreaks_after_sentence_ends;
21 | use crate::ranges::{TextRange, WrapType};
22 | use crate::trace_log;
23 | 
24 | pub fn add_linebreaks_and_wrap(
25 |     ranges: Vec<TextRange>,
26 |     max_width: &Option<usize>,
27 |     detector: &BreakDetector,
28 |     text: &str,
29 | ) -> String {
30 |     let mut result = String::new();
31 | 
32 |     for range in ranges {
33 |         if let WrapType::Indent(indent_spaces) = range.wrap {
34 |             trace_log!(
35 |                 "wrapping text: {}",
36 |                 text[range.range.clone()].replace('\n', "\\n")
37 |             );
38 |             let indent = build_indent(indent_spaces);
39 |             trace_log!("keeping indent in mind: '{}'", indent);
40 |             let broken = insert_linebreaks_after_sentence_ends(&text[range.range], detector);
41 |             trace_log!(
42 |                 "with linebreaks after sentences: {}",
43 |                 broken.replace('\n', "\\n")
44 |             );
45 |             let wrapped = broken
46 |                 .split('\n')
47 |                 .enumerate()
48 |                 .flat_map(|(idx, el)| {
49 |                     wrap_long_line_and_collapse_inline_whitespace(
50 |                         el,
51 |                         idx,
52 |                         max_width,
53 |                         &indent,
54 |                         &detector.whitespace,
55 |                     )
56 |                 })
57 |                 .collect::<Vec<_>>()
58 |                 .join("\n");
59 |             trace_log!(
60 |                 "after wrapping long sentences: {}",
61 |                 wrapped.replace('\n', "\\n")
62 |             );
63 |             result.push_str(&wrapped);
64 |         } else {
65 |             trace_log!(
66 |                 "keeping text: {}",
67 |                 text[range.range.clone()].to_string().replace('\n', "\\n")
68 |             );
69 |             result.push_str(&text[range.range]);
70 |         }
71 |     }
72 | 
73 |     result.trim_end().to_string()
74 | }
75 | 
76 | /// The main purpose of this function is to wrap a long line, making sure to add the linebreak
77 | /// between words. It does so by splitting by whitespace and then joining again by spaces. One side
78 | /// effect that we accept here is that all consecutive inline whitespace will be replaced by a
79 | /// single space due to the splitting-and-joining process.
80 | fn wrap_long_line_and_collapse_inline_whitespace(
81 |     sentence: &str,
82 |     sentence_idx: usize,
83 |     max_width: &Option<usize>,
84 |     indent: &str,
85 |     detector: &WhitespaceDetector,
86 | ) -> Vec<String> {
87 |     let mut lines = vec![];
88 |     let mut words = detector
89 |         .split_whitespace(sentence)
90 |         .filter(|el| !el.is_empty());
91 |     let (mut line, first_indent_len) = if let Some(first_word) = words.next() {
92 |         // The first sentence is already properly indented. Every other sentence has to be
93 |         // indented manually.
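        // (E.g. for a markdown list item, the list marker and its indent in front of the
        // wrap range are emitted verbatim, so only continuation sentences and wrapped
        // lines receive the indent prefix built here.)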
94 | if sentence_idx == 0 { 95 | (String::from(first_word), indent.chars().count()) 96 | } else { 97 | (format!("{}{}", indent, first_word), 0) 98 | } 99 | } else { 100 | (String::new(), 0) 101 | }; 102 | let mut line_len = line.chars().count() + first_indent_len; 103 | let width = max_width.unwrap_or(0); 104 | for word in words { 105 | let chars = word.chars().count(); 106 | if width == 0 || line_len + 1 + chars <= width { 107 | line.push(' '); 108 | line.push_str(word); 109 | line_len += chars + 1; 110 | } else { 111 | lines.push(line); 112 | line = String::from(indent); 113 | line.push_str(word); 114 | line_len = line.chars().count(); 115 | } 116 | } 117 | lines.push(line); 118 | lines 119 | } 120 | 121 | #[cfg(test)] 122 | mod test { 123 | use super::*; 124 | use crate::detect::BreakCfg; 125 | use crate::parse::CharRange; 126 | 127 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg { 128 | keep_linebreaks: false, 129 | }; 130 | 131 | #[test] 132 | fn wrapping_long_sentence() { 133 | let sentence = "this sentence is not that long but will be wrapped"; 134 | let sentence_idx = 0; 135 | let max_width = 11; 136 | let indent = " "; 137 | let wrapped = wrap_long_line_and_collapse_inline_whitespace( 138 | sentence, 139 | sentence_idx, 140 | &Some(max_width), 141 | indent, 142 | &WhitespaceDetector::default(), 143 | ); 144 | 145 | // No indent for the start of the sentence due to the sentence_idx. 146 | let expected = vec![ 147 | "this", 148 | " sentence", 149 | " is not", 150 | " that long", 151 | " but will", 152 | " be", 153 | " wrapped", 154 | ]; 155 | 156 | assert_eq!(expected, wrapped); 157 | } 158 | 159 | #[test] 160 | fn wrapping_long_sentence_that_is_not_the_first() { 161 | let sentence = "some sentence with words"; 162 | let sentence_idx = 1; 163 | let max_width = 5; 164 | // Indent will be copied, does not have to be whitespace. 165 | let indent = "|"; 166 | let wrapped = wrap_long_line_and_collapse_inline_whitespace( 167 | sentence, 168 | sentence_idx, 169 | &Some(max_width), 170 | indent, 171 | &WhitespaceDetector::default(), 172 | ); 173 | 174 | // Note the indent for the start of the sentence due to the sentence_idx. 175 | let expected = vec!["|some", "|sentence", "|with", "|words"]; 176 | 177 | assert_eq!(expected, wrapped); 178 | } 179 | 180 | #[test] 181 | fn not_wrapping_long_sentence_unless_requested() { 182 | let sentence = "this sentence is somewhat long but will not be wrapped"; 183 | let sentence_idx = 0; 184 | let indent = " "; 185 | let wrapped = wrap_long_line_and_collapse_inline_whitespace( 186 | sentence, 187 | sentence_idx, 188 | &None, 189 | indent, 190 | &WhitespaceDetector::default(), 191 | ); 192 | 193 | let expected = vec![sentence]; 194 | 195 | assert_eq!(expected, wrapped); 196 | } 197 | 198 | #[test] 199 | fn adding_linebreaks_after_sentences() { 200 | let ranges = vec![ 201 | TextRange { 202 | wrap: WrapType::Indent(0), 203 | range: CharRange { start: 0, end: 33 }, 204 | }, 205 | // The pipe should remain verbatim. 206 | TextRange { 207 | wrap: WrapType::Verbatim, 208 | range: CharRange { start: 33, end: 36 }, 209 | }, 210 | TextRange { 211 | wrap: WrapType::Indent(3), 212 | range: CharRange { start: 36, end: 74 }, 213 | }, 214 | ]; 215 | let text = String::from( 216 | "Some text. It contains sentences. | It's separated in two. 
Parts, that is.", 217 | ); 218 | let detector = BreakDetector::new("", "", false, ".", CFG_FOR_TESTS); 219 | 220 | let wrapped = add_linebreaks_and_wrap(ranges, &None, &detector, &text); 221 | 222 | // Whitespace at the start of a range is also merged into one space. Not sure if that makes 223 | // sense but it does not appear to be relevant in practice, probably due to the way we 224 | // parse the markdown files. That is, none of the ranges we get appear to start with 225 | // whitespace at all. 226 | let expected = String::from( 227 | "Some text.\nIt contains sentences. | It's separated in two.\n Parts, that is.", 228 | ); 229 | assert_eq!(expected, wrapped); 230 | } 231 | 232 | #[test] 233 | fn adding_linebreaks_after_sentences_with_keep_words() { 234 | let ranges = vec![TextRange { 235 | wrap: WrapType::Indent(0), 236 | range: CharRange { start: 0, end: 33 }, 237 | }]; 238 | let text = String::from("Some text. It contains sentences."); 239 | let detector = BreakDetector::new("TEXT.", "", false, ".", CFG_FOR_TESTS); 240 | 241 | let wrapped = add_linebreaks_and_wrap(ranges, &None, &detector, &text); 242 | 243 | let expected = String::from("Some text. It contains sentences."); 244 | assert_eq!(expected, wrapped); 245 | } 246 | } 247 | -------------------------------------------------------------------------------- /src/detect.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use std::collections::HashSet; 19 | 20 | pub struct BreakDetector { 21 | // Information related to whitespace. 22 | pub whitespace: WhitespaceDetector, 23 | 24 | // Information related to keep words. 25 | keep_words: HashSet<(String, usize)>, 26 | keep_words_preserve_case: bool, 27 | 28 | // Information related to end markers. 
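    // Characters that may end a sentence, e.g. "?!:." as set in this repository's
    // .mdslw.toml.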
29 |     end_markers: String,
30 | }
31 | 
32 | #[derive(Default)]
33 | pub struct WhitespaceDetector {
34 |     whitespace_to_detect: String,
35 | }
36 | 
37 | impl<'a> WhitespaceDetector {
38 |     const NBSP: &'static str = "\u{00a0}\u{2007}\u{202f}\u{2060}\u{feff}";
39 | 
40 |     pub fn new(keep_linebreaks: bool) -> Self {
41 |         let mut whitespace_to_detect = String::from(Self::NBSP);
42 |         if keep_linebreaks {
43 |             log::debug!("not treating linebreaks as modifiable whitespace");
44 |             whitespace_to_detect.push('\n')
45 |         } else {
46 |             log::debug!("treating linebreaks as modifiable whitespace");
47 |         }
48 |         Self {
49 |             whitespace_to_detect,
50 |         }
51 |     }
52 | 
53 |     pub fn split_whitespace(&self, s: &'a str) -> std::vec::IntoIter<&'a str> {
54 |         s.split(|el| self.is_whitespace(&el))
55 |             .filter(|el| !el.is_empty())
56 |             .collect::<Vec<_>>()
57 |             .into_iter()
58 |     }
59 | 
60 |     pub fn is_whitespace(&self, ch: &char) -> bool {
61 |         // The character is whitespace if it is detected to be UTF8 whitespace and if it is not in
62 |         // the list of excluded whitespace characters known by this struct.
63 |         ch.is_whitespace() && !self.whitespace_to_detect.contains(*ch)
64 |     }
65 | 
66 |     pub fn is_nbsp(&self, ch: &char) -> bool {
67 |         Self::NBSP.contains(*ch)
68 |     }
69 | }
70 | 
71 | #[derive(Debug, PartialEq)]
72 | pub struct BreakCfg {
73 |     pub keep_linebreaks: bool,
74 | }
75 | 
76 | impl BreakDetector {
77 |     pub fn new(
78 |         keep_words: &str,
79 |         keep_word_ignores: &str,
80 |         keep_words_preserve_case: bool,
81 |         end_markers: &str,
82 |         break_cfg: &BreakCfg,
83 |     ) -> Self {
84 |         let (cased_words, cased_ignores) = if keep_words_preserve_case {
85 |             (keep_words.to_owned(), keep_word_ignores.to_owned())
86 |         } else {
87 |             (keep_words.to_lowercase(), keep_word_ignores.to_lowercase())
88 |         };
89 | 
90 |         let ignores = cased_ignores.split_whitespace().collect::<Vec<_>>();
91 |         let internal_keep_words = cased_words
92 |             .split_whitespace()
93 |             .filter(|el| !ignores.contains(el))
94 |             .map(|el| (el.to_string(), el.len() - 1))
95 |             .collect::<HashSet<_>>();
96 | 
97 |         log::debug!("end markers: '{}'", end_markers);
98 |         log::debug!("using {} unique keep words", internal_keep_words.len());
99 |         let case_info = if keep_words_preserve_case { "" } else { "in" };
100 |         log::debug!("treating keep words case-{}sensitively", case_info);
101 | 
102 |         Self {
103 |             // Keep words.
104 |             keep_words_preserve_case,
105 |             keep_words: internal_keep_words,
106 |             // End markers.
107 |             end_markers: end_markers.to_string(),
108 |             // Whitespace.
109 |             whitespace: WhitespaceDetector::new(break_cfg.keep_linebreaks),
110 |         }
111 |     }
112 | 
113 |     /// Checks whether "text" ends with one of the keep words known by self at "idx".
114 |     pub fn ends_with_keep_word(&self, text: &[char], idx: &usize) -> bool {
115 |         if idx < &text.len() {
116 |             self.keep_words
117 |                 .iter()
118 |                 // Only check words that can actually be in the text.
119 |                 .filter(|(_el, disp)| idx >= disp)
120 |                 // Determine whether any keep word matches.
121 |                 .any(|(el, disp)| {
122 |                     // Check whether the word is at the start of the text or whether, if it starts
123 |                     // with an alphanumeric character, it is preceded by a character that is not
124 |                     // alphanumeric. That way, we avoid matching a keep word of "g." on a text going
125 |                     // "e.g.". Note that, here, idx>=disp holds. If a "word" does not start with an
126 |                     // alphanumeric character, then the definition of "word" is ambiguous anyway. In
127 |                     // such a case, we also match partially.
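                    // Here `disp` is the keep word's length minus one, so `idx - disp` is
                    // the index of the word's first character and `idx` that of its last.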
128 | (idx == disp || !text[idx-disp-1..=idx-disp].iter().all(|el| el.is_alphanumeric())) && 129 | // Check whether all characters of the keep word and the slice through the text 130 | // are identical. 131 | text[idx - disp..=*idx] 132 | .iter() 133 | // Convert the text we compare with to lower case, but only those parts 134 | // that we actually do compare with. The conversion is somewhat annoying 135 | // and complicated because a single upper-case character might map to 136 | // multiple lower-case ones when converted (not sure why that would be so). 137 | .flat_map(|el| { 138 | if self.keep_words_preserve_case { 139 | vec![*el] 140 | } else { 141 | el.to_lowercase().collect::>() 142 | } 143 | }) 144 | // The strings self.data is already in lower case if desired. No conversion 145 | // needed here. 146 | .zip(el.chars()) 147 | .all(|(ch1, ch2)| ch1 == ch2) 148 | }) 149 | } else { 150 | false 151 | } 152 | } 153 | 154 | /// Checks whether ch is an end marker and whether the surrounding characters indicate that ch 155 | /// is actually at the end of a sentence. 156 | pub fn is_breaking_marker(&self, ch: &char, next: Option<&char>) -> bool { 157 | // The current character has to be an end marker. If it is not, it does not end a sentence. 158 | self.end_markers.contains(*ch) 159 | // The next character must be whitespace. If it is not, this character is in the middle 160 | // of a word and, thus, not at the end of a sentence. 161 | && is_whitespace(next, &self.whitespace) 162 | } 163 | } 164 | 165 | // Some helper functions that make it easier to work with Option<&char> follow. 166 | 167 | fn is_whitespace(ch: Option<&char>, detector: &WhitespaceDetector) -> bool { 168 | ch.map(|el| detector.is_whitespace(el)).unwrap_or(false) 169 | } 170 | 171 | #[cfg(test)] 172 | mod test { 173 | use super::*; 174 | 175 | const TEXT_FOR_TESTS: &str = "Lorem iPsum doLor SiT aMeT. ConSectEtur adIpiSciNg ELiT."; 176 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg { 177 | keep_linebreaks: false, 178 | }; 179 | 180 | #[test] 181 | fn case_insensitive_match() { 182 | let detector = BreakDetector::new("ipsum sit adipiscing", "", false, "", CFG_FOR_TESTS); 183 | let text = TEXT_FOR_TESTS.chars().collect::>(); 184 | 185 | let found = (0..text.len()) 186 | .filter(|el| detector.ends_with_keep_word(&text, el)) 187 | .collect::>(); 188 | 189 | assert_eq!(found, vec![10, 20, 49]); 190 | } 191 | 192 | #[test] 193 | fn case_sensitive_match() { 194 | let detector = BreakDetector::new("ipsum SiT adipiscing", "", true, "", CFG_FOR_TESTS); 195 | let text = TEXT_FOR_TESTS.chars().collect::>(); 196 | 197 | let found = (0..text.len()) 198 | .filter(|el| detector.ends_with_keep_word(&text, el)) 199 | .collect::>(); 200 | 201 | assert_eq!(found, vec![20]); 202 | } 203 | 204 | #[test] 205 | fn matches_at_start_and_end() { 206 | let detector = BreakDetector::new("lorem elit.", "", false, "", CFG_FOR_TESTS); 207 | let text = TEXT_FOR_TESTS.chars().collect::>(); 208 | 209 | // Try to search outside the text's range, which will never match. 
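// (ends_with_keep_word rejects every idx >= text.len() via its bounds check, so the extra
// indices simply return false.)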
210 |         let found = (0..text.len() + 5)
211 |             .filter(|el| detector.ends_with_keep_word(&text, el))
212 |             .collect::<Vec<_>>();
213 | 
214 |         assert_eq!(found, vec![4, 55]);
215 |     }
216 | 
217 |     #[test]
218 |     fn ignoring_words_case_sensitively() {
219 |         let detector = BreakDetector::new("ipsum SiT adipiscing", "SiT", true, "", CFG_FOR_TESTS);
220 |         let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
221 | 
222 |         let found = (0..text.len())
223 |             .filter(|el| detector.ends_with_keep_word(&text, el))
224 |             .collect::<Vec<_>>();
225 | 
226 |         assert_eq!(found, vec![]);
227 |     }
228 | 
229 |     #[test]
230 |     fn ignoring_words_case_insensitively() {
231 |         let detector = BreakDetector::new("ipsum sit adipiscing", "sit", false, "", CFG_FOR_TESTS);
232 |         let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
233 | 
234 |         let found = (0..text.len())
235 |             .filter(|el| detector.ends_with_keep_word(&text, el))
236 |             .collect::<Vec<_>>();
237 | 
238 |         assert_eq!(found, vec![10, 49]);
239 |     }
240 | 
241 |     #[test]
242 |     fn ignores_that_are_not_suppressions_are_ignored() {
243 |         let detector = BreakDetector::new(
244 |             "ipsum sit adipiscing",
245 |             "sit asdf blub muhaha",
246 |             false,
247 |             "",
248 |             CFG_FOR_TESTS,
249 |         );
250 |         let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
251 | 
252 |         let found = (0..text.len())
253 |             .filter(|el| detector.ends_with_keep_word(&text, el))
254 |             .collect::<Vec<_>>();
255 | 
256 |         assert_eq!(found, vec![10, 49]);
257 |     }
258 | }
259 | 
--------------------------------------------------------------------------------
/src/call.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 | 
18 | use std::collections::VecDeque;
19 | use std::fmt;
20 | use std::io::Write;
21 | use std::path::Path;
22 | use std::path::PathBuf;
23 | use std::process::{Command, Stdio};
24 | use std::sync::Mutex;
25 | 
26 | use anyhow::{Context, Error, Result};
27 | 
28 | use crate::trace_log;
29 | 
30 | pub struct Upstream {
31 |     cmd: String,
32 |     args: Vec<String>,
33 | }
34 | 
35 | impl Upstream {
36 |     pub fn from_cfg(command: &str, args: &str, sep: &str) -> Result<Self> {
37 |         let mut split_args = if sep.is_empty() {
38 |             args.split_whitespace()
39 |                 .map(String::from)
40 |                 .collect::<VecDeque<_>>()
41 |         } else {
42 |             args.split(sep).map(String::from).collect::<VecDeque<_>>()
43 |         };
44 |         let cmd = if !command.is_empty() {
45 |             command.to_string()
46 |         } else {
47 |             split_args
48 |                 .pop_front()
49 |                 .ok_or_else(|| {
50 |                     Error::msg(format!(
51 |                         "Failed to extract upstream command from arguments '{}'.",
52 |                         args
53 |                     ))
54 |                 })?
55 | .to_string() 56 | }; 57 | let result = Self { 58 | cmd, 59 | args: split_args.into_iter().collect::>(), 60 | }; 61 | log::debug!("using upstream formatter {}", result); 62 | Ok(result) 63 | } 64 | } 65 | 66 | impl fmt::Display for Upstream { 67 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 68 | write!(f, "'{}' '{}'", self.cmd, self.args.join("' '")) 69 | } 70 | } 71 | 72 | pub fn upstream_formatter( 73 | upstream: &Upstream, 74 | file_content: String, 75 | workdir: &Path, 76 | ) -> Result { 77 | let fallback_workdir = PathBuf::from("."); 78 | let workdir = if workdir.components().count() == 0 { 79 | &fallback_workdir 80 | } else { 81 | workdir 82 | }; 83 | log::debug!( 84 | "running upstream executable in directory: {}", 85 | workdir.to_string_lossy() 86 | ); 87 | 88 | let mut process = Command::new(&upstream.cmd) 89 | .args(&upstream.args) 90 | .stdin(Stdio::piped()) 91 | .stdout(Stdio::piped()) 92 | .stderr(Stdio::piped()) 93 | .current_dir(workdir) 94 | .spawn() 95 | .context("failed to spawn upstream auto-formatter")?; 96 | 97 | let mut stdin = process 98 | .stdin 99 | .take() 100 | .context("failed to acquire stdin of upstream auto-formatter")?; 101 | 102 | // Write to stdin in a separate thread. Is there really is no other way to do that? Calling 103 | // "expect" here is not a problem because, if the process panics, we receive an error. 104 | std::thread::spawn(move || { 105 | stdin 106 | .write_all(file_content.as_bytes()) 107 | .expect("failed to write stdin to upstream auto-formatter") 108 | }); 109 | 110 | let output = process 111 | .wait_with_output() 112 | .context("failed to wait for output of upstream auto-formatter")?; 113 | 114 | let stdout = String::from_utf8_lossy(&output.stdout); 115 | let stderr = String::from_utf8_lossy(&output.stderr); 116 | 117 | if output.status.success() { 118 | Ok(stdout.to_string()) 119 | } else { 120 | Err(Error::msg(format!( 121 | "failed to read stdout of upstream auto-formatter \"{}\". Stderr follows: \n\n{}", 122 | upstream, stderr, 123 | ))) 124 | } 125 | } 126 | 127 | pub struct Pager { 128 | stdin: Option, 129 | process: std::process::Child, 130 | } 131 | 132 | impl Pager { 133 | pub fn send(&mut self, s: &str) -> Result<()> { 134 | log::debug!("sending {} bytes to downstream pager's stdin", s.len()); 135 | trace_log!("message sent to downstream pager: {}", s); 136 | if let Some(ref mut stdin) = self.stdin { 137 | stdin 138 | .write_all(s.as_bytes()) 139 | .context("failed to send text to pager's stdin") 140 | } else { 141 | unreachable!("cannot send to closed stdin of downstream pager"); 142 | } 143 | } 144 | } 145 | 146 | impl Drop for Pager { 147 | fn drop(&mut self) { 148 | { 149 | log::debug!("closing stdin of downstream pager"); 150 | // Have pager's stdin go out of scope before waiting for the pager 151 | // process. This should not be needed according to the docs of 152 | // "wait", because supposedly that stdin is closed before waiting to 153 | // prevent deadlocks, but it seems to be needed, because there is a 154 | // deadlock without this. 155 | let _ = self.stdin.take(); 156 | } 157 | self.process 158 | .wait() 159 | .expect("failed to wait for pager to finish"); 160 | } 161 | } 162 | 163 | /// If to_null is set, the output of this pager will be directed to /dev/null. 164 | /// That is used solely for testing. 
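/// The pager is passed as one string whose first whitespace-separated word is the command and
/// whose remaining words are its arguments, e.g. "cat -" as used in the tests below.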
165 | fn downstream_pager(pager: &str, workdir: std::path::PathBuf, to_null: bool) -> Result { 166 | let split_pager = pager.split_whitespace().collect::>(); 167 | 168 | // Interpret an empty directory as the current directory. 169 | let pager_workdir = if workdir.components().count() == 0 { 170 | ".".into() 171 | } else { 172 | workdir 173 | }; 174 | log::debug!( 175 | "running downstream pager in directory: {}", 176 | pager_workdir.to_string_lossy() 177 | ); 178 | 179 | let cmd = split_pager 180 | .first() 181 | .ok_or(Error::msg("must specify a pager command")) 182 | .context("failed to determine downstream pager command")?; 183 | log::debug!("using pager executable {}", cmd); 184 | 185 | let args = split_pager[1..].to_owned(); 186 | log::debug!("using pager arguments {:?}", args); 187 | 188 | let mut process_cfg = Command::new(cmd); 189 | process_cfg 190 | .args(&args) 191 | .stdin(Stdio::piped()) 192 | .current_dir(pager_workdir); 193 | if to_null { 194 | process_cfg.stdout(Stdio::null()); 195 | } 196 | let mut process = process_cfg 197 | .spawn() 198 | .context("failed to spawn downstream pager")?; 199 | 200 | let stdin = process 201 | .stdin 202 | .take() 203 | .context("failed to acquire stdin of the downstream pager")?; 204 | 205 | Ok(Pager { 206 | stdin: Some(stdin), 207 | process, 208 | }) 209 | } 210 | 211 | /// A helper to ensure that text written to stdout is not mangled due to parallelisation. 212 | pub enum ParallelPrinter { 213 | // First bool indicates whether there had been a failure writing to the pager. 214 | Paged(Mutex<(bool, Pager)>), 215 | Direct(Mutex<()>), 216 | } 217 | 218 | impl ParallelPrinter { 219 | pub fn new(pager: &Option) -> Result { 220 | if let Some(pager) = pager { 221 | if !pager.is_empty() { 222 | let downstream = downstream_pager(pager, PathBuf::from("."), false)?; 223 | Ok(Self::Paged(Mutex::new((false, downstream)))) 224 | } else { 225 | Ok(Self::Direct(Mutex::new(()))) 226 | } 227 | } else { 228 | Ok(Self::Direct(Mutex::new(()))) 229 | } 230 | } 231 | 232 | pub fn println(&self, text: &str) { 233 | match self { 234 | Self::Paged(mutex) => { 235 | let mut result = mutex 236 | .lock() 237 | .expect("failed to lock mutex due to previous panic"); 238 | // We do not retry sending to the pager in case sending failed once. 239 | if !result.0 { 240 | if let Err(err) = result.1.send(text) { 241 | log::error!("{:?}", err); 242 | result.0 = true; 243 | } 244 | } 245 | } 246 | Self::Direct(mutex) => { 247 | // Assigning to keep the lock. The lock is lifted once the binding is dropped. 
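// Holding the guard for the duration of the println! call keeps output from different
// worker threads from interleaving.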
248 | let _lock = mutex 249 | .lock() 250 | .expect("failed to lock mutex due to previous panic"); 251 | println!("{}", text); 252 | } 253 | } 254 | } 255 | } 256 | 257 | #[cfg(test)] 258 | mod test { 259 | use super::*; 260 | 261 | #[test] 262 | fn can_call_simple_executable_with_stdio_handling() -> Result<()> { 263 | let input = String::from("some text"); 264 | let piped = upstream_formatter( 265 | &Upstream::from_cfg("", "cat", " ")?, 266 | input.clone(), 267 | &PathBuf::from("."), 268 | ) 269 | .unwrap(); 270 | assert_eq!(input, piped); 271 | Ok(()) 272 | } 273 | 274 | #[test] 275 | fn can_call_with_args() -> Result<()> { 276 | let piped = upstream_formatter( 277 | &Upstream::from_cfg("echo", "some text", "")?, 278 | String::new(), 279 | &PathBuf::from("."), 280 | ) 281 | .unwrap(); 282 | assert_eq!("some text\n", piped); 283 | Ok(()) 284 | } 285 | 286 | #[test] 287 | fn need_to_provide_command() -> Result<()> { 288 | let result = upstream_formatter( 289 | &Upstream::from_cfg("", "", " ")?, 290 | String::new(), 291 | &PathBuf::from("."), 292 | ); 293 | assert!(result.is_err()); 294 | Ok(()) 295 | } 296 | 297 | #[test] 298 | fn unknown_executable_fails() -> Result<()> { 299 | let result = upstream_formatter( 300 | &Upstream::from_cfg("", "executable-unknown-asdf", " ")?, 301 | String::new(), 302 | &PathBuf::from("."), 303 | ); 304 | assert!(result.is_err()); 305 | Ok(()) 306 | } 307 | 308 | #[test] 309 | fn can_call_pager_with_args() -> Result<()> { 310 | let mut pager = downstream_pager(&String::from("cat -"), ".".into(), true)?; 311 | pager.send("some text")?; 312 | Ok(()) 313 | } 314 | 315 | #[test] 316 | fn need_to_provide_pager_command() { 317 | let result = downstream_pager("", ".".into(), true); 318 | assert!(result.is_err()); 319 | } 320 | 321 | #[test] 322 | fn unknown_pager_executable_fails() { 323 | let result = downstream_pager(&String::from("executable-unknown-asdf"), ".".into(), true); 324 | assert!(result.is_err()); 325 | } 326 | } 327 | -------------------------------------------------------------------------------- /src/fs.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | 18 | use std::collections::HashSet; 19 | use std::path::{Path, PathBuf}; 20 | 21 | use anyhow::{Context, Error, Result}; 22 | use ignore::Walk; 23 | 24 | pub fn find_files_with_extension(paths: &[PathBuf], extension: &str) -> Result> { 25 | let mut errors = vec![]; 26 | 27 | let found = paths 28 | .iter() 29 | .filter_map(|top_level_path| { 30 | if top_level_path.is_file() { 31 | log::debug!("found file on disk: {}", top_level_path.to_string_lossy()); 32 | Some(vec![top_level_path.clone()]) 33 | } else if top_level_path.is_dir() { 34 | log::debug!( 35 | "crawling directory on disk: {}", 36 | top_level_path.to_string_lossy() 37 | ); 38 | Some( 39 | // Recursively extract all files with the given extension. 40 | Walk::new(top_level_path) 41 | .filter_map(|path_entry| match path_entry { 42 | Ok(path) => Some(path), 43 | Err(err) => { 44 | let path = top_level_path.to_string_lossy(); 45 | log::error!("failed to crawl {}: {}", path, err); 46 | None 47 | } 48 | }) 49 | .filter_map(|el| match el.path().canonicalize() { 50 | Ok(path) => Some(path), 51 | Err(err) => { 52 | let path = el.path().to_string_lossy(); 53 | if el.path_is_symlink() { 54 | log::error!("ignoring broken symlink: {}: {}", err, path); 55 | } else { 56 | log::error!("ignoring inaccessible path: {}: {}", err, path); 57 | } 58 | None 59 | } 60 | }) 61 | // Only keep actual markdown files and symlinks to them. 62 | .filter(|el| el.is_file() && el.to_string_lossy().ends_with(extension)) 63 | .map(strip_cwd_if_possible) 64 | .inspect(|el| { 65 | log::debug!("discovered file on disk: {}", el.to_string_lossy()); 66 | }) 67 | .collect::>(), 68 | ) 69 | } else { 70 | errors.push(top_level_path.to_string_lossy().to_string()); 71 | None 72 | } 73 | }) 74 | .flatten() 75 | .collect::>(); 76 | 77 | if errors.is_empty() { 78 | log::debug!( 79 | "discovered {} files with extension {}", 80 | found.len(), 81 | extension 82 | ); 83 | Ok(found) 84 | } else { 85 | Err(Error::msg(format!( 86 | "failed to find paths: '{}'", 87 | errors.join("' '") 88 | ))) 89 | } 90 | } 91 | 92 | pub fn read_stdin() -> String { 93 | std::io::stdin() 94 | .lines() 95 | // Interrupt as soon as one line could not be read. 96 | .map_while(Result::ok) 97 | .collect::>() 98 | .join("\n") 99 | } 100 | 101 | pub fn get_file_content_and_dir(path: &Path) -> Result<(String, PathBuf)> { 102 | let text = std::fs::read_to_string(path).context("failed to read file")?; 103 | let dir = path 104 | .parent() 105 | .map(|el| el.to_path_buf()) 106 | .ok_or(Error::msg("failed to determine parent directory"))?; 107 | 108 | Ok((text, dir)) 109 | } 110 | 111 | fn strip_cwd_if_possible(path: PathBuf) -> PathBuf { 112 | std::env::current_dir() 113 | .map(|cwd| path.strip_prefix(cwd).unwrap_or(&path)) 114 | .unwrap_or(&path) 115 | .to_path_buf() 116 | } 117 | 118 | // For convenience, this can also take paths to existing files and scans upwards, starting in 119 | // their directories. Since we want to avoid scanning the same directories over and over again, 120 | // we also use a cache to remember paths that we have already scanned. We abort scanning upwards 121 | // as soon as we find that we have already scanned a path. 
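// For example, find_files_upwards(Path::new("docs/sub"), ".mdslw.toml", &mut None) yields every
// file named ".mdslw.toml" in docs/sub, then in docs, and so on up to the filesystem root,
// closest directory first.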
122 | pub fn find_files_upwards( 123 | dir: &Path, 124 | file_name: &str, 125 | cache: &mut Option>, 126 | ) -> Vec { 127 | let mut result = vec![]; 128 | log::debug!( 129 | "finding {} upwards from {}", 130 | file_name, 131 | dir.to_string_lossy() 132 | ); 133 | for dir in UpwardsDirsIterator::new(dir) { 134 | if cache.as_ref().map(|el| el.contains(&dir)).unwrap_or(false) { 135 | log::debug!("early stop of upwards search at {}", dir.to_string_lossy()); 136 | break; 137 | } else { 138 | let maybe_file = dir.join(file_name); 139 | if maybe_file.is_file() { 140 | log::debug!( 141 | "found file in upwards search: {}", 142 | maybe_file.to_string_lossy() 143 | ); 144 | result.push(maybe_file); 145 | } 146 | cache.as_mut().map(|el| el.insert(dir)); 147 | } 148 | } 149 | log::debug!("found {} files in upwards search", result.len()); 150 | result 151 | } 152 | 153 | #[derive(Debug)] 154 | pub struct UpwardsDirsIterator(Option); 155 | 156 | impl UpwardsDirsIterator { 157 | pub fn new(dir_or_file: &Path) -> Self { 158 | match dir_or_file.canonicalize() { 159 | Ok(path) => { 160 | if path.is_file() { 161 | Self(path.parent().map(|el| el.to_path_buf())) 162 | } else { 163 | Self(Some(path.to_owned())) 164 | } 165 | } 166 | Err(_) => Self(None), 167 | } 168 | } 169 | } 170 | 171 | impl Iterator for UpwardsDirsIterator { 172 | type Item = PathBuf; 173 | 174 | fn next(&mut self) -> Option { 175 | let result = self.0.clone(); 176 | if let Some(ref mut base) = self.0 { 177 | if !base.pop() { 178 | self.0 = None; 179 | } 180 | } 181 | result 182 | } 183 | } 184 | 185 | #[cfg(test)] 186 | mod test { 187 | use super::*; 188 | 189 | // Actual tests follow. 190 | #[test] 191 | fn listing_non_existent_fails() { 192 | let is_err = find_files_with_extension(&["i do not exist".into()], ".md").is_err(); 193 | assert!(is_err); 194 | } 195 | 196 | // A struct that will automatically create and delete a temporary directory and that can create 197 | // arbitrary temporary files underneath it, including their parent dirs. 198 | struct TempDir(tempfile::TempDir); 199 | 200 | impl TempDir { 201 | fn new() -> Result { 202 | Ok(Self(tempfile::TempDir::new()?)) 203 | } 204 | 205 | fn new_file_in_dir(&self, path: PathBuf) -> Result { 206 | let mut result = PathBuf::from(self.0.path()); 207 | 208 | // Create directory containing file. 209 | if let Some(parent) = path.parent() { 210 | result.extend(parent); 211 | std::fs::create_dir_all(&result)?; 212 | } 213 | 214 | if let Some(file_name) = path.file_name() { 215 | result.push(file_name); 216 | std::fs::File::create(&result)?; 217 | Ok(result) 218 | } else { 219 | Err(Error::msg("no file given")) 220 | } 221 | } 222 | 223 | fn new_file_in_dir_with_content(&self, path: PathBuf, content: &str) -> Result { 224 | let path = self.new_file_in_dir(path)?; 225 | std::fs::write(&path, content.as_bytes())?; 226 | Ok(path) 227 | } 228 | 229 | /// Remove the temporary directory from the prefix. 230 | fn strip(&self, path: PathBuf) -> PathBuf { 231 | path.as_path() 232 | .strip_prefix(self.0.path()) 233 | .unwrap_or(&path) 234 | .to_path_buf() 235 | } 236 | } 237 | 238 | #[test] 239 | fn finding_all_md_files_in_repo() -> Result<()> { 240 | let tmp = TempDir::new()?; 241 | // Create some directory tree that will then be searched. 
242 | tmp.new_file_in_dir("f_1.md".into())?; 243 | tmp.new_file_in_dir("no_md_1.ext".into())?; 244 | tmp.new_file_in_dir("no_md_2.ext".into())?; 245 | tmp.new_file_in_dir("dir/f_2.md".into())?; 246 | tmp.new_file_in_dir("dir/no_md_1.ext".into())?; 247 | tmp.new_file_in_dir("other_dir/f_3.md".into())?; 248 | tmp.new_file_in_dir("other_dir/no_md_1.ext".into())?; 249 | 250 | let ext_found = find_files_with_extension(&[tmp.0.path().into()], ".ext")?; 251 | assert_eq!(ext_found.len(), 4); 252 | 253 | let found = find_files_with_extension(&[tmp.0.path().into()], ".md")?; 254 | assert_eq!(found.len(), 3); 255 | 256 | Ok(()) 257 | } 258 | 259 | #[test] 260 | fn auto_ignoring_files() -> Result<()> { 261 | let tmp = TempDir::new()?; 262 | // Create some directory tree that will then be searched. 263 | tmp.new_file_in_dir("f.md".into())?; 264 | tmp.new_file_in_dir("file.md".into())?; 265 | tmp.new_file_in_dir("stuff.md".into())?; 266 | tmp.new_file_in_dir("dir/f.md".into())?; 267 | tmp.new_file_in_dir("dir/file.md".into())?; 268 | tmp.new_file_in_dir("dir/stuff.md".into())?; 269 | tmp.new_file_in_dir("dir/fstuff.md".into())?; 270 | tmp.new_file_in_dir("other_dir/f.md".into())?; 271 | tmp.new_file_in_dir("other_dir/file.md".into())?; 272 | tmp.new_file_in_dir("other_dir/stuff.md".into())?; 273 | tmp.new_file_in_dir("other_dir/fstuff.md".into())?; 274 | 275 | tmp.new_file_in_dir_with_content(".ignore".into(), "stuff.md\n")?; 276 | tmp.new_file_in_dir_with_content("dir/.ignore".into(), "file.md\n")?; 277 | tmp.new_file_in_dir_with_content("other_dir/.ignore".into(), "f*.md\n")?; 278 | 279 | let found = find_files_with_extension(&[tmp.0.path().into()], ".md")? 280 | .into_iter() 281 | .map(|el| tmp.strip(el)) 282 | .map(|el| el.to_string_lossy().to_string()) 283 | .collect::>(); 284 | 285 | let expected = vec!["file.md", "f.md", "dir/fstuff.md", "dir/f.md"] 286 | .into_iter() 287 | .map(|el| el.to_string()) 288 | .collect::>(); 289 | 290 | assert_eq!(found, expected); 291 | 292 | Ok(()) 293 | } 294 | 295 | #[test] 296 | fn finding_files_upwards() -> Result<()> { 297 | let tmp = TempDir::new()?; 298 | // Create some directory tree that will then be searched. 
299 | tmp.new_file_in_dir("find_me".into())?; 300 | tmp.new_file_in_dir("do_not_find_me".into())?; 301 | tmp.new_file_in_dir("other_dir/find_me".into())?; 302 | tmp.new_file_in_dir("other_dir/do_not_find_me".into())?; 303 | tmp.new_file_in_dir("dir/subdir/find_me".into())?; 304 | let start = tmp.new_file_in_dir("dir/subdir/do_not_find_me".into())?; 305 | tmp.new_file_in_dir("dir/subdir/one_more_layer/find_me".into())?; 306 | tmp.new_file_in_dir("dir/subdir/one_more_layer/do_not_find_me".into())?; 307 | 308 | let found = find_files_upwards(&start, "find_me", &mut None) 309 | .into_iter() 310 | .map(|el| tmp.strip(el)) 311 | .map(|el| el.to_string_lossy().to_string()) 312 | .collect::>(); 313 | 314 | let expected = vec!["dir/subdir/find_me", "find_me"]; 315 | 316 | assert_eq!(found, expected); 317 | 318 | Ok(()) 319 | } 320 | 321 | #[test] 322 | fn iterating_dirs_upwards() -> Result<()> { 323 | let tmp = TempDir::new()?; 324 | let start = tmp.new_file_in_dir("dir/subdir/do_not_find_me".into())?; 325 | 326 | let dirs = UpwardsDirsIterator::new(&start) 327 | .map(|el| tmp.strip(el)) 328 | .map(|el| el.to_string_lossy().to_string()) 329 | .collect::>(); 330 | 331 | assert_eq!(start.components().count() - 1, dirs.len(), "{:?}", dirs); 332 | let dirs = dirs 333 | .into_iter() 334 | .filter(|el| !el.starts_with("/") && !el.is_empty()) 335 | .collect::>(); 336 | 337 | let expected = vec!["dir/subdir", "dir"]; 338 | 339 | assert_eq!(dirs, expected); 340 | 341 | Ok(()) 342 | } 343 | } 344 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | // Imports. 
19 | mod call; 20 | mod cfg; 21 | mod detect; 22 | mod diff; 23 | mod features; 24 | mod frontmatter; 25 | mod fs; 26 | mod ignore; 27 | mod indent; 28 | mod lang; 29 | mod linebreak; 30 | mod logging; 31 | mod parse; 32 | mod ranges; 33 | mod replace; 34 | mod wrap; 35 | 36 | use std::collections::{HashMap, HashSet}; 37 | use std::io; 38 | use std::path::{Path, PathBuf}; 39 | 40 | use anyhow::{Context, Error, Result}; 41 | use clap::{CommandFactory, Parser}; 42 | use clap_complete::generate; 43 | use rayon::prelude::*; 44 | 45 | const CONFIG_FILE: &str = ".mdslw.toml"; 46 | 47 | fn generate_report( 48 | mode: &cfg::ReportMode, 49 | new: &str, 50 | org: &str, 51 | filename: &Path, 52 | ) -> Option { 53 | match mode { 54 | cfg::ReportMode::None => None, 55 | cfg::ReportMode::Changed => { 56 | if new != org { 57 | Some(format!("{}", filename.to_string_lossy())) 58 | } else { 59 | None 60 | } 61 | } 62 | cfg::ReportMode::State => { 63 | let ch = if new == org { 'U' } else { 'C' }; 64 | Some(format!("{}:{}", ch, filename.to_string_lossy())) 65 | } 66 | cfg::ReportMode::DiffMyers => Some(diff::Algo::Myers.generate(new, org, filename)), 67 | cfg::ReportMode::DiffPatience => Some(diff::Algo::Patience.generate(new, org, filename)), 68 | cfg::ReportMode::DiffLcs => Some(diff::Algo::Lcs.generate(new, org, filename)), 69 | } 70 | } 71 | 72 | struct Processor { 73 | feature_cfg: features::FeatureCfg, 74 | detector: detect::BreakDetector, 75 | max_width: Option, 76 | } 77 | 78 | impl Processor { 79 | fn process(&self, text: String, width_reduction: usize) -> String { 80 | // First, process the actual text. 81 | let ends_on_linebreak = text.ends_with('\n'); 82 | let text = if self.feature_cfg.keep_spaces_in_links { 83 | log::debug!("not replacing spaces in links by non-breaking spaces"); 84 | text 85 | } else { 86 | log::debug!("replacing spaces in links by non-breaking spaces"); 87 | replace::replace_spaces_in_links_by_nbsp(text) 88 | }; 89 | let text = if self.feature_cfg.outsource_inline_links { 90 | log::debug!("outsourcing inline links"); 91 | replace::outsource_inline_links( 92 | text, 93 | &self.feature_cfg.collate_link_defs, 94 | &self.detector.whitespace, 95 | ) 96 | } else { 97 | log::debug!("not outsourcing inline links"); 98 | text 99 | }; 100 | let text = if self.feature_cfg.collate_link_defs { 101 | log::debug!("collating links at the end of the document"); 102 | replace::collate_link_defs_at_end(text, &self.detector.whitespace) 103 | } else { 104 | log::debug!("not collating links at the end of the document"); 105 | text 106 | }; 107 | let parsed = parse::parse_markdown(&text, &self.feature_cfg.parse_cfg); 108 | let filled = ranges::fill_markdown_ranges(parsed, &text); 109 | let width = &self 110 | .max_width 111 | .map(|el| el.checked_sub(width_reduction).unwrap_or(el)); 112 | let formatted = wrap::add_linebreaks_and_wrap(filled, width, &self.detector, &text); 113 | 114 | // Keep newlines at the end of the file in tact. They disappear sometimes. 115 | let file_end = if !formatted.ends_with('\n') && ends_on_linebreak { 116 | log::debug!("adding missing trailing newline character"); 117 | "\n" 118 | } else { 119 | "" 120 | }; 121 | let text = format!("{}{}", formatted, file_end); 122 | 123 | // At last, process all block quotes. 
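// Each quote's body is run through this very method again, with its indent added to
// width_reduction so that the remaining line width shrinks accordingly.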
124 | if self.feature_cfg.format_block_quotes { 125 | log::debug!("formatting text in block quotes"); 126 | parse::BlockQuotes::new(&text) 127 | .apply_to_matches_and_join(|t, indent| self.process(t, indent + width_reduction)) 128 | } else { 129 | log::debug!("not formatting text in block quotes"); 130 | text 131 | } 132 | } 133 | } 134 | 135 | fn process(document: String, file_dir: &Path, cfg: &cfg::PerFileCfg) -> Result<(String, String)> { 136 | // Prepare user-configured options. These could be outsourced if we didn't intend to allow 137 | // per-file configurations. 138 | let lang_keep_words = lang::keep_word_list(&cfg.lang).context("cannot load keep words")?; 139 | let feature_cfg = cfg 140 | .features 141 | .parse::() 142 | .context("cannot parse selected features")?; 143 | let detector = detect::BreakDetector::new( 144 | &(lang_keep_words + &cfg.suppressions), 145 | &cfg.ignores, 146 | cfg.case == cfg::Case::Keep, 147 | &cfg.end_markers, 148 | &feature_cfg.break_cfg, 149 | ); 150 | let max_width = if cfg.max_width == 0 { 151 | log::debug!("not limiting line length"); 152 | None 153 | } else { 154 | log::debug!("limiting line length to {} characters", cfg.max_width); 155 | Some(cfg.max_width) 156 | }; 157 | let processor = Processor { 158 | feature_cfg, 159 | detector, 160 | max_width, 161 | }; 162 | 163 | // Actually process the text. 164 | let frontmatter = frontmatter::extract_frontmatter(&document); 165 | let text = document[frontmatter.len()..].to_string(); 166 | 167 | let after_upstream = if let Ok(upstream) = call::Upstream::from_cfg( 168 | &cfg.upstream_command, 169 | &cfg.upstream, 170 | &cfg.upstream_separator, 171 | ) { 172 | log::debug!("calling upstream formatter: {}", cfg.upstream); 173 | call::upstream_formatter(&upstream, text, file_dir)? 174 | } else { 175 | log::debug!("not calling any upstream formatter"); 176 | text 177 | }; 178 | 179 | let processed = format!("{}{}", frontmatter, processor.process(after_upstream, 0)); 180 | Ok((processed, document)) 181 | } 182 | 183 | fn process_stdin(mode: &cfg::OpMode, build_cfg: F, file_path: &PathBuf) -> Result 184 | where 185 | F: Fn(&str, &PathBuf) -> Result, 186 | { 187 | log::debug!("processing content from stdin and writing to stdout"); 188 | let text = fs::read_stdin(); 189 | 190 | let config = build_cfg(&text, file_path).context("failed to build complete config")?; 191 | 192 | let file_dir = file_path 193 | .parent() 194 | .map(|el| el.to_path_buf()) 195 | .unwrap_or(PathBuf::from(".")); 196 | let (processed, text) = process(text, file_dir.as_path(), &config)?; 197 | 198 | // Decide what to output. 199 | match mode { 200 | cfg::OpMode::Format | cfg::OpMode::Both => { 201 | log::debug!("writing modified file to stdout"); 202 | print!("{}", processed); 203 | } 204 | cfg::OpMode::Check => { 205 | log::debug!("writing original file to stdout in check mode"); 206 | print!("{}", text); 207 | } 208 | } 209 | 210 | Ok(processed == text) 211 | } 212 | 213 | fn process_file(mode: &cfg::OpMode, path: &PathBuf, build_cfg: F) -> Result<(String, String)> 214 | where 215 | F: Fn(&str, &PathBuf) -> Result, 216 | { 217 | let report_path = path.to_string_lossy(); 218 | log::debug!("processing {}", report_path); 219 | 220 | let (text, file_dir) = fs::get_file_content_and_dir(path)?; 221 | let config = build_cfg(&text, path).context("failed to build complete config")?; 222 | let (processed, text) = process(text, &file_dir, &config)?; 223 | 224 | // Decide whether to overwrite existing files. 
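// A file is only rewritten when the formatted text differs from what is currently on disk;
// check mode never writes.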
225 | match mode { 226 | cfg::OpMode::Format | cfg::OpMode::Both => { 227 | if processed == text { 228 | log::debug!("keeping OK file {}", report_path); 229 | } else { 230 | log::debug!("modifying NOK file {} in place", report_path); 231 | std::fs::write(path, processed.as_bytes()).context("failed to write file")?; 232 | } 233 | } 234 | // Do not write anything in check mode. 235 | cfg::OpMode::Check => { 236 | log::debug!("not modifying file {} in check mode", report_path); 237 | } 238 | } 239 | 240 | Ok((processed, text)) 241 | } 242 | 243 | fn read_config_file(path: &Path) -> Option<(PathBuf, cfg::CfgFile)> { 244 | let result = std::fs::read_to_string(path) 245 | .context("failed to read file") 246 | .and_then(|el| { 247 | toml::from_str::(&el).context("that failed to parse due to error:") 248 | }); 249 | 250 | match result { 251 | Ok(cfg) => { 252 | log::debug!("parsed config file {}", path.to_string_lossy()); 253 | Some((path.to_path_buf(), cfg)) 254 | } 255 | Err(err) => { 256 | log::error!("ignoring config file {} {:?}", path.to_string_lossy(), err); 257 | None 258 | } 259 | } 260 | } 261 | 262 | fn build_document_specific_config( 263 | document: &str, 264 | document_path: &Path, 265 | cli: &cfg::CliArgs, 266 | configs: &Vec<(PathBuf, cfg::CfgFile)>, 267 | ) -> Result { 268 | let config_from_frontmatter = toml::from_str::( 269 | &parse::get_value_for_mdslw_toml_yaml_key(&frontmatter::extract_frontmatter(document)), 270 | ) 271 | .with_context(|| { 272 | format!( 273 | "failed to parse frontmatter entry as toml config:\n{}", 274 | document 275 | ) 276 | })?; 277 | let config_tuple = [(document_path.to_path_buf(), config_from_frontmatter)]; 278 | Ok(cfg::merge_configs(cli, config_tuple.iter().chain(configs))) 279 | } 280 | 281 | fn print_config_file() -> Result<()> { 282 | toml::to_string(&cfg::CfgFile::default()) 283 | .context("converting to toml format") 284 | .map(|cfg| println!("{}", cfg)) 285 | } 286 | 287 | fn main() -> Result<()> { 288 | // Perform actions that cannot be changed on a per-file level. 289 | // Argument parsing. 290 | let cli = cfg::CliArgs::parse(); 291 | // Initialising logging. 292 | logging::init_logging(cli.verbose)?; 293 | // Generation of shell completion. 294 | if let Some(shell) = cli.completion { 295 | log::info!("generating shell completion for {}", shell); 296 | let mut cmd = cfg::CliArgs::command(); 297 | let name = cmd.get_name().to_string(); 298 | generate(shell, &mut cmd, name, &mut io::stdout()); 299 | return Ok(()); 300 | } 301 | // Generation of default config file. 302 | if cli.default_config { 303 | log::info!("writing default config file to stdout"); 304 | return print_config_file(); 305 | } 306 | 307 | // All other actions could technically be specified on a per-file level. 
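// Without any PATHS we format a single document read from stdin; otherwise we crawl the given
// paths for markdown files and process them in parallel.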
308 | let cwd = PathBuf::from("."); 309 | let unchanged = if cli.paths.is_empty() { 310 | let file_path = cli.stdin_filepath.clone().unwrap_or(PathBuf::from("STDIN")); 311 | let file_dir = file_path.parent().unwrap_or(cwd.as_path()); 312 | let configs = fs::find_files_upwards(file_dir, CONFIG_FILE, &mut None) 313 | .into_iter() 314 | .filter_map(|el| read_config_file(&el)) 315 | .collect::>(); 316 | let build_document_config = |document: &str, file_path: &PathBuf| { 317 | build_document_specific_config(document, file_path, &cli, &configs) 318 | }; 319 | process_stdin(&cli.mode, build_document_config, &file_path) 320 | } else { 321 | let md_files = fs::find_files_with_extension(&cli.paths, &cli.extension) 322 | .context("failed to discover markdown files")?; 323 | log::debug!("will process {} markdown file(s) from disk", md_files.len()); 324 | let config_files = { 325 | // Define a temporary cache to avoid scanning the same directories again and again. 326 | let mut cache = Some(HashSet::new()); 327 | md_files 328 | .iter() 329 | .flat_map(|el| fs::find_files_upwards(el, CONFIG_FILE, &mut cache)) 330 | .filter_map(|el| read_config_file(&el)) 331 | .collect::>() 332 | }; 333 | log::debug!("loaded {} configs from disk", config_files.len()); 334 | 335 | // Set number of threads depending on user's choice. 336 | if let Some(num_jobs) = cli.jobs { 337 | rayon::ThreadPoolBuilder::new() 338 | .num_threads(num_jobs) 339 | .build_global() 340 | .context("failed to initialise processing thread-pool")?; 341 | } 342 | 343 | // Enable pager only for diff output. 344 | let diff_pager = if cli.report.is_diff_mode() { 345 | &cli.diff_pager 346 | } else { 347 | log::debug!("disabling possibly set diff pager for non-diff report"); 348 | &None 349 | }; 350 | let par_printer = call::ParallelPrinter::new(diff_pager)?; 351 | 352 | // Process all MD files we found. 353 | md_files 354 | .par_iter() 355 | .map(|path| { 356 | log::info!("processing markdown file {}", path.to_string_lossy()); 357 | let configs = fs::UpwardsDirsIterator::new(path) 358 | .filter_map(|el| { 359 | config_files 360 | .get(&el.join(CONFIG_FILE)) 361 | .map(|cfg| (el, cfg.clone())) 362 | }) 363 | .collect::>(); 364 | let build_document_config = |document: &str, file_path: &PathBuf| { 365 | build_document_specific_config(document, file_path, &cli, &configs) 366 | }; 367 | match process_file(&cli.mode, path, build_document_config) { 368 | Ok((processed, text)) => { 369 | if let Some(rep) = generate_report(&cli.report, &processed, &text, path) { 370 | par_printer.println(&rep); 371 | } 372 | Ok(processed == text) 373 | } 374 | Err(err) => { 375 | log::error!("failed to process {}: {:?}", path.to_string_lossy(), err); 376 | Err(Error::msg("there were errors processing at least one file")) 377 | } 378 | } 379 | }) 380 | .reduce( 381 | || Ok(true), 382 | |a, b| match (a, b) { 383 | (Err(err), _) => Err(err), 384 | (_, Err(err)) => Err(err), 385 | (Ok(f1), Ok(f2)) => Ok(f1 && f2), 386 | }, 387 | ) 388 | }; 389 | 390 | log::debug!("finished execution"); 391 | // Process exit code. 
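// Ok(false) means at least one processed file differed from its formatted version, which only
// counts as an error in check or both mode.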
392 |     match unchanged {
393 |         Ok(true) => Ok(()),
394 |         Ok(false) => match cli.mode {
395 |             cfg::OpMode::Format => Ok(()),
396 |             cfg::OpMode::Check => Err(Error::msg("at least one processed file would be changed")),
397 |             cfg::OpMode::Both => Err(Error::msg("at least one processed file changed")),
398 |         },
399 |         Err(err) => Err(err),
400 |     }
401 | }
402 | 
--------------------------------------------------------------------------------
/src/cfg.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 | 
18 | use std::fmt;
19 | use std::path::PathBuf;
20 | use std::str::FromStr;
21 | 
22 | use clap::{builder::OsStr, Parser, ValueEnum};
23 | use clap_complete::Shell;
24 | use serde::{Deserialize, Serialize};
25 | 
26 | // Command-line interface definition.
27 | 
28 | /// A generic value that knows its origin. That is, we use the "Default" variant when defining
29 | /// default values in the CliArgs struct but we always parse to the "Parsed" variant when parsing
30 | /// from a command line argument. That way, we can distinguish whether an option has been provided
31 | /// on the command line or was taken as a default.
32 | ///
33 | /// Note that default_value_t will perform a display-then-parse-again round trip, which means it
34 | /// actually does not matter whether we use the "Parsed" or the "Default" variant in the
35 | /// default_value_t bit. However, we explicitly add a zero-width space to the end of every default
36 | /// value to be able to determine whether the value is a default. Note that this will result in
37 | /// unexpected behaviour if a user ever adds such a character to the end of an argument, but what
38 | /// can you do. It's either that, or replacing clap, or not having config file support. In my view,
39 | /// config file support is worth this work-around.
40 | #[derive(Clone, Debug)]
41 | pub enum ValueWOrigin<T> {
42 |     Default(T),
43 |     Parsed(T),
44 | }
45 | 
46 | impl<T> ValueWOrigin<T> {
47 |     // All default values that can also come from config files will end in this character. It is
48 |     // the Unicode zero-width space. All terminals that I tested do not display that character,
49 |     // but it is present in the internal default string. We append that character to every default
50 |     // value that can also come from a config file. That way, we can actually determine whether a
51 |     // value is a default or not. See the implementation of FromStr for this struct.
52 |     const ZWS: char = '\u{200b}';
53 |     const ZWS_LEN: usize = Self::ZWS.len_utf8();
54 | 
55 |     /// Get the correct value with the following precedence:
56 |     /// - If we contain a "Parsed", return the value contained in it. The user has specified that
57 |     ///   on the command line, which means it takes precedence.
58 | /// - If we contain a "Default" and the other value contains a "Some", return that. 59 | /// That means the user has not specified that option on the command line, but a config file 60 | /// contains it. 61 | /// - Otherwise, return the value in the "Default". 62 | /// In that case, neither has the user specified that option on the command line, nor is it 63 | /// contained in any config file. 64 | fn resolve(&self, other: Option) -> T 65 | where 66 | T: Clone, 67 | { 68 | match self { 69 | ValueWOrigin::Parsed(val) => val.clone(), 70 | ValueWOrigin::Default(val) => other.unwrap_or_else(|| val.clone()), 71 | } 72 | } 73 | } 74 | 75 | impl FromStr for ValueWOrigin 76 | where 77 | T: FromStr, 78 | { 79 | type Err = ::Err; 80 | 81 | fn from_str(s: &str) -> Result { 82 | if s.ends_with(Self::ZWS) { 83 | match s[..s.len() - Self::ZWS_LEN].parse::() { 84 | Ok(val) => Ok(Self::Default(val)), 85 | Err(err) => Err(err), 86 | } 87 | } else { 88 | match s.parse::() { 89 | Ok(val) => Ok(Self::Parsed(val)), 90 | Err(err) => Err(err), 91 | } 92 | } 93 | } 94 | } 95 | 96 | impl fmt::Display for ValueWOrigin 97 | where 98 | T: fmt::Display, 99 | { 100 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 101 | match self { 102 | ValueWOrigin::Parsed(val) | ValueWOrigin::Default(val) => { 103 | write!(f, "{}", val) 104 | } 105 | } 106 | } 107 | } 108 | 109 | #[derive(Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] 110 | pub enum OpMode { 111 | Both, 112 | Check, 113 | Format, 114 | } 115 | 116 | #[derive(Serialize, Deserialize, Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] 117 | #[serde(rename_all = "kebab-case")] 118 | pub enum Case { 119 | Ignore, 120 | Keep, 121 | } 122 | 123 | impl FromStr for Case { 124 | type Err = String; 125 | 126 | fn from_str(s: &str) -> Result { 127 | match s { 128 | "keep" => Ok(Self::Keep), 129 | "ignore" => Ok(Self::Ignore), 130 | _ => Err(String::from("possible values: ignore, keep")), 131 | } 132 | } 133 | } 134 | 135 | impl fmt::Display for Case { 136 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 137 | match self { 138 | Self::Ignore => { 139 | write!(f, "ignore") 140 | } 141 | Self::Keep => { 142 | write!(f, "keep") 143 | } 144 | } 145 | } 146 | } 147 | 148 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] 149 | pub enum ReportMode { 150 | None, 151 | Changed, 152 | State, 153 | DiffMyers, 154 | DiffPatience, 155 | DiffLcs, 156 | } 157 | 158 | impl ReportMode { 159 | pub fn is_diff_mode(&self) -> bool { 160 | self == &ReportMode::DiffMyers 161 | || self == &ReportMode::DiffPatience 162 | || self == &ReportMode::DiffLcs 163 | } 164 | } 165 | 166 | #[derive(Parser, Debug)] 167 | #[command(author, version, about, long_about = None)] 168 | pub struct CliArgs { 169 | /// Paths to files or directories that shall be processed. 170 | pub paths: Vec, 171 | /// The maximum line width that is acceptable. A value of 0 disables wrapping of{n} long 172 | /// lines. 173 | #[arg( 174 | short = 'w', 175 | long, 176 | env = "MDSLW_MAX_WIDTH", 177 | default_value = "80\u{200b}" 178 | )] 179 | pub max_width: ValueWOrigin, 180 | /// A set of characters that are acceptable end of sentence markers. 
181 | #[arg(short, long, env = "MDSLW_END_MARKERS", default_value = "?!:.\u{200b}")] 182 | pub end_markers: ValueWOrigin, 183 | /// Mode of operation: "check" means exit with error if format has to be adjusted but do not 184 | /// format,{n} "format" means format the file and exit with error in case of problems only, 185 | /// "both" means do both{n} (useful as pre-commit hook). 186 | #[arg(value_enum, short, long, env = "MDSLW_MODE", default_value_t = OpMode::Format)] 187 | pub mode: OpMode, 188 | /// A space-separated list of languages whose suppression words as specified by unicode should 189 | /// be {n} taken into account. See here for all languages: 190 | /// {n} https://github.com/unicode-org/cldr-json/tree/main/cldr-json/cldr-segments-full/segments 191 | /// {n} Use "none" to disable. 192 | /// Supported languages are: de en es fr it. Use "ac" for "author's choice",{n} a list 193 | /// for the Enlish language defined by this tool's author. 194 | #[arg(short, long, env = "MDSLW_LANG", default_value = "ac\u{200b}")] 195 | pub lang: ValueWOrigin, 196 | /// Space-separated list of words that end in one of END_MARKERS but that should not be 197 | /// followed by a line{n} break. This is in addition to what is specified via --lang. 198 | #[arg(short, long, env = "MDSLW_SUPPRESSIONS", default_value = "\u{200b}")] 199 | pub suppressions: ValueWOrigin, 200 | /// Space-separated list of words that end in one of END_MARKERS and that should be 201 | /// removed{n} from the list of suppressions. 202 | #[arg(short, long, env = "MDSLW_IGNORES", default_value = "\u{200b}")] 203 | pub ignores: ValueWOrigin, 204 | /// Specify an upstream auto-formatter that reads from stdin and writes to stdout. 205 | /// {n} It will be called before mdslw will run. Useful if you want to chain multiple 206 | /// tools.{n} Specify the command that will be executed. For example, specify "prettier" 207 | /// to{n} call prettier first. 208 | /// The upstream auto-formatter runs in each file's directory if PATHS are{n} specified. 209 | #[arg(long, env = "MDSLW_UPSTREAM_COMMAND", default_value = "\u{200b}")] 210 | pub upstream_command: ValueWOrigin, 211 | /// Specify the arguments for the upstream auto-formatter. If --upstream-command is not set, 212 | /// {n} the first word will be used as command. For example, with 213 | /// --upstream-command="prettier",{n} set --upstream="--parser=markdown" to enable markdown 214 | /// parsing. 215 | #[arg(short, long, env = "MDSLW_UPSTREAM", default_value = "\u{200b}")] 216 | pub upstream: ValueWOrigin, 217 | /// Specify a string that will be used to separate the value passed to --upstream into words. 218 | /// {n} If empty, splitting is based on whitespace. 219 | #[arg(long, env = "MDSLW_UPSTREAM_SEPARATOR", default_value = "\u{200b}")] 220 | pub upstream_separator: ValueWOrigin, 221 | /// How to handle the case of provided suppression words, both via --lang 222 | /// and{n} --suppressions. Possible values: ignore, keep 223 | #[arg(short, long, env = "MDSLW_CASE", default_value = "ignore\u{200b}")] 224 | pub case: ValueWOrigin, 225 | /// The file extension used to find markdown files when an entry in{n} PATHS is a directory. 226 | #[arg(long, env = "MDSLW_EXTENSION", default_value_t = String::from(".md"))] 227 | pub extension: String, 228 | // The "." below is used to cause clap to format the help message nicely. 229 | /// Comma-separated list of optional features to enable or disable. 
Currently, the following 230 | /// are supported: 231 | /// {n} * keep-spaces-in-links => do not replace spaces in link texts by non-breaking spaces 232 | /// {n} * keep-linebreaks => do not remove existing linebreaks during the line-wrapping 233 | /// process 234 | /// {n} * format-block-quotes => format text in block quotes 235 | /// {n} * collate-link-defs => gather all link definitions, i.e. `[link name]: url`, in a 236 | /// block at the end{n} of the document in alphabetical order, sorted 237 | /// case-insensitively; links can be categorised with{n} comments as 238 | /// ``, which will cause sorting per category 239 | /// {n} * outsource-inline-links => replace all inline links by named links using a link 240 | /// definition,{n} i.e. `[link](url)` becomes `[link][def]` and `[def]: url` 241 | /// {n} . 242 | #[arg(long, env = "MDSLW_FEATURES", default_value = "\u{200b}")] 243 | pub features: ValueWOrigin, 244 | /// Output shell completion file for the given shell to stdout and exit.{n} . 245 | #[arg(value_enum, long, env = "MDSLW_COMPLETION")] 246 | pub completion: Option, 247 | /// Specify the number of threads to use for processing files from disk in parallel. Defaults 248 | /// to the number of{n} logical processors. 249 | #[arg(short, long, env = "MDSLW_JOBS")] 250 | pub jobs: Option, 251 | /// What to report to stdout, ignored when reading from stdin: 252 | /// {n} * "none" => report nothing but be silent instead 253 | /// {n} * "changed" => output the names of files that were changed 254 | /// {n} * "state" => output : where is "U" for "unchanged" or 255 | /// "C" for "changed" 256 | /// {n} * "diff-myers" => output a unified diff based on the myers algorithm 257 | /// {n} * "diff-patience" => output a unified diff based on the patience algorithm 258 | /// {n} * "diff-lcs" => output a unified diff based on the lcs algorithm 259 | /// {n} . 260 | #[arg(value_enum, short, long, env = "MDSLW_REPORT", default_value_t = ReportMode::None)] 261 | pub report: ReportMode, 262 | /// Specify a downstream pager for diffs (with args) that reads diffs from stdin. 263 | /// {n} Useful if you want to display a diff nicely. For example, specify 264 | /// {n} "delta --side-by-side" to get a side-by-side view. 265 | #[arg(value_enum, short, long, env = "MDSLW_DIFF_PAGER")] 266 | pub diff_pager: Option, 267 | /// The path to the file that is read from stdin. This is used to determine relevant config 268 | /// files{n} when reading from stdin and to run an upstream formatter. 269 | #[arg(long, env = "MDSLW_STDIN_FILEPATH")] 270 | pub stdin_filepath: Option, 271 | /// Output the default config file in TOML format to stdout and exit. 272 | #[arg(long, env = "MDSLW_DEFAULT_CONFIG")] 273 | pub default_config: bool, 274 | /// Specify to increase verbosity of log output. Specify multiple times to increase even 275 | /// further. 
276 | #[arg(short, long, action = clap::ArgAction::Count)] 277 | pub verbose: u8, 278 | } 279 | 280 | #[derive(Debug, PartialEq)] 281 | pub struct PerFileCfg { 282 | pub max_width: usize, 283 | pub end_markers: String, 284 | pub lang: String, 285 | pub suppressions: String, 286 | pub ignores: String, 287 | pub upstream_command: String, 288 | pub upstream: String, 289 | pub upstream_separator: String, 290 | pub case: Case, 291 | pub features: String, 292 | } 293 | 294 | #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] 295 | #[serde(rename_all = "kebab-case", deny_unknown_fields)] 296 | pub struct CfgFile { 297 | pub max_width: Option, 298 | pub end_markers: Option, 299 | pub lang: Option, 300 | pub suppressions: Option, 301 | pub ignores: Option, 302 | pub upstream_command: Option, 303 | pub upstream: Option, 304 | pub upstream_separator: Option, 305 | pub case: Option, 306 | pub features: Option, 307 | } 308 | 309 | impl CfgFile { 310 | /// Merge one config file into this one. Some-values in self take precedence. The return value 311 | /// indicates whether all fields of the struct are fully defined, which means that further 312 | /// merging won't have any effect. 313 | pub fn merge_with(&mut self, other: &Self) -> bool { 314 | let mut fully_defined = true; 315 | 316 | // Reduce code duplication with a macro. 317 | macro_rules! merge_field { 318 | ($field:ident) => { 319 | if self.$field.is_none() { 320 | self.$field = other.$field.clone(); 321 | } 322 | fully_defined = fully_defined && self.$field.is_some(); 323 | }; 324 | } 325 | 326 | merge_field!(max_width); 327 | merge_field!(end_markers); 328 | merge_field!(lang); 329 | merge_field!(suppressions); 330 | merge_field!(ignores); 331 | merge_field!(upstream_command); 332 | merge_field!(upstream); 333 | merge_field!(upstream_separator); 334 | merge_field!(case); 335 | merge_field!(features); 336 | 337 | fully_defined 338 | } 339 | 340 | fn new() -> Self { 341 | Self { 342 | max_width: None, 343 | end_markers: None, 344 | lang: None, 345 | suppressions: None, 346 | ignores: None, 347 | upstream_command: None, 348 | upstream: None, 349 | upstream_separator: None, 350 | case: None, 351 | features: None, 352 | } 353 | } 354 | } 355 | 356 | impl Default for CfgFile { 357 | fn default() -> Self { 358 | let no_args: Vec = vec![]; 359 | let default_cli = CliArgs::parse_from(no_args); 360 | 361 | macro_rules! merge_fields { 362 | (@ | $($result:tt)*) => { Self{ $($result)* } }; 363 | (@ $name:ident $($names:ident)* | $($result:tt)*) => { 364 | merge_fields!( 365 | @ $($names)* | 366 | $name: Some(default_cli.$name.resolve(None)), 367 | $($result)* 368 | ) 369 | }; 370 | ($($names:ident)*) => { merge_fields!(@ $($names)* | ) }; 371 | } 372 | 373 | merge_fields!(max_width end_markers lang suppressions ignores upstream_command upstream upstream_separator case features) 374 | } 375 | } 376 | 377 | pub fn merge_configs<'a, I>(cli: &CliArgs, files: I) -> PerFileCfg 378 | where 379 | I: IntoIterator, 380 | { 381 | let mut merged = CfgFile::new(); 382 | for (path, other) in files { 383 | log::debug!("merging config file {}", path.to_string_lossy()); 384 | if merged.merge_with(other) { 385 | log::debug!("config fully defined, stopping merge"); 386 | break; 387 | } 388 | } 389 | log::debug!("configuration loaded from files: {:?}", merged); 390 | log::debug!("configuration loaded from CLI: {:?}", cli); 391 | 392 | macro_rules! 
merge_fields { 393 | (@ | $($result:tt)*) => { PerFileCfg{ $($result)* } }; 394 | (@ $name:ident $($names:ident)* | $($result:tt)*) => { 395 | merge_fields!( 396 | @ $($names)* | 397 | $name: cli.$name.resolve(merged.$name), 398 | $($result)* 399 | ) 400 | }; 401 | ($($names:ident)*) => { merge_fields!(@ $($names)* | ) }; 402 | } 403 | 404 | let result = merge_fields!(max_width end_markers lang suppressions ignores upstream_command upstream upstream_separator case features); 405 | log::debug!("merged configuration: {:?}", result); 406 | result 407 | } 408 | 409 | #[cfg(test)] 410 | mod test { 411 | use super::*; 412 | 413 | // Actual tests follow. 414 | #[test] 415 | fn merging_two_partially_defined_config_files() { 416 | let mut main_cfg = CfgFile { 417 | max_width: Some(10), 418 | end_markers: None, 419 | lang: None, 420 | suppressions: None, 421 | ignores: Some("some words".into()), 422 | upstream_command: None, 423 | upstream: None, 424 | upstream_separator: None, 425 | case: None, 426 | features: None, 427 | }; 428 | let other_cfg = CfgFile { 429 | max_width: None, 430 | end_markers: None, 431 | lang: Some("ac".into()), 432 | suppressions: None, 433 | ignores: None, 434 | upstream_command: Some("some".into()), 435 | upstream: None, 436 | upstream_separator: None, 437 | case: None, 438 | features: Some("feature".into()), 439 | }; 440 | 441 | let fully_defined = main_cfg.merge_with(&other_cfg); 442 | assert!(!fully_defined); 443 | 444 | let expected_cfg = CfgFile { 445 | max_width: Some(10), 446 | end_markers: None, 447 | lang: Some("ac".into()), 448 | suppressions: None, 449 | ignores: Some("some words".into()), 450 | upstream_command: Some("some".into()), 451 | upstream: None, 452 | upstream_separator: None, 453 | case: None, 454 | features: Some("feature".into()), 455 | }; 456 | 457 | assert_eq!(expected_cfg, main_cfg); 458 | } 459 | 460 | #[test] 461 | fn options_in_main_config_are_kept() { 462 | let mut main_cfg = CfgFile { 463 | max_width: Some(10), 464 | end_markers: None, 465 | lang: None, 466 | suppressions: None, 467 | ignores: Some("some words".into()), 468 | upstream_command: None, 469 | upstream: None, 470 | upstream_separator: None, 471 | case: None, 472 | features: None, 473 | }; 474 | let other_cfg = CfgFile { 475 | max_width: Some(20), 476 | end_markers: None, 477 | lang: None, 478 | suppressions: None, 479 | ignores: Some("some other words".into()), 480 | upstream_command: None, 481 | upstream: None, 482 | upstream_separator: None, 483 | case: None, 484 | features: None, 485 | }; 486 | assert_ne!(main_cfg, other_cfg); 487 | 488 | let fully_defined = main_cfg.merge_with(&other_cfg); 489 | assert!(!fully_defined); 490 | 491 | let expected_cfg = CfgFile { 492 | max_width: Some(10), 493 | end_markers: None, 494 | lang: None, 495 | suppressions: None, 496 | ignores: Some("some words".into()), 497 | upstream_command: None, 498 | upstream: None, 499 | upstream_separator: None, 500 | case: None, 501 | features: None, 502 | }; 503 | 504 | assert_eq!(expected_cfg, main_cfg); 505 | } 506 | 507 | #[test] 508 | fn fully_defined_config_is_immutable() { 509 | let mut main_cfg = CfgFile { 510 | max_width: None, 511 | end_markers: None, 512 | lang: None, 513 | suppressions: None, 514 | ignores: None, 515 | upstream_command: None, 516 | upstream: None, 517 | upstream_separator: None, 518 | case: None, 519 | features: None, 520 | }; 521 | let missing_options = CfgFile { 522 | max_width: Some(20), 523 | end_markers: Some("marker".into()), 524 | lang: Some("lang".into()), 525 | 
suppressions: Some("suppressions".into()), 526 | ignores: Some("some other words".into()), 527 | upstream_command: Some("upstream-command".into()), 528 | upstream: Some("upstream".into()), 529 | upstream_separator: Some("sep".into()), 530 | case: Some(Case::Ignore), 531 | features: Some("feature".into()), 532 | }; 533 | let other_options = CfgFile { 534 | max_width: Some(10), 535 | end_markers: Some("nothing".into()), 536 | lang: Some("asdf".into()), 537 | suppressions: Some("just text".into()), 538 | ignores: Some("ignore this".into()), 539 | upstream_command: Some("does not matter".into()), 540 | upstream: Some("swimming is nice".into()), 541 | upstream_separator: Some("let's not split up".into()), 542 | case: Some(Case::Keep), 543 | features: Some("everything".into()), 544 | }; 545 | 546 | let fully_defined = main_cfg.merge_with(&missing_options); 547 | assert!(fully_defined); 548 | let fully_defined = main_cfg.merge_with(&other_options); 549 | assert!(fully_defined); 550 | 551 | let expected_cfg = CfgFile { 552 | max_width: Some(20), 553 | end_markers: Some("marker".into()), 554 | lang: Some("lang".into()), 555 | suppressions: Some("suppressions".into()), 556 | ignores: Some("some other words".into()), 557 | upstream_command: Some("upstream-command".into()), 558 | upstream: Some("upstream".into()), 559 | upstream_separator: Some("sep".into()), 560 | case: Some(Case::Ignore), 561 | features: Some("feature".into()), 562 | }; 563 | 564 | assert_eq!(expected_cfg, main_cfg); 565 | } 566 | 567 | #[test] 568 | fn merging_cli_with_two_config_files() { 569 | let main_cfg = CfgFile { 570 | max_width: Some(10), 571 | end_markers: None, 572 | lang: None, 573 | suppressions: None, 574 | ignores: Some("some words".into()), 575 | upstream_command: None, 576 | upstream: None, 577 | upstream_separator: None, 578 | case: None, 579 | features: None, 580 | }; 581 | let other_cfg = CfgFile { 582 | max_width: None, 583 | end_markers: None, 584 | lang: Some("ac".into()), 585 | suppressions: None, 586 | ignores: None, 587 | upstream_command: None, 588 | upstream: None, 589 | upstream_separator: None, 590 | case: None, 591 | features: Some("feature".into()), 592 | }; 593 | let default_cfg = CfgFile::default(); 594 | 595 | let files = vec![ 596 | (PathBuf::from("main"), main_cfg), 597 | (PathBuf::from("other"), other_cfg), 598 | (PathBuf::from("default"), default_cfg), 599 | ]; 600 | let no_args: Vec = vec![]; 601 | let cli = CliArgs::parse_from(no_args); 602 | let merged = merge_configs(&cli, &files); 603 | 604 | let expected_cfg = PerFileCfg { 605 | max_width: 10, 606 | end_markers: "?!:.".into(), 607 | lang: "ac".into(), 608 | suppressions: "".into(), 609 | ignores: "some words".into(), 610 | upstream_command: "".into(), 611 | upstream: "".into(), 612 | upstream_separator: "".into(), 613 | case: Case::Ignore, 614 | features: "feature".into(), 615 | }; 616 | 617 | assert_eq!(expected_cfg, merged); 618 | } 619 | } 620 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Prepare your markdown for easy diff'ing! 
2 | 3 | 4 | 5 | - [About](#about) 6 | - [Motivation](#motivation) 7 | - [Pronunciation](#pronunciation) 8 | - [Working Principle](#working-principle) 9 | - [Caveats](#caveats) 10 | - [About Markdown Extensions](#about-markdown-extensions) 11 | - [Command Reference](#command-reference) 12 | - [Command Line Arguments](#command-line-arguments) 13 | - [Automatic File Discovery](#automatic-file-discovery) 14 | - [Environment Variables](#environment-variables) 15 | - [Config Files](#config-files) 16 | - [Per-File Configuration](#per-file-configuration) 17 | - [Installation](#installation) 18 | - [Building From Source](#building-from-source) 19 | - [Editor Integration](#editor-integration) 20 | - [neovim](#neovim) 21 | - [vim](#vim) 22 | - [VS Code](#vs-code) 23 | - [Tips And Tricks](#tips-and-tricks) 24 | - [Non-Breaking Spaces](#non-breaking-spaces) 25 | - [Disabling Auto-Formatting](#disabling-auto-formatting) 26 | - [How To Contribute](#how-to-contribute) 27 | - [Licence](#licence) 28 | 29 | 30 | 31 | # About 32 | 33 | This is `mdslw`, the MarkDown Sentence Line Wrapper, an auto-formatter that 34 | prepares your markdown for easy diff'ing. 35 | 36 | # Motivation 37 | 38 | Markdown documents are written for different purposes. 39 | Some of them are meant to be read in plain text, while others are first rendered 40 | and then presented to the reader. 41 | In the latter case, the documents are often kept in version control and edited 42 | with the same workflows as other code. 43 | 44 | When editing source code, software developers do not want changes in one 45 | location to show up as changes in unrelated locations. 46 | Now imagine a markdown document like this: 47 | 48 | ```markdown 49 | # Lorem Ipsum 50 | 51 | Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Sed do eiusmod tempor 52 | incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam. 53 | ``` 54 | 55 | Adding the new sentence `Excepteur sint occaecat cupidatat non proident.` after 56 | the second one and re-arranging the text as a block would result in a diff view 57 | like this that shows changes in several lines: 58 | 59 | ```diff 60 | 3,4c3,5 61 | < Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Sed do eiusmod tempor 62 | < incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam. 63 | --- 64 | > Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Excepteur sint occaecat 65 | > cupidatat non proident. Sed do eiusmod tempor incididunt ut labore et dolore 66 | > magna aliqua. Ut enim ad minim veniam. 67 | ``` 68 | 69 | Now imagine the original text had a line break after every sentence, i.e. it had 70 | looked like this: 71 | 72 | ```markdown 73 | # Lorem Ipsum 74 | 75 | Lorem ipsum dolor sit amet. 76 | Consectetur adipiscing elit. 77 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 78 | Ut enim ad minim veniam. 79 | ``` 80 | 81 | For text formatted like this, a diff would only show up for the sentences that 82 | are actually affected, simplifying the review process: 83 | 84 | ```diff 85 | 4a5 86 | > Excepteur sint occaecat cupidatat non proident. 87 | ``` 88 | 89 | Most rendering engines treat a single linebreak like a single space. 90 | Thus, both documents would be identical when presented to the reader even though 91 | the latter is significantly nicer to keep up to date with version control. 92 | The tool `mdslw` aims to auto-format markdown documents in exactly this way. 
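If you want to try this on a document of your own, the simplest invocations are
sketched below; they rely only on the behaviour described in the
[command reference](#command-reference) further down (formatting is the default
mode, and without any `PATH` the tool acts as a stdin/stdout filter):

```bash
# Auto-format a file in place (the default mode).
mdslw lorem.md

# Only check the formatting without changing the file, e.g. in CI.
mdslw --mode=check lorem.md

# Read from stdin and write the formatted document to stdout.
mdslw < lorem.md
```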
93 | 94 | # Pronunciation 95 | 96 | If you are wondering how to pronounce `mdslw`, you can either say each letter 97 | individually or pronounce it like mud-slaw (`mʌd-slɔ`). 98 | 99 | # Working Principle 100 | 101 | The tool `mdslw` operates according to a very simple process that can be 102 | described as follows: 103 | 104 | - Parse the document and determine areas in the document that contain text. 105 | Only process those. 106 | - There exists a limited number of characters (`.!?:` by default) that serve as 107 | end-of-sentence markers if they occur alone. 108 | If such a character is followed by whitespace, it denotes the end of a 109 | sentence, _unless_ the last word before the character is part of a known set 110 | of words, matched case-insensitively by default. 111 | Those words can be taken from an included list for a specific language and 112 | also specified directly. 113 | - Insert a line break after every character that ends a sentence, but keep 114 | indents in lists and enumerations intact. 115 | - Collapse all consecutive whitespace into a single space. 116 | While doing so, preserve both [non-breaking spaces] and linebreaks that are 117 | preceded by [non-breaking spaces]. 118 | - Before line wrapping, replace all spaces in link texts by 119 | [non-breaking spaces]. 120 | - Wrap lines that are longer than the maximum line width (80 characters by 121 | default) without splitting words or splitting at [non-breaking spaces] while 122 | also keeping indents intact. 123 | 124 | In contrast to most other tools the author could find, `mdslw` does not parse 125 | the entire document into an internal data structure just to render it back 126 | because that might result in changes in unexpected locations. 127 | Instead, it adjusts only those areas that do contain text that can be wrapped. 128 | That is, `mdslw` never touches any parts of a document that cannot be 129 | line-wrapped automatically. 130 | That includes, for example, code blocks, HTML blocks, and pipe tables. 131 | 132 | ## Caveats 133 | 134 | - The default settings of `mdslw` are strongly geared towards the English 135 | language, even though it works for other languages, too. 136 | - Like with any other auto-formatter, you give up some freedom for the benefit 137 | of automatic handling of certain issues. 138 | - Inline code sections are wrapped like any other text, which may cause issues 139 | with certain renderers. 140 | - While `mdslw` has been tested with documents containing unicode characters 141 | such as emojis, the outcome can still be unexpected. 142 | For example, any emoji is treated as a single character when determining line 143 | width even though some editors might draw certain emojis wider. 144 | Any feedback is welcome! 145 | - Since `mdslw` collapses all consecutive whitespace into a single space during 146 | the line-wrapping process, it does not work well with documents using tabs in 147 | text. 148 | A tab, including all whitespace before and after it, will also be replaced by 149 | a single space. 150 | Use the `keep-linebreaks` feature and prefix linebreaks by 151 | [non-breaking spaces] to influence this behaviour. 152 | - There are flavours of markdown that define additional markup syntax that 153 | `mdslw` cannot recognise but instead detects as text. 154 | Consequently, `mdslw` might cause formatting changes that cause such special 155 | syntax to be lost. 156 | You can use [non-breaking spaces] to work around that. 
157 | - Some line breaks added by `mdslw` might not be considered nice looking. 158 | Use [non-breaking spaces] instead of normal ones to prevent a line break at a 159 | position. 160 | 161 | ## About Markdown Extensions 162 | 163 | There are quite a lot of markdown extensions out there. 164 | It is not possible for `mdslw` to support all of them. 165 | Instead, `mdslw` aims at supporting CommonMark as well as _some_ extensions used 166 | by its users. 167 | A new extension can be supported if supporting it does not negatively impact 168 | CommonMark support and if support can be added relatively easily. 169 | Please feel free to suggest support for a new extension as a 170 | [contribution](#how-to-contribute). 171 | 172 | # Command Reference 173 | 174 | Call as: 175 | 176 | ```bash 177 | mdslw [OPTIONS] [PATHS]... 178 | ``` 179 | 180 | A `PATH` can point to a file or a directory. 181 | If it is a file, then it will be auto-formatted irrespective of its extension. 182 | If it is a directory, then `mdslw` will discover all files ending in `.md` 183 | recursively and auto-format those. 184 | If you do not specify any path, then `mdslw` will read from stdin and write to 185 | stdout. 186 | 187 | The following is a list of all supported 188 | [command line arguments](#command-line-arguments). 189 | Note that you can also configure `mdslw` via 190 | [environment variables](#environment-variables) or 191 | [config files](#config-files). 192 | Values are resolved in the following order: 193 | 194 | - Defaults 195 | - Config files 196 | - Environment variables 197 | - Command line arguments 198 | 199 | ## Command Line Arguments 200 | 201 | - `--help`: 202 | Print the help message. 203 | - `--version`: 204 | Print the tool's version number. 205 | - `--max-width `: 206 | The maximum line width that is acceptable. 207 | A value of 0 disables wrapping of long lines altogether. 208 | The default value is 80. 209 | - `--end-markers `: 210 | The set of characters that are end of sentence markers, defaults to `?!:.`. 211 | - `--mode `: 212 | A value of `check` means to exit with an error if the format had to be 213 | adjusted but not to perform any formatting. 214 | A value of `format`, the default, means to format the file and exit with 215 | success. 216 | A value of `both` means to do both (useful when used as a `pre-commit` hook). 217 | - `--lang `: 218 | A space-separated list of languages whose suppression words as specified by 219 | unicode should be taken into account. 220 | See [here][unicode] for all languages. 221 | Currently supported are `en`, `de`, `es`, `fr`, and `it`. 222 | Use `none` to disable. 223 | Use `ac` (the default) for "author's choice", a list for the English language 224 | defined and curated by this tool's author. 225 | - `--suppressions `: 226 | A space-separated list of words that end in one of `END_MARKERS` but that 227 | should not be followed by a line break. 228 | This is in addition to what is specified via `--lang`. 229 | Defaults to the empty string. 230 | - `--ignores `: 231 | Space-separated list of words that end in one of `END_MARKERS` and that should 232 | be removed from the list of suppressions. 233 | Defaults to the empty string. 234 | - `--upstream-command `: 235 | Specify an upstream auto-formatter that reads from stdin and writes to stdout. 236 | It will be called before `mdslw` will run. 237 | This is useful if you want to chain multiple tools. 238 | Specify the command that will be executed. 239 | For example, specify `prettier` to call `prettier` first. 
240 | The upstream auto-formatter runs in each file's directory if `PATHS` are 241 | specified. 242 | - `--upstream `: 243 | Specify the arguments for the upstream auto-formatter. 244 | If `--upstream-command` is not set, the first word will be used as the command. 245 | For example, with `--upstream-command="prettier"`, use 246 | `--upstream="--parser=markdown"` to enable markdown parsing. 247 | - `--upstream-separator `: 248 | Specify a string that will be used to separate the value passed to 249 | `--upstream` into words. 250 | If empty, splitting is based on whitespace. 259 | - `--case `: 260 | How to handle the case of provided suppression words, both via `--lang` and 261 | `--suppressions`. 262 | A value of `ignore`, the default, means to match case-insensitively while a 263 | value of `keep` means to match case-sensitively. 264 | - `--extension `: 265 | The file extension used to find markdown files when a `PATH` is a directory, 266 | defaults to `.md`. 267 | - `--features `: 268 | Comma-separated list of optional features to enable or disable. 269 | Currently, the following are supported (the opposite setting is the default in 270 | each case): 271 | - `keep-spaces-in-links`: 272 | Do not replace spaces in link texts by [non-breaking spaces]. 273 | - `keep-linebreaks`: 274 | Do not remove existing linebreaks during the line-wrapping process. 275 | - `format-block-quotes`: 276 | Format text in block quotes. 277 | - `collate-link-defs`: 278 | Gather all link definitions, i.e. `[link name]: url`, in a block at the end 279 | of the document in alphabetical order, sorted case-insensitively. 280 | Links can be defined as belonging to a category called `CATEGORY_NAME` with 281 | the comment ``. 282 | Each link definition following such a comment will be considered as part of 283 | the specified category. 284 | Link definitions will be sorted per category and categories will also be 285 | sorted by name. 286 | - `outsource-inline-links`: 287 | Replace all inline links by named links using a link definition, i.e. 288 | `[link](url)` becomes `[link][def]` and `[def]: url`. 289 | All new link definitions will be added at the end of the document. 290 | Existing link definitions will be reused. 291 | Link definitions in block quotes will be put at the end of the block quote 292 | if `format-block-quotes` is set. 293 | - `--completion `: 294 | Output shell completion file for the given shell to stdout and exit. 295 | The following shells are supported: 296 | bash, elvish, fish, powershell, zsh. 297 | - `--jobs `: 298 | Specify the number of threads to use for processing files from disk in 299 | parallel. 300 | Defaults to the number of logical processors. 301 | - `--report `: 302 | What to report to stdout, ignored when reading from stdin: 303 | - `none`, the default: 304 | Report nothing but be silent instead, which is useful in scripts. 305 | - `changed`: 306 | Output the names of files that were changed, which is useful for downstream 307 | processing with tools such as `xargs`. 
308 | - `state`: 309 | Output `:` where `` is `U` for "unchanged" or `C` 310 | for "changed", which is useful for downstream filtering with tools such as 311 | `grep`. 312 | - `diff-myers`: 313 | Output a unified diff based on the [myers algorithm]. 314 | Pipe the output to tools such as [bat], [delta], or [diff-so-fancy] to get 315 | syntax highlighting. 316 | You can use the `--diff-pager` setting to define such a pager. 317 | - `diff-patience`: 318 | Output a unified diff based on the [patience algorithm]. 319 | See `diff-myers` for useful downstream tools. 320 | - `diff-lcs`: 321 | Output a unified diff based on the [lcs algorithm]. 322 | See `diff-myers` for useful downstream tools. 323 | - `--diff-pager `: 324 | Specify a downstream pager for diffs (with args) that reads diffs from stdin. 325 | This is useful if you want to display a diff nicely. 326 | For example, specify `delta --side-by-side` to get a side-by-side view. 327 | This flag is ignored unless a diff-type report has been requested. 328 | - `--stdin-filepath `: 329 | The path to the file that is read from stdin. 330 | This is used to determine relevant config files when reading from stdin and to 331 | run an upstream formatter. 332 | Defaults to the current working directory. 333 | - `--default-config`: 334 | Output the default config file in TOML format to stdout and exit. 335 | - `--verbose`: 336 | Specify to increase verbosity of log output. 337 | Specify multiple times to increase even further. 338 | 339 | ## Automatic File Discovery 340 | 341 | This tool uses the [ignore crate] in its default settings to discover files when 342 | given a directory as a `PATH`. 343 | Details about those defaults can be found [here][ignore defaults]. 344 | Briefly summarised, the following rules apply when deciding whether a file shall 345 | be ignored: 346 | 347 | - Hidden files (starting with `.`) are ignored. 348 | - Files matching patterns specified in a file called `.ignore` are ignored. 349 | The patterns affect all files in the same directory or child directories. 350 | - If run inside a git repository, files matching patterns specified in a file 351 | called `.gitignore` are ignored. 352 | The patterns affect all files in the same directory or child directories. 353 | 354 | If you wish to format a file that is being ignored by `mdslw`, then pass it as 355 | an argument directly. 356 | Files passed as arguments are never ignored and will always be processed. 357 | 358 | ## Environment Variables 359 | 360 | Instead of or in addition to configuring `mdslw` via 361 | [command line arguments](#command-line-arguments) or 362 | [config files](#config-files), you can configure it via environment variables. 363 | For any command line option `--some-option=value`, you can instead set an 364 | environment variable `MDSLW_SOME_OPTION=value`. 365 | For example, instead of setting `--end-markers=".?!"`, you could set 366 | `MDSLW_END_MARKERS=".?!"` instead. 367 | When set, the value specified via the environment variable will take precedence 368 | over the default value and a value taken from config files. 369 | When set, a command line argument will take precedence over the environment 370 | variable. 371 | Take a call like this for example: 372 | 373 | ```bash 374 | export MDSLW_EXTENSION=".markdown" 375 | export MDSLW_MODE=both 376 | mdslw --mode=check . 377 | ``` 378 | 379 | This call will search for files with the extension `.markdown` instead of the 380 | default `.md`. 
381 | Furthermore, files will only be checked due to `--mode=check`, even though the 382 | environment variable `MDSLW_MODE=both` has been set. 383 | Defaults will be used for everything else. 384 | 385 | ## Config Files 386 | 387 | Instead of or in addition to configuring `mdslw` via 388 | [command line arguments](#command-line-arguments) or 389 | [environment variables](#environment-variables), you can configure it via config 390 | files. 391 | Such a file has to have the exact name `.mdslw.toml` and affects all files in or 392 | below its own directory. 393 | Multiple config files will be merged. 394 | Options given in config files closer to a markdown file take precedence. 395 | 396 | Configuration files are limited to options that influence the formatted result. 397 | They cannot influence how `mdslw` operates. 398 | For example, the option `--mode` cannot be set via config files while 399 | `--max-width` can. 400 | The following example shows all the possible options that can be set via config 401 | files. 402 | Note that all entries are optional in config files, which means that any number 403 | of them may be left out. 404 | The following is a full config file containing all the default values. 405 | 406 | 407 | 408 | ```toml 409 | max-width = 80 410 | end-markers = "?!:." 411 | lang = "ac" 412 | suppressions = "" 413 | ignores = "" 414 | upstream-command = "" 415 | upstream = "" 416 | upstream-separator = "" 417 | case = "ignore" 418 | features = "" 419 | ``` 420 | 421 | 422 | 423 | When set, the value specified via the config file will take precedence over the 424 | default value. 425 | When set, an environment variable or a command line argument will take 426 | precedence over a value taken from config files. 427 | 428 | ### Per-File Configuration 429 | 430 | You can embed a configuration for `mdslw` inside a markdown file. 431 | That configuration affects only the file it is embedded in. 432 | It will be merged with other config files affecting the markdown file in 433 | question just like other config files. 434 | 435 | An embedded configuration needs to reside inside the YAML front matter as part 436 | of a _block scalar string_ associated with the YAML key `mdslw-toml` (see below 437 | for an example). 438 | To get an overview of all the different possibilities for defining multi-line 439 | strings in YAML documents, please see [here][yaml-block-scalars]. 440 | The embedded configuration string needs to follow the same format as all other 441 | config files for `mdslw` (see above). 442 | 443 | For example, you can embed the default config file into a markdown document as 444 | in the following example. 445 | It is strongly recommended to use the `|` block style indicator without a block 446 | chomping indicator as done in the following example. 447 | 448 | ```markdown 449 | --- 450 | # This is the YAML front matter. 451 | mdslw-toml: | 452 | max-width = 80 453 | end-markers = "?!:." 454 | lang = "ac" 455 | suppressions = "" 456 | ignores = "" 457 | upstream-command = "" 458 | upstream = "" 459 | upstream-separator = "" 460 | case = "ignore" 461 | features = "" 462 | --- 463 | The actual markdown document follows. 464 | ``` 465 | 466 | Note that `mdslw` does not feature a full YAML parser because, as of October 467 | 2025, there is no suitable library available. 468 | Instead, `mdslw` comes with its own limited YAML parser. 469 | That parser supports only block scalar strings without an indentation indicator. 
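Since all entries are optional, an embedded configuration does not have to
repeat the full default file shown above.
For instance, a minimal front matter that only raises the allowed line width
for one document could look like the following sketch (the value `100` is
purely illustrative):

```markdown
---
# This is the YAML front matter.
mdslw-toml: |
  max-width = 100
---
The actual markdown document follows.
```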
470 | 471 | # Installation 472 | 473 | Go to the project's [latest release], select the correct binary for your system, 474 | and download it. 475 | See below for how to select the correct one. 476 | Rename the downloaded binary to `mdslw` (or `mdslw.exe` on Windows) and move it 477 | to a location that is in your `$PATH` such as `/usr/local/bin` (will be 478 | different on Windows). 479 | Moving it there will likely require admin or `root` permissions, e.g. via 480 | `sudo`. 481 | On Unix systems, you also have to make the binary executable via the command 482 | `chmod +x mdslw`, pointing to the actual location of `mdslw`. 483 | From now on, you can simply type `mdslw` in your terminal to use it! 484 | 485 | The naming of the release binaries uses the [llvm target triple]. 486 | You can also use the following list to pick the correct binary for your machine: 487 | 488 | - `mdslw_x86_64-unknown-linux-musl`: 489 | Linux desktop or laptop using 64-bit x86-compatible CPUs 490 | - `mdslw_armv7-unknown-linux-gnueabihf`: 491 | RaspberryPi or similar single-board computers using ARMv7-compatible CPUs 492 | - `mdslw_x86_64-pc-windows-gnu.exe`: 493 | Windows desktop or laptop using 64-bit x86-compatible CPUs 494 | - `mdslw_aarch64-apple-darwin`: 495 | Mac using M1, M2, or other Mx CPUs based on Apple silicon, i.e. the new ones 496 | after the [transition from Intel CPUs][apple-architecture-transition-arm] 497 | - `mdslw_x86_64-apple-darwin`: 498 | Mac using 64-bit x86-compatible CPUs, i.e. the old ones after the 499 | [transition from the PowerPC architecture][apple-architecture-transition-ppc] 500 | 501 | ## Building From Source 502 | 503 | First, install rust, including `cargo`, via [rustup]. 504 | Once you have `cargo`, execute the following command in a terminal: 505 | 506 | ```bash 507 | cargo install --git https://github.com/razziel89/mdslw --locked 508 | ``` 509 | 510 | # Editor Integration 511 | 512 | Contributions describing integrations with more editors are welcome! 513 | 514 | ## neovim 515 | 516 | The recommended way of integrating `mdslw` with neovim is through 517 | [conform.nvim]. 518 | Simply install the plugin and modify your `init.vim` like this to add `mdslw` as 519 | a formatter for the markdown file type: 520 | 521 | ```lua 522 | require("conform").setup({ 523 | formatters_by_ft = { 524 | markdown = { "mdslw" }, 525 | }, 526 | formatters = { 527 | mdslw = { prepend_args = { "--stdin-filepath", "$FILENAME" } }, 528 | }, 529 | }) 530 | ``` 531 | 532 | Alternatively, you can also use the vim-like integration shown below. 533 | 534 | ## vim 535 | 536 | Add the following to your `~/.vimrc` to have your editor auto-format every `.md` 537 | document before writing it out: 538 | 539 | ```vim 540 | function MdFormat() 541 | if executable("mdslw") 542 | set lazyredraw 543 | " Enter and exit insert mode to keep track 544 | " of the cursor position, useful when undoing. 545 | execute "normal! ii\" 546 | let cursor_pos = getpos(".") 547 | %!mdslw --stdin-filepath "%" 548 | if v:shell_error != 0 549 | u 550 | endif 551 | call setpos('.', cursor_pos) 552 | set nolazyredraw 553 | endif 554 | endfunction 555 | 556 | autocmd BufWritePre *.md silent! :call MdFormat() 557 | ``` 558 | 559 | ## VS Code 560 | 561 | Assuming you have `mdslw` installed and in your `PATH`, you can integrate it 562 | with VS Code. 
563 | To do so, install the extension [run on save] and add the following snippet to 564 | your `settings.json`: 565 | 566 | ```json 567 | { 568 | "emeraldwalk.runonsave": { 569 | "commands": [ 570 | { 571 | "match": ".*\\.md$", 572 | "cmd": "mdslw '${file}'" 573 | } 574 | ] 575 | } 576 | } 577 | ``` 578 | 579 | From now on, every time you save to an existing markdown file, `mdslw` will 580 | auto-format it. 581 | This snippet assumes an empty `settings.json` file. 582 | If yours is not empty, you will have to merge it with the existing one. 583 | 584 | # Tips And Tricks 585 | 586 | ## Non-Breaking Spaces 587 | 588 | The following codepoints are recognised as [non-breaking spaces] by default: 589 | 590 | - U+00A0 591 | - U+2007 592 | - U+202F 593 | - U+2060 594 | - U+FEFF 595 | 596 | How to insert [non-breaking spaces] depends on your operating system as well as 597 | your editor. 598 | The below will cover the non-breaking space U+00A0. 599 | 600 | **vim/neovim** 601 | 602 | Adding this to your `~/.vimrc` or `init.vim` will let you insert non-breaking 603 | spaces when pressing CTRL+s in insert mode and also show them as `+`: 604 | 605 | ```vim 606 | " Make it easy to insert non-breaking spaces and show them by default. 607 | set list listchars+=nbsp:+ 608 | inoremap NS 609 | " Alternatively, you can use this if your neovim/vim does not support this 610 | " digraph. Note that your browser might not copy the non-breaking space at the 611 | " end of the following line correctly. 612 | inoremap   613 | ``` 614 | 615 | ❗Tips for how to add and show non-breaking spaces in other editors are welcome. 616 | 617 | ## Disabling Auto-Formatting 618 | 619 | You can tell `mdslw` to stop auto-formatting parts of your document. 620 | Everything between the HTML comments `` and 621 | `` will not be formatted. 622 | For convenience, `mdslw` also recognises `prettier`'s range ignore directives 623 | `` and ``. 624 | 625 | In addition, [non-breaking spaces](#non-breaking-spaces) can be used to prevent 626 | modifications to your documents. 627 | Replacing a space by a non-breaking space prevents `mdslw` from adding a line 628 | break at that position. 629 | Furthermore, preceding a line break by a non-breaking space prevents `mdslw` 630 | from removing the line break. 631 | 632 | # How To Contribute 633 | 634 | If you have found a bug and want to fix it, please simply go ahead and fork the 635 | repository, fix the bug, and open a pull request to this repository! 636 | Bug fixes are always welcome. 637 | 638 | In all other cases, please open an issue on GitHub first to discuss the 639 | contribution. 640 | The feature you would like to introduce might already be in development. 641 | Please also take note of [the intended scope](#about-markdown-extensions) of 642 | `mdslw`. 643 | 644 | # Licence 645 | 646 | [GPLv3] 647 | 648 | If you want to use this piece of software under a different, more permissive 649 | open-source licence, please contact me. 650 | I am very open to discussing this point. 
651 | 652 | 653 | 654 | [GPLv3]: ./LICENCE 655 | [ignore crate]: https://docs.rs/ignore/latest/ignore/ 656 | [ignore defaults]: https://docs.rs/ignore/latest/ignore/struct.WalkBuilder.html#method.standard_filters 657 | 658 | 659 | 660 | [lcs algorithm]: https://docs.rs/similar/latest/similar/algorithms/lcs/index.html 661 | [myers algorithm]: https://docs.rs/similar/latest/similar/algorithms/myers/index.html 662 | [patience algorithm]: https://docs.rs/similar/latest/similar/algorithms/patience/index.html 663 | 664 | 665 | 666 | [bat]: https://github.com/sharkdp/bat 667 | [delta]: https://github.com/dandavison/delta 668 | [diff-so-fancy]: https://github.com/so-fancy/diff-so-fancy 669 | 670 | 671 | 672 | [conform.nvim]: https://github.com/stevearc/conform.nvim 673 | [run on save]: https://marketplace.visualstudio.com/items?itemName=emeraldwalk.RunOnSave 674 | 675 | 676 | 677 | [non-breaking spaces]: https://en.wikipedia.org/wiki/Non-breaking_space 678 | [unicode]: https://github.com/unicode-org/cldr-json/tree/main/cldr-json/cldr-segments-full/segments 679 | [yaml-block-scalars]: https://yaml-multiline.info/ 680 | 681 | 682 | 683 | [apple-architecture-transition-arm]: https://en.wikipedia.org/wiki/Mac_transition_to_Apple_Silicon 684 | [apple-architecture-transition-ppc]: https://en.wikipedia.org/wiki/Mac_transition_to_Intel_processors 685 | [latest release]: https://github.com/razziel89/mdslw/releases/latest 686 | [llvm target triple]: https://clang.llvm.org/docs/CrossCompilation.html#target-triple 687 | [rustup]: https://rustup.rs/ 688 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use core::ops::Range; 19 | use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; 20 | use std::collections::HashMap; 21 | use std::fmt::Write; 22 | 23 | use crate::detect::WhitespaceDetector; 24 | use crate::ignore::IgnoreByHtmlComment; 25 | use crate::indent::build_indent; 26 | use crate::trace_log; 27 | 28 | const YAML_CONFIG_KEY: &str = "mdslw-toml"; 29 | const YAML_CONFIG_KEY_WITH_COLON: &str = "mdslw-toml:"; 30 | 31 | /// CharRange describes a range of characters in a document. 32 | pub type CharRange = Range; 33 | 34 | #[derive(Debug, PartialEq)] 35 | pub struct ParseCfg { 36 | pub keep_linebreaks: bool, 37 | } 38 | 39 | /// Determine ranges of characters that shall later be wrapped and have their indents fixed. 40 | pub fn parse_markdown(text: &str, parse_cfg: &ParseCfg) -> Vec { 41 | // Enable some options by default to support parsing common kinds of documents. 
42 | let mut opts = Options::empty(); 43 | // If we do not want to modify some elements, we detect them with the parser and consider them 44 | // as verbatim in the function "to_be_wrapped". 45 | log::debug!("detecting tables"); 46 | opts.insert(Options::ENABLE_TABLES); 47 | log::debug!("detecting definition lists"); 48 | opts.insert(Options::ENABLE_DEFINITION_LIST); 49 | // Do not enable other options: 50 | // opts.insert(Options::ENABLE_FOOTNOTES); 51 | // opts.insert(Options::ENABLE_TASKLISTS); 52 | // opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); 53 | // opts.insert(Options::ENABLE_SMART_PUNCTUATION); 54 | // opts.insert(Options::ENABLE_STRIKETHROUGH); 55 | let events_and_ranges = Parser::new_ext(text, opts) 56 | .into_offset_iter() 57 | .inspect(|(event, range)| { 58 | trace_log!("parsed [{}, {}): {:?}", range.start, range.end, event) 59 | }) 60 | .collect::>(); 61 | let whitespaces = whitespace_indices(text, &WhitespaceDetector::new(parse_cfg.keep_linebreaks)); 62 | 63 | merge_ranges(to_be_wrapped(events_and_ranges, &whitespaces), &whitespaces) 64 | } 65 | 66 | /// Filter out those ranges of text that shall be wrapped. See comments in the function for 67 | /// what sections are handled in which way. 68 | fn to_be_wrapped( 69 | events: Vec<(Event, CharRange)>, 70 | whitespaces: &HashMap, 71 | ) -> Vec { 72 | let mut verbatim_level: usize = 0; 73 | let mut ignore = IgnoreByHtmlComment::new(); 74 | 75 | events 76 | .into_iter() 77 | // Mark every range that is between two ignore directives as verbatim by filtering it out. 78 | .filter(|(event, _range)| { 79 | if let Event::Html(s) = event { 80 | ignore.process_html(s) 81 | } 82 | !ignore.should_be_ignored() 83 | }) 84 | .filter(|(event, range)| match event { 85 | Event::Start(tag) => { 86 | match tag { 87 | // Most delimited blocks should stay as they are. Introducing line breaks would 88 | // cause problems here. 89 | Tag::BlockQuote(..) 90 | | Tag::CodeBlock(..) 91 | | Tag::FootnoteDefinition(..) 92 | | Tag::Heading { .. } 93 | | Tag::Image { .. } 94 | | Tag::Superscript 95 | | Tag::Subscript 96 | | Tag::Table(..) 97 | | Tag::TableCell 98 | | Tag::TableHead 99 | | Tag::TableRow => { 100 | verbatim_level += 1; 101 | false 102 | } 103 | // In case of some blocks, we do not want to extract the text contained inside 104 | // them but keep everything the block encompasses. 105 | Tag::Emphasis | Tag::Link { .. } | Tag::Strikethrough | Tag::Strong => { 106 | verbatim_level += 1; 107 | true 108 | } 109 | // Other delimited blocks can be both, inside a verbatim block or inside text. 110 | // However, the text they embrace is the important bit but we do not want to 111 | // extract the entire range. 112 | Tag::Item 113 | | Tag::List(..) 114 | | Tag::Paragraph 115 | | Tag::MetadataBlock(..) 116 | | Tag::DefinitionList 117 | | Tag::DefinitionListTitle 118 | | Tag::DefinitionListDefinition => false, 119 | 120 | // See below for why HTML blocks are treated like this. 121 | Tag::HtmlBlock => !range 122 | .clone() 123 | .filter_map(|el| whitespaces.get(&el)) 124 | .any(|el| el == &'\n'), 125 | } 126 | } 127 | 128 | Event::End(tag) => { 129 | match tag { 130 | // Kept as they were. 131 | TagEnd::BlockQuote(..) 132 | | TagEnd::CodeBlock 133 | | TagEnd::FootnoteDefinition 134 | | TagEnd::Heading(..) 
135 | | TagEnd::Superscript 136 | | TagEnd::Subscript 137 | | TagEnd::Image 138 | | TagEnd::Table 139 | | TagEnd::TableCell 140 | | TagEnd::TableHead 141 | | TagEnd::TableRow => { 142 | verbatim_level = verbatim_level 143 | .checked_sub(1) 144 | .expect("tags should be balanced"); 145 | false 146 | } 147 | // Should be wrapped but text not extracted. 148 | TagEnd::Emphasis | TagEnd::Link | TagEnd::Strikethrough | TagEnd::Strong => { 149 | verbatim_level = verbatim_level 150 | .checked_sub(1) 151 | .expect("tags should be balanced"); 152 | false 153 | } 154 | 155 | // Can be anything. 156 | TagEnd::Item 157 | | TagEnd::List(..) 158 | | TagEnd::DefinitionList 159 | | TagEnd::DefinitionListTitle 160 | | TagEnd::DefinitionListDefinition 161 | | TagEnd::Paragraph 162 | | TagEnd::HtmlBlock 163 | | TagEnd::MetadataBlock(..) => false, 164 | } 165 | } 166 | 167 | // More elements that are not blocks and that should be taken verbatim. 168 | Event::TaskListMarker(..) | Event::FootnoteReference(..) | Event::Rule => false, 169 | 170 | // We do not support detecting math so far as we do not intend to modify math in any 171 | // way. That is, we treat it as any other text and don't have the parser detect math 172 | // specifically. 173 | Event::InlineMath(..) | Event::DisplayMath(..) => false, 174 | 175 | // Allow editing HTML only if it is inline, i.e. if the range containing the HTML 176 | // contains no whitespace. Treat it like text in that case. 177 | Event::Html(..) | Event::InlineHtml(..) => !range 178 | .clone() 179 | .filter_map(|el| whitespaces.get(&el)) 180 | .any(|el| el == &'\n'), 181 | 182 | // The following should be wrapped if they are not inside a verbatim block. Note that 183 | // that also includes blocks that are extracted in their entirety (e.g. links). In the 184 | // context of text contained within, they count as verbatim blocks, too. 185 | Event::SoftBreak | Event::HardBreak | Event::Text(..) | Event::Code(..) => { 186 | verbatim_level == 0 187 | } 188 | }) 189 | .map(|(_event, range)| range) 190 | .collect::<Vec<_>>() 191 | } 192 | 193 | #[derive(Debug)] 194 | enum RangeMatch<'a> { 195 | Matches((usize, &'a str)), 196 | NoMatch(&'a str), 197 | } 198 | 199 | pub struct BlockQuotes<'a>(Vec<RangeMatch<'a>>); 200 | 201 | impl<'a> BlockQuotes<'a> { 202 | pub const FULL_PREFIX: &'static str = "> "; 203 | pub const FULL_PREFIX_LEN: usize = Self::FULL_PREFIX.len(); 204 | pub const SHORT_PREFIX: &'static str = ">"; 205 | 206 | fn strip_prefix(text: &str, indent: usize) -> String { 207 | // The first line does start with the actual prefix, while the other lines start with a 208 | // number of other characters. Thus, we strip them off for all but the first line. 209 | text.split_inclusive('\n') 210 | .enumerate() 211 | .map(|(idx, t)| { 212 | let t = if idx == 0 { t } else { &t[indent..t.len()] }; 213 | t.strip_prefix(Self::SHORT_PREFIX) 214 | .map(|el| el.strip_prefix(' ').unwrap_or(el)) 215 | .unwrap_or(t) 216 | }) 217 | .collect::<String>() 218 | } 219 | 220 | fn add_prefix(text: String, indent: usize) -> String { 221 | let indent = build_indent(indent); 222 | // The "write!" calls should never fail since we write to a String that we create here. 223 | let mut result = String::new(); 224 | text.split_inclusive('\n') 225 | .enumerate() 226 | .for_each(|(idx, line)| { 227 | let prefix = if line.len() == 1 { 228 | Self::SHORT_PREFIX 229 | } else { 230 | Self::FULL_PREFIX 231 | }; 232 | // The first line is already correctly indented. For the other lines, we have to add 233 | // the indent. 
234 | let ind = if idx == 0 { "" } else { &indent }; 235 | write!(result, "{}{}{}", ind, prefix, line) 236 | .expect("building block-quote formated result"); 237 | }); 238 | result 239 | } 240 | 241 | fn indents(text: &str) -> Vec { 242 | text.split_inclusive('\n') 243 | .flat_map(|line| 0..line.len()) 244 | .collect::>() 245 | } 246 | 247 | pub fn new(text: &'a str) -> Self { 248 | let mut level: usize = 0; 249 | // In case we ever need to iterate over other kinds of syntax, the tag as well as the 250 | // function stripping prefixes will have to be adjusted. 251 | 252 | let indents = Self::indents(text); 253 | let mut start = 0; 254 | 255 | let mut ranges = Parser::new(text) 256 | .into_offset_iter() 257 | .filter_map(|(event, range)| match event { 258 | Event::Start(start) => { 259 | if matches!(start, Tag::BlockQuote(..)) { 260 | level += 1; 261 | } 262 | if level == 1 && matches!(start, Tag::BlockQuote(..)) { 263 | // Using a CharRange here to prevent the flat_map below from flattening 264 | // all the ranges, since Range supports flattening but our 265 | // CharRange does not. 266 | Some(CharRange { 267 | start: range.start, 268 | end: range.end, 269 | }) 270 | } else { 271 | None 272 | } 273 | } 274 | Event::End(end) => { 275 | if matches!(end, TagEnd::BlockQuote(..)) { 276 | level -= 1; 277 | } 278 | None 279 | } 280 | _ => None, 281 | }) 282 | .flat_map(|range| { 283 | let prev_start = start; 284 | let this_start = range.start; 285 | start = range.end; 286 | 287 | let this = RangeMatch::Matches((indents[this_start], &text[range])); 288 | if this_start == prev_start { 289 | vec![this] 290 | } else { 291 | let missing = RangeMatch::NoMatch(&text[prev_start..this_start]); 292 | vec![missing, this] 293 | } 294 | }) 295 | .collect::>(); 296 | 297 | if start != text.len() { 298 | ranges.push(RangeMatch::NoMatch(&text[start..text.len()])); 299 | } 300 | 301 | Self(ranges) 302 | } 303 | 304 | /// The argument `func` should keep a line break at the end if its arguments ends in one. In 305 | /// most cases, it ends in a line break. 306 | pub fn apply_to_matches_and_join(self, func: MapFn) -> String 307 | where 308 | MapFn: Fn(String, usize) -> String, 309 | { 310 | self.0 311 | .into_iter() 312 | .map(|el| match el { 313 | RangeMatch::NoMatch(s) => s.to_string(), 314 | RangeMatch::Matches(s) => Self::add_prefix( 315 | func(Self::strip_prefix(s.1, s.0), s.0 + Self::FULL_PREFIX_LEN), 316 | s.0, 317 | ), 318 | }) 319 | .collect::() 320 | } 321 | } 322 | 323 | /// Check whether there is nothing but whitespace between the end of the previous range and the 324 | /// start of the next one, if the ranges do not connect directly anyway. Note that we still keep 325 | /// paragraphs separated by keeping ranges separate that are separated by more linebreaks than one. 326 | fn merge_ranges(ranges: Vec, whitespaces: &HashMap) -> Vec { 327 | let mut next_range: Option = None; 328 | let mut merged = vec![]; 329 | 330 | for range in ranges { 331 | if let Some(next) = next_range { 332 | let contains_just_whitespace = 333 | (next.end..range.start).all(|el| whitespaces.contains_key(&el)); 334 | let at_most_one_linebreak = (next.end..range.start) 335 | .filter(|el| Some(&'\n') == whitespaces.get(el)) 336 | .count() 337 | <= 1; 338 | let is_contained = range.start >= next.start && range.end <= next.end; 339 | 340 | if is_contained { 341 | // Skip the range if it is already included. 342 | next_range = Some(next); 343 | } else if contains_just_whitespace && at_most_one_linebreak { 344 | // Extend the range. 
345 | next_range = Some(CharRange { 346 | start: next.start, 347 | end: range.end, 348 | }); 349 | } else { 350 | // Remember the range and continue extending. 351 | merged.push(next); 352 | next_range = Some(range); 353 | } 354 | } else { 355 | next_range = Some(range); 356 | } 357 | } 358 | 359 | // Treat the last range that may be left. 360 | if let Some(next) = next_range { 361 | merged.push(next) 362 | } 363 | 364 | // Remove ranges that contain at most 1 character. They never have to be wrapped. 365 | let removed = merged 366 | .into_iter() 367 | .filter(|el| el.len() > 1) 368 | .collect::>(); 369 | 370 | trace_log!( 371 | "formattable byte ranges: {}", 372 | removed 373 | .iter() 374 | .map(|range| format!("[{},{})", range.start, range.end)) 375 | .collect::>() 376 | .join(" ") 377 | ); 378 | 379 | removed 380 | } 381 | 382 | /// Get all indices that point to whitespace as well as the characters they point to. 383 | fn whitespace_indices(text: &str, detector: &WhitespaceDetector) -> HashMap { 384 | text.char_indices() 385 | .filter_map(|(pos, ch)| { 386 | if detector.is_whitespace(&ch) { 387 | Some((pos, ch)) 388 | } else { 389 | None 390 | } 391 | }) 392 | .collect::>() 393 | } 394 | 395 | enum YAMLBlockStartLineType { 396 | Pipe, 397 | Angle, 398 | None, 399 | } 400 | 401 | impl YAMLBlockStartLineType { 402 | fn is_actual_start_line(&self) -> bool { 403 | !matches!(self, Self::None) 404 | } 405 | } 406 | 407 | /// Parse a YAML text without an external dependency. We interpret text as being a single YAML 408 | /// document. We search until we find a line starting with the given key. We return everything that 409 | /// is at the same indentation as the line following the key. 410 | pub fn get_value_for_mdslw_toml_yaml_key(text: &str) -> String { 411 | trace_log!( 412 | "extracting value for key {} from yaml: {}", 413 | YAML_CONFIG_KEY, 414 | text.replace("\n", "\\n") 415 | ); 416 | let start_line_type = |line: &str| { 417 | // Only perform the split by words if we can be reasonably sure that this might be the 418 | // correct line, i.e. one that starts with the key that we expect. 419 | if !line.starts_with(YAML_CONFIG_KEY) { 420 | return YAMLBlockStartLineType::None; 421 | } 422 | let split = line.split_whitespace().collect::>(); 423 | let first_word = split 424 | .first() 425 | .expect("Internal error, there should have been a first word."); 426 | if first_word == &YAML_CONFIG_KEY { 427 | match split[1..] { 428 | [":", "|"] | [":", "|-"] | [":", "|+"] => YAMLBlockStartLineType::Pipe, 429 | [":", ">"] | [":", ">-"] | [":", ">+"] => YAMLBlockStartLineType::Angle, 430 | _ => YAMLBlockStartLineType::None, 431 | } 432 | } else if first_word == &YAML_CONFIG_KEY_WITH_COLON { 433 | match split[1..] { 434 | ["|"] | ["|-"] | ["|+"] => YAMLBlockStartLineType::Pipe, 435 | [">"] | [">-"] | [">+"] => YAMLBlockStartLineType::Angle, 436 | _ => YAMLBlockStartLineType::None, 437 | } 438 | } else { 439 | YAMLBlockStartLineType::None 440 | } 441 | }; 442 | // We skip everything until the first line that we expect, including that first line. We end up 443 | // either with an empty iterator or an iterator whose first element is the first value line. 
444 | let mut skipped = text 445 | .lines() 446 | .skip_while(|line| !start_line_type(line).is_actual_start_line()); 447 | let block_type = if let Some(line) = skipped.next() { 448 | start_line_type(line) 449 | } else { 450 | YAMLBlockStartLineType::None 451 | }; 452 | let mut peekable = skipped.skip_while(|line| line.is_empty()).peekable(); 453 | let first_line = peekable.peek(); 454 | // Check whether we have a value line or not. 455 | if let Some(line) = first_line { 456 | // We check whether the first value line is indented. If so, we remember the indent since 457 | // every following value line has to have the exact same indent. 458 | let first_indent = line.len() - line.trim_start().len(); 459 | if first_indent > 0 { 460 | let result = peekable 461 | .take_while(|line| { 462 | line.is_empty() || line.len() - line.trim_start().len() == first_indent 463 | }) 464 | .map(|line| line.trim()) 465 | .collect::>() 466 | .join("\n"); 467 | log::info!( 468 | "found value for key {} from yaml:\n{}", 469 | YAML_CONFIG_KEY, 470 | result 471 | ); 472 | match block_type { 473 | YAMLBlockStartLineType::Pipe => result, 474 | YAMLBlockStartLineType::Angle => result 475 | .split("\n\n") 476 | .map(|line| line.replace("\n", " ")) 477 | .collect::>() 478 | .join("\n"), 479 | YAMLBlockStartLineType::None => String::new(), 480 | } 481 | } else { 482 | log::info!("no value line found"); 483 | String::new() 484 | } 485 | } else { 486 | log::info!("key {} not found", YAML_CONFIG_KEY); 487 | String::new() 488 | } 489 | } 490 | 491 | #[cfg(test)] 492 | mod test { 493 | use super::*; 494 | 495 | #[test] 496 | fn detect_whitespace() { 497 | let text = "some test with witespace at some\nlocations"; 498 | let detected = whitespace_indices(text, &WhitespaceDetector::default()); 499 | let expected = vec![ 500 | (4, ' '), 501 | (9, ' '), 502 | (14, ' '), 503 | (24, ' '), 504 | (27, ' '), 505 | (28, '\t'), 506 | (33, '\n'), 507 | ] 508 | .into_iter() 509 | .collect::>(); 510 | 511 | assert_eq!(expected, detected); 512 | } 513 | 514 | #[test] 515 | fn merging_ranges() { 516 | let ranges = vec![ 517 | CharRange { start: 0, end: 4 }, 518 | CharRange { start: 5, end: 9 }, 519 | CharRange { start: 11, end: 15 }, 520 | CharRange { start: 11, end: 14 }, 521 | CharRange { start: 16, end: 19 }, 522 | CharRange { start: 23, end: 36 }, 523 | ]; 524 | let whitespace = whitespace_indices( 525 | "some text\n\nmore text | even more text", 526 | &WhitespaceDetector::default(), 527 | ); 528 | 529 | let merged = merge_ranges(ranges, &whitespace); 530 | 531 | let expected = vec![ 532 | CharRange { start: 0, end: 9 }, 533 | CharRange { start: 11, end: 19 }, 534 | CharRange { start: 23, end: 36 }, 535 | ]; 536 | 537 | assert_eq!(expected, merged); 538 | } 539 | 540 | #[test] 541 | fn parsing_markdown() { 542 | let text = r#" 543 | ## Some Heading 544 | 545 | Some text. 546 | 547 | 548 | 549 | - More text. 550 | - More text. 551 | - Even more text. 552 | - Some text with a [link]. 
553 | 554 | ```code 555 | some code 556 | ``` 557 | 558 | [link]: https://something.com "some link" 559 | "#; 560 | let cfg = ParseCfg { 561 | keep_linebreaks: false, 562 | }; 563 | let parsed = parse_markdown(text, &cfg); 564 | 565 | // [18..28, 52..62, 65..75, 80..95, 100..124] 566 | let expected = vec![ 567 | CharRange { start: 18, end: 28 }, 568 | CharRange { start: 52, end: 62 }, 569 | CharRange { start: 65, end: 75 }, 570 | CharRange { start: 80, end: 95 }, 571 | CharRange { 572 | start: 100, 573 | end: 124, 574 | }, 575 | ]; 576 | 577 | assert_eq!(expected, parsed); 578 | } 579 | 580 | #[test] 581 | fn applying_to_no_block_quotes_remains_unchanged() { 582 | let text = r#" 583 | ## Some Heading 584 | 585 | Some text without block quotes. 586 | 587 | 588 | 589 | - More text. 590 | - More text. 591 | - Even more text. 592 | - Some text with a [link]. 593 | 594 | ```code 595 | some code 596 | ``` 597 | 598 | [link]: https://something.com "some link" 599 | "#; 600 | 601 | let unchanged = BlockQuotes::new(text).apply_to_matches_and_join(|_, _| String::new()); 602 | assert_eq!(text.to_string(), unchanged); 603 | } 604 | 605 | #[test] 606 | fn applying_to_block_quotes() { 607 | let text = r#" 608 | ## Some Heading 609 | 610 | Some text with block quotes. 611 | 612 | > This first text is block quoted. 613 | > 614 | >> This text is quoted at the second level. 615 | > 616 | > Some more quotes at the first level. 617 | 618 | 619 | 620 | - More text. 621 | - More text. 622 | - Even more text. 623 | - Some text with a [link]. 624 | 625 | > This second text is also block quoted. 626 | > 627 | > > This text is quoted at the second level. 628 | > 629 | > Some more quotes at the first level. 630 | 631 | - Some text. 632 | 633 | > This third text is block quoted but inside an itemization. 634 | > 635 | >> This text is quoted at the second level. 636 | > 637 | > Some more quotes at the first level. 638 | 639 | More text. 640 | 641 | [link]: https://something.com "some link" 642 | "#; 643 | 644 | let expected = r#" 645 | ## Some Heading 646 | 647 | Some text with block quotes. 648 | 649 | > 2:115 650 | > 2:115 651 | > 2:115 652 | 653 | 654 | 655 | - More text. 656 | - More text. 657 | - Even more text. 658 | - Some text with a [link]. 659 | 660 | > 2:121 661 | > 2:121 662 | > 2:121 663 | 664 | - Some text. 665 | 666 | > 4:141 667 | > 4:141 668 | > 4:141 669 | 670 | More text. 
671 | 672 | [link]: https://something.com "some link" 673 | "#; 674 | 675 | let changed = BlockQuotes::new(text).apply_to_matches_and_join(|s, i| { 676 | format!("{}:{}\n{}:{}\n{}:{}\n", i, s.len(), i, s.len(), i, s.len()) 677 | }); 678 | assert_eq!(expected, changed); 679 | } 680 | 681 | #[test] 682 | fn flattening_vecs_of_char_ranges_retains_ranges() { 683 | let to_be_flattened = vec![ 684 | vec![CharRange { start: 0, end: 10 }], 685 | vec![ 686 | CharRange { 687 | start: 100, 688 | end: 110, 689 | }, 690 | CharRange { 691 | start: 200, 692 | end: 210, 693 | }, 694 | ], 695 | ]; 696 | let flat = to_be_flattened.into_iter().flatten().collect::>(); 697 | let expected = vec![(0..10), (100..110), (200..210)]; 698 | assert_eq!(expected, flat); 699 | } 700 | 701 | fn build_yaml( 702 | key: &str, 703 | space_before_colon: bool, 704 | block_marker: &str, 705 | indent_spaces: usize, 706 | content: &str, 707 | ) -> String { 708 | let indent = (0..indent_spaces).map(|_| " ").collect::(); 709 | let indented = content 710 | .lines() 711 | .map(|line| format!("{}{}\n", indent, line)) 712 | .collect::(); 713 | let maybe_space = if space_before_colon { " " } else { "" }; 714 | let result = format!("{}{}: {}\n{}", key, maybe_space, block_marker, indented); 715 | // Ensure that values were filled in. 716 | assert_ne!(result, String::from(": \n")); 717 | result 718 | } 719 | 720 | const YAML_BASE_CONTENT: &str = r#" 721 | some content with an empty line 722 | 723 | at the beginning and in the middle"#; 724 | 725 | #[test] 726 | fn building_yaml() { 727 | let yaml = build_yaml(YAML_CONFIG_KEY, true, "|", 4, YAML_BASE_CONTENT); 728 | let expected = r#"mdslw-toml : | 729 | 730 | some content with an empty line 731 | 732 | at the beginning and in the middle 733 | "#; 734 | assert_eq!(yaml, expected); 735 | } 736 | 737 | #[test] 738 | fn extracting_yaml_string_pipe_block_markers() { 739 | for has_space in [true, false] { 740 | for marker in ["|", "|-", "|+"] { 741 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, YAML_BASE_CONTENT); 742 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 743 | assert_eq!(extracted, YAML_BASE_CONTENT); 744 | } 745 | } 746 | } 747 | 748 | #[test] 749 | fn extracting_yaml_string_angle_block_markers() { 750 | let expected = r#" some content with an empty line 751 | at the beginning and in the middle"#; 752 | for has_space in [true, false] { 753 | for marker in [">", ">-", ">+"] { 754 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, YAML_BASE_CONTENT); 755 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 756 | assert_eq!(extracted, expected); 757 | } 758 | } 759 | } 760 | 761 | #[test] 762 | fn extracting_yaml_string_pipe_block_markers_wrong_key() { 763 | let key = "some-other-key"; 764 | assert_ne!(key, YAML_CONFIG_KEY); 765 | for has_space in [true, false] { 766 | for marker in ["|", "|-", "|+"] { 767 | let yaml = build_yaml(key, has_space, marker, 4, YAML_BASE_CONTENT); 768 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 769 | assert_eq!(extracted, String::new()); 770 | } 771 | } 772 | } 773 | 774 | #[test] 775 | fn extracting_yaml_string_angle_block_markers_wrong_key() { 776 | let key = "some-other-key"; 777 | assert_ne!(key, YAML_CONFIG_KEY); 778 | for has_space in [true, false] { 779 | for marker in [">", ">-", ">+"] { 780 | let yaml = build_yaml(key, has_space, marker, 4, YAML_BASE_CONTENT); 781 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 782 | assert_eq!(extracted, String::new()); 783 | } 784 | } 785 | 
} 786 | 787 | #[test] 788 | fn extracting_yaml_string_empty_content() { 789 | let key = "some-other-key"; 790 | for has_space in [true, false] { 791 | for marker in ["|", "|-", "|+"] { 792 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, "") 793 | + build_yaml(key, has_space, marker, 4, "").as_str(); 794 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 795 | assert_eq!(extracted, ""); 796 | } 797 | } 798 | } 799 | 800 | #[test] 801 | fn malformed_yaml_file_does_not_break_extraction() { 802 | let yaml = build_yaml(YAML_CONFIG_KEY, false, "|", 4, "does not matter\nat all"); 803 | let malformed = yaml.replace(": |", ""); 804 | let extracted = get_value_for_mdslw_toml_yaml_key(&malformed); 805 | assert_eq!(extracted, "".to_string()); 806 | } 807 | 808 | #[test] 809 | fn config_keys_are_identical() { 810 | assert_eq!( 811 | YAML_CONFIG_KEY.to_string() + ":", 812 | YAML_CONFIG_KEY_WITH_COLON 813 | ); 814 | } 815 | } 816 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. 
If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. 
Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 
234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 
296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 
414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. 
The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | 676 | --------------------------------------------------------------------------------