├── src
│   ├── lang
│   │   ├── .gitkeep
│   │   └── ac
│   ├── indent.rs
│   ├── diff.rs
│   ├── ignore.rs
│   ├── lang.rs
│   ├── frontmatter.rs
│   ├── features.rs
│   ├── linebreak.rs
│   ├── logging.rs
│   ├── ranges.rs
│   ├── wrap.rs
│   ├── detect.rs
│   ├── call.rs
│   ├── fs.rs
│   ├── main.rs
│   ├── cfg.rs
│   └── parse.rs
├── .gitignore
├── .mdslw.toml
├── Cargo.toml
├── .github
│   └── workflows
│       └── ci.yml
├── Makefile
├── README.md
└── LICENCE
/src/lang/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /dist
3 | /.coverage.*
4 | /.envrc
5 |
--------------------------------------------------------------------------------
/.mdslw.toml:
--------------------------------------------------------------------------------
1 | case = "ignore"
2 | end-markers = "?!:."
3 | features = "format-block-quotes,collate-link-defs,outsource-inline-links"
4 | ignores = ""
5 | lang = "ac"
6 | max-width = 80
7 | suppressions = ""
8 | upstream = ""
9 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "mdslw"
3 | version = "0.16.1"
4 | edition = "2021"
5 |
6 | [profile.release]
7 | # Optimize release binaries.
8 | strip = true
9 | lto = true
10 |
11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
12 |
13 | [dependencies]
14 | anyhow = { version = "1", features = ["std", "backtrace"] }
15 | clap = { version = "4", features = ["env", "derive"] }
16 | clap_complete = "4"
17 | ignore = "0.4"
18 | include_dir = "0.7"
19 | log = { version = "0.4", features = ["std"] }
20 | pulldown-cmark = { version = "0.13", default-features = false }
21 | rayon = "1"
22 | similar = "2"
23 | tempfile = "3"
24 | serde = { version = "1", features = ["derive"] }
25 | toml = { version = "0.9", default-features = false, features = ["parse", "display", "serde"] }
26 |
27 | [build-dependencies]
28 | reqwest = { version = "0.12", features = ["default", "json", "blocking"] }
29 | serde_json = { version = "1" }
30 |
--------------------------------------------------------------------------------
/src/indent.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | pub fn build_indent(num: usize) -> String {
19 | (0..num).map(|_| ' ').collect::<String>()
20 | }
21 |
22 | #[cfg(test)]
23 | mod test {
24 | use super::*;
25 |
26 | #[test]
27 | fn can_build_indents() {
28 | let three = build_indent(3);
29 | assert_eq!(three, String::from(" "));
30 |
31 | let four = build_indent(4);
32 | assert_eq!(four, String::from(" "));
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/src/lang/ac:
--------------------------------------------------------------------------------
1 | AA.
2 | AB.
3 | Abs.
4 | A.D.
5 | Adj.
6 | Adv.
7 | Alt.
8 | a.m.
9 | A.M.
10 | Approx.
11 | A.S.
12 | Aug.
13 | btw.
14 | Btw.
15 | B.V.
16 | Capt.
17 | C.F.
18 | cf.
19 | Cf.
20 | CF.
21 | C.O.D.
22 | Comm.
23 | Conn.
24 | Cont.
25 | D.A.
26 | D.C.
27 | DC.
28 | Dec.
29 | Dept.
30 | Dr.
31 | DR.
32 | e.g.
33 | E.g.
34 | E.G.
35 | Est.
36 | etc.
37 | Etc.
38 | ETC.
39 | Feb.
40 | Fn.
41 | Fri.
42 | Gb.
43 | Hon.B.A.
44 | Hz.
45 | I.D.
46 | i.e.
47 | I.e.
48 | I.E.
49 | I.T.
50 | Jan.
51 | J.B.
52 | J.D.
53 | J.K.
54 | Jun.
55 | Kb.
56 | K.R.
57 | L.A.
58 | Lev.
59 | lib.
60 | Lib.
61 | L.P.
62 | Lt.
63 | Lt.Cdr.
64 | Maj.
65 | Mar.
66 | Mart.
67 | Mb.
68 | Md.
69 | Mgr.
70 | M.I.T.
71 | M.R.
72 | Mr.
73 | MR.
74 | Mrs.
75 | Ms.
76 | M.T.
77 | Mt.
78 | Nov.
79 | nr.
80 | Nr.
81 | num.
82 | Num.
83 | N.V.
84 | N.Y.
85 | PC.
86 | Ph.D.
87 | Phys.
88 | P.M.
89 | P.O.
90 | pp.
91 | PP.
92 | Prof.
93 | P.V.
94 | Pvt.
95 | Rep.
96 | Rev.
97 | R.L.
98 | R.T.
99 | S.A.
100 | S.A.R.
101 | S.E.
102 | Sep.
103 | Sept.
104 | Sgt.
105 | S.p.A.
106 | Sq.
107 | U.S.
108 | U.S.A.
109 | U.S.C.
110 | vs.
111 | VS.
112 | Yr.
113 |
114 |
--------------------------------------------------------------------------------
/src/diff.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use std::path::Path;
19 |
20 | use similar::{udiff::unified_diff, Algorithm};
21 |
22 | const CONTEXT: usize = 4;
23 |
24 | pub enum Algo {
25 | Myers,
26 | Patience,
27 | Lcs,
28 | }
29 |
30 | impl Algo {
31 | fn to_internal(&self) -> Algorithm {
32 | match self {
33 | Self::Myers => Algorithm::Myers,
34 | Self::Patience => Algorithm::Patience,
35 | Self::Lcs => Algorithm::Lcs,
36 | }
37 | }
38 |
39 | pub fn generate(&self, new: &str, org: &str, filename: &Path) -> String {
40 | let original = format!("original:{}", filename.to_string_lossy());
41 | let processed = format!("processed:{}", filename.to_string_lossy());
42 | let names = (original.as_ref(), processed.as_ref());
43 | unified_diff(self.to_internal(), org, new, CONTEXT, Some(names))
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
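
A minimal usage sketch for src/diff.rs above; `print_diff` is a hypothetical caller and assumes `Algo` is in scope:

```rust
use std::path::Path;

// Hypothetical caller: render a unified diff between the original file
// contents and the freshly formatted text.
fn print_diff(original: &str, formatted: &str, path: &Path) {
    // `generate` takes the new text first and the original text second.
    let diff = Algo::Patience.generate(formatted, original, path);
    print!("{}", diff);
}
```
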
/src/ignore.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | const IGNORE_START: &str = "mdslw-ignore-start";
19 | const IGNORE_END: &str = "mdslw-ignore-end";
20 |
21 | const PRETTIER_IGNORE_START: &str = "prettier-ignore-start";
22 | const PRETTIER_IGNORE_END: &str = "prettier-ignore-end";
23 |
24 | fn is_html_comment(s: &str) -> bool {
25 | s.starts_with("<!--") && (s.ends_with("-->") || s.ends_with("-->\n"))
26 | }
27 |
28 | pub struct IgnoreByHtmlComment {
29 | ignore: bool,
30 | }
31 |
32 | impl IgnoreByHtmlComment {
33 | pub fn new() -> Self {
34 | Self { ignore: false }
35 | }
36 |
37 | /// Determine whether the HTML that is processed is a comment and whether it modifies the
38 | /// ignore behaviour.
39 | pub fn process_html(&mut self, s: &str) {
40 | if is_html_comment(s) {
41 | if s.contains(IGNORE_START) || s.contains(PRETTIER_IGNORE_START) {
42 | log::debug!("detected ignore start directive");
43 | self.ignore = true
44 | }
45 | if s.contains(IGNORE_END) || s.contains(PRETTIER_IGNORE_END) {
46 | log::debug!("detected ignore stop directive");
47 | self.ignore = false
48 | }
49 | }
50 | }
51 |
52 | pub fn should_be_ignored(&self) -> bool {
53 | self.ignore
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
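
A short sketch of how the tracker above reacts to the ignore directives; the walkthrough is hypothetical and assumes `IgnoreByHtmlComment` from src/ignore.rs is in scope:

```rust
// Hypothetical walkthrough: HTML comment tokens are fed to the tracker in
// document order and toggle the ignore state.
fn ignore_walkthrough() {
    let mut tracker = IgnoreByHtmlComment::new();
    assert!(!tracker.should_be_ignored());

    tracker.process_html("<!-- mdslw-ignore-start -->");
    assert!(tracker.should_be_ignored());

    // The prettier directives are honoured, too.
    tracker.process_html("<!-- prettier-ignore-end -->");
    assert!(!tracker.should_be_ignored());
}
```
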
/src/lang.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use anyhow::{Error, Result};
19 | use include_dir::{include_dir, Dir};
20 |
21 | static LANG_FILES_DIR: Dir<'_> = include_dir!("$MDSLW_LANG_DIR");
22 |
23 | pub fn keep_word_list(lang_names: &str) -> Result<String> {
24 | let mut errors = vec![];
25 |
26 | let keep_words = lang_names
27 | .split_terminator(',')
28 | .flat_map(|el| el.split_whitespace())
29 | .filter_map(|el| {
30 | if el == "none" {
31 | Some(String::new())
32 | } else if let Some(content) = LANG_FILES_DIR
33 | .get_file(el)
34 | .and_then(|el| el.contents_utf8())
35 | {
36 | log::debug!("loaded keep word list for language '{}'", el);
37 | Some(content.to_string())
38 | } else {
39 | errors.push(el);
40 | None
41 | }
42 | })
43 | .collect::<String>();
44 |
45 | if errors.is_empty() {
46 | Ok(keep_words)
47 | } else {
48 | Err(Error::msg(format!(
49 | "unknown or unsupported languages: {}",
50 | errors.join(", ")
51 | )))
52 | }
53 | }
54 |
55 | #[cfg(test)]
56 | mod test {
57 | use super::*;
58 |
59 | #[test]
60 | fn nothing_disables_words() -> Result<()> {
61 | let list = keep_word_list("")?;
62 | assert_eq!(list, String::new());
63 | Ok(())
64 | }
65 |
66 | #[test]
67 | fn none_disables_words() -> Result<()> {
68 | let list = keep_word_list("none")?;
69 | assert_eq!(list, String::new());
70 | Ok(())
71 | }
72 |
73 | #[test]
74 | fn some_langs_are_supported() -> Result<()> {
75 | let langs = "de en es fr it";
76 | let list = keep_word_list(langs)?;
77 | assert_ne!(list, String::new());
78 | Ok(())
79 | }
80 |
81 | #[test]
82 | fn unsupported_langs() {
83 | let langs = "unsupported";
84 | let list = keep_word_list(langs);
85 | assert!(list.is_err());
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
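
A usage sketch for `keep_word_list`; the wrapper is hypothetical and relies on the bundled `ac` list shown above being available through `MDSLW_LANG_DIR` at build time:

```rust
// Hypothetical helper: load the bundled abbreviation list and check that a
// well-known keep word made it into the result.
fn load_keep_words() -> anyhow::Result<String> {
    // Language names may be separated by commas and/or whitespace; "none"
    // yields an empty list and unknown names produce an error.
    let words = keep_word_list("ac")?;
    assert!(words.contains("e.g."));
    Ok(words)
}
```
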
/src/frontmatter.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | const FRONTMATTER_SEPARATOR: &str = "---\n";
19 |
20 | pub fn extract_frontmatter(text: &str) -> String {
21 | let mut lines = text.split_inclusive('\n');
22 | let first = lines.next();
23 | if Some(FRONTMATTER_SEPARATOR) != first {
24 | log::debug!("no frontmatter starting delimiter detected");
25 | String::new()
26 | } else {
27 | let mut matter_len = FRONTMATTER_SEPARATOR.len();
28 | let mut found_end_sep = false;
29 | lines
30 | .take_while(|line| {
31 | let do_continue = !found_end_sep;
32 | found_end_sep |= line == &FRONTMATTER_SEPARATOR;
33 | do_continue
34 | })
35 | .for_each(|line| matter_len += line.len());
36 | if !found_end_sep {
37 | // There was no frontmatter since we did not find the end separator.
38 | log::debug!("no frontmatter ending delimiter detected");
39 | String::new()
40 | } else {
41 | log::debug!("found {} bytes of frontmatter", matter_len);
42 | // There was indeed frontmatter. This slicing operation can never error out since we did
43 | // extract the frontmatter from the text.
44 | let matter = &text[..matter_len];
45 | matter.to_owned()
46 | }
47 | }
48 | }
49 |
50 | #[cfg(test)]
51 | mod test {
52 | use super::*;
53 |
54 | const FRONTMATTER_FOR_TEST: &str = "---\nsome text\nasdf: ---\nmultiple: lines\n---\n";
55 |
56 | #[test]
57 | fn extracting_frontmatter() {
58 | let matter = extract_frontmatter(FRONTMATTER_FOR_TEST);
59 |
60 | assert_eq!(matter, FRONTMATTER_FOR_TEST.to_string());
61 | }
62 |
63 | #[test]
64 | fn splitting_frontmatter_with_rest() {
65 | let matter = extract_frontmatter(&format!("{}some\nmore\ntext\n", FRONTMATTER_FOR_TEST));
66 |
67 | assert_eq!(matter, FRONTMATTER_FOR_TEST.to_string());
68 | }
69 |
70 | #[test]
71 | fn frontmatter_has_to_start_text() {
72 | let text = format!("something\n{}", FRONTMATTER_FOR_TEST);
73 | let matter = extract_frontmatter(&text);
74 |
75 | assert_eq!(matter, String::new());
76 | }
77 |
78 | #[test]
79 | fn frontmatter_has_to_have_ending_separator() {
80 | let text = FRONTMATTER_FOR_TEST[..FRONTMATTER_FOR_TEST.len() - 1].to_string();
81 | let matter = extract_frontmatter(&text);
82 |
83 | assert_eq!(matter, String::new());
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
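
A sketch of how a caller might split a document using `extract_frontmatter`; the helper below is hypothetical:

```rust
// Hypothetical helper: separate frontmatter from the markdown body so that
// only the body is passed on for wrapping.
fn split_document(text: &str) -> (String, String) {
    let matter = extract_frontmatter(text);
    // `matter` is always a prefix of `text`, so slicing by its length is safe.
    let body = text[matter.len()..].to_string();
    (matter, body)
}
```
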
/src/features.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use anyhow::{Error, Result};
19 |
20 | use crate::detect::BreakCfg;
21 | use crate::parse::ParseCfg;
22 |
23 | #[derive(Debug, PartialEq)]
24 | pub struct FeatureCfg {
25 | pub keep_spaces_in_links: bool,
26 | pub format_block_quotes: bool,
27 | pub collate_link_defs: bool,
28 | pub outsource_inline_links: bool,
29 | pub break_cfg: BreakCfg,
30 | pub parse_cfg: ParseCfg,
31 | }
32 |
33 | impl Default for FeatureCfg {
34 | fn default() -> Self {
35 | FeatureCfg {
36 | keep_spaces_in_links: false,
37 | format_block_quotes: false,
38 | collate_link_defs: false,
39 | outsource_inline_links: false,
40 | parse_cfg: ParseCfg {
41 | keep_linebreaks: false,
42 | },
43 | break_cfg: BreakCfg {
44 | keep_linebreaks: false,
45 | },
46 | }
47 | }
48 | }
49 |
50 | impl std::str::FromStr for FeatureCfg {
51 | type Err = Error;
52 |
53 | fn from_str(s: &str) -> Result<Self> {
54 | let mut cfg = Self::default();
55 | let mut errors = vec![];
56 |
57 | // Parse all possible features and toggle them as desired.
58 | for feature in s
59 | .split_terminator(',')
60 | .flat_map(|el| el.split_whitespace())
61 | .map(|el| el.trim())
62 | .filter(|el| !el.is_empty())
63 | {
64 | match feature {
65 | "keep-spaces-in-links" => cfg.keep_spaces_in_links = true,
66 | "format-block-quotes" => cfg.format_block_quotes = true,
67 | "collate-link-defs" => cfg.collate_link_defs = true,
68 | "outsource-inline-links" => cfg.outsource_inline_links = true,
69 | "keep-linebreaks" => {
70 | cfg.parse_cfg.keep_linebreaks = true;
71 | cfg.break_cfg.keep_linebreaks = true;
72 | }
73 | // Do not accept any other entry.
74 | _ => errors.push(feature),
75 | }
76 | }
77 |
78 | if errors.is_empty() {
79 | log::debug!("loaded features: {:?}", cfg);
80 | Ok(cfg)
81 | } else {
82 | Err(Error::msg(format!(
83 | "unknown features: {}",
84 | errors.join(", ")
85 | )))
86 | }
87 | }
88 | }
89 |
90 | #[cfg(test)]
91 | mod test {
92 | use super::*;
93 | #[test]
94 | fn swapping_all_features_and_disregarding_whitespace() -> Result<()> {
95 | let default = FeatureCfg::default();
96 | let swapped = FeatureCfg {
97 | keep_spaces_in_links: !default.keep_spaces_in_links,
98 | format_block_quotes: !default.format_block_quotes,
99 | collate_link_defs: !default.collate_link_defs,
100 | outsource_inline_links: !default.outsource_inline_links,
101 | parse_cfg: ParseCfg {
102 | keep_linebreaks: !default.parse_cfg.keep_linebreaks,
103 | },
104 | break_cfg: BreakCfg {
105 | keep_linebreaks: !default.break_cfg.keep_linebreaks,
106 | },
107 | };
108 |
109 | let parsed =
110 | "keep-spaces-in-links , keep-linebreaks ,format-block-quotes, collate-link-defs,outsource-inline-links"
111 | .parse::<FeatureCfg>()?;
112 |
113 | assert_eq!(parsed, swapped);
114 | Ok(())
115 | }
116 |
117 | #[test]
118 | fn failure_to_parse() -> Result<()> {
119 | let parsed = "unknown".parse::<FeatureCfg>();
120 | assert!(parsed.is_err());
121 | Ok(())
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
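
A sketch parsing the same `features` value that appears in .mdslw.toml above; it assumes `FeatureCfg` is in scope:

```rust
// Hypothetical check: the FromStr implementation tolerates commas and
// whitespace as separators and rejects unknown feature names.
fn parse_feature_string() -> anyhow::Result<()> {
    let cfg = "format-block-quotes, collate-link-defs,outsource-inline-links"
        .parse::<FeatureCfg>()?;
    assert!(cfg.format_block_quotes && cfg.collate_link_defs);
    assert!(cfg.outsource_inline_links && !cfg.parse_cfg.keep_linebreaks);
    Ok(())
}
```
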
/src/linebreak.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use std::collections::HashSet;
19 |
20 | use crate::detect::{BreakDetector, WhitespaceDetector};
21 |
22 | pub fn insert_linebreaks_after_sentence_ends(text: &str, detector: &BreakDetector) -> String {
23 | let merged = normalise_linebreaks(text, &detector.whitespace);
24 | let sentence_ends = find_sentence_ends(&merged, detector);
25 |
26 | merged
27 | .chars()
28 | .enumerate()
29 | .filter_map(|(idx, el)| {
30 | if sentence_ends.contains(&Char::Skip(idx)) {
31 | None
32 | } else if sentence_ends.contains(&Char::Split(idx)) {
33 | Some(format!("\n{}", el))
34 | } else {
35 | Some(format!("{}", el))
36 | }
37 | })
38 | .collect::<String>()
39 | }
40 |
41 | /// Replace all linebreaks by spaces unless they have been escaped by a non-breaking space.
42 | fn normalise_linebreaks(text: &str, detector: &WhitespaceDetector) -> String {
43 | let mut last_was_nbsp = false;
44 | text.chars()
45 | .map(|el| {
46 | let replacement = if el != '\n' || last_was_nbsp { el } else { ' ' };
47 | last_was_nbsp = detector.is_nbsp(&el);
48 | replacement
49 | })
50 | .collect::<String>()
51 | }
52 |
53 | #[derive(Eq, Hash, PartialEq, Debug)]
54 | enum Char {
55 | Skip(usize),
56 | Split(usize),
57 | }
58 |
59 | fn find_sentence_ends(text: &str, detector: &BreakDetector) -> HashSet<Char> {
60 | let as_chars = text.chars().collect::<Vec<char>>();
61 |
62 | as_chars
63 | .iter()
64 | .enumerate()
65 | .filter_map(|(idx, ch)| {
66 | let next = as_chars.get(idx + 1);
67 |
68 | if detector.is_breaking_marker(ch, next)
69 | && !detector.ends_with_keep_word(&as_chars, &idx)
70 | {
71 | Some([Char::Skip(idx + 1), Char::Split(idx + 2)])
72 | } else {
73 | None
74 | }
75 | })
76 | .flatten()
77 | .collect::<HashSet<_>>()
78 | }
79 |
80 | #[cfg(test)]
81 | mod test {
82 | use super::*;
83 | use crate::detect::BreakCfg;
84 |
85 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg {
86 | keep_linebreaks: false,
87 | };
88 |
89 | #[test]
90 | fn finding_sentence_ends() {
91 | let text = "words that. are. followed by. periods. period.";
92 | let detector = BreakDetector::new("are. by.", "", false, ".", CFG_FOR_TESTS);
93 |
94 | let ends = find_sentence_ends(text, &detector);
95 |
96 | // We never detect a sentence end at the end of the text.
97 | let expected = vec![
98 | Char::Skip(11),
99 | Char::Split(12),
100 | Char::Skip(38),
101 | Char::Split(39),
102 | ]
103 | .into_iter()
104 | .collect::<HashSet<_>>();
105 |
106 | assert_eq!(expected, ends);
107 | }
108 |
109 | #[test]
110 | fn normalising_linebreaks() {
111 | // All whitespace, including tabs, is merged into single spaces.
112 | let text = " \n text with lots\n \nof white \n space ";
113 | let expected = " text with lots \nof white \n space ";
114 |
115 | let merged = normalise_linebreaks(text, &WhitespaceDetector::default());
116 |
117 | assert_eq!(expected, merged);
118 | }
119 |
120 | #[test]
121 | fn inserting_linebreaks_between_sentences() {
122 | let text = "words that. are. followed by. periods. period.";
123 | let detector = BreakDetector::new("are. by.", "", false, ".", CFG_FOR_TESTS);
124 |
125 | let broken = insert_linebreaks_after_sentence_ends(text, &detector);
126 |
127 | // We never detect a sentence end at the end of the text.
128 | let expected = "words that.\nare. followed by. periods.\nperiod.";
129 |
130 | assert_eq!(expected, broken);
131 | }
132 | }
133 |
--------------------------------------------------------------------------------
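
A sketch of the sentence-splitting entry point above; the detector arguments mirror the ones used in the tests, and the helper itself is hypothetical:

```rust
// Hypothetical example: break after sentence ends but keep "e.g." glued to
// the following word because it is registered as a keep word.
fn break_after_sentences() -> String {
    let cfg = BreakCfg { keep_linebreaks: false };
    let detector = BreakDetector::new("e.g.", "", false, ".", &cfg);
    // Expected: a break after the first period only.
    insert_linebreaks_after_sentence_ends("One sentence. Another, e.g. this one.", &detector)
}
```
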
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: "ci"
2 |
3 | on:
4 | push:
5 | pull_request:
6 | branches: main
7 |
8 | # Ensure there is only ever one workflow of this kind running at a time.
9 | concurrency:
10 | group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
11 | cancel-in-progress: true
12 |
13 | permissions:
14 | contents: write
15 |
16 | jobs:
17 | ci:
18 | runs-on: ubuntu-latest
19 | # Avoid very long running jobs.
20 | timeout-minutes: 30
21 |
22 | steps:
23 | - name: Checkout repository
24 | uses: actions/checkout@v4
25 | with:
26 | fetch-depth: 0
27 |
28 | - name: Remove possible compilation remnants
29 | run: rm -rf ./target ./dist
30 |
31 | - name: Install dependencies
32 | run: |
33 | sudo apt-get update
34 | sudo apt-get install -yqq bash curl make git jq
35 |
36 | - name: Install Rust
37 | run: |
38 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > install_rust.sh
39 | sh install_rust.sh -y
40 | echo "$HOME/.cargo/bin" >> "${GITHUB_PATH}"
41 |
42 | - name: Build binary
43 | run: make build-dev
44 | # Treat all warnings as errors.
45 | env:
46 | RUSTFLAGS: "-Dwarnings"
47 |
48 | - name: Lint
49 | run: make lint
50 |
51 | - name: Run tests
52 | run: make test
53 |
54 | - name: Check coverage
55 | run: make coverage
56 |
57 | # Skip coverage upload for now because something goes wrong. TODO: Debug.
58 | # - uses: actions/upload-artifact@v4
59 | # if: always()
60 | # with:
61 | # name: coverage
62 | # path: |
63 | # .coverage.html
64 | # .coverage.json
65 | # if-no-files-found: error
66 | # retention-days: 7
67 |
68 | macos-release:
69 | runs-on: macos-latest
70 | needs: [ci]
71 | # Avoid very long running jobs.
72 | timeout-minutes: 20
73 |
74 | steps:
75 | - name: Checkout repository
76 | uses: actions/checkout@v4
77 | with:
78 | fetch-depth: 0
79 |
80 | - name: Remove possible compilation remnants
81 | run: rm -rf ./target ./dist
82 |
83 | - name: Add targets
84 | run: |
85 | rustup update
86 | rustup target add x86_64-apple-darwin
87 | rustup target add aarch64-apple-darwin
88 |
89 | - name: Build binaries
90 | run: |
91 | mkdir ./dist
92 | cargo build --release --target=aarch64-apple-darwin
93 | cp target/aarch64-apple-darwin/release/mdslw ./dist/mdslw_aarch64-apple-darwin
94 | cargo build --release --target=x86_64-apple-darwin
95 | cp target/x86_64-apple-darwin/release/mdslw ./dist/mdslw_x86_64-apple-darwin
96 |
97 | - uses: actions/upload-artifact@v4
98 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
99 | with:
100 | name: macos-release
101 | path: ./dist/
102 | if-no-files-found: error
103 | retention-days: 1
104 |
105 | release:
106 | runs-on: ubuntu-latest
107 | needs: [ci, macos-release]
108 | # Avoid very long running jobs.
109 | timeout-minutes: 30
110 |
111 | steps:
112 | - name: Checkout repository
113 | uses: actions/checkout@v4
114 | with:
115 | fetch-depth: 0
116 |
117 | - name: Remove possible compilation remnants
118 | run: rm -rf ./target ./dist
119 |
120 | - name: Install dependencies
121 | run: |
122 | sudo apt-get update
123 | sudo apt-get install -yqq bash curl make git jq
124 |
125 | - name: Install Rust
126 | run: |
127 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > install_rust.sh
128 | sh install_rust.sh -y
129 | echo "$HOME/.cargo/bin" >> "${GITHUB_PATH}"
130 |
131 | - name: Install cross-compilation dependencies
132 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
133 | run: |
134 | # For ARM Linux.
135 | sudo apt-get install -yqq gcc-arm-linux-gnueabihf
136 | # For Windows.
137 | sudo apt-get install -yqq mingw-w64
138 |
139 | - name: Install toolchains
140 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
141 | run: make install-toolchains
142 |
143 | - name: Build all release binaries apart from MacOS
144 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
145 | run: |
146 | echo '[target.armv7-unknown-linux-gnueabihf]' >> ~/.cargo/config
147 | echo 'linker = "arm-linux-gnueabihf-gcc"' >> ~/.cargo/config
148 |
149 | make build-prod-all
150 |
151 | - name: Copy release binaries
152 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
153 | run: make copy-relese-binaries
154 |
155 | - name: Retrieve MacOS binaries
156 | uses: actions/download-artifact@v4
157 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
158 | with:
159 | name: macos-release
160 | path: ./dist/
161 |
162 | - name: List release binaries
163 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
164 | run: ls -l ./dist/*
165 |
166 | - name: Make release
167 | if: ${{ startsWith(github.ref, 'refs/tags/') }}
168 | uses: softprops/action-gh-release@v1
169 | with:
170 | files: |
171 | dist/mdslw_x86_64-unknown-linux-musl
172 | dist/mdslw_armv7-unknown-linux-gnueabihf
173 | dist/mdslw_x86_64-apple-darwin
174 | dist/mdslw_aarch64-apple-darwin
175 | dist/mdslw_x86_64-pc-windows-gnu.exe
176 |
--------------------------------------------------------------------------------
/src/logging.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use std::time;
19 |
20 | use log::{Level, Log, Metadata, Record};
21 |
22 | /// Execute a trace log while lazily evaluating the expressions whose values shall be logged. This
23 | /// macro takes a string literal, followed by expressions that will be evaluated lazily.
24 | #[macro_export]
25 | macro_rules! trace_log {
26 | ($fmt_str:literal, $($exprs:expr),*) => {
27 | if log::log_enabled!(log::Level::Trace) {
28 | log::trace!($fmt_str, $($exprs),*);
29 | }
30 | };
31 | }
32 |
33 | pub fn init_logging(level: u8) -> Result<(), log::SetLoggerError> {
34 | log::set_boxed_logger(Box::new(Logger::new(level)))
35 | .map(|()| log::set_max_level(log::LevelFilter::Trace))
36 | }
37 |
38 | const SELF_MODULE_NAME: &str = env!("CARGO_PKG_NAME");
39 |
40 | struct Logger {
41 | starttime: time::Instant,
42 | level: Level,
43 | module_name: String,
44 | module_prefix: String,
45 | }
46 |
47 | impl Logger {
48 | pub fn new(log_level: u8) -> Self {
49 | let level = match log_level {
50 | 0 => Level::Warn,
51 | 1 => Level::Info,
52 | 2 => Level::Debug,
53 | _ => Level::Trace,
54 | };
55 | Self {
56 | level,
57 | starttime: time::Instant::now(),
58 | module_name: SELF_MODULE_NAME.to_string(),
59 | module_prefix: format!("{}::", SELF_MODULE_NAME),
60 | }
61 | }
62 | }
63 |
64 | impl Log for Logger {
65 | fn enabled(&self, metadata: &Metadata) -> bool {
66 | metadata.level() <= self.level
67 | }
68 |
69 | fn log(&self, record: &Record) {
70 | if let Some(msg) = self.format_message(record) {
71 | eprintln!("{}", msg);
72 | }
73 | }
74 |
75 | fn flush(&self) {}
76 | }
77 |
78 | impl Logger {
79 | fn format_log_location(&self, record: &Record) -> String {
80 | let module = record.module_path_static().unwrap_or("");
81 |
82 | if module == self.module_name || module.starts_with(&self.module_prefix) {
83 | let file = record.file_static().unwrap_or("");
84 | let line = record.line().unwrap_or(0);
85 | format!("{}:{}:{}", module, file, line)
86 | } else {
87 | module.to_owned()
88 | }
89 | }
90 |
91 | fn format_message(&self, record: &Record) -> Option<String> {
92 | if self.enabled(record.metadata()) {
93 | let elapsed = self.starttime.elapsed();
94 | let elapsed_secs = elapsed.as_secs();
95 | let elapsed_millis = elapsed.subsec_millis();
96 | let thread_idx = rayon::current_thread_index()
97 | .map(|el| format!("@{}", el))
98 | .unwrap_or_default();
99 |
100 | Some(format!(
101 | "{}{}: {}s{}ms {}: {}",
102 | record.level(),
103 | thread_idx,
104 | elapsed_secs,
105 | elapsed_millis,
106 | self.format_log_location(record),
107 | record.args()
108 | ))
109 | } else {
110 | None
111 | }
112 | }
113 | }
114 |
115 | #[cfg(test)]
116 | mod test {
117 | use super::*;
118 | use anyhow::{Error, Result};
119 |
120 | #[test]
121 | fn new_logger() {
122 | let logger0 = Logger::new(0);
123 | assert_eq!(logger0.level, Level::Warn);
124 |
125 | let logger1 = Logger::new(1);
126 | assert_eq!(logger1.level, Level::Info);
127 |
128 | let logger2 = Logger::new(2);
129 | assert_eq!(logger2.level, Level::Debug);
130 |
131 | let logger3 = Logger::new(3);
132 | assert_eq!(logger3.level, Level::Trace);
133 | }
134 |
135 | #[test]
136 | fn logger_enabled() {
137 | let logger = Logger::new(0);
138 | assert_eq!(logger.level, Level::Warn);
139 |
140 | let metadata_err = Metadata::builder().level(Level::Error).build();
141 | let metadata_debug = Metadata::builder().level(Level::Debug).build();
142 |
143 | assert!(logger.enabled(&metadata_err));
144 | assert!(!logger.enabled(&metadata_debug));
145 | }
146 |
147 | #[test]
148 | fn logging_a_message_from_own_module() -> Result<()> {
149 | let args = format_args!("some thing");
150 | let metadata = Metadata::builder().level(Level::Error).build();
151 | let record = Record::builder()
152 | .metadata(metadata)
153 | .module_path_static(Some("mdslw::test"))
154 | .file_static(Some("test_file"))
155 | .args(args)
156 | .build();
157 |
158 | let logger = Logger::new(0);
159 | let msg = logger
160 | .format_message(&record)
161 | .ok_or(Error::msg("cannot build message"))?;
162 |
163 | // Check beginning and end because the test might take longer than 1ms, which would fail
164 | // it.
165 | assert!(msg.starts_with("ERROR: 0s"), "incorrect start: {}", msg);
166 | assert!(
167 | msg.ends_with("ms mdslw::test:test_file:0: some thing"),
168 | "incorrect end: {}",
169 | msg
170 | );
171 |
172 | Ok(())
173 | }
174 |
175 | #[test]
176 | fn logging_a_message_from_another_module() -> Result<()> {
177 | let args = format_args!("some thing");
178 | let metadata = Metadata::builder().level(Level::Error).build();
179 | let record = Record::builder()
180 | .metadata(metadata)
181 | .module_path_static(Some("some::other::module"))
182 | .file_static(Some("test_file"))
183 | .args(args)
184 | .build();
185 |
186 | let logger = Logger::new(0);
187 | let msg = logger
188 | .format_message(&record)
189 | .ok_or(Error::msg("cannot build message"))?;
190 |
191 | // Check beginning and end because the test might take longer than 1ms, which would fail
192 | // it.
193 | assert!(msg.starts_with("ERROR: 0s"), "incorrect start: {}", msg);
194 | assert!(
195 | msg.ends_with("ms some::other::module: some thing"),
196 | "incorrect end: {}",
197 | msg
198 | );
199 |
200 | Ok(())
201 | }
202 | }
203 |
--------------------------------------------------------------------------------
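
A sketch of wiring the logger and the `trace_log!` macro above together; `setup` is a hypothetical entry point and assumes the items from src/logging.rs are in scope:

```rust
// Hypothetical setup: install the logger once and emit a trace message whose
// arguments are only evaluated when trace logging is actually enabled.
fn setup(verbosity: u8) {
    // 0 => warn, 1 => info, 2 => debug, 3 and above => trace.
    init_logging(verbosity).expect("failed to install logger");
    trace_log!("normalised text: {}", "a\nb".replace('\n', "\\n"));
}
```
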
/src/ranges.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use crate::parse::CharRange;
19 | use crate::trace_log;
20 |
21 | #[derive(Debug, PartialEq)]
22 | pub enum WrapType {
23 | Indent(usize),
24 | Verbatim,
25 | }
26 |
27 | #[derive(Debug, PartialEq)]
28 | /// TextRange describes a range of characters in a document including whether they shall be
29 | /// repeated verbatim or not. It also contains the number of spaces of indent to use when wrapping
30 | /// the contained text.
31 | pub struct TextRange {
32 | pub wrap: WrapType,
33 | pub range: CharRange,
34 | }
35 |
36 | /// The first argument contains those ranges in the document that shall be wrapped. Every
37 | /// character in the document that is not inside such a range will be taken verbatim. This also
38 | /// determines the starting indent in spaces for every range that shall be wrapped.
39 | pub fn fill_markdown_ranges(wrap_ranges: Vec<CharRange>, text: &str) -> Vec<TextRange> {
40 | let mut last_end = 0;
41 |
42 | let lines = line_ranges(text);
43 |
44 | wrap_ranges
45 | .into_iter()
46 | // Append an element that points at the end of the document to ensure that we always add
47 | // the last ranges in the document because we always add a verbatim range before the
48 | // non-verbatim range.
49 | .chain([CharRange {
50 | start: text.len(),
51 | end: text.len(),
52 | }])
53 | .flat_map(|el| {
54 | let verbatim = TextRange {
55 | wrap: WrapType::Verbatim,
56 | range: CharRange {
57 | start: last_end,
58 | end: el.start,
59 | },
60 | };
61 | last_end = el.end;
62 |
63 | let wrap_line_start = find_line_start(el.start, &lines).unwrap_or(el.start);
64 | let wrap = TextRange {
65 | wrap: WrapType::Indent(el.start - wrap_line_start),
66 | range: el,
67 | };
68 | [verbatim, wrap]
69 | })
70 | .filter(|el| !el.range.is_empty())
71 | .map(|el| {
72 | if let WrapType::Indent(indent) = el.wrap {
73 | trace_log!(
74 | "formattable text with {} spaces indent: {}",
75 | indent,
76 | text[el.range.clone()].replace('\n', "\\n")
77 | );
78 | } else {
79 | trace_log!(
80 | "verbatim text: {}",
81 | text[el.range.clone()].replace('\n', "\\n")
82 | );
83 | }
84 | el
85 | })
86 | .collect::<Vec<_>>()
87 | }
88 |
89 | /// Determine character ranges for each line in the document.
90 | fn line_ranges(text: &str) -> Vec<CharRange> {
91 | let mut start = 0;
92 |
93 | text.split_inclusive('\n')
94 | .map(|el| {
95 | let end = start + el.len();
96 | let range = CharRange { start, end };
97 | start = end;
98 | range
99 | })
100 | .collect::<Vec<_>>()
101 | }
102 |
103 | /// Find the start of the line that "point" is in.
104 | fn find_line_start(point: usize, line_ranges: &[CharRange]) -> Option<usize> {
105 | line_ranges
106 | .iter()
107 | .find(|el| el.contains(&point))
108 | .map(|el| el.start)
109 | }
110 |
111 | #[cfg(test)]
112 | mod test {
113 | use super::*;
114 |
115 | #[test]
116 | fn finding_line_start() {
117 | let ranges = vec![
118 | CharRange { start: 0, end: 10 },
119 | CharRange { start: 10, end: 12 },
120 | CharRange { start: 22, end: 31 },
121 | CharRange { start: 31, end: 33 },
122 | ];
123 |
124 | for (point, expected) in [
125 | (5, Some(0)),
126 | (10, Some(10)),
127 | (15, None),
128 | (22, Some(22)),
129 | (28, Some(22)),
130 | (30, Some(22)),
131 | (31, Some(31)),
132 | (35, None),
133 | ] {
134 | let start = find_line_start(point, &ranges);
135 | assert_eq!(expected, start);
136 | }
137 | }
138 |
139 | #[test]
140 | fn getting_line_ranges() {
141 | let text = r#"
142 | text
143 | more text
144 |
145 | even more text
146 | "#;
147 | let ranges = line_ranges(text);
148 | let expected = vec![
149 | CharRange { start: 0, end: 1 },
150 | CharRange { start: 1, end: 6 },
151 | CharRange { start: 6, end: 16 },
152 | CharRange { start: 16, end: 17 },
153 | CharRange { start: 17, end: 32 },
154 | ];
155 | assert_eq!(expected, ranges);
156 | }
157 |
158 | #[test]
159 | fn filling_ranges() {
160 | let text = r#"
161 | text
162 | more text
163 |
164 | even more text
165 | "#;
166 | let wrap_ranges = vec![
167 | CharRange { start: 1, end: 6 },
168 | CharRange { start: 22, end: 26 },
169 | CharRange { start: 31, end: 32 },
170 | ];
171 | let filled = fill_markdown_ranges(wrap_ranges, text);
172 |
173 | let expected = vec![
174 | TextRange {
175 | wrap: WrapType::Verbatim,
176 | range: CharRange { start: 0, end: 1 },
177 | },
178 | TextRange {
179 | wrap: WrapType::Indent(0),
180 | range: CharRange { start: 1, end: 6 },
181 | },
182 | TextRange {
183 | wrap: WrapType::Verbatim,
184 | range: CharRange { start: 6, end: 22 },
185 | },
186 | TextRange {
187 | wrap: WrapType::Indent(5),
188 | range: CharRange { start: 22, end: 26 },
189 | },
190 | TextRange {
191 | wrap: WrapType::Verbatim,
192 | range: CharRange { start: 26, end: 31 },
193 | },
194 | TextRange {
195 | wrap: WrapType::Indent(14),
196 | range: CharRange { start: 31, end: 32 },
197 | },
198 | ];
199 |
200 | assert_eq!(expected.len(), filled.len());
201 | for (v1, v2) in expected.into_iter().zip(filled) {
202 | assert_eq!(v1, v2);
203 | }
204 | }
205 | }
206 |
--------------------------------------------------------------------------------
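
A sketch of `fill_markdown_ranges` on a tiny document; the helper is hypothetical and `CharRange` comes from src/parse.rs, which is not shown in this section:

```rust
// Hypothetical example: mark one wrappable paragraph; everything outside the
// given range is returned as a verbatim TextRange.
fn split_into_ranges() -> Vec<TextRange> {
    let text = "# Title\nA sentence to wrap.\n";
    // Characters 8..27 cover "A sentence to wrap." on the second line.
    let wrap_ranges = vec![CharRange { start: 8, end: 27 }];
    fill_markdown_ranges(wrap_ranges, text)
}
```
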
/Makefile:
--------------------------------------------------------------------------------
1 | SHELL := /bin/bash -euo pipefail
2 |
3 | SRC := $(shell find src -name "*.rs")
4 | TARGET_DEV := target/debug/mdslw
5 | TARGET_PROD := target/x86_64-unknown-linux-musl/release/mdslw
6 |
7 | default: build-dev
8 |
9 | build-dev: $(TARGET_DEV)
10 |
11 | $(TARGET_DEV): Cargo.lock Cargo.toml $(SRC)
12 | cargo build
13 |
14 | .PHONY: install-toolchains
15 | install-toolchains:
16 | rustup target add x86_64-unknown-linux-musl
17 | rustup target add armv7-unknown-linux-gnueabihf
18 | rustup target add x86_64-pc-windows-gnu
19 |
20 | build-prod: $(TARGET_PROD)
21 |
22 | # Build prod for the dev system.
23 | $(TARGET_PROD): Cargo.lock Cargo.toml $(SRC)
24 | RUSTFLAGS='-Dwarnings -C link-arg=-s -C relocation-model=static' \
25 | cargo build -j "$$(nproc --all)" --release --target=x86_64-unknown-linux-musl
26 |
27 | .PHONY: build-prod-all
28 | build-prod-all:
29 | echo ==== x86_64-unknown-linux-musl ====
30 | $(MAKE) --always-make build-prod
31 | echo ==== armv7-unknown-linux-gnueabihf ====
32 | RUSTFLAGS='-Dwarnings -C link-arg=-s' \
33 | cargo build -j "$$(nproc --all)" --release --target=armv7-unknown-linux-gnueabihf
34 | echo ==== x86_64-pc-windows-gnu ====
35 | RUSTFLAGS='-Dwarnings -C link-arg=-s' \
36 | cargo build -j "$$(nproc --all)" --release --target x86_64-pc-windows-gnu
37 |
38 | .PHONY: copy-relese-binaries
39 | copy-relese-binaries:
40 | rm -rf ./dist
41 | mkdir -p ./dist
42 | cp target/x86_64-unknown-linux-musl/release/mdslw ./dist/mdslw_x86_64-unknown-linux-musl
43 | cp target/armv7-unknown-linux-gnueabihf/release/mdslw ./dist/mdslw_armv7-unknown-linux-gnueabihf
44 | cp target/x86_64-pc-windows-gnu/release/mdslw.exe ./dist/mdslw_x86_64-pc-windows-gnu.exe
45 |
46 | .PHONY: test
47 | test:
48 | RUSTFLAGS="-Dwarnings" cargo test
49 | $(MAKE) test-features test-langs test-default-config assert-version-tag test-envs-match-flags
50 |
51 | FEATURES := $(shell grep "/// {n} \* [a-z-]* => " src/cfg.rs | awk '{print $$4}' | tr '\n' ',' | sed 's/,$$//')
52 |
53 | .PHONY: test-features
54 | test-features:
55 | [[ -n "$(FEATURES)" ]]
56 | RUSTFLAGS="-Dwarnings" cargo run -- --features="$(FEATURES)" <<< "markdown"
57 |
58 | .PHONY: assert-version-tag
59 | assert-version-tag:
60 | # Extract tag and compare it to the version known by mdslw. When not run on a
61 | # tag, this target checks that the version known by the tool is not identical
62 | # to any existing tag. When run on a tag, it checks that the version known is
63 | # identical to the current tag.
64 | echo >&2 "Tags: $$(git tag --list | tr '\n' ' ')"
65 | version=$$(RUSTFLAGS="-Dwarnings" cargo run -- --version | awk '{print $$2}') && \
66 | echo >&2 "Version: $${version}" && \
67 | tag=$$(git describe --exact-match --tags | sed 's/^v//' || :) && \
68 | if [[ -n "$${tag}" ]]; then \
69 | if [[ "$${tag}" != "$${version}" ]]; then \
70 | echo >&2 "Version tag $${tag} does not match tool version $${version}."; \
71 | exit 1; \
72 | fi; \
73 | else \
74 | tags=$$(git tag --list) && match= && \
75 | for t in $${tags}; do \
76 | if [[ "$${version}" == "$$t" ]]; then match="$$t"; fi; \
77 | done && \
78 | if [[ -n "$${match-}" ]]; then \
79 | echo >&2 "Found an existing matching git version tag: $$match"; \
80 | exit 1; \
81 | fi; \
82 | fi
83 |
84 | .PHONY: test-envs-match-flags
85 | test-envs-match-flags:
86 | flags=($$(cargo run -- --help | grep -E "^ +-" | grep -E -o -- "--[0-9a-zA-Z-]+" | grep -vE -- '--(help|verbose|version)' | sort -fu)) && \
87 | envs=($$(cargo run -- --help | grep -o '\[env: [^=]*=' | sed 's/^\[env: //;s/=$$//' | sort -fu)) && \
88 | echo FLAGS: "$${flags[@]}" && echo ENVS: "$${envs[@]}" && \
89 | [[ "$${#flags[@]}" == "$${#envs[@]}" ]] && \
90 | for idx in "$${!flags[@]}"; do \
91 | flag="$${flags[$${idx}]}" && env="$${envs[$${idx}]}" && \
92 | if [[ -n "$$(tr -d '[:upper:]_' <<< $$env)" || -n "$$(tr -d '[:lower:]-' <<< $$flag)" ]]; then \
93 | echo >&2 "Malformed env or flag: $${env} || $${flag}"; exit 1; \
94 | fi; \
95 | if [[ "mdslw_$$(sed 's/^__//' <<< $${flag//-/_})" != "$${env,,}" ]]; then \
96 | echo >&2 "Env/flag mismatch: $${env} != $${flag}"; exit 1; \
97 | fi; \
98 | done
99 |
100 | .PHONY: lint
101 | lint:
102 | rustup component add clippy
103 | RUSTFLAGS="-Dwarnings" cargo check --all-features --all-targets
104 | RUSTFLAGS="-Dwarnings" cargo clippy --all-features --all-targets --no-deps
105 |
106 | # Extract languages requested by the code to keep them in sync.
107 | LANGS := $(shell grep -o '/// Supported languages are:\( *[a-z][a-z]\)* *' ./src/cfg.rs | awk -F: '{print $$2}' | tr -s '[:space:]')
108 |
109 | .PHONY: test-langs
110 | test-langs:
111 | [[ -n "$(LANGS)" ]]
112 | RUSTFLAGS="-Dwarnings" cargo run -- --lang="$(LANGS) ac" <<< "markdown"
113 |
114 | .PHONY: test-default-config
115 | test-default-config:
116 | from_readme=$$( \
117 | state=0; while read -r line; do \
118 | if [[ "$${line}" == "" ]]; then state=0; fi; \
119 | if [[ "$${state}" -eq 1 ]]; then echo "$${line}"; fi; \
120 | if [[ "$${line}" == "" ]]; then state=1; fi; \
121 | done < README.md | sed '/^$$/d' | grep -v '^```'\
122 | ) && \
123 | from_tool=$$(RUSTFLAGS="-Dwarnings" cargo run -- --default-config) && \
124 | [[ "$${from_tool}" == "$${from_readme}" ]]
125 |
126 | COVERAGE := .coverage.html
127 | PROFRAW := .coverage.profraw
128 | PROFDATA := .coverage.profdata
129 | COVERAGE_JSON := .coverage.json
130 | RUSTC_ROOT := $(shell rustc --print sysroot)
131 | LLVM_PROFILE_FILE := $(PROFRAW)
132 | export LLVM_PROFILE_FILE
133 | MIN_COV_PERCENT := 80
134 |
135 | .PHONY: coverage
136 | coverage:
137 | rm -f "$(COVERAGE)" "$(PROFRAW)" "$(PROFDATA)"
138 | # Install dependencies
139 | rustup component add llvm-tools
140 | cargo install rustfilt
141 | # Build stand-alone test executable.
142 | RUSTFLAGS="-C instrument-coverage=all" \
143 | cargo build --tests
144 | # Find and run executable to generate coverage report.
145 | exe=$$( \
146 | find target/debug/deps/ -executable -name "mdslw-*" \
147 | | xargs ls -t | head -n1 \
148 | ) && \
149 | prof_exe=$$(find $(RUSTC_ROOT) -executable -name "llvm-profdata" | head -n1) && \
150 | cov_exe=$$(find $(RUSTC_ROOT) -executable -name "llvm-cov" | head -n1) && \
151 | "$${exe}" && \
152 | "$${prof_exe}" merge \
153 | -sparse "$(PROFRAW)" -o "$(PROFDATA)" && \
154 | "$${cov_exe}" show \
155 | -Xdemangler=rustfilt "$${exe}" \
156 | --format=html \
157 | --instr-profile="$(PROFDATA)" \
158 | --show-line-counts-or-regions \
159 | --show-instantiations \
160 | --show-branches=count \
161 | --sources "$$(readlink -e src)" \
162 | > "$(COVERAGE)" && \
163 | if [[ -t 1 ]]; then xdg-open "$(COVERAGE)"; fi && \
164 | "$${cov_exe}" export \
165 | -Xdemangler=rustfilt "$${exe}" \
166 | --format=text \
167 | --instr-profile="$(PROFDATA)" \
168 | --sources "$$(readlink -e src)" \
169 | > "$(COVERAGE_JSON)"
170 | echo "Per-file coverage:" && \
171 | jq -r ".data[].files[] | [.summary.lines.percent, .filename] | @csv" \
172 | < "$(COVERAGE_JSON)" \
173 | | sort -t, -k 2 \
174 | | sed "s;$${PWD};.;" \
175 | | awk -F, '{printf("%.2f%% => %s\n", $$1, $$2)}'
176 | jq -r ".data[].totals.lines.percent" \
177 | < "$(COVERAGE_JSON)" \
178 | | awk '{if ($$1<$(MIN_COV_PERCENT)) \
179 | {printf("coverage low: %.2f%%<$(MIN_COV_PERCENT)%%\n", $$1); exit(1)} \
180 | else{printf("coverage OK: %.2f%%\n", $$1)} \
181 | }' >&2
182 |
--------------------------------------------------------------------------------
/src/wrap.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use crate::detect::{BreakDetector, WhitespaceDetector};
19 | use crate::indent::build_indent;
20 | use crate::linebreak::insert_linebreaks_after_sentence_ends;
21 | use crate::ranges::{TextRange, WrapType};
22 | use crate::trace_log;
23 |
24 | pub fn add_linebreaks_and_wrap(
25 | ranges: Vec<TextRange>,
26 | max_width: &Option<usize>,
27 | detector: &BreakDetector,
28 | text: &str,
29 | ) -> String {
30 | let mut result = String::new();
31 |
32 | for range in ranges {
33 | if let WrapType::Indent(indent_spaces) = range.wrap {
34 | trace_log!(
35 | "wrapping text: {}",
36 | text[range.range.clone()].replace('\n', "\\n")
37 | );
38 | let indent = build_indent(indent_spaces);
39 | trace_log!("keeping indent in mind: '{}'", indent);
40 | let broken = insert_linebreaks_after_sentence_ends(&text[range.range], detector);
41 | trace_log!(
42 | "with linebreaks after sentences: {}",
43 | broken.replace('\n', "\\n")
44 | );
45 | let wrapped = broken
46 | .split('\n')
47 | .enumerate()
48 | .flat_map(|(idx, el)| {
49 | wrap_long_line_and_collapse_inline_whitespace(
50 | el,
51 | idx,
52 | max_width,
53 | &indent,
54 | &detector.whitespace,
55 | )
56 | })
57 | .collect::<Vec<_>>()
58 | .join("\n");
59 | trace_log!(
60 | "after wrapping long sentences: {}",
61 | wrapped.replace('\n', "\\n")
62 | );
63 | result.push_str(&wrapped);
64 | } else {
65 | trace_log!(
66 | "keeping text: {}",
67 | text[range.range.clone()].to_string().replace('\n', "\\n")
68 | );
69 | result.push_str(&text[range.range]);
70 | }
71 | }
72 |
73 | result.trim_end().to_string()
74 | }
75 |
76 | /// The main purpose of this function is to wrap a long line, making sure to add the linebreak
77 | /// between words. It does so by splitting by whitespace and then joining again by spaces. One side
78 | /// effect that we accept here is that all consecutive inline whitespace will be replaced by a
79 | /// single space due to the splitting-and-joining process.
80 | fn wrap_long_line_and_collapse_inline_whitespace(
81 | sentence: &str,
82 | sentence_idx: usize,
83 | max_width: &Option<usize>,
84 | indent: &str,
85 | detector: &WhitespaceDetector,
86 | ) -> Vec<String> {
87 | let mut lines = vec![];
88 | let mut words = detector
89 | .split_whitespace(sentence)
90 | .filter(|el| !el.is_empty());
91 | let (mut line, first_indent_len) = if let Some(first_word) = words.next() {
92 | // The first sentence is already properly indented. Every other sentence has to be
93 | // indented manually.
94 | if sentence_idx == 0 {
95 | (String::from(first_word), indent.chars().count())
96 | } else {
97 | (format!("{}{}", indent, first_word), 0)
98 | }
99 | } else {
100 | (String::new(), 0)
101 | };
102 | let mut line_len = line.chars().count() + first_indent_len;
103 | let width = max_width.unwrap_or(0);
104 | for word in words {
105 | let chars = word.chars().count();
106 | if width == 0 || line_len + 1 + chars <= width {
107 | line.push(' ');
108 | line.push_str(word);
109 | line_len += chars + 1;
110 | } else {
111 | lines.push(line);
112 | line = String::from(indent);
113 | line.push_str(word);
114 | line_len = line.chars().count();
115 | }
116 | }
117 | lines.push(line);
118 | lines
119 | }
120 |
121 | #[cfg(test)]
122 | mod test {
123 | use super::*;
124 | use crate::detect::BreakCfg;
125 | use crate::parse::CharRange;
126 |
127 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg {
128 | keep_linebreaks: false,
129 | };
130 |
131 | #[test]
132 | fn wrapping_long_sentence() {
133 | let sentence = "this sentence is not that long but will be wrapped";
134 | let sentence_idx = 0;
135 | let max_width = 11;
136 | let indent = " ";
137 | let wrapped = wrap_long_line_and_collapse_inline_whitespace(
138 | sentence,
139 | sentence_idx,
140 | &Some(max_width),
141 | indent,
142 | &WhitespaceDetector::default(),
143 | );
144 |
145 | // No indent for the start of the sentence due to the sentence_idx.
146 | let expected = vec![
147 | "this",
148 | " sentence",
149 | " is not",
150 | " that long",
151 | " but will",
152 | " be",
153 | " wrapped",
154 | ];
155 |
156 | assert_eq!(expected, wrapped);
157 | }
158 |
159 | #[test]
160 | fn wrapping_long_sentence_that_is_not_the_first() {
161 | let sentence = "some sentence with words";
162 | let sentence_idx = 1;
163 | let max_width = 5;
164 | // Indent will be copied, does not have to be whitespace.
165 | let indent = "|";
166 | let wrapped = wrap_long_line_and_collapse_inline_whitespace(
167 | sentence,
168 | sentence_idx,
169 | &Some(max_width),
170 | indent,
171 | &WhitespaceDetector::default(),
172 | );
173 |
174 | // Note the indent for the start of the sentence due to the sentence_idx.
175 | let expected = vec!["|some", "|sentence", "|with", "|words"];
176 |
177 | assert_eq!(expected, wrapped);
178 | }
179 |
180 | #[test]
181 | fn not_wrapping_long_sentence_unless_requested() {
182 | let sentence = "this sentence is somewhat long but will not be wrapped";
183 | let sentence_idx = 0;
184 | let indent = " ";
185 | let wrapped = wrap_long_line_and_collapse_inline_whitespace(
186 | sentence,
187 | sentence_idx,
188 | &None,
189 | indent,
190 | &WhitespaceDetector::default(),
191 | );
192 |
193 | let expected = vec![sentence];
194 |
195 | assert_eq!(expected, wrapped);
196 | }
197 |
198 | #[test]
199 | fn adding_linebreaks_after_sentences() {
200 | let ranges = vec![
201 | TextRange {
202 | wrap: WrapType::Indent(0),
203 | range: CharRange { start: 0, end: 33 },
204 | },
205 | // The pipe should remain verbatim.
206 | TextRange {
207 | wrap: WrapType::Verbatim,
208 | range: CharRange { start: 33, end: 36 },
209 | },
210 | TextRange {
211 | wrap: WrapType::Indent(3),
212 | range: CharRange { start: 36, end: 74 },
213 | },
214 | ];
215 | let text = String::from(
216 | "Some text. It contains sentences. | It's separated in two. Parts, that is.",
217 | );
218 | let detector = BreakDetector::new("", "", false, ".", CFG_FOR_TESTS);
219 |
220 | let wrapped = add_linebreaks_and_wrap(ranges, &None, &detector, &text);
221 |
222 | // Whitespace at the start of a range is also merged into one space. Not sure if that makes
223 | // sense but it does not appear to be relevant in practice, probably due to the way we
224 | // parse the markdown files. That is, none of the ranges we get appear to start with
225 | // whitespace at all.
226 | let expected = String::from(
227 | "Some text.\nIt contains sentences. | It's separated in two.\n Parts, that is.",
228 | );
229 | assert_eq!(expected, wrapped);
230 | }
231 |
232 | #[test]
233 | fn adding_linebreaks_after_sentences_with_keep_words() {
234 | let ranges = vec![TextRange {
235 | wrap: WrapType::Indent(0),
236 | range: CharRange { start: 0, end: 33 },
237 | }];
238 | let text = String::from("Some text. It contains sentences.");
239 | let detector = BreakDetector::new("TEXT.", "", false, ".", CFG_FOR_TESTS);
240 |
241 | let wrapped = add_linebreaks_and_wrap(ranges, &None, &detector, &text);
242 |
243 | let expected = String::from("Some text. It contains sentences.");
244 | assert_eq!(expected, wrapped);
245 | }
246 | }
247 |
--------------------------------------------------------------------------------
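
A sketch of the top-level wrapping call above, treating the whole input as one formattable range with a width of 30 columns; the helper is hypothetical and assumes the items from src/wrap.rs, src/ranges.rs, src/detect.rs and src/parse.rs are in scope:

```rust
// Hypothetical helper: wrap a single paragraph at 30 columns without any
// keep words and with '.' as the only end marker.
fn wrap_paragraph(text: &str) -> String {
    let cfg = BreakCfg { keep_linebreaks: false };
    let detector = BreakDetector::new("", "", false, ".", &cfg);
    let ranges = vec![TextRange {
        wrap: WrapType::Indent(0),
        range: CharRange {
            start: 0,
            end: text.len(),
        },
    }];
    add_linebreaks_and_wrap(ranges, &Some(30), &detector, text)
}
```
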
/src/detect.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use std::collections::HashSet;
19 |
20 | pub struct BreakDetector {
21 | // Information related to whitespace.
22 | pub whitespace: WhitespaceDetector,
23 |
24 | // Information related to keep words.
25 | keep_words: HashSet<(String, usize)>,
26 | keep_words_preserve_case: bool,
27 |
28 | // Information related to end markers.
29 | end_markers: String,
30 | }
31 |
32 | #[derive(Default)]
33 | pub struct WhitespaceDetector {
34 | whitespace_to_detect: String,
35 | }
36 |
37 | impl<'a> WhitespaceDetector {
38 | const NBSP: &'static str = "\u{00a0}\u{2007}\u{202f}\u{2060}\u{feff}";
39 |
40 | pub fn new(keep_linebreaks: bool) -> Self {
41 | let mut whitespace_to_detect = String::from(Self::NBSP);
42 | if keep_linebreaks {
43 | log::debug!("not treating linebreaks as modifiable whitespace");
44 | whitespace_to_detect.push('\n')
45 | } else {
46 | log::debug!("treating linebreaks as modifiable whitespace");
47 | }
48 | Self {
49 | whitespace_to_detect,
50 | }
51 | }
52 |
53 | pub fn split_whitespace(&self, s: &'a str) -> std::vec::IntoIter<&'a str> {
54 | s.split(|el| self.is_whitespace(&el))
55 | .filter(|el| !el.is_empty())
56 | .collect::<Vec<_>>()
57 | .into_iter()
58 | }
59 |
60 | pub fn is_whitespace(&self, ch: &char) -> bool {
61 | // The character is whitespace if it is detected to be Unicode whitespace and if it is not in
62 | // the list of excluded whitespace characters known by this struct.
63 | ch.is_whitespace() && !self.whitespace_to_detect.contains(*ch)
64 | }
65 |
66 | pub fn is_nbsp(&self, ch: &char) -> bool {
67 | Self::NBSP.contains(*ch)
68 | }
69 | }
70 |
71 | #[derive(Debug, PartialEq)]
72 | pub struct BreakCfg {
73 | pub keep_linebreaks: bool,
74 | }
75 |
76 | impl BreakDetector {
77 | pub fn new(
78 | keep_words: &str,
79 | keep_word_ignores: &str,
80 | keep_words_preserve_case: bool,
81 | end_markers: &str,
82 | break_cfg: &BreakCfg,
83 | ) -> Self {
84 | let (cased_words, cased_ignores) = if keep_words_preserve_case {
85 | (keep_words.to_owned(), keep_word_ignores.to_owned())
86 | } else {
87 | (keep_words.to_lowercase(), keep_word_ignores.to_lowercase())
88 | };
89 |
90 | let ignores = cased_ignores.split_whitespace().collect::<HashSet<_>>();
91 | let internal_keep_words = cased_words
92 | .split_whitespace()
93 | .filter(|el| !ignores.contains(el))
94 | .map(|el| (el.to_string(), el.len() - 1))
95 | .collect::<HashSet<_>>();
96 |
97 | log::debug!("end markers: '{}'", end_markers);
98 | log::debug!("using {} unique keep words", internal_keep_words.len());
99 | let case_info = if keep_words_preserve_case { "" } else { "in" };
100 | log::debug!("treating keep words case-{}sensitively", case_info);
101 |
102 | Self {
103 | // Keep words.
104 | keep_words_preserve_case,
105 | keep_words: internal_keep_words,
106 | // End markers.
107 | end_markers: end_markers.to_string(),
108 | // Whitespace.
109 | whitespace: WhitespaceDetector::new(break_cfg.keep_linebreaks),
110 | }
111 | }
112 |
113 | /// Checks whether "text" ends with one of the keep words known by self at "idx".
114 | pub fn ends_with_keep_word(&self, text: &[char], idx: &usize) -> bool {
115 | if idx < &text.len() {
116 | self.keep_words
117 | .iter()
118 | // Only check words that can actually be in the text.
119 | .filter(|(_el, disp)| idx >= disp)
120 | // Determine whether any keep word matches.
121 | .any(|(el, disp)| {
122 | // Check whether the word is at the start of the text or whether, if it starts
123 | // with an alphanumeric character, it is preceded by a character that is not
124 | // alphanumeric. That way, we avoid matching a keep word of "g." on a text going
125 | // "e.g.". Note that, here, idx>=disp holds. If a "word" does not start with an
126 | alphanumeric character, then the definition of "word" is ambiguous anyway. In
127 | // such a case, we also match partially.
128 | (idx == disp || !text[idx-disp-1..=idx-disp].iter().all(|el| el.is_alphanumeric())) &&
129 | // Check whether all characters of the keep word and the slice through the text
130 | // are identical.
131 | text[idx - disp..=*idx]
132 | .iter()
133 | // Convert the text we compare with to lower case, but only those parts
134 | // that we actually do compare with. The conversion is somewhat annoying
135 | // and complicated because a single upper-case character might map to
136 | // multiple lower-case ones when converted (not sure why that would be so).
137 | .flat_map(|el| {
138 | if self.keep_words_preserve_case {
139 | vec![*el]
140 | } else {
141 | el.to_lowercase().collect::<Vec<_>>()
142 | }
143 | })
144 | // The strings in self.keep_words are already in lower case if desired. No conversion
145 | // needed here.
146 | .zip(el.chars())
147 | .all(|(ch1, ch2)| ch1 == ch2)
148 | })
149 | } else {
150 | false
151 | }
152 | }
153 |
154 | /// Checks whether ch is an end marker and whether the surrounding characters indicate that ch
155 | /// is actually at the end of a sentence.
156 | pub fn is_breaking_marker(&self, ch: &char, next: Option<&char>) -> bool {
157 | // The current character has to be an end marker. If it is not, it does not end a sentence.
158 | self.end_markers.contains(*ch)
159 | // The next character must be whitespace. If it is not, this character is in the middle
160 | // of a word and, thus, not at the end of a sentence.
161 | && is_whitespace(next, &self.whitespace)
162 | }
163 | }
164 |
165 | // Some helper functions that make it easier to work with Option<&char> follow.
166 |
167 | fn is_whitespace(ch: Option<&char>, detector: &WhitespaceDetector) -> bool {
168 | ch.map(|el| detector.is_whitespace(el)).unwrap_or(false)
169 | }
170 |
171 | #[cfg(test)]
172 | mod test {
173 | use super::*;
174 |
175 | const TEXT_FOR_TESTS: &str = "Lorem iPsum doLor SiT aMeT. ConSectEtur adIpiSciNg ELiT.";
176 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg {
177 | keep_linebreaks: false,
178 | };
179 |
180 | #[test]
181 | fn case_insensitive_match() {
182 | let detector = BreakDetector::new("ipsum sit adipiscing", "", false, "", CFG_FOR_TESTS);
183 | let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
184 |
185 | let found = (0..text.len())
186 | .filter(|el| detector.ends_with_keep_word(&text, el))
187 | .collect::<Vec<_>>();
188 |
189 | assert_eq!(found, vec![10, 20, 49]);
190 | }
191 |
192 | #[test]
193 | fn case_sensitive_match() {
194 | let detector = BreakDetector::new("ipsum SiT adipiscing", "", true, "", CFG_FOR_TESTS);
195 | let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
196 |
197 | let found = (0..text.len())
198 | .filter(|el| detector.ends_with_keep_word(&text, el))
199 | .collect::<Vec<_>>();
200 |
201 | assert_eq!(found, vec![20]);
202 | }
203 |
204 | #[test]
205 | fn matches_at_start_and_end() {
206 | let detector = BreakDetector::new("lorem elit.", "", false, "", CFG_FOR_TESTS);
207 | let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
208 |
209 | // Try to search outside the text's range, which will never match.
210 | let found = (0..text.len() + 5)
211 | .filter(|el| detector.ends_with_keep_word(&text, el))
212 | .collect::<Vec<_>>();
213 |
214 | assert_eq!(found, vec![4, 55]);
215 | }
216 |
217 | #[test]
218 | fn ignoring_words_case_sensitively() {
219 | let detector = BreakDetector::new("ipsum SiT adipiscing", "SiT", true, "", CFG_FOR_TESTS);
220 | let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
221 |
222 | let found = (0..text.len())
223 | .filter(|el| detector.ends_with_keep_word(&text, el))
224 | .collect::<Vec<_>>();
225 |
226 | assert_eq!(found, vec![]);
227 | }
228 |
229 | #[test]
230 | fn ignoring_words_case_insensitively() {
231 | let detector = BreakDetector::new("ipsum sit adipiscing", "sit", false, "", CFG_FOR_TESTS);
232 | let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
233 |
234 | let found = (0..text.len())
235 | .filter(|el| detector.ends_with_keep_word(&text, el))
236 | .collect::<Vec<_>>();
237 |
238 | assert_eq!(found, vec![10, 49]);
239 | }
240 |
241 | #[test]
242 | fn ignores_that_are_not_suppressions_are_ignored() {
243 | let detector = BreakDetector::new(
244 | "ipsum sit adipiscing",
245 | "sit asdf blub muhaha",
246 | false,
247 | "",
248 | CFG_FOR_TESTS,
249 | );
250 | let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
251 |
252 | let found = (0..text.len())
253 | .filter(|el| detector.ends_with_keep_word(&text, el))
254 | .collect::<Vec<_>>();
255 |
256 | assert_eq!(found, vec![10, 49]);
257 | }
258 | }
259 |
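260 | // What follows is an illustrative sketch rather than part of the original test
261 | // suite: it demonstrates two properties documented above, namely that keep words
262 | // only match on word boundaries and that an end marker only breaks a sentence
263 | // when followed by modifiable whitespace (non-breaking spaces are excluded on
264 | // purpose). The words and markers used here are arbitrary example values.
265 | #[cfg(test)]
266 | mod boundary_sketch {
267 |     use super::*;
268 |
269 |     const CFG: &BreakCfg = &BreakCfg {
270 |         keep_linebreaks: false,
271 |     };
272 |
273 |     #[test]
274 |     fn keep_word_boundaries_and_markers() {
275 |         let detector = BreakDetector::new("it.", "", false, ".", CFG);
276 |
277 |         // The keep word "it." matches at the end of "it." but not inside "bit.".
278 |         let text = "it.".chars().collect::<Vec<_>>();
279 |         assert!(detector.ends_with_keep_word(&text, &2));
280 |         let text = "bit.".chars().collect::<Vec<_>>();
281 |         assert!(!detector.ends_with_keep_word(&text, &3));
282 |
283 |         // A marker breaks only when followed by regular whitespace, neither by a
284 |         // non-breaking space nor at the very end of the text.
285 |         assert!(detector.is_breaking_marker(&'.', Some(&' ')));
286 |         assert!(!detector.is_breaking_marker(&'.', Some(&'\u{00a0}')));
287 |         assert!(!detector.is_breaking_marker(&'.', None));
288 |     }
289 | }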
--------------------------------------------------------------------------------
/src/call.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use std::collections::VecDeque;
19 | use std::fmt;
20 | use std::io::Write;
21 | use std::path::Path;
22 | use std::path::PathBuf;
23 | use std::process::{Command, Stdio};
24 | use std::sync::Mutex;
25 |
26 | use anyhow::{Context, Error, Result};
27 |
28 | use crate::trace_log;
29 |
30 | pub struct Upstream {
31 | cmd: String,
32 | args: Vec<String>,
33 | }
34 |
35 | impl Upstream {
36 | pub fn from_cfg(command: &str, args: &str, sep: &str) -> Result<Self> {
37 | let mut split_args = if sep.is_empty() {
38 | args.split_whitespace()
39 | .map(String::from)
40 | .collect::<VecDeque<_>>()
41 | } else {
42 | args.split(sep).map(String::from).collect::<VecDeque<_>>()
43 | };
44 | let cmd = if !command.is_empty() {
45 | command.to_string()
46 | } else {
47 | split_args
48 | .pop_front()
49 | .ok_or_else(|| {
50 | Error::msg(format!(
51 | "Failed to extract upstream command from arguments '{}'.",
52 | args
53 | ))
54 | })?
55 | .to_string()
56 | };
57 | let result = Self {
58 | cmd,
59 | args: split_args.into_iter().collect::<Vec<_>>(),
60 | };
61 | log::debug!("using upstream formatter {}", result);
62 | Ok(result)
63 | }
64 | }
65 |
66 | impl fmt::Display for Upstream {
67 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
68 | write!(f, "'{}' '{}'", self.cmd, self.args.join("' '"))
69 | }
70 | }
71 |
72 | pub fn upstream_formatter(
73 | upstream: &Upstream,
74 | file_content: String,
75 | workdir: &Path,
76 | ) -> Result<String> {
77 | let fallback_workdir = PathBuf::from(".");
78 | let workdir = if workdir.components().count() == 0 {
79 | &fallback_workdir
80 | } else {
81 | workdir
82 | };
83 | log::debug!(
84 | "running upstream executable in directory: {}",
85 | workdir.to_string_lossy()
86 | );
87 |
88 | let mut process = Command::new(&upstream.cmd)
89 | .args(&upstream.args)
90 | .stdin(Stdio::piped())
91 | .stdout(Stdio::piped())
92 | .stderr(Stdio::piped())
93 | .current_dir(workdir)
94 | .spawn()
95 | .context("failed to spawn upstream auto-formatter")?;
96 |
97 | let mut stdin = process
98 | .stdin
99 | .take()
100 | .context("failed to acquire stdin of upstream auto-formatter")?;
101 |
102 | // Write to stdin in a separate thread. Is there really no other way to do that? Calling
103 | // "expect" here is not a problem because, if the process panics, we receive an error.
104 | std::thread::spawn(move || {
105 | stdin
106 | .write_all(file_content.as_bytes())
107 | .expect("failed to write stdin to upstream auto-formatter")
108 | });
109 |
110 | let output = process
111 | .wait_with_output()
112 | .context("failed to wait for output of upstream auto-formatter")?;
113 |
114 | let stdout = String::from_utf8_lossy(&output.stdout);
115 | let stderr = String::from_utf8_lossy(&output.stderr);
116 |
117 | if output.status.success() {
118 | Ok(stdout.to_string())
119 | } else {
120 | Err(Error::msg(format!(
121 | "failed to read stdout of upstream auto-formatter \"{}\". Stderr follows: \n\n{}",
122 | upstream, stderr,
123 | )))
124 | }
125 | }
126 |
127 | pub struct Pager {
128 | stdin: Option<std::process::ChildStdin>,
129 | process: std::process::Child,
130 | }
131 |
132 | impl Pager {
133 | pub fn send(&mut self, s: &str) -> Result<()> {
134 | log::debug!("sending {} bytes to downstream pager's stdin", s.len());
135 | trace_log!("message sent to downstream pager: {}", s);
136 | if let Some(ref mut stdin) = self.stdin {
137 | stdin
138 | .write_all(s.as_bytes())
139 | .context("failed to send text to pager's stdin")
140 | } else {
141 | unreachable!("cannot send to closed stdin of downstream pager");
142 | }
143 | }
144 | }
145 |
146 | impl Drop for Pager {
147 | fn drop(&mut self) {
148 | {
149 | log::debug!("closing stdin of downstream pager");
150 | // Have pager's stdin go out of scope before waiting for the pager
151 | // process. This should not be needed according to the docs of
152 | // "wait", because supposedly that stdin is closed before waiting to
153 | // prevent deadlocks, but it seems to be needed, because there is a
154 | // deadlock without this.
155 | let _ = self.stdin.take();
156 | }
157 | self.process
158 | .wait()
159 | .expect("failed to wait for pager to finish");
160 | }
161 | }
162 |
163 | /// If to_null is set, the output of this pager will be directed to /dev/null.
164 | /// That is used solely for testing.
165 | fn downstream_pager(pager: &str, workdir: std::path::PathBuf, to_null: bool) -> Result<Pager> {
166 | let split_pager = pager.split_whitespace().collect::<Vec<_>>();
167 |
168 | // Interpret an empty directory as the current directory.
169 | let pager_workdir = if workdir.components().count() == 0 {
170 | ".".into()
171 | } else {
172 | workdir
173 | };
174 | log::debug!(
175 | "running downstream pager in directory: {}",
176 | pager_workdir.to_string_lossy()
177 | );
178 |
179 | let cmd = split_pager
180 | .first()
181 | .ok_or(Error::msg("must specify a pager command"))
182 | .context("failed to determine downstream pager command")?;
183 | log::debug!("using pager executable {}", cmd);
184 |
185 | let args = split_pager[1..].to_owned();
186 | log::debug!("using pager arguments {:?}", args);
187 |
188 | let mut process_cfg = Command::new(cmd);
189 | process_cfg
190 | .args(&args)
191 | .stdin(Stdio::piped())
192 | .current_dir(pager_workdir);
193 | if to_null {
194 | process_cfg.stdout(Stdio::null());
195 | }
196 | let mut process = process_cfg
197 | .spawn()
198 | .context("failed to spawn downstream pager")?;
199 |
200 | let stdin = process
201 | .stdin
202 | .take()
203 | .context("failed to acquire stdin of the downstream pager")?;
204 |
205 | Ok(Pager {
206 | stdin: Some(stdin),
207 | process,
208 | })
209 | }
210 |
211 | /// A helper to ensure that text written to stdout is not mangled due to parallelisation.
212 | pub enum ParallelPrinter {
213 | // First bool indicates whether there had been a failure writing to the pager.
214 | Paged(Mutex<(bool, Pager)>),
215 | Direct(Mutex<()>),
216 | }
217 |
218 | impl ParallelPrinter {
219 | pub fn new(pager: &Option<String>) -> Result<Self> {
220 | if let Some(pager) = pager {
221 | if !pager.is_empty() {
222 | let downstream = downstream_pager(pager, PathBuf::from("."), false)?;
223 | Ok(Self::Paged(Mutex::new((false, downstream))))
224 | } else {
225 | Ok(Self::Direct(Mutex::new(())))
226 | }
227 | } else {
228 | Ok(Self::Direct(Mutex::new(())))
229 | }
230 | }
231 |
232 | pub fn println(&self, text: &str) {
233 | match self {
234 | Self::Paged(mutex) => {
235 | let mut result = mutex
236 | .lock()
237 | .expect("failed to lock mutex due to previous panic");
238 | // We do not retry sending to the pager in case sending failed once.
239 | if !result.0 {
240 | if let Err(err) = result.1.send(text) {
241 | log::error!("{:?}", err);
242 | result.0 = true;
243 | }
244 | }
245 | }
246 | Self::Direct(mutex) => {
247 | // Assigning to keep the lock. The lock is lifted once the binding is dropped.
248 | let _lock = mutex
249 | .lock()
250 | .expect("failed to lock mutex due to previous panic");
251 | println!("{}", text);
252 | }
253 | }
254 | }
255 | }
256 |
257 | #[cfg(test)]
258 | mod test {
259 | use super::*;
260 |
261 | #[test]
262 | fn can_call_simple_executable_with_stdio_handling() -> Result<()> {
263 | let input = String::from("some text");
264 | let piped = upstream_formatter(
265 | &Upstream::from_cfg("", "cat", " ")?,
266 | input.clone(),
267 | &PathBuf::from("."),
268 | )
269 | .unwrap();
270 | assert_eq!(input, piped);
271 | Ok(())
272 | }
273 |
274 | #[test]
275 | fn can_call_with_args() -> Result<()> {
276 | let piped = upstream_formatter(
277 | &Upstream::from_cfg("echo", "some text", "")?,
278 | String::new(),
279 | &PathBuf::from("."),
280 | )
281 | .unwrap();
282 | assert_eq!("some text\n", piped);
283 | Ok(())
284 | }
285 |
286 | #[test]
287 | fn need_to_provide_command() -> Result<()> {
288 | let result = upstream_formatter(
289 | &Upstream::from_cfg("", "", " ")?,
290 | String::new(),
291 | &PathBuf::from("."),
292 | );
293 | assert!(result.is_err());
294 | Ok(())
295 | }
296 |
297 | #[test]
298 | fn unknown_executable_fails() -> Result<()> {
299 | let result = upstream_formatter(
300 | &Upstream::from_cfg("", "executable-unknown-asdf", " ")?,
301 | String::new(),
302 | &PathBuf::from("."),
303 | );
304 | assert!(result.is_err());
305 | Ok(())
306 | }
307 |
308 | #[test]
309 | fn can_call_pager_with_args() -> Result<()> {
310 | let mut pager = downstream_pager(&String::from("cat -"), ".".into(), true)?;
311 | pager.send("some text")?;
312 | Ok(())
313 | }
314 |
315 | #[test]
316 | fn need_to_provide_pager_command() {
317 | let result = downstream_pager("", ".".into(), true);
318 | assert!(result.is_err());
319 | }
320 |
321 | #[test]
322 | fn unknown_pager_executable_fails() {
323 | let result = downstream_pager(&String::from("executable-unknown-asdf"), ".".into(), true);
324 | assert!(result.is_err());
325 | }
326 | }
327 |
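328 | // What follows is an illustrative sketch rather than part of the original test
329 | // suite: it shows how the separator passed to `Upstream::from_cfg` splits the
330 | // argument string and how the first word doubles as the command when no explicit
331 | // command is given. The "prettier" strings are arbitrary example values and no
332 | // external program is spawned here.
333 | #[cfg(test)]
334 | mod upstream_cfg_sketch {
335 |     use super::*;
336 |
337 |     #[test]
338 |     fn splitting_upstream_arguments() -> Result<()> {
339 |         // With an explicit separator, only that separator splits the arguments and
340 |         // the first piece becomes the command.
341 |         let up = Upstream::from_cfg("", "prettier::--parser=markdown", "::")?;
342 |         assert_eq!(up.cmd, "prettier");
343 |         assert_eq!(up.args, vec!["--parser=markdown"]);
344 |
345 |         // With an empty separator, splitting happens on whitespace and an explicit
346 |         // command is kept as-is.
347 |         let up = Upstream::from_cfg("echo", "some text", "")?;
348 |         assert_eq!(up.cmd, "echo");
349 |         assert_eq!(up.args, vec!["some", "text"]);
350 |         Ok(())
351 |     }
352 | }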
--------------------------------------------------------------------------------
/src/fs.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use std::collections::HashSet;
19 | use std::path::{Path, PathBuf};
20 |
21 | use anyhow::{Context, Error, Result};
22 | use ignore::Walk;
23 |
24 | pub fn find_files_with_extension(paths: &[PathBuf], extension: &str) -> Result<Vec<PathBuf>> {
25 | let mut errors = vec![];
26 |
27 | let found = paths
28 | .iter()
29 | .filter_map(|top_level_path| {
30 | if top_level_path.is_file() {
31 | log::debug!("found file on disk: {}", top_level_path.to_string_lossy());
32 | Some(vec![top_level_path.clone()])
33 | } else if top_level_path.is_dir() {
34 | log::debug!(
35 | "crawling directory on disk: {}",
36 | top_level_path.to_string_lossy()
37 | );
38 | Some(
39 | // Recursively extract all files with the given extension.
40 | Walk::new(top_level_path)
41 | .filter_map(|path_entry| match path_entry {
42 | Ok(path) => Some(path),
43 | Err(err) => {
44 | let path = top_level_path.to_string_lossy();
45 | log::error!("failed to crawl {}: {}", path, err);
46 | None
47 | }
48 | })
49 | .filter_map(|el| match el.path().canonicalize() {
50 | Ok(path) => Some(path),
51 | Err(err) => {
52 | let path = el.path().to_string_lossy();
53 | if el.path_is_symlink() {
54 | log::error!("ignoring broken symlink: {}: {}", err, path);
55 | } else {
56 | log::error!("ignoring inaccessible path: {}: {}", err, path);
57 | }
58 | None
59 | }
60 | })
61 | // Only keep actual markdown files and symlinks to them.
62 | .filter(|el| el.is_file() && el.to_string_lossy().ends_with(extension))
63 | .map(strip_cwd_if_possible)
64 | .inspect(|el| {
65 | log::debug!("discovered file on disk: {}", el.to_string_lossy());
66 | })
67 | .collect::<Vec<_>>(),
68 | )
69 | } else {
70 | errors.push(top_level_path.to_string_lossy().to_string());
71 | None
72 | }
73 | })
74 | .flatten()
75 | .collect::<Vec<_>>();
76 |
77 | if errors.is_empty() {
78 | log::debug!(
79 | "discovered {} files with extension {}",
80 | found.len(),
81 | extension
82 | );
83 | Ok(found)
84 | } else {
85 | Err(Error::msg(format!(
86 | "failed to find paths: '{}'",
87 | errors.join("' '")
88 | )))
89 | }
90 | }
91 |
92 | pub fn read_stdin() -> String {
93 | std::io::stdin()
94 | .lines()
95 | // Interrupt as soon as one line could not be read.
96 | .map_while(Result::ok)
97 | .collect::<Vec<_>>()
98 | .join("\n")
99 | }
100 |
101 | pub fn get_file_content_and_dir(path: &Path) -> Result<(String, PathBuf)> {
102 | let text = std::fs::read_to_string(path).context("failed to read file")?;
103 | let dir = path
104 | .parent()
105 | .map(|el| el.to_path_buf())
106 | .ok_or(Error::msg("failed to determine parent directory"))?;
107 |
108 | Ok((text, dir))
109 | }
110 |
111 | fn strip_cwd_if_possible(path: PathBuf) -> PathBuf {
112 | std::env::current_dir()
113 | .map(|cwd| path.strip_prefix(cwd).unwrap_or(&path))
114 | .unwrap_or(&path)
115 | .to_path_buf()
116 | }
117 |
118 | // For convenience, this can also take paths to existing files and scan upwards, starting in
119 | // their directories. Since we want to avoid scanning the same directories over and over again,
120 | // we also use a cache to remember paths that we have already scanned. We abort scanning upwards
121 | // as soon as we find that we have already scanned a path.
122 | pub fn find_files_upwards(
123 | dir: &Path,
124 | file_name: &str,
125 | cache: &mut Option<HashSet<PathBuf>>,
126 | ) -> Vec<PathBuf> {
127 | let mut result = vec![];
128 | log::debug!(
129 | "finding {} upwards from {}",
130 | file_name,
131 | dir.to_string_lossy()
132 | );
133 | for dir in UpwardsDirsIterator::new(dir) {
134 | if cache.as_ref().map(|el| el.contains(&dir)).unwrap_or(false) {
135 | log::debug!("early stop of upwards search at {}", dir.to_string_lossy());
136 | break;
137 | } else {
138 | let maybe_file = dir.join(file_name);
139 | if maybe_file.is_file() {
140 | log::debug!(
141 | "found file in upwards search: {}",
142 | maybe_file.to_string_lossy()
143 | );
144 | result.push(maybe_file);
145 | }
146 | cache.as_mut().map(|el| el.insert(dir));
147 | }
148 | }
149 | log::debug!("found {} files in upwards search", result.len());
150 | result
151 | }
152 |
153 | #[derive(Debug)]
154 | pub struct UpwardsDirsIterator(Option<PathBuf>);
155 |
156 | impl UpwardsDirsIterator {
157 | pub fn new(dir_or_file: &Path) -> Self {
158 | match dir_or_file.canonicalize() {
159 | Ok(path) => {
160 | if path.is_file() {
161 | Self(path.parent().map(|el| el.to_path_buf()))
162 | } else {
163 | Self(Some(path.to_owned()))
164 | }
165 | }
166 | Err(_) => Self(None),
167 | }
168 | }
169 | }
170 |
171 | impl Iterator for UpwardsDirsIterator {
172 | type Item = PathBuf;
173 |
174 | fn next(&mut self) -> Option<Self::Item> {
175 | let result = self.0.clone();
176 | if let Some(ref mut base) = self.0 {
177 | if !base.pop() {
178 | self.0 = None;
179 | }
180 | }
181 | result
182 | }
183 | }
184 |
185 | #[cfg(test)]
186 | mod test {
187 | use super::*;
188 |
189 | // Actual tests follow.
190 | #[test]
191 | fn listing_non_existent_fails() {
192 | let is_err = find_files_with_extension(&["i do not exist".into()], ".md").is_err();
193 | assert!(is_err);
194 | }
195 |
196 | // A struct that will automatically create and delete a temporary directory and that can create
197 | // arbitrary temporary files underneath it, including their parent dirs.
198 | struct TempDir(tempfile::TempDir);
199 |
200 | impl TempDir {
201 | fn new() -> Result<Self> {
202 | Ok(Self(tempfile::TempDir::new()?))
203 | }
204 |
205 | fn new_file_in_dir(&self, path: PathBuf) -> Result<PathBuf> {
206 | let mut result = PathBuf::from(self.0.path());
207 |
208 | // Create directory containing file.
209 | if let Some(parent) = path.parent() {
210 | result.extend(parent);
211 | std::fs::create_dir_all(&result)?;
212 | }
213 |
214 | if let Some(file_name) = path.file_name() {
215 | result.push(file_name);
216 | std::fs::File::create(&result)?;
217 | Ok(result)
218 | } else {
219 | Err(Error::msg("no file given"))
220 | }
221 | }
222 |
223 | fn new_file_in_dir_with_content(&self, path: PathBuf, content: &str) -> Result<PathBuf> {
224 | let path = self.new_file_in_dir(path)?;
225 | std::fs::write(&path, content.as_bytes())?;
226 | Ok(path)
227 | }
228 |
229 | /// Remove the temporary directory from the prefix.
230 | fn strip(&self, path: PathBuf) -> PathBuf {
231 | path.as_path()
232 | .strip_prefix(self.0.path())
233 | .unwrap_or(&path)
234 | .to_path_buf()
235 | }
236 | }
237 |
238 | #[test]
239 | fn finding_all_md_files_in_repo() -> Result<()> {
240 | let tmp = TempDir::new()?;
241 | // Create some directory tree that will then be searched.
242 | tmp.new_file_in_dir("f_1.md".into())?;
243 | tmp.new_file_in_dir("no_md_1.ext".into())?;
244 | tmp.new_file_in_dir("no_md_2.ext".into())?;
245 | tmp.new_file_in_dir("dir/f_2.md".into())?;
246 | tmp.new_file_in_dir("dir/no_md_1.ext".into())?;
247 | tmp.new_file_in_dir("other_dir/f_3.md".into())?;
248 | tmp.new_file_in_dir("other_dir/no_md_1.ext".into())?;
249 |
250 | let ext_found = find_files_with_extension(&[tmp.0.path().into()], ".ext")?;
251 | assert_eq!(ext_found.len(), 4);
252 |
253 | let found = find_files_with_extension(&[tmp.0.path().into()], ".md")?;
254 | assert_eq!(found.len(), 3);
255 |
256 | Ok(())
257 | }
258 |
259 | #[test]
260 | fn auto_ignoring_files() -> Result<()> {
261 | let tmp = TempDir::new()?;
262 | // Create some directory tree that will then be searched.
263 | tmp.new_file_in_dir("f.md".into())?;
264 | tmp.new_file_in_dir("file.md".into())?;
265 | tmp.new_file_in_dir("stuff.md".into())?;
266 | tmp.new_file_in_dir("dir/f.md".into())?;
267 | tmp.new_file_in_dir("dir/file.md".into())?;
268 | tmp.new_file_in_dir("dir/stuff.md".into())?;
269 | tmp.new_file_in_dir("dir/fstuff.md".into())?;
270 | tmp.new_file_in_dir("other_dir/f.md".into())?;
271 | tmp.new_file_in_dir("other_dir/file.md".into())?;
272 | tmp.new_file_in_dir("other_dir/stuff.md".into())?;
273 | tmp.new_file_in_dir("other_dir/fstuff.md".into())?;
274 |
275 | tmp.new_file_in_dir_with_content(".ignore".into(), "stuff.md\n")?;
276 | tmp.new_file_in_dir_with_content("dir/.ignore".into(), "file.md\n")?;
277 | tmp.new_file_in_dir_with_content("other_dir/.ignore".into(), "f*.md\n")?;
278 |
279 | let found = find_files_with_extension(&[tmp.0.path().into()], ".md")?
280 | .into_iter()
281 | .map(|el| tmp.strip(el))
282 | .map(|el| el.to_string_lossy().to_string())
283 | .collect::<HashSet<_>>();
284 |
285 | let expected = vec!["file.md", "f.md", "dir/fstuff.md", "dir/f.md"]
286 | .into_iter()
287 | .map(|el| el.to_string())
288 | .collect::<HashSet<_>>();
289 |
290 | assert_eq!(found, expected);
291 |
292 | Ok(())
293 | }
294 |
295 | #[test]
296 | fn finding_files_upwards() -> Result<()> {
297 | let tmp = TempDir::new()?;
298 | // Create some directory tree that will then be searched.
299 | tmp.new_file_in_dir("find_me".into())?;
300 | tmp.new_file_in_dir("do_not_find_me".into())?;
301 | tmp.new_file_in_dir("other_dir/find_me".into())?;
302 | tmp.new_file_in_dir("other_dir/do_not_find_me".into())?;
303 | tmp.new_file_in_dir("dir/subdir/find_me".into())?;
304 | let start = tmp.new_file_in_dir("dir/subdir/do_not_find_me".into())?;
305 | tmp.new_file_in_dir("dir/subdir/one_more_layer/find_me".into())?;
306 | tmp.new_file_in_dir("dir/subdir/one_more_layer/do_not_find_me".into())?;
307 |
308 | let found = find_files_upwards(&start, "find_me", &mut None)
309 | .into_iter()
310 | .map(|el| tmp.strip(el))
311 | .map(|el| el.to_string_lossy().to_string())
312 | .collect::<Vec<_>>();
313 |
314 | let expected = vec!["dir/subdir/find_me", "find_me"];
315 |
316 | assert_eq!(found, expected);
317 |
318 | Ok(())
319 | }
320 |
321 | #[test]
322 | fn iterating_dirs_upwards() -> Result<()> {
323 | let tmp = TempDir::new()?;
324 | let start = tmp.new_file_in_dir("dir/subdir/do_not_find_me".into())?;
325 |
326 | let dirs = UpwardsDirsIterator::new(&start)
327 | .map(|el| tmp.strip(el))
328 | .map(|el| el.to_string_lossy().to_string())
329 | .collect::<Vec<_>>();
330 |
331 | assert_eq!(start.components().count() - 1, dirs.len(), "{:?}", dirs);
332 | let dirs = dirs
333 | .into_iter()
334 | .filter(|el| !el.starts_with("/") && !el.is_empty())
335 | .collect::<Vec<_>>();
336 |
337 | let expected = vec!["dir/subdir", "dir"];
338 |
339 | assert_eq!(dirs, expected);
340 |
341 | Ok(())
342 | }
343 | }
344 |
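345 | // What follows is an illustrative sketch rather than part of the original test
346 | // suite: it demonstrates the caching behaviour described in the comment above
347 | // `find_files_upwards`, i.e. that a second search starting in an already visited
348 | // directory stops immediately. The marker file name below is arbitrary.
349 | #[cfg(test)]
350 | mod upwards_cache_sketch {
351 |     use super::*;
352 |
353 |     #[test]
354 |     fn second_search_stops_at_cached_directory() -> Result<()> {
355 |         let tmp = tempfile::TempDir::new()?;
356 |         let nested = tmp.path().join("a").join("b");
357 |         std::fs::create_dir_all(&nested)?;
358 |         std::fs::File::create(tmp.path().join("mdslw_sketch_marker"))?;
359 |
360 |         let mut cache = Some(HashSet::new());
361 |         let first = find_files_upwards(&nested, "mdslw_sketch_marker", &mut cache);
362 |         assert_eq!(first.len(), 1);
363 |
364 |         // All directories from `nested` up to the root are cached now, so the
365 |         // second search aborts at the very first directory and finds nothing.
366 |         let second = find_files_upwards(&nested, "mdslw_sketch_marker", &mut cache);
367 |         assert!(second.is_empty());
368 |         Ok(())
369 |     }
370 | }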
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | // Imports.
19 | mod call;
20 | mod cfg;
21 | mod detect;
22 | mod diff;
23 | mod features;
24 | mod frontmatter;
25 | mod fs;
26 | mod ignore;
27 | mod indent;
28 | mod lang;
29 | mod linebreak;
30 | mod logging;
31 | mod parse;
32 | mod ranges;
33 | mod replace;
34 | mod wrap;
35 |
36 | use std::collections::{HashMap, HashSet};
37 | use std::io;
38 | use std::path::{Path, PathBuf};
39 |
40 | use anyhow::{Context, Error, Result};
41 | use clap::{CommandFactory, Parser};
42 | use clap_complete::generate;
43 | use rayon::prelude::*;
44 |
45 | const CONFIG_FILE: &str = ".mdslw.toml";
46 |
47 | fn generate_report(
48 | mode: &cfg::ReportMode,
49 | new: &str,
50 | org: &str,
51 | filename: &Path,
52 | ) -> Option<String> {
53 | match mode {
54 | cfg::ReportMode::None => None,
55 | cfg::ReportMode::Changed => {
56 | if new != org {
57 | Some(format!("{}", filename.to_string_lossy()))
58 | } else {
59 | None
60 | }
61 | }
62 | cfg::ReportMode::State => {
63 | let ch = if new == org { 'U' } else { 'C' };
64 | Some(format!("{}:{}", ch, filename.to_string_lossy()))
65 | }
66 | cfg::ReportMode::DiffMyers => Some(diff::Algo::Myers.generate(new, org, filename)),
67 | cfg::ReportMode::DiffPatience => Some(diff::Algo::Patience.generate(new, org, filename)),
68 | cfg::ReportMode::DiffLcs => Some(diff::Algo::Lcs.generate(new, org, filename)),
69 | }
70 | }
71 |
72 | struct Processor {
73 | feature_cfg: features::FeatureCfg,
74 | detector: detect::BreakDetector,
75 | max_width: Option<usize>,
76 | }
77 |
78 | impl Processor {
79 | fn process(&self, text: String, width_reduction: usize) -> String {
80 | // First, process the actual text.
81 | let ends_on_linebreak = text.ends_with('\n');
82 | let text = if self.feature_cfg.keep_spaces_in_links {
83 | log::debug!("not replacing spaces in links by non-breaking spaces");
84 | text
85 | } else {
86 | log::debug!("replacing spaces in links by non-breaking spaces");
87 | replace::replace_spaces_in_links_by_nbsp(text)
88 | };
89 | let text = if self.feature_cfg.outsource_inline_links {
90 | log::debug!("outsourcing inline links");
91 | replace::outsource_inline_links(
92 | text,
93 | &self.feature_cfg.collate_link_defs,
94 | &self.detector.whitespace,
95 | )
96 | } else {
97 | log::debug!("not outsourcing inline links");
98 | text
99 | };
100 | let text = if self.feature_cfg.collate_link_defs {
101 | log::debug!("collating links at the end of the document");
102 | replace::collate_link_defs_at_end(text, &self.detector.whitespace)
103 | } else {
104 | log::debug!("not collating links at the end of the document");
105 | text
106 | };
107 | let parsed = parse::parse_markdown(&text, &self.feature_cfg.parse_cfg);
108 | let filled = ranges::fill_markdown_ranges(parsed, &text);
109 | let width = &self
110 | .max_width
111 | .map(|el| el.checked_sub(width_reduction).unwrap_or(el));
112 | let formatted = wrap::add_linebreaks_and_wrap(filled, width, &self.detector, &text);
113 |
114 | // Keep newlines at the end of the file intact. They disappear sometimes.
115 | let file_end = if !formatted.ends_with('\n') && ends_on_linebreak {
116 | log::debug!("adding missing trailing newline character");
117 | "\n"
118 | } else {
119 | ""
120 | };
121 | let text = format!("{}{}", formatted, file_end);
122 |
123 | // At last, process all block quotes.
124 | if self.feature_cfg.format_block_quotes {
125 | log::debug!("formatting text in block quotes");
126 | parse::BlockQuotes::new(&text)
127 | .apply_to_matches_and_join(|t, indent| self.process(t, indent + width_reduction))
128 | } else {
129 | log::debug!("not formatting text in block quotes");
130 | text
131 | }
132 | }
133 | }
134 |
135 | fn process(document: String, file_dir: &Path, cfg: &cfg::PerFileCfg) -> Result<(String, String)> {
136 | // Prepare user-configured options. These could be outsourced if we didn't intend to allow
137 | // per-file configurations.
138 | let lang_keep_words = lang::keep_word_list(&cfg.lang).context("cannot load keep words")?;
139 | let feature_cfg = cfg
140 | .features
141 | .parse::<features::FeatureCfg>()
142 | .context("cannot parse selected features")?;
143 | let detector = detect::BreakDetector::new(
144 | &(lang_keep_words + &cfg.suppressions),
145 | &cfg.ignores,
146 | cfg.case == cfg::Case::Keep,
147 | &cfg.end_markers,
148 | &feature_cfg.break_cfg,
149 | );
150 | let max_width = if cfg.max_width == 0 {
151 | log::debug!("not limiting line length");
152 | None
153 | } else {
154 | log::debug!("limiting line length to {} characters", cfg.max_width);
155 | Some(cfg.max_width)
156 | };
157 | let processor = Processor {
158 | feature_cfg,
159 | detector,
160 | max_width,
161 | };
162 |
163 | // Actually process the text.
164 | let frontmatter = frontmatter::extract_frontmatter(&document);
165 | let text = document[frontmatter.len()..].to_string();
166 |
167 | let after_upstream = if let Ok(upstream) = call::Upstream::from_cfg(
168 | &cfg.upstream_command,
169 | &cfg.upstream,
170 | &cfg.upstream_separator,
171 | ) {
172 | log::debug!("calling upstream formatter: {}", cfg.upstream);
173 | call::upstream_formatter(&upstream, text, file_dir)?
174 | } else {
175 | log::debug!("not calling any upstream formatter");
176 | text
177 | };
178 |
179 | let processed = format!("{}{}", frontmatter, processor.process(after_upstream, 0));
180 | Ok((processed, document))
181 | }
182 |
183 | fn process_stdin<F>(mode: &cfg::OpMode, build_cfg: F, file_path: &PathBuf) -> Result<bool>
184 | where
185 | F: Fn(&str, &PathBuf) -> Result<cfg::PerFileCfg>,
186 | {
187 | log::debug!("processing content from stdin and writing to stdout");
188 | let text = fs::read_stdin();
189 |
190 | let config = build_cfg(&text, file_path).context("failed to build complete config")?;
191 |
192 | let file_dir = file_path
193 | .parent()
194 | .map(|el| el.to_path_buf())
195 | .unwrap_or(PathBuf::from("."));
196 | let (processed, text) = process(text, file_dir.as_path(), &config)?;
197 |
198 | // Decide what to output.
199 | match mode {
200 | cfg::OpMode::Format | cfg::OpMode::Both => {
201 | log::debug!("writing modified file to stdout");
202 | print!("{}", processed);
203 | }
204 | cfg::OpMode::Check => {
205 | log::debug!("writing original file to stdout in check mode");
206 | print!("{}", text);
207 | }
208 | }
209 |
210 | Ok(processed == text)
211 | }
212 |
213 | fn process_file<F>(mode: &cfg::OpMode, path: &PathBuf, build_cfg: F) -> Result<(String, String)>
214 | where
215 | F: Fn(&str, &PathBuf) -> Result<cfg::PerFileCfg>,
216 | {
217 | let report_path = path.to_string_lossy();
218 | log::debug!("processing {}", report_path);
219 |
220 | let (text, file_dir) = fs::get_file_content_and_dir(path)?;
221 | let config = build_cfg(&text, path).context("failed to build complete config")?;
222 | let (processed, text) = process(text, &file_dir, &config)?;
223 |
224 | // Decide whether to overwrite existing files.
225 | match mode {
226 | cfg::OpMode::Format | cfg::OpMode::Both => {
227 | if processed == text {
228 | log::debug!("keeping OK file {}", report_path);
229 | } else {
230 | log::debug!("modifying NOK file {} in place", report_path);
231 | std::fs::write(path, processed.as_bytes()).context("failed to write file")?;
232 | }
233 | }
234 | // Do not write anything in check mode.
235 | cfg::OpMode::Check => {
236 | log::debug!("not modifying file {} in check mode", report_path);
237 | }
238 | }
239 |
240 | Ok((processed, text))
241 | }
242 |
243 | fn read_config_file(path: &Path) -> Option<(PathBuf, cfg::CfgFile)> {
244 | let result = std::fs::read_to_string(path)
245 | .context("failed to read file")
246 | .and_then(|el| {
247 | toml::from_str::<cfg::CfgFile>(&el).context("that failed to parse due to error:")
248 | });
249 |
250 | match result {
251 | Ok(cfg) => {
252 | log::debug!("parsed config file {}", path.to_string_lossy());
253 | Some((path.to_path_buf(), cfg))
254 | }
255 | Err(err) => {
256 | log::error!("ignoring config file {} {:?}", path.to_string_lossy(), err);
257 | None
258 | }
259 | }
260 | }
261 |
262 | fn build_document_specific_config(
263 | document: &str,
264 | document_path: &Path,
265 | cli: &cfg::CliArgs,
266 | configs: &Vec<(PathBuf, cfg::CfgFile)>,
267 | ) -> Result<cfg::PerFileCfg> {
268 | let config_from_frontmatter = toml::from_str::<cfg::CfgFile>(
269 | &parse::get_value_for_mdslw_toml_yaml_key(&frontmatter::extract_frontmatter(document)),
270 | )
271 | .with_context(|| {
272 | format!(
273 | "failed to parse frontmatter entry as toml config:\n{}",
274 | document
275 | )
276 | })?;
277 | let config_tuple = [(document_path.to_path_buf(), config_from_frontmatter)];
278 | Ok(cfg::merge_configs(cli, config_tuple.iter().chain(configs)))
279 | }
280 |
281 | fn print_config_file() -> Result<()> {
282 | toml::to_string(&cfg::CfgFile::default())
283 | .context("converting to toml format")
284 | .map(|cfg| println!("{}", cfg))
285 | }
286 |
287 | fn main() -> Result<()> {
288 | // Perform actions that cannot be changed on a per-file level.
289 | // Argument parsing.
290 | let cli = cfg::CliArgs::parse();
291 | // Initialising logging.
292 | logging::init_logging(cli.verbose)?;
293 | // Generation of shell completion.
294 | if let Some(shell) = cli.completion {
295 | log::info!("generating shell completion for {}", shell);
296 | let mut cmd = cfg::CliArgs::command();
297 | let name = cmd.get_name().to_string();
298 | generate(shell, &mut cmd, name, &mut io::stdout());
299 | return Ok(());
300 | }
301 | // Generation of default config file.
302 | if cli.default_config {
303 | log::info!("writing default config file to stdout");
304 | return print_config_file();
305 | }
306 |
307 | // All other actions could technically be specified on a per-file level.
308 | let cwd = PathBuf::from(".");
309 | let unchanged = if cli.paths.is_empty() {
310 | let file_path = cli.stdin_filepath.clone().unwrap_or(PathBuf::from("STDIN"));
311 | let file_dir = file_path.parent().unwrap_or(cwd.as_path());
312 | let configs = fs::find_files_upwards(file_dir, CONFIG_FILE, &mut None)
313 | .into_iter()
314 | .filter_map(|el| read_config_file(&el))
315 | .collect::<Vec<_>>();
316 | let build_document_config = |document: &str, file_path: &PathBuf| {
317 | build_document_specific_config(document, file_path, &cli, &configs)
318 | };
319 | process_stdin(&cli.mode, build_document_config, &file_path)
320 | } else {
321 | let md_files = fs::find_files_with_extension(&cli.paths, &cli.extension)
322 | .context("failed to discover markdown files")?;
323 | log::debug!("will process {} markdown file(s) from disk", md_files.len());
324 | let config_files = {
325 | // Define a temporary cache to avoid scanning the same directories again and again.
326 | let mut cache = Some(HashSet::new());
327 | md_files
328 | .iter()
329 | .flat_map(|el| fs::find_files_upwards(el, CONFIG_FILE, &mut cache))
330 | .filter_map(|el| read_config_file(&el))
331 | .collect::<HashMap<_, _>>()
332 | };
333 | log::debug!("loaded {} configs from disk", config_files.len());
334 |
335 | // Set number of threads depending on user's choice.
336 | if let Some(num_jobs) = cli.jobs {
337 | rayon::ThreadPoolBuilder::new()
338 | .num_threads(num_jobs)
339 | .build_global()
340 | .context("failed to initialise processing thread-pool")?;
341 | }
342 |
343 | // Enable pager only for diff output.
344 | let diff_pager = if cli.report.is_diff_mode() {
345 | &cli.diff_pager
346 | } else {
347 | log::debug!("disabling possibly set diff pager for non-diff report");
348 | &None
349 | };
350 | let par_printer = call::ParallelPrinter::new(diff_pager)?;
351 |
352 | // Process all MD files we found.
353 | md_files
354 | .par_iter()
355 | .map(|path| {
356 | log::info!("processing markdown file {}", path.to_string_lossy());
357 | let configs = fs::UpwardsDirsIterator::new(path)
358 | .filter_map(|el| {
359 | config_files
360 | .get(&el.join(CONFIG_FILE))
361 | .map(|cfg| (el, cfg.clone()))
362 | })
363 | .collect::<Vec<_>>();
364 | let build_document_config = |document: &str, file_path: &PathBuf| {
365 | build_document_specific_config(document, file_path, &cli, &configs)
366 | };
367 | match process_file(&cli.mode, path, build_document_config) {
368 | Ok((processed, text)) => {
369 | if let Some(rep) = generate_report(&cli.report, &processed, &text, path) {
370 | par_printer.println(&rep);
371 | }
372 | Ok(processed == text)
373 | }
374 | Err(err) => {
375 | log::error!("failed to process {}: {:?}", path.to_string_lossy(), err);
376 | Err(Error::msg("there were errors processing at least one file"))
377 | }
378 | }
379 | })
380 | .reduce(
381 | || Ok(true),
382 | |a, b| match (a, b) {
383 | (Err(err), _) => Err(err),
384 | (_, Err(err)) => Err(err),
385 | (Ok(f1), Ok(f2)) => Ok(f1 && f2),
386 | },
387 | )
388 | };
389 |
390 | log::debug!("finished execution");
391 | // Process exit code.
392 | match unchanged {
393 | Ok(true) => Ok(()),
394 | Ok(false) => match cli.mode {
395 | cfg::OpMode::Format => Ok(()),
396 | cfg::OpMode::Check => Err(Error::msg("at least one processed file would be changed")),
397 | cfg::OpMode::Both => Err(Error::msg("at least one processed file changed")),
398 | },
399 | Err(err) => Err(err),
400 | }
401 | }
402 |
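403 | // What follows is an illustrative sketch rather than part of the original file:
404 | // it spells out the report strings produced by `generate_report` for the
405 | // non-diff report modes. The file name is an arbitrary example value.
406 | #[cfg(test)]
407 | mod report_sketch {
408 |     use super::*;
409 |
410 |     #[test]
411 |     fn state_and_changed_reports() {
412 |         let path = PathBuf::from("file.md");
413 |
414 |         // The "state" mode always reports, prefixing the file name with "C" for
415 |         // changed or "U" for unchanged content.
416 |         assert_eq!(
417 |             generate_report(&cfg::ReportMode::State, "new", "old", &path),
418 |             Some(String::from("C:file.md"))
419 |         );
420 |         assert_eq!(
421 |             generate_report(&cfg::ReportMode::State, "same", "same", &path),
422 |             Some(String::from("U:file.md"))
423 |         );
424 |
425 |         // The "changed" mode only reports files whose formatted content differs.
426 |         assert_eq!(
427 |             generate_report(&cfg::ReportMode::Changed, "same", "same", &path),
428 |             None
429 |         );
430 |     }
431 | }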
--------------------------------------------------------------------------------
/src/cfg.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use std::fmt;
19 | use std::path::PathBuf;
20 | use std::str::FromStr;
21 |
22 | use clap::{builder::OsStr, Parser, ValueEnum};
23 | use clap_complete::Shell;
24 | use serde::{Deserialize, Serialize};
25 |
26 | // Command-line interface definition.
27 |
28 | /// A generic value that knows its origin. That is, we use the "Default" variant when defining
29 | /// default values in the CliArgs struct but we always parse to the "Parsed" variant when parsing
30 | /// from a command line argument. That way, we can distinguish whether an option has been provided
31 | /// on the command line or was taken as a default.
32 | ///
33 | /// Note that default_value_t will perform a display-then-parse-again round trip, which means it
34 | /// actually does not matter whether we use the "Parsed" or the "Default" variant in the
35 | /// default_value_t bit. However, we explicitly add a zero-width space to the end of every default
36 | /// value to be able to determine whether the value is a default. Note that this will result in
37 | /// unexpected behaviour if a user ever adds such a character to the end of an argument, but what
38 | /// can you do. It's either that, or replacing clap, or not having config file support. In my view,
39 | /// config file support is worth this work-around.
40 | #[derive(Clone, Debug)]
41 | pub enum ValueWOrigin<T> {
42 | Default(T),
43 | Parsed(T),
44 | }
45 |
46 | impl<T> ValueWOrigin<T> {
47 | // All default values that can also come from config files will end in this character. It is the
48 | // UTF8 zero-width space. All terminals that I tested do not display that character, but it is
49 | // present in the internal default string. We append that character to every default value that
50 | // can also come from a config file. That way, we can actually determine whether a value is a
51 | // default or not. See the implementation of FromStr for this struct.
52 | const ZWS: char = '\u{200b}';
53 | const ZWS_LEN: usize = Self::ZWS.len_utf8();
54 |
55 | /// Get the correct value with the following precedence:
56 | /// - If we contain a "Parsed", return the value contained in it. The user has specified that
57 | /// on the command line, which means it takes precedence.
58 | /// - If we contain a "Default" and the other value contains a "Some", return that.
59 | /// That means the user has not specified that option on the command line, but a config file
60 | /// contains it.
61 | /// - Otherwise, return the value in the "Default".
62 | /// In that case, neither has the user specified that option on the command line, nor is it
63 | /// contained in any config file.
64 | fn resolve(&self, other: Option<T>) -> T
65 | where
66 | T: Clone,
67 | {
68 | match self {
69 | ValueWOrigin::Parsed(val) => val.clone(),
70 | ValueWOrigin::Default(val) => other.unwrap_or_else(|| val.clone()),
71 | }
72 | }
73 | }
74 |
75 | impl<T> FromStr for ValueWOrigin<T>
76 | where
77 | T: FromStr,
78 | {
79 | type Err = <T as FromStr>::Err;
80 |
81 | fn from_str(s: &str) -> Result<Self, Self::Err> {
82 | if s.ends_with(Self::ZWS) {
83 | match s[..s.len() - Self::ZWS_LEN].parse::<T>() {
84 | Ok(val) => Ok(Self::Default(val)),
85 | Err(err) => Err(err),
86 | }
87 | } else {
88 | match s.parse::<T>() {
89 | Ok(val) => Ok(Self::Parsed(val)),
90 | Err(err) => Err(err),
91 | }
92 | }
93 | }
94 | }
95 |
96 | impl<T> fmt::Display for ValueWOrigin<T>
97 | where
98 | T: fmt::Display,
99 | {
100 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
101 | match self {
102 | ValueWOrigin::Parsed(val) | ValueWOrigin::Default(val) => {
103 | write!(f, "{}", val)
104 | }
105 | }
106 | }
107 | }
108 |
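// What follows is an illustrative sketch rather than part of the original file:
// it spells out the round trip described in the comments above. A value that
// still carries the trailing zero-width space parses as a default, anything
// else counts as user-provided, and `resolve` prefers parsed values over
// config-file values over defaults. The numbers are arbitrary example values.
#[cfg(test)]
mod value_w_origin_sketch {
    use super::*;

    #[test]
    fn parsing_and_resolving_defaults() {
        let from_cli = "100".parse::<ValueWOrigin<usize>>().unwrap();
        assert!(matches!(from_cli, ValueWOrigin::Parsed(100)));

        let internal_default = "80\u{200b}".parse::<ValueWOrigin<usize>>().unwrap();
        assert!(matches!(internal_default, ValueWOrigin::Default(80)));

        // A parsed value always wins; a default yields to a config-file value and
        // is only used when no config file defines the option either.
        assert_eq!(ValueWOrigin::Parsed(3usize).resolve(Some(7)), 3);
        assert_eq!(ValueWOrigin::Default(3usize).resolve(Some(7)), 7);
        assert_eq!(ValueWOrigin::Default(3usize).resolve(None), 3);
    }
}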
109 | #[derive(Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
110 | pub enum OpMode {
111 | Both,
112 | Check,
113 | Format,
114 | }
115 |
116 | #[derive(Serialize, Deserialize, Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
117 | #[serde(rename_all = "kebab-case")]
118 | pub enum Case {
119 | Ignore,
120 | Keep,
121 | }
122 |
123 | impl FromStr for Case {
124 | type Err = String;
125 |
126 | fn from_str(s: &str) -> Result {
127 | match s {
128 | "keep" => Ok(Self::Keep),
129 | "ignore" => Ok(Self::Ignore),
130 | _ => Err(String::from("possible values: ignore, keep")),
131 | }
132 | }
133 | }
134 |
135 | impl fmt::Display for Case {
136 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
137 | match self {
138 | Self::Ignore => {
139 | write!(f, "ignore")
140 | }
141 | Self::Keep => {
142 | write!(f, "keep")
143 | }
144 | }
145 | }
146 | }
147 |
148 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
149 | pub enum ReportMode {
150 | None,
151 | Changed,
152 | State,
153 | DiffMyers,
154 | DiffPatience,
155 | DiffLcs,
156 | }
157 |
158 | impl ReportMode {
159 | pub fn is_diff_mode(&self) -> bool {
160 | self == &ReportMode::DiffMyers
161 | || self == &ReportMode::DiffPatience
162 | || self == &ReportMode::DiffLcs
163 | }
164 | }
165 |
166 | #[derive(Parser, Debug)]
167 | #[command(author, version, about, long_about = None)]
168 | pub struct CliArgs {
169 | /// Paths to files or directories that shall be processed.
170 | pub paths: Vec<PathBuf>,
171 | /// The maximum line width that is acceptable. A value of 0 disables wrapping of{n} long
172 | /// lines.
173 | #[arg(
174 | short = 'w',
175 | long,
176 | env = "MDSLW_MAX_WIDTH",
177 | default_value = "80\u{200b}"
178 | )]
179 | pub max_width: ValueWOrigin<usize>,
180 | /// A set of characters that are acceptable end of sentence markers.
181 | #[arg(short, long, env = "MDSLW_END_MARKERS", default_value = "?!:.\u{200b}")]
182 | pub end_markers: ValueWOrigin<String>,
183 | /// Mode of operation: "check" means exit with error if format has to be adjusted but do not
184 | /// format,{n} "format" means format the file and exit with error in case of problems only,
185 | /// "both" means do both{n} (useful as pre-commit hook).
186 | #[arg(value_enum, short, long, env = "MDSLW_MODE", default_value_t = OpMode::Format)]
187 | pub mode: OpMode,
188 | /// A space-separated list of languages whose suppression words as specified by unicode should
189 | /// be {n} taken into account. See here for all languages:
190 | /// {n} https://github.com/unicode-org/cldr-json/tree/main/cldr-json/cldr-segments-full/segments
191 | /// {n} Use "none" to disable.
192 | /// Supported languages are: de en es fr it. Use "ac" for "author's choice",{n} a list
193 | /// for the English language defined by this tool's author.
194 | #[arg(short, long, env = "MDSLW_LANG", default_value = "ac\u{200b}")]
195 | pub lang: ValueWOrigin<String>,
196 | /// Space-separated list of words that end in one of END_MARKERS but that should not be
197 | /// followed by a line{n} break. This is in addition to what is specified via --lang.
198 | #[arg(short, long, env = "MDSLW_SUPPRESSIONS", default_value = "\u{200b}")]
199 | pub suppressions: ValueWOrigin<String>,
200 | /// Space-separated list of words that end in one of END_MARKERS and that should be
201 | /// removed{n} from the list of suppressions.
202 | #[arg(short, long, env = "MDSLW_IGNORES", default_value = "\u{200b}")]
203 | pub ignores: ValueWOrigin<String>,
204 | /// Specify an upstream auto-formatter that reads from stdin and writes to stdout.
205 | /// {n} It will be called before mdslw will run. Useful if you want to chain multiple
206 | /// tools.{n} Specify the command that will be executed. For example, specify "prettier"
207 | /// to{n} call prettier first.
208 | /// The upstream auto-formatter runs in each file's directory if PATHS are{n} specified.
209 | #[arg(long, env = "MDSLW_UPSTREAM_COMMAND", default_value = "\u{200b}")]
210 | pub upstream_command: ValueWOrigin<String>,
211 | /// Specify the arguments for the upstream auto-formatter. If --upstream-command is not set,
212 | /// {n} the first word will be used as command. For example, with
213 | /// --upstream-command="prettier",{n} set --upstream="--parser=markdown" to enable markdown
214 | /// parsing.
215 | #[arg(short, long, env = "MDSLW_UPSTREAM", default_value = "\u{200b}")]
216 | pub upstream: ValueWOrigin<String>,
217 | /// Specify a string that will be used to separate the value passed to --upstream into words.
218 | /// {n} If empty, splitting is based on whitespace.
219 | #[arg(long, env = "MDSLW_UPSTREAM_SEPARATOR", default_value = "\u{200b}")]
220 | pub upstream_separator: ValueWOrigin<String>,
221 | /// How to handle the case of provided suppression words, both via --lang
222 | /// and{n} --suppressions. Possible values: ignore, keep
223 | #[arg(short, long, env = "MDSLW_CASE", default_value = "ignore\u{200b}")]
224 | pub case: ValueWOrigin<Case>,
225 | /// The file extension used to find markdown files when an entry in{n} PATHS is a directory.
226 | #[arg(long, env = "MDSLW_EXTENSION", default_value_t = String::from(".md"))]
227 | pub extension: String,
228 | // The "." below is used to cause clap to format the help message nicely.
229 | /// Comma-separated list of optional features to enable or disable. Currently, the following
230 | /// are supported:
231 | /// {n} * keep-spaces-in-links => do not replace spaces in link texts by non-breaking spaces
232 | /// {n} * keep-linebreaks => do not remove existing linebreaks during the line-wrapping
233 | /// process
234 | /// {n} * format-block-quotes => format text in block quotes
235 | /// {n} * collate-link-defs => gather all link definitions, i.e. `[link name]: url`, in a
236 | /// block at the end{n} of the document in alphabetical order, sorted
237 | /// case-insensitively; links can be categorised with{n} comments as
238 | /// ``, which will cause sorting per category
239 | /// {n} * outsource-inline-links => replace all inline links by named links using a link
240 | /// definition,{n} i.e. `[link](url)` becomes `[link][def]` and `[def]: url`
241 | /// {n} .
242 | #[arg(long, env = "MDSLW_FEATURES", default_value = "\u{200b}")]
243 | pub features: ValueWOrigin<String>,
244 | /// Output shell completion file for the given shell to stdout and exit.{n} .
245 | #[arg(value_enum, long, env = "MDSLW_COMPLETION")]
246 | pub completion: Option<Shell>,
247 | /// Specify the number of threads to use for processing files from disk in parallel. Defaults
248 | /// to the number of{n} logical processors.
249 | #[arg(short, long, env = "MDSLW_JOBS")]
250 | pub jobs: Option<usize>,
251 | /// What to report to stdout, ignored when reading from stdin:
252 | /// {n} * "none" => report nothing but be silent instead
253 | /// {n} * "changed" => output the names of files that were changed
254 | /// {n} * "state" => output : where is "U" for "unchanged" or
255 | /// "C" for "changed"
256 | /// {n} * "diff-myers" => output a unified diff based on the myers algorithm
257 | /// {n} * "diff-patience" => output a unified diff based on the patience algorithm
258 | /// {n} * "diff-lcs" => output a unified diff based on the lcs algorithm
259 | /// {n} .
260 | #[arg(value_enum, short, long, env = "MDSLW_REPORT", default_value_t = ReportMode::None)]
261 | pub report: ReportMode,
262 | /// Specify a downstream pager for diffs (with args) that reads diffs from stdin.
263 | /// {n} Useful if you want to display a diff nicely. For example, specify
264 | /// {n} "delta --side-by-side" to get a side-by-side view.
265 | #[arg(value_enum, short, long, env = "MDSLW_DIFF_PAGER")]
266 | pub diff_pager: Option<String>,
267 | /// The path to the file that is read from stdin. This is used to determine relevant config
268 | /// files{n} when reading from stdin and to run an upstream formatter.
269 | #[arg(long, env = "MDSLW_STDIN_FILEPATH")]
270 | pub stdin_filepath: Option<PathBuf>,
271 | /// Output the default config file in TOML format to stdout and exit.
272 | #[arg(long, env = "MDSLW_DEFAULT_CONFIG")]
273 | pub default_config: bool,
274 | /// Specify to increase verbosity of log output. Specify multiple times to increase even
275 | /// further.
276 | #[arg(short, long, action = clap::ArgAction::Count)]
277 | pub verbose: u8,
278 | }
279 |
280 | #[derive(Debug, PartialEq)]
281 | pub struct PerFileCfg {
282 | pub max_width: usize,
283 | pub end_markers: String,
284 | pub lang: String,
285 | pub suppressions: String,
286 | pub ignores: String,
287 | pub upstream_command: String,
288 | pub upstream: String,
289 | pub upstream_separator: String,
290 | pub case: Case,
291 | pub features: String,
292 | }
293 |
294 | #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
295 | #[serde(rename_all = "kebab-case", deny_unknown_fields)]
296 | pub struct CfgFile {
297 | pub max_width: Option<usize>,
298 | pub end_markers: Option<String>,
299 | pub lang: Option<String>,
300 | pub suppressions: Option<String>,
301 | pub ignores: Option<String>,
302 | pub upstream_command: Option<String>,
303 | pub upstream: Option<String>,
304 | pub upstream_separator: Option<String>,
305 | pub case: Option<Case>,
306 | pub features: Option<String>,
307 | }
308 |
309 | impl CfgFile {
310 | /// Merge one config file into this one. Some-values in self take precedence. The return value
311 | /// indicates whether all fields of the struct are fully defined, which means that further
312 | /// merging won't have any effect.
313 | pub fn merge_with(&mut self, other: &Self) -> bool {
314 | let mut fully_defined = true;
315 |
316 | // Reduce code duplication with a macro.
317 | macro_rules! merge_field {
318 | ($field:ident) => {
319 | if self.$field.is_none() {
320 | self.$field = other.$field.clone();
321 | }
322 | fully_defined = fully_defined && self.$field.is_some();
323 | };
324 | }
325 |
326 | merge_field!(max_width);
327 | merge_field!(end_markers);
328 | merge_field!(lang);
329 | merge_field!(suppressions);
330 | merge_field!(ignores);
331 | merge_field!(upstream_command);
332 | merge_field!(upstream);
333 | merge_field!(upstream_separator);
334 | merge_field!(case);
335 | merge_field!(features);
336 |
337 | fully_defined
338 | }
339 |
340 | fn new() -> Self {
341 | Self {
342 | max_width: None,
343 | end_markers: None,
344 | lang: None,
345 | suppressions: None,
346 | ignores: None,
347 | upstream_command: None,
348 | upstream: None,
349 | upstream_separator: None,
350 | case: None,
351 | features: None,
352 | }
353 | }
354 | }
355 |
356 | impl Default for CfgFile {
357 | fn default() -> Self {
358 |         let no_args: Vec<String> = vec![];
359 | let default_cli = CliArgs::parse_from(no_args);
360 |
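        // Build a config file struct in which every field is set to the value that the CLI
        // layer resolves to when no config file provides anything, i.e. the built-in default.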
361 | macro_rules! merge_fields {
362 | (@ | $($result:tt)*) => { Self{ $($result)* } };
363 | (@ $name:ident $($names:ident)* | $($result:tt)*) => {
364 | merge_fields!(
365 | @ $($names)* |
366 | $name: Some(default_cli.$name.resolve(None)),
367 | $($result)*
368 | )
369 | };
370 | ($($names:ident)*) => { merge_fields!(@ $($names)* | ) };
371 | }
372 |
373 | merge_fields!(max_width end_markers lang suppressions ignores upstream_command upstream upstream_separator case features)
374 | }
375 | }
376 |
377 | pub fn merge_configs<'a, I>(cli: &CliArgs, files: I) -> PerFileCfg
378 | where
379 |     I: IntoIterator<Item = &'a (PathBuf, CfgFile)>,
380 | {
381 | let mut merged = CfgFile::new();
382 | for (path, other) in files {
383 | log::debug!("merging config file {}", path.to_string_lossy());
384 | if merged.merge_with(other) {
385 | log::debug!("config fully defined, stopping merge");
386 | break;
387 | }
388 | }
389 | log::debug!("configuration loaded from files: {:?}", merged);
390 | log::debug!("configuration loaded from CLI: {:?}", cli);
391 |
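    // For every field, resolve the value given on the command line (or via the environment)
    // against the value merged from the config files; per the documented precedence, the
    // command-line value wins whenever it was set explicitly.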
392 | macro_rules! merge_fields {
393 | (@ | $($result:tt)*) => { PerFileCfg{ $($result)* } };
394 | (@ $name:ident $($names:ident)* | $($result:tt)*) => {
395 | merge_fields!(
396 | @ $($names)* |
397 | $name: cli.$name.resolve(merged.$name),
398 | $($result)*
399 | )
400 | };
401 | ($($names:ident)*) => { merge_fields!(@ $($names)* | ) };
402 | }
403 |
404 | let result = merge_fields!(max_width end_markers lang suppressions ignores upstream_command upstream upstream_separator case features);
405 | log::debug!("merged configuration: {:?}", result);
406 | result
407 | }
408 |
409 | #[cfg(test)]
410 | mod test {
411 | use super::*;
412 |
413 | // Actual tests follow.
414 | #[test]
415 | fn merging_two_partially_defined_config_files() {
416 | let mut main_cfg = CfgFile {
417 | max_width: Some(10),
418 | end_markers: None,
419 | lang: None,
420 | suppressions: None,
421 | ignores: Some("some words".into()),
422 | upstream_command: None,
423 | upstream: None,
424 | upstream_separator: None,
425 | case: None,
426 | features: None,
427 | };
428 | let other_cfg = CfgFile {
429 | max_width: None,
430 | end_markers: None,
431 | lang: Some("ac".into()),
432 | suppressions: None,
433 | ignores: None,
434 | upstream_command: Some("some".into()),
435 | upstream: None,
436 | upstream_separator: None,
437 | case: None,
438 | features: Some("feature".into()),
439 | };
440 |
441 | let fully_defined = main_cfg.merge_with(&other_cfg);
442 | assert!(!fully_defined);
443 |
444 | let expected_cfg = CfgFile {
445 | max_width: Some(10),
446 | end_markers: None,
447 | lang: Some("ac".into()),
448 | suppressions: None,
449 | ignores: Some("some words".into()),
450 | upstream_command: Some("some".into()),
451 | upstream: None,
452 | upstream_separator: None,
453 | case: None,
454 | features: Some("feature".into()),
455 | };
456 |
457 | assert_eq!(expected_cfg, main_cfg);
458 | }
459 |
460 | #[test]
461 | fn options_in_main_config_are_kept() {
462 | let mut main_cfg = CfgFile {
463 | max_width: Some(10),
464 | end_markers: None,
465 | lang: None,
466 | suppressions: None,
467 | ignores: Some("some words".into()),
468 | upstream_command: None,
469 | upstream: None,
470 | upstream_separator: None,
471 | case: None,
472 | features: None,
473 | };
474 | let other_cfg = CfgFile {
475 | max_width: Some(20),
476 | end_markers: None,
477 | lang: None,
478 | suppressions: None,
479 | ignores: Some("some other words".into()),
480 | upstream_command: None,
481 | upstream: None,
482 | upstream_separator: None,
483 | case: None,
484 | features: None,
485 | };
486 | assert_ne!(main_cfg, other_cfg);
487 |
488 | let fully_defined = main_cfg.merge_with(&other_cfg);
489 | assert!(!fully_defined);
490 |
491 | let expected_cfg = CfgFile {
492 | max_width: Some(10),
493 | end_markers: None,
494 | lang: None,
495 | suppressions: None,
496 | ignores: Some("some words".into()),
497 | upstream_command: None,
498 | upstream: None,
499 | upstream_separator: None,
500 | case: None,
501 | features: None,
502 | };
503 |
504 | assert_eq!(expected_cfg, main_cfg);
505 | }
506 |
507 | #[test]
508 | fn fully_defined_config_is_immutable() {
509 | let mut main_cfg = CfgFile {
510 | max_width: None,
511 | end_markers: None,
512 | lang: None,
513 | suppressions: None,
514 | ignores: None,
515 | upstream_command: None,
516 | upstream: None,
517 | upstream_separator: None,
518 | case: None,
519 | features: None,
520 | };
521 | let missing_options = CfgFile {
522 | max_width: Some(20),
523 | end_markers: Some("marker".into()),
524 | lang: Some("lang".into()),
525 | suppressions: Some("suppressions".into()),
526 | ignores: Some("some other words".into()),
527 | upstream_command: Some("upstream-command".into()),
528 | upstream: Some("upstream".into()),
529 | upstream_separator: Some("sep".into()),
530 | case: Some(Case::Ignore),
531 | features: Some("feature".into()),
532 | };
533 | let other_options = CfgFile {
534 | max_width: Some(10),
535 | end_markers: Some("nothing".into()),
536 | lang: Some("asdf".into()),
537 | suppressions: Some("just text".into()),
538 | ignores: Some("ignore this".into()),
539 | upstream_command: Some("does not matter".into()),
540 | upstream: Some("swimming is nice".into()),
541 | upstream_separator: Some("let's not split up".into()),
542 | case: Some(Case::Keep),
543 | features: Some("everything".into()),
544 | };
545 |
546 | let fully_defined = main_cfg.merge_with(&missing_options);
547 | assert!(fully_defined);
548 | let fully_defined = main_cfg.merge_with(&other_options);
549 | assert!(fully_defined);
550 |
551 | let expected_cfg = CfgFile {
552 | max_width: Some(20),
553 | end_markers: Some("marker".into()),
554 | lang: Some("lang".into()),
555 | suppressions: Some("suppressions".into()),
556 | ignores: Some("some other words".into()),
557 | upstream_command: Some("upstream-command".into()),
558 | upstream: Some("upstream".into()),
559 | upstream_separator: Some("sep".into()),
560 | case: Some(Case::Ignore),
561 | features: Some("feature".into()),
562 | };
563 |
564 | assert_eq!(expected_cfg, main_cfg);
565 | }
566 |
567 | #[test]
568 | fn merging_cli_with_two_config_files() {
569 | let main_cfg = CfgFile {
570 | max_width: Some(10),
571 | end_markers: None,
572 | lang: None,
573 | suppressions: None,
574 | ignores: Some("some words".into()),
575 | upstream_command: None,
576 | upstream: None,
577 | upstream_separator: None,
578 | case: None,
579 | features: None,
580 | };
581 | let other_cfg = CfgFile {
582 | max_width: None,
583 | end_markers: None,
584 | lang: Some("ac".into()),
585 | suppressions: None,
586 | ignores: None,
587 | upstream_command: None,
588 | upstream: None,
589 | upstream_separator: None,
590 | case: None,
591 | features: Some("feature".into()),
592 | };
593 | let default_cfg = CfgFile::default();
594 |
595 | let files = vec![
596 | (PathBuf::from("main"), main_cfg),
597 | (PathBuf::from("other"), other_cfg),
598 | (PathBuf::from("default"), default_cfg),
599 | ];
600 |         let no_args: Vec<String> = vec![];
601 | let cli = CliArgs::parse_from(no_args);
602 | let merged = merge_configs(&cli, &files);
603 |
604 | let expected_cfg = PerFileCfg {
605 | max_width: 10,
606 | end_markers: "?!:.".into(),
607 | lang: "ac".into(),
608 | suppressions: "".into(),
609 | ignores: "some words".into(),
610 | upstream_command: "".into(),
611 | upstream: "".into(),
612 | upstream_separator: "".into(),
613 | case: Case::Ignore,
614 | features: "feature".into(),
615 | };
616 |
617 | assert_eq!(expected_cfg, merged);
618 | }
619 | }
620 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Prepare your markdown for easy diff'ing!
2 |
3 |
4 |
5 | - [About](#about)
6 | - [Motivation](#motivation)
7 | - [Pronunciation](#pronunciation)
8 | - [Working Principle](#working-principle)
9 | - [Caveats](#caveats)
10 | - [About Markdown Extensions](#about-markdown-extensions)
11 | - [Command Reference](#command-reference)
12 | - [Command Line Arguments](#command-line-arguments)
13 | - [Automatic File Discovery](#automatic-file-discovery)
14 | - [Environment Variables](#environment-variables)
15 | - [Config Files](#config-files)
16 | - [Per-File Configuration](#per-file-configuration)
17 | - [Installation](#installation)
18 | - [Building From Source](#building-from-source)
19 | - [Editor Integration](#editor-integration)
20 | - [neovim](#neovim)
21 | - [vim](#vim)
22 | - [VS Code](#vs-code)
23 | - [Tips And Tricks](#tips-and-tricks)
24 | - [Non-Breaking Spaces](#non-breaking-spaces)
25 | - [Disabling Auto-Formatting](#disabling-auto-formatting)
26 | - [How To Contribute](#how-to-contribute)
27 | - [Licence](#licence)
28 |
29 |
30 |
31 | # About
32 |
33 | This is `mdslw`, the MarkDown Sentence Line Wrapper, an auto-formatter that
34 | prepares your markdown for easy diff'ing.
35 |
36 | # Motivation
37 |
38 | Markdown documents are written for different purposes.
39 | Some of them are meant to be read in plain text, while others are first rendered
40 | and then presented to the reader.
41 | In the latter case, the documents are often kept in version control and edited
42 | with the same workflows as other code.
43 |
44 | When editing source code, software developers do not want changes in one
45 | location to show up as changes in unrelated locations.
46 | Now imagine a markdown document like this:
47 |
48 | ```markdown
49 | # Lorem Ipsum
50 |
51 | Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Sed do eiusmod tempor
52 | incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam.
53 | ```
54 |
55 | Adding the new sentence `Excepteur sint occaecat cupidatat non proident.` after
56 | the second one and re-arranging the text as a block would result in a diff view
57 | like this that shows changes in several lines:
58 |
59 | ```diff
60 | 3,4c3,5
61 | < Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Sed do eiusmod tempor
62 | < incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam.
63 | ---
64 | > Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Excepteur sint occaecat
65 | > cupidatat non proident. Sed do eiusmod tempor incididunt ut labore et dolore
66 | > magna aliqua. Ut enim ad minim veniam.
67 | ```
68 |
69 | Now imagine the original text had a line break after every sentence, i.e. it had
70 | looked like this:
71 |
72 | ```markdown
73 | # Lorem Ipsum
74 |
75 | Lorem ipsum dolor sit amet.
76 | Consectetur adipiscing elit.
77 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
78 | Ut enim ad minim veniam.
79 | ```
80 |
81 | For text formatted like this, a diff would only show up for the sentences that
82 | are actually affected, simplifying the review process:
83 |
84 | ```diff
85 | 4a5
86 | > Excepteur sint occaecat cupidatat non proident.
87 | ```
88 |
89 | Most rendering engines treat a single linebreak like a single space.
90 | Thus, both documents would be identical when presented to the reader even though
91 | the latter is significantly nicer to keep up to date with version control.
92 | The tool `mdslw` aims to auto-format markdown documents in exactly this way.
93 |
94 | # Pronunciation
95 |
96 | If you are wondering how to pronounce `mdslw`, you can either say each letter
97 | individually or pronounce it like mud-slaw (`mʌd-slɔ`).
98 |
99 | # Working Principle
100 |
101 | The tool `mdslw` operates according to a very simple process that can be
102 | described as follows:
103 |
104 | - Parse the document and determine areas in the document that contain text.
105 | Only process those.
106 | - There exists a limited number of characters (`.!?:` by default) that serve as
107 | end-of-sentence markers if they occur alone.
108 | If such a character is followed by whitespace, it denotes the end of a
109 | sentence, _unless_ the last word before the character is part of a known set
110 | of words, matched case-insensitively by default.
111 | Those words can be taken from an included list for a specific language and
112 | also specified directly.
113 | - Insert a line break after every character that ends a sentence, but keep
114 |   indents in lists and enumerations intact.
115 | - Collapse all consecutive whitespace into a single space.
116 | While doing so, preserve both [non-breaking spaces] and linebreaks that are
117 | preceded by [non-breaking spaces].
118 | - Before line wrapping, replace all spaces in link texts by
119 | [non-breaking spaces].
120 | - Wrap lines that are longer than the maximum line width (80 characters by
121 | default) without splitting words or splitting at [non-breaking spaces] while
122 |   also keeping indents intact.
123 |
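To see the effect of this process, you can pipe a short snippet through
`mdslw`. This is only a sketch that assumes `mdslw` is installed and on your
`PATH`; with the default settings, each sentence should end up on its own line:

```bash
printf 'Lorem ipsum dolor sit amet. Consectetur adipiscing elit.\n' | mdslw
# Expected output:
# Lorem ipsum dolor sit amet.
# Consectetur adipiscing elit.
```
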
124 | In contrast to most other tools the author could find, `mdslw` does not parse
125 | the entire document into an internal data structure just to render it back
126 | because that might result in changes in unexpected locations.
127 | Instead, it adjusts only those areas that do contain text that can be wrapped.
128 | That is, `mdslw` never touches any parts of a document that cannot be
129 | line-wrapped automatically.
130 | That includes, for example, code blocks, HTML blocks, and pipe tables.
131 |
132 | ## Caveats
133 |
134 | - The default settings of `mdslw` are strongly geared towards the English
135 | language, even though it works for other languages, too.
136 | - Like with any other auto-formatter, you give up some freedom for the benefit
137 | of automatic handling of certain issues.
138 | - Inline code sections are wrapped like any other text, which may cause issues
139 | with certain renderers.
140 | - While `mdslw` has been tested with documents containing unicode characters
141 | such as emojis, the outcome can still be unexpected.
142 | For example, any emoji is treated as a single character when determining line
143 | width even though some editors might draw certain emojis wider.
144 | Any feedback is welcome!
145 | - Since `mdslw` collapses all consecutive whitespace into a single space during
146 | the line-wrapping process, it does not work well with documents using tabs in
147 | text.
148 | A tab, including all whitespace before and after it, will also be replaced by
149 | a single space.
150 | Use the `keep-linebreaks` feature and prefix linebreaks by
151 | [non-breaking spaces] to influence this behaviour.
152 | - There are flavours of markdown that define additional markup syntax that
153 | `mdslw` cannot recognise but instead detects as text.
154 |   Consequently, `mdslw` might cause formatting changes that cause such special
155 | syntax to be lost.
156 | You can use [non-breaking spaces] to work around that.
157 | - Some line breaks added by `mdslw` might not be considered nice looking.
158 | Use [non-breaking spaces] instead of normal ones to prevent a line break at a
159 |   given position.
160 |
161 | ## About Markdown Extensions
162 |
163 | There are quite a lot of markdown extensions out there.
164 | It is not possible for `mdslw` to support all of them.
165 | Instead, `mdslw` aims at supporting CommonMark as well as _some_ extensions used
166 | by its users.
167 | A new extension can be supported if supporting it does not negatively impact
168 | CommonMark support and if support can be added relatively easily.
169 | Please feel free to suggest support for a new extension as a
170 | [contribution](#how-to-contribute).
171 |
172 | # Command Reference
173 |
174 | Call as:
175 |
176 | ```bash
177 | mdslw [OPTIONS] [PATHS]...
178 | ```
179 |
180 | A `PATH` can point to a file or a directory.
181 | If it is a file, then it will be auto-formatted irrespective of its extension.
182 | If it is a directory, then `mdslw` will discover all files ending in `.md`
183 | recursively and auto-format those.
184 | If you do not specify any path, then `mdslw` will read from stdin and write to
185 | stdout.
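
For illustration, these are the three modes of invocation just described; the
file and directory names are placeholders:

```bash
# Format a single file in place, irrespective of its extension.
mdslw notes.txt
# Discover all files ending in .md below docs/ and format them.
mdslw docs/
# Read a document from stdin and write the formatted result to stdout.
mdslw < input.md > formatted.md
```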
186 |
187 | The following is a list of all supported
188 | [command line arguments](#command-line-arguments).
189 | Note that you can also configure `mdslw` via
190 | [environment variables](#environment-variables) or
191 | [config files](#config-files).
192 | Values are resolved in the following order:
193 |
194 | - Defaults
195 | - Config files
196 | - Environment variables
197 | - Command line arguments
198 |
199 | ## Command Line Arguments
200 |
201 | - `--help`:
202 | Print the help message.
203 | - `--version`:
204 | Print the tool's version number.
205 | - `--max-width <MAX_WIDTH>`:
206 | The maximum line width that is acceptable.
207 | A value of 0 disables wrapping of long lines altogether.
208 | The default value is 80.
209 | - `--end-markers <END_MARKERS>`:
210 | The set of characters that are end of sentence markers, defaults to `?!:.`.
211 | - `--mode <MODE>`:
212 | A value of `check` means to exit with an error if the format had to be
213 | adjusted but not to perform any formatting.
214 | A value of `format`, the default, means to format the file and exit with
215 | success.
216 | A value of `both` means to do both (useful when used as a `pre-commit` hook).
217 | - `--lang <LANG>`:
218 |   A space-separated list of languages whose suppression words, as specified by
219 |   Unicode, should be taken into account.
220 | See [here][unicode] for all languages.
221 | Currently supported are `en`, `de`, `es`, `fr`, and `it`.
222 | Use `none` to disable.
223 | Use `ac` (the default) for "author's choice", a list for the English language
224 | defined and curated by this tool's author.
225 | - `--suppressions <SUPPRESSIONS>`:
226 | A space-separated list of words that end in one of `END_MARKERS` but that
227 | should not be followed by a line break.
228 | This is in addition to what is specified via `--lang`.
229 | Defaults to the empty string.
230 | - `--ignores <IGNORES>`:
231 | Space-separated list of words that end in one of `END_MARKERS` and that should
232 | be removed from the list of suppressions.
233 | Defaults to the empty string.
234 | - `--upstream-command <UPSTREAM_COMMAND>`:
235 |   Specify an upstream auto-formatter that reads from stdin and writes to stdout.
236 |   It will be called before `mdslw` runs.
237 |   This is useful if you want to chain multiple tools.
238 |   Specify the command that will be executed.
239 |   For example, specify `prettier` to call `prettier` first.
240 |   The upstream auto-formatter runs in each file's directory if `PATHS` are
241 |   specified.
242 | - `--upstream <UPSTREAM>`:
243 |   Specify the arguments for the upstream auto-formatter.
244 |   If `--upstream-command` is not set, the first word will be used as command.
245 |   For example, with `--upstream-command="prettier"`, use
246 |   `--upstream="--parser=markdown"` to enable markdown parsing.
247 | - `--upstream-separator <UPSTREAM_SEPARATOR>`:
248 | Specify a string that will be used to separate the value passed to
249 | `--upstream` into words.
250 | If empty, splitting is based on whitespace.
259 | - `--case <CASE>`:
260 | How to handle the case of provided suppression words, both via `--lang` and
261 | `--suppressions`.
262 | A value of `ignore`, the default, means to match case-insensitively while a
263 | value of `keep` means to match case-sensitively.
264 | - `--extension <EXTENSION>`:
265 | The file extension used to find markdown files when a `PATH` is a directory,
266 | defaults to `.md`.
267 | - `--features <FEATURES>`:
268 | Comma-separated list of optional features to enable or disable.
269 | Currently, the following are supported (the opposite setting is the default in
270 | each case):
271 | - `keep-spaces-in-links`:
272 | Do not replace spaces in link texts by [non-breaking spaces].
273 | - `keep-linebreaks`:
274 | Do not remove existing linebreaks during the line-wrapping process.
275 | - `format-block-quotes`:
276 | Format text in block quotes.
277 | - `collate-link-defs`:
278 | Gather all link definitions, i.e. `[link name]: url`, in a block at the end
279 | of the document in alphabetical order, sorted case-insensitively.
280 | Links can be defined as belonging to a category called `CATEGORY_NAME` with
281 | the comment ``.
282 | Each link definition following such a comment will be considered as part of
283 | the specified category.
284 | Link definitions will be sorted per category and categories will also be
285 | sorted by name.
286 | - `outsource-inline-links`:
287 | Replace all inline links by named links using a link definition, i.e.
288 | `[link](url)` becomes `[link][def]` and `[def]: url`.
289 | All new link definitions will be added at the end of the document.
290 | Existing link definitions will be reused.
291 | Link definitions in block quotes will be put at the end of the block quote
292 | if `format-block-quotes` is set.
293 | - `--completion <COMPLETION>`:
294 | Output shell completion file for the given shell to stdout and exit.
295 | The following shells are supported:
296 | bash, elvish, fish, powershell, zsh.
297 | - `--jobs <JOBS>`:
298 | Specify the number of threads to use for processing files from disk in
299 | parallel.
300 | Defaults to the number of logical processors.
301 | - `--report <REPORT>`:
302 |   What to report to stdout, ignored when reading from stdin (example below):
303 | - `none`, the default:
304 | Report nothing but be silent instead, which is useful in scripts.
305 | - `changed`:
306 | Output the names of files that were changed, which is useful for downstream
307 | processing with tools such as `xargs`.
308 | - `state`:
309 |     Output `<FILE>:<STATE>` where `<STATE>` is `U` for "unchanged" or `C`
310 | for "changed", which is useful for downstream filtering with tools such as
311 | `grep`.
312 | - `diff-myers`:
313 | Output a unified diff based on the [myers algorithm].
314 | Pipe the output to tools such as [bat], [delta], or [diff-so-fancy] to get
315 | syntax highlighting.
316 | You can use the `--diff-pager` setting to define such a pager.
317 | - `diff-patience`:
318 | Output a unified diff based on the [patience algorithm].
319 | See `diff-myers` for useful downstream tools.
320 | - `diff-lcs`:
321 | Output a unified diff based on the [lcs algorithm].
322 | See `diff-myers` for useful downstream tools.
323 | - `--diff-pager <DIFF_PAGER>`:
324 | Specify a downstream pager for diffs (with args) that reads diffs from stdin.
325 | This is useful if you want to display a diff nicely.
326 | For example, specify `delta --side-by-side` to get a side-by-side view.
327 | This flag is ignored unless a diff-type report has been requested.
328 | - `--stdin-filepath <STDIN_FILEPATH>`:
329 | The path to the file that is read from stdin.
330 | This is used to determine relevant config files when reading from stdin and to
331 | run an upstream formatter.
332 | Defaults to the current working directory.
333 | - `--default-config`:
334 | Output the default config file in TOML format to stdout and exit.
335 | - `--verbose`:
336 | Specify to increase verbosity of log output.
337 | Specify multiple times to increase even further.
338 |
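The following sketch combines several of the options above; all values are
examples rather than recommendations:

```bash
# Format everything below the current directory and print the names of the
# files that were actually changed, e.g. for further processing with xargs.
mdslw --report=changed .
# Use a larger line width and additional suppression words while chaining
# prettier as an upstream auto-formatter.
mdslw --max-width=100 --suppressions="approx. resp." \
  --upstream-command="prettier" --upstream="--parser=markdown" docs/
# Report the applied changes as a unified diff and display it with delta.
mdslw --report=diff-myers --diff-pager="delta --side-by-side" .
```
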
339 | ## Automatic File Discovery
340 |
341 | This tool uses the [ignore crate] in its default settings to discover files when
342 | given a directory as a `PATH`.
343 | Details about those defaults can be found [here][ignore defaults].
344 | Briefly summarised, the following rules apply when deciding whether a file shall
345 | be ignored:
346 |
347 | - Hidden files (starting with `.`) are ignored.
348 | - Files matching patterns specified in a file called `.ignore` are ignored.
349 | The patterns affect all files in the same directory or child directories.
350 | - If run inside a git repository, files matching patterns specified in a file
351 | called `.gitignore` are ignored.
352 | The patterns affect all files in the same directory or child directories.
353 |
354 | If you wish to format a file that is being ignored by `mdslw`, then pass it as
355 | an argument directly.
356 | Files passed as arguments are never ignored and will always be processed.
357 |
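As a brief example of these rules (the paths are made up):

```bash
# Hidden files and anything matching .ignore or .gitignore patterns are
# skipped during discovery.
mdslw .
# A file passed directly is always formatted, even if a pattern ignores it.
mdslw drafts/ignored-draft.md
```
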
358 | ## Environment Variables
359 |
360 | Instead of or in addition to configuring `mdslw` via
361 | [command line arguments](#command-line-arguments) or
362 | [config files](#config-files), you can configure it via environment variables.
363 | For any command line option `--some-option=value`, you can instead set an
364 | environment variable `MDSLW_SOME_OPTION=value`.
365 | For example, instead of setting `--end-markers=".?!"`, you could set
366 | `MDSLW_END_MARKERS=".?!"` instead.
367 | When set, the value specified via the environment variable will take precedence
368 | over the default value and a value taken from config files.
369 | When set, a command line argument will take precedence over the environment
370 | variable.
371 | Take a call like this for example:
372 |
373 | ```bash
374 | export MDSLW_EXTENSION=".markdown"
375 | export MDSLW_MODE=both
376 | mdslw --mode=check .
377 | ```
378 |
379 | This call will search for files with the extension `.markdown` instead of the
380 | default `.md`.
381 | Furthermore, files will only be checked due to `--mode=check`, even though the
382 | environment variable `MDSLW_MODE=both` has been set.
383 | Defaults will be used for everything else.
384 |
385 | ## Config Files
386 |
387 | Instead of or in addition to configuring `mdslw` via
388 | [command line arguments](#command-line-arguments) or
389 | [environment variables](#environment-variables), you can configure it via config
390 | files.
391 | Such a file has to have the exact name `.mdslw.toml` and affects all files in or
392 | below its own directory.
393 | Multiple config files will be merged.
394 | Options given in config files closer to a markdown file take precedence.
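
For instance, the following sketch (with made-up values) wraps files below
`docs/` at 60 characters while the rest of the repository is wrapped at 100:

```bash
# At the repository root: wrap at 100 characters by default.
echo 'max-width = 100' > .mdslw.toml
# Below docs/, the closer config file takes precedence and wraps at 60.
echo 'max-width = 60' > docs/.mdslw.toml
mdslw .
```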
395 |
396 | Configuration files are limited to options that influence the formatted result.
397 | They cannot influence how `mdslw` operates.
398 | For example, the option `--mode` cannot be set via config files while
399 | `--max-width` can.
400 | The following example shows all the possible options that can be set via config
401 | files.
402 | Note that all entries are optional in config files, which means that any number
403 | of them may be left out.
404 | The following is a full config file containing all the default values.
405 |
406 |
407 |
408 | ```toml
409 | max-width = 80
410 | end-markers = "?!:."
411 | lang = "ac"
412 | suppressions = ""
413 | ignores = ""
414 | upstream-command = ""
415 | upstream = ""
416 | upstream-separator = ""
417 | case = "ignore"
418 | features = ""
419 | ```
420 |
421 |
422 |
423 | When set, the value specified via the config file will take precedence over the
424 | default value.
425 | When set, an environment variable or a command line argument will take
426 | precedence over a value taken from config files.
427 |
428 | ### Per-File Configuration
429 |
430 | You can embed a configuration for `mdslw` inside a markdown file.
431 | That configuration affects only the file it is embedded in.
432 | It will be merged with other config files affecting the markdown file in
433 | question just like other config files.
434 |
435 | An embedded configuration needs to reside inside the YAML front matter as part
436 | of a _block scalar string_ associated with the YAML key `mdslw-toml` (see below
437 | for an example).
438 | To get an overview of all the different possibilities for defining multi-line
439 | strings in YAML documents, please see [here][yaml-block-scalars].
440 | The embedded configuration string needs to follow the same format as all other
441 | config files for `mdslw` (see above).
442 |
443 | For example, you can embed the default config file into a markdown document as
444 | in the following example.
445 | It is strongly recommended to use the `|` block style indicator without a block
446 | chomping indicator as done in the following example.
447 |
448 | ```markdown
449 | ---
450 | # This is the YAML front matter.
451 | mdslw-toml: |
452 | max-width = 80
453 | end-markers = "?!:."
454 | lang = "ac"
455 | suppressions = ""
456 | ignores = ""
457 | upstream-command = ""
458 | upstream = ""
459 | upstream-separator = ""
460 | case = "ignore"
461 | features = ""
462 | ---
463 | The actual markdown document follows.
464 | ```
465 |
466 | Note that `mdslw` does not feature a full YAML parser because, as of October
467 | 2025, there is no suitable library available.
468 | Instead, `mdslw` comes with its own limited YAML parser.
469 | That parser supports only block scalar strings without an indentation indicator.
470 |
471 | # Installation
472 |
473 | Go to the project's [latest release], select the correct binary for your system,
474 | and download it.
475 | See below for how to select the correct one.
476 | Rename the downloaded binary to `mdslw` (or `mdslw.exe` on Windows) and move it
477 | to a location that is in your `$PATH` such as `/usr/local/bin` (will be
478 | different on Windows).
479 | Moving it there will likely require admin or `root` permissions, e.g. via
480 | `sudo`.
481 | On Unix systems, you also have to make the binary executable via the command
482 | `chmod +x mdslw`, pointing to the actual location of `mdslw`.
483 | From now on, you can simply type `mdslw` in your terminal to use it!
484 |
485 | The naming of the release binaries uses the [llvm target triple].
486 | You can also use the following list to pick the correct binary for your machine:
487 |
488 | - `mdslw_x86_64-unknown-linux-musl`:
489 | Linux desktop or laptop using 64-bit x86-compatible CPUs
490 | - `mdslw_armv7-unknown-linux-gnueabihf`:
491 | RaspberryPi or similar single-board computers using ARMv7-compatible CPUs
492 | - `mdslw_x86_64-pc-windows-gnu.exe`:
493 | Windows desktop or laptop using 64-bit x86-compatible CPUs
494 | - `mdslw_aarch64-apple-darwin`:
495 | Mac using M1, M2, or other Mx CPUs based on Apple silicon, i.e. the new ones
496 | after the [transition from Intel CPUs][apple-architecture-transition-arm]
497 | - `mdslw_x86_64-apple-darwin`:
498 | Mac using 64-bit x86-compatible CPUs, i.e. the old ones after the
499 | [transition from the PowerPC architecture][apple-architecture-transition-ppc]
500 |
501 | ## Building From Source
502 |
503 | First, install rust, including `cargo`, via [rustup].
504 | Once you have `cargo`, execute the following command in a terminal:
505 |
506 | ```bash
507 | cargo install --git https://github.com/razziel89/mdslw --locked
508 | ```
509 |
510 | # Editor Integration
511 |
512 | Contributions describing integrations with more editors are welcome!
513 |
514 | ## neovim
515 |
516 | The recommended way of integrating `mdslw` with neovim is through
517 | [conform.nvim].
518 | Simply install the plugin and modify your `init.vim` like this to add `mdslw` as
519 | a formatter for the markdown file type:
520 |
521 | ```lua
522 | require("conform").setup({
523 | formatters_by_ft = {
524 | markdown = { "mdslw" },
525 | },
526 | formatters = {
527 | mdslw = { prepend_args = { "--stdin-filepath", "$FILENAME" } },
528 | },
529 | })
530 | ```
531 |
532 | Alternatively, you can also use the vim-like integration shown below.
533 |
534 | ## vim
535 |
536 | Add the following to your `~/.vimrc` to have your editor auto-format every `.md`
537 | document before writing it out:
538 |
539 | ```vim
540 | function MdFormat()
541 | if executable("mdslw")
542 | set lazyredraw
543 | " Enter and exit insert mode to keep track
544 | " of the cursor position, useful when undoing.
545 | execute "normal! ii\"
546 | let cursor_pos = getpos(".")
547 | %!mdslw --stdin-filepath "%"
548 | if v:shell_error != 0
549 | u
550 | endif
551 | call setpos('.', cursor_pos)
552 | set nolazyredraw
553 | endif
554 | endfunction
555 |
556 | autocmd BufWritePre *.md silent! :call MdFormat()
557 | ```
558 |
559 | ## VS Code
560 |
561 | Assuming you have `mdslw` installed and in your `PATH`, you can integrate it
562 | with VS Code.
563 | To do so, install the extension [run on save] and add the following snippet to
564 | your `settings.json`:
565 |
566 | ```json
567 | {
568 | "emeraldwalk.runonsave": {
569 | "commands": [
570 | {
571 | "match": ".*\\.md$",
572 | "cmd": "mdslw '${file}'"
573 | }
574 | ]
575 | }
576 | }
577 | ```
578 |
579 | From now on, every time you save to an existing markdown file, `mdslw` will
580 | auto-format it.
581 | This snippet assumes an empty `settings.json` file.
582 | If yours is not empty, you will have to merge it with the existing one.
583 |
584 | # Tips And Tricks
585 |
586 | ## Non-Breaking Spaces
587 |
588 | The following codepoints are recognised as [non-breaking spaces] by default:
589 |
590 | - U+00A0
591 | - U+2007
592 | - U+202F
593 | - U+2060
594 | - U+FEFF
595 |
596 | How to insert [non-breaking spaces] depends on your operating system as well as
597 | your editor.
598 | The following covers the non-breaking space U+00A0.
599 |
600 | **vim/neovim**
601 |
602 | Adding this to your `~/.vimrc` or `init.vim` will let you insert non-breaking
603 | spaces when pressing CTRL+s in insert mode and also show them as `+`:
604 |
605 | ```vim
606 | " Make it easy to insert non-breaking spaces and show them by default.
607 | set list listchars+=nbsp:+
608 | inoremap <C-s> <C-k>NS
609 | " Alternatively, you can use this if your neovim/vim does not support this
610 | " digraph. Note that your browser might not copy the non-breaking space at the
611 | " end of the following line correctly.
612 | inoremap <C-s>  
613 | ```
614 |
615 | ❗Tips for how to add and show non-breaking spaces in other editors are welcome.
616 |
617 | ## Disabling Auto-Formatting
618 |
619 | You can tell `mdslw` to stop auto-formatting parts of your document.
620 | Everything between the HTML comments `` and
621 | `` will not be formatted.
622 | For convenience, `mdslw` also recognises `prettier`'s range ignore directives
623 | `<!-- prettier-ignore-start -->` and `<!-- prettier-ignore-end -->`.
624 |
625 | In addition, [non-breaking spaces](#non-breaking-spaces) can be used to prevent
626 | modifications to your documents.
627 | Replacing a space by a non-breaking space prevents `mdslw` from adding a line
628 | break at that position.
629 | Furthermore, preceding a line break by a non-breaking space prevents `mdslw`
630 | from removing the line break.
631 |
632 | # How To Contribute
633 |
634 | If you have found a bug and want to fix it, please simply go ahead and fork the
635 | repository, fix the bug, and open a pull request to this repository!
636 | Bug fixes are always welcome.
637 |
638 | In all other cases, please open an issue on GitHub first to discuss the
639 | contribution.
640 | The feature you would like to introduce might already be in development.
641 | Please also take note of [the intended scope](#about-markdown-extensions) of
642 | `mdslw`.
643 |
644 | # Licence
645 |
646 | [GPLv3]
647 |
648 | If you want to use this piece of software under a different, more permissive
649 | open-source licence, please contact me.
650 | I am very open to discussing this point.
651 |
652 |
653 |
654 | [GPLv3]: ./LICENCE
655 | [ignore crate]: https://docs.rs/ignore/latest/ignore/
656 | [ignore defaults]: https://docs.rs/ignore/latest/ignore/struct.WalkBuilder.html#method.standard_filters
657 |
658 |
659 |
660 | [lcs algorithm]: https://docs.rs/similar/latest/similar/algorithms/lcs/index.html
661 | [myers algorithm]: https://docs.rs/similar/latest/similar/algorithms/myers/index.html
662 | [patience algorithm]: https://docs.rs/similar/latest/similar/algorithms/patience/index.html
663 |
664 |
665 |
666 | [bat]: https://github.com/sharkdp/bat
667 | [delta]: https://github.com/dandavison/delta
668 | [diff-so-fancy]: https://github.com/so-fancy/diff-so-fancy
669 |
670 |
671 |
672 | [conform.nvim]: https://github.com/stevearc/conform.nvim
673 | [run on save]: https://marketplace.visualstudio.com/items?itemName=emeraldwalk.RunOnSave
674 |
675 |
676 |
677 | [non-breaking spaces]: https://en.wikipedia.org/wiki/Non-breaking_space
678 | [unicode]: https://github.com/unicode-org/cldr-json/tree/main/cldr-json/cldr-segments-full/segments
679 | [yaml-block-scalars]: https://yaml-multiline.info/
680 |
681 |
682 |
683 | [apple-architecture-transition-arm]: https://en.wikipedia.org/wiki/Mac_transition_to_Apple_Silicon
684 | [apple-architecture-transition-ppc]: https://en.wikipedia.org/wiki/Mac_transition_to_Intel_processors
685 | [latest release]: https://github.com/razziel89/mdslw/releases/latest
686 | [llvm target triple]: https://clang.llvm.org/docs/CrossCompilation.html#target-triple
687 | [rustup]: https://rustup.rs/
688 |
--------------------------------------------------------------------------------
/src/parse.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 |
18 | use core::ops::Range;
19 | use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
20 | use std::collections::HashMap;
21 | use std::fmt::Write;
22 |
23 | use crate::detect::WhitespaceDetector;
24 | use crate::ignore::IgnoreByHtmlComment;
25 | use crate::indent::build_indent;
26 | use crate::trace_log;
27 |
28 | const YAML_CONFIG_KEY: &str = "mdslw-toml";
29 | const YAML_CONFIG_KEY_WITH_COLON: &str = "mdslw-toml:";
30 |
31 | /// CharRange describes a range of characters in a document.
32 | pub type CharRange = Range<usize>;
33 |
34 | #[derive(Debug, PartialEq)]
35 | pub struct ParseCfg {
36 | pub keep_linebreaks: bool,
37 | }
38 |
39 | /// Determine ranges of characters that shall later be wrapped and have their indents fixed.
40 | pub fn parse_markdown(text: &str, parse_cfg: &ParseCfg) -> Vec {
41 | // Enable some options by default to support parsing common kinds of documents.
42 | let mut opts = Options::empty();
43 | // If we do not want to modify some elements, we detect them with the parser and consider them
44 | // as verbatim in the function "to_be_wrapped".
45 | log::debug!("detecting tables");
46 | opts.insert(Options::ENABLE_TABLES);
47 | log::debug!("detecting definition lists");
48 | opts.insert(Options::ENABLE_DEFINITION_LIST);
49 | // Do not enable other options:
50 | // opts.insert(Options::ENABLE_FOOTNOTES);
51 | // opts.insert(Options::ENABLE_TASKLISTS);
52 | // opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
53 | // opts.insert(Options::ENABLE_SMART_PUNCTUATION);
54 | // opts.insert(Options::ENABLE_STRIKETHROUGH);
55 | let events_and_ranges = Parser::new_ext(text, opts)
56 | .into_offset_iter()
57 | .inspect(|(event, range)| {
58 | trace_log!("parsed [{}, {}): {:?}", range.start, range.end, event)
59 | })
60 |         .collect::<Vec<_>>();
61 | let whitespaces = whitespace_indices(text, &WhitespaceDetector::new(parse_cfg.keep_linebreaks));
62 |
63 | merge_ranges(to_be_wrapped(events_and_ranges, &whitespaces), &whitespaces)
64 | }
65 |
66 | /// Filter out those ranges of text that shall be wrapped. See comments in the function for
67 | /// what sections are handled in which way.
68 | fn to_be_wrapped(
69 | events: Vec<(Event, CharRange)>,
70 |     whitespaces: &HashMap<usize, char>,
71 | ) -> Vec {
72 | let mut verbatim_level: usize = 0;
73 | let mut ignore = IgnoreByHtmlComment::new();
74 |
75 | events
76 | .into_iter()
77 | // Mark every range that is between two ignore directives as verbatim by filtering it out.
78 | .filter(|(event, _range)| {
79 | if let Event::Html(s) = event {
80 | ignore.process_html(s)
81 | }
82 | !ignore.should_be_ignored()
83 | })
84 | .filter(|(event, range)| match event {
85 | Event::Start(tag) => {
86 | match tag {
87 | // Most delimited blocks should stay as they are. Introducing line breaks would
88 | // cause problems here.
89 | Tag::BlockQuote(..)
90 | | Tag::CodeBlock(..)
91 | | Tag::FootnoteDefinition(..)
92 | | Tag::Heading { .. }
93 | | Tag::Image { .. }
94 | | Tag::Superscript
95 | | Tag::Subscript
96 | | Tag::Table(..)
97 | | Tag::TableCell
98 | | Tag::TableHead
99 | | Tag::TableRow => {
100 | verbatim_level += 1;
101 | false
102 | }
103 | // In case of some blocks, we do not want to extract the text contained inside
104 | // them but keep everything the block encompasses.
105 | Tag::Emphasis | Tag::Link { .. } | Tag::Strikethrough | Tag::Strong => {
106 | verbatim_level += 1;
107 | true
108 | }
109 | // Other delimited blocks can be both, inside a verbatim block or inside text.
110 | // However, the text they embrace is the important bit but we do not want to
111 | // extract the entire range.
112 | Tag::Item
113 | | Tag::List(..)
114 | | Tag::Paragraph
115 | | Tag::MetadataBlock(..)
116 | | Tag::DefinitionList
117 | | Tag::DefinitionListTitle
118 | | Tag::DefinitionListDefinition => false,
119 |
120 | // See below for why HTML blocks are treated like this.
121 | Tag::HtmlBlock => !range
122 | .clone()
123 | .filter_map(|el| whitespaces.get(&el))
124 | .any(|el| el == &'\n'),
125 | }
126 | }
127 |
128 | Event::End(tag) => {
129 | match tag {
130 | // Kept as they were.
131 | TagEnd::BlockQuote(..)
132 | | TagEnd::CodeBlock
133 | | TagEnd::FootnoteDefinition
134 | | TagEnd::Heading(..)
135 | | TagEnd::Superscript
136 | | TagEnd::Subscript
137 | | TagEnd::Image
138 | | TagEnd::Table
139 | | TagEnd::TableCell
140 | | TagEnd::TableHead
141 | | TagEnd::TableRow => {
142 | verbatim_level = verbatim_level
143 | .checked_sub(1)
144 | .expect("tags should be balanced");
145 | false
146 | }
147 | // Should be wrapped but text not extracted.
148 | TagEnd::Emphasis | TagEnd::Link | TagEnd::Strikethrough | TagEnd::Strong => {
149 | verbatim_level = verbatim_level
150 | .checked_sub(1)
151 | .expect("tags should be balanced");
152 | false
153 | }
154 |
155 | // Can be anything.
156 | TagEnd::Item
157 | | TagEnd::List(..)
158 | | TagEnd::DefinitionList
159 | | TagEnd::DefinitionListTitle
160 | | TagEnd::DefinitionListDefinition
161 | | TagEnd::Paragraph
162 | | TagEnd::HtmlBlock
163 | | TagEnd::MetadataBlock(..) => false,
164 | }
165 | }
166 |
167 | // More elements that are not blocks and that should be taken verbatim.
168 | Event::TaskListMarker(..) | Event::FootnoteReference(..) | Event::Rule => false,
169 |
170 |             // We do not support detecting math so far as we do not intend to modify math in any
171 | // way. That is, we treat it as any other text and don't have the parser detect math
172 | // specifically.
173 | Event::InlineMath(..) | Event::DisplayMath(..) => false,
174 |
175 | // Allow editing HTML only if it is inline, i.e. if the range containing the HTML
176 | // contains no whitespace. Treat it like text in that case.
177 | Event::Html(..) | Event::InlineHtml(..) => !range
178 | .clone()
179 | .filter_map(|el| whitespaces.get(&el))
180 | .any(|el| el == &'\n'),
181 |
182 | // The following should be wrapped if they are not inside a verbatim block. Note that
183 |         // this also includes blocks that are extracted in their entirety (e.g. links). In the
184 |         // context of text contained within, they count as verbatim blocks, too.
185 | Event::SoftBreak | Event::HardBreak | Event::Text(..) | Event::Code(..) => {
186 | verbatim_level == 0
187 | }
188 | })
189 | .map(|(_event, range)| range)
190 |         .collect::<Vec<_>>()
191 | }
192 |
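/// A slice of the parsed text: `Matches` holds a top-level block quote together with its
/// indentation, while `NoMatch` holds everything between such block quotes.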
193 | #[derive(Debug)]
194 | enum RangeMatch<'a> {
195 | Matches((usize, &'a str)),
196 | NoMatch(&'a str),
197 | }
198 |
199 | pub struct BlockQuotes<'a>(Vec<RangeMatch<'a>>);
200 |
201 | impl<'a> BlockQuotes<'a> {
202 | pub const FULL_PREFIX: &'static str = "> ";
203 | pub const FULL_PREFIX_LEN: usize = Self::FULL_PREFIX.len();
204 | pub const SHORT_PREFIX: &'static str = ">";
205 |
206 | fn strip_prefix(text: &str, indent: usize) -> String {
207 | // The first line does start with the actual prefix, while the other lines start with a
208 |         // number of other characters. Thus, we strip those off for all but the first line.
209 | text.split_inclusive('\n')
210 | .enumerate()
211 | .map(|(idx, t)| {
212 | let t = if idx == 0 { t } else { &t[indent..t.len()] };
213 | t.strip_prefix(Self::SHORT_PREFIX)
214 | .map(|el| el.strip_prefix(' ').unwrap_or(el))
215 | .unwrap_or(t)
216 | })
217 |             .collect::<String>()
218 | }
219 |
220 | fn add_prefix(text: String, indent: usize) -> String {
221 | let indent = build_indent(indent);
222 | // The "write!" calls should never fail since we write to a String that we create here.
223 | let mut result = String::new();
224 | text.split_inclusive('\n')
225 | .enumerate()
226 | .for_each(|(idx, line)| {
227 | let prefix = if line.len() == 1 {
228 | Self::SHORT_PREFIX
229 | } else {
230 | Self::FULL_PREFIX
231 | };
232 | // The first line is already correctly indented. For the other lines, we have to add
233 | // the indent.
234 | let ind = if idx == 0 { "" } else { &indent };
235 | write!(result, "{}{}{}", ind, prefix, line)
236 |                     .expect("building block-quote formatted result");
237 | });
238 | result
239 | }
240 |
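    /// For every byte position in the text, compute its offset within its own line. The offset
    /// at the start of a block quote is used as that quote's indentation.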
241 |     fn indents(text: &str) -> Vec<usize> {
242 | text.split_inclusive('\n')
243 | .flat_map(|line| 0..line.len())
244 |             .collect::<Vec<_>>()
245 | }
246 |
247 | pub fn new(text: &'a str) -> Self {
248 | let mut level: usize = 0;
249 | // In case we ever need to iterate over other kinds of syntax, the tag as well as the
250 | // function stripping prefixes will have to be adjusted.
251 |
252 | let indents = Self::indents(text);
253 | let mut start = 0;
254 |
255 | let mut ranges = Parser::new(text)
256 | .into_offset_iter()
257 | .filter_map(|(event, range)| match event {
258 | Event::Start(start) => {
259 | if matches!(start, Tag::BlockQuote(..)) {
260 | level += 1;
261 | }
262 | if level == 1 && matches!(start, Tag::BlockQuote(..)) {
263 | // Using a CharRange here to prevent the flat_map below from flattening
264 | // all the ranges, since Range supports flattening but our
265 | // CharRange does not.
266 | Some(CharRange {
267 | start: range.start,
268 | end: range.end,
269 | })
270 | } else {
271 | None
272 | }
273 | }
274 | Event::End(end) => {
275 | if matches!(end, TagEnd::BlockQuote(..)) {
276 | level -= 1;
277 | }
278 | None
279 | }
280 | _ => None,
281 | })
282 | .flat_map(|range| {
283 | let prev_start = start;
284 | let this_start = range.start;
285 | start = range.end;
286 |
287 | let this = RangeMatch::Matches((indents[this_start], &text[range]));
288 | if this_start == prev_start {
289 | vec![this]
290 | } else {
291 | let missing = RangeMatch::NoMatch(&text[prev_start..this_start]);
292 | vec![missing, this]
293 | }
294 | })
295 |             .collect::<Vec<_>>();
296 |
297 | if start != text.len() {
298 | ranges.push(RangeMatch::NoMatch(&text[start..text.len()]));
299 | }
300 |
301 | Self(ranges)
302 | }
303 |
304 |     /// The argument `func` should keep a line break at the end if its argument ends in one. In
305 | /// most cases, it ends in a line break.
306 |     pub fn apply_to_matches_and_join<MapFn>(self, func: MapFn) -> String
307 | where
308 | MapFn: Fn(String, usize) -> String,
309 | {
310 | self.0
311 | .into_iter()
312 | .map(|el| match el {
313 | RangeMatch::NoMatch(s) => s.to_string(),
314 | RangeMatch::Matches(s) => Self::add_prefix(
315 | func(Self::strip_prefix(s.1, s.0), s.0 + Self::FULL_PREFIX_LEN),
316 | s.0,
317 | ),
318 | })
319 |             .collect::<String>()
320 | }
321 | }
322 |
323 | /// Check whether there is nothing but whitespace between the end of the previous range and the
324 | /// start of the next one, if the ranges do not connect directly anyway. Note that we still keep
325 | /// paragraphs separated by keeping ranges separate that are separated by more than one linebreak.
326 | fn merge_ranges(ranges: Vec<CharRange>, whitespaces: &HashMap<usize, char>) -> Vec<CharRange> {
327 |     let mut next_range: Option<CharRange> = None;
328 | let mut merged = vec![];
329 |
330 | for range in ranges {
331 | if let Some(next) = next_range {
332 | let contains_just_whitespace =
333 | (next.end..range.start).all(|el| whitespaces.contains_key(&el));
334 | let at_most_one_linebreak = (next.end..range.start)
335 | .filter(|el| Some(&'\n') == whitespaces.get(el))
336 | .count()
337 | <= 1;
338 | let is_contained = range.start >= next.start && range.end <= next.end;
339 |
340 | if is_contained {
341 | // Skip the range if it is already included.
342 | next_range = Some(next);
343 | } else if contains_just_whitespace && at_most_one_linebreak {
344 | // Extend the range.
345 | next_range = Some(CharRange {
346 | start: next.start,
347 | end: range.end,
348 | });
349 | } else {
350 | // Remember the range and continue extending.
351 | merged.push(next);
352 | next_range = Some(range);
353 | }
354 | } else {
355 | next_range = Some(range);
356 | }
357 | }
358 |
359 | // Treat the last range that may be left.
360 | if let Some(next) = next_range {
361 | merged.push(next)
362 | }
363 |
364 | // Remove ranges that contain at most 1 character. They never have to be wrapped.
365 | let removed = merged
366 | .into_iter()
367 | .filter(|el| el.len() > 1)
368 |         .collect::<Vec<_>>();
369 |
370 | trace_log!(
371 | "formattable byte ranges: {}",
372 | removed
373 | .iter()
374 | .map(|range| format!("[{},{})", range.start, range.end))
375 |             .collect::<Vec<_>>()
376 | .join(" ")
377 | );
378 |
379 | removed
380 | }
381 |
382 | /// Get all indices that point to whitespace as well as the characters they point to.
383 | fn whitespace_indices(text: &str, detector: &WhitespaceDetector) -> HashMap<usize, char> {
384 | text.char_indices()
385 | .filter_map(|(pos, ch)| {
386 | if detector.is_whitespace(&ch) {
387 | Some((pos, ch))
388 | } else {
389 | None
390 | }
391 | })
392 |         .collect::<HashMap<_, _>>()
393 | }
394 |
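/// The kind of YAML block scalar that introduces the embedded config value: `Pipe` (`|`) keeps
/// line breaks, `Angle` (`>`) folds them into spaces, and `None` marks a line that starts no
/// such block.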
395 | enum YAMLBlockStartLineType {
396 | Pipe,
397 | Angle,
398 | None,
399 | }
400 |
401 | impl YAMLBlockStartLineType {
402 | fn is_actual_start_line(&self) -> bool {
403 | !matches!(self, Self::None)
404 | }
405 | }
406 |
407 | /// Parse a YAML text without an external dependency. We interpret text as being a single YAML
408 | /// document. We search until we find a line starting with the given key. We return everything that
409 | /// is at the same indentation as the line following the key.
410 | pub fn get_value_for_mdslw_toml_yaml_key(text: &str) -> String {
411 | trace_log!(
412 | "extracting value for key {} from yaml: {}",
413 | YAML_CONFIG_KEY,
414 | text.replace("\n", "\\n")
415 | );
416 | let start_line_type = |line: &str| {
417 | // Only perform the split by words if we can be reasonably sure that this might be the
418 | // correct line, i.e. one that starts with the key that we expect.
419 | if !line.starts_with(YAML_CONFIG_KEY) {
420 | return YAMLBlockStartLineType::None;
421 | }
422 |         let split = line.split_whitespace().collect::<Vec<_>>();
423 | let first_word = split
424 | .first()
425 | .expect("Internal error, there should have been a first word.");
426 | if first_word == &YAML_CONFIG_KEY {
427 | match split[1..] {
428 | [":", "|"] | [":", "|-"] | [":", "|+"] => YAMLBlockStartLineType::Pipe,
429 | [":", ">"] | [":", ">-"] | [":", ">+"] => YAMLBlockStartLineType::Angle,
430 | _ => YAMLBlockStartLineType::None,
431 | }
432 | } else if first_word == &YAML_CONFIG_KEY_WITH_COLON {
433 | match split[1..] {
434 | ["|"] | ["|-"] | ["|+"] => YAMLBlockStartLineType::Pipe,
435 | [">"] | [">-"] | [">+"] => YAMLBlockStartLineType::Angle,
436 | _ => YAMLBlockStartLineType::None,
437 | }
438 | } else {
439 | YAMLBlockStartLineType::None
440 | }
441 | };
442 | // We skip everything until the first line that we expect, including that first line. We end up
443 | // either with an empty iterator or an iterator whose first element is the first value line.
444 | let mut skipped = text
445 | .lines()
446 | .skip_while(|line| !start_line_type(line).is_actual_start_line());
447 | let block_type = if let Some(line) = skipped.next() {
448 | start_line_type(line)
449 | } else {
450 | YAMLBlockStartLineType::None
451 | };
452 | let mut peekable = skipped.skip_while(|line| line.is_empty()).peekable();
453 | let first_line = peekable.peek();
454 | // Check whether we have a value line or not.
455 | if let Some(line) = first_line {
456 | // We check whether the first value line is indented. If so, we remember the indent since
457 | // every following value line has to have the exact same indent.
458 | let first_indent = line.len() - line.trim_start().len();
459 | if first_indent > 0 {
460 | let result = peekable
461 | .take_while(|line| {
462 | line.is_empty() || line.len() - line.trim_start().len() == first_indent
463 | })
464 | .map(|line| line.trim())
465 |                 .collect::<Vec<_>>()
466 | .join("\n");
467 | log::info!(
468 | "found value for key {} from yaml:\n{}",
469 | YAML_CONFIG_KEY,
470 | result
471 | );
472 | match block_type {
473 | YAMLBlockStartLineType::Pipe => result,
474 | YAMLBlockStartLineType::Angle => result
475 | .split("\n\n")
476 | .map(|line| line.replace("\n", " "))
477 |                 .collect::<Vec<_>>()
478 | .join("\n"),
479 | YAMLBlockStartLineType::None => String::new(),
480 | }
481 | } else {
482 | log::info!("no value line found");
483 | String::new()
484 | }
485 | } else {
486 | log::info!("key {} not found", YAML_CONFIG_KEY);
487 | String::new()
488 | }
489 | }
490 |
491 | #[cfg(test)]
492 | mod test {
493 | use super::*;
494 |
495 | #[test]
496 | fn detect_whitespace() {
497 |         let text = "some test with witespace at \tsome\nlocations";
498 | let detected = whitespace_indices(text, &WhitespaceDetector::default());
499 | let expected = vec![
500 | (4, ' '),
501 | (9, ' '),
502 | (14, ' '),
503 | (24, ' '),
504 | (27, ' '),
505 | (28, '\t'),
506 | (33, '\n'),
507 | ]
508 | .into_iter()
509 |         .collect::<HashMap<_, _>>();
510 |
511 | assert_eq!(expected, detected);
512 | }
513 |
514 | #[test]
515 | fn merging_ranges() {
516 | let ranges = vec![
517 | CharRange { start: 0, end: 4 },
518 | CharRange { start: 5, end: 9 },
519 | CharRange { start: 11, end: 15 },
520 | CharRange { start: 11, end: 14 },
521 | CharRange { start: 16, end: 19 },
522 | CharRange { start: 23, end: 36 },
523 | ];
524 | let whitespace = whitespace_indices(
525 | "some text\n\nmore text | even more text",
526 | &WhitespaceDetector::default(),
527 | );
528 |
529 | let merged = merge_ranges(ranges, &whitespace);
530 |
531 | let expected = vec![
532 | CharRange { start: 0, end: 9 },
533 | CharRange { start: 11, end: 19 },
534 | CharRange { start: 23, end: 36 },
535 | ];
536 |
537 | assert_eq!(expected, merged);
538 | }
539 |
540 | #[test]
541 | fn parsing_markdown() {
542 | let text = r#"
543 | ## Some Heading
544 |
545 | Some text.
546 |
547 |
548 |
549 | - More text.
550 | - More text.
551 | - Even more text.
552 | - Some text with a [link].
553 |
554 | ```code
555 | some code
556 | ```
557 |
558 | [link]: https://something.com "some link"
559 | "#;
560 | let cfg = ParseCfg {
561 | keep_linebreaks: false,
562 | };
563 | let parsed = parse_markdown(text, &cfg);
564 |
565 | // [18..28, 52..62, 65..75, 80..95, 100..124]
566 | let expected = vec![
567 | CharRange { start: 18, end: 28 },
568 | CharRange { start: 52, end: 62 },
569 | CharRange { start: 65, end: 75 },
570 | CharRange { start: 80, end: 95 },
571 | CharRange {
572 | start: 100,
573 | end: 124,
574 | },
575 | ];
576 |
577 | assert_eq!(expected, parsed);
578 | }
579 |
580 | #[test]
581 | fn applying_to_no_block_quotes_remains_unchanged() {
582 | let text = r#"
583 | ## Some Heading
584 |
585 | Some text without block quotes.
586 |
587 |
588 |
589 | - More text.
590 | - More text.
591 | - Even more text.
592 | - Some text with a [link].
593 |
594 | ```code
595 | some code
596 | ```
597 |
598 | [link]: https://something.com "some link"
599 | "#;
600 |
601 | let unchanged = BlockQuotes::new(text).apply_to_matches_and_join(|_, _| String::new());
602 | assert_eq!(text.to_string(), unchanged);
603 | }
604 |
605 | #[test]
606 | fn applying_to_block_quotes() {
607 | let text = r#"
608 | ## Some Heading
609 |
610 | Some text with block quotes.
611 |
612 | > This first text is block quoted.
613 | >
614 | >> This text is quoted at the second level.
615 | >
616 | > Some more quotes at the first level.
617 |
618 |
619 |
620 | - More text.
621 | - More text.
622 | - Even more text.
623 | - Some text with a [link].
624 |
625 | > This second text is also block quoted.
626 | >
627 | > > This text is quoted at the second level.
628 | >
629 | > Some more quotes at the first level.
630 |
631 | - Some text.
632 |
633 | > This third text is block quoted but inside an itemization.
634 | >
635 | >> This text is quoted at the second level.
636 | >
637 | > Some more quotes at the first level.
638 |
639 | More text.
640 |
641 | [link]: https://something.com "some link"
642 | "#;
643 |
644 | let expected = r#"
645 | ## Some Heading
646 |
647 | Some text with block quotes.
648 |
649 | > 2:115
650 | > 2:115
651 | > 2:115
652 |
653 |
654 |
655 | - More text.
656 | - More text.
657 | - Even more text.
658 | - Some text with a [link].
659 |
660 | > 2:121
661 | > 2:121
662 | > 2:121
663 |
664 | - Some text.
665 |
666 | > 4:141
667 | > 4:141
668 | > 4:141
669 |
670 | More text.
671 |
672 | [link]: https://something.com "some link"
673 | "#;
674 |
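    | // Every block-quote body is replaced by the closure's output while everything outside the quotes passes through unchanged.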
675 | let changed = BlockQuotes::new(text).apply_to_matches_and_join(|s, i| {
676 | format!("{}:{}\n{}:{}\n{}:{}\n", i, s.len(), i, s.len(), i, s.len())
677 | });
678 | assert_eq!(expected, changed);
679 | }
680 |
681 | #[test]
682 | fn flattening_vecs_of_char_ranges_retains_ranges() {
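    | // Flattening nested vectors of CharRange must keep the individual ranges and their order intact.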
683 | let to_be_flattened = vec![
684 | vec![CharRange { start: 0, end: 10 }],
685 | vec![
686 | CharRange {
687 | start: 100,
688 | end: 110,
689 | },
690 | CharRange {
691 | start: 200,
692 | end: 210,
693 | },
694 | ],
695 | ];
696 | let flat = to_be_flattened.into_iter().flatten().collect::<Vec<_>>();
697 | let expected = vec![(0..10), (100..110), (200..210)];
698 | assert_eq!(expected, flat);
699 | }
700 |
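    | // Test helper: render a YAML document with a single block-scalar entry, i.e. "<key>: <marker>" (optionally with a space before the colon) followed by the indented content.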
701 | fn build_yaml(
702 | key: &str,
703 | space_before_colon: bool,
704 | block_marker: &str,
705 | indent_spaces: usize,
706 | content: &str,
707 | ) -> String {
708 | let indent = (0..indent_spaces).map(|_| " ").collect::<String>();
709 | let indented = content
710 | .lines()
711 | .map(|line| format!("{}{}\n", indent, line))
712 | .collect::<String>();
713 | let maybe_space = if space_before_colon { " " } else { "" };
714 | let result = format!("{}{}: {}\n{}", key, maybe_space, block_marker, indented);
715 | // Ensure that values were filled in.
716 | assert_ne!(result, String::from(": \n"));
717 | result
718 | }
719 |
720 | const YAML_BASE_CONTENT: &str = r#"
721 | some content with an empty line
722 |
723 | at the beginning and in the middle"#;
724 |
725 | #[test]
726 | fn building_yaml() {
727 | let yaml = build_yaml(YAML_CONFIG_KEY, true, "|", 4, YAML_BASE_CONTENT);
728 | let expected = r#"mdslw-toml : |
729 |     
730 | some content with an empty line
731 |     
732 | at the beginning and in the middle
733 | "#;
734 | assert_eq!(yaml, expected);
735 | }
736 |
737 | #[test]
738 | fn extracting_yaml_string_pipe_block_markers() {
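    | // Literal block scalars ("|", "|-", "|+") preserve line breaks, so the extracted value matches the original content verbatim.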
739 | for has_space in [true, false] {
740 | for marker in ["|", "|-", "|+"] {
741 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, YAML_BASE_CONTENT);
742 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml);
743 | assert_eq!(extracted, YAML_BASE_CONTENT);
744 | }
745 | }
746 | }
747 |
748 | #[test]
749 | fn extracting_yaml_string_angle_block_markers() {
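    | // Folded block scalars (">", ">-", ">+") replace line breaks inside a paragraph with spaces, hence the expected value differs from YAML_BASE_CONTENT.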
750 | let expected = r#" some content with an empty line
751 | at the beginning and in the middle"#;
752 | for has_space in [true, false] {
753 | for marker in [">", ">-", ">+"] {
754 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, YAML_BASE_CONTENT);
755 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml);
756 | assert_eq!(extracted, expected);
757 | }
758 | }
759 | }
760 |
761 | #[test]
762 | fn extracting_yaml_string_pipe_block_markers_wrong_key() {
763 | let key = "some-other-key";
764 | assert_ne!(key, YAML_CONFIG_KEY);
765 | for has_space in [true, false] {
766 | for marker in ["|", "|-", "|+"] {
767 | let yaml = build_yaml(key, has_space, marker, 4, YAML_BASE_CONTENT);
768 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml);
769 | assert_eq!(extracted, String::new());
770 | }
771 | }
772 | }
773 |
774 | #[test]
775 | fn extracting_yaml_string_angle_block_markers_wrong_key() {
776 | let key = "some-other-key";
777 | assert_ne!(key, YAML_CONFIG_KEY);
778 | for has_space in [true, false] {
779 | for marker in [">", ">-", ">+"] {
780 | let yaml = build_yaml(key, has_space, marker, 4, YAML_BASE_CONTENT);
781 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml);
782 | assert_eq!(extracted, String::new());
783 | }
784 | }
785 | }
786 |
787 | #[test]
788 | fn extracting_yaml_string_empty_content() {
789 | let key = "some-other-key";
790 | for has_space in [true, false] {
791 | for marker in ["|", "|-", "|+"] {
792 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, "")
793 | + build_yaml(key, has_space, marker, 4, "").as_str();
794 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml);
795 | assert_eq!(extracted, "");
796 | }
797 | }
798 | }
799 |
800 | #[test]
801 | fn malformed_yaml_file_does_not_break_extraction() {
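    | // Removing the ": |" marker leaves the key without a recognisable value line; extraction must fall back to an empty string instead of failing.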
802 | let yaml = build_yaml(YAML_CONFIG_KEY, false, "|", 4, "does not matter\nat all");
803 | let malformed = yaml.replace(": |", "");
804 | let extracted = get_value_for_mdslw_toml_yaml_key(&malformed);
805 | assert_eq!(extracted, "".to_string());
806 | }
807 |
808 | #[test]
809 | fn config_keys_are_identical() {
810 | assert_eq!(
811 | YAML_CONFIG_KEY.to_string() + ":",
812 | YAML_CONFIG_KEY_WITH_COLON
813 | );
814 | }
815 | }
816 |
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year>  <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program>  Copyright (C) <year>  <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
676 |
--------------------------------------------------------------------------------