├── src ├── lang │ ├── .gitkeep │ └── ac ├── indent.rs ├── diff.rs ├── ignore.rs ├── lang.rs ├── frontmatter.rs ├── features.rs ├── linebreak.rs ├── logging.rs ├── ranges.rs ├── wrap.rs ├── detect.rs ├── call.rs ├── fs.rs ├── main.rs ├── cfg.rs └── parse.rs ├── .gitignore ├── .mdslw.toml ├── Cargo.toml ├── .github └── workflows │ └── ci.yml ├── Makefile ├── README.md └── LICENCE /src/lang/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /dist 3 | /.coverage.* 4 | /.envrc 5 | -------------------------------------------------------------------------------- /.mdslw.toml: -------------------------------------------------------------------------------- 1 | case = "ignore" 2 | end-markers = "?!:." 3 | features = "format-block-quotes,collate-link-defs,outsource-inline-links" 4 | ignores = "" 5 | lang = "ac" 6 | max-width = 80 7 | suppressions = "" 8 | upstream = "" 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mdslw" 3 | version = "0.16.1" 4 | edition = "2021" 5 | 6 | [profile.release] 7 | # Optimize release binaries. 8 | strip = true 9 | lto = true 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | anyhow = { version = "1", features = ["std", "backtrace"] } 15 | clap = { version = "4", features = ["env", "derive"] } 16 | clap_complete = "4" 17 | ignore = "0.4" 18 | include_dir = "0.7" 19 | log = { version = "0.4", features = ["std"] } 20 | pulldown-cmark = { version = "0.13", default-features = false } 21 | rayon = "1" 22 | similar = "2" 23 | tempfile = "3" 24 | serde = { version = "1", features = ["derive"] } 25 | toml = { version = "0.9", default-features = false, features = ["parse", "display", "serde"] } 26 | 27 | [build-dependencies] 28 | reqwest = {version = "0.12", features = ["default", "json", "blocking"]} 29 | serde_json = { version = "1" } 30 | -------------------------------------------------------------------------------- /src/indent.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | 18 | pub fn build_indent(num: usize) -> String { 19 | (0..num).map(|_| ' ').collect::() 20 | } 21 | 22 | #[cfg(test)] 23 | mod test { 24 | use super::*; 25 | 26 | #[test] 27 | fn can_build_indents() { 28 | let three = build_indent(3); 29 | assert_eq!(three, String::from(" ")); 30 | 31 | let four = build_indent(4); 32 | assert_eq!(four, String::from(" ")); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/lang/ac: -------------------------------------------------------------------------------- 1 | AA. 2 | AB. 3 | Abs. 4 | A.D. 5 | Adj. 6 | Adv. 7 | Alt. 8 | a.m. 9 | A.M. 10 | Approx. 11 | A.S. 12 | Aug. 13 | btw. 14 | Btw. 15 | B.V. 16 | Capt. 17 | C.F. 18 | cf. 19 | Cf. 20 | CF. 21 | C.O.D. 22 | Comm. 23 | Conn. 24 | Cont. 25 | D.A. 26 | D.C. 27 | DC. 28 | Dec. 29 | Dept. 30 | Dr. 31 | DR. 32 | e.g. 33 | E.g. 34 | E.G. 35 | Est. 36 | etc. 37 | Etc. 38 | ETC. 39 | Feb. 40 | Fn. 41 | Fri. 42 | Gb. 43 | Hon.B.A. 44 | Hz. 45 | I.D. 46 | i.e. 47 | I.e. 48 | I.E. 49 | I.T. 50 | Jan. 51 | J.B. 52 | J.D. 53 | J.K. 54 | Jun. 55 | Kb. 56 | K.R. 57 | L.A. 58 | Lev. 59 | lib. 60 | Lib. 61 | L.P. 62 | Lt. 63 | Lt.Cdr. 64 | Maj. 65 | Mar. 66 | Mart. 67 | Mb. 68 | Md. 69 | Mgr. 70 | M.I.T. 71 | M.R. 72 | Mr. 73 | MR. 74 | Mrs. 75 | Ms. 76 | M.T. 77 | Mt. 78 | Nov. 79 | nr. 80 | Nr. 81 | num. 82 | Num. 83 | N.V. 84 | N.Y. 85 | PC. 86 | Ph.D. 87 | Phys. 88 | P.M. 89 | P.O. 90 | pp. 91 | PP. 92 | Prof. 93 | P.V. 94 | Pvt. 95 | Rep. 96 | Rev. 97 | R.L. 98 | R.T. 99 | S.A. 100 | S.A.R. 101 | S.E. 102 | Sep. 103 | Sept. 104 | Sgt. 105 | S.p.A. 106 | Sq. 107 | U.S. 108 | U.S.A. 109 | U.S.C. 110 | vs. 111 | VS. 112 | Yr. 113 | 114 | -------------------------------------------------------------------------------- /src/diff.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */
17 | 
18 | use std::path::Path;
19 | 
20 | use similar::{udiff::unified_diff, Algorithm};
21 | 
22 | const CONTEXT: usize = 4;
23 | 
24 | pub enum Algo {
25 |     Myers,
26 |     Patience,
27 |     Lcs,
28 | }
29 | 
30 | impl Algo {
31 |     fn to_internal(&self) -> Algorithm {
32 |         match self {
33 |             Self::Myers => Algorithm::Myers,
34 |             Self::Patience => Algorithm::Patience,
35 |             Self::Lcs => Algorithm::Lcs,
36 |         }
37 |     }
38 | 
39 |     pub fn generate(&self, new: &str, org: &str, filename: &Path) -> String {
40 |         let original = format!("original:{}", filename.to_string_lossy());
41 |         let processed = format!("processed:{}", filename.to_string_lossy());
42 |         let names = (original.as_ref(), processed.as_ref());
43 |         unified_diff(self.to_internal(), org, new, CONTEXT, Some(names))
44 |     }
45 | }
46 | 
--------------------------------------------------------------------------------
/src/ignore.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | 
18 | const IGNORE_START: &str = "mdslw-ignore-start";
19 | const IGNORE_END: &str = "mdslw-ignore-end";
20 | 
21 | const PRETTIER_IGNORE_START: &str = "prettier-ignore-start";
22 | const PRETTIER_IGNORE_END: &str = "prettier-ignore-end";
23 | 
24 | fn is_html_comment(s: &str) -> bool {
25 |     s.starts_with("<!--") && (s.ends_with("-->") || s.ends_with("-->\n"))
26 | }
27 | 
28 | pub struct IgnoreByHtmlComment {
29 |     ignore: bool,
30 | }
31 | 
32 | impl IgnoreByHtmlComment {
33 |     pub fn new() -> Self {
34 |         Self { ignore: false }
35 |     }
36 | 
37 |     /// Determine whether the HTML that is processed is a comment and whether it modifies the
38 |     /// ignore behaviour.
39 |     pub fn process_html(&mut self, s: &str) {
40 |         if is_html_comment(s) {
41 |             if s.contains(IGNORE_START) || s.contains(PRETTIER_IGNORE_START) {
42 |                 log::debug!("detected ignore start directive");
43 |                 self.ignore = true
44 |             }
45 |             if s.contains(IGNORE_END) || s.contains(PRETTIER_IGNORE_END) {
46 |                 log::debug!("detected ignore stop directive");
47 |                 self.ignore = false
48 |             }
49 |         }
50 |     }
51 | 
52 |     pub fn should_be_ignored(&self) -> bool {
53 |         self.ignore
54 |     }
55 | }
56 | 
--------------------------------------------------------------------------------
/src/lang.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use anyhow::{Error, Result}; 19 | use include_dir::{include_dir, Dir}; 20 | 21 | static LANG_FILES_DIR: Dir<'_> = include_dir!("$MDSLW_LANG_DIR"); 22 | 23 | pub fn keep_word_list(lang_names: &str) -> Result { 24 | let mut errors = vec![]; 25 | 26 | let keep_words = lang_names 27 | .split_terminator(',') 28 | .flat_map(|el| el.split_whitespace()) 29 | .filter_map(|el| { 30 | if el == "none" { 31 | Some(String::new()) 32 | } else if let Some(content) = LANG_FILES_DIR 33 | .get_file(el) 34 | .and_then(|el| el.contents_utf8()) 35 | { 36 | log::debug!("loaded keep word list for language '{}'", el); 37 | Some(content.to_string()) 38 | } else { 39 | errors.push(el); 40 | None 41 | } 42 | }) 43 | .collect::(); 44 | 45 | if errors.is_empty() { 46 | Ok(keep_words) 47 | } else { 48 | Err(Error::msg(format!( 49 | "unknown or unsupported languages: {}", 50 | errors.join(", ") 51 | ))) 52 | } 53 | } 54 | 55 | #[cfg(test)] 56 | mod test { 57 | use super::*; 58 | 59 | #[test] 60 | fn nothing_disables_words() -> Result<()> { 61 | let list = keep_word_list("")?; 62 | assert_eq!(list, String::new()); 63 | Ok(()) 64 | } 65 | 66 | #[test] 67 | fn none_disables_words() -> Result<()> { 68 | let list = keep_word_list("none")?; 69 | assert_eq!(list, String::new()); 70 | Ok(()) 71 | } 72 | 73 | #[test] 74 | fn some_langs_are_supported() -> Result<()> { 75 | let langs = "de en es fr it"; 76 | let list = keep_word_list(langs)?; 77 | assert_ne!(list, String::new()); 78 | Ok(()) 79 | } 80 | 81 | #[test] 82 | fn unsupported_langs() { 83 | let langs = "unsupported"; 84 | let list = keep_word_list(langs); 85 | assert!(list.is_err()); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/frontmatter.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | 18 | const FRONTMATTER_SEPARATOR: &str = "---\n"; 19 | 20 | pub fn extract_frontmatter(text: &str) -> String { 21 | let mut lines = text.split_inclusive('\n'); 22 | let first = lines.next(); 23 | if Some(FRONTMATTER_SEPARATOR) != first { 24 | log::debug!("no frontmatter starting delimiter detected"); 25 | String::new() 26 | } else { 27 | let mut matter_len = FRONTMATTER_SEPARATOR.len(); 28 | let mut found_end_sep = false; 29 | lines 30 | .take_while(|line| { 31 | let do_continue = !found_end_sep; 32 | found_end_sep |= line == &FRONTMATTER_SEPARATOR; 33 | do_continue 34 | }) 35 | .for_each(|line| matter_len += line.len()); 36 | if !found_end_sep { 37 | // There was no frontmatter since we did not find the end separator. 38 | log::debug!("no frontmatter ending delimiter detected"); 39 | String::new() 40 | } else { 41 | log::debug!("found {} bytes of frontmatter", matter_len); 42 | // There was indeed frontmatter. This slicing operation can never error out sinc we did 43 | // extract the frontmatter from the text. 44 | let matter = &text[..matter_len]; 45 | matter.to_owned() 46 | } 47 | } 48 | } 49 | 50 | #[cfg(test)] 51 | mod test { 52 | use super::*; 53 | 54 | const FRONTMATTER_FOR_TEST: &str = "---\nsome text\nasdf: ---\nmultiple: lines\n---\n"; 55 | 56 | #[test] 57 | fn extracting_frontmatter() { 58 | let matter = extract_frontmatter(FRONTMATTER_FOR_TEST); 59 | 60 | assert_eq!(matter, FRONTMATTER_FOR_TEST.to_string()); 61 | } 62 | 63 | #[test] 64 | fn splitting_frontmatter_with_rest() { 65 | let matter = extract_frontmatter(&format!("{}some\nmore\ntext\n", FRONTMATTER_FOR_TEST)); 66 | 67 | assert_eq!(matter, FRONTMATTER_FOR_TEST.to_string()); 68 | } 69 | 70 | #[test] 71 | fn frontmatter_has_to_start_text() { 72 | let text = format!("something\n{}", FRONTMATTER_FOR_TEST); 73 | let matter = extract_frontmatter(&text); 74 | 75 | assert_eq!(matter, String::new()); 76 | } 77 | 78 | #[test] 79 | fn frontmatter_has_to_have_ending_separator() { 80 | let text = FRONTMATTER_FOR_TEST[..FRONTMATTER_FOR_TEST.len() - 1].to_string(); 81 | let matter = extract_frontmatter(&text); 82 | 83 | assert_eq!(matter, String::new()); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/features.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | 18 | use anyhow::{Error, Result}; 19 | 20 | use crate::detect::BreakCfg; 21 | use crate::parse::ParseCfg; 22 | 23 | #[derive(Debug, PartialEq)] 24 | pub struct FeatureCfg { 25 | pub keep_spaces_in_links: bool, 26 | pub format_block_quotes: bool, 27 | pub collate_link_defs: bool, 28 | pub outsource_inline_links: bool, 29 | pub break_cfg: BreakCfg, 30 | pub parse_cfg: ParseCfg, 31 | } 32 | 33 | impl Default for FeatureCfg { 34 | fn default() -> Self { 35 | FeatureCfg { 36 | keep_spaces_in_links: false, 37 | format_block_quotes: false, 38 | collate_link_defs: false, 39 | outsource_inline_links: false, 40 | parse_cfg: ParseCfg { 41 | keep_linebreaks: false, 42 | }, 43 | break_cfg: BreakCfg { 44 | keep_linebreaks: false, 45 | }, 46 | } 47 | } 48 | } 49 | 50 | impl std::str::FromStr for FeatureCfg { 51 | type Err = Error; 52 | 53 | fn from_str(s: &str) -> Result { 54 | let mut cfg = Self::default(); 55 | let mut errors = vec![]; 56 | 57 | // Parse all possible features and toggle them as desired. 58 | for feature in s 59 | .split_terminator(',') 60 | .flat_map(|el| el.split_whitespace()) 61 | .map(|el| el.trim()) 62 | .filter(|el| !el.is_empty()) 63 | { 64 | match feature { 65 | "keep-spaces-in-links" => cfg.keep_spaces_in_links = true, 66 | "format-block-quotes" => cfg.format_block_quotes = true, 67 | "collate-link-defs" => cfg.collate_link_defs = true, 68 | "outsource-inline-links" => cfg.outsource_inline_links = true, 69 | "keep-linebreaks" => { 70 | cfg.parse_cfg.keep_linebreaks = true; 71 | cfg.break_cfg.keep_linebreaks = true; 72 | } 73 | // Do not accept any other entry. 74 | _ => errors.push(feature), 75 | } 76 | } 77 | 78 | if errors.is_empty() { 79 | log::debug!("loaded features: {:?}", cfg); 80 | Ok(cfg) 81 | } else { 82 | Err(Error::msg(format!( 83 | "unknown features: {}", 84 | errors.join(", ") 85 | ))) 86 | } 87 | } 88 | } 89 | 90 | #[cfg(test)] 91 | mod test { 92 | use super::*; 93 | #[test] 94 | fn swapping_all_features_and_disregard_whitspace() -> Result<()> { 95 | let default = FeatureCfg::default(); 96 | let swapped = FeatureCfg { 97 | keep_spaces_in_links: !default.keep_spaces_in_links, 98 | format_block_quotes: !default.format_block_quotes, 99 | collate_link_defs: !default.collate_link_defs, 100 | outsource_inline_links: !default.outsource_inline_links, 101 | parse_cfg: ParseCfg { 102 | keep_linebreaks: !default.parse_cfg.keep_linebreaks, 103 | }, 104 | break_cfg: BreakCfg { 105 | keep_linebreaks: !default.break_cfg.keep_linebreaks, 106 | }, 107 | }; 108 | 109 | let parsed = 110 | "keep-spaces-in-links , keep-linebreaks ,format-block-quotes, collate-link-defs,outsource-inline-links" 111 | .parse::()?; 112 | 113 | assert_eq!(parsed, swapped); 114 | Ok(()) 115 | } 116 | 117 | #[test] 118 | fn failure_to_parse() -> Result<()> { 119 | let parsed = "unknown".parse::(); 120 | assert!(parsed.is_err()); 121 | Ok(()) 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/linebreak.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use std::collections::HashSet; 19 | 20 | use crate::detect::{BreakDetector, WhitespaceDetector}; 21 | 22 | pub fn insert_linebreaks_after_sentence_ends(text: &str, detector: &BreakDetector) -> String { 23 | let merged = normalise_linebreaks(text, &detector.whitespace); 24 | let sentence_ends = find_sentence_ends(&merged, detector); 25 | 26 | merged 27 | .chars() 28 | .enumerate() 29 | .filter_map(|(idx, el)| { 30 | if sentence_ends.contains(&Char::Skip(idx)) { 31 | None 32 | } else if sentence_ends.contains(&Char::Split(idx)) { 33 | Some(format!("\n{}", el)) 34 | } else { 35 | Some(format!("{}", el)) 36 | } 37 | }) 38 | .collect::() 39 | } 40 | 41 | /// Replace all linebreaks by spaces unless they have been escaped by a non-breaking space. 42 | fn normalise_linebreaks(text: &str, detector: &WhitespaceDetector) -> String { 43 | let mut last_was_nbsp = false; 44 | text.chars() 45 | .map(|el| { 46 | let replacement = if el != '\n' || last_was_nbsp { el } else { ' ' }; 47 | last_was_nbsp = detector.is_nbsp(&el); 48 | replacement 49 | }) 50 | .collect::() 51 | } 52 | 53 | #[derive(Eq, Hash, PartialEq, Debug)] 54 | enum Char { 55 | Skip(usize), 56 | Split(usize), 57 | } 58 | 59 | fn find_sentence_ends(text: &str, detector: &BreakDetector) -> HashSet { 60 | let as_chars = text.chars().collect::>(); 61 | 62 | as_chars 63 | .iter() 64 | .enumerate() 65 | .filter_map(|(idx, ch)| { 66 | let next = as_chars.get(idx + 1); 67 | 68 | if detector.is_breaking_marker(ch, next) 69 | && !detector.ends_with_keep_word(&as_chars, &idx) 70 | { 71 | Some([Char::Skip(idx + 1), Char::Split(idx + 2)]) 72 | } else { 73 | None 74 | } 75 | }) 76 | .flatten() 77 | .collect::>() 78 | } 79 | 80 | #[cfg(test)] 81 | mod test { 82 | use super::*; 83 | use crate::detect::BreakCfg; 84 | 85 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg { 86 | keep_linebreaks: false, 87 | }; 88 | 89 | #[test] 90 | fn finding_sentence_ends() { 91 | let text = "words that. are. followed by. periods. period."; 92 | let detector = BreakDetector::new("are. by.", "", false, ".", CFG_FOR_TESTS); 93 | 94 | let ends = find_sentence_ends(text, &detector); 95 | 96 | // We never detect a sentence at and the end of the text. 97 | let expected = vec![ 98 | Char::Skip(11), 99 | Char::Split(12), 100 | Char::Skip(38), 101 | Char::Split(39), 102 | ] 103 | .into_iter() 104 | .collect::>(); 105 | 106 | assert_eq!(expected, ends); 107 | } 108 | 109 | #[test] 110 | fn normalising_linebreaks() { 111 | // All whitespace, including tabs, is merged into single spaces. 112 | let text = " \n text with lots\n \nof white \n space  "; 113 | let expected = " text with lots  \nof white \n space  "; 114 | 115 | let merged = normalise_linebreaks(text, &WhitespaceDetector::default()); 116 | 117 | assert_eq!(expected, merged); 118 | } 119 | 120 | #[test] 121 | fn inserting_linebreaks_between_sentences() { 122 | let text = "words that. are. followed by. periods. period."; 123 | let detector = BreakDetector::new("are. 
by.", "", false, ".", CFG_FOR_TESTS); 124 | 125 | let broken = insert_linebreaks_after_sentence_ends(text, &detector); 126 | 127 | // We never detect a sentence at and the end of the text. 128 | let expected = "words that.\nare. followed by. periods.\nperiod."; 129 | 130 | assert_eq!(expected, broken); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: "ci" 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: main 7 | 8 | # Ensure there is only ever one workflow of this kind running at a time. 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} 11 | cancel-in-progress: true 12 | 13 | permissions: 14 | contents: write 15 | 16 | jobs: 17 | ci: 18 | runs-on: ubuntu-latest 19 | # Avoid very long running jobs. 20 | timeout-minutes: 30 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 0 27 | 28 | - name: Remove possible compilation remnants 29 | run: rm -rf ./target ./dist 30 | 31 | - name: Install dependencies 32 | run: | 33 | sudo apt-get update 34 | sudo apt-get install -yqq bash curl make git jq 35 | 36 | - name: Install Rust 37 | run: | 38 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > install_rust.sh 39 | sh install_rust.sh -y 40 | echo "$HOME/.cargo/bin" >> "${GITHUB_PATH}" 41 | 42 | - name: Build binary 43 | run: make build-dev 44 | # Treat all warnings as errors. 45 | env: 46 | RUSTFLAGS: "-Dwarnings" 47 | 48 | - name: Lint 49 | run: make lint 50 | 51 | - name: Run tests 52 | run: make test 53 | 54 | - name: Check coverage 55 | run: make coverage 56 | 57 | # Skip coverage upload for now because something goes wrong. TODO: Debug. 58 | # - uses: actions/upload-artifact@v4 59 | # if: always() 60 | # with: 61 | # name: coverage 62 | # path: | 63 | # .coverage.html 64 | # .coverage.json 65 | # if-no-files-found: error 66 | # retention-days: 7 67 | 68 | macos-release: 69 | runs-on: macos-latest 70 | needs: [ci] 71 | # Avoid very long running jobs. 72 | timeout-minutes: 20 73 | 74 | steps: 75 | - name: Checkout repository 76 | uses: actions/checkout@v4 77 | with: 78 | fetch-depth: 0 79 | 80 | - name: Remove possible compilation remnants 81 | run: rm -rf ./target ./dist 82 | 83 | - name: Add targets 84 | run: | 85 | rustup update 86 | rustup target add x86_64-apple-darwin 87 | rustup target add aarch64-apple-darwin 88 | 89 | - name: Build binaries 90 | run: | 91 | mkdir ./dist 92 | cargo build --release --target=aarch64-apple-darwin 93 | cp target/aarch64-apple-darwin/release/mdslw ./dist/mdslw_aarch64-apple-darwin 94 | cargo build --release --target=x86_64-apple-darwin 95 | cp target/x86_64-apple-darwin/release/mdslw ./dist/mdslw_x86_64-apple-darwin 96 | 97 | - uses: actions/upload-artifact@v4 98 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 99 | with: 100 | name: macos-release 101 | path: ./dist/ 102 | if-no-files-found: error 103 | retention-days: 1 104 | 105 | release: 106 | runs-on: ubuntu-latest 107 | needs: [ci, macos-release] 108 | # Avoid very long running jobs. 
109 | timeout-minutes: 30 110 | 111 | steps: 112 | - name: Checkout repository 113 | uses: actions/checkout@v4 114 | with: 115 | fetch-depth: 0 116 | 117 | - name: Remove possible compilation remnants 118 | run: rm -rf ./target ./dist 119 | 120 | - name: Install dependencies 121 | run: | 122 | sudo apt-get update 123 | sudo apt-get install -yqq bash curl make git jq 124 | 125 | - name: Install Rust 126 | run: | 127 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > install_rust.sh 128 | sh install_rust.sh -y 129 | echo "$HOME/.cargo/bin" >> "${GITHUB_PATH}" 130 | 131 | - name: Install cross-compilation dependencies 132 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 133 | run: | 134 | # For ARM Linux. 135 | sudo apt-get install -yqq gcc-arm-linux-gnueabihf 136 | # For Windows. 137 | sudo apt-get install -yqq mingw-w64 138 | 139 | - name: Install toolchains 140 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 141 | run: make install-toolchains 142 | 143 | - name: Build all release binaries apart from MacOS 144 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 145 | run: | 146 | echo '[target.armv7-unknown-linux-gnueabihf]' >> ~/.cargo/config 147 | echo 'linker = "arm-linux-gnueabihf-gcc"' >> ~/.cargo/config 148 | 149 | make build-prod-all 150 | 151 | - name: Copy release binaries 152 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 153 | run: make copy-relese-binaries 154 | 155 | - name: Retrieve MacOS binaries 156 | uses: actions/download-artifact@v4 157 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 158 | with: 159 | name: macos-release 160 | path: ./dist/ 161 | 162 | - name: List release binaries 163 | if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} 164 | run: ls -l ./dist/* 165 | 166 | - name: Make release 167 | if: ${{ startsWith(github.ref, 'refs/tags/') }} 168 | uses: softprops/action-gh-release@v1 169 | with: 170 | files: | 171 | dist/mdslw_x86_64-unknown-linux-musl 172 | dist/mdslw_armv7-unknown-linux-gnueabihf 173 | dist/mdslw_x86_64-apple-darwin 174 | dist/mdslw_aarch64-apple-darwin 175 | dist/mdslw_x86_64-pc-windows-gnu.exe 176 | -------------------------------------------------------------------------------- /src/logging.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use std::time; 19 | 20 | use log::{Level, Log, Metadata, Record}; 21 | 22 | /// Execute a trace log while lazily evaluating the expressions whose values shall be logged. This 23 | /// macro takes a string literal, followed by expressions that will be evaluated lazily. 24 | #[macro_export] 25 | macro_rules! 
trace_log { 26 | ($fmt_str:literal, $($exprs:expr),*) => { 27 | if log::log_enabled!(log::Level::Trace) { 28 | log::trace!($fmt_str, $($exprs),*); 29 | } 30 | }; 31 | } 32 | 33 | pub fn init_logging(level: u8) -> Result<(), log::SetLoggerError> { 34 | log::set_boxed_logger(Box::new(Logger::new(level))) 35 | .map(|()| log::set_max_level(log::LevelFilter::Trace)) 36 | } 37 | 38 | const SELF_MODULE_NAME: &str = env!("CARGO_PKG_NAME"); 39 | 40 | struct Logger { 41 | starttime: time::Instant, 42 | level: Level, 43 | module_name: String, 44 | module_prefix: String, 45 | } 46 | 47 | impl Logger { 48 | pub fn new(log_level: u8) -> Self { 49 | let level = match log_level { 50 | 0 => Level::Warn, 51 | 1 => Level::Info, 52 | 2 => Level::Debug, 53 | _ => Level::Trace, 54 | }; 55 | Self { 56 | level, 57 | starttime: time::Instant::now(), 58 | module_name: SELF_MODULE_NAME.to_string(), 59 | module_prefix: format!("{}::", SELF_MODULE_NAME), 60 | } 61 | } 62 | } 63 | 64 | impl Log for Logger { 65 | fn enabled(&self, metadata: &Metadata) -> bool { 66 | metadata.level() <= self.level 67 | } 68 | 69 | fn log(&self, record: &Record) { 70 | if let Some(msg) = self.format_message(record) { 71 | eprintln!("{}", msg); 72 | } 73 | } 74 | 75 | fn flush(&self) {} 76 | } 77 | 78 | impl Logger { 79 | fn format_log_location(&self, record: &Record) -> String { 80 | let module = record.module_path_static().unwrap_or(""); 81 | 82 | if module == self.module_name || module.starts_with(&self.module_prefix) { 83 | let file = record.file_static().unwrap_or(""); 84 | let line = record.line().unwrap_or(0); 85 | format!("{}:{}:{}", module, file, line) 86 | } else { 87 | module.to_owned() 88 | } 89 | } 90 | 91 | fn format_message(&self, record: &Record) -> Option { 92 | if self.enabled(record.metadata()) { 93 | let elapsed = self.starttime.elapsed(); 94 | let elapsed_secs = elapsed.as_secs(); 95 | let elapsed_millis = elapsed.subsec_millis(); 96 | let thread_idx = rayon::current_thread_index() 97 | .map(|el| format!("@{}", el)) 98 | .unwrap_or_default(); 99 | 100 | Some(format!( 101 | "{}{}: {}s{}ms {}: {}", 102 | record.level(), 103 | thread_idx, 104 | elapsed_secs, 105 | elapsed_millis, 106 | self.format_log_location(record), 107 | record.args() 108 | )) 109 | } else { 110 | None 111 | } 112 | } 113 | } 114 | 115 | #[cfg(test)] 116 | mod test { 117 | use super::*; 118 | use anyhow::{Error, Result}; 119 | 120 | #[test] 121 | fn new_logger() { 122 | let logger0 = Logger::new(0); 123 | assert_eq!(logger0.level, Level::Warn); 124 | 125 | let logger1 = Logger::new(1); 126 | assert_eq!(logger1.level, Level::Info); 127 | 128 | let logger2 = Logger::new(2); 129 | assert_eq!(logger2.level, Level::Debug); 130 | 131 | let logger3 = Logger::new(3); 132 | assert_eq!(logger3.level, Level::Trace); 133 | } 134 | 135 | #[test] 136 | fn logger_enabled() { 137 | let logger = Logger::new(0); 138 | assert_eq!(logger.level, Level::Warn); 139 | 140 | let metadata_err = Metadata::builder().level(Level::Error).build(); 141 | let metadata_debug = Metadata::builder().level(Level::Debug).build(); 142 | 143 | assert!(logger.enabled(&metadata_err)); 144 | assert!(!logger.enabled(&metadata_debug)); 145 | } 146 | 147 | #[test] 148 | fn logging_a_message_from_own_module() -> Result<()> { 149 | let args = format_args!("some thing"); 150 | let metadata = Metadata::builder().level(Level::Error).build(); 151 | let record = Record::builder() 152 | .metadata(metadata) 153 | .module_path_static(Some("mdslw::test")) 154 | .file_static(Some("test_file")) 155 | 
.args(args) 156 | .build(); 157 | 158 | let logger = Logger::new(0); 159 | let msg = logger 160 | .format_message(&record) 161 | .ok_or(Error::msg("cannot build message"))?; 162 | 163 | // Check beginning and end because the test might take longer than 1ms, which would fail 164 | // it. 165 | assert!(msg.starts_with("ERROR: 0s"), "incorrect start: {}", msg); 166 | assert!( 167 | msg.ends_with("ms mdslw::test:test_file:0: some thing"), 168 | "incorrect end: {}", 169 | msg 170 | ); 171 | 172 | Ok(()) 173 | } 174 | 175 | #[test] 176 | fn logging_a_message_from_another_module() -> Result<()> { 177 | let args = format_args!("some thing"); 178 | let metadata = Metadata::builder().level(Level::Error).build(); 179 | let record = Record::builder() 180 | .metadata(metadata) 181 | .module_path_static(Some("some::other::module")) 182 | .file_static(Some("test_file")) 183 | .args(args) 184 | .build(); 185 | 186 | let logger = Logger::new(0); 187 | let msg = logger 188 | .format_message(&record) 189 | .ok_or(Error::msg("cannot build message"))?; 190 | 191 | // Check beginning and end because the test might take longer than 1ms, which would fail 192 | // it. 193 | assert!(msg.starts_with("ERROR: 0s"), "incorrect start: {}", msg); 194 | assert!( 195 | msg.ends_with("ms some::other::module: some thing"), 196 | "incorrect end: {}", 197 | msg 198 | ); 199 | 200 | Ok(()) 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /src/ranges.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use crate::parse::CharRange; 19 | use crate::trace_log; 20 | 21 | #[derive(Debug, PartialEq)] 22 | pub enum WrapType { 23 | Indent(usize), 24 | Verbatim, 25 | } 26 | 27 | #[derive(Debug, PartialEq)] 28 | /// TextRange describes a range of characters in a document including whether they shall be 29 | /// repeated verbatim or not. It also contains the number of spaces of indent to use when wrapping 30 | /// the contained text. 31 | pub struct TextRange { 32 | pub wrap: WrapType, 33 | pub range: CharRange, 34 | } 35 | 36 | /// The first arguments contains those ranges in the document that shall be wrapped. Every 37 | /// character in the document that is not inside such a range will be taken verbatim. This also 38 | /// determines the starting indent in spaces for every range that shall be wrapped. 39 | pub fn fill_markdown_ranges(wrap_ranges: Vec, text: &str) -> Vec { 40 | let mut last_end = 0; 41 | 42 | let lines = line_ranges(text); 43 | 44 | wrap_ranges 45 | .into_iter() 46 | // Append an element that points at the end of the document to ensure that we always add 47 | // the last ranges in the document because we always add a verbatim range before the 48 | // non-verbatim range. 
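        // For example, for a document of length 10 with a single wrap range 2..5, the
        // sentinel range 10..10 makes the loop below still emit the trailing verbatim
        // range 5..10; the empty sentinel itself is filtered out further down.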
49 | .chain([CharRange { 50 | start: text.len(), 51 | end: text.len(), 52 | }]) 53 | .flat_map(|el| { 54 | let verbatim = TextRange { 55 | wrap: WrapType::Verbatim, 56 | range: CharRange { 57 | start: last_end, 58 | end: el.start, 59 | }, 60 | }; 61 | last_end = el.end; 62 | 63 | let wrap_line_start = find_line_start(el.start, &lines).unwrap_or(el.start); 64 | let wrap = TextRange { 65 | wrap: WrapType::Indent(el.start - wrap_line_start), 66 | range: el, 67 | }; 68 | [verbatim, wrap] 69 | }) 70 | .filter(|el| !el.range.is_empty()) 71 | .map(|el| { 72 | if let WrapType::Indent(indent) = el.wrap { 73 | trace_log!( 74 | "formattable text with {} spaces indent: {}", 75 | indent, 76 | text[el.range.clone()].replace('\n', "\\n") 77 | ); 78 | } else { 79 | trace_log!( 80 | "verbatim text: {}", 81 | text[el.range.clone()].replace('\n', "\\n") 82 | ); 83 | } 84 | el 85 | }) 86 | .collect::>() 87 | } 88 | 89 | /// Determine character ranges for each line in the document. 90 | fn line_ranges(text: &str) -> Vec { 91 | let mut start = 0; 92 | 93 | text.split_inclusive('\n') 94 | .map(|el| { 95 | let end = start + el.len(); 96 | let range = CharRange { start, end }; 97 | start = end; 98 | range 99 | }) 100 | .collect::>() 101 | } 102 | 103 | /// Find the start of the line that "point" is in. 104 | fn find_line_start(point: usize, line_ranges: &[CharRange]) -> Option { 105 | line_ranges 106 | .iter() 107 | .find(|el| el.contains(&point)) 108 | .map(|el| el.start) 109 | } 110 | 111 | #[cfg(test)] 112 | mod test { 113 | use super::*; 114 | 115 | #[test] 116 | fn finding_line_start() { 117 | let ranges = vec![ 118 | CharRange { start: 0, end: 10 }, 119 | CharRange { start: 10, end: 12 }, 120 | CharRange { start: 22, end: 31 }, 121 | CharRange { start: 31, end: 33 }, 122 | ]; 123 | 124 | for (point, expected) in [ 125 | (5, Some(0)), 126 | (10, Some(10)), 127 | (15, None), 128 | (22, Some(22)), 129 | (28, Some(22)), 130 | (30, Some(22)), 131 | (31, Some(31)), 132 | (35, None), 133 | ] { 134 | let start = find_line_start(point, &ranges); 135 | assert_eq!(expected, start); 136 | } 137 | } 138 | 139 | #[test] 140 | fn getting_line_ranges() { 141 | let text = r#" 142 | text 143 | more text 144 | 145 | even more text 146 | "#; 147 | let ranges = line_ranges(text); 148 | let expected = vec![ 149 | CharRange { start: 0, end: 1 }, 150 | CharRange { start: 1, end: 6 }, 151 | CharRange { start: 6, end: 16 }, 152 | CharRange { start: 16, end: 17 }, 153 | CharRange { start: 17, end: 32 }, 154 | ]; 155 | assert_eq!(expected, ranges); 156 | } 157 | 158 | #[test] 159 | fn filling_ranges() { 160 | let text = r#" 161 | text 162 | more text 163 | 164 | even more text 165 | "#; 166 | let wrap_ranges = vec![ 167 | CharRange { start: 1, end: 6 }, 168 | CharRange { start: 22, end: 26 }, 169 | CharRange { start: 31, end: 32 }, 170 | ]; 171 | let filled = fill_markdown_ranges(wrap_ranges, text); 172 | 173 | let expected = vec![ 174 | TextRange { 175 | wrap: WrapType::Verbatim, 176 | range: CharRange { start: 0, end: 1 }, 177 | }, 178 | TextRange { 179 | wrap: WrapType::Indent(0), 180 | range: CharRange { start: 1, end: 6 }, 181 | }, 182 | TextRange { 183 | wrap: WrapType::Verbatim, 184 | range: CharRange { start: 6, end: 22 }, 185 | }, 186 | TextRange { 187 | wrap: WrapType::Indent(5), 188 | range: CharRange { start: 22, end: 26 }, 189 | }, 190 | TextRange { 191 | wrap: WrapType::Verbatim, 192 | range: CharRange { start: 26, end: 31 }, 193 | }, 194 | TextRange { 195 | wrap: WrapType::Indent(14), 196 | range: CharRange { start: 31, 
end: 32 }, 197 | }, 198 | ]; 199 | 200 | assert_eq!(expected.len(), filled.len()); 201 | for (v1, v2) in expected.into_iter().zip(filled) { 202 | assert_eq!(v1, v2); 203 | } 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash -euo pipefail 2 | 3 | SRC := $(shell find src -name "*.rs") 4 | TARGET_DEV := target/debug/mdslw 5 | TARGET_PROD := target/x86_64-unknown-linux-musl/release/mdslw 6 | 7 | default: build-dev 8 | 9 | build-dev: $(TARGET_DEV) 10 | 11 | $(TARGET_DEV): Cargo.lock Cargo.toml $(SRC) 12 | cargo build 13 | 14 | .PHONY: install-toolchains 15 | install-toolchains: 16 | rustup target add x86_64-unknown-linux-musl 17 | rustup target add armv7-unknown-linux-gnueabihf 18 | rustup target add x86_64-pc-windows-gnu 19 | 20 | build-prod: $(TARGET_PROD) 21 | 22 | # Build prod for the dev system. 23 | $(TARGET_PROD): Cargo.lock Cargo.toml $(SRC) 24 | RUSTFLAGS='-Dwarnings -C link-arg=-s -C relocation-model=static' \ 25 | cargo build -j "$$(nproc --all)" --release --target=x86_64-unknown-linux-musl 26 | 27 | .PHONY: build-prod-all 28 | build-prod-all: 29 | echo ==== x86_64-unknown-linux-musl ==== 30 | $(MAKE) --always-make build-prod 31 | echo ==== armv7-unknown-linux-gnueabihf ==== 32 | RUSTFLAGS='-Dwarnings -C link-arg=-s' \ 33 | cargo build -j "$$(nproc --all)" --release --target=armv7-unknown-linux-gnueabihf 34 | echo ==== x86_64-pc-windows-gnu ==== 35 | RUSTFLAGS='-Dwarnings -C link-arg=-s' \ 36 | cargo build -j "$$(nproc --all)" --release --target x86_64-pc-windows-gnu 37 | 38 | .PHONY: copy-relese-binaries 39 | copy-relese-binaries: 40 | rm -rf ./dist 41 | mkdir -p ./dist 42 | cp target/x86_64-unknown-linux-musl/release/mdslw ./dist/mdslw_x86_64-unknown-linux-musl 43 | cp target/armv7-unknown-linux-gnueabihf/release/mdslw ./dist/mdslw_armv7-unknown-linux-gnueabihf 44 | cp target/x86_64-pc-windows-gnu/release/mdslw.exe ./dist/mdslw_x86_64-pc-windows-gnu.exe 45 | 46 | .PHONY: test 47 | test: 48 | RUSTFLAGS="-Dwarnings" cargo test 49 | $(MAKE) test-features test-langs test-default-config assert-version-tag test-envs-match-flags 50 | 51 | FEATURES := $(shell grep "/// {n} \* [a-z-]* => " src/cfg.rs | awk '{print $$4}' | tr '\n' ',' | sed 's/,$$//') 52 | 53 | .PHONY: test-features 54 | test-features: 55 | [[ -n "$(FEATURES)" ]] 56 | RUSTFLAGS="-Dwarnings" cargo run -- --features="$(FEATURES)" <<< "markdown" 57 | 58 | .PHONY: assert-version-tag 59 | assert-version-tag: 60 | # Extract tag and compare it to the version known by mdslw. When not run on a 61 | # tag, this target checks that the version known by the tool is not identical 62 | # to any existing tag. When run on a tag, it checks that the version known is 63 | # identical to the current tag. 
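# For example, with version = "0.16.1" in Cargo.toml, a checkout of the tag v0.16.1
# passes the first branch, while on any other commit the check fails if an existing
# tag name equals the version reported by the tool.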
64 | echo >&2 "Tags: $$(git tag --list | tr '\n' ' ')" 65 | version=$$(RUSTFLAGS="-Dwarnings" cargo run -- --version | awk '{print $$2'}) && \ 66 | echo >&2 "Version: $${version}" && \ 67 | tag=$$(git describe --exact-match --tags | sed 's/^v//' || :) && \ 68 | if [[ -n "$${tag}" ]]; then \ 69 | if [[ "$${tag}" != "$${version}" ]]; then \ 70 | echo >&2 "Version tag $${tag} does not match tool version $${version}."; \ 71 | exit 1; \ 72 | fi; \ 73 | else \ 74 | tags=$$(git tag --list) && match= && \ 75 | for t in $${tags}; do \ 76 | if [[ "$${version}" == "$$t" ]]; then match="$$t"; fi; \ 77 | done && \ 78 | if [[ -n "$${match-}" ]]; then \ 79 | echo >&2 "Found an existing matching git version tag: $$match"; \ 80 | exit 1; \ 81 | fi; \ 82 | fi 83 | 84 | .PHONY: test-envs-match-flags 85 | test-envs-match-flags: 86 | flags=($$(cargo run -- --help | grep -E "^ +-" | grep -E -o -- "--[0-9a-zA-Z-]+" | grep -vE -- '--(help|verbose|version)' | sort -fu)) && \ 87 | envs=($$(cargo run -- --help | grep -o '\[env: [^=]*=' | sed 's/^\[env: //;s/=$$//' | sort -fu)) && \ 88 | echo FLAGS: "$${flags[@]}" && echo ENVS: "$${envs[@]}" && \ 89 | [[ "$${#flags[@]}" == "$${#envs[@]}" ]] && \ 90 | for idx in "$${!flags[@]}"; do \ 91 | flag="$${flags[$${idx}]}" && env="$${envs[$${idx}]}" && \ 92 | if [[ -n "$$(tr -d '[:upper:]_' <<< $$env)" || -n "$$(tr -d '[:lower:]-' <<< $$flag)" ]]; then \ 93 | echo >&2 "Malformed env or flag: $${env} || $${flag}"; exit 1; \ 94 | fi; \ 95 | if [[ "mdslw_$$(sed 's/^__//' <<< $${flag//-/_})" != "$${env,,}" ]]; then \ 96 | echo >&2 "Env/flag mismatch: $${env} != $${flag}"; exit 1; \ 97 | fi; \ 98 | done 99 | 100 | .PHONY: lint 101 | lint: 102 | rustup component add clippy 103 | RUSTFLAGS="-Dwarnings" cargo check --all-features --all-targets 104 | RUSTFLAGS="-Dwarnings" cargo clippy --all-features --all-targets --no-deps 105 | 106 | # Extract languages requested by the code to keep them in sync. 107 | LANGS := $(shell grep -o '/// Supported languages are:\( *[a-z][a-z]\)* *' ./src/cfg.rs | awk -F: '{print $$2}' | tr -s '[:space:]') 108 | 109 | .PHONY: test-langs 110 | test-langs: 111 | [[ -n "$(LANGS)" ]] 112 | RUSTFLAGS="-Dwarnings" cargo run -- --lang="$(LANGS) ac" <<< "markdown" 113 | 114 | .PHONY: test-default-config 115 | test-default-config: 116 | from_readme=$$( \ 117 | state=0; while read -r line; do \ 118 | if [[ "$${line}" == "" ]]; then state=0; fi; \ 119 | if [[ "$${state}" -eq 1 ]]; then echo "$${line}"; fi; \ 120 | if [[ "$${line}" == "" ]]; then state=1; fi; \ 121 | done < README.md | sed '/^$$/d' | grep -v '^```'\ 122 | ) && \ 123 | from_tool=$$(RUSTFLAGS="-Dwarnings" cargo run -- --default-config) && \ 124 | [[ "$${from_tool}" == "$${from_readme}" ]] 125 | 126 | COVERAGE := .coverage.html 127 | PROFRAW := .coverage.profraw 128 | PROFDATA := .coverage.profdata 129 | COVERAGE_JSON := .coverage.json 130 | RUSTC_ROOT := $(shell rustc --print sysroot) 131 | LLVM_PROFILE_FILE := $(PROFRAW) 132 | export LLVM_PROFILE_FILE 133 | MIN_COV_PERCENT := 80 134 | 135 | .PHONY: coverage 136 | coverage: 137 | rm -f "$(COVERAGE)" "$(PROFRAW)" "$(PROFDATA)" 138 | # Install dependencies 139 | rustup component add llvm-tools 140 | cargo install rustfilt 141 | # Build stand-alone test executable. 142 | RUSTFLAGS="-C instrument-coverage=all" \ 143 | cargo build --tests 144 | # Find and run executable to generate coverage report. 
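# The steps below run the instrumented test binary (which writes $(PROFRAW)), merge the
# raw profile with llvm-profdata, and then use llvm-cov to render the HTML report as well
# as the JSON export that the coverage threshold check at the end reads.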
145 | exe=$$( \ 146 | find target/debug/deps/ -executable -name "mdslw-*" \ 147 | | xargs ls -t | head -n1 \ 148 | ) && \ 149 | prof_exe=$$(find $(RUSTC_ROOT) -executable -name "llvm-profdata" | head -n1) && \ 150 | cov_exe=$$(find $(RUSTC_ROOT) -executable -name "llvm-cov" | head -n1) && \ 151 | "$${exe}" && \ 152 | "$${prof_exe}" merge \ 153 | -sparse "$(PROFRAW)" -o "$(PROFDATA)" && \ 154 | "$${cov_exe}" show \ 155 | -Xdemangler=rustfilt "$${exe}" \ 156 | --format=html \ 157 | --instr-profile="$(PROFDATA)" \ 158 | --show-line-counts-or-regions \ 159 | --show-instantiations \ 160 | --show-branches=count \ 161 | --sources "$$(readlink -e src)" \ 162 | > "$(COVERAGE)" && \ 163 | if [[ -t 1 ]]; then xdg-open "$(COVERAGE)"; fi && \ 164 | "$${cov_exe}" export \ 165 | -Xdemangler=rustfilt "$${exe}" \ 166 | --format=text \ 167 | --instr-profile="$(PROFDATA)" \ 168 | --sources "$$(readlink -e src)" \ 169 | > "$(COVERAGE_JSON)" 170 | echo "Per-file coverage:" && \ 171 | jq -r ".data[].files[] | [.summary.lines.percent, .filename] | @csv" \ 172 | < "$(COVERAGE_JSON)" \ 173 | | sort -t, -k 2 \ 174 | | sed "s;$${PWD};.;" \ 175 | | awk -F, '{printf("%.2f%% => %s\n", $$1, $$2)}' 176 | jq -r ".data[].totals.lines.percent" \ 177 | < "$(COVERAGE_JSON)" \ 178 | | awk '{if ($$1<$(MIN_COV_PERCENT)) \ 179 | {printf("coverage low: %.2f%%<$(MIN_COV_PERCENT)%%\n", $$1); exit(1)} \ 180 | else{printf("coverage OK: %.2f%%\n", $$1)} \ 181 | }' >&2 182 | -------------------------------------------------------------------------------- /src/wrap.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */
17 | 
18 | use crate::detect::{BreakDetector, WhitespaceDetector};
19 | use crate::indent::build_indent;
20 | use crate::linebreak::insert_linebreaks_after_sentence_ends;
21 | use crate::ranges::{TextRange, WrapType};
22 | use crate::trace_log;
23 | 
24 | pub fn add_linebreaks_and_wrap(
25 |     ranges: Vec<TextRange>,
26 |     max_width: &Option<usize>,
27 |     detector: &BreakDetector,
28 |     text: &str,
29 | ) -> String {
30 |     let mut result = String::new();
31 | 
32 |     for range in ranges {
33 |         if let WrapType::Indent(indent_spaces) = range.wrap {
34 |             trace_log!(
35 |                 "wrapping text: {}",
36 |                 text[range.range.clone()].replace('\n', "\\n")
37 |             );
38 |             let indent = build_indent(indent_spaces);
39 |             trace_log!("keeping indent in mind: '{}'", indent);
40 |             let broken = insert_linebreaks_after_sentence_ends(&text[range.range], detector);
41 |             trace_log!(
42 |                 "with linebreaks after sentences: {}",
43 |                 broken.replace('\n', "\\n")
44 |             );
45 |             let wrapped = broken
46 |                 .split('\n')
47 |                 .enumerate()
48 |                 .flat_map(|(idx, el)| {
49 |                     wrap_long_line_and_collapse_inline_whitespace(
50 |                         el,
51 |                         idx,
52 |                         max_width,
53 |                         &indent,
54 |                         &detector.whitespace,
55 |                     )
56 |                 })
57 |                 .collect::<Vec<_>>()
58 |                 .join("\n");
59 |             trace_log!(
60 |                 "after wrapping long sentences: {}",
61 |                 wrapped.replace('\n', "\\n")
62 |             );
63 |             result.push_str(&wrapped);
64 |         } else {
65 |             trace_log!(
66 |                 "keeping text: {}",
67 |                 text[range.range.clone()].to_string().replace('\n', "\\n")
68 |             );
69 |             result.push_str(&text[range.range]);
70 |         }
71 |     }
72 | 
73 |     result.trim_end().to_string()
74 | }
75 | 
76 | /// The main purpose of this function is to wrap a long line, making sure to add the linebreak
77 | /// between words. It does so by splitting by whitespace and then joining again by spaces. One side
78 | /// effect that we accept here is that all consecutive inline whitespace will be replaced by a
79 | /// single space due to the splitting-and-joining process.
80 | fn wrap_long_line_and_collapse_inline_whitespace(
81 |     sentence: &str,
82 |     sentence_idx: usize,
83 |     max_width: &Option<usize>,
84 |     indent: &str,
85 |     detector: &WhitespaceDetector,
86 | ) -> Vec<String> {
87 |     let mut lines = vec![];
88 |     let mut words = detector
89 |         .split_whitespace(sentence)
90 |         .filter(|el| !el.is_empty());
91 |     let (mut line, first_indent_len) = if let Some(first_word) = words.next() {
92 |         // The first sentence is already properly indented. Every other sentence has to be
93 |         // indented manually.
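        // (E.g. for a markdown list item, the list marker and its indent in front of the
        // wrap range are emitted verbatim, so only continuation sentences and wrapped
        // lines receive the indent prefix built here.)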
94 | if sentence_idx == 0 { 95 | (String::from(first_word), indent.chars().count()) 96 | } else { 97 | (format!("{}{}", indent, first_word), 0) 98 | } 99 | } else { 100 | (String::new(), 0) 101 | }; 102 | let mut line_len = line.chars().count() + first_indent_len; 103 | let width = max_width.unwrap_or(0); 104 | for word in words { 105 | let chars = word.chars().count(); 106 | if width == 0 || line_len + 1 + chars <= width { 107 | line.push(' '); 108 | line.push_str(word); 109 | line_len += chars + 1; 110 | } else { 111 | lines.push(line); 112 | line = String::from(indent); 113 | line.push_str(word); 114 | line_len = line.chars().count(); 115 | } 116 | } 117 | lines.push(line); 118 | lines 119 | } 120 | 121 | #[cfg(test)] 122 | mod test { 123 | use super::*; 124 | use crate::detect::BreakCfg; 125 | use crate::parse::CharRange; 126 | 127 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg { 128 | keep_linebreaks: false, 129 | }; 130 | 131 | #[test] 132 | fn wrapping_long_sentence() { 133 | let sentence = "this sentence is not that long but will be wrapped"; 134 | let sentence_idx = 0; 135 | let max_width = 11; 136 | let indent = " "; 137 | let wrapped = wrap_long_line_and_collapse_inline_whitespace( 138 | sentence, 139 | sentence_idx, 140 | &Some(max_width), 141 | indent, 142 | &WhitespaceDetector::default(), 143 | ); 144 | 145 | // No indent for the start of the sentence due to the sentence_idx. 146 | let expected = vec![ 147 | "this", 148 | " sentence", 149 | " is not", 150 | " that long", 151 | " but will", 152 | " be", 153 | " wrapped", 154 | ]; 155 | 156 | assert_eq!(expected, wrapped); 157 | } 158 | 159 | #[test] 160 | fn wrapping_long_sentence_that_is_not_the_first() { 161 | let sentence = "some sentence with words"; 162 | let sentence_idx = 1; 163 | let max_width = 5; 164 | // Indent will be copied, does not have to be whitespace. 165 | let indent = "|"; 166 | let wrapped = wrap_long_line_and_collapse_inline_whitespace( 167 | sentence, 168 | sentence_idx, 169 | &Some(max_width), 170 | indent, 171 | &WhitespaceDetector::default(), 172 | ); 173 | 174 | // Note the indent for the start of the sentence due to the sentence_idx. 175 | let expected = vec!["|some", "|sentence", "|with", "|words"]; 176 | 177 | assert_eq!(expected, wrapped); 178 | } 179 | 180 | #[test] 181 | fn not_wrapping_long_sentence_unless_requested() { 182 | let sentence = "this sentence is somewhat long but will not be wrapped"; 183 | let sentence_idx = 0; 184 | let indent = " "; 185 | let wrapped = wrap_long_line_and_collapse_inline_whitespace( 186 | sentence, 187 | sentence_idx, 188 | &None, 189 | indent, 190 | &WhitespaceDetector::default(), 191 | ); 192 | 193 | let expected = vec![sentence]; 194 | 195 | assert_eq!(expected, wrapped); 196 | } 197 | 198 | #[test] 199 | fn adding_linebreaks_after_sentences() { 200 | let ranges = vec![ 201 | TextRange { 202 | wrap: WrapType::Indent(0), 203 | range: CharRange { start: 0, end: 33 }, 204 | }, 205 | // The pipe should remain verbatim. 206 | TextRange { 207 | wrap: WrapType::Verbatim, 208 | range: CharRange { start: 33, end: 36 }, 209 | }, 210 | TextRange { 211 | wrap: WrapType::Indent(3), 212 | range: CharRange { start: 36, end: 74 }, 213 | }, 214 | ]; 215 | let text = String::from( 216 | "Some text. It contains sentences. | It's separated in two. 
Parts, that is.", 217 | ); 218 | let detector = BreakDetector::new("", "", false, ".", CFG_FOR_TESTS); 219 | 220 | let wrapped = add_linebreaks_and_wrap(ranges, &None, &detector, &text); 221 | 222 | // Whitespace at the start of a range is also merged into one space. Not sure if that makes 223 | // sense but it does not appear to be relevant in practice, probably due to the way we 224 | // parse the markdown files. That is, none of the ranges we get appear to start with 225 | // whitespace at all. 226 | let expected = String::from( 227 | "Some text.\nIt contains sentences. | It's separated in two.\n Parts, that is.", 228 | ); 229 | assert_eq!(expected, wrapped); 230 | } 231 | 232 | #[test] 233 | fn adding_linebreaks_after_sentences_with_keep_words() { 234 | let ranges = vec![TextRange { 235 | wrap: WrapType::Indent(0), 236 | range: CharRange { start: 0, end: 33 }, 237 | }]; 238 | let text = String::from("Some text. It contains sentences."); 239 | let detector = BreakDetector::new("TEXT.", "", false, ".", CFG_FOR_TESTS); 240 | 241 | let wrapped = add_linebreaks_and_wrap(ranges, &None, &detector, &text); 242 | 243 | let expected = String::from("Some text. It contains sentences."); 244 | assert_eq!(expected, wrapped); 245 | } 246 | } 247 | -------------------------------------------------------------------------------- /src/detect.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use std::collections::HashSet; 19 | 20 | pub struct BreakDetector { 21 | // Information related to whitespace. 22 | pub whitespace: WhitespaceDetector, 23 | 24 | // Information related to keep words. 25 | keep_words: HashSet<(String, usize)>, 26 | keep_words_preserve_case: bool, 27 | 28 | // Information related to end markers. 
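    // Characters that may end a sentence, e.g. "?!:." as set in this repository's
    // .mdslw.toml.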
29 |     end_markers: String,
30 | }
31 | 
32 | #[derive(Default)]
33 | pub struct WhitespaceDetector {
34 |     whitespace_to_detect: String,
35 | }
36 | 
37 | impl<'a> WhitespaceDetector {
38 |     const NBSP: &'static str = "\u{00a0}\u{2007}\u{202f}\u{2060}\u{feff}";
39 | 
40 |     pub fn new(keep_linebreaks: bool) -> Self {
41 |         let mut whitespace_to_detect = String::from(Self::NBSP);
42 |         if keep_linebreaks {
43 |             log::debug!("not treating linebreaks as modifiable whitespace");
44 |             whitespace_to_detect.push('\n')
45 |         } else {
46 |             log::debug!("treating linebreaks as modifiable whitespace");
47 |         }
48 |         Self {
49 |             whitespace_to_detect,
50 |         }
51 |     }
52 | 
53 |     pub fn split_whitespace(&self, s: &'a str) -> std::vec::IntoIter<&'a str> {
54 |         s.split(|el| self.is_whitespace(&el))
55 |             .filter(|el| !el.is_empty())
56 |             .collect::<Vec<_>>()
57 |             .into_iter()
58 |     }
59 | 
60 |     pub fn is_whitespace(&self, ch: &char) -> bool {
61 |         // The character is whitespace if it is detected to be UTF8 whitespace and if it is not in
62 |         // the list of excluded whitespace characters known by this struct.
63 |         ch.is_whitespace() && !self.whitespace_to_detect.contains(*ch)
64 |     }
65 | 
66 |     pub fn is_nbsp(&self, ch: &char) -> bool {
67 |         Self::NBSP.contains(*ch)
68 |     }
69 | }
70 | 
71 | #[derive(Debug, PartialEq)]
72 | pub struct BreakCfg {
73 |     pub keep_linebreaks: bool,
74 | }
75 | 
76 | impl BreakDetector {
77 |     pub fn new(
78 |         keep_words: &str,
79 |         keep_word_ignores: &str,
80 |         keep_words_preserve_case: bool,
81 |         end_markers: &str,
82 |         break_cfg: &BreakCfg,
83 |     ) -> Self {
84 |         let (cased_words, cased_ignores) = if keep_words_preserve_case {
85 |             (keep_words.to_owned(), keep_word_ignores.to_owned())
86 |         } else {
87 |             (keep_words.to_lowercase(), keep_word_ignores.to_lowercase())
88 |         };
89 | 
90 |         let ignores = cased_ignores.split_whitespace().collect::<Vec<_>>();
91 |         let internal_keep_words = cased_words
92 |             .split_whitespace()
93 |             .filter(|el| !ignores.contains(el))
94 |             .map(|el| (el.to_string(), el.len() - 1))
95 |             .collect::<HashSet<_>>();
96 | 
97 |         log::debug!("end markers: '{}'", end_markers);
98 |         log::debug!("using {} unique keep words", internal_keep_words.len());
99 |         let case_info = if keep_words_preserve_case { "" } else { "in" };
100 |         log::debug!("treating keep words case-{}sensitively", case_info);
101 | 
102 |         Self {
103 |             // Keep words.
104 |             keep_words_preserve_case,
105 |             keep_words: internal_keep_words,
106 |             // End markers.
107 |             end_markers: end_markers.to_string(),
108 |             // Whitespace.
109 |             whitespace: WhitespaceDetector::new(break_cfg.keep_linebreaks),
110 |         }
111 |     }
112 | 
113 |     /// Checks whether "text" ends with one of the keep words known by self at "idx".
114 |     pub fn ends_with_keep_word(&self, text: &[char], idx: &usize) -> bool {
115 |         if idx < &text.len() {
116 |             self.keep_words
117 |                 .iter()
118 |                 // Only check words that can actually be in the text.
119 |                 .filter(|(_el, disp)| idx >= disp)
120 |                 // Determine whether any keep word matches.
121 |                 .any(|(el, disp)| {
122 |                     // Check whether the word is at the start of the text or whether, if it starts
123 |                     // with an alphanumeric character, it is preceded by a character that is not
124 |                     // alphanumeric. That way, we avoid matching a keep word of "g." on a text going
125 |                     // "e.g.". Note that, here, idx>=disp holds. If a "word" does not start with an
126 |                     // alphanumeric character, then the definition of "word" is ambiguous anyway. In
127 |                     // such a case, we also match partially.
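                    // Here `disp` is the keep word's length minus one, so `idx - disp` is
                    // the index of the word's first character and `idx` that of its last.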
128 | (idx == disp || !text[idx-disp-1..=idx-disp].iter().all(|el| el.is_alphanumeric())) && 129 | // Check whether all characters of the keep word and the slice through the text 130 | // are identical. 131 | text[idx - disp..=*idx] 132 | .iter() 133 | // Convert the text we compare with to lower case, but only those parts 134 | // that we actually do compare with. The conversion is somewhat annoying 135 | // and complicated because a single upper-case character might map to 136 | // multiple lower-case ones when converted (not sure why that would be so). 137 | .flat_map(|el| { 138 | if self.keep_words_preserve_case { 139 | vec![*el] 140 | } else { 141 | el.to_lowercase().collect::>() 142 | } 143 | }) 144 | // The strings self.data is already in lower case if desired. No conversion 145 | // needed here. 146 | .zip(el.chars()) 147 | .all(|(ch1, ch2)| ch1 == ch2) 148 | }) 149 | } else { 150 | false 151 | } 152 | } 153 | 154 | /// Checks whether ch is an end marker and whether the surrounding characters indicate that ch 155 | /// is actually at the end of a sentence. 156 | pub fn is_breaking_marker(&self, ch: &char, next: Option<&char>) -> bool { 157 | // The current character has to be an end marker. If it is not, it does not end a sentence. 158 | self.end_markers.contains(*ch) 159 | // The next character must be whitespace. If it is not, this character is in the middle 160 | // of a word and, thus, not at the end of a sentence. 161 | && is_whitespace(next, &self.whitespace) 162 | } 163 | } 164 | 165 | // Some helper functions that make it easier to work with Option<&char> follow. 166 | 167 | fn is_whitespace(ch: Option<&char>, detector: &WhitespaceDetector) -> bool { 168 | ch.map(|el| detector.is_whitespace(el)).unwrap_or(false) 169 | } 170 | 171 | #[cfg(test)] 172 | mod test { 173 | use super::*; 174 | 175 | const TEXT_FOR_TESTS: &str = "Lorem iPsum doLor SiT aMeT. ConSectEtur adIpiSciNg ELiT."; 176 | const CFG_FOR_TESTS: &BreakCfg = &BreakCfg { 177 | keep_linebreaks: false, 178 | }; 179 | 180 | #[test] 181 | fn case_insensitive_match() { 182 | let detector = BreakDetector::new("ipsum sit adipiscing", "", false, "", CFG_FOR_TESTS); 183 | let text = TEXT_FOR_TESTS.chars().collect::>(); 184 | 185 | let found = (0..text.len()) 186 | .filter(|el| detector.ends_with_keep_word(&text, el)) 187 | .collect::>(); 188 | 189 | assert_eq!(found, vec![10, 20, 49]); 190 | } 191 | 192 | #[test] 193 | fn case_sensitive_match() { 194 | let detector = BreakDetector::new("ipsum SiT adipiscing", "", true, "", CFG_FOR_TESTS); 195 | let text = TEXT_FOR_TESTS.chars().collect::>(); 196 | 197 | let found = (0..text.len()) 198 | .filter(|el| detector.ends_with_keep_word(&text, el)) 199 | .collect::>(); 200 | 201 | assert_eq!(found, vec![20]); 202 | } 203 | 204 | #[test] 205 | fn matches_at_start_and_end() { 206 | let detector = BreakDetector::new("lorem elit.", "", false, "", CFG_FOR_TESTS); 207 | let text = TEXT_FOR_TESTS.chars().collect::>(); 208 | 209 | // Try to search outside the text's range, which will never match. 
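// (ends_with_keep_word rejects every idx >= text.len() via its bounds check, so the extra
// indices simply return false.)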
210 |         let found = (0..text.len() + 5)
211 |             .filter(|el| detector.ends_with_keep_word(&text, el))
212 |             .collect::<Vec<_>>();
213 | 
214 |         assert_eq!(found, vec![4, 55]);
215 |     }
216 | 
217 |     #[test]
218 |     fn ignoring_words_case_sensitively() {
219 |         let detector = BreakDetector::new("ipsum SiT adipiscing", "SiT", true, "", CFG_FOR_TESTS);
220 |         let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
221 | 
222 |         let found = (0..text.len())
223 |             .filter(|el| detector.ends_with_keep_word(&text, el))
224 |             .collect::<Vec<_>>();
225 | 
226 |         assert_eq!(found, vec![]);
227 |     }
228 | 
229 |     #[test]
230 |     fn ignoring_words_case_insensitively() {
231 |         let detector = BreakDetector::new("ipsum sit adipiscing", "sit", false, "", CFG_FOR_TESTS);
232 |         let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
233 | 
234 |         let found = (0..text.len())
235 |             .filter(|el| detector.ends_with_keep_word(&text, el))
236 |             .collect::<Vec<_>>();
237 | 
238 |         assert_eq!(found, vec![10, 49]);
239 |     }
240 | 
241 |     #[test]
242 |     fn ignores_that_are_not_suppressions_are_ignored() {
243 |         let detector = BreakDetector::new(
244 |             "ipsum sit adipiscing",
245 |             "sit asdf blub muhaha",
246 |             false,
247 |             "",
248 |             CFG_FOR_TESTS,
249 |         );
250 |         let text = TEXT_FOR_TESTS.chars().collect::<Vec<_>>();
251 | 
252 |         let found = (0..text.len())
253 |             .filter(|el| detector.ends_with_keep_word(&text, el))
254 |             .collect::<Vec<_>>();
255 | 
256 |         assert_eq!(found, vec![10, 49]);
257 |     }
258 | }
259 | 
--------------------------------------------------------------------------------
/src/call.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 | 
18 | use std::collections::VecDeque;
19 | use std::fmt;
20 | use std::io::Write;
21 | use std::path::Path;
22 | use std::path::PathBuf;
23 | use std::process::{Command, Stdio};
24 | use std::sync::Mutex;
25 | 
26 | use anyhow::{Context, Error, Result};
27 | 
28 | use crate::trace_log;
29 | 
30 | pub struct Upstream {
31 |     cmd: String,
32 |     args: Vec<String>,
33 | }
34 | 
35 | impl Upstream {
36 |     pub fn from_cfg(command: &str, args: &str, sep: &str) -> Result<Self> {
37 |         let mut split_args = if sep.is_empty() {
38 |             args.split_whitespace()
39 |                 .map(String::from)
40 |                 .collect::<VecDeque<_>>()
41 |         } else {
42 |             args.split(sep).map(String::from).collect::<VecDeque<_>>()
43 |         };
44 |         let cmd = if !command.is_empty() {
45 |             command.to_string()
46 |         } else {
47 |             split_args
48 |                 .pop_front()
49 |                 .ok_or_else(|| {
50 |                     Error::msg(format!(
51 |                         "Failed to extract upstream command from arguments '{}'.",
52 |                         args
53 |                     ))
54 |                 })?
55 | .to_string() 56 | }; 57 | let result = Self { 58 | cmd, 59 | args: split_args.into_iter().collect::>(), 60 | }; 61 | log::debug!("using upstream formatter {}", result); 62 | Ok(result) 63 | } 64 | } 65 | 66 | impl fmt::Display for Upstream { 67 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 68 | write!(f, "'{}' '{}'", self.cmd, self.args.join("' '")) 69 | } 70 | } 71 | 72 | pub fn upstream_formatter( 73 | upstream: &Upstream, 74 | file_content: String, 75 | workdir: &Path, 76 | ) -> Result { 77 | let fallback_workdir = PathBuf::from("."); 78 | let workdir = if workdir.components().count() == 0 { 79 | &fallback_workdir 80 | } else { 81 | workdir 82 | }; 83 | log::debug!( 84 | "running upstream executable in directory: {}", 85 | workdir.to_string_lossy() 86 | ); 87 | 88 | let mut process = Command::new(&upstream.cmd) 89 | .args(&upstream.args) 90 | .stdin(Stdio::piped()) 91 | .stdout(Stdio::piped()) 92 | .stderr(Stdio::piped()) 93 | .current_dir(workdir) 94 | .spawn() 95 | .context("failed to spawn upstream auto-formatter")?; 96 | 97 | let mut stdin = process 98 | .stdin 99 | .take() 100 | .context("failed to acquire stdin of upstream auto-formatter")?; 101 | 102 | // Write to stdin in a separate thread. Is there really is no other way to do that? Calling 103 | // "expect" here is not a problem because, if the process panics, we receive an error. 104 | std::thread::spawn(move || { 105 | stdin 106 | .write_all(file_content.as_bytes()) 107 | .expect("failed to write stdin to upstream auto-formatter") 108 | }); 109 | 110 | let output = process 111 | .wait_with_output() 112 | .context("failed to wait for output of upstream auto-formatter")?; 113 | 114 | let stdout = String::from_utf8_lossy(&output.stdout); 115 | let stderr = String::from_utf8_lossy(&output.stderr); 116 | 117 | if output.status.success() { 118 | Ok(stdout.to_string()) 119 | } else { 120 | Err(Error::msg(format!( 121 | "failed to read stdout of upstream auto-formatter \"{}\". Stderr follows: \n\n{}", 122 | upstream, stderr, 123 | ))) 124 | } 125 | } 126 | 127 | pub struct Pager { 128 | stdin: Option, 129 | process: std::process::Child, 130 | } 131 | 132 | impl Pager { 133 | pub fn send(&mut self, s: &str) -> Result<()> { 134 | log::debug!("sending {} bytes to downstream pager's stdin", s.len()); 135 | trace_log!("message sent to downstream pager: {}", s); 136 | if let Some(ref mut stdin) = self.stdin { 137 | stdin 138 | .write_all(s.as_bytes()) 139 | .context("failed to send text to pager's stdin") 140 | } else { 141 | unreachable!("cannot send to closed stdin of downstream pager"); 142 | } 143 | } 144 | } 145 | 146 | impl Drop for Pager { 147 | fn drop(&mut self) { 148 | { 149 | log::debug!("closing stdin of downstream pager"); 150 | // Have pager's stdin go out of scope before waiting for the pager 151 | // process. This should not be needed according to the docs of 152 | // "wait", because supposedly that stdin is closed before waiting to 153 | // prevent deadlocks, but it seems to be needed, because there is a 154 | // deadlock without this. 155 | let _ = self.stdin.take(); 156 | } 157 | self.process 158 | .wait() 159 | .expect("failed to wait for pager to finish"); 160 | } 161 | } 162 | 163 | /// If to_null is set, the output of this pager will be directed to /dev/null. 164 | /// That is used solely for testing. 
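/// The pager is passed as one string whose first whitespace-separated word is the command and
/// whose remaining words are its arguments, e.g. "cat -" as used in the tests below.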
165 | fn downstream_pager(pager: &str, workdir: std::path::PathBuf, to_null: bool) -> Result { 166 | let split_pager = pager.split_whitespace().collect::>(); 167 | 168 | // Interpret an empty directory as the current directory. 169 | let pager_workdir = if workdir.components().count() == 0 { 170 | ".".into() 171 | } else { 172 | workdir 173 | }; 174 | log::debug!( 175 | "running downstream pager in directory: {}", 176 | pager_workdir.to_string_lossy() 177 | ); 178 | 179 | let cmd = split_pager 180 | .first() 181 | .ok_or(Error::msg("must specify a pager command")) 182 | .context("failed to determine downstream pager command")?; 183 | log::debug!("using pager executable {}", cmd); 184 | 185 | let args = split_pager[1..].to_owned(); 186 | log::debug!("using pager arguments {:?}", args); 187 | 188 | let mut process_cfg = Command::new(cmd); 189 | process_cfg 190 | .args(&args) 191 | .stdin(Stdio::piped()) 192 | .current_dir(pager_workdir); 193 | if to_null { 194 | process_cfg.stdout(Stdio::null()); 195 | } 196 | let mut process = process_cfg 197 | .spawn() 198 | .context("failed to spawn downstream pager")?; 199 | 200 | let stdin = process 201 | .stdin 202 | .take() 203 | .context("failed to acquire stdin of the downstream pager")?; 204 | 205 | Ok(Pager { 206 | stdin: Some(stdin), 207 | process, 208 | }) 209 | } 210 | 211 | /// A helper to ensure that text written to stdout is not mangled due to parallelisation. 212 | pub enum ParallelPrinter { 213 | // First bool indicates whether there had been a failure writing to the pager. 214 | Paged(Mutex<(bool, Pager)>), 215 | Direct(Mutex<()>), 216 | } 217 | 218 | impl ParallelPrinter { 219 | pub fn new(pager: &Option) -> Result { 220 | if let Some(pager) = pager { 221 | if !pager.is_empty() { 222 | let downstream = downstream_pager(pager, PathBuf::from("."), false)?; 223 | Ok(Self::Paged(Mutex::new((false, downstream)))) 224 | } else { 225 | Ok(Self::Direct(Mutex::new(()))) 226 | } 227 | } else { 228 | Ok(Self::Direct(Mutex::new(()))) 229 | } 230 | } 231 | 232 | pub fn println(&self, text: &str) { 233 | match self { 234 | Self::Paged(mutex) => { 235 | let mut result = mutex 236 | .lock() 237 | .expect("failed to lock mutex due to previous panic"); 238 | // We do not retry sending to the pager in case sending failed once. 239 | if !result.0 { 240 | if let Err(err) = result.1.send(text) { 241 | log::error!("{:?}", err); 242 | result.0 = true; 243 | } 244 | } 245 | } 246 | Self::Direct(mutex) => { 247 | // Assigning to keep the lock. The lock is lifted once the binding is dropped. 
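// Holding the guard for the duration of the println! call keeps output from different
// worker threads from interleaving.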
248 | let _lock = mutex 249 | .lock() 250 | .expect("failed to lock mutex due to previous panic"); 251 | println!("{}", text); 252 | } 253 | } 254 | } 255 | } 256 | 257 | #[cfg(test)] 258 | mod test { 259 | use super::*; 260 | 261 | #[test] 262 | fn can_call_simple_executable_with_stdio_handling() -> Result<()> { 263 | let input = String::from("some text"); 264 | let piped = upstream_formatter( 265 | &Upstream::from_cfg("", "cat", " ")?, 266 | input.clone(), 267 | &PathBuf::from("."), 268 | ) 269 | .unwrap(); 270 | assert_eq!(input, piped); 271 | Ok(()) 272 | } 273 | 274 | #[test] 275 | fn can_call_with_args() -> Result<()> { 276 | let piped = upstream_formatter( 277 | &Upstream::from_cfg("echo", "some text", "")?, 278 | String::new(), 279 | &PathBuf::from("."), 280 | ) 281 | .unwrap(); 282 | assert_eq!("some text\n", piped); 283 | Ok(()) 284 | } 285 | 286 | #[test] 287 | fn need_to_provide_command() -> Result<()> { 288 | let result = upstream_formatter( 289 | &Upstream::from_cfg("", "", " ")?, 290 | String::new(), 291 | &PathBuf::from("."), 292 | ); 293 | assert!(result.is_err()); 294 | Ok(()) 295 | } 296 | 297 | #[test] 298 | fn unknown_executable_fails() -> Result<()> { 299 | let result = upstream_formatter( 300 | &Upstream::from_cfg("", "executable-unknown-asdf", " ")?, 301 | String::new(), 302 | &PathBuf::from("."), 303 | ); 304 | assert!(result.is_err()); 305 | Ok(()) 306 | } 307 | 308 | #[test] 309 | fn can_call_pager_with_args() -> Result<()> { 310 | let mut pager = downstream_pager(&String::from("cat -"), ".".into(), true)?; 311 | pager.send("some text")?; 312 | Ok(()) 313 | } 314 | 315 | #[test] 316 | fn need_to_provide_pager_command() { 317 | let result = downstream_pager("", ".".into(), true); 318 | assert!(result.is_err()); 319 | } 320 | 321 | #[test] 322 | fn unknown_pager_executable_fails() { 323 | let result = downstream_pager(&String::from("executable-unknown-asdf"), ".".into(), true); 324 | assert!(result.is_err()); 325 | } 326 | } 327 | -------------------------------------------------------------------------------- /src/fs.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | 18 | use std::collections::HashSet; 19 | use std::path::{Path, PathBuf}; 20 | 21 | use anyhow::{Context, Error, Result}; 22 | use ignore::Walk; 23 | 24 | pub fn find_files_with_extension(paths: &[PathBuf], extension: &str) -> Result> { 25 | let mut errors = vec![]; 26 | 27 | let found = paths 28 | .iter() 29 | .filter_map(|top_level_path| { 30 | if top_level_path.is_file() { 31 | log::debug!("found file on disk: {}", top_level_path.to_string_lossy()); 32 | Some(vec![top_level_path.clone()]) 33 | } else if top_level_path.is_dir() { 34 | log::debug!( 35 | "crawling directory on disk: {}", 36 | top_level_path.to_string_lossy() 37 | ); 38 | Some( 39 | // Recursively extract all files with the given extension. 40 | Walk::new(top_level_path) 41 | .filter_map(|path_entry| match path_entry { 42 | Ok(path) => Some(path), 43 | Err(err) => { 44 | let path = top_level_path.to_string_lossy(); 45 | log::error!("failed to crawl {}: {}", path, err); 46 | None 47 | } 48 | }) 49 | .filter_map(|el| match el.path().canonicalize() { 50 | Ok(path) => Some(path), 51 | Err(err) => { 52 | let path = el.path().to_string_lossy(); 53 | if el.path_is_symlink() { 54 | log::error!("ignoring broken symlink: {}: {}", err, path); 55 | } else { 56 | log::error!("ignoring inaccessible path: {}: {}", err, path); 57 | } 58 | None 59 | } 60 | }) 61 | // Only keep actual markdown files and symlinks to them. 62 | .filter(|el| el.is_file() && el.to_string_lossy().ends_with(extension)) 63 | .map(strip_cwd_if_possible) 64 | .inspect(|el| { 65 | log::debug!("discovered file on disk: {}", el.to_string_lossy()); 66 | }) 67 | .collect::>(), 68 | ) 69 | } else { 70 | errors.push(top_level_path.to_string_lossy().to_string()); 71 | None 72 | } 73 | }) 74 | .flatten() 75 | .collect::>(); 76 | 77 | if errors.is_empty() { 78 | log::debug!( 79 | "discovered {} files with extension {}", 80 | found.len(), 81 | extension 82 | ); 83 | Ok(found) 84 | } else { 85 | Err(Error::msg(format!( 86 | "failed to find paths: '{}'", 87 | errors.join("' '") 88 | ))) 89 | } 90 | } 91 | 92 | pub fn read_stdin() -> String { 93 | std::io::stdin() 94 | .lines() 95 | // Interrupt as soon as one line could not be read. 96 | .map_while(Result::ok) 97 | .collect::>() 98 | .join("\n") 99 | } 100 | 101 | pub fn get_file_content_and_dir(path: &Path) -> Result<(String, PathBuf)> { 102 | let text = std::fs::read_to_string(path).context("failed to read file")?; 103 | let dir = path 104 | .parent() 105 | .map(|el| el.to_path_buf()) 106 | .ok_or(Error::msg("failed to determine parent directory"))?; 107 | 108 | Ok((text, dir)) 109 | } 110 | 111 | fn strip_cwd_if_possible(path: PathBuf) -> PathBuf { 112 | std::env::current_dir() 113 | .map(|cwd| path.strip_prefix(cwd).unwrap_or(&path)) 114 | .unwrap_or(&path) 115 | .to_path_buf() 116 | } 117 | 118 | // For convenience, this can also take paths to existing files and scans upwards, starting in 119 | // their directories. Since we want to avoid scanning the same directories over and over again, 120 | // we also use a cache to remember paths that we have already scanned. We abort scanning upwards 121 | // as soon as we find that we have already scanned a path. 
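// For example, find_files_upwards(Path::new("docs/sub"), ".mdslw.toml", &mut None) yields every
// file named ".mdslw.toml" in docs/sub, then in docs, and so on up to the filesystem root,
// closest directory first.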
122 | pub fn find_files_upwards( 123 | dir: &Path, 124 | file_name: &str, 125 | cache: &mut Option>, 126 | ) -> Vec { 127 | let mut result = vec![]; 128 | log::debug!( 129 | "finding {} upwards from {}", 130 | file_name, 131 | dir.to_string_lossy() 132 | ); 133 | for dir in UpwardsDirsIterator::new(dir) { 134 | if cache.as_ref().map(|el| el.contains(&dir)).unwrap_or(false) { 135 | log::debug!("early stop of upwards search at {}", dir.to_string_lossy()); 136 | break; 137 | } else { 138 | let maybe_file = dir.join(file_name); 139 | if maybe_file.is_file() { 140 | log::debug!( 141 | "found file in upwards search: {}", 142 | maybe_file.to_string_lossy() 143 | ); 144 | result.push(maybe_file); 145 | } 146 | cache.as_mut().map(|el| el.insert(dir)); 147 | } 148 | } 149 | log::debug!("found {} files in upwards search", result.len()); 150 | result 151 | } 152 | 153 | #[derive(Debug)] 154 | pub struct UpwardsDirsIterator(Option); 155 | 156 | impl UpwardsDirsIterator { 157 | pub fn new(dir_or_file: &Path) -> Self { 158 | match dir_or_file.canonicalize() { 159 | Ok(path) => { 160 | if path.is_file() { 161 | Self(path.parent().map(|el| el.to_path_buf())) 162 | } else { 163 | Self(Some(path.to_owned())) 164 | } 165 | } 166 | Err(_) => Self(None), 167 | } 168 | } 169 | } 170 | 171 | impl Iterator for UpwardsDirsIterator { 172 | type Item = PathBuf; 173 | 174 | fn next(&mut self) -> Option { 175 | let result = self.0.clone(); 176 | if let Some(ref mut base) = self.0 { 177 | if !base.pop() { 178 | self.0 = None; 179 | } 180 | } 181 | result 182 | } 183 | } 184 | 185 | #[cfg(test)] 186 | mod test { 187 | use super::*; 188 | 189 | // Actual tests follow. 190 | #[test] 191 | fn listing_non_existent_fails() { 192 | let is_err = find_files_with_extension(&["i do not exist".into()], ".md").is_err(); 193 | assert!(is_err); 194 | } 195 | 196 | // A struct that will automatically create and delete a temporary directory and that can create 197 | // arbitrary temporary files underneath it, including their parent dirs. 198 | struct TempDir(tempfile::TempDir); 199 | 200 | impl TempDir { 201 | fn new() -> Result { 202 | Ok(Self(tempfile::TempDir::new()?)) 203 | } 204 | 205 | fn new_file_in_dir(&self, path: PathBuf) -> Result { 206 | let mut result = PathBuf::from(self.0.path()); 207 | 208 | // Create directory containing file. 209 | if let Some(parent) = path.parent() { 210 | result.extend(parent); 211 | std::fs::create_dir_all(&result)?; 212 | } 213 | 214 | if let Some(file_name) = path.file_name() { 215 | result.push(file_name); 216 | std::fs::File::create(&result)?; 217 | Ok(result) 218 | } else { 219 | Err(Error::msg("no file given")) 220 | } 221 | } 222 | 223 | fn new_file_in_dir_with_content(&self, path: PathBuf, content: &str) -> Result { 224 | let path = self.new_file_in_dir(path)?; 225 | std::fs::write(&path, content.as_bytes())?; 226 | Ok(path) 227 | } 228 | 229 | /// Remove the temporary directory from the prefix. 230 | fn strip(&self, path: PathBuf) -> PathBuf { 231 | path.as_path() 232 | .strip_prefix(self.0.path()) 233 | .unwrap_or(&path) 234 | .to_path_buf() 235 | } 236 | } 237 | 238 | #[test] 239 | fn finding_all_md_files_in_repo() -> Result<()> { 240 | let tmp = TempDir::new()?; 241 | // Create some directory tree that will then be searched. 
242 | tmp.new_file_in_dir("f_1.md".into())?; 243 | tmp.new_file_in_dir("no_md_1.ext".into())?; 244 | tmp.new_file_in_dir("no_md_2.ext".into())?; 245 | tmp.new_file_in_dir("dir/f_2.md".into())?; 246 | tmp.new_file_in_dir("dir/no_md_1.ext".into())?; 247 | tmp.new_file_in_dir("other_dir/f_3.md".into())?; 248 | tmp.new_file_in_dir("other_dir/no_md_1.ext".into())?; 249 | 250 | let ext_found = find_files_with_extension(&[tmp.0.path().into()], ".ext")?; 251 | assert_eq!(ext_found.len(), 4); 252 | 253 | let found = find_files_with_extension(&[tmp.0.path().into()], ".md")?; 254 | assert_eq!(found.len(), 3); 255 | 256 | Ok(()) 257 | } 258 | 259 | #[test] 260 | fn auto_ignoring_files() -> Result<()> { 261 | let tmp = TempDir::new()?; 262 | // Create some directory tree that will then be searched. 263 | tmp.new_file_in_dir("f.md".into())?; 264 | tmp.new_file_in_dir("file.md".into())?; 265 | tmp.new_file_in_dir("stuff.md".into())?; 266 | tmp.new_file_in_dir("dir/f.md".into())?; 267 | tmp.new_file_in_dir("dir/file.md".into())?; 268 | tmp.new_file_in_dir("dir/stuff.md".into())?; 269 | tmp.new_file_in_dir("dir/fstuff.md".into())?; 270 | tmp.new_file_in_dir("other_dir/f.md".into())?; 271 | tmp.new_file_in_dir("other_dir/file.md".into())?; 272 | tmp.new_file_in_dir("other_dir/stuff.md".into())?; 273 | tmp.new_file_in_dir("other_dir/fstuff.md".into())?; 274 | 275 | tmp.new_file_in_dir_with_content(".ignore".into(), "stuff.md\n")?; 276 | tmp.new_file_in_dir_with_content("dir/.ignore".into(), "file.md\n")?; 277 | tmp.new_file_in_dir_with_content("other_dir/.ignore".into(), "f*.md\n")?; 278 | 279 | let found = find_files_with_extension(&[tmp.0.path().into()], ".md")? 280 | .into_iter() 281 | .map(|el| tmp.strip(el)) 282 | .map(|el| el.to_string_lossy().to_string()) 283 | .collect::>(); 284 | 285 | let expected = vec!["file.md", "f.md", "dir/fstuff.md", "dir/f.md"] 286 | .into_iter() 287 | .map(|el| el.to_string()) 288 | .collect::>(); 289 | 290 | assert_eq!(found, expected); 291 | 292 | Ok(()) 293 | } 294 | 295 | #[test] 296 | fn finding_files_upwards() -> Result<()> { 297 | let tmp = TempDir::new()?; 298 | // Create some directory tree that will then be searched. 
299 | tmp.new_file_in_dir("find_me".into())?; 300 | tmp.new_file_in_dir("do_not_find_me".into())?; 301 | tmp.new_file_in_dir("other_dir/find_me".into())?; 302 | tmp.new_file_in_dir("other_dir/do_not_find_me".into())?; 303 | tmp.new_file_in_dir("dir/subdir/find_me".into())?; 304 | let start = tmp.new_file_in_dir("dir/subdir/do_not_find_me".into())?; 305 | tmp.new_file_in_dir("dir/subdir/one_more_layer/find_me".into())?; 306 | tmp.new_file_in_dir("dir/subdir/one_more_layer/do_not_find_me".into())?; 307 | 308 | let found = find_files_upwards(&start, "find_me", &mut None) 309 | .into_iter() 310 | .map(|el| tmp.strip(el)) 311 | .map(|el| el.to_string_lossy().to_string()) 312 | .collect::>(); 313 | 314 | let expected = vec!["dir/subdir/find_me", "find_me"]; 315 | 316 | assert_eq!(found, expected); 317 | 318 | Ok(()) 319 | } 320 | 321 | #[test] 322 | fn iterating_dirs_upwards() -> Result<()> { 323 | let tmp = TempDir::new()?; 324 | let start = tmp.new_file_in_dir("dir/subdir/do_not_find_me".into())?; 325 | 326 | let dirs = UpwardsDirsIterator::new(&start) 327 | .map(|el| tmp.strip(el)) 328 | .map(|el| el.to_string_lossy().to_string()) 329 | .collect::>(); 330 | 331 | assert_eq!(start.components().count() - 1, dirs.len(), "{:?}", dirs); 332 | let dirs = dirs 333 | .into_iter() 334 | .filter(|el| !el.starts_with("/") && !el.is_empty()) 335 | .collect::>(); 336 | 337 | let expected = vec!["dir/subdir", "dir"]; 338 | 339 | assert_eq!(dirs, expected); 340 | 341 | Ok(()) 342 | } 343 | } 344 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | // Imports. 
19 | mod call; 20 | mod cfg; 21 | mod detect; 22 | mod diff; 23 | mod features; 24 | mod frontmatter; 25 | mod fs; 26 | mod ignore; 27 | mod indent; 28 | mod lang; 29 | mod linebreak; 30 | mod logging; 31 | mod parse; 32 | mod ranges; 33 | mod replace; 34 | mod wrap; 35 | 36 | use std::collections::{HashMap, HashSet}; 37 | use std::io; 38 | use std::path::{Path, PathBuf}; 39 | 40 | use anyhow::{Context, Error, Result}; 41 | use clap::{CommandFactory, Parser}; 42 | use clap_complete::generate; 43 | use rayon::prelude::*; 44 | 45 | const CONFIG_FILE: &str = ".mdslw.toml"; 46 | 47 | fn generate_report( 48 | mode: &cfg::ReportMode, 49 | new: &str, 50 | org: &str, 51 | filename: &Path, 52 | ) -> Option { 53 | match mode { 54 | cfg::ReportMode::None => None, 55 | cfg::ReportMode::Changed => { 56 | if new != org { 57 | Some(format!("{}", filename.to_string_lossy())) 58 | } else { 59 | None 60 | } 61 | } 62 | cfg::ReportMode::State => { 63 | let ch = if new == org { 'U' } else { 'C' }; 64 | Some(format!("{}:{}", ch, filename.to_string_lossy())) 65 | } 66 | cfg::ReportMode::DiffMyers => Some(diff::Algo::Myers.generate(new, org, filename)), 67 | cfg::ReportMode::DiffPatience => Some(diff::Algo::Patience.generate(new, org, filename)), 68 | cfg::ReportMode::DiffLcs => Some(diff::Algo::Lcs.generate(new, org, filename)), 69 | } 70 | } 71 | 72 | struct Processor { 73 | feature_cfg: features::FeatureCfg, 74 | detector: detect::BreakDetector, 75 | max_width: Option, 76 | } 77 | 78 | impl Processor { 79 | fn process(&self, text: String, width_reduction: usize) -> String { 80 | // First, process the actual text. 81 | let ends_on_linebreak = text.ends_with('\n'); 82 | let text = if self.feature_cfg.keep_spaces_in_links { 83 | log::debug!("not replacing spaces in links by non-breaking spaces"); 84 | text 85 | } else { 86 | log::debug!("replacing spaces in links by non-breaking spaces"); 87 | replace::replace_spaces_in_links_by_nbsp(text) 88 | }; 89 | let text = if self.feature_cfg.outsource_inline_links { 90 | log::debug!("outsourcing inline links"); 91 | replace::outsource_inline_links( 92 | text, 93 | &self.feature_cfg.collate_link_defs, 94 | &self.detector.whitespace, 95 | ) 96 | } else { 97 | log::debug!("not outsourcing inline links"); 98 | text 99 | }; 100 | let text = if self.feature_cfg.collate_link_defs { 101 | log::debug!("collating links at the end of the document"); 102 | replace::collate_link_defs_at_end(text, &self.detector.whitespace) 103 | } else { 104 | log::debug!("not collating links at the end of the document"); 105 | text 106 | }; 107 | let parsed = parse::parse_markdown(&text, &self.feature_cfg.parse_cfg); 108 | let filled = ranges::fill_markdown_ranges(parsed, &text); 109 | let width = &self 110 | .max_width 111 | .map(|el| el.checked_sub(width_reduction).unwrap_or(el)); 112 | let formatted = wrap::add_linebreaks_and_wrap(filled, width, &self.detector, &text); 113 | 114 | // Keep newlines at the end of the file in tact. They disappear sometimes. 115 | let file_end = if !formatted.ends_with('\n') && ends_on_linebreak { 116 | log::debug!("adding missing trailing newline character"); 117 | "\n" 118 | } else { 119 | "" 120 | }; 121 | let text = format!("{}{}", formatted, file_end); 122 | 123 | // At last, process all block quotes. 
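// Each quote's body is run through this very method again, with its indent added to
// width_reduction so that the remaining line width shrinks accordingly.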
124 | if self.feature_cfg.format_block_quotes { 125 | log::debug!("formatting text in block quotes"); 126 | parse::BlockQuotes::new(&text) 127 | .apply_to_matches_and_join(|t, indent| self.process(t, indent + width_reduction)) 128 | } else { 129 | log::debug!("not formatting text in block quotes"); 130 | text 131 | } 132 | } 133 | } 134 | 135 | fn process(document: String, file_dir: &Path, cfg: &cfg::PerFileCfg) -> Result<(String, String)> { 136 | // Prepare user-configured options. These could be outsourced if we didn't intend to allow 137 | // per-file configurations. 138 | let lang_keep_words = lang::keep_word_list(&cfg.lang).context("cannot load keep words")?; 139 | let feature_cfg = cfg 140 | .features 141 | .parse::() 142 | .context("cannot parse selected features")?; 143 | let detector = detect::BreakDetector::new( 144 | &(lang_keep_words + &cfg.suppressions), 145 | &cfg.ignores, 146 | cfg.case == cfg::Case::Keep, 147 | &cfg.end_markers, 148 | &feature_cfg.break_cfg, 149 | ); 150 | let max_width = if cfg.max_width == 0 { 151 | log::debug!("not limiting line length"); 152 | None 153 | } else { 154 | log::debug!("limiting line length to {} characters", cfg.max_width); 155 | Some(cfg.max_width) 156 | }; 157 | let processor = Processor { 158 | feature_cfg, 159 | detector, 160 | max_width, 161 | }; 162 | 163 | // Actually process the text. 164 | let frontmatter = frontmatter::extract_frontmatter(&document); 165 | let text = document[frontmatter.len()..].to_string(); 166 | 167 | let after_upstream = if let Ok(upstream) = call::Upstream::from_cfg( 168 | &cfg.upstream_command, 169 | &cfg.upstream, 170 | &cfg.upstream_separator, 171 | ) { 172 | log::debug!("calling upstream formatter: {}", cfg.upstream); 173 | call::upstream_formatter(&upstream, text, file_dir)? 174 | } else { 175 | log::debug!("not calling any upstream formatter"); 176 | text 177 | }; 178 | 179 | let processed = format!("{}{}", frontmatter, processor.process(after_upstream, 0)); 180 | Ok((processed, document)) 181 | } 182 | 183 | fn process_stdin(mode: &cfg::OpMode, build_cfg: F, file_path: &PathBuf) -> Result 184 | where 185 | F: Fn(&str, &PathBuf) -> Result, 186 | { 187 | log::debug!("processing content from stdin and writing to stdout"); 188 | let text = fs::read_stdin(); 189 | 190 | let config = build_cfg(&text, file_path).context("failed to build complete config")?; 191 | 192 | let file_dir = file_path 193 | .parent() 194 | .map(|el| el.to_path_buf()) 195 | .unwrap_or(PathBuf::from(".")); 196 | let (processed, text) = process(text, file_dir.as_path(), &config)?; 197 | 198 | // Decide what to output. 199 | match mode { 200 | cfg::OpMode::Format | cfg::OpMode::Both => { 201 | log::debug!("writing modified file to stdout"); 202 | print!("{}", processed); 203 | } 204 | cfg::OpMode::Check => { 205 | log::debug!("writing original file to stdout in check mode"); 206 | print!("{}", text); 207 | } 208 | } 209 | 210 | Ok(processed == text) 211 | } 212 | 213 | fn process_file(mode: &cfg::OpMode, path: &PathBuf, build_cfg: F) -> Result<(String, String)> 214 | where 215 | F: Fn(&str, &PathBuf) -> Result, 216 | { 217 | let report_path = path.to_string_lossy(); 218 | log::debug!("processing {}", report_path); 219 | 220 | let (text, file_dir) = fs::get_file_content_and_dir(path)?; 221 | let config = build_cfg(&text, path).context("failed to build complete config")?; 222 | let (processed, text) = process(text, &file_dir, &config)?; 223 | 224 | // Decide whether to overwrite existing files. 
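// A file is only rewritten when the formatted text differs from what is currently on disk;
// check mode never writes.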
225 | match mode { 226 | cfg::OpMode::Format | cfg::OpMode::Both => { 227 | if processed == text { 228 | log::debug!("keeping OK file {}", report_path); 229 | } else { 230 | log::debug!("modifying NOK file {} in place", report_path); 231 | std::fs::write(path, processed.as_bytes()).context("failed to write file")?; 232 | } 233 | } 234 | // Do not write anything in check mode. 235 | cfg::OpMode::Check => { 236 | log::debug!("not modifying file {} in check mode", report_path); 237 | } 238 | } 239 | 240 | Ok((processed, text)) 241 | } 242 | 243 | fn read_config_file(path: &Path) -> Option<(PathBuf, cfg::CfgFile)> { 244 | let result = std::fs::read_to_string(path) 245 | .context("failed to read file") 246 | .and_then(|el| { 247 | toml::from_str::(&el).context("that failed to parse due to error:") 248 | }); 249 | 250 | match result { 251 | Ok(cfg) => { 252 | log::debug!("parsed config file {}", path.to_string_lossy()); 253 | Some((path.to_path_buf(), cfg)) 254 | } 255 | Err(err) => { 256 | log::error!("ignoring config file {} {:?}", path.to_string_lossy(), err); 257 | None 258 | } 259 | } 260 | } 261 | 262 | fn build_document_specific_config( 263 | document: &str, 264 | document_path: &Path, 265 | cli: &cfg::CliArgs, 266 | configs: &Vec<(PathBuf, cfg::CfgFile)>, 267 | ) -> Result { 268 | let config_from_frontmatter = toml::from_str::( 269 | &parse::get_value_for_mdslw_toml_yaml_key(&frontmatter::extract_frontmatter(document)), 270 | ) 271 | .with_context(|| { 272 | format!( 273 | "failed to parse frontmatter entry as toml config:\n{}", 274 | document 275 | ) 276 | })?; 277 | let config_tuple = [(document_path.to_path_buf(), config_from_frontmatter)]; 278 | Ok(cfg::merge_configs(cli, config_tuple.iter().chain(configs))) 279 | } 280 | 281 | fn print_config_file() -> Result<()> { 282 | toml::to_string(&cfg::CfgFile::default()) 283 | .context("converting to toml format") 284 | .map(|cfg| println!("{}", cfg)) 285 | } 286 | 287 | fn main() -> Result<()> { 288 | // Perform actions that cannot be changed on a per-file level. 289 | // Argument parsing. 290 | let cli = cfg::CliArgs::parse(); 291 | // Initialising logging. 292 | logging::init_logging(cli.verbose)?; 293 | // Generation of shell completion. 294 | if let Some(shell) = cli.completion { 295 | log::info!("generating shell completion for {}", shell); 296 | let mut cmd = cfg::CliArgs::command(); 297 | let name = cmd.get_name().to_string(); 298 | generate(shell, &mut cmd, name, &mut io::stdout()); 299 | return Ok(()); 300 | } 301 | // Generation of default config file. 302 | if cli.default_config { 303 | log::info!("writing default config file to stdout"); 304 | return print_config_file(); 305 | } 306 | 307 | // All other actions could technically be specified on a per-file level. 
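// Without any PATHS we format a single document read from stdin; otherwise we crawl the given
// paths for markdown files and process them in parallel.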
308 | let cwd = PathBuf::from("."); 309 | let unchanged = if cli.paths.is_empty() { 310 | let file_path = cli.stdin_filepath.clone().unwrap_or(PathBuf::from("STDIN")); 311 | let file_dir = file_path.parent().unwrap_or(cwd.as_path()); 312 | let configs = fs::find_files_upwards(file_dir, CONFIG_FILE, &mut None) 313 | .into_iter() 314 | .filter_map(|el| read_config_file(&el)) 315 | .collect::>(); 316 | let build_document_config = |document: &str, file_path: &PathBuf| { 317 | build_document_specific_config(document, file_path, &cli, &configs) 318 | }; 319 | process_stdin(&cli.mode, build_document_config, &file_path) 320 | } else { 321 | let md_files = fs::find_files_with_extension(&cli.paths, &cli.extension) 322 | .context("failed to discover markdown files")?; 323 | log::debug!("will process {} markdown file(s) from disk", md_files.len()); 324 | let config_files = { 325 | // Define a temporary cache to avoid scanning the same directories again and again. 326 | let mut cache = Some(HashSet::new()); 327 | md_files 328 | .iter() 329 | .flat_map(|el| fs::find_files_upwards(el, CONFIG_FILE, &mut cache)) 330 | .filter_map(|el| read_config_file(&el)) 331 | .collect::>() 332 | }; 333 | log::debug!("loaded {} configs from disk", config_files.len()); 334 | 335 | // Set number of threads depending on user's choice. 336 | if let Some(num_jobs) = cli.jobs { 337 | rayon::ThreadPoolBuilder::new() 338 | .num_threads(num_jobs) 339 | .build_global() 340 | .context("failed to initialise processing thread-pool")?; 341 | } 342 | 343 | // Enable pager only for diff output. 344 | let diff_pager = if cli.report.is_diff_mode() { 345 | &cli.diff_pager 346 | } else { 347 | log::debug!("disabling possibly set diff pager for non-diff report"); 348 | &None 349 | }; 350 | let par_printer = call::ParallelPrinter::new(diff_pager)?; 351 | 352 | // Process all MD files we found. 353 | md_files 354 | .par_iter() 355 | .map(|path| { 356 | log::info!("processing markdown file {}", path.to_string_lossy()); 357 | let configs = fs::UpwardsDirsIterator::new(path) 358 | .filter_map(|el| { 359 | config_files 360 | .get(&el.join(CONFIG_FILE)) 361 | .map(|cfg| (el, cfg.clone())) 362 | }) 363 | .collect::>(); 364 | let build_document_config = |document: &str, file_path: &PathBuf| { 365 | build_document_specific_config(document, file_path, &cli, &configs) 366 | }; 367 | match process_file(&cli.mode, path, build_document_config) { 368 | Ok((processed, text)) => { 369 | if let Some(rep) = generate_report(&cli.report, &processed, &text, path) { 370 | par_printer.println(&rep); 371 | } 372 | Ok(processed == text) 373 | } 374 | Err(err) => { 375 | log::error!("failed to process {}: {:?}", path.to_string_lossy(), err); 376 | Err(Error::msg("there were errors processing at least one file")) 377 | } 378 | } 379 | }) 380 | .reduce( 381 | || Ok(true), 382 | |a, b| match (a, b) { 383 | (Err(err), _) => Err(err), 384 | (_, Err(err)) => Err(err), 385 | (Ok(f1), Ok(f2)) => Ok(f1 && f2), 386 | }, 387 | ) 388 | }; 389 | 390 | log::debug!("finished execution"); 391 | // Process exit code. 
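// Ok(false) means at least one processed file differed from its formatted version, which only
// counts as an error in check or both mode.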
392 |     match unchanged {
393 |         Ok(true) => Ok(()),
394 |         Ok(false) => match cli.mode {
395 |             cfg::OpMode::Format => Ok(()),
396 |             cfg::OpMode::Check => Err(Error::msg("at least one processed file would be changed")),
397 |             cfg::OpMode::Both => Err(Error::msg("at least one processed file changed")),
398 |         },
399 |         Err(err) => Err(err),
400 |     }
401 | }
402 | 
--------------------------------------------------------------------------------
/src/cfg.rs:
--------------------------------------------------------------------------------
1 | /* An opinionated line wrapper for markdown files.
2 | Copyright (C) 2023 Torsten Long
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 | 
18 | use std::fmt;
19 | use std::path::PathBuf;
20 | use std::str::FromStr;
21 | 
22 | use clap::{builder::OsStr, Parser, ValueEnum};
23 | use clap_complete::Shell;
24 | use serde::{Deserialize, Serialize};
25 | 
26 | // Command-line interface definition.
27 | 
28 | /// A generic value that knows its origin. That is, we use the "Default" variant when defining
29 | /// default values in the CliArgs struct but we always parse to the "Parsed" variant when parsing
30 | /// from a command line argument. That way, we can distinguish whether an option has been provided
31 | /// on the command line or was taken as a default.
32 | ///
33 | /// Note that default_value_t will perform a display-then-parse-again round trip, which means it
34 | /// actually does not matter whether we use the "Parsed" or the "Default" variant in the
35 | /// default_value_t bit. However, we explicitly add a zero-width space to the end of every default
36 | /// value to be able to determine whether the value is a default. Note that this will result in
37 | /// unexpected behaviour if a user ever adds such a character to the end of an argument, but what
38 | /// can you do. It's either that, or replacing clap, or not having config file support. In my view,
39 | /// config file support is worth this work-around.
40 | #[derive(Clone, Debug)]
41 | pub enum ValueWOrigin<T> {
42 |     Default(T),
43 |     Parsed(T),
44 | }
45 | 
46 | impl<T> ValueWOrigin<T> {
47 |     // All default values that can also come from config files will end in this character. It is
48 |     // the Unicode zero-width space. All terminals that I tested do not display that character,
49 |     // but it is present in the internal default string. We append that character to every default
50 |     // value that can also come from a config file. That way, we can actually determine whether a
51 |     // value is a default or not. See the implementation of FromStr for this struct.
52 |     const ZWS: char = '\u{200b}';
53 |     const ZWS_LEN: usize = Self::ZWS.len_utf8();
54 | 
55 |     /// Get the correct value with the following precedence:
56 |     /// - If we contain a "Parsed", return the value contained in it. The user has specified that
57 |     ///   on the command line, which means it takes precedence.
58 | /// - If we contain a "Default" and the other value contains a "Some", return that. 59 | /// That means the user has not specified that option on the command line, but a config file 60 | /// contains it. 61 | /// - Otherwise, return the value in the "Default". 62 | /// In that case, neither has the user specified that option on the command line, nor is it 63 | /// contained in any config file. 64 | fn resolve(&self, other: Option) -> T 65 | where 66 | T: Clone, 67 | { 68 | match self { 69 | ValueWOrigin::Parsed(val) => val.clone(), 70 | ValueWOrigin::Default(val) => other.unwrap_or_else(|| val.clone()), 71 | } 72 | } 73 | } 74 | 75 | impl FromStr for ValueWOrigin 76 | where 77 | T: FromStr, 78 | { 79 | type Err = ::Err; 80 | 81 | fn from_str(s: &str) -> Result { 82 | if s.ends_with(Self::ZWS) { 83 | match s[..s.len() - Self::ZWS_LEN].parse::() { 84 | Ok(val) => Ok(Self::Default(val)), 85 | Err(err) => Err(err), 86 | } 87 | } else { 88 | match s.parse::() { 89 | Ok(val) => Ok(Self::Parsed(val)), 90 | Err(err) => Err(err), 91 | } 92 | } 93 | } 94 | } 95 | 96 | impl fmt::Display for ValueWOrigin 97 | where 98 | T: fmt::Display, 99 | { 100 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 101 | match self { 102 | ValueWOrigin::Parsed(val) | ValueWOrigin::Default(val) => { 103 | write!(f, "{}", val) 104 | } 105 | } 106 | } 107 | } 108 | 109 | #[derive(Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] 110 | pub enum OpMode { 111 | Both, 112 | Check, 113 | Format, 114 | } 115 | 116 | #[derive(Serialize, Deserialize, Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] 117 | #[serde(rename_all = "kebab-case")] 118 | pub enum Case { 119 | Ignore, 120 | Keep, 121 | } 122 | 123 | impl FromStr for Case { 124 | type Err = String; 125 | 126 | fn from_str(s: &str) -> Result { 127 | match s { 128 | "keep" => Ok(Self::Keep), 129 | "ignore" => Ok(Self::Ignore), 130 | _ => Err(String::from("possible values: ignore, keep")), 131 | } 132 | } 133 | } 134 | 135 | impl fmt::Display for Case { 136 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 137 | match self { 138 | Self::Ignore => { 139 | write!(f, "ignore") 140 | } 141 | Self::Keep => { 142 | write!(f, "keep") 143 | } 144 | } 145 | } 146 | } 147 | 148 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] 149 | pub enum ReportMode { 150 | None, 151 | Changed, 152 | State, 153 | DiffMyers, 154 | DiffPatience, 155 | DiffLcs, 156 | } 157 | 158 | impl ReportMode { 159 | pub fn is_diff_mode(&self) -> bool { 160 | self == &ReportMode::DiffMyers 161 | || self == &ReportMode::DiffPatience 162 | || self == &ReportMode::DiffLcs 163 | } 164 | } 165 | 166 | #[derive(Parser, Debug)] 167 | #[command(author, version, about, long_about = None)] 168 | pub struct CliArgs { 169 | /// Paths to files or directories that shall be processed. 170 | pub paths: Vec, 171 | /// The maximum line width that is acceptable. A value of 0 disables wrapping of{n} long 172 | /// lines. 173 | #[arg( 174 | short = 'w', 175 | long, 176 | env = "MDSLW_MAX_WIDTH", 177 | default_value = "80\u{200b}" 178 | )] 179 | pub max_width: ValueWOrigin, 180 | /// A set of characters that are acceptable end of sentence markers. 
181 | #[arg(short, long, env = "MDSLW_END_MARKERS", default_value = "?!:.\u{200b}")] 182 | pub end_markers: ValueWOrigin, 183 | /// Mode of operation: "check" means exit with error if format has to be adjusted but do not 184 | /// format,{n} "format" means format the file and exit with error in case of problems only, 185 | /// "both" means do both{n} (useful as pre-commit hook). 186 | #[arg(value_enum, short, long, env = "MDSLW_MODE", default_value_t = OpMode::Format)] 187 | pub mode: OpMode, 188 | /// A space-separated list of languages whose suppression words as specified by unicode should 189 | /// be {n} taken into account. See here for all languages: 190 | /// {n} https://github.com/unicode-org/cldr-json/tree/main/cldr-json/cldr-segments-full/segments 191 | /// {n} Use "none" to disable. 192 | /// Supported languages are: de en es fr it. Use "ac" for "author's choice",{n} a list 193 | /// for the Enlish language defined by this tool's author. 194 | #[arg(short, long, env = "MDSLW_LANG", default_value = "ac\u{200b}")] 195 | pub lang: ValueWOrigin, 196 | /// Space-separated list of words that end in one of END_MARKERS but that should not be 197 | /// followed by a line{n} break. This is in addition to what is specified via --lang. 198 | #[arg(short, long, env = "MDSLW_SUPPRESSIONS", default_value = "\u{200b}")] 199 | pub suppressions: ValueWOrigin, 200 | /// Space-separated list of words that end in one of END_MARKERS and that should be 201 | /// removed{n} from the list of suppressions. 202 | #[arg(short, long, env = "MDSLW_IGNORES", default_value = "\u{200b}")] 203 | pub ignores: ValueWOrigin, 204 | /// Specify an upstream auto-formatter that reads from stdin and writes to stdout. 205 | /// {n} It will be called before mdslw will run. Useful if you want to chain multiple 206 | /// tools.{n} Specify the command that will be executed. For example, specify "prettier" 207 | /// to{n} call prettier first. 208 | /// The upstream auto-formatter runs in each file's directory if PATHS are{n} specified. 209 | #[arg(long, env = "MDSLW_UPSTREAM_COMMAND", default_value = "\u{200b}")] 210 | pub upstream_command: ValueWOrigin, 211 | /// Specify the arguments for the upstream auto-formatter. If --upstream-command is not set, 212 | /// {n} the first word will be used as command. For example, with 213 | /// --upstream-command="prettier",{n} set --upstream="--parser=markdown" to enable markdown 214 | /// parsing. 215 | #[arg(short, long, env = "MDSLW_UPSTREAM", default_value = "\u{200b}")] 216 | pub upstream: ValueWOrigin, 217 | /// Specify a string that will be used to separate the value passed to --upstream into words. 218 | /// {n} If empty, splitting is based on whitespace. 219 | #[arg(long, env = "MDSLW_UPSTREAM_SEPARATOR", default_value = "\u{200b}")] 220 | pub upstream_separator: ValueWOrigin, 221 | /// How to handle the case of provided suppression words, both via --lang 222 | /// and{n} --suppressions. Possible values: ignore, keep 223 | #[arg(short, long, env = "MDSLW_CASE", default_value = "ignore\u{200b}")] 224 | pub case: ValueWOrigin, 225 | /// The file extension used to find markdown files when an entry in{n} PATHS is a directory. 226 | #[arg(long, env = "MDSLW_EXTENSION", default_value_t = String::from(".md"))] 227 | pub extension: String, 228 | // The "." below is used to cause clap to format the help message nicely. 229 | /// Comma-separated list of optional features to enable or disable. 
Currently, the following 230 | /// are supported: 231 | /// {n} * keep-spaces-in-links => do not replace spaces in link texts by non-breaking spaces 232 | /// {n} * keep-linebreaks => do not remove existing linebreaks during the line-wrapping 233 | /// process 234 | /// {n} * format-block-quotes => format text in block quotes 235 | /// {n} * collate-link-defs => gather all link definitions, i.e. `[link name]: url`, in a 236 | /// block at the end{n} of the document in alphabetical order, sorted 237 | /// case-insensitively; links can be categorised with{n} comments as 238 | /// ``, which will cause sorting per category 239 | /// {n} * outsource-inline-links => replace all inline links by named links using a link 240 | /// definition,{n} i.e. `[link](url)` becomes `[link][def]` and `[def]: url` 241 | /// {n} . 242 | #[arg(long, env = "MDSLW_FEATURES", default_value = "\u{200b}")] 243 | pub features: ValueWOrigin, 244 | /// Output shell completion file for the given shell to stdout and exit.{n} . 245 | #[arg(value_enum, long, env = "MDSLW_COMPLETION")] 246 | pub completion: Option, 247 | /// Specify the number of threads to use for processing files from disk in parallel. Defaults 248 | /// to the number of{n} logical processors. 249 | #[arg(short, long, env = "MDSLW_JOBS")] 250 | pub jobs: Option, 251 | /// What to report to stdout, ignored when reading from stdin: 252 | /// {n} * "none" => report nothing but be silent instead 253 | /// {n} * "changed" => output the names of files that were changed 254 | /// {n} * "state" => output : where is "U" for "unchanged" or 255 | /// "C" for "changed" 256 | /// {n} * "diff-myers" => output a unified diff based on the myers algorithm 257 | /// {n} * "diff-patience" => output a unified diff based on the patience algorithm 258 | /// {n} * "diff-lcs" => output a unified diff based on the lcs algorithm 259 | /// {n} . 260 | #[arg(value_enum, short, long, env = "MDSLW_REPORT", default_value_t = ReportMode::None)] 261 | pub report: ReportMode, 262 | /// Specify a downstream pager for diffs (with args) that reads diffs from stdin. 263 | /// {n} Useful if you want to display a diff nicely. For example, specify 264 | /// {n} "delta --side-by-side" to get a side-by-side view. 265 | #[arg(value_enum, short, long, env = "MDSLW_DIFF_PAGER")] 266 | pub diff_pager: Option, 267 | /// The path to the file that is read from stdin. This is used to determine relevant config 268 | /// files{n} when reading from stdin and to run an upstream formatter. 269 | #[arg(long, env = "MDSLW_STDIN_FILEPATH")] 270 | pub stdin_filepath: Option, 271 | /// Output the default config file in TOML format to stdout and exit. 272 | #[arg(long, env = "MDSLW_DEFAULT_CONFIG")] 273 | pub default_config: bool, 274 | /// Specify to increase verbosity of log output. Specify multiple times to increase even 275 | /// further. 
276 | #[arg(short, long, action = clap::ArgAction::Count)] 277 | pub verbose: u8, 278 | } 279 | 280 | #[derive(Debug, PartialEq)] 281 | pub struct PerFileCfg { 282 | pub max_width: usize, 283 | pub end_markers: String, 284 | pub lang: String, 285 | pub suppressions: String, 286 | pub ignores: String, 287 | pub upstream_command: String, 288 | pub upstream: String, 289 | pub upstream_separator: String, 290 | pub case: Case, 291 | pub features: String, 292 | } 293 | 294 | #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] 295 | #[serde(rename_all = "kebab-case", deny_unknown_fields)] 296 | pub struct CfgFile { 297 | pub max_width: Option, 298 | pub end_markers: Option, 299 | pub lang: Option, 300 | pub suppressions: Option, 301 | pub ignores: Option, 302 | pub upstream_command: Option, 303 | pub upstream: Option, 304 | pub upstream_separator: Option, 305 | pub case: Option, 306 | pub features: Option, 307 | } 308 | 309 | impl CfgFile { 310 | /// Merge one config file into this one. Some-values in self take precedence. The return value 311 | /// indicates whether all fields of the struct are fully defined, which means that further 312 | /// merging won't have any effect. 313 | pub fn merge_with(&mut self, other: &Self) -> bool { 314 | let mut fully_defined = true; 315 | 316 | // Reduce code duplication with a macro. 317 | macro_rules! merge_field { 318 | ($field:ident) => { 319 | if self.$field.is_none() { 320 | self.$field = other.$field.clone(); 321 | } 322 | fully_defined = fully_defined && self.$field.is_some(); 323 | }; 324 | } 325 | 326 | merge_field!(max_width); 327 | merge_field!(end_markers); 328 | merge_field!(lang); 329 | merge_field!(suppressions); 330 | merge_field!(ignores); 331 | merge_field!(upstream_command); 332 | merge_field!(upstream); 333 | merge_field!(upstream_separator); 334 | merge_field!(case); 335 | merge_field!(features); 336 | 337 | fully_defined 338 | } 339 | 340 | fn new() -> Self { 341 | Self { 342 | max_width: None, 343 | end_markers: None, 344 | lang: None, 345 | suppressions: None, 346 | ignores: None, 347 | upstream_command: None, 348 | upstream: None, 349 | upstream_separator: None, 350 | case: None, 351 | features: None, 352 | } 353 | } 354 | } 355 | 356 | impl Default for CfgFile { 357 | fn default() -> Self { 358 | let no_args: Vec = vec![]; 359 | let default_cli = CliArgs::parse_from(no_args); 360 | 361 | macro_rules! merge_fields { 362 | (@ | $($result:tt)*) => { Self{ $($result)* } }; 363 | (@ $name:ident $($names:ident)* | $($result:tt)*) => { 364 | merge_fields!( 365 | @ $($names)* | 366 | $name: Some(default_cli.$name.resolve(None)), 367 | $($result)* 368 | ) 369 | }; 370 | ($($names:ident)*) => { merge_fields!(@ $($names)* | ) }; 371 | } 372 | 373 | merge_fields!(max_width end_markers lang suppressions ignores upstream_command upstream upstream_separator case features) 374 | } 375 | } 376 | 377 | pub fn merge_configs<'a, I>(cli: &CliArgs, files: I) -> PerFileCfg 378 | where 379 | I: IntoIterator, 380 | { 381 | let mut merged = CfgFile::new(); 382 | for (path, other) in files { 383 | log::debug!("merging config file {}", path.to_string_lossy()); 384 | if merged.merge_with(other) { 385 | log::debug!("config fully defined, stopping merge"); 386 | break; 387 | } 388 | } 389 | log::debug!("configuration loaded from files: {:?}", merged); 390 | log::debug!("configuration loaded from CLI: {:?}", cli); 391 | 392 | macro_rules! 
merge_fields { 393 | (@ | $($result:tt)*) => { PerFileCfg{ $($result)* } }; 394 | (@ $name:ident $($names:ident)* | $($result:tt)*) => { 395 | merge_fields!( 396 | @ $($names)* | 397 | $name: cli.$name.resolve(merged.$name), 398 | $($result)* 399 | ) 400 | }; 401 | ($($names:ident)*) => { merge_fields!(@ $($names)* | ) }; 402 | } 403 | 404 | let result = merge_fields!(max_width end_markers lang suppressions ignores upstream_command upstream upstream_separator case features); 405 | log::debug!("merged configuration: {:?}", result); 406 | result 407 | } 408 | 409 | #[cfg(test)] 410 | mod test { 411 | use super::*; 412 | 413 | // Actual tests follow. 414 | #[test] 415 | fn merging_two_partially_defined_config_files() { 416 | let mut main_cfg = CfgFile { 417 | max_width: Some(10), 418 | end_markers: None, 419 | lang: None, 420 | suppressions: None, 421 | ignores: Some("some words".into()), 422 | upstream_command: None, 423 | upstream: None, 424 | upstream_separator: None, 425 | case: None, 426 | features: None, 427 | }; 428 | let other_cfg = CfgFile { 429 | max_width: None, 430 | end_markers: None, 431 | lang: Some("ac".into()), 432 | suppressions: None, 433 | ignores: None, 434 | upstream_command: Some("some".into()), 435 | upstream: None, 436 | upstream_separator: None, 437 | case: None, 438 | features: Some("feature".into()), 439 | }; 440 | 441 | let fully_defined = main_cfg.merge_with(&other_cfg); 442 | assert!(!fully_defined); 443 | 444 | let expected_cfg = CfgFile { 445 | max_width: Some(10), 446 | end_markers: None, 447 | lang: Some("ac".into()), 448 | suppressions: None, 449 | ignores: Some("some words".into()), 450 | upstream_command: Some("some".into()), 451 | upstream: None, 452 | upstream_separator: None, 453 | case: None, 454 | features: Some("feature".into()), 455 | }; 456 | 457 | assert_eq!(expected_cfg, main_cfg); 458 | } 459 | 460 | #[test] 461 | fn options_in_main_config_are_kept() { 462 | let mut main_cfg = CfgFile { 463 | max_width: Some(10), 464 | end_markers: None, 465 | lang: None, 466 | suppressions: None, 467 | ignores: Some("some words".into()), 468 | upstream_command: None, 469 | upstream: None, 470 | upstream_separator: None, 471 | case: None, 472 | features: None, 473 | }; 474 | let other_cfg = CfgFile { 475 | max_width: Some(20), 476 | end_markers: None, 477 | lang: None, 478 | suppressions: None, 479 | ignores: Some("some other words".into()), 480 | upstream_command: None, 481 | upstream: None, 482 | upstream_separator: None, 483 | case: None, 484 | features: None, 485 | }; 486 | assert_ne!(main_cfg, other_cfg); 487 | 488 | let fully_defined = main_cfg.merge_with(&other_cfg); 489 | assert!(!fully_defined); 490 | 491 | let expected_cfg = CfgFile { 492 | max_width: Some(10), 493 | end_markers: None, 494 | lang: None, 495 | suppressions: None, 496 | ignores: Some("some words".into()), 497 | upstream_command: None, 498 | upstream: None, 499 | upstream_separator: None, 500 | case: None, 501 | features: None, 502 | }; 503 | 504 | assert_eq!(expected_cfg, main_cfg); 505 | } 506 | 507 | #[test] 508 | fn fully_defined_config_is_immutable() { 509 | let mut main_cfg = CfgFile { 510 | max_width: None, 511 | end_markers: None, 512 | lang: None, 513 | suppressions: None, 514 | ignores: None, 515 | upstream_command: None, 516 | upstream: None, 517 | upstream_separator: None, 518 | case: None, 519 | features: None, 520 | }; 521 | let missing_options = CfgFile { 522 | max_width: Some(20), 523 | end_markers: Some("marker".into()), 524 | lang: Some("lang".into()), 525 | 
suppressions: Some("suppressions".into()), 526 | ignores: Some("some other words".into()), 527 | upstream_command: Some("upstream-command".into()), 528 | upstream: Some("upstream".into()), 529 | upstream_separator: Some("sep".into()), 530 | case: Some(Case::Ignore), 531 | features: Some("feature".into()), 532 | }; 533 | let other_options = CfgFile { 534 | max_width: Some(10), 535 | end_markers: Some("nothing".into()), 536 | lang: Some("asdf".into()), 537 | suppressions: Some("just text".into()), 538 | ignores: Some("ignore this".into()), 539 | upstream_command: Some("does not matter".into()), 540 | upstream: Some("swimming is nice".into()), 541 | upstream_separator: Some("let's not split up".into()), 542 | case: Some(Case::Keep), 543 | features: Some("everything".into()), 544 | }; 545 | 546 | let fully_defined = main_cfg.merge_with(&missing_options); 547 | assert!(fully_defined); 548 | let fully_defined = main_cfg.merge_with(&other_options); 549 | assert!(fully_defined); 550 | 551 | let expected_cfg = CfgFile { 552 | max_width: Some(20), 553 | end_markers: Some("marker".into()), 554 | lang: Some("lang".into()), 555 | suppressions: Some("suppressions".into()), 556 | ignores: Some("some other words".into()), 557 | upstream_command: Some("upstream-command".into()), 558 | upstream: Some("upstream".into()), 559 | upstream_separator: Some("sep".into()), 560 | case: Some(Case::Ignore), 561 | features: Some("feature".into()), 562 | }; 563 | 564 | assert_eq!(expected_cfg, main_cfg); 565 | } 566 | 567 | #[test] 568 | fn merging_cli_with_two_config_files() { 569 | let main_cfg = CfgFile { 570 | max_width: Some(10), 571 | end_markers: None, 572 | lang: None, 573 | suppressions: None, 574 | ignores: Some("some words".into()), 575 | upstream_command: None, 576 | upstream: None, 577 | upstream_separator: None, 578 | case: None, 579 | features: None, 580 | }; 581 | let other_cfg = CfgFile { 582 | max_width: None, 583 | end_markers: None, 584 | lang: Some("ac".into()), 585 | suppressions: None, 586 | ignores: None, 587 | upstream_command: None, 588 | upstream: None, 589 | upstream_separator: None, 590 | case: None, 591 | features: Some("feature".into()), 592 | }; 593 | let default_cfg = CfgFile::default(); 594 | 595 | let files = vec![ 596 | (PathBuf::from("main"), main_cfg), 597 | (PathBuf::from("other"), other_cfg), 598 | (PathBuf::from("default"), default_cfg), 599 | ]; 600 | let no_args: Vec = vec![]; 601 | let cli = CliArgs::parse_from(no_args); 602 | let merged = merge_configs(&cli, &files); 603 | 604 | let expected_cfg = PerFileCfg { 605 | max_width: 10, 606 | end_markers: "?!:.".into(), 607 | lang: "ac".into(), 608 | suppressions: "".into(), 609 | ignores: "some words".into(), 610 | upstream_command: "".into(), 611 | upstream: "".into(), 612 | upstream_separator: "".into(), 613 | case: Case::Ignore, 614 | features: "feature".into(), 615 | }; 616 | 617 | assert_eq!(expected_cfg, merged); 618 | } 619 | } 620 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Prepare your markdown for easy diff'ing! 
2 | 3 | 4 | 5 | - [About](#about) 6 | - [Motivation](#motivation) 7 | - [Pronunciation](#pronunciation) 8 | - [Working Principle](#working-principle) 9 | - [Caveats](#caveats) 10 | - [About Markdown Extensions](#about-markdown-extensions) 11 | - [Command Reference](#command-reference) 12 | - [Command Line Arguments](#command-line-arguments) 13 | - [Automatic File Discovery](#automatic-file-discovery) 14 | - [Environment Variables](#environment-variables) 15 | - [Config Files](#config-files) 16 | - [Per-File Configuration](#per-file-configuration) 17 | - [Installation](#installation) 18 | - [Building From Source](#building-from-source) 19 | - [Editor Integration](#editor-integration) 20 | - [neovim](#neovim) 21 | - [vim](#vim) 22 | - [VS Code](#vs-code) 23 | - [Tips And Tricks](#tips-and-tricks) 24 | - [Non-Breaking Spaces](#non-breaking-spaces) 25 | - [Disabling Auto-Formatting](#disabling-auto-formatting) 26 | - [How To Contribute](#how-to-contribute) 27 | - [Licence](#licence) 28 | 29 | 30 | 31 | # About 32 | 33 | This is `mdslw`, the MarkDown Sentence Line Wrapper, an auto-formatter that 34 | prepares your markdown for easy diff'ing. 35 | 36 | # Motivation 37 | 38 | Markdown documents are written for different purposes. 39 | Some of them are meant to be read in plain text, while others are first rendered 40 | and then presented to the reader. 41 | In the latter case, the documents are often kept in version control and edited 42 | with the same workflows as other code. 43 | 44 | When editing source code, software developers do not want changes in one 45 | location to show up as changes in unrelated locations. 46 | Now imagine a markdown document like this: 47 | 48 | ```markdown 49 | # Lorem Ipsum 50 | 51 | Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Sed do eiusmod tempor 52 | incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam. 53 | ``` 54 | 55 | Adding the new sentence `Excepteur sint occaecat cupidatat non proident.` after 56 | the second one and re-arranging the text as a block would result in a diff view 57 | like this that shows changes in several lines: 58 | 59 | ```diff 60 | 3,4c3,5 61 | < Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Sed do eiusmod tempor 62 | < incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam. 63 | --- 64 | > Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Excepteur sint occaecat 65 | > cupidatat non proident. Sed do eiusmod tempor incididunt ut labore et dolore 66 | > magna aliqua. Ut enim ad minim veniam. 67 | ``` 68 | 69 | Now imagine the original text had a line break after every sentence, i.e. it had 70 | looked like this: 71 | 72 | ```markdown 73 | # Lorem Ipsum 74 | 75 | Lorem ipsum dolor sit amet. 76 | Consectetur adipiscing elit. 77 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 78 | Ut enim ad minim veniam. 79 | ``` 80 | 81 | For text formatted like this, a diff would only show up for the sentences that 82 | are actually affected, simplifying the review process: 83 | 84 | ```diff 85 | 4a5 86 | > Excepteur sint occaecat cupidatat non proident. 87 | ``` 88 | 89 | Most rendering engines treat a single linebreak like a single space. 90 | Thus, both documents would be identical when presented to the reader even though 91 | the latter is significantly nicer to keep up to date with version control. 92 | The tool `mdslw` aims to auto-format markdown documents in exactly this way. 
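If you want to try this on a document of your own, the simplest invocations are
sketched below; they rely only on the behaviour described in the
[command reference](#command-reference) further down (formatting is the default
mode, and without any `PATH` the tool acts as a stdin/stdout filter):

```bash
# Auto-format a file in place (the default mode).
mdslw lorem.md

# Only check the formatting without changing the file, e.g. in CI.
mdslw --mode=check lorem.md

# Read from stdin and write the formatted document to stdout.
mdslw < lorem.md
```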
93 | 94 | # Pronunciation 95 | 96 | If you are wondering how to pronounce `mdslw`, you can either say each letter 97 | individually or pronounce it like mud-slaw (`mʌd-slɔ`). 98 | 99 | # Working Principle 100 | 101 | The tool `mdslw` operates according to a very simple process that can be 102 | described as follows: 103 | 104 | - Parse the document and determine areas in the document that contain text. 105 | Only process those. 106 | - There exists a limited number of characters (`.!?:` by default) that serve as 107 | end-of-sentence markers if they occur alone. 108 | If such a character is followed by whitespace, it denotes the end of a 109 | sentence, _unless_ the last word before the character is part of a known set 110 | of words, matched case-insensitively by default. 111 | Those words can be taken from an included list for a specific language and 112 | also specified directly. 113 | - Insert a line break after every character that ends a sentence, but keep 114 | indents in lists and enumerations intact. 115 | - Collapse all consecutive whitespace into a single space. 116 | While doing so, preserve both [non-breaking spaces] and linebreaks that are 117 | preceded by [non-breaking spaces]. 118 | - Before line wrapping, replace all spaces in link texts by 119 | [non-breaking spaces]. 120 | - Wrap lines that are longer than the maximum line width (80 characters by 121 | default) without splitting words or splitting at [non-breaking spaces] while 122 | also keeping indents intact. 123 | 124 | In contrast to most other tools the author could find, `mdslw` does not parse 125 | the entire document into an internal data structure just to render it back 126 | because that might result in changes in unexpected locations. 127 | Instead, it adjusts only those areas that do contain text that can be wrapped. 128 | That is, `mdslw` never touches any parts of a document that cannot be 129 | line-wrapped automatically. 130 | That includes, for example, code blocks, HTML blocks, and pipe tables. 131 | 132 | ## Caveats 133 | 134 | - The default settings of `mdslw` are strongly geared towards the English 135 | language, even though it works for other languages, too. 136 | - Like with any other auto-formatter, you give up some freedom for the benefit 137 | of automatic handling of certain issues. 138 | - Inline code sections are wrapped like any other text, which may cause issues 139 | with certain renderers. 140 | - While `mdslw` has been tested with documents containing unicode characters 141 | such as emojis, the outcome can still be unexpected. 142 | For example, any emoji is treated as a single character when determining line 143 | width even though some editors might draw certain emojis wider. 144 | Any feedback is welcome! 145 | - Since `mdslw` collapses all consecutive whitespace into a single space during 146 | the line-wrapping process, it does not work well with documents using tabs in 147 | text. 148 | A tab, including all whitespace before and after it, will also be replaced by 149 | a single space. 150 | Use the `keep-linebreaks` feature and prefix linebreaks by 151 | [non-breaking spaces] to influence this behaviour. 152 | - There are flavours of markdown that define additional markup syntax that 153 | `mdslw` cannot recognise but instead detects as text. 154 | Consequently, `mdslw` might cause formatting changes that cause such special 155 | syntax to be lost. 156 | You can use [non-breaking spaces] to work around that. 
157 | - Some line breaks added by `mdslw` might not be considered nice looking. 158 | Use [non-breaking spaces] instead of normal ones to prevent a line break at a 159 | position. 160 | 161 | ## About Markdown Extensions 162 | 163 | There are quite a lot of markdown extensions out there. 164 | It is not possible for `mdslw` to support all of them. 165 | Instead, `mdslw` aims at supporting CommonMark as well as _some_ extensions used 166 | by its users. 167 | A new extension can be supported if supporting it does not negatively impact 168 | CommonMark support and if support can be added relatively easily. 169 | Please feel free to suggest support for a new extension as a 170 | [contribution](#how-to-contribute). 171 | 172 | # Command Reference 173 | 174 | Call as: 175 | 176 | ```bash 177 | mdslw [OPTIONS] [PATHS]... 178 | ``` 179 | 180 | A `PATH` can point to a file or a directory. 181 | If it is a file, then it will be auto-formatted irrespective of its extension. 182 | If it is a directory, then `mdslw` will discover all files ending in `.md` 183 | recursively and auto-format those. 184 | If you do not specify any path, then `mdslw` will read from stdin and write to 185 | stdout. 186 | 187 | The following is a list of all supported 188 | [command line arguments](#command-line-arguments). 189 | Note that you can also configure `mdslw` via 190 | [environment variables](#environment-variables) or 191 | [config files](#config-files). 192 | Values are resolved in the following order: 193 | 194 | - Defaults 195 | - Config files 196 | - Environment variables 197 | - Command line arguments 198 | 199 | ## Command Line Arguments 200 | 201 | - `--help`: 202 | Print the help message. 203 | - `--version`: 204 | Print the tool's version number. 205 | - `--max-width `: 206 | The maximum line width that is acceptable. 207 | A value of 0 disables wrapping of long lines altogether. 208 | The default value is 80. 209 | - `--end-markers `: 210 | The set of characters that are end of sentence markers, defaults to `?!:.`. 211 | - `--mode `: 212 | A value of `check` means to exit with an error if the format had to be 213 | adjusted but not to perform any formatting. 214 | A value of `format`, the default, means to format the file and exit with 215 | success. 216 | A value of `both` means to do both (useful when used as a `pre-commit` hook). 217 | - `--lang `: 218 | A space-separated list of languages whose suppression words as specified by 219 | unicode should be taken into account. 220 | See [here][unicode] for all languages. 221 | Currently supported are `en`, `de`, `es`, `fr`, and `it`. 222 | Use `none` to disable. 223 | Use `ac` (the default) for "author's choice", a list for the English language 224 | defined and curated by this tool's author. 225 | - `--suppressions `: 226 | A space-separated list of words that end in one of `END_MARKERS` but that 227 | should not be followed by a line break. 228 | This is in addition to what is specified via `--lang`. 229 | Defaults to the empty string. 230 | - `--ignores `: 231 | Space-separated list of words that end in one of `END_MARKERS` and that should 232 | be removed from the list of suppressions. 233 | Defaults to the empty string. 234 | - `--upstream-command `: 235 | Specify an upstream auto-formatter that reads from stdin and writes to stdout. 236 | It will be called before `mdslw` will run. 237 | This is useful if you want to chain multiple tools. 238 | Specify the command that will be executed. 239 | For example, specify `prettier` to call `prettier` first. 
240 | The upstream auto-formatter runs in each file's directory if `PATHS` are 241 | specified. 242 | - `--upstream `: 243 | Specify the arguments for the upstream auto-formatter. 244 | If `--upstream-command` is not set, the first word will be used as the command. 245 | For example, with `--upstream-command="prettier"`, use 246 | `--upstream="--parser=markdown"` to enable markdown parsing. 247 | - `--upstream-separator `: 248 | Specify a string that will be used to separate the value passed to 249 | `--upstream` into words. 250 | If empty, splitting is based on whitespace. 259 | - `--case `: 260 | How to handle the case of provided suppression words, both via `--lang` and 261 | `--suppressions`. 262 | A value of `ignore`, the default, means to match case-insensitively while a 263 | value of `keep` means to match case-sensitively. 264 | - `--extension `: 265 | The file extension used to find markdown files when a `PATH` is a directory, 266 | defaults to `.md`. 267 | - `--features `: 268 | Comma-separated list of optional features to enable or disable. 269 | Currently, the following are supported (the opposite setting is the default in 270 | each case): 271 | - `keep-spaces-in-links`: 272 | Do not replace spaces in link texts by [non-breaking spaces]. 273 | - `keep-linebreaks`: 274 | Do not remove existing linebreaks during the line-wrapping process. 275 | - `format-block-quotes`: 276 | Format text in block quotes. 277 | - `collate-link-defs`: 278 | Gather all link definitions, i.e. `[link name]: url`, in a block at the end 279 | of the document in alphabetical order, sorted case-insensitively. 280 | Links can be defined as belonging to a category called `CATEGORY_NAME` with 281 | the comment ``. 282 | Each link definition following such a comment will be considered as part of 283 | the specified category. 284 | Link definitions will be sorted per category and categories will also be 285 | sorted by name. 286 | - `outsource-inline-links`: 287 | Replace all inline links by named links using a link definition, i.e. 288 | `[link](url)` becomes `[link][def]` and `[def]: url`. 289 | All new link definitions will be added at the end of the document. 290 | Existing link definitions will be reused. 291 | Link definitions in block quotes will be put at the end of the block quote 292 | if `format-block-quotes` is set. 293 | - `--completion `: 294 | Output shell completion file for the given shell to stdout and exit. 295 | The following shells are supported: 296 | bash, elvish, fish, powershell, zsh. 297 | - `--jobs `: 298 | Specify the number of threads to use for processing files from disk in 299 | parallel. 300 | Defaults to the number of logical processors. 301 | - `--report `: 302 | What to report to stdout, ignored when reading from stdin: 303 | - `none`, the default: 304 | Report nothing but be silent instead, which is useful in scripts. 305 | - `changed`: 306 | Output the names of files that were changed, which is useful for downstream 307 | processing with tools such as `xargs`. 
308 | - `state`: 309 | Output `:` where `` is `U` for "unchanged" or `C` 310 | for "changed", which is useful for downstream filtering with tools such as 311 | `grep`. 312 | - `diff-myers`: 313 | Output a unified diff based on the [myers algorithm]. 314 | Pipe the output to tools such as [bat], [delta], or [diff-so-fancy] to get 315 | syntax highlighting. 316 | You can use the `--diff-pager` setting to define such a pager. 317 | - `diff-patience`: 318 | Output a unified diff based on the [patience algorithm]. 319 | See `diff-myers` for useful downstream tools. 320 | - `diff-lcs`: 321 | Output a unified diff based on the [lcs algorithm]. 322 | See `diff-myers` for useful downstream tools. 323 | - `--diff-pager `: 324 | Specify a downstream pager for diffs (with args) that reads diffs from stdin. 325 | This is useful if you want to display a diff nicely. 326 | For example, specify `delta --side-by-side` to get a side-by-side view. 327 | This flag is ignored unless a diff-type report has been requested. 328 | - `--stdin-filepath `: 329 | The path to the file that is read from stdin. 330 | This is used to determine relevant config files when reading from stdin and to 331 | run an upstream formatter. 332 | Defaults to the current working directory. 333 | - `--default-config`: 334 | Output the default config file in TOML format to stdout and exit. 335 | - `--verbose`: 336 | Specify to increase verbosity of log output. 337 | Specify multiple times to increase even further. 338 | 339 | ## Automatic File Discovery 340 | 341 | This tool uses the [ignore crate] in its default settings to discover files when 342 | given a directory as a `PATH`. 343 | Details about those defaults can be found [here][ignore defaults]. 344 | Briefly summarised, the following rules apply when deciding whether a file shall 345 | be ignored: 346 | 347 | - Hidden files (starting with `.`) are ignored. 348 | - Files matching patterns specified in a file called `.ignore` are ignored. 349 | The patterns affect all files in the same directory or child directories. 350 | - If run inside a git repository, files matching patterns specified in a file 351 | called `.gitignore` are ignored. 352 | The patterns affect all files in the same directory or child directories. 353 | 354 | If you wish to format a file that is being ignored by `mdslw`, then pass it as 355 | an argument directly. 356 | Files passed as arguments are never ignored and will always be processed. 357 | 358 | ## Environment Variables 359 | 360 | Instead of or in addition to configuring `mdslw` via 361 | [command line arguments](#command-line-arguments) or 362 | [config files](#config-files), you can configure it via environment variables. 363 | For any command line option `--some-option=value`, you can instead set an 364 | environment variable `MDSLW_SOME_OPTION=value`. 365 | For example, instead of setting `--end-markers=".?!"`, you could set 366 | `MDSLW_END_MARKERS=".?!"` instead. 367 | When set, the value specified via the environment variable will take precedence 368 | over the default value and a value taken from config files. 369 | When set, a command line argument will take precedence over the environment 370 | variable. 371 | Take a call like this for example: 372 | 373 | ```bash 374 | export MDSLW_EXTENSION=".markdown" 375 | export MDSLW_MODE=both 376 | mdslw --mode=check . 377 | ``` 378 | 379 | This call will search for files with the extension `.markdown` instead of the 380 | default `.md`. 
381 | Furthermore, files will only be checked due to `--mode=check`, even though the 382 | environment variable `MDSLW_MODE=both` has been set. 383 | Defaults will be used for everything else. 384 | 385 | ## Config Files 386 | 387 | Instead of or in addition to configuring `mdslw` via 388 | [command line arguments](#command-line-arguments) or 389 | [environment variables](#environment-variables), you can configure it via config 390 | files. 391 | Such a file has to have the exact name `.mdslw.toml` and affects all files in or 392 | below its own directory. 393 | Multiple config files will be merged. 394 | Options given in config files closer to a markdown file take precedence. 395 | 396 | Configuration files are limited to options that influence the formatted result. 397 | They cannot influence how `mdslw` operates. 398 | For example, the option `--mode` cannot be set via config files while 399 | `--max-width` can. 400 | The following example shows all the possible options that can be set via config 401 | files. 402 | Note that all entries are optional in config files, which means that any number 403 | of them may be left out. 404 | The following is a full config file containing all the default values. 405 | 406 | 407 | 408 | ```toml 409 | max-width = 80 410 | end-markers = "?!:." 411 | lang = "ac" 412 | suppressions = "" 413 | ignores = "" 414 | upstream-command = "" 415 | upstream = "" 416 | upstream-separator = "" 417 | case = "ignore" 418 | features = "" 419 | ``` 420 | 421 | 422 | 423 | When set, the value specified via the config file will take precedence over the 424 | default value. 425 | When set, an environment variable or a command line argument will take 426 | precedence over a value taken from config files. 427 | 428 | ### Per-File Configuration 429 | 430 | You can embed a configuration for `mdslw` inside a markdown file. 431 | That configuration affects only the file it is embedded in. 432 | It will be merged with other config files affecting the markdown file in 433 | question just like other config files. 434 | 435 | An embedded configuration needs to reside inside the YAML front matter as part 436 | of a _block scalar string_ associated with the YAML key `mdslw-toml` (see below 437 | for an example). 438 | To get an overview of all the different possibilities for defining multi-line 439 | strings in YAML documents, please see [here][yaml-block-scalars]. 440 | The embedded configuration string needs to follow the same format as all other 441 | config files for `mdslw` (see above). 442 | 443 | For example, you can embed the default config file into a markdown document as 444 | in the following example. 445 | It is strongly recommended to use the `|` block style indicator without a block 446 | chomping indicator as done in the following example. 447 | 448 | ```markdown 449 | --- 450 | # This is the YAML front matter. 451 | mdslw-toml: | 452 | max-width = 80 453 | end-markers = "?!:." 454 | lang = "ac" 455 | suppressions = "" 456 | ignores = "" 457 | upstream-command = "" 458 | upstream = "" 459 | upstream-separator = "" 460 | case = "ignore" 461 | features = "" 462 | --- 463 | The actual markdown document follows. 464 | ``` 465 | 466 | Note that `mdslw` does not feature a full YAML parser because, as of October 467 | 2025, there is no suitable library available. 468 | Instead, `mdslw` comes with its own limited YAML parser. 469 | That parser supports only block scalar strings without an indentation indicator. 
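Since all entries are optional, an embedded configuration does not have to
repeat the full default file shown above.
For instance, a minimal front matter that only raises the allowed line width
for one document could look like the following sketch (the value `100` is
purely illustrative):

```markdown
---
# This is the YAML front matter.
mdslw-toml: |
  max-width = 100
---
The actual markdown document follows.
```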
470 | 471 | # Installation 472 | 473 | Go to the project's [latest release], select the correct binary for your system, 474 | and download it. 475 | See below for how to select the correct one. 476 | Rename the downloaded binary to `mdslw` (or `mdslw.exe` on Windows) and move it 477 | to a location that is in your `$PATH` such as `/usr/local/bin` (will be 478 | different on Windows). 479 | Moving it there will likely require admin or `root` permissions, e.g. via 480 | `sudo`. 481 | On Unix systems, you also have to make the binary executable via the command 482 | `chmod +x mdslw`, pointing to the actual location of `mdslw`. 483 | From now on, you can simply type `mdslw` in your terminal to use it! 484 | 485 | The naming of the release binaries uses the [llvm target triple]. 486 | You can also use the following list to pick the correct binary for your machine: 487 | 488 | - `mdslw_x86_64-unknown-linux-musl`: 489 | Linux desktop or laptop using 64-bit x86-compatible CPUs 490 | - `mdslw_armv7-unknown-linux-gnueabihf`: 491 | RaspberryPi or similar single-board computers using ARMv7-compatible CPUs 492 | - `mdslw_x86_64-pc-windows-gnu.exe`: 493 | Windows desktop or laptop using 64-bit x86-compatible CPUs 494 | - `mdslw_aarch64-apple-darwin`: 495 | Mac using M1, M2, or other Mx CPUs based on Apple silicon, i.e. the new ones 496 | after the [transition from Intel CPUs][apple-architecture-transition-arm] 497 | - `mdslw_x86_64-apple-darwin`: 498 | Mac using 64-bit x86-compatible CPUs, i.e. the old ones after the 499 | [transition from the PowerPC architecture][apple-architecture-transition-ppc] 500 | 501 | ## Building From Source 502 | 503 | First, install rust, including `cargo`, via [rustup]. 504 | Once you have `cargo`, execute the following command in a terminal: 505 | 506 | ```bash 507 | cargo install --git https://github.com/razziel89/mdslw --locked 508 | ``` 509 | 510 | # Editor Integration 511 | 512 | Contributions describing integrations with more editors are welcome! 513 | 514 | ## neovim 515 | 516 | The recommended way of integrating `mdslw` with neovim is through 517 | [conform.nvim]. 518 | Simply install the plugin and modify your `init.vim` like this to add `mdslw` as 519 | a formatter for the markdown file type: 520 | 521 | ```lua 522 | require("conform").setup({ 523 | formatters_by_ft = { 524 | markdown = { "mdslw" }, 525 | }, 526 | formatters = { 527 | mdslw = { prepend_args = { "--stdin-filepath", "$FILENAME" } }, 528 | }, 529 | }) 530 | ``` 531 | 532 | Alternatively, you can also use the vim-like integration shown below. 533 | 534 | ## vim 535 | 536 | Add the following to your `~/.vimrc` to have your editor auto-format every `.md` 537 | document before writing it out: 538 | 539 | ```vim 540 | function MdFormat() 541 | if executable("mdslw") 542 | set lazyredraw 543 | " Enter and exit insert mode to keep track 544 | " of the cursor position, useful when undoing. 545 | execute "normal! ii\" 546 | let cursor_pos = getpos(".") 547 | %!mdslw --stdin-filepath "%" 548 | if v:shell_error != 0 549 | u 550 | endif 551 | call setpos('.', cursor_pos) 552 | set nolazyredraw 553 | endif 554 | endfunction 555 | 556 | autocmd BufWritePre *.md silent! :call MdFormat() 557 | ``` 558 | 559 | ## VS Code 560 | 561 | Assuming you have `mdslw` installed and in your `PATH`, you can integrate it 562 | with VS Code. 
563 | To do so, install the extension [run on save] and add the following snippet to 564 | your `settings.json`: 565 | 566 | ```json 567 | { 568 | "emeraldwalk.runonsave": { 569 | "commands": [ 570 | { 571 | "match": ".*\\.md$", 572 | "cmd": "mdslw '${file}'" 573 | } 574 | ] 575 | } 576 | } 577 | ``` 578 | 579 | From now on, every time you save to an existing markdown file, `mdslw` will 580 | auto-format it. 581 | This snippet assumes an empty `settings.json` file. 582 | If yours is not empty, you will have to merge it with the existing one. 583 | 584 | # Tips And Tricks 585 | 586 | ## Non-Breaking Spaces 587 | 588 | The following codepoints are recognised as [non-breaking spaces] by default: 589 | 590 | - U+00A0 591 | - U+2007 592 | - U+202F 593 | - U+2060 594 | - U+FEFF 595 | 596 | How to insert [non-breaking spaces] depends on your operating system as well as 597 | your editor. 598 | The below will cover the non-breaking space U+00A0. 599 | 600 | **vim/neovim** 601 | 602 | Adding this to your `~/.vimrc` or `init.vim` will let you insert non-breaking 603 | spaces when pressing CTRL+s in insert mode and also show them as `+`: 604 | 605 | ```vim 606 | " Make it easy to insert non-breaking spaces and show them by default. 607 | set list listchars+=nbsp:+ 608 | inoremap NS 609 | " Alternatively, you can use this if your neovim/vim does not support this 610 | " digraph. Note that your browser might not copy the non-breaking space at the 611 | " end of the following line correctly. 612 | inoremap   613 | ``` 614 | 615 | ❗Tips for how to add and show non-breaking spaces in other editors are welcome. 616 | 617 | ## Disabling Auto-Formatting 618 | 619 | You can tell `mdslw` to stop auto-formatting parts of your document. 620 | Everything between the HTML comments `` and 621 | `` will not be formatted. 622 | For convenience, `mdslw` also recognises `prettier`'s range ignore directives 623 | `` and ``. 624 | 625 | In addition, [non-breaking spaces](#non-breaking-spaces) can be used to prevent 626 | modifications to your documents. 627 | Replacing a space by a non-breaking space prevents `mdslw` from adding a line 628 | break at that position. 629 | Furthermore, preceding a line break by a non-breaking space prevents `mdslw` 630 | from removing the line break. 631 | 632 | # How To Contribute 633 | 634 | If you have found a bug and want to fix it, please simply go ahead and fork the 635 | repository, fix the bug, and open a pull request to this repository! 636 | Bug fixes are always welcome. 637 | 638 | In all other cases, please open an issue on GitHub first to discuss the 639 | contribution. 640 | The feature you would like to introduce might already be in development. 641 | Please also take note of [the intended scope](#about-markdown-extensions) of 642 | `mdslw`. 643 | 644 | # Licence 645 | 646 | [GPLv3] 647 | 648 | If you want to use this piece of software under a different, more permissive 649 | open-source licence, please contact me. 650 | I am very open to discussing this point. 
651 | 652 | 653 | 654 | [GPLv3]: ./LICENCE 655 | [ignore crate]: https://docs.rs/ignore/latest/ignore/ 656 | [ignore defaults]: https://docs.rs/ignore/latest/ignore/struct.WalkBuilder.html#method.standard_filters 657 | 658 | 659 | 660 | [lcs algorithm]: https://docs.rs/similar/latest/similar/algorithms/lcs/index.html 661 | [myers algorithm]: https://docs.rs/similar/latest/similar/algorithms/myers/index.html 662 | [patience algorithm]: https://docs.rs/similar/latest/similar/algorithms/patience/index.html 663 | 664 | 665 | 666 | [bat]: https://github.com/sharkdp/bat 667 | [delta]: https://github.com/dandavison/delta 668 | [diff-so-fancy]: https://github.com/so-fancy/diff-so-fancy 669 | 670 | 671 | 672 | [conform.nvim]: https://github.com/stevearc/conform.nvim 673 | [run on save]: https://marketplace.visualstudio.com/items?itemName=emeraldwalk.RunOnSave 674 | 675 | 676 | 677 | [non-breaking spaces]: https://en.wikipedia.org/wiki/Non-breaking_space 678 | [unicode]: https://github.com/unicode-org/cldr-json/tree/main/cldr-json/cldr-segments-full/segments 679 | [yaml-block-scalars]: https://yaml-multiline.info/ 680 | 681 | 682 | 683 | [apple-architecture-transition-arm]: https://en.wikipedia.org/wiki/Mac_transition_to_Apple_Silicon 684 | [apple-architecture-transition-ppc]: https://en.wikipedia.org/wiki/Mac_transition_to_Intel_processors 685 | [latest release]: https://github.com/razziel89/mdslw/releases/latest 686 | [llvm target triple]: https://clang.llvm.org/docs/CrossCompilation.html#target-triple 687 | [rustup]: https://rustup.rs/ 688 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | /* An opinionated line wrapper for markdown files. 2 | Copyright (C) 2023 Torsten Long 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | use core::ops::Range; 19 | use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; 20 | use std::collections::HashMap; 21 | use std::fmt::Write; 22 | 23 | use crate::detect::WhitespaceDetector; 24 | use crate::ignore::IgnoreByHtmlComment; 25 | use crate::indent::build_indent; 26 | use crate::trace_log; 27 | 28 | const YAML_CONFIG_KEY: &str = "mdslw-toml"; 29 | const YAML_CONFIG_KEY_WITH_COLON: &str = "mdslw-toml:"; 30 | 31 | /// CharRange describes a range of characters in a document. 32 | pub type CharRange = Range; 33 | 34 | #[derive(Debug, PartialEq)] 35 | pub struct ParseCfg { 36 | pub keep_linebreaks: bool, 37 | } 38 | 39 | /// Determine ranges of characters that shall later be wrapped and have their indents fixed. 40 | pub fn parse_markdown(text: &str, parse_cfg: &ParseCfg) -> Vec { 41 | // Enable some options by default to support parsing common kinds of documents. 
42 | let mut opts = Options::empty(); 43 | // If we do not want to modify some elements, we detect them with the parser and consider them 44 | // as verbatim in the function "to_be_wrapped". 45 | log::debug!("detecting tables"); 46 | opts.insert(Options::ENABLE_TABLES); 47 | log::debug!("detecting definition lists"); 48 | opts.insert(Options::ENABLE_DEFINITION_LIST); 49 | // Do not enable other options: 50 | // opts.insert(Options::ENABLE_FOOTNOTES); 51 | // opts.insert(Options::ENABLE_TASKLISTS); 52 | // opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); 53 | // opts.insert(Options::ENABLE_SMART_PUNCTUATION); 54 | // opts.insert(Options::ENABLE_STRIKETHROUGH); 55 | let events_and_ranges = Parser::new_ext(text, opts) 56 | .into_offset_iter() 57 | .inspect(|(event, range)| { 58 | trace_log!("parsed [{}, {}): {:?}", range.start, range.end, event) 59 | }) 60 | .collect::>(); 61 | let whitespaces = whitespace_indices(text, &WhitespaceDetector::new(parse_cfg.keep_linebreaks)); 62 | 63 | merge_ranges(to_be_wrapped(events_and_ranges, &whitespaces), &whitespaces) 64 | } 65 | 66 | /// Filter out those ranges of text that shall be wrapped. See comments in the function for 67 | /// what sections are handled in which way. 68 | fn to_be_wrapped( 69 | events: Vec<(Event, CharRange)>, 70 | whitespaces: &HashMap, 71 | ) -> Vec { 72 | let mut verbatim_level: usize = 0; 73 | let mut ignore = IgnoreByHtmlComment::new(); 74 | 75 | events 76 | .into_iter() 77 | // Mark every range that is between two ignore directives as verbatim by filtering it out. 78 | .filter(|(event, _range)| { 79 | if let Event::Html(s) = event { 80 | ignore.process_html(s) 81 | } 82 | !ignore.should_be_ignored() 83 | }) 84 | .filter(|(event, range)| match event { 85 | Event::Start(tag) => { 86 | match tag { 87 | // Most delimited blocks should stay as they are. Introducing line breaks would 88 | // cause problems here. 89 | Tag::BlockQuote(..) 90 | | Tag::CodeBlock(..) 91 | | Tag::FootnoteDefinition(..) 92 | | Tag::Heading { .. } 93 | | Tag::Image { .. } 94 | | Tag::Superscript 95 | | Tag::Subscript 96 | | Tag::Table(..) 97 | | Tag::TableCell 98 | | Tag::TableHead 99 | | Tag::TableRow => { 100 | verbatim_level += 1; 101 | false 102 | } 103 | // In case of some blocks, we do not want to extract the text contained inside 104 | // them but keep everything the block encompasses. 105 | Tag::Emphasis | Tag::Link { .. } | Tag::Strikethrough | Tag::Strong => { 106 | verbatim_level += 1; 107 | true 108 | } 109 | // Other delimited blocks can be both, inside a verbatim block or inside text. 110 | // However, the text they embrace is the important bit but we do not want to 111 | // extract the entire range. 112 | Tag::Item 113 | | Tag::List(..) 114 | | Tag::Paragraph 115 | | Tag::MetadataBlock(..) 116 | | Tag::DefinitionList 117 | | Tag::DefinitionListTitle 118 | | Tag::DefinitionListDefinition => false, 119 | 120 | // See below for why HTML blocks are treated like this. 121 | Tag::HtmlBlock => !range 122 | .clone() 123 | .filter_map(|el| whitespaces.get(&el)) 124 | .any(|el| el == &'\n'), 125 | } 126 | } 127 | 128 | Event::End(tag) => { 129 | match tag { 130 | // Kept as they were. 131 | TagEnd::BlockQuote(..) 132 | | TagEnd::CodeBlock 133 | | TagEnd::FootnoteDefinition 134 | | TagEnd::Heading(..) 
135 | | TagEnd::Superscript 136 | | TagEnd::Subscript 137 | | TagEnd::Image 138 | | TagEnd::Table 139 | | TagEnd::TableCell 140 | | TagEnd::TableHead 141 | | TagEnd::TableRow => { 142 | verbatim_level = verbatim_level 143 | .checked_sub(1) 144 | .expect("tags should be balanced"); 145 | false 146 | } 147 | // Should be wrapped but text not extracted. 148 | TagEnd::Emphasis | TagEnd::Link | TagEnd::Strikethrough | TagEnd::Strong => { 149 | verbatim_level = verbatim_level 150 | .checked_sub(1) 151 | .expect("tags should be balanced"); 152 | false 153 | } 154 | 155 | // Can be anything. 156 | TagEnd::Item 157 | | TagEnd::List(..) 158 | | TagEnd::DefinitionList 159 | | TagEnd::DefinitionListTitle 160 | | TagEnd::DefinitionListDefinition 161 | | TagEnd::Paragraph 162 | | TagEnd::HtmlBlock 163 | | TagEnd::MetadataBlock(..) => false, 164 | } 165 | } 166 | 167 | // More elements that are not blocks and that should be taken verbatim. 168 | Event::TaskListMarker(..) | Event::FootnoteReference(..) | Event::Rule => false, 169 | 170 | // We do not support detecting math so far as we do not intend to modify math in any 171 | // way. That is, we treat it as any other text and don't have the parser detect math 172 | // specifically. 173 | Event::InlineMath(..) | Event::DisplayMath(..) => false, 174 | 175 | // Allow editing HTML only if it is inline, i.e. if the range containing the HTML 176 | // contains no whitespace. Treat it like text in that case. 177 | Event::Html(..) | Event::InlineHtml(..) => !range 178 | .clone() 179 | .filter_map(|el| whitespaces.get(&el)) 180 | .any(|el| el == &'\n'), 181 | 182 | // The following should be wrapped if they are not inside a verbatim block. Note that 183 | // that also includes blocks that are extracted in their entirety (e.g. links). In the 184 | // context of text contained within, they count as verbatim blocks, too. 185 | Event::SoftBreak | Event::HardBreak | Event::Text(..) | Event::Code(..) => { 186 | verbatim_level == 0 187 | } 188 | }) 189 | .map(|(_event, range)| range) 190 | .collect::<Vec<_>>() 191 | } 192 | 193 | #[derive(Debug)] 194 | enum RangeMatch<'a> { 195 | Matches((usize, &'a str)), 196 | NoMatch(&'a str), 197 | } 198 | 199 | pub struct BlockQuotes<'a>(Vec<RangeMatch<'a>>); 200 | 201 | impl<'a> BlockQuotes<'a> { 202 | pub const FULL_PREFIX: &'static str = "> "; 203 | pub const FULL_PREFIX_LEN: usize = Self::FULL_PREFIX.len(); 204 | pub const SHORT_PREFIX: &'static str = ">"; 205 | 206 | fn strip_prefix(text: &str, indent: usize) -> String { 207 | // The first line does start with the actual prefix, while the other lines start with a 208 | // number of other characters. Thus, we strip them off for all but the first line. 209 | text.split_inclusive('\n') 210 | .enumerate() 211 | .map(|(idx, t)| { 212 | let t = if idx == 0 { t } else { &t[indent..t.len()] }; 213 | t.strip_prefix(Self::SHORT_PREFIX) 214 | .map(|el| el.strip_prefix(' ').unwrap_or(el)) 215 | .unwrap_or(t) 216 | }) 217 | .collect::<String>() 218 | } 219 | 220 | fn add_prefix(text: String, indent: usize) -> String { 221 | let indent = build_indent(indent); 222 | // The "write!" calls should never fail since we write to a String that we create here. 223 | let mut result = String::new(); 224 | text.split_inclusive('\n') 225 | .enumerate() 226 | .for_each(|(idx, line)| { 227 | let prefix = if line.len() == 1 { 228 | Self::SHORT_PREFIX 229 | } else { 230 | Self::FULL_PREFIX 231 | }; 232 | // The first line is already correctly indented. For the other lines, we have to add 233 | // the indent. 
234 | let ind = if idx == 0 { "" } else { &indent }; 235 | write!(result, "{}{}{}", ind, prefix, line) 236 | .expect("building block-quote formated result"); 237 | }); 238 | result 239 | } 240 | 241 | fn indents(text: &str) -> Vec { 242 | text.split_inclusive('\n') 243 | .flat_map(|line| 0..line.len()) 244 | .collect::>() 245 | } 246 | 247 | pub fn new(text: &'a str) -> Self { 248 | let mut level: usize = 0; 249 | // In case we ever need to iterate over other kinds of syntax, the tag as well as the 250 | // function stripping prefixes will have to be adjusted. 251 | 252 | let indents = Self::indents(text); 253 | let mut start = 0; 254 | 255 | let mut ranges = Parser::new(text) 256 | .into_offset_iter() 257 | .filter_map(|(event, range)| match event { 258 | Event::Start(start) => { 259 | if matches!(start, Tag::BlockQuote(..)) { 260 | level += 1; 261 | } 262 | if level == 1 && matches!(start, Tag::BlockQuote(..)) { 263 | // Using a CharRange here to prevent the flat_map below from flattening 264 | // all the ranges, since Range supports flattening but our 265 | // CharRange does not. 266 | Some(CharRange { 267 | start: range.start, 268 | end: range.end, 269 | }) 270 | } else { 271 | None 272 | } 273 | } 274 | Event::End(end) => { 275 | if matches!(end, TagEnd::BlockQuote(..)) { 276 | level -= 1; 277 | } 278 | None 279 | } 280 | _ => None, 281 | }) 282 | .flat_map(|range| { 283 | let prev_start = start; 284 | let this_start = range.start; 285 | start = range.end; 286 | 287 | let this = RangeMatch::Matches((indents[this_start], &text[range])); 288 | if this_start == prev_start { 289 | vec![this] 290 | } else { 291 | let missing = RangeMatch::NoMatch(&text[prev_start..this_start]); 292 | vec![missing, this] 293 | } 294 | }) 295 | .collect::>(); 296 | 297 | if start != text.len() { 298 | ranges.push(RangeMatch::NoMatch(&text[start..text.len()])); 299 | } 300 | 301 | Self(ranges) 302 | } 303 | 304 | /// The argument `func` should keep a line break at the end if its arguments ends in one. In 305 | /// most cases, it ends in a line break. 306 | pub fn apply_to_matches_and_join(self, func: MapFn) -> String 307 | where 308 | MapFn: Fn(String, usize) -> String, 309 | { 310 | self.0 311 | .into_iter() 312 | .map(|el| match el { 313 | RangeMatch::NoMatch(s) => s.to_string(), 314 | RangeMatch::Matches(s) => Self::add_prefix( 315 | func(Self::strip_prefix(s.1, s.0), s.0 + Self::FULL_PREFIX_LEN), 316 | s.0, 317 | ), 318 | }) 319 | .collect::() 320 | } 321 | } 322 | 323 | /// Check whether there is nothing but whitespace between the end of the previous range and the 324 | /// start of the next one, if the ranges do not connect directly anyway. Note that we still keep 325 | /// paragraphs separated by keeping ranges separate that are separated by more linebreaks than one. 326 | fn merge_ranges(ranges: Vec, whitespaces: &HashMap) -> Vec { 327 | let mut next_range: Option = None; 328 | let mut merged = vec![]; 329 | 330 | for range in ranges { 331 | if let Some(next) = next_range { 332 | let contains_just_whitespace = 333 | (next.end..range.start).all(|el| whitespaces.contains_key(&el)); 334 | let at_most_one_linebreak = (next.end..range.start) 335 | .filter(|el| Some(&'\n') == whitespaces.get(el)) 336 | .count() 337 | <= 1; 338 | let is_contained = range.start >= next.start && range.end <= next.end; 339 | 340 | if is_contained { 341 | // Skip the range if it is already included. 342 | next_range = Some(next); 343 | } else if contains_just_whitespace && at_most_one_linebreak { 344 | // Extend the range. 
345 | next_range = Some(CharRange { 346 | start: next.start, 347 | end: range.end, 348 | }); 349 | } else { 350 | // Remember the range and continue extending. 351 | merged.push(next); 352 | next_range = Some(range); 353 | } 354 | } else { 355 | next_range = Some(range); 356 | } 357 | } 358 | 359 | // Treat the last range that may be left. 360 | if let Some(next) = next_range { 361 | merged.push(next) 362 | } 363 | 364 | // Remove ranges that contain at most 1 character. They never have to be wrapped. 365 | let removed = merged 366 | .into_iter() 367 | .filter(|el| el.len() > 1) 368 | .collect::>(); 369 | 370 | trace_log!( 371 | "formattable byte ranges: {}", 372 | removed 373 | .iter() 374 | .map(|range| format!("[{},{})", range.start, range.end)) 375 | .collect::>() 376 | .join(" ") 377 | ); 378 | 379 | removed 380 | } 381 | 382 | /// Get all indices that point to whitespace as well as the characters they point to. 383 | fn whitespace_indices(text: &str, detector: &WhitespaceDetector) -> HashMap { 384 | text.char_indices() 385 | .filter_map(|(pos, ch)| { 386 | if detector.is_whitespace(&ch) { 387 | Some((pos, ch)) 388 | } else { 389 | None 390 | } 391 | }) 392 | .collect::>() 393 | } 394 | 395 | enum YAMLBlockStartLineType { 396 | Pipe, 397 | Angle, 398 | None, 399 | } 400 | 401 | impl YAMLBlockStartLineType { 402 | fn is_actual_start_line(&self) -> bool { 403 | !matches!(self, Self::None) 404 | } 405 | } 406 | 407 | /// Parse a YAML text without an external dependency. We interpret text as being a single YAML 408 | /// document. We search until we find a line starting with the given key. We return everything that 409 | /// is at the same indentation as the line following the key. 410 | pub fn get_value_for_mdslw_toml_yaml_key(text: &str) -> String { 411 | trace_log!( 412 | "extracting value for key {} from yaml: {}", 413 | YAML_CONFIG_KEY, 414 | text.replace("\n", "\\n") 415 | ); 416 | let start_line_type = |line: &str| { 417 | // Only perform the split by words if we can be reasonably sure that this might be the 418 | // correct line, i.e. one that starts with the key that we expect. 419 | if !line.starts_with(YAML_CONFIG_KEY) { 420 | return YAMLBlockStartLineType::None; 421 | } 422 | let split = line.split_whitespace().collect::>(); 423 | let first_word = split 424 | .first() 425 | .expect("Internal error, there should have been a first word."); 426 | if first_word == &YAML_CONFIG_KEY { 427 | match split[1..] { 428 | [":", "|"] | [":", "|-"] | [":", "|+"] => YAMLBlockStartLineType::Pipe, 429 | [":", ">"] | [":", ">-"] | [":", ">+"] => YAMLBlockStartLineType::Angle, 430 | _ => YAMLBlockStartLineType::None, 431 | } 432 | } else if first_word == &YAML_CONFIG_KEY_WITH_COLON { 433 | match split[1..] { 434 | ["|"] | ["|-"] | ["|+"] => YAMLBlockStartLineType::Pipe, 435 | [">"] | [">-"] | [">+"] => YAMLBlockStartLineType::Angle, 436 | _ => YAMLBlockStartLineType::None, 437 | } 438 | } else { 439 | YAMLBlockStartLineType::None 440 | } 441 | }; 442 | // We skip everything until the first line that we expect, including that first line. We end up 443 | // either with an empty iterator or an iterator whose first element is the first value line. 
444 | let mut skipped = text 445 | .lines() 446 | .skip_while(|line| !start_line_type(line).is_actual_start_line()); 447 | let block_type = if let Some(line) = skipped.next() { 448 | start_line_type(line) 449 | } else { 450 | YAMLBlockStartLineType::None 451 | }; 452 | let mut peekable = skipped.skip_while(|line| line.is_empty()).peekable(); 453 | let first_line = peekable.peek(); 454 | // Check whether we have a value line or not. 455 | if let Some(line) = first_line { 456 | // We check whether the first value line is indented. If so, we remember the indent since 457 | // every following value line has to have the exact same indent. 458 | let first_indent = line.len() - line.trim_start().len(); 459 | if first_indent > 0 { 460 | let result = peekable 461 | .take_while(|line| { 462 | line.is_empty() || line.len() - line.trim_start().len() == first_indent 463 | }) 464 | .map(|line| line.trim()) 465 | .collect::>() 466 | .join("\n"); 467 | log::info!( 468 | "found value for key {} from yaml:\n{}", 469 | YAML_CONFIG_KEY, 470 | result 471 | ); 472 | match block_type { 473 | YAMLBlockStartLineType::Pipe => result, 474 | YAMLBlockStartLineType::Angle => result 475 | .split("\n\n") 476 | .map(|line| line.replace("\n", " ")) 477 | .collect::>() 478 | .join("\n"), 479 | YAMLBlockStartLineType::None => String::new(), 480 | } 481 | } else { 482 | log::info!("no value line found"); 483 | String::new() 484 | } 485 | } else { 486 | log::info!("key {} not found", YAML_CONFIG_KEY); 487 | String::new() 488 | } 489 | } 490 | 491 | #[cfg(test)] 492 | mod test { 493 | use super::*; 494 | 495 | #[test] 496 | fn detect_whitespace() { 497 | let text = "some test with witespace at some\nlocations"; 498 | let detected = whitespace_indices(text, &WhitespaceDetector::default()); 499 | let expected = vec![ 500 | (4, ' '), 501 | (9, ' '), 502 | (14, ' '), 503 | (24, ' '), 504 | (27, ' '), 505 | (28, '\t'), 506 | (33, '\n'), 507 | ] 508 | .into_iter() 509 | .collect::>(); 510 | 511 | assert_eq!(expected, detected); 512 | } 513 | 514 | #[test] 515 | fn merging_ranges() { 516 | let ranges = vec![ 517 | CharRange { start: 0, end: 4 }, 518 | CharRange { start: 5, end: 9 }, 519 | CharRange { start: 11, end: 15 }, 520 | CharRange { start: 11, end: 14 }, 521 | CharRange { start: 16, end: 19 }, 522 | CharRange { start: 23, end: 36 }, 523 | ]; 524 | let whitespace = whitespace_indices( 525 | "some text\n\nmore text | even more text", 526 | &WhitespaceDetector::default(), 527 | ); 528 | 529 | let merged = merge_ranges(ranges, &whitespace); 530 | 531 | let expected = vec![ 532 | CharRange { start: 0, end: 9 }, 533 | CharRange { start: 11, end: 19 }, 534 | CharRange { start: 23, end: 36 }, 535 | ]; 536 | 537 | assert_eq!(expected, merged); 538 | } 539 | 540 | #[test] 541 | fn parsing_markdown() { 542 | let text = r#" 543 | ## Some Heading 544 | 545 | Some text. 546 | 547 | 548 | 549 | - More text. 550 | - More text. 551 | - Even more text. 552 | - Some text with a [link]. 
553 | 554 | ```code 555 | some code 556 | ``` 557 | 558 | [link]: https://something.com "some link" 559 | "#; 560 | let cfg = ParseCfg { 561 | keep_linebreaks: false, 562 | }; 563 | let parsed = parse_markdown(text, &cfg); 564 | 565 | // [18..28, 52..62, 65..75, 80..95, 100..124] 566 | let expected = vec![ 567 | CharRange { start: 18, end: 28 }, 568 | CharRange { start: 52, end: 62 }, 569 | CharRange { start: 65, end: 75 }, 570 | CharRange { start: 80, end: 95 }, 571 | CharRange { 572 | start: 100, 573 | end: 124, 574 | }, 575 | ]; 576 | 577 | assert_eq!(expected, parsed); 578 | } 579 | 580 | #[test] 581 | fn applying_to_no_block_quotes_remains_unchanged() { 582 | let text = r#" 583 | ## Some Heading 584 | 585 | Some text without block quotes. 586 | 587 | 588 | 589 | - More text. 590 | - More text. 591 | - Even more text. 592 | - Some text with a [link]. 593 | 594 | ```code 595 | some code 596 | ``` 597 | 598 | [link]: https://something.com "some link" 599 | "#; 600 | 601 | let unchanged = BlockQuotes::new(text).apply_to_matches_and_join(|_, _| String::new()); 602 | assert_eq!(text.to_string(), unchanged); 603 | } 604 | 605 | #[test] 606 | fn applying_to_block_quotes() { 607 | let text = r#" 608 | ## Some Heading 609 | 610 | Some text with block quotes. 611 | 612 | > This first text is block quoted. 613 | > 614 | >> This text is quoted at the second level. 615 | > 616 | > Some more quotes at the first level. 617 | 618 | 619 | 620 | - More text. 621 | - More text. 622 | - Even more text. 623 | - Some text with a [link]. 624 | 625 | > This second text is also block quoted. 626 | > 627 | > > This text is quoted at the second level. 628 | > 629 | > Some more quotes at the first level. 630 | 631 | - Some text. 632 | 633 | > This third text is block quoted but inside an itemization. 634 | > 635 | >> This text is quoted at the second level. 636 | > 637 | > Some more quotes at the first level. 638 | 639 | More text. 640 | 641 | [link]: https://something.com "some link" 642 | "#; 643 | 644 | let expected = r#" 645 | ## Some Heading 646 | 647 | Some text with block quotes. 648 | 649 | > 2:115 650 | > 2:115 651 | > 2:115 652 | 653 | 654 | 655 | - More text. 656 | - More text. 657 | - Even more text. 658 | - Some text with a [link]. 659 | 660 | > 2:121 661 | > 2:121 662 | > 2:121 663 | 664 | - Some text. 665 | 666 | > 4:141 667 | > 4:141 668 | > 4:141 669 | 670 | More text. 
671 | 672 | [link]: https://something.com "some link" 673 | "#; 674 | 675 | let changed = BlockQuotes::new(text).apply_to_matches_and_join(|s, i| { 676 | format!("{}:{}\n{}:{}\n{}:{}\n", i, s.len(), i, s.len(), i, s.len()) 677 | }); 678 | assert_eq!(expected, changed); 679 | } 680 | 681 | #[test] 682 | fn flattening_vecs_of_char_ranges_retains_ranges() { 683 | let to_be_flattened = vec![ 684 | vec![CharRange { start: 0, end: 10 }], 685 | vec![ 686 | CharRange { 687 | start: 100, 688 | end: 110, 689 | }, 690 | CharRange { 691 | start: 200, 692 | end: 210, 693 | }, 694 | ], 695 | ]; 696 | let flat = to_be_flattened.into_iter().flatten().collect::>(); 697 | let expected = vec![(0..10), (100..110), (200..210)]; 698 | assert_eq!(expected, flat); 699 | } 700 | 701 | fn build_yaml( 702 | key: &str, 703 | space_before_colon: bool, 704 | block_marker: &str, 705 | indent_spaces: usize, 706 | content: &str, 707 | ) -> String { 708 | let indent = (0..indent_spaces).map(|_| " ").collect::(); 709 | let indented = content 710 | .lines() 711 | .map(|line| format!("{}{}\n", indent, line)) 712 | .collect::(); 713 | let maybe_space = if space_before_colon { " " } else { "" }; 714 | let result = format!("{}{}: {}\n{}", key, maybe_space, block_marker, indented); 715 | // Ensure that values were filled in. 716 | assert_ne!(result, String::from(": \n")); 717 | result 718 | } 719 | 720 | const YAML_BASE_CONTENT: &str = r#" 721 | some content with an empty line 722 | 723 | at the beginning and in the middle"#; 724 | 725 | #[test] 726 | fn building_yaml() { 727 | let yaml = build_yaml(YAML_CONFIG_KEY, true, "|", 4, YAML_BASE_CONTENT); 728 | let expected = r#"mdslw-toml : | 729 | 730 | some content with an empty line 731 | 732 | at the beginning and in the middle 733 | "#; 734 | assert_eq!(yaml, expected); 735 | } 736 | 737 | #[test] 738 | fn extracting_yaml_string_pipe_block_markers() { 739 | for has_space in [true, false] { 740 | for marker in ["|", "|-", "|+"] { 741 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, YAML_BASE_CONTENT); 742 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 743 | assert_eq!(extracted, YAML_BASE_CONTENT); 744 | } 745 | } 746 | } 747 | 748 | #[test] 749 | fn extracting_yaml_string_angle_block_markers() { 750 | let expected = r#" some content with an empty line 751 | at the beginning and in the middle"#; 752 | for has_space in [true, false] { 753 | for marker in [">", ">-", ">+"] { 754 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, YAML_BASE_CONTENT); 755 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 756 | assert_eq!(extracted, expected); 757 | } 758 | } 759 | } 760 | 761 | #[test] 762 | fn extracting_yaml_string_pipe_block_markers_wrong_key() { 763 | let key = "some-other-key"; 764 | assert_ne!(key, YAML_CONFIG_KEY); 765 | for has_space in [true, false] { 766 | for marker in ["|", "|-", "|+"] { 767 | let yaml = build_yaml(key, has_space, marker, 4, YAML_BASE_CONTENT); 768 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 769 | assert_eq!(extracted, String::new()); 770 | } 771 | } 772 | } 773 | 774 | #[test] 775 | fn extracting_yaml_string_angle_block_markers_wrong_key() { 776 | let key = "some-other-key"; 777 | assert_ne!(key, YAML_CONFIG_KEY); 778 | for has_space in [true, false] { 779 | for marker in [">", ">-", ">+"] { 780 | let yaml = build_yaml(key, has_space, marker, 4, YAML_BASE_CONTENT); 781 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 782 | assert_eq!(extracted, String::new()); 783 | } 784 | } 785 | 
} 786 | 787 | #[test] 788 | fn extracting_yaml_string_empty_content() { 789 | let key = "some-other-key"; 790 | for has_space in [true, false] { 791 | for marker in ["|", "|-", "|+"] { 792 | let yaml = build_yaml(YAML_CONFIG_KEY, has_space, marker, 4, "") 793 | + build_yaml(key, has_space, marker, 4, "").as_str(); 794 | let extracted = get_value_for_mdslw_toml_yaml_key(&yaml); 795 | assert_eq!(extracted, ""); 796 | } 797 | } 798 | } 799 | 800 | #[test] 801 | fn malformed_yaml_file_does_not_break_extraction() { 802 | let yaml = build_yaml(YAML_CONFIG_KEY, false, "|", 4, "does not matter\nat all"); 803 | let malformed = yaml.replace(": |", ""); 804 | let extracted = get_value_for_mdslw_toml_yaml_key(&malformed); 805 | assert_eq!(extracted, "".to_string()); 806 | } 807 | 808 | #[test] 809 | fn config_keys_are_identical() { 810 | assert_eq!( 811 | YAML_CONFIG_KEY.to_string() + ":", 812 | YAML_CONFIG_KEY_WITH_COLON 813 | ); 814 | } 815 | } 816 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. 
If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. 
Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 
234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 
296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 
414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. 
The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | 676 | --------------------------------------------------------------------------------