├── .gitignore
├── Cargo.toml
├── LICENSE
├── README.md
├── example
│   ├── Cargo.lock
│   ├── Cargo.toml
│   └── src
│       ├── .todo.md
│       └── main.rs
├── examples
│   ├── debug.rs
│   ├── example.rs
│   └── trimming.rs
├── nolang.txt
└── src
    ├── lib.rs
    └── lib_old.rs

/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /Cargo.lock
3 | tests/bench/target
4 | tarpaulin-report.html
5 | *.swp
6 | /example/target
7 | examples/new_language_test.rs
8 | 
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "synoptic"
3 | authors = ["curlpipe <11898833+curlpipe@users.noreply.github.com>"]
4 | version = "2.2.9"
5 | edition = "2021"
6 | license = "MIT"
7 | description = "A simple, low-level, syntax highlighting library with unicode support"
8 | repository = "https://github.com/curlpipe/synoptic"
9 | readme = "README.md"
10 | keywords = ["unicode", "text-processing"]
11 | categories = ["text-processing"]
12 | 
13 | [dependencies]
14 | char_index = "0.1.4"
15 | if_chain = "1.0.2"
16 | nohash-hasher = "0.2.0"
17 | regex = "1.8.4"
18 | unicode-width = "0.2"
19 | 
20 | [dev-dependencies]
21 | lliw = "0.2.0"
22 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 curlpipe
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Synoptic
2 | 
3 | > Syntax highlighting for Rust applications
4 | 
5 | This is a pretty lightweight (only 3 main dependencies) and simple regex-based syntax highlighter for Rust.
6 | 
7 | I originally wrote this for my text editor, Ox. It needed a fast, configurable and optimised syntax highlighter that could easily integrate with existing projects. However, you can (and are encouraged to) use it for any project you have in mind.
8 | 
9 | ---
10 | **Advantages:**
11 | - **Customisable** - You can highlight almost any language by adding in custom syntax highlighting rules
12 | - **Fast** - Quick enough that it won't slow your projects down, even with large files and many different rules
13 | - **Simple** - You can get code highlighted pretty quickly (see the example below)
14 | - **Incremental** - As this was designed for use with a text editor, it can re-highlight code very quickly in response to edit commands
15 | - **Built-in language rules** - Get highlighting even faster by choosing from existing syntax rules
16 | - **File Buffering** - Synoptic doesn't need the whole file to perform a correct highlighting job, thus allowing file buffering
17 | - **Escaping** - Will handle escaping if you need it (`"here is a quote: \" tada!"`)
18 | - **Interpolation** - Will handle interpolation if you need it (`"My name is {name}, nice to meet you!"`)
19 | 
20 | **Disadvantages:**
21 | - **Not very well established** - There may be inconsistencies in the included pre-built language highlighting rules
22 | - **Lacks understanding** - This will not be able to provide very detailed syntax highlighting, as no parsing is performed
23 | - **Interpolation is limited** - You can't nest interpolated tokens like `"this is { "f{ "u" }n" }"`
24 | 
25 | Despite its disadvantages, if you just want a simple syntax highlighter with no frills or excess baggage, synoptic might just be your crate.
26 | 
27 | ## Installation
28 | Just add it to your `Cargo.toml`:
29 | ```toml
30 | [dependencies]
31 | synoptic = "2"
32 | ```
33 | 
34 | - Construct a `Highlighter` instance
35 | - Add regular expressions and keywords to the highlighter and assign each a name
36 | - Use the `run` method to generate tokens
37 | - Use the `line` method to obtain the tokens for each line
38 | 
39 | ## Built-in languages
40 | 
41 | You can also use some provided syntax highlighters for various popular languages using the `from_extension` function.
42 | There are likely to be inconsistencies in the existing rules, so please do open an issue if you spot any.
43 | 
44 | Currently, synoptic includes:
45 | 
46 | - [x] Various Higher Level Languages: Python, Ruby, Lua, Perl, Java, Visual Basic, Scala
47 | - [x] The C Family: C, C++, C#
48 | - [x] Various Lower Level Languages: Rust, Go, Assembly
49 | - [x] Web Technologies: HTML, CSS, PHP, JavaScript, JSON, TypeScript
50 | - [x] Mathematical Languages: MATLAB, R, Haskell, Prolog
51 | - [x] Mobile Development: Kotlin, Swift, Dart
52 | - [x] Markup Languages: Markdown, YAML, TOML, XML, CSV
53 | - [x] Other: SQL, Bash, Nushell
54 | 
55 | Open an issue if there is a language not yet supported, or if you notice any issues in the built-in syntax highlighting rules.
56 | 
57 | ## Example
58 | 
59 | Here's an example of a Rust syntax highlighter, using the lliw crate.
60 | 
61 | ```rust
62 | use synoptic::{Highlighter, TokOpt};
63 | use lliw::Fg;
64 | 
65 | // Let's use some demonstration code
66 | pub static CODE: &str = "\
67 | /*
68 | Multiline comments
69 | Work great
70 | */
71 | 
72 | pub fn main() -> bool {
73 |     // Demonstrate syntax highlighting in Rust!
74 |     println!(\"Full Unicode Support: 你好\");
75 |     // Interpolation
76 |     let name = \"peter\";
77 |     println!(\"My name is {name}, nice to meet you!\");
78 |     // Bye!
79 |     return true;
80 | }
81 | ";
82 | 
83 | fn main() {
84 |     // Setting up the highlighter
85 |     // The `4` here just means tabs are shown as 4 spaces
86 |     let mut h = Highlighter::new(4);
87 | 
88 |     // Bounded tokens are multiline tokens
89 |     // Let's define multiline comments
90 |     // In Rust, these start with /* and end with */
91 |     // Remember to escape any regex characters (like *)
92 |     // The false here is whether or not to allow escaping
93 |     // When true, we ignore any end markers with a backslash in front of them
94 |     // So, if it were true: `/* this is a comment \*/ this is still a comment */ this isn't`
95 |     h.bounded("comment", r"/\*", r"\*/", false);
96 | 
97 |     // Now let's define a string
98 |     // In Rust, format strings can be interpolated into between {}
99 |     // We first define the name of the token, the starting and ending pattern
100 |     // Then the starting and ending pattern of the interpolation section
101 |     // We also want strings to be escapable e.g. "here's a quote: \" this is still a string"
102 |     // Hence the true
103 |     h.bounded_interp("string", "\"", "\"", "\\{", "\\}", true);
104 | 
105 |     // Now let's define some keywords
106 |     // These are single line snippets of text
107 |     h.keyword("keyword", r"\b(pub|fn|bool|let|return)\b");
108 | 
109 |     // Let's get numbers being highlighted
110 |     h.keyword("digit", r"\b\d+(?:\.\d+)?\b");
111 | 
112 |     // ... and some remaining syntax rules
113 |     h.keyword("comment", "(//.*)$");
114 |     h.keyword("boolean", r"\b(true|false)\b");
115 |     h.keyword("macros", "[a-zA-Z_]+\\!");
116 |     h.keyword("function", r"([a-z][a-zA-Z_]*)\s*\(");
117 | 
118 |     // Now let's run the highlighter on the example code
119 |     // The run method takes a vector of strings (for each line)
120 |     let code: Vec<String> = CODE
121 |         .split('\n')
122 |         .map(|line| line.to_string())
123 |         .collect();
124 |     // Now we're ready to go
125 |     h.run(&code);
126 | 
127 |     // Let's render the output
128 |     for (line_number, line) in code.iter().enumerate() {
129 |         // Line returns tokens for the corresponding line
130 |         for token in h.line(line_number, &line) {
131 |             // Tokens can either require highlighting or not require highlighting
132 |             match token {
133 |                 // This is some text that needs to be highlighted
134 |                 TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset),
135 |                 // This is just normal text with no highlighting
136 |                 TokOpt::None(text) => print!("{text}"),
137 |             }
138 |         }
139 |         // Insert a newline at the end of every line
140 |         println!();
141 |     }
142 | }
143 | 
144 | fn colour(name: &str) -> Fg {
145 |     // This function will take in the token name
146 |     // And it will output the correct foreground colour
147 |     match name {
148 |         "comment" => Fg::LightBlack,
149 |         "digit" => Fg::Purple,
150 |         "string" => Fg::Green,
151 |         "macros" => Fg::LightPurple,
152 |         "boolean" => Fg::Blue,
153 |         "keyword" => Fg::Yellow,
154 |         "function" => Fg::Red,
155 |         _ => panic!("unknown token name"),
156 |     }
157 | }
158 | 
159 | ```
160 | 
161 | That will render a result similar to this (depending on your terminal's colour scheme):
162 | 
163 | ![](https://i.postimg.cc/0QJTsMbf/image.png)
164 | 
165 | ## License
166 | `MIT` license to ensure that you can use it in your project.
167 | 
168 | You can check the `LICENSE` file for more info.
169 | 
170 | 
171 | 
--------------------------------------------------------------------------------
/example/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "char_index" 16 | version = "0.1.4" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "10ef8669476802b7127a0a97612a0c34113e949ee65c695e4ac259f1f49aaa25" 19 | 20 | [[package]] 21 | name = "example" 22 | version = "0.1.0" 23 | dependencies = [ 24 | "lliw", 25 | "synoptic", 26 | ] 27 | 28 | [[package]] 29 | name = "if_chain" 30 | version = "1.0.2" 31 | source = "registry+https://github.com/rust-lang/crates.io-index" 32 | checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" 33 | 34 | [[package]] 35 | name = "lliw" 36 | version = "0.2.0" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "2d502c8bcc35a4f7ca9a7ffb7ac27b15ba30b1b92c2d69a1e4437e2635d73af7" 39 | 40 | [[package]] 41 | name = "memchr" 42 | version = "2.5.0" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 45 | 46 | [[package]] 47 | name = "regex" 48 | version = "1.9.1" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" 51 | dependencies = [ 52 | "aho-corasick", 53 | "memchr", 54 | "regex-automata", 55 | "regex-syntax", 56 | ] 57 | 58 | [[package]] 59 | name = "regex-automata" 60 | version = "0.3.3" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" 63 | dependencies = [ 64 | "aho-corasick", 65 | "memchr", 66 | "regex-syntax", 67 | ] 68 | 69 | [[package]] 70 | name = "regex-syntax" 71 | version = "0.7.4" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" 74 | 75 | [[package]] 76 | name = "synoptic" 77 | version = "2.2.7" 78 | dependencies = [ 79 | "char_index", 80 | "if_chain", 81 | "regex", 82 | "unicode-width", 83 | ] 84 | 85 | [[package]] 86 | name = "unicode-width" 87 | version = "0.2.0" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" 90 | -------------------------------------------------------------------------------- /example/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "example" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [profile.release] 7 | codegen-units = 1 8 | lto = "fat" 9 | 10 | [dependencies] 11 | lliw = "0.2.0" 12 | synoptic = { path = "../" } 13 | -------------------------------------------------------------------------------- /example/src/.todo.md: -------------------------------------------------------------------------------- 1 | - [x] Hanging tokens 2 | - [x] Split by line 3 | - [x] Coloured output 4 | - [x] Implement into cactus for interactive testing 5 | - [x] Work with line buffering [EASY] 6 | - [x] Single Line Regex tokens [HARD] 7 | - [x] Unicode support [MEDIUM] 8 | - [x] Allow for insertion 9 | - [x] Allow for deletion 10 | - [x] Implement line trimming [MEDIUM] 11 | - [x] 
Cleaner API & proper documentation [MEDIUM] 12 | - [x] Optimise [HARD] 13 | - [x] More efficient retokenization on append 14 | - [x] Only retokenize when necessary 15 | - [x] Stop empty tokens from being generated (with regex) [EASY] 16 | - [x] Allow bounded token escaping (\") 17 | - [x] Allow interpolation ("{interpolated}") 18 | - [x] Investigate and solve weird multiline bounded token bug (see multiline blocks in markdown) 19 | - [x] Default syntax highlighters 20 | - [x] JavaScript 21 | - [x] JSON 22 | - [x] Java 23 | - [x] Markdown 24 | - [x] TOML 25 | - [x] Yaml 26 | - [x] XML 27 | - [x] CSV 28 | - [x] Kotlin 29 | - [x] Swift 30 | - [x] C 31 | - [x] C++ 32 | - [x] C# 33 | - [x] R 34 | - [x] Go 35 | - [x] PHP 36 | - [x] Python 37 | - [x] HTML 38 | - [x] CSS 39 | - [x] SQL 40 | - [x] BASH 41 | - [x] Lua 42 | - [x] Rust 43 | - [x] TypeScript 44 | - [x] Ruby 45 | - [x] Dart 46 | - [x] MATLAB 47 | - [x] Assembly 48 | - [x] Perl 49 | - [x] Visual Basic 50 | - [x] Scala 51 | - [x] Prolog 52 | - [x] Haskell 53 | -------------------------------------------------------------------------------- /example/src/main.rs: -------------------------------------------------------------------------------- 1 | use lliw::Fg; 2 | use synoptic::{Highlighter, TokOpt}; 3 | use std::time::Instant; 4 | 5 | pub static CODE: &str = "\ 6 | /* 7 | Multiline comments 8 | Work great 9 | */ 10 | 11 | pub fn main() -> bool { 12 | // Demonstrate syntax highlighting in Rust! 13 | println!(\"Full Unicode Support: 你好\"); 14 | // Interpolation 15 | let name = \"peter\"; 16 | println!(\"My name is {name}, nice to meet you!\"); 17 | // Bye! 18 | return true; 19 | } 20 | "; 21 | 22 | fn main() { 23 | benchmark(); 24 | /* 25 | let mut code: Vec = CODE.split('\n').map(|x| x.to_string()).collect(); 26 | let mut h = synoptic::from_extension("rs", 4).unwrap(); 27 | h.run(&code); 28 | for (y, line) in code.iter().enumerate() { 29 | print!("{: <3} |", y); 30 | for token in h.line(y, &line) { 31 | match token { 32 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 33 | TokOpt::None(text) => print!("{text}"), 34 | } 35 | } 36 | println!(); 37 | } 38 | */ 39 | } 40 | 41 | fn colour(kind: &str) -> Fg { 42 | match kind { 43 | "string" => Fg::Rgb(54, 161, 102), 44 | "boolean" => Fg::Rgb(54, 161, 102), 45 | "comment" => Fg::Rgb(108, 107, 90), 46 | "digit" => Fg::Rgb(157, 108, 124), 47 | "keyword" => Fg::Rgb(91, 157, 72), 48 | "attribute" => Fg::Rgb(95, 145, 130), 49 | "character" => Fg::Rgb(125, 151, 38), 50 | "namespace" => Fg::Rgb(125, 151, 38), 51 | "struct" => Fg::Rgb(125, 151, 38), 52 | "operator" => Fg::Rgb(125, 151, 38), 53 | "header" => Fg::Rgb(54, 161, 102), 54 | "reference" => Fg::Rgb(125, 151, 38), 55 | "type" => Fg::Rgb(165, 152, 13), 56 | "function" => Fg::Rgb(174, 115, 19), 57 | "macro" => Fg::Rgb(157, 108, 124), 58 | "heading" => Fg::Rgb(174, 115, 19), 59 | "tag" => Fg::Rgb(174, 115, 19), 60 | "bold" => Fg::Rgb(157, 108, 124), 61 | "strikethrough" => Fg::Rgb(54, 161, 102), 62 | "italic" => Fg::Rgb(125, 151, 38), 63 | "block" => Fg::Rgb(125, 151, 38), 64 | "table" => Fg::Rgb(125, 151, 38), 65 | "type" => Fg::Rgb(165, 152, 13), 66 | "linebreak" => Fg::Rgb(54, 161, 102), 67 | "math" => Fg::Rgb(54, 161, 102), 68 | "footnote" => Fg::Rgb(108, 107, 90), 69 | "quote" => Fg::Rgb(157, 108, 124), 70 | "list" => Fg::Rgb(91, 157, 72), 71 | "image" => Fg::Rgb(125, 151, 38), 72 | "link" => Fg::Rgb(165, 152, 13), 73 | "key" => Fg::Rgb(165, 152, 13), 74 | _ => panic!("Unknown token name {kind}"), 75 | } 76 | } 77 | 
78 | fn benchmark() { 79 | let start = Instant::now(); 80 | let mut h = synoptic::from_extension("rs", 4).unwrap(); 81 | let end = Instant::now(); 82 | println!("Initialisation time: {:?}", end - start); 83 | 84 | let mut file = std::fs::read_to_string("/home/luke/dev/rust/kaolinite/demos/8.rs").unwrap().split('\n').map(|x| x.to_string()).collect::>(); 85 | let viewport_file1 = file.iter().take(10).cloned().collect::>(); 86 | let viewport_file2 = file.iter().take(100).cloned().collect::>(); 87 | let viewport_file3 = file.iter().take(1000).cloned().collect::>(); 88 | 89 | let start = Instant::now(); 90 | h.run(&viewport_file1); 91 | let end = Instant::now(); 92 | println!("Run time ({}): {:?}", 10, end - start); 93 | let start = Instant::now(); 94 | h.run(&viewport_file2); 95 | let end = Instant::now(); 96 | println!("Run time ({}): {:?}", 100, end - start); 97 | let start = Instant::now(); 98 | h.run(&viewport_file3); 99 | let end = Instant::now(); 100 | println!("Run time ({}): {:?}", 1000, end - start); 101 | let start = Instant::now(); 102 | h.run(&file); 103 | let end = Instant::now(); 104 | println!("Run time ({}): {:?}", file.len(), end - start); 105 | 106 | let mut h = synoptic::from_extension("rs", 4).unwrap(); 107 | //file[9996] = "/*".to_string(); 108 | h.run(&file); 109 | 110 | for (mut y, line) in file.iter().skip(9996).take(7).enumerate() { 111 | y += 9996; 112 | print!("{: <3} |", y); 113 | for token in h.line(y, &line) { 114 | match token { 115 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 116 | TokOpt::None(text) => print!("{text}"), 117 | } 118 | } 119 | println!(); 120 | } 121 | 122 | let start = Instant::now(); 123 | 124 | h.edit(10000, &"/* this is a test pub */ pub fn egg() return 3 + 4".to_string()); 125 | file[10000] = "/* this is a test pub */ pub fn egg() return 3 + 4".to_string(); 126 | h.edit(10004, &"We are all living in a simulation".to_string()); 127 | file[10004] = "We are all living in a simulation".to_string(); 128 | for i in 1..10000 { 129 | h.edit(i, &file[i+1]); 130 | file[i] = file[i+1].clone() 131 | } 132 | 133 | for (mut y, line) in file.iter().skip(9996).take(7).enumerate() { 134 | y += 9996; 135 | print!("{: <3} |", y); 136 | for token in h.line(y, &line) { 137 | match token { 138 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 139 | TokOpt::None(text) => print!("{text}"), 140 | } 141 | } 142 | println!(); 143 | } 144 | 145 | let end = Instant::now(); 146 | println!("Edit time: {:?}", end - start); 147 | } 148 | -------------------------------------------------------------------------------- /examples/debug.rs: -------------------------------------------------------------------------------- 1 | use synoptic::{Highlighter, TokOpt, trim_fit}; 2 | use lliw::Fg; 3 | 4 | pub static CODE: &str = r#"f"""#; 5 | 6 | fn main() { 7 | let mut h = synoptic::from_extension("py", 4).unwrap(); 8 | let mut code: Vec = CODE.split('\n').map(|x| x.to_string()).collect(); 9 | h.run(&code); 10 | // Initial state 11 | for token in &h.line(0, &code[0]) { 12 | match token { 13 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 14 | TokOpt::None(text) => print!("{text}"), 15 | } 16 | } 17 | println!(); 18 | // Try changing it 19 | code[0] = r#"f"{}""#.to_string(); 20 | h.edit(0, &code[0]); 21 | // Observe incorrect new state 22 | for token in &h.line(0, &code[0]) { 23 | match token { 24 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 25 | TokOpt::None(text) 
=> print!("{text}"),
26 |         }
27 |     }
28 |     println!();
29 | }
30 | 
31 | fn colour(name: &str) -> Fg {
32 |     // This function will take in the function name
33 |     // And it will output the correct foreground colour
34 |     match name {
35 |         "comment" => Fg::LightBlack,
36 |         "digit" => Fg::Purple,
37 |         "string" => Fg::Green,
38 |         "macros" => Fg::LightPurple,
39 |         "boolean" => Fg::Blue,
40 |         "keyword" => Fg::Yellow,
41 |         "function" => Fg::Red,
42 |         "operator" => Fg::LightBlack,
43 |         "link" => Fg::LightBlue,
44 |         "list" => Fg::Green,
45 |         "insertion" => Fg::Green,
46 |         "deletion" => Fg::Red,
47 |         "reference" => Fg::Purple,
48 |         _ => panic!("unknown token {name}"),
49 |     }
50 | }
51 | 
--------------------------------------------------------------------------------
/examples/example.rs:
--------------------------------------------------------------------------------
1 | use synoptic::{Highlighter, TokOpt};
2 | use lliw::Fg;
3 | 
4 | // Let's use some demonstration code
5 | pub static CODE: &str = "\
6 | /*
7 | Multiline comments
8 | Work great
9 | */
10 | 
11 | pub fn main() -> bool {
12 |     // Demonstrate syntax highlighting in Rust!
13 |     println!(\"Full Unicode Support: 你好\");
14 |     // Interpolation
15 |     let name = \"peter\";
16 |     println!(\"My name is {name}, nice to meet you!\");
17 |     // Bye!
18 |     return true;
19 | }
20 | ";
21 | 
22 | fn main() {
23 |     // Setting up the highlighter
24 |     // The `4` here just means tabs are shown as 4 spaces
25 |     let mut h = Highlighter::new(4);
26 | 
27 |     // Bounded tokens are multiline tokens
28 |     // Let's define multiline comments
29 |     // In Rust, these start with /* and end with */
30 |     // Remember to escape any regex characters (like *)
31 |     // The false here is whether or not to allow escaping
32 |     // When true, we ignore any end markers with a backslash in front of them
33 |     // So, if it were true: `/* this is a comment \*/ this is still a comment */ this isn't`
34 |     h.bounded("comment", r"/\*", r"\*/", false);
35 | 
36 |     // Now let's define a string
37 |     // In Rust, format strings can be interpolated into between {}
38 |     // We first define the name of the token, the starting and ending pattern
39 |     // Then the starting and ending pattern of the interpolation section
40 |     // We also want strings to be escapable e.g. "here's a quote: \" this is still a string"
41 |     // Hence the true
42 |     h.bounded_interp("string", "\"", "\"", "\\{", "\\}", true);
43 | 
44 |     // Now let's define some keywords
45 |     // These are single line snippets of text
46 |     h.keyword("keyword", r"\b(pub|fn|bool|let|return)\b");
47 | 
48 |     // Let's get numbers being highlighted
49 |     h.keyword("digit", r"\b\d+(?:\.\d+)?\b");
50 | 
51 |     // ... and some remaining syntax rules
52 |     h.keyword("comment", "(//.*)$");
53 |     h.keyword("boolean", r"\b(true|false)\b");
54 |     h.keyword("macros", "[a-zA-Z_]+\\!");
55 |     h.keyword("function", r"([a-z][a-zA-Z_]*)\s*\(");
56 | 
57 |     // Now let's run the highlighter on the example code
58 |     // The run method takes a vector of strings (for each line)
59 |     let code: Vec<String> = CODE
60 |         .split('\n')
61 |         .map(|line| line.to_string())
62 |         .collect();
63 |     // Now we're ready to go
64 |     h.run(&code);
65 | 
66 |     // Let's render the output
67 |     for (line_number, line) in code.iter().enumerate() {
68 |         // Line returns tokens for the corresponding line
69 |         for token in h.line(line_number, &line) {
70 |             // Tokens can either require highlighting or not require highlighting
71 |             match token {
72 |                 // This is some text that needs to be highlighted
73 |                 TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset),
74 |                 // This is just normal text with no highlighting
75 |                 TokOpt::None(text) => print!("{text}"),
76 |             }
77 |         }
78 |         // Insert a newline at the end of every line
79 |         println!();
80 |     }
81 | }
82 | 
83 | fn colour(name: &str) -> Fg {
84 |     // This function will take in the function name
85 |     // And it will output the correct foreground colour
86 |     match name {
87 |         "comment" => Fg::LightBlack,
88 |         "digit" => Fg::Purple,
89 |         "string" => Fg::Green,
90 |         "macros" => Fg::LightPurple,
91 |         "boolean" => Fg::Blue,
92 |         "keyword" => Fg::Yellow,
93 |         "function" => Fg::Red,
94 |         _ => panic!("unknown token name"),
95 |     }
96 | }
97 | 
--------------------------------------------------------------------------------
/examples/trimming.rs:
--------------------------------------------------------------------------------
1 | use synoptic::{Highlighter, TokOpt, trim};
2 | use lliw::Fg;
3 | 
4 | pub static CODE: &str = r#"
5 | arst的st了st在st为sts
6 | art的st了st在st为sts
7 | hello world!
8 | "#; 9 | 10 | fn main() { 11 | let mut h = synoptic::from_extension("diff", 4).unwrap(); 12 | let mut code: Vec = CODE.split('\n').map(|x| x.to_string()).collect(); 13 | h.run(&code); 14 | // Trim and render 15 | for length in 0..30 { 16 | for (line_no, line) in code.iter().enumerate() { 17 | let tokens = h.line(line_no, &line); 18 | let tokens = trim(&tokens, 0, length, 4); 19 | for token in &tokens { 20 | // Tokens can either require highlighting or not require highlighting 21 | match token { 22 | // This is some text that needs to be highlighted 23 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 24 | // This is just normal text with no highlighting 25 | TokOpt::None(text) => print!("{text}"), 26 | } 27 | } 28 | println!("|"); 29 | } 30 | } 31 | } 32 | 33 | fn colour(name: &str) -> Fg { 34 | // This function will take in the function name 35 | // And it will output the correct foreground colour 36 | match name { 37 | "comment" => Fg::LightBlack, 38 | "digit" => Fg::Purple, 39 | "string" => Fg::Green, 40 | "macros" => Fg::LightPurple, 41 | "boolean" => Fg::Blue, 42 | "keyword" => Fg::Yellow, 43 | "function" => Fg::Red, 44 | "operator" => Fg::LightBlack, 45 | "link" => Fg::LightBlue, 46 | "list" => Fg::Green, 47 | "insertion" => Fg::Green, 48 | "deletion" => Fg::Red, 49 | "reference" => Fg::Purple, 50 | _ => panic!("unknown token {name}"), 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /nolang.txt: -------------------------------------------------------------------------------- 1 | Total: 36 languages 2 | Languages known to not be supported yet: 14 3 | 4 | Zsh 5 | Vala 6 | Julia 7 | Ini 8 | Haml 9 | Fortran 10 | Erlang 11 | Dockerfile 12 | D 13 | Crystal 14 | Clojure 15 | Cobol 16 | Batch file 17 | Ada 18 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use unicode_width::UnicodeWidthStr; 2 | pub use regex::Regex; 3 | use std::collections::HashMap; 4 | use std::ops::Range; 5 | use std::cmp::Ordering; 6 | use char_index::IndexedChars; 7 | use nohash_hasher::NoHashHasher; 8 | use std::hash::BuildHasherDefault; 9 | use std::sync::OnceLock; 10 | 11 | /// Represents a point in a 2d space 12 | #[derive(Debug, Clone, PartialEq)] 13 | pub struct Loc { 14 | y: usize, 15 | x: usize, 16 | } 17 | 18 | /// A definition of an Atom 19 | /// See [Atom] for more information 20 | #[derive(Debug, Clone)] 21 | pub struct AtomDef { 22 | /// Name of the atom 23 | name: String, 24 | /// The kind of atom 25 | kind: AtomKind, 26 | /// The corresponding bounded token definition 27 | tok: Option, 28 | /// The regex expression that defines this atom 29 | exp: Regex, 30 | } 31 | 32 | /// The kind of atom being represented 33 | #[derive(Debug, Clone, PartialEq)] 34 | pub enum AtomKind { 35 | /// This is the start atom of a token, for example /* for a multiline comment 36 | Start, 37 | /// This is the end atom of a token, for example */ for a multiline comment 38 | End, 39 | /// Sometimes bounded tokens have the same start and end atom, e.g. 
a string having a " to 40 | /// start and an " to end, a hybrid token allows atoms to be used to start and end a token in 41 | /// cases where due to having the same start and end atom definitions, their kind is ambiguous 42 | Hybrid, 43 | /// This is just a normal keyword 44 | Keyword, 45 | /// This is a start marker for interpolation 46 | InterpolateStart, 47 | /// This is an end marker for interpolation 48 | InterpolateEnd, 49 | } 50 | 51 | /// An atom is a portion of text within a document that is significant. 52 | /// An atom only covers one line. 53 | /// Atoms cover keywords as well as start and end indicators for bounded tokens 54 | /// E.g., in a string, the atoms would be the starting " and the ending " 55 | #[derive(Debug, Clone, PartialEq)] 56 | pub struct Atom { 57 | /// Name of the atom 58 | name: String, 59 | /// The kind of atom 60 | kind: AtomKind, 61 | /// The corresponding token 62 | tok: Option, 63 | /// The range covered by the atom 64 | x: Range, 65 | /// Whether or not there is a preceding backslash 66 | backslashed: bool, 67 | } 68 | 69 | /// Definition for a bounded token, these are tokens that can cover multiple lines. 70 | /// Things like multiline comments and strings are examples of this. 71 | /// They work well for buffering files where you are unaware of where the end indicator may be as 72 | /// it occurs further down in the file. 73 | #[derive(Debug, Clone)] 74 | pub struct BoundedDef { 75 | /// Whether or not this token can be escaped 76 | escapable: bool, 77 | } 78 | 79 | /// This is a TokenRef, which contains detailed information on what a token is 80 | #[derive(Debug, Clone, PartialEq)] 81 | pub enum TokenRef { 82 | /// Keyword tokens 83 | Keyword { 84 | /// The name of the bounded token 85 | name: String, 86 | /// A reference to the keyword atom 87 | atom: Loc, 88 | }, 89 | /// Bounded tokens 90 | Bounded { 91 | /// The name of the bounded token 92 | name: String, 93 | /// A reference to the start atom 94 | start: Loc, 95 | /// A reference to the end atom 96 | end: Option, 97 | }, 98 | } 99 | 100 | /// This is an enum for representing tokens. 101 | #[derive(Debug, Clone)] 102 | pub enum TokOpt { 103 | /// The Some variant represents a token being present in the format Some(TEXT, NAME). 104 | /// 105 | /// So for a comment token, you can expect to see Some("/* comment */", "comment") 106 | /// provided that you defined the comment using either the keyword or bounded function on 107 | /// [Highlighter] 108 | Some(String, String), 109 | /// The None variant represents just plain text. 
110 | None(String), 111 | } 112 | 113 | impl TokOpt { 114 | /// Works out if this token is empty, and thus redundant 115 | pub fn is_empty(&self) -> bool { 116 | let (TokOpt::Some(text, _) | TokOpt::None(text)) = self; 117 | text.len() == 0 118 | } 119 | 120 | /// Finds the text of a tokopt 121 | pub fn text(&self) -> &String { 122 | let (TokOpt::Some(text, _) | TokOpt::None(text)) = self; 123 | text 124 | } 125 | 126 | /// Finds the text of a tokopt (mutable) 127 | pub fn text_mut(&mut self) -> &mut String { 128 | let (TokOpt::Some(ref mut text, _) | TokOpt::None(ref mut text)) = self; 129 | text 130 | } 131 | 132 | /// This will remove the first character from the end of this token 133 | pub fn nibble_front(&mut self, tab_width: usize) -> Option { 134 | let (TokOpt::Some(ref mut text, _) | TokOpt::None(ref mut text)) = self; 135 | let ch = text.chars().nth(0)?; 136 | text.remove(0); 137 | let wid = width(&ch.to_string(), tab_width); 138 | if wid > 1 { 139 | *text = format!("{}{text}", " ".repeat(wid.saturating_sub(1))); 140 | } 141 | Some(ch) 142 | } 143 | 144 | /// This will remove the last character from the end of this token 145 | pub fn nibble_back(&mut self, tab_width: usize) -> Option { 146 | let (TokOpt::Some(ref mut text, _) | TokOpt::None(ref mut text)) = self; 147 | let ch = text.chars().last()?; 148 | text.pop(); 149 | let wid = width(&ch.to_string(), tab_width); 150 | if wid > 1 { 151 | *text = format!("{text}{}", " ".repeat(wid.saturating_sub(1))); 152 | } 153 | Some(ch) 154 | } 155 | 156 | pub fn skip(&mut self, idx: usize, tab_width: usize) { 157 | let mut at_disp = 0; 158 | let mut at_char = 0; 159 | let mut padding = 0; 160 | for i in self.text().chars() { 161 | match at_disp.cmp(&idx) { 162 | // Exactly at index, skip up to this point 163 | Ordering::Equal => break, 164 | // We skipped too much, indicating that padding is needed 165 | Ordering::Greater => { 166 | padding = at_disp - idx; 167 | break; 168 | } 169 | _ => { 170 | at_disp += width(&i.to_string(), tab_width); 171 | at_char += 1; 172 | } 173 | } 174 | } 175 | *self.text_mut() = " ".repeat(padding) + &self.text().chars().skip(at_char).collect::(); 176 | } 177 | 178 | pub fn take(&mut self, idx: usize, tab_width: usize) { 179 | let mut at_disp = 0; 180 | let mut at_char = 0; 181 | let mut padding = 0; 182 | for i in self.text().chars() { 183 | match at_disp.cmp(&idx) { 184 | // Exactly at index, take up to this point 185 | Ordering::Equal => break, 186 | // We took too much, indicating that padding is needed 187 | Ordering::Greater => { 188 | padding = at_disp - idx; 189 | at_char -= 1; 190 | break; 191 | } 192 | _ => { 193 | at_disp += width(&i.to_string(), tab_width); 194 | at_char += 1; 195 | } 196 | } 197 | } 198 | *self.text_mut() = self.text().chars().take(at_char).collect::() + &" ".repeat(padding); 199 | } 200 | } 201 | 202 | /// This is the main struct that will highlight your document 203 | #[derive(Debug, Clone)] 204 | pub struct Highlighter { 205 | /// The list of atoms, encapsulated within an inner vector for atoms on the same line 206 | pub atoms: Vec>, 207 | /// The list of atom definitions to be used at atomization 208 | pub atom_def: Vec, 209 | /// The list of bounded definitions to be used at tokenization 210 | pub bounded_def: Vec, 211 | /// A reference to what tokens lie on which line numbers 212 | pub line_ref: Vec>, 213 | /// A list of the resulting tokens generated from run and append 214 | pub tokens: Vec, 215 | /// How many spaces a tab character should be 216 | pub tab_width: usize, 
217 | /// For purposes of tokenization 218 | tokenize_state: Option, 219 | tokenize_interp: bool, 220 | } 221 | 222 | impl Highlighter { 223 | /// Creates a new highlighter 224 | pub fn new(tab_width: usize) -> Self { 225 | Self { 226 | atoms: vec![], 227 | atom_def: vec![], 228 | bounded_def: vec![], 229 | line_ref: vec![], 230 | tokens: vec![], 231 | tab_width, 232 | tokenize_state: None, 233 | tokenize_interp: false, 234 | } 235 | } 236 | 237 | /// Register a new keyword token, provide its name and regex 238 | pub fn keyword>(&mut self, name: S, exp: &str) { 239 | let name = name.into(); 240 | let exp = Regex::new(exp).expect("Invalid regex!"); 241 | self.atom_def.push(AtomDef { name, exp, kind: AtomKind::Keyword, tok: None }); 242 | } 243 | 244 | /// Register a new bounded token, with a start and end, 245 | /// e.g. a multiline comment having starting /* and an ending */ to delimit it 246 | /// The last argument is a boolean 247 | /// when true, tokens can be escaped with a backslash e.g. "\"" would be a string of a quote 248 | pub fn bounded>(&mut self, name: S, start: S, end: S, escapable: bool) { 249 | let (name, start, end) = (name.into(), start.into(), end.into()); 250 | // Gather atom information 251 | let start_exp = Regex::new(&start).expect("Invalid start regex"); 252 | let end_exp = Regex::new(&end).expect("Invalid end regex"); 253 | let hybrid = start == end; 254 | // Register bounded definition 255 | let idx = self.bounded_def.len(); 256 | self.bounded_def.push(BoundedDef { 257 | escapable, 258 | }); 259 | // Register atom definitions 260 | if hybrid { 261 | self.atom_def.push(AtomDef { 262 | name, 263 | exp: start_exp, 264 | kind: AtomKind::Hybrid, 265 | tok: Some(idx), 266 | }); 267 | } else { 268 | self.atom_def.push(AtomDef { 269 | name: name.clone(), 270 | exp: start_exp, 271 | kind: AtomKind::Start, 272 | tok: Some(idx), 273 | }); 274 | self.atom_def.push(AtomDef { 275 | name, 276 | exp: end_exp, 277 | kind: AtomKind::End, 278 | tok: Some(idx), 279 | }); 280 | } 281 | } 282 | 283 | /// Register a new interpolatable bounded token, with a start and end, 284 | /// e.g. a string as a bounded token, but allowing substitution between {} 285 | /// The last argument is a boolean 286 | /// when true, tokens can be escaped with a backslash e.g. 
"\"" would be a string of a quote 287 | pub fn bounded_interp>(&mut self, name: S, start: S, end: S, i_start: S, i_end: S, escapable: bool) { 288 | let (name, start, end, i_start, i_end) = (name.into(), start.into(), end.into(), i_start.into(), i_end.into()); 289 | if i_start == i_end { panic!("start and end markers for interpolation must not be equal!"); } 290 | // Gather atom information 291 | let start_exp = Regex::new(&start).expect("Invalid start regex"); 292 | let end_exp = Regex::new(&end).expect("Invalid end regex"); 293 | let hybrid = start == end; 294 | let i_start_exp = Regex::new(&i_start).expect("Invalid interpolation start regex"); 295 | let i_end_exp = Regex::new(&i_end).expect("Invalid interpolation end regex"); 296 | // Register bounded definition 297 | let idx = self.bounded_def.len(); 298 | self.bounded_def.push(BoundedDef { 299 | escapable, 300 | }); 301 | // Register atom definitions 302 | if hybrid { 303 | self.atom_def.push(AtomDef { 304 | name: name.clone(), 305 | exp: start_exp, 306 | kind: AtomKind::Hybrid, 307 | tok: Some(idx), 308 | }); 309 | } else { 310 | self.atom_def.push(AtomDef { 311 | name: name.clone(), 312 | exp: start_exp, 313 | kind: AtomKind::Start, 314 | tok: Some(idx), 315 | }); 316 | self.atom_def.push(AtomDef { 317 | name: name.clone(), 318 | exp: end_exp, 319 | kind: AtomKind::End, 320 | tok: Some(idx), 321 | }); 322 | } 323 | self.atom_def.push(AtomDef { 324 | name: name.clone(), 325 | exp: i_start_exp, 326 | kind: AtomKind::InterpolateStart, 327 | tok: Some(idx), 328 | }); 329 | self.atom_def.push(AtomDef { 330 | name: name.clone(), 331 | exp: i_end_exp, 332 | kind: AtomKind::InterpolateEnd, 333 | tok: Some(idx), 334 | }); 335 | } 336 | 337 | /// Do an initial pass on a vector of lines. 338 | /// 339 | /// Note that this will overwrite any existing information, 340 | /// use append to add extra lines to the document. 341 | pub fn run(&mut self, lines: &[String]) { 342 | // Atomize every line 343 | self.atoms = lines.iter().map(|l| self.atomize(l)).collect(); 344 | self.tokenize(); 345 | } 346 | 347 | /// Appends a line to the highlighter. 348 | pub fn append(&mut self, line: &str) { 349 | // Atomize this line 350 | self.atoms.push(self.atomize(line)); 351 | self.line_ref.push(vec![]); 352 | self.tokenize_line(self.atoms.len().saturating_sub(1)); 353 | } 354 | 355 | /// Once you have called the run or append methods, you can use this function 356 | /// to retrieve individual lines by providing the original line text and the y index. 
357 | /// 358 | /// # Example 359 | /// ``` 360 | /// let highlighter = Highlighter::new(4); // Tab ('\t') has a display width of 4 361 | /// highlighter.keyword("kw", "keyword"); // All occurances of "keyword" will be classed as a token of "kw" 362 | /// highlighter.run(vec![ 363 | /// "this is a keyword".to_string(), 364 | /// "second line!".to_string() 365 | /// ]); 366 | /// // Get the TokOpt for the first line 367 | /// highlighter.line(0, &"this is a keyword".to_string()) 368 | /// // Get the TokOpt for the second line 369 | /// highlighter.line(1, &"second line!".to_string()) 370 | /// ``` 371 | pub fn line(&self, y: usize, line: &str) -> Vec { 372 | let line = line.replace("\t", &" ".repeat(self.tab_width)); 373 | let len = line.chars().count(); 374 | let mut result = vec![]; 375 | let mut registry: HashMap = HashMap::default(); 376 | // Create token registry for this line 377 | for token in self.line_ref[y].iter().map(|t| &self.tokens[*t]) { 378 | match token { 379 | // Register bounded token 380 | TokenRef::Bounded { start, end, .. } => { 381 | let start = if start.y != y { 0 } else { self.atoms[start.y][start.x].x.start }; 382 | let end = end.clone() 383 | .map(|end| if end.y != y { len } else { self.atoms[end.y][end.x].x.end }) 384 | .unwrap_or(len); 385 | registry.insert(start, (end, token)); 386 | } 387 | // Register keyword token 388 | TokenRef::Keyword { atom, .. } => { 389 | //println!("{:?}", self.atoms); 390 | let start = self.atoms[atom.y][atom.x].x.start; 391 | let end = self.atoms[atom.y][atom.x].x.end; 392 | registry.insert(start, (end, token)); 393 | } 394 | } 395 | } 396 | // Process tokens into TokOpt format 397 | let mut chars = line.chars(); 398 | let mut x = 0; 399 | while x < len { 400 | if let Some((end, TokenRef::Bounded { name, .. } | TokenRef::Keyword { name, .. })) = registry.get(&x) { 401 | // Process token 402 | let text = chars.by_ref().take(end - x).collect::(); 403 | result.push(TokOpt::Some(text, name.clone())); 404 | x = *end; 405 | } else { 406 | // Process plain text 407 | if let Some(TokOpt::None(ref mut s)) = result.last_mut() { 408 | s.push(chars.next().unwrap()); 409 | } else { 410 | result.push(TokOpt::None(chars.next().unwrap().to_string())); 411 | } 412 | x += 1; 413 | } 414 | } 415 | result 416 | } 417 | 418 | /// Whenever a character is deleted or inserted on a line, 419 | /// call this function to update any tokens. 
420 | pub fn edit(&mut self, y: usize, line: &str) { 421 | let old_atoms = self.atoms[y].clone(); 422 | // Update the atoms on this line 423 | self.atoms[y] = self.atomize(line); 424 | // Determine whether tokenisation is necessary by checking atomic changes 425 | if self.retokenization_needed(&old_atoms, &self.atoms[y]) { 426 | self.tokenize(); 427 | } 428 | } 429 | 430 | /// Takes two lists of atoms and determines if retokenization is required in the first place 431 | /// This method will ignore index (as this is expected to change when editing) 432 | /// Has been shown to make editing events 500x faster to apply (where no atoms are modified) 433 | fn retokenization_needed(&self, old: &[Atom], new: &Vec) -> bool { 434 | // List lengths differ => atoms have been added or deleted 435 | if old.len() != new.len() { return true; } 436 | for (o, n) in old.iter().zip(new) { 437 | // If there is ever ANY discrepancy between atoms, we must retokenize 438 | if !(o.name == n.name && o.kind == n.kind && o.tok == n.tok && o.backslashed == n.backslashed) { 439 | return true; 440 | } 441 | } 442 | false 443 | } 444 | 445 | /// Whenever a line is inserted into the document, 446 | /// call this function to update any tokens. 447 | pub fn insert_line(&mut self, y: usize, line: &str) { 448 | self.atoms.insert(y, self.atomize(line)); 449 | self.tokenize(); 450 | } 451 | 452 | /// Whenever a line is removed from a document, 453 | /// call this function to update any tokens. 454 | pub fn remove_line(&mut self, y: usize) { 455 | self.atoms.remove(y); 456 | self.tokenize(); 457 | } 458 | 459 | /// This process will turn a line into a vector of atoms 460 | fn atomize(&self, line: &str) -> Vec { 461 | let line = IndexedChars::new(line); 462 | let mut atoms = vec![]; 463 | // For each atom definition 464 | for def in &self.atom_def { 465 | let occurances = find_all(&def.exp, line.as_str(), self.tab_width); 466 | // Register all occurances of any atom 467 | for x in occurances { 468 | if !x.is_empty() { 469 | // Work out how many backslashes there are behind this atom (for escaping) 470 | let mut backslash_count = 0; 471 | let range = (0..x.start).rev(); 472 | for idx in range { 473 | if let Some('\\') = line.get_char(idx) { 474 | backslash_count += 1; 475 | } else { 476 | break; 477 | } 478 | } 479 | // Push out the atom 480 | atoms.push(Atom { 481 | kind: def.kind.clone(), 482 | name: def.name.clone(), 483 | tok: def.tok, 484 | // An odd number of backslashes = escaped 485 | backslashed: backslash_count % 2 != 0, 486 | x, 487 | }); 488 | } 489 | } 490 | } 491 | // Order them based on start index 492 | atoms.sort_by(|a, b| a.x.start.cmp(&b.x.start)); 493 | atoms 494 | } 495 | 496 | fn tokenize(&mut self) { 497 | self.tokenize_state = None; 498 | self.tokenize_interp = false; 499 | self.line_ref = vec![]; 500 | self.atoms.iter().enumerate().for_each(|_| self.line_ref.push(vec![])); 501 | self.tokens = vec![]; 502 | for y in 0..self.atoms.len() { 503 | self.tokenize_line(y); 504 | } 505 | } 506 | 507 | fn tokenize_line(&mut self, y: usize) { 508 | let line_ref = self.line_ref.get_mut(y).unwrap(); 509 | let mut at_x = 0; 510 | let atoms = &self.atoms[y]; 511 | for (x, atom) in atoms.iter().enumerate() { 512 | if atom.x.start < at_x { continue; } 513 | // Work out if this atom is to be ignored (due to escaping) 514 | if let Atom { tok: Some(t), backslashed, .. } = atom { 515 | if self.bounded_def[*t].escapable && *backslashed { 516 | continue; 517 | } 518 | } 519 | // Continue tokenising... 
520 | match atom { 521 | Atom { name, kind: AtomKind::Keyword, .. } => { 522 | if self.tokenize_state.is_none() || self.tokenize_interp { 523 | self.tokens.push(TokenRef::Keyword { 524 | name: name.clone(), 525 | atom: Loc { y, x }, 526 | }); 527 | line_ref.push(self.tokens.len().saturating_sub(1)); 528 | at_x = atom.x.end; 529 | } 530 | } 531 | Atom { name, kind: AtomKind::Start, tok, .. } => { 532 | if self.tokenize_interp { continue; } 533 | if self.tokenize_state.is_none() { 534 | self.tokenize_state = *tok; 535 | self.tokens.push(TokenRef::Bounded { 536 | name: name.clone(), 537 | start: Loc { y, x }, 538 | end: None, 539 | }); 540 | at_x = atom.x.end; 541 | } 542 | } 543 | Atom { kind: AtomKind::End, tok, .. } => { 544 | if self.tokenize_interp { continue; } 545 | if self.tokenize_state == *tok { 546 | self.tokenize_state = None; 547 | if let TokenRef::Bounded { ref mut end, .. } = self.tokens.last_mut().unwrap() { 548 | *end = Some(Loc { y, x }); 549 | at_x = atom.x.end; 550 | } 551 | line_ref.push(self.tokens.len().saturating_sub(1)); 552 | } 553 | } 554 | Atom { name, kind: AtomKind::Hybrid, tok, .. } => { 555 | if self.tokenize_interp { continue; } 556 | if self.tokenize_state.is_none() { 557 | // Start registering token 558 | self.tokenize_state = *tok; 559 | self.tokens.push(TokenRef::Bounded { 560 | name: name.clone(), 561 | start: Loc { y, x }, 562 | end: None, 563 | }); 564 | at_x = atom.x.end; 565 | } else if self.tokenize_state == *tok { 566 | // Stop registering token 567 | self.tokenize_state = None; 568 | if let TokenRef::Bounded { ref mut end, .. } = self.tokens.last_mut().unwrap() { 569 | *end = Some(Loc { y, x }); 570 | at_x = atom.x.end; 571 | } 572 | line_ref.push(self.tokens.len().saturating_sub(1)); 573 | } 574 | } 575 | Atom { kind: AtomKind::InterpolateStart, tok, .. } => { 576 | if self.tokenize_state == *tok { 577 | // End the current token 578 | if let TokenRef::Bounded { ref mut end, .. } = self.tokens.last_mut().unwrap() { 579 | *end = Some(Loc { y, x }); 580 | at_x = atom.x.end; 581 | } 582 | line_ref.push(self.tokens.len().saturating_sub(1)); 583 | // Register interpolation 584 | self.tokenize_interp = true; 585 | } 586 | } 587 | Atom { name, kind: AtomKind::InterpolateEnd, tok, .. 
} => { 588 | if self.tokenize_state == *tok { 589 | // Stop interpolating 590 | self.tokenize_interp = false; 591 | // Resume capturing the outer token 592 | self.tokens.push(TokenRef::Bounded { 593 | name: name.clone(), 594 | start: Loc { y, x }, 595 | end: None, 596 | }); 597 | at_x = atom.x.end; 598 | } 599 | } 600 | } 601 | if self.tokenize_state.is_some() { 602 | line_ref.push(self.tokens.len().saturating_sub(1)); 603 | } 604 | } 605 | if self.tokenize_state.is_some() { 606 | line_ref.push(self.tokens.len().saturating_sub(1)); 607 | } 608 | line_ref.dedup(); 609 | } 610 | } 611 | 612 | /// This will find all occurances of a string in a document (and return character indices) 613 | pub fn find_all(exp: &Regex, target: &str, tab_width: usize) -> Vec> { 614 | let mapping = create_mapping(target, tab_width); 615 | exp.captures_iter(target) 616 | // Get last capture 617 | .map(|c| c.iter().flatten().collect::>()) 618 | .map(|mut c| c.pop().unwrap()) 619 | // Extract end and start values 620 | .map(|m| mapping[&m.start()]..mapping[&m.end()]) 621 | .collect() 622 | } 623 | 624 | /// HashMap 625 | pub fn create_mapping(target: &str, tab_width: usize) -> HashMap::>> { 626 | let mut result: HashMap::>> = 627 | HashMap::with_capacity_and_hasher(target.len(), BuildHasherDefault::default()); 628 | result.insert(0, 0); 629 | let mut acc_byte = 0; 630 | let mut acc_char = 0; 631 | for c in target.chars() { 632 | acc_byte += c.len_utf8(); 633 | acc_char += if c == '\t' { tab_width } else { 1 }; 634 | result.insert(acc_byte, acc_char); 635 | } 636 | result 637 | } 638 | 639 | /// Utility function to determine the width of a string, with variable tab width 640 | #[must_use] 641 | pub fn width(st: &str, tab_width: usize) -> usize { 642 | let tabs = st.matches('\t').count(); 643 | (st.width() + tabs * tab_width).saturating_sub(tabs) 644 | } 645 | 646 | 647 | /// Trim utility function to trim down a line of tokens to offset text 648 | pub fn trim(input: &[TokOpt], start: usize) -> Vec { 649 | let mut opt: Vec = input.to_vec(); 650 | let mut total_width = 0; 651 | for i in &opt { 652 | let (TokOpt::Some(txt, _) | TokOpt::None(txt)) = i; 653 | total_width += txt.len(); 654 | } 655 | let width = total_width.saturating_sub(start); 656 | while total_width != width { 657 | if let Some(token) = opt.get_mut(0) { 658 | token.nibble_front(4); 659 | total_width -= 1; 660 | if token.is_empty() { 661 | opt.remove(0); 662 | } 663 | } else { 664 | break; 665 | } 666 | } 667 | opt 668 | } 669 | 670 | /// Trim utility function to trim down a line of tokens to offset text (with length) 671 | pub fn trim_fit(input: &[TokOpt], start: usize, length: usize, tab_width: usize) -> Vec { 672 | // Form a vector of tokens 673 | let mut opt: Vec = input.to_vec(); 674 | // (1) Find the location of the starting point 675 | let start_idx = find_tok_index(input, start, tab_width); 676 | // (2) Find the location of the ending point 677 | let end_idx = find_tok_index(input, start + length, tab_width); 678 | // Trim off start token (ahead of time) 679 | if let Some((start_tok, start_rel)) = start_idx { 680 | opt.get_mut(start_tok).unwrap().skip(start_rel, tab_width); 681 | } 682 | // Trim off end token (ahead of time) 683 | if let Some((end_tok, mut end_rel)) = end_idx { 684 | if start_idx.unwrap().0 == end_tok { 685 | // Same token for start and end! 
Adjust (to account for start trim) 686 | end_rel -= start_idx.unwrap().1; 687 | } 688 | opt.get_mut(end_tok).unwrap().take(end_rel, tab_width); 689 | } 690 | // Blitz all tokens firmly behind start 691 | if let Some((start_tok, _)) = start_idx { 692 | opt.drain(..start_tok); 693 | } 694 | // Blitz all tokens firmly ahead of length 695 | if let Some((mut end_tok, _)) = end_idx { 696 | if let Some((start_tok, _)) = start_idx { 697 | // Adjust end_tok after draining of start tokens 698 | end_tok -= start_tok; 699 | } 700 | if end_tok + 1 < opt.len() { 701 | opt.drain(end_tok + 1..); 702 | } 703 | } 704 | // If we can't satisfy start or end, then just return empty handed 705 | if start_idx.is_none() && end_idx.is_none() { 706 | opt = vec![]; 707 | } 708 | // Apply padding if applicable 709 | let mut total_width: usize = opt.iter().map(|tok| width(tok.text(), tab_width)).sum(); 710 | while total_width < length { 711 | if let Some(TokOpt::None(ref mut text)) = opt.last_mut() { 712 | *text += " "; 713 | total_width += 1; 714 | } else { 715 | // No tokens left, discontinue 716 | opt.push(TokOpt::None("".to_string())); 717 | } 718 | } 719 | // Return the result 720 | opt 721 | } 722 | 723 | /// Find the token index within a tokopt given a display index 724 | /// Returns (token_index, index_within_that_token) 725 | pub fn find_tok_index(input: &[TokOpt], disp_idx: usize, tab_width: usize) -> Option<(usize, usize)> { 726 | let mut total_width = 0; 727 | for (idx, token) in input.iter().enumerate() { 728 | let this_width = width(token.text(), tab_width); 729 | total_width += this_width; 730 | // Check if we've passed the display index 731 | if total_width > disp_idx { 732 | // We have, this token contains disp_idx, work out relative idx 733 | let rel_idx = this_width - (total_width - disp_idx); 734 | return Some((idx, rel_idx)); 735 | } 736 | } 737 | None 738 | } 739 | 740 | /// Function to obtain a syntax highlighter based on a file extension 741 | pub fn from_extension(ext: &str, tab_width: usize) -> Option { 742 | let mut result = match ext.to_lowercase().as_str() { 743 | "rs" => rust_syntax_highlighter().to_owned(), 744 | "asm" | "s" => asm_syntax_highlighter().to_owned(), 745 | "py" | "pyw" => python_syntax_highlighter().to_owned(), 746 | "rb" | "ruby" => ruby_syntax_highlighter().to_owned(), 747 | "cgi" | "pm" => cgi_syntax_highlighter().to_owned(), 748 | "lua" => lua_syntax_highlighter().to_owned(), 749 | "r" | "rproj" => r_syntax_highlighter().to_owned(), 750 | "go" => go_syntax_highlighter().to_owned(), 751 | "js" => js_syntax_highlighter().to_owned(), 752 | "ts" | "tsx" => ts_syntax_highlighter().to_owned(), 753 | "dart" => dart_syntax_highlighter().to_owned(), 754 | "c" | "h" => c_syntax_highlighter().to_owned(), 755 | "cpp" | "hpp" | "c++" | "cxx" | "cc" => cpp_syntax_highlighter().to_owned(), 756 | "cs" | "csproj" => cs_syntax_highlighter().to_owned(), 757 | "swift" => swift_syntax_highlighter().to_owned(), 758 | "json" => json_syntax_highlighter().to_owned(), 759 | "kt" => kotlin_syntax_highlighter().to_owned(), 760 | "class" | "java" => java_syntax_highlighter().to_owned(), 761 | "vb" => vb_syntax_highlighter().to_owned(), 762 | "m" => m_syntax_highlighter().to_owned(), 763 | "php" => php_syntax_highlighter().to_owned(), 764 | "scala" => scala_syntax_highlighter().to_owned(), 765 | "pl" | "prolog" => prolog_syntax_highlighter().to_owned(), 766 | "hs" => haskell_syntax_highlighter().to_owned(), 767 | "css" => css_syntax_highlighter().to_owned(), 768 | "html" | "htm" | "xhtml" => 
html_syntax_highlighter().to_owned(), 769 | "md" | "markdown" => markdown_syntax_highlighter().to_owned(), 770 | "toml" => toml_syntax_highlighter().to_owned(), 771 | "yaml" | "yml" => yaml_syntax_highlighter().to_owned(), 772 | "csv" => csv_syntax_highlighter().to_owned(), 773 | "sh" | "bash" | "bash_profile" | "bashrc" => shell_syntax_highlighter().to_owned(), 774 | "sql" | "sqlproj" => sql_syntax_highlighter().to_owned(), 775 | "xml" => xml_syntax_highlighter().to_owned(), 776 | "nu" => nushell_syntax_highlighter().to_owned(), 777 | "tex" => tex_syntax_highlighter().to_owned(), 778 | "diff" => diff_syntax_highlighter().to_owned(), 779 | _ => Highlighter::new(tab_width), 780 | }; 781 | result.tab_width = tab_width; 782 | Some(result) 783 | } 784 | 785 | fn add_html_keywords(h: &mut Highlighter, kw: &[&str]) { 786 | h.keyword("keyword", &format!(r"(?:<|>() 806 | .join("|") 807 | ), 808 | ); 809 | } 810 | 811 | fn bulk_add(h: &mut Highlighter, name: &str, kw: &[&str]) { 812 | h.keyword(name, &format!(r"({})", kw.join("|"))); 813 | } 814 | 815 | fn rust_syntax_highlighter() -> &'static Highlighter { 816 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 817 | HIGHLIGHTER.get_or_init(|| { 818 | let mut result = Highlighter::new(4); 819 | result.bounded("comment", r"/\*", r"\*/", false); 820 | result.keyword("comment", "(//.*)$"); 821 | result.bounded("string", "r#\"", "\"#", true); 822 | result.bounded("string", "r\"", "\"", true); 823 | result.bounded("string", "#\"", "\"#", true); 824 | result.bounded("string", "\"", "\"", true); 825 | result.bounded("attribute", r"\#\[", r"\]", false); 826 | result.bounded("attribute", r"\#!\[", r"\]", false); 827 | result.keyword("namespace", "([a-z_][A-Za-z0-9_]*)::"); 828 | add_keywords(&mut result, &[ 829 | "as", "break", "const", "continue", "char", "crate", "else", "enum", "extern", 830 | "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", 831 | "pub", "ref", "return", "self", "static", "struct", "super", "trait", "type", 832 | "unsafe", "use", "where", "while", "async", "await", "dyn", "abstract", "become", 833 | "box", "do", "final", "macro", "override", "priv", "typeof", "unsized", "virtual", 834 | "yield", "try", "'static", "u8", "u16", "u32", "u64", "u128", "usize", "i8", "i16", 835 | "i32", "i64", "i128", "isize", "f32", "f64", "String", "Vec", "str", "Some", 836 | "bool", "None", "Box", "Result", "Option", "Ok", "Err", "Self", "std", 837 | ]); 838 | bulk_add(&mut result, "operator", &[ 839 | "&&", r"\|\|", "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", 840 | "\\-=", "\\*=", "\\\\=", "==", "!=", "\\?", ">=", "<=", "<", ">", "!", 841 | ]); 842 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 843 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f32|f64))"]); 844 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 845 | bulk_add(&mut result, "function", &[ 846 | "fn\\s+([a-z_][A-Za-z0-9_]*)\\s*\\(", 847 | "fn\\s+([a-z_][A-Za-z0-9_]*)\\s*<.*>\\s*\\(", 848 | "\\.([a-z_][A-Za-z0-9_]*)\\s*\\(", 849 | "([a-z_][A-Za-z0-9_]*)\\s*\\(", 850 | ]); 851 | bulk_add(&mut result, "struct", &[ 852 | "(?:trait|enum|struct|impl)\\s+([A-Z][A-Za-z0-9_]*)\\s*", 853 | "impl(?:<.*?>|)\\s+([A-Z][A-Za-z0-9_]*)", 854 | "([A-Z][A-Za-z0-9_]*)::", 855 | "([A-Z][A-Za-z0-9_]*)\\s*\\(", 856 | "impl.*for\\s+([A-Z][A-Za-z0-9_]*)", 857 | "::\\s*([a-z_][A-Za-z0-9_]*)\\s*\\(", 858 | ]); 859 | bulk_add(&mut result, "macro", &["\\b([a-z_][a-zA-Z0-9_]*!)", "(\\$[a-z_][A-Za-z0-9_]*)"]); 860 | 
bulk_add(&mut result, "reference", &[ 861 | "&", "&str", "&mut", "&self", "&i8", "&i16", "&i32", "&i64", "&i128", "&isize", 862 | "&u8", "&u16", "&u32", "&u64", "&u128", "&usize", "&f32", "&f64", 863 | ]); 864 | result 865 | }) 866 | } 867 | 868 | fn asm_syntax_highlighter() -> &'static Highlighter { 869 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 870 | HIGHLIGHTER.get_or_init(|| { 871 | let mut result = Highlighter::new(4); 872 | result.keyword("function", "([a-zA-Z_]+)\\:$"); 873 | result.keyword("comment", "(;.*)$"); 874 | result.keyword("digit", "\\b((?:0x)?\\d+.\\d+|\\d+)"); 875 | result.bounded("string", "\"", "\"", true); 876 | add_keywords_case_indep( 877 | &mut result, 878 | &[ 879 | "mov", "add", "sub", "jmp", "call", "ret", "bss", "data", "text", "section", 880 | "globl", "extern", "db", "eax", "ebx", "ecx", "edx", "esp", "ebp", "int", "xor", 881 | "imul", "inc", "jle", "cmp", "global", "section", "resb", 882 | ], 883 | ); 884 | result 885 | }) 886 | } 887 | 888 | fn python_syntax_highlighter() -> &'static Highlighter { 889 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 890 | HIGHLIGHTER.get_or_init(|| { 891 | let mut result = Highlighter::new(4); 892 | result.keyword("comment", "(#.*)$"); 893 | result.bounded("string", "\"\"\"", "\"\"\"", true); 894 | result.bounded("string", "\'\'\'", "\'\'\'", true); 895 | result.bounded("string", "b\"", "\"", true); 896 | result.bounded("string", "r\"", "\"", true); 897 | result.bounded_interp("string", "f\"", "\"", "\\{", "\\}", true); 898 | result.bounded("string", "\"", "\"", true); 899 | result.bounded("string", "b\'", "\'", true); 900 | result.bounded("string", "r\'", "\'", true); 901 | result.bounded_interp("string", "f\'", "\'", "\\{", "\\}", true); 902 | result.bounded("string", "\'", "\'", true); 903 | add_keywords(&mut result, &[ 904 | "and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", 905 | "exec", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "not", 906 | "or", "pass", "print", "raise", "return", "try", "while", "with", "yield", "str", "bool", 907 | "int", "tuple", "list", "dict", "tuple", "len", "None", "input", "type", "set", "range", 908 | "enumerate", "open", "iter", "min", "max", "dir", "self", "isinstance", "help", "next", 909 | "super", "match", "case", 910 | ]); 911 | result.keyword("attribute", "@.*$"); 912 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 913 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 914 | bulk_add(&mut result, "operator", &[ 915 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(\s//\s)", r"(%)", r"(\+=)", 916 | r"(\-=)", r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", 917 | ]); 918 | bulk_add(&mut result, "boolean", &["\\b(True)\\b", "\\b(False)\\b"]); 919 | bulk_add(&mut result, "function", &[ 920 | "def\\s+([a-z_][A-Za-z0-9_]*)", 921 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 922 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 923 | ]); 924 | result 925 | }) 926 | } 927 | 928 | fn ruby_syntax_highlighter() -> &'static Highlighter { 929 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 930 | HIGHLIGHTER.get_or_init(|| { 931 | let mut result = Highlighter::new(4); 932 | result.keyword("comment", "(#.*)$"); 933 | result.bounded("comment", "=begin", "=end", false); 934 | result.bounded_interp("string", "\"", "\"", "#\\{", "\\}", true); 935 | result.bounded("string", "\'", "\'", true); 936 | result.keyword("string", r"(\:[a-zA-Z_]+)"); 937 | add_keywords(&mut result, &[ 938 | "__ENCODING__", 
"__LINE__", "__FILE__", "BEGIN", "END", "alias", "and", "begin", "break", 939 | "case", "class", "def", "defined?", "do", "else", "elsif", "end", "ensure", "for", "if", 940 | "in", "module", "next", "nil", "not", "or", "redo", "rescue", "retry", "return", "self", 941 | "super", "then", "undef", "unless", "until", "when", "while", "yield", "extend", "include", 942 | "attr_reader", "attr_writer", "attr_accessor", 943 | ]); 944 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 945 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 946 | bulk_add(&mut result, "operator", &[ 947 | "!!", "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", "\\-=", "\\*=", "\\\\=", 948 | "==", "!=", "\\?", ">=", "<=", "<", ">", "&&", "\\|\\|", "!", "&", "\\|", "\\^", 949 | "%", 950 | ]); 951 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 952 | bulk_add(&mut result, "function", &[ 953 | "def\\s+([a-z_][A-Za-z0-9_]*)", 954 | "^\\s*([a-z_][A-Za-z0-9_]*)\\s+[^=]", 955 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 956 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 957 | ]); 958 | result 959 | }) 960 | } 961 | 962 | fn cgi_syntax_highlighter() -> &'static Highlighter { 963 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 964 | HIGHLIGHTER.get_or_init(|| { 965 | let mut result = Highlighter::new(4); 966 | result.keyword("comment", "(#.*)$"); 967 | result.bounded_interp("string", "\"", "\"", "#\\{", "\\}", true); 968 | result.bounded("string", "(?:m|s)/", "/", true); 969 | result.bounded("string", "\'", "\'", true); 970 | result.keyword("string", r"(\:[a-zA-Z_]+)"); 971 | add_keywords(&mut result, &[ 972 | "if", "else", "elsif", "unless", "while", "for", "foreach", "until", "do", "next", 973 | "last", "goto", "return", "sub", "my", "local", "our", "package", "use", "require", 974 | "import", "undef", "and", "or", "not", "eq", "ne", "lt", "le", "gt", "ge", "cmp", 975 | "qw", "scalar", "array", "hash", "undef", "undef", "ref", "bless", "glob", "filehandle", 976 | "code", "regexp", "integer", "float", "string", "boolean", "reference", "die", 977 | ]); 978 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 979 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)"); 980 | bulk_add(&mut result, "operator", &[ 981 | "!!", "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", "\\-=", "\\*=", "\\\\=", 982 | "==", "!=", "\\?", ">=", "<=", "<", ">", "\\$","&&", "\\|\\|", "!", "&", "\\|", 983 | "\\^", "(?:\\\\)?%", "\\\\@", 984 | ]); 985 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 986 | bulk_add(&mut result, "function", &[ 987 | "sub\\s+([a-z_][A-Za-z0-9_]*)", 988 | "^\\s*([a-z_][A-Za-z0-9_]*)\\s+[^=]", 989 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 990 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 991 | ]); 992 | result 993 | }) 994 | } 995 | 996 | fn lua_syntax_highlighter() -> &'static Highlighter { 997 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 998 | HIGHLIGHTER.get_or_init(|| { 999 | let mut result = Highlighter::new(4); 1000 | result.bounded("comment", r"--\[\[", r"\]\]--", false); 1001 | result.keyword("comment", "(--.*)$"); 1002 | result.bounded("string", "\"", "\"", true); 1003 | result.bounded("string", "\'", "\'", true); 1004 | result.bounded("string", "\\[\\[", "\\]\\]", true); 1005 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1006 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1007 | bulk_add(&mut result, "function", &[ 1008 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1009 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1010 | ]); 1011 | bulk_add(&mut result, "operator", &[ 1012 | 
r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", 1013 | r"(\+=)", r"(\-=)", r"(\*=)", r"(\\=)", r"(\.\.)", r"(==)", r"(~=)", 1014 | r"(>=)", r"(<=)", r"(<)", r"(>)", r"(#)", r"(<<)", r"(>>)", r"\b(and)\b", 1015 | r"\b(or)\b", r"\b(not)\b", 1016 | ]); 1017 | add_keywords(&mut result, &[ 1018 | "break", "do", "else", "elseif", "end", "false", "for", "function", "if", "in", 1019 | "local", "nil", "repeat", "return", "then", "true", "until", "while", "self", 1020 | ]); 1021 | result 1022 | }) 1023 | } 1024 | 1025 | fn r_syntax_highlighter() -> &'static Highlighter { 1026 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1027 | HIGHLIGHTER.get_or_init(|| { 1028 | let mut result = Highlighter::new(4); 1029 | result.keyword("comment", "(#.*)$"); 1030 | result.bounded("string", "\"", "\"", true); 1031 | result.bounded("string", "\'", "\'", true); 1032 | bulk_add(&mut result, "boolean", &["\\b(FALSE)\\b", "\\b(TRUE)\\b"]); 1033 | add_keywords(&mut result, &[ 1034 | "if", "else", "repeat", "while", "function", "for", "in", "next", "break", "TRUE", 1035 | "FALSE", "NULL", "Inf", "NaN", "NA", "NA_integer_", "NA_real_", "NA_complex_", 1036 | "NA_character_", r"\.\.\.", 1037 | ]); 1038 | result.keyword("attribute", "@.*$"); 1039 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1040 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 1041 | bulk_add(&mut result, "operator", &[ 1042 | r"<-", r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(\s//\s)", r"(&)", r"(%)", 1043 | r"(\+=)", r"(\-=)", r"(\*=)", r"(\\=)", r"(\$)", r"(|)", r"(==)", r"(!=)", r"(>=)", 1044 | r"(<=)", r"(<)", r"(>)", r"(\?)", 1045 | ]); 1046 | bulk_add(&mut result, "function", &[ 1047 | "def\\s+([a-z_][A-Za-z0-9_]*)", 1048 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1049 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1050 | ]); 1051 | result 1052 | }) 1053 | } 1054 | 1055 | fn go_syntax_highlighter() -> &'static Highlighter { 1056 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1057 | HIGHLIGHTER.get_or_init(|| { 1058 | let mut result = Highlighter::new(4); 1059 | result.bounded("comment", r"/\*", r"\*/", false); 1060 | result.keyword("comment", "(//.*)$"); 1061 | result.bounded("string", "\"", "\"", true); 1062 | result.bounded("string", "`", "`", true); 1063 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1064 | add_keywords(&mut result, &[ 1065 | "break", "case", "chan", "const", "continue", "default", "defer", "else", "fallthrough", 1066 | "for", "func", "go", "goto", "if", "import", "interface", "map", "package", "range", 1067 | "return", "select", "struct", "switch", "type", "var", "bool", "byte", "complex64", "complex128", 1068 | "error", "float32", "float64", "int", "int8", "int16", "int32", "int64", "rune", "string", 1069 | ]); 1070 | bulk_add(&mut result, "operator", &[ 1071 | ":=", "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", "\\-=", "\\*=", "\\\\=", 1072 | "==", "!=", "\\?", ">=", "<=", "<", ">", 1073 | ]); 1074 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f32|f64))"]); 1075 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1076 | bulk_add(&mut result, "function", &[ 1077 | "func\\s+([A-Za-z0-9_]+)\\s*\\(", 1078 | "\\.([A-Za-z0-9_]+)\\s*\\(", 1079 | "([A-Za-z0-9_]+)\\s*\\(", 1080 | ]); 1081 | bulk_add(&mut result, "reference", &["&"]); 1082 | result 1083 | }) 1084 | } 1085 | 1086 | fn js_syntax_highlighter() -> &'static Highlighter { 1087 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1088 | HIGHLIGHTER.get_or_init(|| { 1089 | let mut 
result = Highlighter::new(4); 1090 | result.bounded("comment", r"/\*", r"\*/", false); 1091 | result.keyword("comment", "//.*$"); 1092 | result.bounded("string", "r\"", "\"", true); 1093 | result.bounded("string", "f\"", "\"", true); 1094 | result.bounded("string", "\"", "\"", true); 1095 | result.bounded("string", "r\'", "\'", true); 1096 | result.bounded("string", "f\'", "\'", true); 1097 | result.bounded("string", "\'", "\'", true); 1098 | result.bounded_interp("string", "r`", "`", "\\$\\{", "\\}", true); 1099 | result.bounded_interp("string", "f`", "`", "\\$\\{", "\\}", true); 1100 | result.bounded_interp("string", "`", "`", "\\$\\{", "\\}", true); 1101 | result.bounded("string", "/", "/", true); 1102 | add_keywords(&mut result, &[ 1103 | "abstract", "arguments", "await", "boolean", "break", "byte", "case", "catch", "char", 1104 | "class", "const", "continue", "debugger", "default", "delete", "do", "double", "else", 1105 | "enum", "eval", "export", "extends", "final", "finally", "float", "for", "of", "function", 1106 | "goto", "if", "implements", "import", "in", "instanceof", "int", "interface", "let", "long", 1107 | "native", "new", "null", "package", "private", "protected", "public", "return", "short", 1108 | "static", "super", "switch", "synchronized", "this", "throw", "throws", "transient", "try", 1109 | "typeof", "var", "void", "volatile", "console", "while", "with", "yield", "undefined", "NaN", 1110 | "-Infinity", "Infinity", 1111 | ]); 1112 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1113 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 1114 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1115 | bulk_add(&mut result, "function", &[ 1116 | "function\\s+([a-z_][A-Za-z0-9_]*)", 1117 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1118 | "\\.([a-z_][A-Za-z0-9_]*)\\s*", 1119 | ]); 1120 | bulk_add(&mut result, "operator", &[ 1121 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1122 | r"(\-=)", r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", 1123 | r"(>)", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", 1124 | ]); 1125 | result 1126 | }) 1127 | } 1128 | 1129 | fn ts_syntax_highlighter() -> &'static Highlighter { 1130 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1131 | HIGHLIGHTER.get_or_init(|| { 1132 | let mut result = Highlighter::new(4); 1133 | result.bounded("comment", r"/\*", r"\*/", false); 1134 | result.keyword("comment", "//.*$"); 1135 | result.bounded("string", "r\"", "\"", true); 1136 | result.bounded("string", "f\"", "\"", true); 1137 | result.bounded("string", "\"", "\"", true); 1138 | result.bounded("string", "r\'", "\'", true); 1139 | result.bounded("string", "f\'", "\'", true); 1140 | result.bounded("string", "\'", "\'", true); 1141 | result.bounded_interp("string", "r`", "`", "\\$\\{", "\\}", true); 1142 | result.bounded_interp("string", "f`", "`", "\\$\\{", "\\}", true); 1143 | result.bounded_interp("string", "`", "`", "\\$\\{", "\\}", true); 1144 | result.bounded("string", "/", "/", true); 1145 | add_keywords(&mut result, &[ 1146 | "abstract", "any", "as", "asserts", "boolean", "break", "case", "catch", "class", "const", "constructor", 1147 | "continue", "debugger", "declare", "default", "delete", "do", "else", "enum", "export", "extends", "false", 1148 | "finally", "for", "from", "function", "get", "if", "implements", "import", "in", "infer", "instanceof", 1149 | "interface", "is", "keyof", "let", "module", "namespace", "never", "new", "null", "number", "object", "package", 1150 | 
"private", "protected", "public", "readonly", "require", "global", "return", "set", "static", "string", 1151 | "super", "switch", "symbol", "this", "throw", "true", "try", "type", "typeof", "undefined", "unique", "unknown", 1152 | "var", "void", "while", "with", "yield", 1153 | ]); 1154 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1155 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 1156 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1157 | bulk_add(&mut result, "function", &[ 1158 | "function\\s+([a-z_][A-Za-z0-9_]*)", 1159 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1160 | "\\.([a-z_][A-Za-z0-9_]*)\\s*", 1161 | ]); 1162 | bulk_add(&mut result, "operator", &[ 1163 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1164 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1165 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1166 | ]); 1167 | result 1168 | }) 1169 | } 1170 | 1171 | fn dart_syntax_highlighter() -> &'static Highlighter { 1172 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1173 | HIGHLIGHTER.get_or_init(|| { 1174 | let mut result = Highlighter::new(4); 1175 | result.bounded("comment", r"/\*", r"\*/", false); 1176 | result.keyword("comment", "//.*$"); 1177 | result.bounded("string", "\"\"\"", "\"\"\"", true); 1178 | result.bounded("string", "\'\'\'", "\'\'\'", true); 1179 | result.bounded_interp("string", "\"", "\"", "\\$\\{", "\\}", true); 1180 | result.bounded("string", "\'", "\'", true); 1181 | add_keywords(&mut result, &[ 1182 | "abstract", "as", "assert", "async", "await", "break", "case", "catch", "class", "const", "continue", "covariant", "default", 1183 | "deferred", "do", "dynamic", "else", "enum", "export", "extends", "extension", "external", "factory", "false", "final", "finally", 1184 | "for", "Function", "get", "hide", "if", "implements", "import", "in", "inout", "interface", "is", "late", "library", "mixin", 1185 | "new", "null", "on", "operator", "out", "part", "required", "rethrow", "return", "set", "show", "static", "super", "switch", 1186 | "sync", "this", "throw", "true", "try", "typedef", "var", "void", "while", "with", "yield", "int", "double", "num", "string", 1187 | ]); 1188 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1189 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]+)"); 1190 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1191 | bulk_add(&mut result, "function", &[ 1192 | "\\b([a-z_][A-Za-z0-9_]*)(?:<[A-Za-z_]*>)?\\s*\\(", 1193 | "\\.([a-z_][A-Za-z0-9_]*)\\s*", 1194 | ]); 1195 | bulk_add(&mut result, "operator", &[ 1196 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1197 | r"(\-=)", r"(\*=)", r"(\\=)", "~/", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", 1198 | r"(>)", "\\?", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", "\\?\\?", 1199 | ]); 1200 | result 1201 | }) 1202 | } 1203 | 1204 | fn c_syntax_highlighter() -> &'static Highlighter { 1205 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1206 | HIGHLIGHTER.get_or_init(|| { 1207 | let mut result = Highlighter::new(4); 1208 | result.bounded("comment", r"/\*", r"\*/", false); 1209 | result.keyword("comment", "(//.*)$"); 1210 | result.bounded("string", "\"", "\"", true); 1211 | add_keywords(&mut result, &[ 1212 | "auto", "break", "case", "char", "const", "continue", "default", "do", "double", 1213 | "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", 1214 | "return", "short", "signed", "sizeof", "static", 
"struct", "switch", "typedef", 1215 | "union", "unsigned", "void", "volatile", "while", "printf", "fscanf", "scanf", 1216 | "fputsf", "exit", "stderr", "malloc", "calloc", "bool", "realloc", "free", 1217 | "strlen", "size_t", 1218 | ]); 1219 | result.keyword("struct", "\\}\\s+([A-Za-z0-9_]+)\\s*"); 1220 | result.keyword("attribute", "^\\s*(#.*?)\\s"); 1221 | result.keyword("header", "(<.*?>)"); 1222 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f|))"]); 1223 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1224 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1225 | bulk_add(&mut result, "function", &[ 1226 | "(int|bool|void|char|double|long|short|size_t)\\s+([a-z_][A-Za-z0-9_]*)\\s*\\(", 1227 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1228 | ]); 1229 | bulk_add(&mut result, "operator", &[ 1230 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(%)", r"(\+=)", r"(\-=)", 1231 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", 1232 | r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", 1233 | ]); 1234 | result 1235 | }) 1236 | } 1237 | 1238 | fn cpp_syntax_highlighter() -> &'static Highlighter { 1239 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1240 | HIGHLIGHTER.get_or_init(|| { 1241 | let mut result = Highlighter::new(4); 1242 | result.bounded("comment", r"/\*", r"\*/", false); 1243 | result.keyword("comment", "(//.*)$"); 1244 | result.bounded("string", "\"", "\"", true); 1245 | add_keywords(&mut result, &[ 1246 | "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", 1247 | "catch", "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept", "const", "consteval", "constexpr", 1248 | "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype", "default", 1249 | "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", "float", 1250 | "for", "friend", "goto", "if", "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "not", "not_eq", 1251 | "nullptr", "operator", "or", "or_eq", "private", "protected", "public", "register", "reinterpret_cast", "requires", "return", 1252 | "short", "signed", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", "template", "this", 1253 | "thread_local", "throw", "true", "try", "typedef", "typeid", "typename", "union", "unsigned", "using", "virtual", 1254 | "void", "volatile", "wchar_t", "while", "xor", "xor_eq", "std", "string", 1255 | ]); 1256 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)"); 1257 | result.keyword("attribute", "^\\s*(#[a-zA-Z_]+)\\s*"); 1258 | bulk_add(&mut result, "operator", &[ 1259 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(%)", r"(\+=)", r"(\-=)", 1260 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", 1261 | r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(|)", r"(&)", r"(^)", r"(~)", 1262 | ]); 1263 | result.keyword("header", "(<.*?>)"); 1264 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f|))"]); 1265 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1266 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1267 | bulk_add(&mut result, "function", &[ 1268 | "(int|bool|void|char|double|long|short|size_t)\\s+([a-z_][A-Za-z0-9_]*)\\s*\\(", 1269 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1270 | ]); 1271 | result 1272 | }) 1273 | } 1274 | 1275 | fn cs_syntax_highlighter() -> &'static Highlighter { 1276 | 
static HIGHLIGHTER: OnceLock = OnceLock::new(); 1277 | HIGHLIGHTER.get_or_init(|| { 1278 | let mut result = Highlighter::new(4); 1279 | result.bounded("comment", r"/\*", r"\*/", false); 1280 | result.keyword("comment", "(//.*)$"); 1281 | result.bounded("string", "\"", "\"", true); 1282 | add_keywords(&mut result, &[ 1283 | "abstract", "as", "base", "bool", "break", "byte", "case", "catch", "char", "checked", 1284 | "class", "const", "continue", "decimal", "default", "delegate", "do", "double", "else", 1285 | "enum", "event", "explicit", "extern", "false", "finally", "fixed", "float", "for", 1286 | "foreach", "goto", "if", "implicit", "in", "int", "interface", "internal", "is", "lock", 1287 | "long", "namespace", "new", "null", "object", "operator", "out", "override", "params", 1288 | "private", "protected", "public", "readonly", "ref", "return", "sbyte", "sealed", 1289 | "short", "sizeof", "stackalloc", "static", "string", "struct", "switch", "this", "throw", 1290 | "true", "try", "typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using", 1291 | "using", "static", "virtual", "void", "volatile", "while", "add", "alias", "ascending", "async", 1292 | "await", "by", "descending", "dynamic", "equals", "from", "get", "global", "group", 1293 | "into", "join", "let", "nameof", "on", "orderby", "partial", "remove", "select", "set", 1294 | "unmanaged", "value", "var", "when", "where", "with", "yield", 1295 | ]); 1296 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)"); 1297 | bulk_add(&mut result, "operator", &[ 1298 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(%)", r"(\+=)", r"(\-=)", 1299 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", 1300 | r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(|)", r"(&)", r"(^)", r"(~)", 1301 | ]); 1302 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f|m|))"]); 1303 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1304 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1305 | bulk_add(&mut result, "function", &[ 1306 | "(int|bool|void|char|double|long|short|size_t)\\s+([a-z_][A-Za-z0-9_]*)\\s*\\(", 1307 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1308 | ]); 1309 | result 1310 | }) 1311 | } 1312 | 1313 | fn swift_syntax_highlighter() -> &'static Highlighter { 1314 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1315 | HIGHLIGHTER.get_or_init(|| { 1316 | let mut result = Highlighter::new(4); 1317 | result.bounded("comment", r"/\*", r"\*/", false); 1318 | result.keyword("comment", "(//.*)$"); 1319 | result.bounded_interp("string", "#\"", "\"#", "\\\\#?\\(", "\\)", true); 1320 | result.bounded("string", "\"\"\"", "\"\"\"", true); 1321 | result.bounded_interp("string", "\"", "\"", "\\\\\\(", "\\)", true); 1322 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1323 | add_keywords(&mut result, &[ 1324 | "associatedtype", "class", "deinit", "enum", "extension", "fileprivate", "func", 1325 | "import", "init", "inout", "internal", "let", "open", "operator", "private", 1326 | "protocol", "public", "static", "struct", "subscript", "typealias", "var", "break", 1327 | "case", "continue", "default", "defer", "do", "else", "fallthrough", "for", "guard", 1328 | "if", "in", "repeat", "return", "switch", "where", "while", "as", "catch", "throw", 1329 | "try", "Any", "false", "is", "nil", "super", "self", "Self", "true", "associativity", 1330 | "convenience", "dynamic", "didSet", "final", "get", "infix", "indirect", "lazy", "left", 1331 | "mutating", "none", "nonmutating", 
"optional", "override", "postfix", "precedence", "prefix", 1332 | "Protocol", "required", "right", "set", "Type", "unowned", "weak", "willSet", "Int", 1333 | "String", "Double", "Optional", "endif", 1334 | ]); 1335 | bulk_add(&mut result, "operator", &[ 1336 | "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", "\\-=", "\\*=", "\\\\=", "==", 1337 | "!=", "\\?", ">=", "<=", "<", ">", "!", 1338 | ]); 1339 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f32|f64))"]); 1340 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1341 | bulk_add(&mut result, "function", &[ 1342 | "func\\s+([a-z_][A-Za-z0-9_]*)\\s*(?:\\(|<)", 1343 | "\\.([a-z_][A-Za-z0-9_]*)\\s*\\(", 1344 | "([a-z_][A-Za-z0-9_]*)\\s*\\(", 1345 | ]); 1346 | result 1347 | }) 1348 | } 1349 | 1350 | fn json_syntax_highlighter() -> &'static Highlighter { 1351 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1352 | HIGHLIGHTER.get_or_init(|| { 1353 | let mut result = Highlighter::new(4); 1354 | result.bounded("string", "\"", "\"", true); 1355 | result.keyword("keyword", r"\b(null)\b"); 1356 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1357 | result.keyword("boolean", "\\b(true|false)\\b"); 1358 | result 1359 | }) 1360 | } 1361 | 1362 | fn kotlin_syntax_highlighter() -> &'static Highlighter { 1363 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1364 | HIGHLIGHTER.get_or_init(|| { 1365 | let mut result = Highlighter::new(4); 1366 | result.bounded("comment", r"/\*", r"\*/", false); 1367 | result.keyword("comment", "(//.*)$"); 1368 | result.bounded("string", "\"\"\"", "\"\"\"", true); 1369 | result.bounded("string", "\"", "\"", true); 1370 | result.keyword("attribute", r"@\w+"); 1371 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1372 | result.keyword("boolean", "\\b(true|false)\\b"); 1373 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1374 | bulk_add(&mut result, "operator", &[ 1375 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1376 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1377 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1378 | ]); 1379 | add_keywords(&mut result, &[ 1380 | "abstract", "actual", "annotation", "companion", "constructor", "enum", "external", "expect", 1381 | "final", "fun", "inline", "inner", "interface", "internal", "private", "protected", "public", 1382 | "sealed", "suspend", "tailrec", "vararg", "as", "break", "class", "continue", "do", "else", 1383 | "false", "for", "if", "in", "is", "null", "object", "infix", "package", "return", "super", "this", 1384 | "throw", "true", "try", "data", "typealias", "typeof", "val", "when", "while", "var", "operator", 1385 | "override", 1386 | ]); 1387 | bulk_add(&mut result, "function", &[ 1388 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1389 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1390 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\{", 1391 | ]); 1392 | result 1393 | }) 1394 | } 1395 | 1396 | fn java_syntax_highlighter() -> &'static Highlighter { 1397 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1398 | HIGHLIGHTER.get_or_init(|| { 1399 | let mut result = Highlighter::new(4); 1400 | result.bounded("comment", r"/\*", r"\*/", false); 1401 | result.keyword("comment", "(//.*)$"); 1402 | result.bounded("string", "\"", "\"", true); 1403 | result.keyword("attribute", r"@\w+"); 1404 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1405 | result.keyword("boolean", "\\b(true|false)\\b"); 1406 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1407 | 
bulk_add(&mut result, "operator", &[ 1408 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1409 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1410 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1411 | ]); 1412 | add_keywords(&mut result, &[ 1413 | "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class", "const", "continue", 1414 | "default", "do", "double", "else", "enum", "extends", "final", "finally", "float", "for", "if", "goto", 1415 | "implements", "import", "instanceof", "int", "interface", "long", "native", "new", "package", "private", 1416 | "protected", "public", "return", "short", "static", "strictfp", "super", "switch", "synchronized", "this", 1417 | "throw", "throws", "transient", "try", "var", "void", "volatile", "while", "null", 1418 | ]); 1419 | bulk_add(&mut result, "function", &[ 1420 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1421 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1422 | ]); 1423 | result 1424 | }) 1425 | } 1426 | 1427 | fn vb_syntax_highlighter() -> &'static Highlighter { 1428 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1429 | HIGHLIGHTER.get_or_init(|| { 1430 | let mut result = Highlighter::new(4); 1431 | result.keyword("comment", "('.*)$"); 1432 | result.bounded("string", "\"", "\"", true); 1433 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1434 | bulk_add(&mut result, "function", &["\\b([A-Za-z0-9_\\?!]*)\\s*\\("]); 1435 | bulk_add(&mut result, "operator", &[ 1436 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1437 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1438 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1439 | ]); 1440 | add_keywords(&mut result, &[ 1441 | "AddHandler", "AddressOf", "Alias", "And", "AndAlso", "Ansi", "As", "Assembly", "Auto", "Boolean", 1442 | "ByRef", "Byte", "ByVal", "Call", "Case", "Catch", "CBool", "CByte", "CChar", "CDate", "CDec", "CDbl", 1443 | "Char", "CInt", "Class", "CLng", "CObj", "Const", "CShort", "CSng", "CStr", "CType", "Date", "Decimal", 1444 | "Declare", "Default", "Delegate", "Dim", "DirectCast", "Do", "Double", "Each", "Else", "ElseIf", "End", 1445 | "Enum", "Erase", "Error", "Event", "Exit", "False", "Finally", "For", "Friend", "Function", "Get", "GetType", 1446 | "GoSub", "GoTo", "Handles", "If", "Implements", "Imports", "In", "Inherits", "Integer", "Interface", 1447 | "Is", "IsNot", "Let", "Lib", "Like", "Long", "Loop", "Me", "Mod", "Module", "MustInherit", "MustOverride", 1448 | "MyBase", "MyClass", "Namespace", "Narrowing", "New", "Next", "Not", "Nothing", "NotInheritable", 1449 | "NotOverridable", "Object", "Of", "On", "Operator", "Option", "Optional", "Or", "OrElse", "Out", "Overloads", 1450 | "Overridable", "Overrides", "ParamArray", "Partial", "Private", "Property", "Protected", "Public", "RaiseEvent", 1451 | "ReadOnly", "ReDim", "REM", "RemoveHandler", "Resume", "Return", "SByte", "Select", "Set", "Shadows", "Shared", 1452 | "Short", "Single", "Static", "Step", "Stop", "String", "Structure", "Sub", "SyncLock", "Then", "Throw", "To", 1453 | "True", "Try", "TryCast", "TypeOf", "UInteger", "ULong", "UShort", "Using", "Variant", "Wend", "When", "While", 1454 | "Widening", "With", "WithEvents", "WriteOnly", "Xor", "Console", 1455 | ]); 1456 | result 1457 | }) 1458 | } 1459 | 1460 | fn m_syntax_highlighter() -> &'static Highlighter { 1461 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1462 | HIGHLIGHTER.get_or_init(|| { 1463 | let 
mut result = Highlighter::new(4); 1464 | result.bounded("comment", "%\\{", "%\\}", true); 1465 | result.keyword("comment", "(%.*)$"); 1466 | result.bounded("string", "\'", "\'", true); 1467 | result.keyword("boolean", "\\b(true|false)\\b"); 1468 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1469 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1470 | bulk_add(&mut result, "operator", &[ 1471 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1472 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1473 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1474 | ]); 1475 | add_keywords(&mut result, &[ 1476 | "break", "case", "catch", "classdef", "continue", "else", "elseif", "end", "for", "function", 1477 | "global", "if", "otherwise", "parfor", "persistent", "return", "spmd", "switch", "try", "while", 1478 | "inf", "nan", "int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64", "single", 1479 | "double", "char", "string", "cell", "struct", "table", "datetime", "properties", "NaN", "max", 1480 | "min", "length", "sort", "sum", "prod", "mode", "median", "mean", "std", "pi", "randi", "randn", 1481 | "rand", "clf", "shg", "close", "path", "addpath", "rmpath", "cd", "grid", "on", "axis", "square", 1482 | "equal", "off", "hold", "help", "doc", "lookfor", "profile", "viewer", "clc", "diary", "ctrl-c", "who", 1483 | "whos", "clear", "load", "format", "short", "long", "bank", 1484 | ]); 1485 | bulk_add(&mut result, "function", &[ 1486 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1487 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1488 | ]); 1489 | result 1490 | }) 1491 | } 1492 | 1493 | fn php_syntax_highlighter() -> &'static Highlighter { 1494 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1495 | HIGHLIGHTER.get_or_init(|| { 1496 | let mut result = Highlighter::new(4); 1497 | result.bounded("comment", r"/\*", r"\*/", false); 1498 | result.keyword("comment", "(//.*)$"); 1499 | result.keyword("comment", "(#.*)$"); 1500 | result.bounded_interp("string", "\"", "\"", "\\{", "\\}", true); 1501 | result.bounded_interp("string", "\"", "\"", "\\$\\{", "\\}", true); 1502 | result.bounded("string", "\'", "\'", true); 1503 | result.keyword("boolean", "\\b(true|false|TRUE|FALSE)\\b"); 1504 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1505 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1506 | bulk_add(&mut result, "function", &[ 1507 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1508 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1509 | ]); 1510 | add_keywords(&mut result, &[ 1511 | "__halt_compiler", "abstract", "and", "array", "as", "break", "callable", "case", 1512 | "catch", "class", "clone", "const", "continue", "declare", "default", "die", "do", 1513 | "echo", "else", "elseif", "empty", "enddeclare", "endfor", "endforeach", "endif", 1514 | "endswitch", "endwhile", "eval", "exit", "extends", "final", "finally", "for", 1515 | "foreach", "function", "global", "goto", "if", "implements", "include", "include_once", 1516 | "instanceof", "insteadof", "interface", "isset", "list", "namespace", "new", "or", 1517 | "print", "private", "protected", "public", "require", "require_once", "return", "static", 1518 | "switch", "throw", "trait", "try", "unset", "use", "var", "while", "xor", 1519 | "__CLASS__", "__DIR__", "__FILE__", "__FUNCTION__", "__LINE__", "__METHOD__", 1520 | "__NAMESPACE__", "__TRAIT__", "null", 1521 | ]); 1522 | result.keyword("keyword", r"<\?php"); 1523 | result.keyword("keyword", r"\?>"); 1524 | bulk_add(&mut result, 
"operator", &[ 1525 | r"(->)", r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1526 | r"(\-=)", r"(\*=)", r"(\\=)", r"(\?)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", 1527 | r"(>)", r"(\$)", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(\.)", 1528 | ]); 1529 | result 1530 | }) 1531 | } 1532 | 1533 | fn scala_syntax_highlighter() -> &'static Highlighter { 1534 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1535 | HIGHLIGHTER.get_or_init(|| { 1536 | let mut result = Highlighter::new(4); 1537 | result.bounded("comment", r"/\*", r"\*/", false); 1538 | result.keyword("comment", "(//.*)$"); 1539 | result.bounded_interp("string", "f\"", "\"", "\\$\\{", "\\}", true); 1540 | result.bounded_interp("string", "s\"", "\"", "\\$\\{", "\\}", true); 1541 | result.bounded("string", "\"\"\"", "\"\"\"", true); 1542 | result.bounded("string", "raw\"", "\"", true); 1543 | result.bounded("string", "\"", "\"", true); 1544 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1545 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1546 | result.keyword("boolean", "\\b(true|false)\\b"); 1547 | bulk_add(&mut result, "operator", &[ 1548 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", r"(\*=)", r"(\\=)", 1549 | r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", 1550 | ]); 1551 | add_keywords(&mut result, &[ 1552 | "abstract", "case", "catch", "class", "def", "do", "else", "extends", "false", "final", "finally", 1553 | "for", "forSome", "if", "implicit", "import", "lazy", "macro", "match", "new", "null", "object", 1554 | "override", "package", "private", "protected", "return", "sealed", "super", "this", "throw", "trait", 1555 | "try", "true", "type", "val", "var", "while", "with", "yield", "Boolean", "Byte", "Char", "Double", 1556 | "Float", "Int", "Long", "Short", "String", "Unit", "Any", "AnyVal", "AnyRef", "Nothing", "Null", 1557 | "foreach", "map", "println", "to", "by", 1558 | ]); 1559 | bulk_add(&mut result, "function", &[ 1560 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1561 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1562 | ]); 1563 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1564 | result 1565 | }) 1566 | } 1567 | 1568 | fn prolog_syntax_highlighter() -> &'static Highlighter { 1569 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1570 | HIGHLIGHTER.get_or_init(|| { 1571 | let mut result = Highlighter::new(4); 1572 | result.keyword("comment", "(\\%.*)$"); 1573 | result.bounded("string", "\"", "\"", true); 1574 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1575 | result.keyword("boolean", "\\b(true|false)\\b"); 1576 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1577 | add_keywords_no_boundary(&mut result, &[ 1578 | ":-", "\\,", "\\.", ";", "\\->", "\\+", "=", "is", "not", "fail", "!", "repeat", "call", "cut", 1579 | "assert", "asserta", "assertz", "retract", "abolish", "dynamic", "consult", "listing", "op", 1580 | "assertions", "clauses", "predicate", "query", "rule", "fact", "variable", "atom", "number", 1581 | "list", "compound", "ground", "callable", "atom", "number", "integer", "float", "variable", 1582 | "list", "compound", 1583 | ]); 1584 | bulk_add(&mut result, "operator", &[ 1585 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(<)", r"(>)", 1586 | ]); 1587 | bulk_add(&mut result, "function", &["\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\("]); 1588 | result 1589 | }) 1590 | } 1591 | 1592 | fn haskell_syntax_highlighter() -> &'static 
Highlighter { 1593 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1594 | HIGHLIGHTER.get_or_init(|| { 1595 | let mut result = Highlighter::new(4); 1596 | result.keyword("comment", "(\\-\\-.*)$"); 1597 | result.bounded("comment", "\\{-", "-\\}", true); 1598 | result.bounded("string", "\"", "\"", true); 1599 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1600 | result.keyword("boolean", "\\b(True|False)\\b"); 1601 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1602 | bulk_add(&mut result, "operator", &[ 1603 | "->", "\\$", "`.*`", "<-", "<", ">", "&&", "\\|\\|", "\\\\", "\\:", 1604 | "=", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1605 | r"(\-=)", r"(\*=)", r"(/=)", "!", "\\.", "\\|", r"(==)", r"(!=)", r"(>=)", 1606 | r"(<=)", "_", r"(<<)", r"(>>)", r"(!)\S", "\\band\\b", "\\bor\\b", "\\bnot\\b", 1607 | ]); 1608 | add_keywords(&mut result, &[ 1609 | "module", "import", "as", "qualified", "hiding", "do", "case", "of", "let", "in", "if", "then", "else", 1610 | "data", "type", "newtype", "deriving", "class", "instance", "where", "foreign", "export", "ccall", 1611 | "stdcall", "capi", "prim", "safe", "unsafe", "otherwise", "head", "tail", "last", "init", "null", 1612 | "length", "return", "map", "filter", "foldl", "foldr", "zip", "zipWith", "take", "drop", "reverse", 1613 | "concat", "concatMap", "maximum", "minimum", "elem", "notElem", "sum", "array", "product", "scanl", 1614 | "scanr", "replicate", "cycle", "repeat", "iterate", "fst", "snd", "id", "Maybe", "Either", "Bool", 1615 | "Char", "String", "putStrLn", "getLine", "Just", "Nothing", "for", "Int", "Integer", "Float", 1616 | "Double", "Ordering", "IO", "Functor", "Applicative", "Monad", 1617 | ]); 1618 | result.keyword("function", "^[a-z][a-zA-Z0-9]*"); 1619 | result 1620 | }) 1621 | } 1622 | 1623 | fn css_syntax_highlighter() -> &'static Highlighter { 1624 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1625 | HIGHLIGHTER.get_or_init(|| { 1626 | let mut result = Highlighter::new(4); 1627 | result.bounded("comment", r"/\*", r"\*/", false); 1628 | result.bounded("string", "\"", "\"", true); 1629 | add_keywords(&mut result, &["from", "to", "rotate", "none"]); 1630 | result.keyword("digit", r"\#[0-9a-fA-F]+"); 1631 | result.keyword("digit", "((?:\\d+.\\d+|\\d+)(?:%|deg|px|em|rem)?)"); 1632 | result.keyword("boolean", "\\b(true|false)\\b"); 1633 | result.keyword("attribute", r"\.[a-zA-Z0-9\-]*"); 1634 | result.keyword("attribute", r"\:[a-zA-Z0-9\-]*"); 1635 | result.keyword("attribute", r"\::[a-zA-Z0-9\-]*"); 1636 | result.keyword("attribute", r"@\w+"); 1637 | add_keywords(&mut result, &[ 1638 | "a", "abbr", "address", "area", "article", "aside", "audio", "b", "base", "bdi", "bdo", "blockquote", 1639 | "body", "br", "button", "canvas", "caption", "cite", "code", "col", "colgroup", "data", "datalist", 1640 | "dd", "del", "details", "dfn", "dialog", "div", "dl", "dt", "em", "embed", "fieldset", "figcaption", 1641 | "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", 1642 | "html", "i", "iframe", "img", "input", "ins", "kbd", "label", "legend", "li", "link", "main", "map", 1643 | "mark", "meta", "meter", "nav", "noscript", "object", "ol", "optgroup", "option", "output", "p", 1644 | "param", "picture", "pre", "progress", "q", "rb", "rp", "rt", "rtc", "ruby", "s", "samp", "script", 1645 | "section", "select", "slot", "small", "source", "span", "strong", "style", "sub", "summary", "sup", 1646 | "table", "tbody", "td", "template", "textarea", "tfoot", 
"th", "thead", "time", "title", "tr", "track", 1647 | "u", "ul", "var", "video", "wbr", "svg", 1648 | ]); 1649 | add_keywords(&mut result, &[ 1650 | "-webkit-touch-callout", "-webkit-user-select", "-moz-user-select", "-ms-user-select", 1651 | "user-select", "transform", "border-radius", "border-right", "border-left", "border-top", 1652 | "border-bottom", "border", "content", "display", "height", "width", "margin-top", "margin-bottom", 1653 | "margin-left", "margin-right", "margin", "pointer-events", "position", "top", "transform-origin", 1654 | "-moz-appearance", "-webkit-appearance", "cursor", "flex-grow", "flex-shrink", "font-size", 1655 | "max-height", "max-width", "min-height", "min-width", "outline", "vertical-align", "background-color", 1656 | "background-image", "background-position", "background-repeat", "background-size", "background", 1657 | "animation", "border-(?:left|right|top|bottom)-color", "border-(?:left|right|top|bottom)-radius", 1658 | "border-(?:left|right|top|bottom)-width", "border-(?:left|right|top|bottom)-style", "align-items", 1659 | "box-shadow", "justify-content", "line-height", "padding", "padding-(?:left|bottom|right|top)", "font-weight", 1660 | "list-style", "box-sizing", "text-align", "bottom", "overflow-x", "overflow-y", "text-rendering", 1661 | "-moz-osx-font-smoothing", "-webkit-font-smoothing", "text-size-adjust", "font-family", "color", 1662 | "text-decoration", "font-style", "word-wrap", "white-space", "-webkit-overflow-scrolling", 1663 | "clear", "float", "overflow", "!important", "text-transform", "clip", "visibility", "border-color", 1664 | "opacity", "flex-wrap", "border-(?:top|bottom)-(?:left|right)-radius", "z-index", "word-break", "letter-spacing", 1665 | "text-transform", "resize", "flex-direction", "order", "border-style", "border-width", "text-overflow", 1666 | "flex-basis", "-ms-overflow-y", "-ms-overflow-x", "transition-duration", "transition-property", 1667 | "transition-timing-function", "(flex)[^-]", "-webkit-text-decoration-style", "-apple-system", "sans-serif", 1668 | "left", "right", "bottom", "top", "font", "tab-size", "text-shadow", 1669 | ]); 1670 | result 1671 | }) 1672 | } 1673 | 1674 | fn html_syntax_highlighter() -> &'static Highlighter { 1675 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1676 | HIGHLIGHTER.get_or_init(|| { 1677 | let mut result = Highlighter::new(4); 1678 | result.bounded("comment", "", false); 1679 | result.bounded("string", "\"", "\"", true); 1680 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1681 | result.keyword("boolean", "\\b(true|false)\\b"); 1682 | result.keyword("operator", "="); 1683 | bulk_add(&mut result, "tag", &["", ">", " &'static Highlighter { 1706 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1707 | HIGHLIGHTER.get_or_init(|| { 1708 | let mut result = Highlighter::new(4); 1709 | result.bounded("comment", "", false); 1710 | result.keyword("heading", "(#.*)$"); 1711 | result.keyword("quote", "^(>.*)$"); 1712 | result.bounded("bold", "\\*\\*", "\\*\\*", true); 1713 | result.bounded("italic", "\\*", "\\*", true); 1714 | result.bounded("strikethrough", "~~", "~~", true); 1715 | result.bounded("image", "!\\[", "\\]", true); 1716 | result.bounded("link", "\\[", "\\]", true); 1717 | result.bounded("math", "\\$\\$", "\\$\\$", false); 1718 | result.bounded("math", "\\$", "\\$", false); 1719 | result.bounded("block", "```", "```", false); 1720 | result.bounded("block", "`", "`", true); 1721 | result.keyword("link", r"\b(?:https?://|www\.)\S+\b"); 1722 | result.keyword("linebreak", 
"^\\s*-{3}"); 1723 | result.keyword("list", "[0-9]+\\."); 1724 | result.keyword("list", "^\\s*-"); 1725 | result.keyword("list", "^\\s*\\+"); 1726 | result 1727 | }) 1728 | } 1729 | 1730 | fn toml_syntax_highlighter() -> &'static Highlighter { 1731 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1732 | HIGHLIGHTER.get_or_init(|| { 1733 | let mut result = Highlighter::new(4); 1734 | result.bounded("string", "\"", "\"", true); 1735 | result.bounded("string", "\'", "\'", true); 1736 | result.keyword("comment", "(#.*)$"); 1737 | result.keyword("boolean", "\\b(true|false)\\b"); 1738 | result.keyword("table", r"^(\[.*\])"); 1739 | bulk_add(&mut result, "digit", &[ 1740 | r"(?:=|\[|,)\s*(0x[a-fA-F]+)", 1741 | r"(?:=|\[|,)\s*(0o[0-7]+)", 1742 | r"(?:=|\[|,)\s*(0b[0-1]+)", 1743 | r"(?:=|\[|,)\s*((?:\+|-)?[0-9]+(?:\.[0-9]+)?(?:e|E)(?:\+|-)?[0-9]+)", 1744 | r"(?:=|\[|,)\s*((?:\+|-)?[0-9_]+(?:\.[0-9]+)?)", 1745 | ]); 1746 | add_keywords(&mut result, &["inf", "nan"]); 1747 | result 1748 | }) 1749 | } 1750 | 1751 | fn yaml_syntax_highlighter() -> &'static Highlighter { 1752 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1753 | HIGHLIGHTER.get_or_init(|| { 1754 | let mut result = Highlighter::new(4); 1755 | result.bounded("string", "\"", "\"", true); 1756 | result.bounded("string", "\'", "\'", true); 1757 | result.keyword("comment", "(#.*)$"); 1758 | result.keyword("key", r"^\s*[ \.a-zA-Z_-]+:"); 1759 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1760 | result.keyword("tag", "!!(?:bool|int|float|str|timestamp|null|binary)"); 1761 | add_keywords(&mut result, &["No", "Yes", "no", "yes", "true", "false", "null"]); 1762 | result 1763 | }) 1764 | } 1765 | 1766 | fn csv_syntax_highlighter() -> &'static Highlighter { 1767 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1768 | HIGHLIGHTER.get_or_init(|| { 1769 | let mut result = Highlighter::new(4); 1770 | result.keyword("keyword", ","); 1771 | result 1772 | }) 1773 | } 1774 | 1775 | fn shell_syntax_highlighter() -> &'static Highlighter { 1776 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1777 | HIGHLIGHTER.get_or_init(|| { 1778 | let mut result = Highlighter::new(4); 1779 | result.bounded_interp("string", "\"", "\"", "\\$\\(", "\\)", true); 1780 | result.bounded("string", "\'", "\'", true); 1781 | result.bounded("string", "EOF", "EOF", true); 1782 | result.keyword("comment", "(#.*)$"); 1783 | result.keyword("boolean", "\\b(true|false)\\b"); 1784 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1785 | bulk_add(&mut result, "operator", &[ 1786 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", r"(\*=)", 1787 | r"(\\=)", r"(\{)", r"(\})", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(\$)", r"(\.\.)", 1788 | r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(\.)", r"(&)", 1789 | ]); 1790 | add_keywords(&mut result, &[ 1791 | "if", "then", "else", "elif", "fi", "case", "esac", "for", "while", "until", "do", "done", 1792 | "in", "function", "select", "continue", "break", "return", "exit", "source", "declare", "readonly", 1793 | "local", "export", "ls", "cd", "pwd", "cp", "mv", "rm", "mkdir", "rmdir", "touch", "chmod", 1794 | "chown", "grep", "awk", "sed", "cat", "head", "tail", "sort", "uniq", "wc", "cut", "paste", 1795 | "find", "tar", "gzip", "gunzip", "zip", "unzip", "ssh", "scp", "rsync", "curl", "wget", "ping", 1796 | "traceroute", "netstat", "ps", "kill", "top", "df", "du", "date", "cal", "history", "alias", 1797 | "source", "source", "exec", "exit", "help", "man", "info", "echo", "fgrep", 
"apropos", 1798 | "whoami", "python", "bg", "fg", "sleep", "jobs", "read", "trap", "clear", "sh", "bash", 1799 | ]); 1800 | bulk_add(&mut result, "function", &["\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\("]); 1801 | result 1802 | }) 1803 | } 1804 | 1805 | fn sql_syntax_highlighter() -> &'static Highlighter { 1806 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1807 | HIGHLIGHTER.get_or_init(|| { 1808 | let mut result = Highlighter::new(4); 1809 | result.keyword("comment", "(--.*)$"); 1810 | result.bounded("string", "\"", "\"", true); 1811 | result.bounded("string", "\'", "\'", true); 1812 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1813 | bulk_add(&mut result, "operator", &[ 1814 | r"\+", "-", r"\*", "/", "%", "=", "<>", "!=", "<", ">", "<=", ">=", "&", "|", "^", 1815 | "~", "||", "=", 1816 | ]); 1817 | add_keywords(&mut result, &[ 1818 | "ADD", "ALL", "ALTER", "AND", "AS", "ASC", "BETWEEN", "BY", "CASE", "CHECK", 1819 | "COLUMN", "CONSTRAINT", "CREATE", "DATABASE", "DEFAULT", "DELETE", "DESC", 1820 | "DISTINCT", "DROP", "ELSE", "END", "EXISTS", "FOREIGN", "FROM", "FULL", "GROUP", 1821 | "HAVING", "IN", "INDEX", "INNER", "INSERT", "INTO", "IS", "JOIN", "LEFT", "LIKE", 1822 | "LIMIT", "NOT", "NULL", "ON", "OR", "ORDER", "OUTER", "PRIMARY", "REFERENCES", 1823 | "RIGHT", "SELECT", "SET", "TABLE", "TOP", "TRUNCATE", "UNION", "UNIQUE", "UPDATE", 1824 | "VALUES", "VIEW", "WHERE", "SHOW", "USE", "VARCHAR" 1825 | ]); 1826 | result 1827 | }) 1828 | } 1829 | 1830 | fn xml_syntax_highlighter() -> &'static Highlighter { 1831 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1832 | HIGHLIGHTER.get_or_init(|| { 1833 | let mut result = Highlighter::new(4); 1834 | result.bounded("comment", "", false); 1835 | result.bounded("string", "\"", "\"", true); 1836 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1837 | result.keyword("boolean", "\\b(true|false)\\b"); 1838 | result.keyword("operator", "="); 1839 | bulk_add(&mut result, "tag", &["<[A-Za-z0-9_]+>?", "", "", ">", " &'static Highlighter { 1846 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1847 | HIGHLIGHTER.get_or_init(|| { 1848 | let mut result = Highlighter::new(4); 1849 | result.bounded("string", "\"", "\"", true); 1850 | result.bounded("string", "'", "'", true); 1851 | result.keyword("comment", "(#.*)$"); 1852 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1853 | bulk_add(&mut result, "operator", &[ 1854 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1855 | r"(\-=)", r"(\*=)", r"(\\=)", r"(\{)", r"(\})", r"(==)", r"(!=)", r"(>=)", 1856 | r"(<=)", r"(<)", r"(>)", r"(\$)", r"(\.\.)", r"(<<)", r"(>>)", r"(\&\&)", 1857 | r"(\|\|)", r"(!)\S", r"(\.)", r"(&)", r"(\|)" 1858 | ]); 1859 | add_keywords(&mut result, &[ 1860 | "alias", "append", "build-string", "cd", "config", "cp", "debug", "def", "do", 1861 | "each", "echo", "else", "empty?", "enter", "every", "exit", "export", "filter", 1862 | "first", "flatten", "for", "format", "from", "get", "group-by", "help", "history", 1863 | "if", "insert", "keep", "last", "let", "ls", "math", "merge", "metadata", "move", 1864 | "mut", "open", "parse", "pivot", "plugin", "post", "pre", "prune", "reduce", "reject", 1865 | "rename", "rm", "save", "select", "skip", "sort-by", "source", "split", "str", "table", 1866 | "to", "touch", "uniq", "update", "url", "use", "where", "with-env", "drop", "complete", 1867 | "load-env", "exec", "mkdir", "du", "glob", "mktemp", "mv", "ps", "run-external", "start", 1868 | "sys", "uname", "watch", "which", "nu-check", "nu-highlight", "print", 
"decode", "char", 1869 | "encode", "detect", "url", "dexit", "shells", "random", "gstat", "ansi", "input", 1870 | "keybindings", "kill", "sleep", "term", "ulimit", "whoami", "is-terminal", "clear", "path", 1871 | "http", "query", "port", "tutor", "math", "polars", "hash", "cal", "generate", "seq", 1872 | "columns", "collect", "compact", "flatten", "group", "headers", "transpose", "enumerate", 1873 | "catch", "try", "find", "upsert", "string", "pattern", "fill", 1874 | ]); 1875 | result 1876 | }) 1877 | } 1878 | 1879 | fn tex_syntax_highlighter() -> &'static Highlighter { 1880 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1881 | HIGHLIGHTER.get_or_init(|| { 1882 | let mut result = Highlighter::new(4); 1883 | result.bounded("string", "\\$", "\\$", true); 1884 | result.keyword("comment", r"([^\\]%.*)$"); 1885 | result.keyword("comment", r"^(%.*)$"); 1886 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1887 | bulk_add(&mut result, "keyword", &[ 1888 | r"\\addbibresource\b", r"\\author\b", r"\\begin\b", r"\\caption\b", 1889 | r"\\centering\b", r"\\date\b", r"\\end\b", r"\\geometry\b", r"\\hline\b", 1890 | r"\\includegraphics\b", r"\\item\b", r"\\label\b", r"\\maketitle\b", r"\\paragraph\b", 1891 | r"\\parindent\b", r"\\parskip\b", r"\\printbibliography\b", r"\\section\b", r"\\setlength\b", 1892 | r"\\subsection\b", r"\\tableofcontents\b", r"\\textbf\b", r"\\textit\b", r"\\texttt\b", 1893 | r"\\title\b", r"\\today\b", r"\\underline\b", r"\\usepackage\b", r"\\ref\b", 1894 | r"\\cite\b", r"\\pageref\b", r"\\include\b", r"\\input\b", r"\\bibliographystyle\b", 1895 | r"\\newcommand\b", r"\\renewcommand\b", r"\\renewenvironment\b", r"\\newenvironment\b", 1896 | r"\\footnote\b", r"\\hline\b", r"\\vspace\b", r"\\hspace\b", r"\\newline\b", r"\\frac\b", 1897 | r"\\textbackslash\b", r"\\documentclass\b", 1898 | ]); 1899 | bulk_add(&mut result, "operator", &[ 1900 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(#)", r"(\+=)", r"(\-=)", 1901 | r"(\*=)", r"(\\=)", r"(\^)", r"(%)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", 1902 | r"(\$)", r"(\.\.)", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(&)", r"(\|)", 1903 | ]); 1904 | result 1905 | }) 1906 | } 1907 | 1908 | fn diff_syntax_highlighter() -> &'static Highlighter { 1909 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1910 | HIGHLIGHTER.get_or_init(|| { 1911 | let mut result = Highlighter::new(4); 1912 | result.keyword("insertion", r"^(\+(?:[^+]|$).*)$"); 1913 | result.keyword("deletion", r"^\-(?:[^-]|$).*$"); 1914 | result.keyword("comment", r"@@.*@@"); 1915 | result 1916 | }) 1917 | } 1918 | -------------------------------------------------------------------------------- /src/lib_old.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::ops::Range; 3 | use regex::Regex; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct RangeLoc { 7 | pub y: usize, 8 | pub x: Range, 9 | } 10 | 11 | #[derive(Debug, Clone, Copy)] 12 | pub struct Loc { 13 | pub y: usize, 14 | pub x: usize, 15 | } 16 | 17 | #[derive(Debug, Clone)] 18 | pub struct Keyword { 19 | pub kind: String, 20 | pub loc: RangeLoc, 21 | } 22 | 23 | #[derive(Debug)] 24 | pub struct BoundedDef { 25 | pub name: String, 26 | pub start: String, 27 | pub end: String, 28 | } 29 | 30 | #[derive(Debug, Clone, PartialEq)] 31 | pub enum PatternKind { 32 | Start, 33 | End, 34 | Hybrid, 35 | } 36 | 37 | #[derive(Debug, Clone)] 38 | pub struct Pattern { 39 | pub of: String, 40 | pub kind: PatternKind, 
pub loc: RangeLoc, 42 | pub token: Option<usize>, 43 | } 44 | 45 | #[derive(Debug, Clone)] 46 | pub struct TokenSpan { 47 | kind: String, 48 | // References to patterns 49 | start: usize, 50 | end: Option<usize>, 51 | } 52 | 53 | #[derive(Debug)] 54 | pub enum Token { 55 | Start(String), 56 | Text(String), 57 | End(String), 58 | } 59 | 60 | pub struct Highlighter { 61 | pub patterns: Vec<Pattern>, 62 | pub tokens: Vec<TokenSpan>, 63 | pub keywords: Vec<Vec<Keyword>>, 64 | pub line_ref: Vec<Vec<usize>>, 65 | pub bounded_rules: HashMap<String, BoundedDef>, 66 | pub keyword_rules: HashMap<String, Vec<Regex>>, 67 | pub modified: Vec<bool>, 68 | } 69 | 70 | impl Highlighter { 71 | pub fn new() -> Self { 72 | Self { 73 | patterns: vec![], 74 | tokens: vec![], 75 | keywords: vec![], 76 | modified: vec![], 77 | line_ref: vec![], 78 | bounded_rules: HashMap::default(), 79 | keyword_rules: HashMap::default(), 80 | } 81 | } 82 | 83 | pub fn bounded<S: Into<String>>(&mut self, name: S, start: S, end: S) { 84 | let (name, start, end) = (name.into(), start.into(), end.into()); 85 | self.bounded_rules.insert(name.clone(), BoundedDef { name, start, end }); 86 | } 87 | 88 | pub fn keyword<S: Into<String>>(&mut self, name: S, pattern: S) { 89 | let (name, pattern) = (name.into(), pattern.into()); 90 | let regex = Regex::new(&pattern).expect("Invalid regex pattern"); 91 | if let Some(v) = self.keyword_rules.get_mut(&name) { 92 | v.push(regex); 93 | } else { 94 | self.keyword_rules.insert(name, vec![regex]); 95 | } 96 | } 97 | 98 | // This will clone each line, potentially optimise using pointers? 99 | pub fn line(&mut self, idx: usize, contents: &String) -> Vec<Token> { 100 | // Get the tokens that appear on this line 101 | let mut tokens: Vec<(RangeLoc, Option<RangeLoc>, TokenSpan)> = self.line_ref[idx].iter() 102 | // Clone the token 103 | .map(|i| self.tokens[*i].clone()) 104 | // Attach starting and ending information 105 | .map(|t| { 106 | // Obtain the start index from the pattern from the token 107 | let start_pattern = &self.patterns[t.start]; 108 | let start = start_pattern.loc.clone(); 109 | // Obtain the end pattern from the token 110 | let end_pattern = t.end.and_then(|t| Some(&self.patterns[t])); 111 | let end = end_pattern.and_then(|t| Some(t.loc.clone())); 112 | // Compose together into a tuple 113 | (start, end, t) 114 | }) 115 | .collect(); 116 | // Trim to fit 117 | if let Some((start, _, _)) = tokens.first_mut() { 118 | // Token starts on a different line? 119 | if start.y != idx { 120 | start.x = 0..0; 121 | start.y = idx; 122 | } 123 | } 124 | if let Some((_, end, _)) = tokens.last_mut() { 125 | // Token ends on a different line?
126 | if end.is_none() || end.as_ref().unwrap().y != idx { 127 | let len = contents.len(); 128 | *end = Some(RangeLoc { x: len..len, y: idx }); 129 | } 130 | } 131 | // Obtain keywords if necessary 132 | if self.modified[idx] { 133 | *self.keywords.get_mut(idx).unwrap() = self.find_keywords(contents, idx); 134 | self.modified[idx] = false; 135 | } 136 | // Create hashmap for easier detection (keywords) 137 | let kws: HashMap = self.keywords[idx].iter() 138 | .map(|k| (k.loc.x.start, (k.loc.clone(), &k.kind))) 139 | .collect(); 140 | // Create hashmap for easier detection (bounded) 141 | let tokens: HashMap = tokens.iter() 142 | .map(|(start, end, tok)| (start.x.start, (start.clone(), end.clone().unwrap(), &tok.kind))) 143 | .collect(); 144 | // Run through the whole line, making sure everything is accounted for 145 | let mut result = vec![]; 146 | let mut x = 0; 147 | while x < contents.len() { 148 | if tokens.contains_key(&x) { 149 | // There is a bounded token here 150 | let (start, end, name) = &tokens[&x]; 151 | result.push(Token::Start(name.to_string())); 152 | result.push(Token::Text(contents[start.x.start..end.x.end].to_string())); 153 | result.push(Token::End(name.to_string())); 154 | x = end.x.end; 155 | } else if kws.contains_key(&x) { 156 | // There is a keyword token here 157 | let (range, name) = &kws[&x]; 158 | result.push(Token::Start(name.to_string())); 159 | result.push(Token::Text(contents[range.x.start..range.x.end].to_string())); 160 | result.push(Token::End(name.to_string())); 161 | x = range.x.end; 162 | } else { 163 | // There is no bounded token here, append to text 164 | let ch = contents.chars().nth(x).unwrap(); 165 | if let Some(Token::Text(ref mut text)) = result.last_mut() { 166 | text.push(ch); 167 | } else { 168 | result.push(Token::Text(ch.to_string())); 169 | } 170 | x += 1; 171 | } 172 | } 173 | result 174 | } 175 | 176 | /// Initially highlight lines, additional lines can be added through append 177 | pub fn run(&mut self, lines: &Vec) { 178 | // Locate patterns (starting from line 0) 179 | let mut patterns = self.find_patterns(0, lines); 180 | // Form tokens from patterns 181 | let tokens = Self::form_tokens(&mut patterns); 182 | // Add to highlighter 183 | self.patterns = patterns; 184 | self.tokens = tokens; 185 | // Build line references 186 | self.build_line_ref(lines.len()); 187 | // Build keyword information 188 | self.modified = (0..lines.len()).map(|_| true).collect(); 189 | self.keywords = (0..lines.len()).map(|_| vec![]).collect(); 190 | } 191 | 192 | /// Add an additional line to this highlighter 193 | pub fn append(&mut self, line: &String) { 194 | let line_number = self.line_ref.len(); 195 | let lines = vec![line.clone()]; 196 | // Locate patterns 197 | let mut patterns = self.find_patterns(line_number, &lines); 198 | // Append to highlighter 199 | self.patterns.append(&mut patterns); 200 | self.line_ref.push(vec![]); 201 | self.modified.push(true); 202 | self.keywords.push(vec![]); 203 | // Perform update 204 | self.retokenize(); 205 | } 206 | 207 | pub fn insert(&mut self, loc: Loc, line: &String) { 208 | self.modified[loc.y] = true; 209 | let ch = line.chars().nth(loc.x).unwrap(); 210 | // Shift up patterns past a certain x 211 | let mut idx = self.patterns.iter().enumerate() 212 | .find(|(_, p)| loc.y < p.loc.y || (loc.y == p.loc.y && loc.x <= p.loc.x.start)) 213 | .and_then(|(n, _)| Some(n)) 214 | .unwrap_or(self.patterns.len()); 215 | self.patterns.iter_mut() 216 | .skip(idx) 217 | .filter(|p| loc.y == p.loc.y) 218 | .for_each(|p| { 
219 | p.loc.x.end += 1; 220 | p.loc.x.start += 1; 221 | }); 222 | // Check for any pattern being destroyed 223 | let mut delete = false; 224 | if let Some(previous_pattern) = &self.patterns.get(idx.saturating_sub(1)) { 225 | if previous_pattern.loc.y == loc.y { 226 | if previous_pattern.loc.x.contains(&loc.x) { 227 | self.patterns.remove(idx.saturating_sub(1)); 228 | idx -= 1; 229 | delete = true; 230 | } 231 | } 232 | } 233 | // Check for new start or end pattern 234 | for kind in vec![PatternKind::Start, PatternKind::End, PatternKind::Hybrid] { 235 | let is = match kind { 236 | PatternKind::Start => self.is_new_start(loc, ch, line), 237 | PatternKind::End => self.is_new_end(loc, ch, line), 238 | PatternKind::Hybrid => self.is_new_hybrid(loc, ch, line), 239 | }; 240 | if let Some((s, def)) = is { 241 | // Get the length of the pattern 242 | let len = match kind { 243 | PatternKind::Start | PatternKind::Hybrid => def.start.len(), 244 | PatternKind::End => def.end.len(), 245 | }; 246 | // Register the pattern 247 | let pattern = Pattern { 248 | token: None, 249 | loc: RangeLoc { y: loc.y, x: s..(s + len) }, 250 | kind, 251 | of: def.name.to_string(), 252 | }; 253 | self.patterns.insert(idx, pattern); 254 | // Retokenize to correct any dodgy tokens 255 | self.retokenize(); 256 | return; 257 | } 258 | } 259 | // If this insertion only deleted a token, then manually retokenize 260 | if delete { 261 | self.retokenize(); 262 | } 263 | } 264 | 265 | pub fn remove(&mut self, loc: Loc, line: &String) { 266 | self.modified[loc.y] = true; 267 | // Find idx of next pattern 268 | let mut idx = self.patterns.iter().enumerate() 269 | .find(|(_, p)| loc.y < p.loc.y || (loc.y == p.loc.y && loc.x <= p.loc.x.end)) 270 | .and_then(|(n, _)| Some(n)) 271 | .unwrap_or(self.patterns.len()); 272 | let mut modified = false; 273 | // Check to see if any patterns have been destroyed 274 | let in_pattern = self.patterns.iter().enumerate() 275 | .find(|(_, p)| loc.y == p.loc.y && p.loc.x.contains(&loc.x)) 276 | .and_then(|(n, _)| Some(n)); 277 | if let Some(pattern_idx) = in_pattern { 278 | self.patterns.remove(pattern_idx); 279 | modified = true; 280 | } 281 | // Check to see if any patterns have been created as a result 282 | if let Some(joined_char) = line.chars().nth(loc.x + 1) { 283 | let mut line = line.clone(); 284 | line.remove(loc.x); 285 | let mut result: Option<(usize, usize, &String, PatternKind)> = None; 286 | // Find out if any new patterns have been created 287 | if let Some((s, def)) = self.is_new_start(loc, joined_char, &line) { 288 | // A new start pattern has been created 289 | result = Some((s, def.start.len(), &def.name, PatternKind::Start)); 290 | } else if let Some((s, def)) = self.is_new_end(loc, joined_char, &line) { 291 | // A new end pattern has been created 292 | result = Some((s, def.end.len(), &def.name, PatternKind::End)); 293 | } else if let Some((s, def)) = self.is_new_hybrid(loc, joined_char, &line) { 294 | // A new hybrid pattern has been created 295 | result = Some((s, def.start.len(), &def.name, PatternKind::Hybrid)); 296 | } 297 | // If so, register 298 | if let Some((s, len, name, kind)) = result { 299 | let double_start = self.bounded_rules[name].start.len() > 1; 300 | let double_end = self.bounded_rules[name].end.len() > 1; 301 | let double = (kind == PatternKind::Start && double_start) || 302 | (kind == PatternKind::End && double_end) || 303 | (kind == PatternKind::Hybrid && double_start); 304 | if double { 305 | let pattern = Pattern { 306 | token: None, 307 | loc: RangeLoc { 
y: loc.y, x: s..(s + len) }, 308 | kind, 309 | of: name.to_string(), 310 | }; 311 | self.patterns.insert(idx, pattern); 312 | modified = true; 313 | idx += 1; 314 | } 315 | } 316 | } 317 | // Shift back patterns before a certain x 318 | self.patterns.iter_mut() 319 | .skip(idx) 320 | .filter(|p| loc.y == p.loc.y) 321 | .for_each(|p| { 322 | p.loc.x.end -= 1; 323 | p.loc.x.start -= 1; 324 | }); 325 | // Retokenize if necessary 326 | if modified { 327 | self.retokenize(); 328 | } 329 | } 330 | 331 | pub fn insert_line(&mut self, y: usize) { 332 | self.patterns.iter_mut() 333 | .filter(|p| p.loc.y > y) 334 | .for_each(|p| p.loc.y += 1); 335 | self.line_ref.insert(y, vec![]); 336 | self.keywords.insert(y, vec![]); 337 | self.modified.insert(y, true); 338 | } 339 | 340 | pub fn remove_line(&mut self, y: usize) { 341 | self.patterns.iter_mut() 342 | .filter(|p| p.loc.y > y) 343 | .for_each(|p| p.loc.y -= 1); 344 | self.line_ref.remove(y); 345 | self.keywords.remove(y); 346 | self.modified.remove(y); 347 | } 348 | 349 | pub fn split_down(&mut self, loc: Loc) { 350 | // Inside a pattern: kill off the pattern 351 | let pattern_chop = self.patterns.iter().enumerate() 352 | .filter(|(_, p)| p.loc.y == loc.y) 353 | .find(|(_, p)| ((p.loc.x.start + 1)..p.loc.x.end).contains(&loc.x)) 354 | .and_then(|(n, _)| Some(n)); 355 | if let Some(idx) = pattern_chop { 356 | self.patterns.remove(idx); 357 | self.retokenize(); 358 | } 359 | // Adjust keywords 360 | self.modified[loc.y] = true; 361 | // Adjust patterns 362 | self.insert_line(loc.y); 363 | self.patterns.iter_mut() 364 | .filter(|p| p.loc.y == loc.y && loc.x <= p.loc.x.start) 365 | .for_each(|p| { 366 | p.loc.y += 1; 367 | p.loc.x.start -= loc.x; 368 | p.loc.x.end -= loc.x; 369 | }); 370 | self.build_line_ref(self.line_ref.len()); 371 | } 372 | 373 | pub fn splice_up(&mut self, loc: Loc, line: &String) { 374 | let idx = self.patterns.iter().enumerate() 375 | .find(|(_, p)| p.loc.y >= loc.y + 1) 376 | .and_then(|(n, _)| Some(n)) 377 | .unwrap_or(self.patterns.len()); 378 | let mut modified = false; 379 | // Adjust keywords 380 | self.modified[loc.y] = true; 381 | // Adjust patterns 382 | self.patterns.iter_mut() 383 | .filter(|p| p.loc.y == loc.y + 1) 384 | .for_each(|p| { 385 | p.loc.y -= 1; 386 | p.loc.x.start += loc.x; 387 | p.loc.x.end += loc.x; 388 | }); 389 | self.remove_line(loc.y + 1); 390 | // Check to see if any patterns have been created as a result 391 | if let Some(joined_char) = line.chars().nth(loc.x) { 392 | let line = line.clone(); 393 | let mut result: Option<(usize, usize, &String, PatternKind)> = None; 394 | // Find out if any new patterns have been created 395 | //println!("{loc:?} {joined_char:?} {line:?}"); 396 | if let Some((s, def)) = self.is_new_start(loc, joined_char, &line) { 397 | // A new start pattern has been created 398 | result = Some((s, def.start.len(), &def.name, PatternKind::Start)); 399 | } else if let Some((s, def)) = self.is_new_end(loc, joined_char, &line) { 400 | // A new end pattern has been created 401 | result = Some((s, def.end.len(), &def.name, PatternKind::End)); 402 | } else if let Some((s, def)) = self.is_new_hybrid(loc, joined_char, &line) { 403 | // A new hybrid pattern has been created 404 | result = Some((s, def.start.len(), &def.name, PatternKind::Hybrid)); 405 | } 406 | // If so, register 407 | if let Some((s, len, name, kind)) = result { 408 | let double_start = self.bounded_rules[name].start.len() > 1; 409 | let double_end = self.bounded_rules[name].end.len() > 1; 410 | let double = (kind == 
PatternKind::Start && double_start) || 411 | (kind == PatternKind::End && double_end) || 412 | (kind == PatternKind::Hybrid && double_start); 413 | if double { 414 | let pattern = Pattern { 415 | token: None, 416 | loc: RangeLoc { y: loc.y, x: s..(s + len) }, 417 | kind, 418 | of: name.to_string(), 419 | }; 420 | self.patterns.insert(idx, pattern); 421 | modified = true; 422 | } 423 | } 424 | } 425 | if modified { 426 | self.retokenize(); 427 | } else { 428 | self.build_line_ref(self.line_ref.len()); 429 | } 430 | } 431 | 432 | fn retokenize(&mut self) { 433 | let patterns = &mut self.patterns; 434 | self.tokens = Self::form_tokens(patterns); 435 | self.build_line_ref(self.line_ref.len()); 436 | } 437 | 438 | fn is_new_start(&self, loc: Loc, ch: char, line: &String) -> Option<(usize, &BoundedDef)> { 439 | self.is_new_pattern(loc, ch, line, PatternKind::Start) 440 | } 441 | 442 | fn is_new_end(&self, loc: Loc, ch: char, line: &String) -> Option<(usize, &BoundedDef)> { 443 | self.is_new_pattern(loc, ch, line, PatternKind::End) 444 | } 445 | 446 | fn is_new_hybrid(&self, loc: Loc, ch: char, line: &String) -> Option<(usize, &BoundedDef)> { 447 | self.is_new_pattern(loc, ch, line, PatternKind::Hybrid) 448 | } 449 | 450 | fn is_new_pattern(&self, loc: Loc, ch: char, line: &String, kind: PatternKind) -> Option<(usize, &BoundedDef)> { 451 | // Get all non-hybrid rules 452 | let rules = self.bounded_rules.values(); 453 | let mut result = None; 454 | // Return a match if there is one 455 | for def in rules { 456 | let pattern = match kind { 457 | PatternKind::Start => &def.start, 458 | PatternKind::End => &def.end, 459 | PatternKind::Hybrid => &def.start, 460 | }; 461 | let hybrid = def.start == def.end; 462 | // Determine if a start or end token has actually been created 463 | result = pattern.chars().enumerate() 464 | // Find locations within the start or end pattern where this character could be 465 | .filter(|(_, i)| *i == ch) 466 | // For each one, work out where the pattern would theoretically start 467 | .map(|(n, _)| loc.x.saturating_sub(n)) 468 | // Attach a corresponding end location 469 | .map(|pattern_start| (pattern_start, pattern_start + pattern.len())) 470 | // Find out if any of these candidates are actually start or end patterns 471 | .map(|(start, end)| (start, &line[start..end] == pattern)) 472 | .find(|(_, is_match)| *is_match && (!hybrid || kind == PatternKind::Hybrid)) 473 | // Link in definition 474 | .and_then(|(pattern_start, _)| Some((pattern_start, def))); 475 | if result.is_some() { 476 | break; 477 | } 478 | } 479 | result 480 | } 481 | 482 | /// Finds patterns in the provided lines 483 | /// offset will add to the y axis (useful for when you're appending lines) 484 | fn find_patterns(&mut self, offset: usize, lines: &Vec<String>) -> Vec<Pattern> { 485 | let mut result = vec![]; 486 | // For each line 487 | for (mut y, line) in lines.iter().enumerate() { 488 | // Offset y tokens 489 | y += offset; 490 | // For each character 491 | let mut x = 0; 492 | while x < line.len() { 493 | // Set up line and position info 494 | let line = &line[x..]; 495 | let loc = Loc { y, x }; 496 | // Work out if there is a pattern here 497 | let pattern = self.bounded_rules.values() 498 | // Find whether this pattern is a start pattern or end pattern 499 | .map(|def| (&def.name, line.starts_with(&def.start), line.starts_with(&def.end))) 500 | // Find one that is either a start or end pattern 501 | .find(|(_, starts, ends)| *starts || *ends); 502 | // If there is, register the pattern 503 | if let Some((name,
starts, ends)) = pattern { 504 | // Form the pattern 505 | let def = &self.bounded_rules[name]; 506 | let kind = match (starts, ends) { 507 | // Start pattern 508 | (true, false) => PatternKind::Start, 509 | // End pattern 510 | (false, true) => PatternKind::End, 511 | // Hybrid pattern 512 | (true, true) => PatternKind::Hybrid, 513 | // No pattern here 514 | (false, false) => unreachable!(), 515 | }; 516 | let of = def.name.clone(); 517 | let x_range = loc.x..(loc.x + def.end.len()); 518 | let range = RangeLoc { y: loc.y, x: x_range }; 519 | let pattern = Pattern { token: None, kind, loc: range, of }; 520 | result.push(pattern); 521 | // Keep searching forward 522 | x += if starts { def.start.len() } else { def.end.len() }; 523 | } else { 524 | x += 1; 525 | } 526 | } 527 | } 528 | result 529 | } 530 | 531 | /// Forms tokens based on patterns 532 | /// Ensure patterns are correctly registered before running this 533 | fn form_tokens(patterns: &mut Vec<Pattern>) -> Vec<TokenSpan> { 534 | let mut result = vec![]; 535 | let mut registering = false; 536 | let mut registering_kind = "".to_string(); 537 | // Run through patterns 538 | for (n, pattern) in patterns.iter_mut().enumerate() { 539 | let Pattern { of, kind, ref mut token, .. } = pattern; 540 | let len = result.len(); 541 | match (kind, registering) { 542 | // New start token 543 | (PatternKind::Start, false) => { 544 | registering = true; 545 | registering_kind = of.clone(); 546 | // Make pattern active 547 | *token = Some(len); 548 | // Put on token 549 | result.push(TokenSpan { 550 | kind: of.clone(), 551 | start: n, 552 | end: None, 553 | }); 554 | } 555 | // Corresponding end token 556 | (PatternKind::End, true) => { 557 | if *of == registering_kind { 558 | if let Some(this) = result.last_mut() { 559 | registering = false; 560 | registering_kind = "".to_string(); 561 | // Make pattern active 562 | *token = Some(len - 1); 563 | // Update end pattern in token 564 | this.end = Some(n); 565 | } 566 | } 567 | } 568 | // Opportunity to end a hybrid token 569 | (PatternKind::Hybrid, true) => { 570 | if let Some(this) = result.last_mut() { 571 | // Tokens are of the same type?
572 | if *of == this.kind { 573 | // They are, terminate this hybrid token 574 | registering = false; 575 | registering_kind = "".to_string(); 576 | // Make pattern active 577 | *token = Some(len - 1); 578 | // Update end pattern in token 579 | this.end = Some(n); 580 | } 581 | } 582 | } 583 | // Opportunity to start a new hybrid token 584 | (PatternKind::Hybrid, false) => { 585 | registering = true; 586 | registering_kind = of.clone(); 587 | // Make pattern active 588 | *token = Some(len); 589 | // Push on token 590 | result.push(TokenSpan { 591 | kind: of.clone(), 592 | start: n, 593 | end: None, 594 | }); 595 | } 596 | _ => (), 597 | } 598 | } 599 | result 600 | } 601 | 602 | fn find_keywords(&self, line: &String, y: usize) -> Vec<Keyword> { 603 | let mut result = vec![]; 604 | for (name, group) in &self.keyword_rules { 605 | for exp in group { 606 | result.append(&mut exp.find_iter(line) 607 | .map(|s| Keyword { 608 | loc: RangeLoc { x: s.start()..s.end(), y }, 609 | kind: name.to_string(), 610 | }) 611 | .collect()); 612 | } 613 | } 614 | result 615 | } 616 | 617 | fn build_line_ref(&mut self, max: usize) { 618 | // Refresh line reference 619 | self.line_ref = vec![]; 620 | (0..max).for_each(|_| self.line_ref.push(vec![])); 621 | // Register tokens according to the lines they span 622 | for (n, token) in self.tokens.iter().enumerate() { 623 | // Obtain start and end positions 624 | let start = self.patterns[token.start].loc.y; 625 | let end = match token.end { 626 | // Find y position of end pattern 627 | Some(end) => self.patterns[end].loc.y, 628 | // This token is a hanging token, max it out 629 | None => max - 1, 630 | }; 631 | for y in start..=end { 632 | self.line_ref[y].push(n); 633 | } 634 | } 635 | } 636 | } 637 | --------------------------------------------------------------------------------
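
For orientation, here is a minimal sketch of how the legacy `lib_old.rs` API above fits together, based solely on the signatures in that file (`new`, `bounded`, `keyword`, `run`, `line`, and the `Token` enum). It is illustrative only: `lib_old.rs` is not exported by the published crate, and the rule names and sample line are invented for the example.

```rust
// Illustrative sketch only: assumes it sits in the same module as the
// legacy types above (Highlighter, Token), which the crate does not export.
fn legacy_demo() {
    let mut h = Highlighter::new();
    // One bounded rule (start/end delimiters) and one keyword rule.
    h.bounded("string", "\"", "\"");
    h.keyword("keyword", r"\b(fn|let|pub)\b");
    // Initial pass over the whole buffer.
    let lines: Vec<String> = vec!["let greeting = \"hi\";".to_string()];
    h.run(&lines);
    // Per-line token stream: Start/End markers wrapping highlighted text,
    // with plain Text runs in between.
    for (y, line) in lines.iter().enumerate() {
        for token in h.line(y, line) {
            match token {
                Token::Start(kind) => print!("[{kind}:"),
                Token::Text(text) => print!("{text}"),
                Token::End(_) => print!("]"),
            }
        }
        println!();
    }
}
```

Note the contrast with the current API in `src/lib.rs`: the legacy constructor takes no tab-width argument and `bounded` has no escaping flag, whereas the current highlighters above are built with `Highlighter::new(4)` and `bounded(name, start, end, escaping)`.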