├── .gitignore
├── Cargo.toml
├── LICENSE
├── README.md
├── example
│   ├── Cargo.lock
│   ├── Cargo.toml
│   └── src
│       ├── .todo.md
│       └── main.rs
├── examples
│   ├── debug.rs
│   ├── example.rs
│   └── trimming.rs
├── nolang.txt
└── src
    ├── lib.rs
    └── lib_old.rs

/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /Cargo.lock
3 | tests/bench/target
4 | tarpaulin-report.html
5 | *.swp
6 | /example/target
7 | examples/new_language_test.rs
8 | 
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "synoptic"
3 | authors = ["curlpipe <11898833+curlpipe@users.noreply.github.com>"]
4 | version = "2.2.9"
5 | edition = "2021"
6 | license = "MIT"
7 | description = "A simple, low-level, syntax highlighting library with unicode support"
8 | repository = "https://github.com/curlpipe/synoptic"
9 | readme = "README.md"
10 | keywords = ["unicode", "text-processing"]
11 | categories = ["text-processing"]
12 | 
13 | [dependencies]
14 | char_index = "0.1.4"
15 | if_chain = "1.0.2"
16 | nohash-hasher = "0.2.0"
17 | regex = "1.8.4"
18 | unicode-width = "0.2"
19 | 
20 | [dev-dependencies]
21 | lliw = "0.2.0"
22 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 curlpipe
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Synoptic
2 | 
3 | > Syntax highlighting for Rust applications
4 | 
5 | This is a pretty lightweight (only 3 main dependencies) and simple regex-based syntax highlighter for Rust.
6 | 
7 | I originally wrote this for my text editor, Ox. It needed a fast, configurable and optimised syntax highlighter that could easily integrate with existing projects. However, you can (and are encouraged to) use it for any project you have in mind.
8 | 
9 | ---
10 | **Advantages:**
11 | - **Customisable** - You can highlight almost any language by adding in custom syntax highlighting rules
12 | - **Fast** - Quick enough that it won't slow your projects down, even with large files and many different rules
13 | - **Simple** - You can get code highlighted pretty quickly (see the example below)
14 | - **Incremental** - As this was designed for use with a text editor, it can re-highlight code very quickly in response to edit commands
15 | - **Built-in language rules** - Get highlighting even faster by choosing from existing syntax rules
16 | - **File Buffering** - Synoptic doesn't need the whole file to perform a correct highlighting job, thus allowing file buffering
17 | - **Escaping** - Will handle escaping if you need it (`"here is a quote: \" tada!"`)
18 | - **Interpolation** - Will handle interpolation if you need it (`"My name is {name}, nice to meet you!"`)
19 | 
20 | **Disadvantages:**
21 | - **Not very well established** - There may be inconsistencies in the included pre-built language highlighting rules
22 | - **Lacks understanding** - This will not be able to provide very detailed syntax highlighting, as no parsing is performed
23 | - **Interpolation is limited** - You can't nest interpolated tokens like `"this is { "f{ "u" }n" }"`
24 | 
25 | Despite its disadvantages, if you just want a simple syntax highlighter with no frills or excess baggage, synoptic might just be your crate.
26 | 
27 | ## Installation
28 | Just add it to your `Cargo.toml`:
29 | ```toml
30 | [dependencies]
31 | synoptic = "2"
32 | ```
33 | 
34 | - Construct a `Highlighter` instance
35 | - Add regular expressions and keywords to the highlighter and assign each a name
36 | - Use the `run` method to generate tokens
37 | - Use the `line` method to obtain the tokens for each line
38 | 
39 | ## Built-in languages
40 | 
41 | You can also use some provided syntax highlighters for various popular languages using the `from_extension` function.
42 | There are likely to be inconsistencies in the existing rules, so please do open an issue if you spot any.
43 | 
44 | Currently, synoptic includes:
45 | 
46 | - [x] Various Higher Level Languages: Python, Ruby, Lua, Perl, Java, Visual Basic, Scala
47 | - [x] The C Family: C, C++, C#
48 | - [x] Various Lower Level Languages: Rust, Go, Assembly
49 | - [x] Web Technologies: HTML, CSS, PHP, JavaScript, JSON, TypeScript
50 | - [x] Mathematical Languages: MATLAB, R, Haskell, Prolog
51 | - [x] Mobile Development: Kotlin, Swift, Dart
52 | - [x] Markup Languages: Markdown, YAML, TOML, XML, CSV
53 | - [x] Other: SQL, Bash, Nushell
54 | 
55 | Open an issue if there is a language not yet supported, or if you notice any issues in the built-in syntax highlighting rules.
56 | 
57 | ## Example
58 | 
59 | Here's an example of a Rust syntax highlighter, using the lliw crate.
60 | 
61 | ```rust
62 | use synoptic::{Highlighter, TokOpt};
63 | use lliw::Fg;
64 | 
65 | // Let's use some demonstration code
66 | pub static CODE: &str = "\
67 | /*
68 | Multiline comments
69 | Work great
70 | */
71 | 
72 | pub fn main() -> bool {
73 |     // Demonstrate syntax highlighting in Rust!
74 |     println!(\"Full Unicode Support: 你好\");
75 |     // Interpolation
76 |     let name = \"peter\";
77 |     println!(\"My name is {name}, nice to meet you!\");
78 |     // Bye!
79 |     return true;
80 | }
81 | ";
82 | 
83 | fn main() {
84 |     // Setting up the highlighter
85 |     // The `4` here just means tabs are shown as 4 spaces
86 |     let mut h = Highlighter::new(4);
87 | 
88 |     // Bounded tokens are multiline tokens
89 |     // Let's define multiline comments
90 |     // In Rust, these start with /* and end with */
91 |     // Remember to escape any regex characters (like *)
92 |     // The false here is whether or not to allow escaping
93 |     // When true, we ignore any end markers with a backslash in front of them
94 |     // So, if it were true: `/* this is a comment \*/ this is still a comment */ this isn't`
95 |     h.bounded("comment", r"/\*", r"\*/", false);
96 | 
97 |     // Now let's define a string
98 |     // In Rust, format strings can be interpolated into between {}
99 |     // We first define the name of the token, the starting and ending pattern
100 |     // Then the starting and ending pattern of the interpolation section
101 |     // We also want strings to be escapable e.g. "here's a quote: \" this is still a string"
102 |     // Hence the true
103 |     h.bounded_interp("string", "\"", "\"", "\\{", "\\}", true);
104 | 
105 |     // Now let's define some keywords
106 |     // These are single line snippets of text
107 |     h.keyword("keyword", r"\b(pub|fn|bool|let|return)\b");
108 | 
109 |     // Let's get numbers being highlighted
110 |     h.keyword("digit", r"\b\d+(?:\.\d+)?\b");
111 | 
112 |     // ... and some remaining syntax rules
113 |     h.keyword("comment", "(//.*)$");
114 |     h.keyword("boolean", r"\b(true|false)\b");
115 |     h.keyword("macros", "[a-zA-Z_]+\\!");
116 |     h.keyword("function", r"([a-z][a-zA-Z_]*)\s*\(");
117 | 
118 |     // Now let's run the highlighter on the example code
119 |     // The run method takes a vector of strings (for each line)
120 |     let code: Vec<String> = CODE
121 |         .split('\n')
122 |         .map(|line| line.to_string())
123 |         .collect();
124 |     // Now we're ready to go
125 |     h.run(&code);
126 | 
127 |     // Let's render the output
128 |     for (line_number, line) in code.iter().enumerate() {
129 |         // Line returns tokens for the corresponding line
130 |         for token in h.line(line_number, &line) {
131 |             // Tokens can either require highlighting or not require highlighting
132 |             match token {
133 |                 // This is some text that needs to be highlighted
134 |                 TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset),
135 |                 // This is just normal text with no highlighting
136 |                 TokOpt::None(text) => print!("{text}"),
137 |             }
138 |         }
139 |         // Insert a newline at the end of every line
140 |         println!();
141 |     }
142 | }
143 | 
144 | fn colour(name: &str) -> Fg {
145 |     // This function will take in the token name
146 |     // And it will output the correct foreground colour
147 |     match name {
148 |         "comment" => Fg::LightBlack,
149 |         "digit" => Fg::Purple,
150 |         "string" => Fg::Green,
151 |         "macros" => Fg::LightPurple,
152 |         "boolean" => Fg::Blue,
153 |         "keyword" => Fg::Yellow,
154 |         "function" => Fg::Red,
155 |         _ => panic!("unknown token name"),
156 |     }
157 | }
158 | 
159 | ```
160 | 
161 | That will render a result similar to this (depending on your terminal's colour scheme):
162 | 
163 | ![](https://i.postimg.cc/0QJTsMbf/image.png)
164 | 
165 | ## License
166 | `MIT` license to ensure that you can use it in your project.
167 | 
168 | You can check the `LICENSE` file for more info.
169 | 
170 | 
171 | 
--------------------------------------------------------------------------------
/example/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "char_index" 16 | version = "0.1.4" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "10ef8669476802b7127a0a97612a0c34113e949ee65c695e4ac259f1f49aaa25" 19 | 20 | [[package]] 21 | name = "example" 22 | version = "0.1.0" 23 | dependencies = [ 24 | "lliw", 25 | "synoptic", 26 | ] 27 | 28 | [[package]] 29 | name = "if_chain" 30 | version = "1.0.2" 31 | source = "registry+https://github.com/rust-lang/crates.io-index" 32 | checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" 33 | 34 | [[package]] 35 | name = "lliw" 36 | version = "0.2.0" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "2d502c8bcc35a4f7ca9a7ffb7ac27b15ba30b1b92c2d69a1e4437e2635d73af7" 39 | 40 | [[package]] 41 | name = "memchr" 42 | version = "2.5.0" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 45 | 46 | [[package]] 47 | name = "regex" 48 | version = "1.9.1" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" 51 | dependencies = [ 52 | "aho-corasick", 53 | "memchr", 54 | "regex-automata", 55 | "regex-syntax", 56 | ] 57 | 58 | [[package]] 59 | name = "regex-automata" 60 | version = "0.3.3" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" 63 | dependencies = [ 64 | "aho-corasick", 65 | "memchr", 66 | "regex-syntax", 67 | ] 68 | 69 | [[package]] 70 | name = "regex-syntax" 71 | version = "0.7.4" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" 74 | 75 | [[package]] 76 | name = "synoptic" 77 | version = "2.2.7" 78 | dependencies = [ 79 | "char_index", 80 | "if_chain", 81 | "regex", 82 | "unicode-width", 83 | ] 84 | 85 | [[package]] 86 | name = "unicode-width" 87 | version = "0.2.0" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" 90 | -------------------------------------------------------------------------------- /example/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "example" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [profile.release] 7 | codegen-units = 1 8 | lto = "fat" 9 | 10 | [dependencies] 11 | lliw = "0.2.0" 12 | synoptic = { path = "../" } 13 | -------------------------------------------------------------------------------- /example/src/.todo.md: -------------------------------------------------------------------------------- 1 | - [x] Hanging tokens 2 | - [x] Split by line 3 | - [x] Coloured output 4 | - [x] Implement into cactus for interactive testing 5 | - [x] Work with line buffering [EASY] 6 | - [x] Single Line Regex tokens [HARD] 7 | - [x] Unicode support [MEDIUM] 8 | - [x] Allow for insertion 9 | - [x] Allow for deletion 10 | - [x] Implement line trimming [MEDIUM] 11 | - [x] 
Cleaner API & proper documentation [MEDIUM] 12 | - [x] Optimise [HARD] 13 | - [x] More efficient retokenization on append 14 | - [x] Only retokenize when necessary 15 | - [x] Stop empty tokens from being generated (with regex) [EASY] 16 | - [x] Allow bounded token escaping (\") 17 | - [x] Allow interpolation ("{interpolated}") 18 | - [x] Investigate and solve weird multiline bounded token bug (see multiline blocks in markdown) 19 | - [x] Default syntax highlighters 20 | - [x] JavaScript 21 | - [x] JSON 22 | - [x] Java 23 | - [x] Markdown 24 | - [x] TOML 25 | - [x] Yaml 26 | - [x] XML 27 | - [x] CSV 28 | - [x] Kotlin 29 | - [x] Swift 30 | - [x] C 31 | - [x] C++ 32 | - [x] C# 33 | - [x] R 34 | - [x] Go 35 | - [x] PHP 36 | - [x] Python 37 | - [x] HTML 38 | - [x] CSS 39 | - [x] SQL 40 | - [x] BASH 41 | - [x] Lua 42 | - [x] Rust 43 | - [x] TypeScript 44 | - [x] Ruby 45 | - [x] Dart 46 | - [x] MATLAB 47 | - [x] Assembly 48 | - [x] Perl 49 | - [x] Visual Basic 50 | - [x] Scala 51 | - [x] Prolog 52 | - [x] Haskell 53 | -------------------------------------------------------------------------------- /example/src/main.rs: -------------------------------------------------------------------------------- 1 | use lliw::Fg; 2 | use synoptic::{Highlighter, TokOpt}; 3 | use std::time::Instant; 4 | 5 | pub static CODE: &str = "\ 6 | /* 7 | Multiline comments 8 | Work great 9 | */ 10 | 11 | pub fn main() -> bool { 12 | // Demonstrate syntax highlighting in Rust! 13 | println!(\"Full Unicode Support: 你好\"); 14 | // Interpolation 15 | let name = \"peter\"; 16 | println!(\"My name is {name}, nice to meet you!\"); 17 | // Bye! 18 | return true; 19 | } 20 | "; 21 | 22 | fn main() { 23 | benchmark(); 24 | /* 25 | let mut code: Vec = CODE.split('\n').map(|x| x.to_string()).collect(); 26 | let mut h = synoptic::from_extension("rs", 4).unwrap(); 27 | h.run(&code); 28 | for (y, line) in code.iter().enumerate() { 29 | print!("{: <3} |", y); 30 | for token in h.line(y, &line) { 31 | match token { 32 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 33 | TokOpt::None(text) => print!("{text}"), 34 | } 35 | } 36 | println!(); 37 | } 38 | */ 39 | } 40 | 41 | fn colour(kind: &str) -> Fg { 42 | match kind { 43 | "string" => Fg::Rgb(54, 161, 102), 44 | "boolean" => Fg::Rgb(54, 161, 102), 45 | "comment" => Fg::Rgb(108, 107, 90), 46 | "digit" => Fg::Rgb(157, 108, 124), 47 | "keyword" => Fg::Rgb(91, 157, 72), 48 | "attribute" => Fg::Rgb(95, 145, 130), 49 | "character" => Fg::Rgb(125, 151, 38), 50 | "namespace" => Fg::Rgb(125, 151, 38), 51 | "struct" => Fg::Rgb(125, 151, 38), 52 | "operator" => Fg::Rgb(125, 151, 38), 53 | "header" => Fg::Rgb(54, 161, 102), 54 | "reference" => Fg::Rgb(125, 151, 38), 55 | "type" => Fg::Rgb(165, 152, 13), 56 | "function" => Fg::Rgb(174, 115, 19), 57 | "macro" => Fg::Rgb(157, 108, 124), 58 | "heading" => Fg::Rgb(174, 115, 19), 59 | "tag" => Fg::Rgb(174, 115, 19), 60 | "bold" => Fg::Rgb(157, 108, 124), 61 | "strikethrough" => Fg::Rgb(54, 161, 102), 62 | "italic" => Fg::Rgb(125, 151, 38), 63 | "block" => Fg::Rgb(125, 151, 38), 64 | "table" => Fg::Rgb(125, 151, 38), 65 | "type" => Fg::Rgb(165, 152, 13), 66 | "linebreak" => Fg::Rgb(54, 161, 102), 67 | "math" => Fg::Rgb(54, 161, 102), 68 | "footnote" => Fg::Rgb(108, 107, 90), 69 | "quote" => Fg::Rgb(157, 108, 124), 70 | "list" => Fg::Rgb(91, 157, 72), 71 | "image" => Fg::Rgb(125, 151, 38), 72 | "link" => Fg::Rgb(165, 152, 13), 73 | "key" => Fg::Rgb(165, 152, 13), 74 | _ => panic!("Unknown token name {kind}"), 75 | } 76 | } 77 | 
78 | fn benchmark() { 79 | let start = Instant::now(); 80 | let mut h = synoptic::from_extension("rs", 4).unwrap(); 81 | let end = Instant::now(); 82 | println!("Initialisation time: {:?}", end - start); 83 | 84 | let mut file = std::fs::read_to_string("/home/luke/dev/rust/kaolinite/demos/8.rs").unwrap().split('\n').map(|x| x.to_string()).collect::>(); 85 | let viewport_file1 = file.iter().take(10).cloned().collect::>(); 86 | let viewport_file2 = file.iter().take(100).cloned().collect::>(); 87 | let viewport_file3 = file.iter().take(1000).cloned().collect::>(); 88 | 89 | let start = Instant::now(); 90 | h.run(&viewport_file1); 91 | let end = Instant::now(); 92 | println!("Run time ({}): {:?}", 10, end - start); 93 | let start = Instant::now(); 94 | h.run(&viewport_file2); 95 | let end = Instant::now(); 96 | println!("Run time ({}): {:?}", 100, end - start); 97 | let start = Instant::now(); 98 | h.run(&viewport_file3); 99 | let end = Instant::now(); 100 | println!("Run time ({}): {:?}", 1000, end - start); 101 | let start = Instant::now(); 102 | h.run(&file); 103 | let end = Instant::now(); 104 | println!("Run time ({}): {:?}", file.len(), end - start); 105 | 106 | let mut h = synoptic::from_extension("rs", 4).unwrap(); 107 | //file[9996] = "/*".to_string(); 108 | h.run(&file); 109 | 110 | for (mut y, line) in file.iter().skip(9996).take(7).enumerate() { 111 | y += 9996; 112 | print!("{: <3} |", y); 113 | for token in h.line(y, &line) { 114 | match token { 115 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 116 | TokOpt::None(text) => print!("{text}"), 117 | } 118 | } 119 | println!(); 120 | } 121 | 122 | let start = Instant::now(); 123 | 124 | h.edit(10000, &"/* this is a test pub */ pub fn egg() return 3 + 4".to_string()); 125 | file[10000] = "/* this is a test pub */ pub fn egg() return 3 + 4".to_string(); 126 | h.edit(10004, &"We are all living in a simulation".to_string()); 127 | file[10004] = "We are all living in a simulation".to_string(); 128 | for i in 1..10000 { 129 | h.edit(i, &file[i+1]); 130 | file[i] = file[i+1].clone() 131 | } 132 | 133 | for (mut y, line) in file.iter().skip(9996).take(7).enumerate() { 134 | y += 9996; 135 | print!("{: <3} |", y); 136 | for token in h.line(y, &line) { 137 | match token { 138 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 139 | TokOpt::None(text) => print!("{text}"), 140 | } 141 | } 142 | println!(); 143 | } 144 | 145 | let end = Instant::now(); 146 | println!("Edit time: {:?}", end - start); 147 | } 148 | -------------------------------------------------------------------------------- /examples/debug.rs: -------------------------------------------------------------------------------- 1 | use synoptic::{Highlighter, TokOpt, trim_fit}; 2 | use lliw::Fg; 3 | 4 | pub static CODE: &str = r#"f"""#; 5 | 6 | fn main() { 7 | let mut h = synoptic::from_extension("py", 4).unwrap(); 8 | let mut code: Vec = CODE.split('\n').map(|x| x.to_string()).collect(); 9 | h.run(&code); 10 | // Initial state 11 | for token in &h.line(0, &code[0]) { 12 | match token { 13 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 14 | TokOpt::None(text) => print!("{text}"), 15 | } 16 | } 17 | println!(); 18 | // Try changing it 19 | code[0] = r#"f"{}""#.to_string(); 20 | h.edit(0, &code[0]); 21 | // Observe incorrect new state 22 | for token in &h.line(0, &code[0]) { 23 | match token { 24 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 25 | TokOpt::None(text) 
=> print!("{text}"),
26 |         }
27 |     }
28 |     println!();
29 | }
30 | 
31 | fn colour(name: &str) -> Fg {
32 |     // This function will take in the function name
33 |     // And it will output the correct foreground colour
34 |     match name {
35 |         "comment" => Fg::LightBlack,
36 |         "digit" => Fg::Purple,
37 |         "string" => Fg::Green,
38 |         "macros" => Fg::LightPurple,
39 |         "boolean" => Fg::Blue,
40 |         "keyword" => Fg::Yellow,
41 |         "function" => Fg::Red,
42 |         "operator" => Fg::LightBlack,
43 |         "link" => Fg::LightBlue,
44 |         "list" => Fg::Green,
45 |         "insertion" => Fg::Green,
46 |         "deletion" => Fg::Red,
47 |         "reference" => Fg::Purple,
48 |         _ => panic!("unknown token {name}"),
49 |     }
50 | }
51 | 
--------------------------------------------------------------------------------
/examples/example.rs:
--------------------------------------------------------------------------------
1 | use synoptic::{Highlighter, TokOpt};
2 | use lliw::Fg;
3 | 
4 | // Let's use some demonstration code
5 | pub static CODE: &str = "\
6 | /*
7 | Multiline comments
8 | Work great
9 | */
10 | 
11 | pub fn main() -> bool {
12 |     // Demonstrate syntax highlighting in Rust!
13 |     println!(\"Full Unicode Support: 你好\");
14 |     // Interpolation
15 |     let name = \"peter\";
16 |     println!(\"My name is {name}, nice to meet you!\");
17 |     // Bye!
18 |     return true;
19 | }
20 | ";
21 | 
22 | fn main() {
23 |     // Setting up the highlighter
24 |     // The `4` here just means tabs are shown as 4 spaces
25 |     let mut h = Highlighter::new(4);
26 | 
27 |     // Bounded tokens are multiline tokens
28 |     // Let's define multiline comments
29 |     // In Rust, these start with /* and end with */
30 |     // Remember to escape any regex characters (like *)
31 |     // The false here is whether or not to allow escaping
32 |     // When true, we ignore any end markers with a backslash in front of them
33 |     // So, if it were true: `/* this is a comment \*/ this is still a comment */ this isn't`
34 |     h.bounded("comment", r"/\*", r"\*/", false);
35 | 
36 |     // Now let's define a string
37 |     // In Rust, format strings can be interpolated into between {}
38 |     // We first define the name of the token, the starting and ending pattern
39 |     // Then the starting and ending pattern of the interpolation section
40 |     // We also want strings to be escapable e.g. "here's a quote: \" this is still a string"
41 |     // Hence the true
42 |     h.bounded_interp("string", "\"", "\"", "\\{", "\\}", true);
43 | 
44 |     // Now let's define some keywords
45 |     // These are single line snippets of text
46 |     h.keyword("keyword", r"\b(pub|fn|bool|let|return)\b");
47 | 
48 |     // Let's get numbers being highlighted
49 |     h.keyword("digit", r"\b\d+(?:\.\d+)?\b");
50 | 
51 |     // ... and some remaining syntax rules
52 |     h.keyword("comment", "(//.*)$");
53 |     h.keyword("boolean", r"\b(true|false)\b");
54 |     h.keyword("macros", "[a-zA-Z_]+\\!");
55 |     h.keyword("function", r"([a-z][a-zA-Z_]*)\s*\(");
56 | 
57 |     // Now let's run the highlighter on the example code
58 |     // The run method takes a vector of strings (for each line)
59 |     let code: Vec<String> = CODE
60 |         .split('\n')
61 |         .map(|line| line.to_string())
62 |         .collect();
63 |     // Now we're ready to go
64 |     h.run(&code);
65 | 
66 |     // Let's render the output
67 |     for (line_number, line) in code.iter().enumerate() {
68 |         // Line returns tokens for the corresponding line
69 |         for token in h.line(line_number, &line) {
70 |             // Tokens can either require highlighting or not require highlighting
71 |             match token {
72 |                 // This is some text that needs to be highlighted
73 |                 TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset),
74 |                 // This is just normal text with no highlighting
75 |                 TokOpt::None(text) => print!("{text}"),
76 |             }
77 |         }
78 |         // Insert a newline at the end of every line
79 |         println!();
80 |     }
81 | }
82 | 
83 | fn colour(name: &str) -> Fg {
84 |     // This function will take in the function name
85 |     // And it will output the correct foreground colour
86 |     match name {
87 |         "comment" => Fg::LightBlack,
88 |         "digit" => Fg::Purple,
89 |         "string" => Fg::Green,
90 |         "macros" => Fg::LightPurple,
91 |         "boolean" => Fg::Blue,
92 |         "keyword" => Fg::Yellow,
93 |         "function" => Fg::Red,
94 |         _ => panic!("unknown token name"),
95 |     }
96 | }
97 | 
--------------------------------------------------------------------------------
/examples/trimming.rs:
--------------------------------------------------------------------------------
1 | use synoptic::{Highlighter, TokOpt, trim};
2 | use lliw::Fg;
3 | 
4 | pub static CODE: &str = r#"
5 | arst的st了st在st为sts
6 | art的st了st在st为sts
7 | hello world!
8 | "#; 9 | 10 | fn main() { 11 | let mut h = synoptic::from_extension("diff", 4).unwrap(); 12 | let mut code: Vec = CODE.split('\n').map(|x| x.to_string()).collect(); 13 | h.run(&code); 14 | // Trim and render 15 | for length in 0..30 { 16 | for (line_no, line) in code.iter().enumerate() { 17 | let tokens = h.line(line_no, &line); 18 | let tokens = trim(&tokens, 0, length, 4); 19 | for token in &tokens { 20 | // Tokens can either require highlighting or not require highlighting 21 | match token { 22 | // This is some text that needs to be highlighted 23 | TokOpt::Some(text, kind) => print!("{}{text}{}", colour(&kind), Fg::Reset), 24 | // This is just normal text with no highlighting 25 | TokOpt::None(text) => print!("{text}"), 26 | } 27 | } 28 | println!("|"); 29 | } 30 | } 31 | } 32 | 33 | fn colour(name: &str) -> Fg { 34 | // This function will take in the function name 35 | // And it will output the correct foreground colour 36 | match name { 37 | "comment" => Fg::LightBlack, 38 | "digit" => Fg::Purple, 39 | "string" => Fg::Green, 40 | "macros" => Fg::LightPurple, 41 | "boolean" => Fg::Blue, 42 | "keyword" => Fg::Yellow, 43 | "function" => Fg::Red, 44 | "operator" => Fg::LightBlack, 45 | "link" => Fg::LightBlue, 46 | "list" => Fg::Green, 47 | "insertion" => Fg::Green, 48 | "deletion" => Fg::Red, 49 | "reference" => Fg::Purple, 50 | _ => panic!("unknown token {name}"), 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /nolang.txt: -------------------------------------------------------------------------------- 1 | Total: 36 languages 2 | Languages known to not be supported yet: 14 3 | 4 | Zsh 5 | Vala 6 | Julia 7 | Ini 8 | Haml 9 | Fortran 10 | Erlang 11 | Dockerfile 12 | D 13 | Crystal 14 | Clojure 15 | Cobol 16 | Batch file 17 | Ada 18 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use unicode_width::UnicodeWidthStr; 2 | pub use regex::Regex; 3 | use std::collections::HashMap; 4 | use std::ops::Range; 5 | use std::cmp::Ordering; 6 | use char_index::IndexedChars; 7 | use nohash_hasher::NoHashHasher; 8 | use std::hash::BuildHasherDefault; 9 | use std::sync::OnceLock; 10 | 11 | /// Represents a point in a 2d space 12 | #[derive(Debug, Clone, PartialEq)] 13 | pub struct Loc { 14 | y: usize, 15 | x: usize, 16 | } 17 | 18 | /// A definition of an Atom 19 | /// See [Atom] for more information 20 | #[derive(Debug, Clone)] 21 | pub struct AtomDef { 22 | /// Name of the atom 23 | name: String, 24 | /// The kind of atom 25 | kind: AtomKind, 26 | /// The corresponding bounded token definition 27 | tok: Option, 28 | /// The regex expression that defines this atom 29 | exp: Regex, 30 | } 31 | 32 | /// The kind of atom being represented 33 | #[derive(Debug, Clone, PartialEq)] 34 | pub enum AtomKind { 35 | /// This is the start atom of a token, for example /* for a multiline comment 36 | Start, 37 | /// This is the end atom of a token, for example */ for a multiline comment 38 | End, 39 | /// Sometimes bounded tokens have the same start and end atom, e.g. 
a string having a " to 40 | /// start and an " to end, a hybrid token allows atoms to be used to start and end a token in 41 | /// cases where due to having the same start and end atom definitions, their kind is ambiguous 42 | Hybrid, 43 | /// This is just a normal keyword 44 | Keyword, 45 | /// This is a start marker for interpolation 46 | InterpolateStart, 47 | /// This is an end marker for interpolation 48 | InterpolateEnd, 49 | } 50 | 51 | /// An atom is a portion of text within a document that is significant. 52 | /// An atom only covers one line. 53 | /// Atoms cover keywords as well as start and end indicators for bounded tokens 54 | /// E.g., in a string, the atoms would be the starting " and the ending " 55 | #[derive(Debug, Clone, PartialEq)] 56 | pub struct Atom { 57 | /// Name of the atom 58 | name: String, 59 | /// The kind of atom 60 | kind: AtomKind, 61 | /// The corresponding token 62 | tok: Option, 63 | /// The range covered by the atom 64 | x: Range, 65 | /// Whether or not there is a preceding backslash 66 | backslashed: bool, 67 | } 68 | 69 | /// Definition for a bounded token, these are tokens that can cover multiple lines. 70 | /// Things like multiline comments and strings are examples of this. 71 | /// They work well for buffering files where you are unaware of where the end indicator may be as 72 | /// it occurs further down in the file. 73 | #[derive(Debug, Clone)] 74 | pub struct BoundedDef { 75 | /// Whether or not this token can be escaped 76 | escapable: bool, 77 | } 78 | 79 | /// This is a TokenRef, which contains detailed information on what a token is 80 | #[derive(Debug, Clone, PartialEq)] 81 | pub enum TokenRef { 82 | /// Keyword tokens 83 | Keyword { 84 | /// The name of the bounded token 85 | name: String, 86 | /// A reference to the keyword atom 87 | atom: Loc, 88 | }, 89 | /// Bounded tokens 90 | Bounded { 91 | /// The name of the bounded token 92 | name: String, 93 | /// A reference to the start atom 94 | start: Loc, 95 | /// A reference to the end atom 96 | end: Option, 97 | }, 98 | } 99 | 100 | /// This is an enum for representing tokens. 101 | #[derive(Debug, Clone)] 102 | pub enum TokOpt { 103 | /// The Some variant represents a token being present in the format Some(TEXT, NAME). 104 | /// 105 | /// So for a comment token, you can expect to see Some("/* comment */", "comment") 106 | /// provided that you defined the comment using either the keyword or bounded function on 107 | /// [Highlighter] 108 | Some(String, String), 109 | /// The None variant represents just plain text. 
110 | None(String), 111 | } 112 | 113 | impl TokOpt { 114 | /// Works out if this token is empty, and thus redundant 115 | pub fn is_empty(&self) -> bool { 116 | let (TokOpt::Some(text, _) | TokOpt::None(text)) = self; 117 | text.len() == 0 118 | } 119 | 120 | /// Finds the text of a tokopt 121 | pub fn text(&self) -> &String { 122 | let (TokOpt::Some(text, _) | TokOpt::None(text)) = self; 123 | text 124 | } 125 | 126 | /// Finds the text of a tokopt (mutable) 127 | pub fn text_mut(&mut self) -> &mut String { 128 | let (TokOpt::Some(ref mut text, _) | TokOpt::None(ref mut text)) = self; 129 | text 130 | } 131 | 132 | /// This will remove the first character from the end of this token 133 | pub fn nibble_front(&mut self, tab_width: usize) -> Option { 134 | let (TokOpt::Some(ref mut text, _) | TokOpt::None(ref mut text)) = self; 135 | let ch = text.chars().nth(0)?; 136 | text.remove(0); 137 | let wid = width(&ch.to_string(), tab_width); 138 | if wid > 1 { 139 | *text = format!("{}{text}", " ".repeat(wid.saturating_sub(1))); 140 | } 141 | Some(ch) 142 | } 143 | 144 | /// This will remove the last character from the end of this token 145 | pub fn nibble_back(&mut self, tab_width: usize) -> Option { 146 | let (TokOpt::Some(ref mut text, _) | TokOpt::None(ref mut text)) = self; 147 | let ch = text.chars().last()?; 148 | text.pop(); 149 | let wid = width(&ch.to_string(), tab_width); 150 | if wid > 1 { 151 | *text = format!("{text}{}", " ".repeat(wid.saturating_sub(1))); 152 | } 153 | Some(ch) 154 | } 155 | 156 | pub fn skip(&mut self, idx: usize, tab_width: usize) { 157 | let mut at_disp = 0; 158 | let mut at_char = 0; 159 | let mut padding = 0; 160 | for i in self.text().chars() { 161 | match at_disp.cmp(&idx) { 162 | // Exactly at index, skip up to this point 163 | Ordering::Equal => break, 164 | // We skipped too much, indicating that padding is needed 165 | Ordering::Greater => { 166 | padding = at_disp - idx; 167 | break; 168 | } 169 | _ => { 170 | at_disp += width(&i.to_string(), tab_width); 171 | at_char += 1; 172 | } 173 | } 174 | } 175 | *self.text_mut() = " ".repeat(padding) + &self.text().chars().skip(at_char).collect::(); 176 | } 177 | 178 | pub fn take(&mut self, idx: usize, tab_width: usize) { 179 | let mut at_disp = 0; 180 | let mut at_char = 0; 181 | let mut padding = 0; 182 | for i in self.text().chars() { 183 | match at_disp.cmp(&idx) { 184 | // Exactly at index, take up to this point 185 | Ordering::Equal => break, 186 | // We took too much, indicating that padding is needed 187 | Ordering::Greater => { 188 | padding = at_disp - idx; 189 | at_char -= 1; 190 | break; 191 | } 192 | _ => { 193 | at_disp += width(&i.to_string(), tab_width); 194 | at_char += 1; 195 | } 196 | } 197 | } 198 | *self.text_mut() = self.text().chars().take(at_char).collect::() + &" ".repeat(padding); 199 | } 200 | } 201 | 202 | /// This is the main struct that will highlight your document 203 | #[derive(Debug, Clone)] 204 | pub struct Highlighter { 205 | /// The list of atoms, encapsulated within an inner vector for atoms on the same line 206 | pub atoms: Vec>, 207 | /// The list of atom definitions to be used at atomization 208 | pub atom_def: Vec, 209 | /// The list of bounded definitions to be used at tokenization 210 | pub bounded_def: Vec, 211 | /// A reference to what tokens lie on which line numbers 212 | pub line_ref: Vec>, 213 | /// A list of the resulting tokens generated from run and append 214 | pub tokens: Vec, 215 | /// How many spaces a tab character should be 216 | pub tab_width: usize, 
217 | /// For purposes of tokenization 218 | tokenize_state: Option, 219 | tokenize_interp: bool, 220 | } 221 | 222 | impl Highlighter { 223 | /// Creates a new highlighter 224 | pub fn new(tab_width: usize) -> Self { 225 | Self { 226 | atoms: vec![], 227 | atom_def: vec![], 228 | bounded_def: vec![], 229 | line_ref: vec![], 230 | tokens: vec![], 231 | tab_width, 232 | tokenize_state: None, 233 | tokenize_interp: false, 234 | } 235 | } 236 | 237 | /// Register a new keyword token, provide its name and regex 238 | pub fn keyword>(&mut self, name: S, exp: &str) { 239 | let name = name.into(); 240 | let exp = Regex::new(exp).expect("Invalid regex!"); 241 | self.atom_def.push(AtomDef { name, exp, kind: AtomKind::Keyword, tok: None }); 242 | } 243 | 244 | /// Register a new bounded token, with a start and end, 245 | /// e.g. a multiline comment having starting /* and an ending */ to delimit it 246 | /// The last argument is a boolean 247 | /// when true, tokens can be escaped with a backslash e.g. "\"" would be a string of a quote 248 | pub fn bounded>(&mut self, name: S, start: S, end: S, escapable: bool) { 249 | let (name, start, end) = (name.into(), start.into(), end.into()); 250 | // Gather atom information 251 | let start_exp = Regex::new(&start).expect("Invalid start regex"); 252 | let end_exp = Regex::new(&end).expect("Invalid end regex"); 253 | let hybrid = start == end; 254 | // Register bounded definition 255 | let idx = self.bounded_def.len(); 256 | self.bounded_def.push(BoundedDef { 257 | escapable, 258 | }); 259 | // Register atom definitions 260 | if hybrid { 261 | self.atom_def.push(AtomDef { 262 | name, 263 | exp: start_exp, 264 | kind: AtomKind::Hybrid, 265 | tok: Some(idx), 266 | }); 267 | } else { 268 | self.atom_def.push(AtomDef { 269 | name: name.clone(), 270 | exp: start_exp, 271 | kind: AtomKind::Start, 272 | tok: Some(idx), 273 | }); 274 | self.atom_def.push(AtomDef { 275 | name, 276 | exp: end_exp, 277 | kind: AtomKind::End, 278 | tok: Some(idx), 279 | }); 280 | } 281 | } 282 | 283 | /// Register a new interpolatable bounded token, with a start and end, 284 | /// e.g. a string as a bounded token, but allowing substitution between {} 285 | /// The last argument is a boolean 286 | /// when true, tokens can be escaped with a backslash e.g. 
"\"" would be a string of a quote 287 | pub fn bounded_interp>(&mut self, name: S, start: S, end: S, i_start: S, i_end: S, escapable: bool) { 288 | let (name, start, end, i_start, i_end) = (name.into(), start.into(), end.into(), i_start.into(), i_end.into()); 289 | if i_start == i_end { panic!("start and end markers for interpolation must not be equal!"); } 290 | // Gather atom information 291 | let start_exp = Regex::new(&start).expect("Invalid start regex"); 292 | let end_exp = Regex::new(&end).expect("Invalid end regex"); 293 | let hybrid = start == end; 294 | let i_start_exp = Regex::new(&i_start).expect("Invalid interpolation start regex"); 295 | let i_end_exp = Regex::new(&i_end).expect("Invalid interpolation end regex"); 296 | // Register bounded definition 297 | let idx = self.bounded_def.len(); 298 | self.bounded_def.push(BoundedDef { 299 | escapable, 300 | }); 301 | // Register atom definitions 302 | if hybrid { 303 | self.atom_def.push(AtomDef { 304 | name: name.clone(), 305 | exp: start_exp, 306 | kind: AtomKind::Hybrid, 307 | tok: Some(idx), 308 | }); 309 | } else { 310 | self.atom_def.push(AtomDef { 311 | name: name.clone(), 312 | exp: start_exp, 313 | kind: AtomKind::Start, 314 | tok: Some(idx), 315 | }); 316 | self.atom_def.push(AtomDef { 317 | name: name.clone(), 318 | exp: end_exp, 319 | kind: AtomKind::End, 320 | tok: Some(idx), 321 | }); 322 | } 323 | self.atom_def.push(AtomDef { 324 | name: name.clone(), 325 | exp: i_start_exp, 326 | kind: AtomKind::InterpolateStart, 327 | tok: Some(idx), 328 | }); 329 | self.atom_def.push(AtomDef { 330 | name: name.clone(), 331 | exp: i_end_exp, 332 | kind: AtomKind::InterpolateEnd, 333 | tok: Some(idx), 334 | }); 335 | } 336 | 337 | /// Do an initial pass on a vector of lines. 338 | /// 339 | /// Note that this will overwrite any existing information, 340 | /// use append to add extra lines to the document. 341 | pub fn run(&mut self, lines: &[String]) { 342 | // Atomize every line 343 | self.atoms = lines.iter().map(|l| self.atomize(l)).collect(); 344 | self.tokenize(); 345 | } 346 | 347 | /// Appends a line to the highlighter. 348 | pub fn append(&mut self, line: &str) { 349 | // Atomize this line 350 | self.atoms.push(self.atomize(line)); 351 | self.line_ref.push(vec![]); 352 | self.tokenize_line(self.atoms.len().saturating_sub(1)); 353 | } 354 | 355 | /// Once you have called the run or append methods, you can use this function 356 | /// to retrieve individual lines by providing the original line text and the y index. 
357 | /// 358 | /// # Example 359 | /// ``` 360 | /// let highlighter = Highlighter::new(4); // Tab ('\t') has a display width of 4 361 | /// highlighter.keyword("kw", "keyword"); // All occurances of "keyword" will be classed as a token of "kw" 362 | /// highlighter.run(vec![ 363 | /// "this is a keyword".to_string(), 364 | /// "second line!".to_string() 365 | /// ]); 366 | /// // Get the TokOpt for the first line 367 | /// highlighter.line(0, &"this is a keyword".to_string()) 368 | /// // Get the TokOpt for the second line 369 | /// highlighter.line(1, &"second line!".to_string()) 370 | /// ``` 371 | pub fn line(&self, y: usize, line: &str) -> Vec { 372 | let line = line.replace("\t", &" ".repeat(self.tab_width)); 373 | let len = line.chars().count(); 374 | let mut result = vec![]; 375 | let mut registry: HashMap = HashMap::default(); 376 | // Create token registry for this line 377 | for token in self.line_ref[y].iter().map(|t| &self.tokens[*t]) { 378 | match token { 379 | // Register bounded token 380 | TokenRef::Bounded { start, end, .. } => { 381 | let start = if start.y != y { 0 } else { self.atoms[start.y][start.x].x.start }; 382 | let end = end.clone() 383 | .map(|end| if end.y != y { len } else { self.atoms[end.y][end.x].x.end }) 384 | .unwrap_or(len); 385 | registry.insert(start, (end, token)); 386 | } 387 | // Register keyword token 388 | TokenRef::Keyword { atom, .. } => { 389 | //println!("{:?}", self.atoms); 390 | let start = self.atoms[atom.y][atom.x].x.start; 391 | let end = self.atoms[atom.y][atom.x].x.end; 392 | registry.insert(start, (end, token)); 393 | } 394 | } 395 | } 396 | // Process tokens into TokOpt format 397 | let mut chars = line.chars(); 398 | let mut x = 0; 399 | while x < len { 400 | if let Some((end, TokenRef::Bounded { name, .. } | TokenRef::Keyword { name, .. })) = registry.get(&x) { 401 | // Process token 402 | let text = chars.by_ref().take(end - x).collect::(); 403 | result.push(TokOpt::Some(text, name.clone())); 404 | x = *end; 405 | } else { 406 | // Process plain text 407 | if let Some(TokOpt::None(ref mut s)) = result.last_mut() { 408 | s.push(chars.next().unwrap()); 409 | } else { 410 | result.push(TokOpt::None(chars.next().unwrap().to_string())); 411 | } 412 | x += 1; 413 | } 414 | } 415 | result 416 | } 417 | 418 | /// Whenever a character is deleted or inserted on a line, 419 | /// call this function to update any tokens. 
420 | pub fn edit(&mut self, y: usize, line: &str) { 421 | let old_atoms = self.atoms[y].clone(); 422 | // Update the atoms on this line 423 | self.atoms[y] = self.atomize(line); 424 | // Determine whether tokenisation is necessary by checking atomic changes 425 | if self.retokenization_needed(&old_atoms, &self.atoms[y]) { 426 | self.tokenize(); 427 | } 428 | } 429 | 430 | /// Takes two lists of atoms and determines if retokenization is required in the first place 431 | /// This method will ignore index (as this is expected to change when editing) 432 | /// Has been shown to make editing events 500x faster to apply (where no atoms are modified) 433 | fn retokenization_needed(&self, old: &[Atom], new: &Vec) -> bool { 434 | // List lengths differ => atoms have been added or deleted 435 | if old.len() != new.len() { return true; } 436 | for (o, n) in old.iter().zip(new) { 437 | // If there is ever ANY discrepancy between atoms, we must retokenize 438 | if !(o.name == n.name && o.kind == n.kind && o.tok == n.tok && o.backslashed == n.backslashed) { 439 | return true; 440 | } 441 | } 442 | false 443 | } 444 | 445 | /// Whenever a line is inserted into the document, 446 | /// call this function to update any tokens. 447 | pub fn insert_line(&mut self, y: usize, line: &str) { 448 | self.atoms.insert(y, self.atomize(line)); 449 | self.tokenize(); 450 | } 451 | 452 | /// Whenever a line is removed from a document, 453 | /// call this function to update any tokens. 454 | pub fn remove_line(&mut self, y: usize) { 455 | self.atoms.remove(y); 456 | self.tokenize(); 457 | } 458 | 459 | /// This process will turn a line into a vector of atoms 460 | fn atomize(&self, line: &str) -> Vec { 461 | let line = IndexedChars::new(line); 462 | let mut atoms = vec![]; 463 | // For each atom definition 464 | for def in &self.atom_def { 465 | let occurances = find_all(&def.exp, line.as_str(), self.tab_width); 466 | // Register all occurances of any atom 467 | for x in occurances { 468 | if !x.is_empty() { 469 | // Work out how many backslashes there are behind this atom (for escaping) 470 | let mut backslash_count = 0; 471 | let range = (0..x.start).rev(); 472 | for idx in range { 473 | if let Some('\\') = line.get_char(idx) { 474 | backslash_count += 1; 475 | } else { 476 | break; 477 | } 478 | } 479 | // Push out the atom 480 | atoms.push(Atom { 481 | kind: def.kind.clone(), 482 | name: def.name.clone(), 483 | tok: def.tok, 484 | // An odd number of backslashes = escaped 485 | backslashed: backslash_count % 2 != 0, 486 | x, 487 | }); 488 | } 489 | } 490 | } 491 | // Order them based on start index 492 | atoms.sort_by(|a, b| a.x.start.cmp(&b.x.start)); 493 | atoms 494 | } 495 | 496 | fn tokenize(&mut self) { 497 | self.tokenize_state = None; 498 | self.tokenize_interp = false; 499 | self.line_ref = vec![]; 500 | self.atoms.iter().enumerate().for_each(|_| self.line_ref.push(vec![])); 501 | self.tokens = vec![]; 502 | for y in 0..self.atoms.len() { 503 | self.tokenize_line(y); 504 | } 505 | } 506 | 507 | fn tokenize_line(&mut self, y: usize) { 508 | let line_ref = self.line_ref.get_mut(y).unwrap(); 509 | let mut at_x = 0; 510 | let atoms = &self.atoms[y]; 511 | for (x, atom) in atoms.iter().enumerate() { 512 | if atom.x.start < at_x { continue; } 513 | // Work out if this atom is to be ignored (due to escaping) 514 | if let Atom { tok: Some(t), backslashed, .. } = atom { 515 | if self.bounded_def[*t].escapable && *backslashed { 516 | continue; 517 | } 518 | } 519 | // Continue tokenising... 
520 | match atom { 521 | Atom { name, kind: AtomKind::Keyword, .. } => { 522 | if self.tokenize_state.is_none() || self.tokenize_interp { 523 | self.tokens.push(TokenRef::Keyword { 524 | name: name.clone(), 525 | atom: Loc { y, x }, 526 | }); 527 | line_ref.push(self.tokens.len().saturating_sub(1)); 528 | at_x = atom.x.end; 529 | } 530 | } 531 | Atom { name, kind: AtomKind::Start, tok, .. } => { 532 | if self.tokenize_interp { continue; } 533 | if self.tokenize_state.is_none() { 534 | self.tokenize_state = *tok; 535 | self.tokens.push(TokenRef::Bounded { 536 | name: name.clone(), 537 | start: Loc { y, x }, 538 | end: None, 539 | }); 540 | at_x = atom.x.end; 541 | } 542 | } 543 | Atom { kind: AtomKind::End, tok, .. } => { 544 | if self.tokenize_interp { continue; } 545 | if self.tokenize_state == *tok { 546 | self.tokenize_state = None; 547 | if let TokenRef::Bounded { ref mut end, .. } = self.tokens.last_mut().unwrap() { 548 | *end = Some(Loc { y, x }); 549 | at_x = atom.x.end; 550 | } 551 | line_ref.push(self.tokens.len().saturating_sub(1)); 552 | } 553 | } 554 | Atom { name, kind: AtomKind::Hybrid, tok, .. } => { 555 | if self.tokenize_interp { continue; } 556 | if self.tokenize_state.is_none() { 557 | // Start registering token 558 | self.tokenize_state = *tok; 559 | self.tokens.push(TokenRef::Bounded { 560 | name: name.clone(), 561 | start: Loc { y, x }, 562 | end: None, 563 | }); 564 | at_x = atom.x.end; 565 | } else if self.tokenize_state == *tok { 566 | // Stop registering token 567 | self.tokenize_state = None; 568 | if let TokenRef::Bounded { ref mut end, .. } = self.tokens.last_mut().unwrap() { 569 | *end = Some(Loc { y, x }); 570 | at_x = atom.x.end; 571 | } 572 | line_ref.push(self.tokens.len().saturating_sub(1)); 573 | } 574 | } 575 | Atom { kind: AtomKind::InterpolateStart, tok, .. } => { 576 | if self.tokenize_state == *tok { 577 | // End the current token 578 | if let TokenRef::Bounded { ref mut end, .. } = self.tokens.last_mut().unwrap() { 579 | *end = Some(Loc { y, x }); 580 | at_x = atom.x.end; 581 | } 582 | line_ref.push(self.tokens.len().saturating_sub(1)); 583 | // Register interpolation 584 | self.tokenize_interp = true; 585 | } 586 | } 587 | Atom { name, kind: AtomKind::InterpolateEnd, tok, .. 
} => { 588 | if self.tokenize_state == *tok { 589 | // Stop interpolating 590 | self.tokenize_interp = false; 591 | // Resume capturing the outer token 592 | self.tokens.push(TokenRef::Bounded { 593 | name: name.clone(), 594 | start: Loc { y, x }, 595 | end: None, 596 | }); 597 | at_x = atom.x.end; 598 | } 599 | } 600 | } 601 | if self.tokenize_state.is_some() { 602 | line_ref.push(self.tokens.len().saturating_sub(1)); 603 | } 604 | } 605 | if self.tokenize_state.is_some() { 606 | line_ref.push(self.tokens.len().saturating_sub(1)); 607 | } 608 | line_ref.dedup(); 609 | } 610 | } 611 | 612 | /// This will find all occurances of a string in a document (and return character indices) 613 | pub fn find_all(exp: &Regex, target: &str, tab_width: usize) -> Vec> { 614 | let mapping = create_mapping(target, tab_width); 615 | exp.captures_iter(target) 616 | // Get last capture 617 | .map(|c| c.iter().flatten().collect::>()) 618 | .map(|mut c| c.pop().unwrap()) 619 | // Extract end and start values 620 | .map(|m| mapping[&m.start()]..mapping[&m.end()]) 621 | .collect() 622 | } 623 | 624 | /// HashMap 625 | pub fn create_mapping(target: &str, tab_width: usize) -> HashMap::>> { 626 | let mut result: HashMap::>> = 627 | HashMap::with_capacity_and_hasher(target.len(), BuildHasherDefault::default()); 628 | result.insert(0, 0); 629 | let mut acc_byte = 0; 630 | let mut acc_char = 0; 631 | for c in target.chars() { 632 | acc_byte += c.len_utf8(); 633 | acc_char += if c == '\t' { tab_width } else { 1 }; 634 | result.insert(acc_byte, acc_char); 635 | } 636 | result 637 | } 638 | 639 | /// Utility function to determine the width of a string, with variable tab width 640 | #[must_use] 641 | pub fn width(st: &str, tab_width: usize) -> usize { 642 | let tabs = st.matches('\t').count(); 643 | (st.width() + tabs * tab_width).saturating_sub(tabs) 644 | } 645 | 646 | 647 | /// Trim utility function to trim down a line of tokens to offset text 648 | pub fn trim(input: &[TokOpt], start: usize) -> Vec { 649 | let mut opt: Vec = input.to_vec(); 650 | let mut total_width = 0; 651 | for i in &opt { 652 | let (TokOpt::Some(txt, _) | TokOpt::None(txt)) = i; 653 | total_width += txt.len(); 654 | } 655 | let width = total_width.saturating_sub(start); 656 | while total_width != width { 657 | if let Some(token) = opt.get_mut(0) { 658 | token.nibble_front(4); 659 | total_width -= 1; 660 | if token.is_empty() { 661 | opt.remove(0); 662 | } 663 | } else { 664 | break; 665 | } 666 | } 667 | opt 668 | } 669 | 670 | /// Trim utility function to trim down a line of tokens to offset text (with length) 671 | pub fn trim_fit(input: &[TokOpt], start: usize, length: usize, tab_width: usize) -> Vec { 672 | // Form a vector of tokens 673 | let mut opt: Vec = input.to_vec(); 674 | // (1) Find the location of the starting point 675 | let start_idx = find_tok_index(input, start, tab_width); 676 | // (2) Find the location of the ending point 677 | let end_idx = find_tok_index(input, start + length, tab_width); 678 | // Trim off start token (ahead of time) 679 | if let Some((start_tok, start_rel)) = start_idx { 680 | opt.get_mut(start_tok).unwrap().skip(start_rel, tab_width); 681 | } 682 | // Trim off end token (ahead of time) 683 | if let Some((end_tok, mut end_rel)) = end_idx { 684 | if start_idx.unwrap().0 == end_tok { 685 | // Same token for start and end! 
Adjust (to account for start trim) 686 | end_rel -= start_idx.unwrap().1; 687 | } 688 | opt.get_mut(end_tok).unwrap().take(end_rel, tab_width); 689 | } 690 | // Blitz all tokens firmly behind start 691 | if let Some((start_tok, _)) = start_idx { 692 | opt.drain(..start_tok); 693 | } 694 | // Blitz all tokens firmly ahead of length 695 | if let Some((mut end_tok, _)) = end_idx { 696 | if let Some((start_tok, _)) = start_idx { 697 | // Adjust end_tok after draining of start tokens 698 | end_tok -= start_tok; 699 | } 700 | if end_tok + 1 < opt.len() { 701 | opt.drain(end_tok + 1..); 702 | } 703 | } 704 | // If we can't satisfy start or end, then just return empty handed 705 | if start_idx.is_none() && end_idx.is_none() { 706 | opt = vec![]; 707 | } 708 | // Apply padding if applicable 709 | let mut total_width: usize = opt.iter().map(|tok| width(tok.text(), tab_width)).sum(); 710 | while total_width < length { 711 | if let Some(TokOpt::None(ref mut text)) = opt.last_mut() { 712 | *text += " "; 713 | total_width += 1; 714 | } else { 715 | // No tokens left, discontinue 716 | opt.push(TokOpt::None("".to_string())); 717 | } 718 | } 719 | // Return the result 720 | opt 721 | } 722 | 723 | /// Find the token index within a tokopt given a display index 724 | /// Returns (token_index, index_within_that_token) 725 | pub fn find_tok_index(input: &[TokOpt], disp_idx: usize, tab_width: usize) -> Option<(usize, usize)> { 726 | let mut total_width = 0; 727 | for (idx, token) in input.iter().enumerate() { 728 | let this_width = width(token.text(), tab_width); 729 | total_width += this_width; 730 | // Check if we've passed the display index 731 | if total_width > disp_idx { 732 | // We have, this token contains disp_idx, work out relative idx 733 | let rel_idx = this_width - (total_width - disp_idx); 734 | return Some((idx, rel_idx)); 735 | } 736 | } 737 | None 738 | } 739 | 740 | /// Function to obtain a syntax highlighter based on a file extension 741 | pub fn from_extension(ext: &str, tab_width: usize) -> Option { 742 | let mut result = match ext.to_lowercase().as_str() { 743 | "rs" => rust_syntax_highlighter().to_owned(), 744 | "asm" | "s" => asm_syntax_highlighter().to_owned(), 745 | "py" | "pyw" => python_syntax_highlighter().to_owned(), 746 | "rb" | "ruby" => ruby_syntax_highlighter().to_owned(), 747 | "cgi" | "pm" => cgi_syntax_highlighter().to_owned(), 748 | "lua" => lua_syntax_highlighter().to_owned(), 749 | "r" | "rproj" => r_syntax_highlighter().to_owned(), 750 | "go" => go_syntax_highlighter().to_owned(), 751 | "js" => js_syntax_highlighter().to_owned(), 752 | "ts" | "tsx" => ts_syntax_highlighter().to_owned(), 753 | "dart" => dart_syntax_highlighter().to_owned(), 754 | "c" | "h" => c_syntax_highlighter().to_owned(), 755 | "cpp" | "hpp" | "c++" | "cxx" | "cc" => cpp_syntax_highlighter().to_owned(), 756 | "cs" | "csproj" => cs_syntax_highlighter().to_owned(), 757 | "swift" => swift_syntax_highlighter().to_owned(), 758 | "json" => json_syntax_highlighter().to_owned(), 759 | "kt" => kotlin_syntax_highlighter().to_owned(), 760 | "class" | "java" => java_syntax_highlighter().to_owned(), 761 | "vb" => vb_syntax_highlighter().to_owned(), 762 | "m" => m_syntax_highlighter().to_owned(), 763 | "php" => php_syntax_highlighter().to_owned(), 764 | "scala" => scala_syntax_highlighter().to_owned(), 765 | "pl" | "prolog" => prolog_syntax_highlighter().to_owned(), 766 | "hs" => haskell_syntax_highlighter().to_owned(), 767 | "css" => css_syntax_highlighter().to_owned(), 768 | "html" | "htm" | "xhtml" => 
html_syntax_highlighter().to_owned(), 769 | "md" | "markdown" => markdown_syntax_highlighter().to_owned(), 770 | "toml" => toml_syntax_highlighter().to_owned(), 771 | "yaml" | "yml" => yaml_syntax_highlighter().to_owned(), 772 | "csv" => csv_syntax_highlighter().to_owned(), 773 | "sh" | "bash" | "bash_profile" | "bashrc" => shell_syntax_highlighter().to_owned(), 774 | "sql" | "sqlproj" => sql_syntax_highlighter().to_owned(), 775 | "xml" => xml_syntax_highlighter().to_owned(), 776 | "nu" => nushell_syntax_highlighter().to_owned(), 777 | "tex" => tex_syntax_highlighter().to_owned(), 778 | "diff" => diff_syntax_highlighter().to_owned(), 779 | _ => Highlighter::new(tab_width), 780 | }; 781 | result.tab_width = tab_width; 782 | Some(result) 783 | } 784 | 785 | fn add_html_keywords(h: &mut Highlighter, kw: &[&str]) { 786 | h.keyword("keyword", &format!(r"(?:<|>() 806 | .join("|") 807 | ), 808 | ); 809 | } 810 | 811 | fn bulk_add(h: &mut Highlighter, name: &str, kw: &[&str]) { 812 | h.keyword(name, &format!(r"({})", kw.join("|"))); 813 | } 814 | 815 | fn rust_syntax_highlighter() -> &'static Highlighter { 816 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 817 | HIGHLIGHTER.get_or_init(|| { 818 | let mut result = Highlighter::new(4); 819 | result.bounded("comment", r"/\*", r"\*/", false); 820 | result.keyword("comment", "(//.*)$"); 821 | result.bounded("string", "r#\"", "\"#", true); 822 | result.bounded("string", "r\"", "\"", true); 823 | result.bounded("string", "#\"", "\"#", true); 824 | result.bounded("string", "\"", "\"", true); 825 | result.bounded("attribute", r"\#\[", r"\]", false); 826 | result.bounded("attribute", r"\#!\[", r"\]", false); 827 | result.keyword("namespace", "([a-z_][A-Za-z0-9_]*)::"); 828 | add_keywords(&mut result, &[ 829 | "as", "break", "const", "continue", "char", "crate", "else", "enum", "extern", 830 | "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", 831 | "pub", "ref", "return", "self", "static", "struct", "super", "trait", "type", 832 | "unsafe", "use", "where", "while", "async", "await", "dyn", "abstract", "become", 833 | "box", "do", "final", "macro", "override", "priv", "typeof", "unsized", "virtual", 834 | "yield", "try", "'static", "u8", "u16", "u32", "u64", "u128", "usize", "i8", "i16", 835 | "i32", "i64", "i128", "isize", "f32", "f64", "String", "Vec", "str", "Some", 836 | "bool", "None", "Box", "Result", "Option", "Ok", "Err", "Self", "std", 837 | ]); 838 | bulk_add(&mut result, "operator", &[ 839 | "&&", r"\|\|", "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", 840 | "\\-=", "\\*=", "\\\\=", "==", "!=", "\\?", ">=", "<=", "<", ">", "!", 841 | ]); 842 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 843 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f32|f64))"]); 844 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 845 | bulk_add(&mut result, "function", &[ 846 | "fn\\s+([a-z_][A-Za-z0-9_]*)\\s*\\(", 847 | "fn\\s+([a-z_][A-Za-z0-9_]*)\\s*<.*>\\s*\\(", 848 | "\\.([a-z_][A-Za-z0-9_]*)\\s*\\(", 849 | "([a-z_][A-Za-z0-9_]*)\\s*\\(", 850 | ]); 851 | bulk_add(&mut result, "struct", &[ 852 | "(?:trait|enum|struct|impl)\\s+([A-Z][A-Za-z0-9_]*)\\s*", 853 | "impl(?:<.*?>|)\\s+([A-Z][A-Za-z0-9_]*)", 854 | "([A-Z][A-Za-z0-9_]*)::", 855 | "([A-Z][A-Za-z0-9_]*)\\s*\\(", 856 | "impl.*for\\s+([A-Z][A-Za-z0-9_]*)", 857 | "::\\s*([a-z_][A-Za-z0-9_]*)\\s*\\(", 858 | ]); 859 | bulk_add(&mut result, "macro", &["\\b([a-z_][a-zA-Z0-9_]*!)", "(\\$[a-z_][A-Za-z0-9_]*)"]); 860 | 
bulk_add(&mut result, "reference", &[ 861 | "&", "&str", "&mut", "&self", "&i8", "&i16", "&i32", "&i64", "&i128", "&isize", 862 | "&u8", "&u16", "&u32", "&u64", "&u128", "&usize", "&f32", "&f64", 863 | ]); 864 | result 865 | }) 866 | } 867 | 868 | fn asm_syntax_highlighter() -> &'static Highlighter { 869 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 870 | HIGHLIGHTER.get_or_init(|| { 871 | let mut result = Highlighter::new(4); 872 | result.keyword("function", "([a-zA-Z_]+)\\:$"); 873 | result.keyword("comment", "(;.*)$"); 874 | result.keyword("digit", "\\b((?:0x)?\\d+.\\d+|\\d+)"); 875 | result.bounded("string", "\"", "\"", true); 876 | add_keywords_case_indep( 877 | &mut result, 878 | &[ 879 | "mov", "add", "sub", "jmp", "call", "ret", "bss", "data", "text", "section", 880 | "globl", "extern", "db", "eax", "ebx", "ecx", "edx", "esp", "ebp", "int", "xor", 881 | "imul", "inc", "jle", "cmp", "global", "section", "resb", 882 | ], 883 | ); 884 | result 885 | }) 886 | } 887 | 888 | fn python_syntax_highlighter() -> &'static Highlighter { 889 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 890 | HIGHLIGHTER.get_or_init(|| { 891 | let mut result = Highlighter::new(4); 892 | result.keyword("comment", "(#.*)$"); 893 | result.bounded("string", "\"\"\"", "\"\"\"", true); 894 | result.bounded("string", "\'\'\'", "\'\'\'", true); 895 | result.bounded("string", "b\"", "\"", true); 896 | result.bounded("string", "r\"", "\"", true); 897 | result.bounded_interp("string", "f\"", "\"", "\\{", "\\}", true); 898 | result.bounded("string", "\"", "\"", true); 899 | result.bounded("string", "b\'", "\'", true); 900 | result.bounded("string", "r\'", "\'", true); 901 | result.bounded_interp("string", "f\'", "\'", "\\{", "\\}", true); 902 | result.bounded("string", "\'", "\'", true); 903 | add_keywords(&mut result, &[ 904 | "and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", 905 | "exec", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "not", 906 | "or", "pass", "print", "raise", "return", "try", "while", "with", "yield", "str", "bool", 907 | "int", "tuple", "list", "dict", "tuple", "len", "None", "input", "type", "set", "range", 908 | "enumerate", "open", "iter", "min", "max", "dir", "self", "isinstance", "help", "next", 909 | "super", "match", "case", 910 | ]); 911 | result.keyword("attribute", "@.*$"); 912 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 913 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 914 | bulk_add(&mut result, "operator", &[ 915 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(\s//\s)", r"(%)", r"(\+=)", 916 | r"(\-=)", r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", 917 | ]); 918 | bulk_add(&mut result, "boolean", &["\\b(True)\\b", "\\b(False)\\b"]); 919 | bulk_add(&mut result, "function", &[ 920 | "def\\s+([a-z_][A-Za-z0-9_]*)", 921 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 922 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 923 | ]); 924 | result 925 | }) 926 | } 927 | 928 | fn ruby_syntax_highlighter() -> &'static Highlighter { 929 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 930 | HIGHLIGHTER.get_or_init(|| { 931 | let mut result = Highlighter::new(4); 932 | result.keyword("comment", "(#.*)$"); 933 | result.bounded("comment", "=begin", "=end", false); 934 | result.bounded_interp("string", "\"", "\"", "#\\{", "\\}", true); 935 | result.bounded("string", "\'", "\'", true); 936 | result.keyword("string", r"(\:[a-zA-Z_]+)"); 937 | add_keywords(&mut result, &[ 938 | "__ENCODING__", 
"__LINE__", "__FILE__", "BEGIN", "END", "alias", "and", "begin", "break", 939 | "case", "class", "def", "defined?", "do", "else", "elsif", "end", "ensure", "for", "if", 940 | "in", "module", "next", "nil", "not", "or", "redo", "rescue", "retry", "return", "self", 941 | "super", "then", "undef", "unless", "until", "when", "while", "yield", "extend", "include", 942 | "attr_reader", "attr_writer", "attr_accessor", 943 | ]); 944 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 945 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 946 | bulk_add(&mut result, "operator", &[ 947 | "!!", "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", "\\-=", "\\*=", "\\\\=", 948 | "==", "!=", "\\?", ">=", "<=", "<", ">", "&&", "\\|\\|", "!", "&", "\\|", "\\^", 949 | "%", 950 | ]); 951 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 952 | bulk_add(&mut result, "function", &[ 953 | "def\\s+([a-z_][A-Za-z0-9_]*)", 954 | "^\\s*([a-z_][A-Za-z0-9_]*)\\s+[^=]", 955 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 956 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 957 | ]); 958 | result 959 | }) 960 | } 961 | 962 | fn cgi_syntax_highlighter() -> &'static Highlighter { 963 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 964 | HIGHLIGHTER.get_or_init(|| { 965 | let mut result = Highlighter::new(4); 966 | result.keyword("comment", "(#.*)$"); 967 | result.bounded_interp("string", "\"", "\"", "#\\{", "\\}", true); 968 | result.bounded("string", "(?:m|s)/", "/", true); 969 | result.bounded("string", "\'", "\'", true); 970 | result.keyword("string", r"(\:[a-zA-Z_]+)"); 971 | add_keywords(&mut result, &[ 972 | "if", "else", "elsif", "unless", "while", "for", "foreach", "until", "do", "next", 973 | "last", "goto", "return", "sub", "my", "local", "our", "package", "use", "require", 974 | "import", "undef", "and", "or", "not", "eq", "ne", "lt", "le", "gt", "ge", "cmp", 975 | "qw", "scalar", "array", "hash", "undef", "undef", "ref", "bless", "glob", "filehandle", 976 | "code", "regexp", "integer", "float", "string", "boolean", "reference", "die", 977 | ]); 978 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 979 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)"); 980 | bulk_add(&mut result, "operator", &[ 981 | "!!", "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", "\\-=", "\\*=", "\\\\=", 982 | "==", "!=", "\\?", ">=", "<=", "<", ">", "\\$","&&", "\\|\\|", "!", "&", "\\|", 983 | "\\^", "(?:\\\\)?%", "\\\\@", 984 | ]); 985 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 986 | bulk_add(&mut result, "function", &[ 987 | "sub\\s+([a-z_][A-Za-z0-9_]*)", 988 | "^\\s*([a-z_][A-Za-z0-9_]*)\\s+[^=]", 989 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 990 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 991 | ]); 992 | result 993 | }) 994 | } 995 | 996 | fn lua_syntax_highlighter() -> &'static Highlighter { 997 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 998 | HIGHLIGHTER.get_or_init(|| { 999 | let mut result = Highlighter::new(4); 1000 | result.bounded("comment", r"--\[\[", r"\]\]--", false); 1001 | result.keyword("comment", "(--.*)$"); 1002 | result.bounded("string", "\"", "\"", true); 1003 | result.bounded("string", "\'", "\'", true); 1004 | result.bounded("string", "\\[\\[", "\\]\\]", true); 1005 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1006 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1007 | bulk_add(&mut result, "function", &[ 1008 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1009 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1010 | ]); 1011 | bulk_add(&mut result, "operator", &[ 1012 | 
r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", 1013 | r"(\+=)", r"(\-=)", r"(\*=)", r"(\\=)", r"(\.\.)", r"(==)", r"(~=)", 1014 | r"(>=)", r"(<=)", r"(<)", r"(>)", r"(#)", r"(<<)", r"(>>)", r"\b(and)\b", 1015 | r"\b(or)\b", r"\b(not)\b", 1016 | ]); 1017 | add_keywords(&mut result, &[ 1018 | "break", "do", "else", "elseif", "end", "false", "for", "function", "if", "in", 1019 | "local", "nil", "repeat", "return", "then", "true", "until", "while", "self", 1020 | ]); 1021 | result 1022 | }) 1023 | } 1024 | 1025 | fn r_syntax_highlighter() -> &'static Highlighter { 1026 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1027 | HIGHLIGHTER.get_or_init(|| { 1028 | let mut result = Highlighter::new(4); 1029 | result.keyword("comment", "(#.*)$"); 1030 | result.bounded("string", "\"", "\"", true); 1031 | result.bounded("string", "\'", "\'", true); 1032 | bulk_add(&mut result, "boolean", &["\\b(FALSE)\\b", "\\b(TRUE)\\b"]); 1033 | add_keywords(&mut result, &[ 1034 | "if", "else", "repeat", "while", "function", "for", "in", "next", "break", "TRUE", 1035 | "FALSE", "NULL", "Inf", "NaN", "NA", "NA_integer_", "NA_real_", "NA_complex_", 1036 | "NA_character_", r"\.\.\.", 1037 | ]); 1038 | result.keyword("attribute", "@.*$"); 1039 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1040 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 1041 | bulk_add(&mut result, "operator", &[ 1042 | r"<-", r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(\s//\s)", r"(&)", r"(%)", 1043 | r"(\+=)", r"(\-=)", r"(\*=)", r"(\\=)", r"(\$)", r"(|)", r"(==)", r"(!=)", r"(>=)", 1044 | r"(<=)", r"(<)", r"(>)", r"(\?)", 1045 | ]); 1046 | bulk_add(&mut result, "function", &[ 1047 | "def\\s+([a-z_][A-Za-z0-9_]*)", 1048 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1049 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1050 | ]); 1051 | result 1052 | }) 1053 | } 1054 | 1055 | fn go_syntax_highlighter() -> &'static Highlighter { 1056 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1057 | HIGHLIGHTER.get_or_init(|| { 1058 | let mut result = Highlighter::new(4); 1059 | result.bounded("comment", r"/\*", r"\*/", false); 1060 | result.keyword("comment", "(//.*)$"); 1061 | result.bounded("string", "\"", "\"", true); 1062 | result.bounded("string", "`", "`", true); 1063 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1064 | add_keywords(&mut result, &[ 1065 | "break", "case", "chan", "const", "continue", "default", "defer", "else", "fallthrough", 1066 | "for", "func", "go", "goto", "if", "import", "interface", "map", "package", "range", 1067 | "return", "select", "struct", "switch", "type", "var", "bool", "byte", "complex64", "complex128", 1068 | "error", "float32", "float64", "int", "int8", "int16", "int32", "int64", "rune", "string", 1069 | ]); 1070 | bulk_add(&mut result, "operator", &[ 1071 | ":=", "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", "\\-=", "\\*=", "\\\\=", 1072 | "==", "!=", "\\?", ">=", "<=", "<", ">", 1073 | ]); 1074 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f32|f64))"]); 1075 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1076 | bulk_add(&mut result, "function", &[ 1077 | "func\\s+([A-Za-z0-9_]+)\\s*\\(", 1078 | "\\.([A-Za-z0-9_]+)\\s*\\(", 1079 | "([A-Za-z0-9_]+)\\s*\\(", 1080 | ]); 1081 | bulk_add(&mut result, "reference", &["&"]); 1082 | result 1083 | }) 1084 | } 1085 | 1086 | fn js_syntax_highlighter() -> &'static Highlighter { 1087 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1088 | HIGHLIGHTER.get_or_init(|| { 1089 | let mut 
result = Highlighter::new(4); 1090 | result.bounded("comment", r"/\*", r"\*/", false); 1091 | result.keyword("comment", "//.*$"); 1092 | result.bounded("string", "r\"", "\"", true); 1093 | result.bounded("string", "f\"", "\"", true); 1094 | result.bounded("string", "\"", "\"", true); 1095 | result.bounded("string", "r\'", "\'", true); 1096 | result.bounded("string", "f\'", "\'", true); 1097 | result.bounded("string", "\'", "\'", true); 1098 | result.bounded_interp("string", "r`", "`", "\\$\\{", "\\}", true); 1099 | result.bounded_interp("string", "f`", "`", "\\$\\{", "\\}", true); 1100 | result.bounded_interp("string", "`", "`", "\\$\\{", "\\}", true); 1101 | result.bounded("string", "/", "/", true); 1102 | add_keywords(&mut result, &[ 1103 | "abstract", "arguments", "await", "boolean", "break", "byte", "case", "catch", "char", 1104 | "class", "const", "continue", "debugger", "default", "delete", "do", "double", "else", 1105 | "enum", "eval", "export", "extends", "final", "finally", "float", "for", "of", "function", 1106 | "goto", "if", "implements", "import", "in", "instanceof", "int", "interface", "let", "long", 1107 | "native", "new", "null", "package", "private", "protected", "public", "return", "short", 1108 | "static", "super", "switch", "synchronized", "this", "throw", "throws", "transient", "try", 1109 | "typeof", "var", "void", "volatile", "console", "while", "with", "yield", "undefined", "NaN", 1110 | "-Infinity", "Infinity", 1111 | ]); 1112 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1113 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 1114 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1115 | bulk_add(&mut result, "function", &[ 1116 | "function\\s+([a-z_][A-Za-z0-9_]*)", 1117 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1118 | "\\.([a-z_][A-Za-z0-9_]*)\\s*", 1119 | ]); 1120 | bulk_add(&mut result, "operator", &[ 1121 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1122 | r"(\-=)", r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", 1123 | r"(>)", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", 1124 | ]); 1125 | result 1126 | }) 1127 | } 1128 | 1129 | fn ts_syntax_highlighter() -> &'static Highlighter { 1130 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1131 | HIGHLIGHTER.get_or_init(|| { 1132 | let mut result = Highlighter::new(4); 1133 | result.bounded("comment", r"/\*", r"\*/", false); 1134 | result.keyword("comment", "//.*$"); 1135 | result.bounded("string", "r\"", "\"", true); 1136 | result.bounded("string", "f\"", "\"", true); 1137 | result.bounded("string", "\"", "\"", true); 1138 | result.bounded("string", "r\'", "\'", true); 1139 | result.bounded("string", "f\'", "\'", true); 1140 | result.bounded("string", "\'", "\'", true); 1141 | result.bounded_interp("string", "r`", "`", "\\$\\{", "\\}", true); 1142 | result.bounded_interp("string", "f`", "`", "\\$\\{", "\\}", true); 1143 | result.bounded_interp("string", "`", "`", "\\$\\{", "\\}", true); 1144 | result.bounded("string", "/", "/", true); 1145 | add_keywords(&mut result, &[ 1146 | "abstract", "any", "as", "asserts", "boolean", "break", "case", "catch", "class", "const", "constructor", 1147 | "continue", "debugger", "declare", "default", "delete", "do", "else", "enum", "export", "extends", "false", 1148 | "finally", "for", "from", "function", "get", "if", "implements", "import", "in", "infer", "instanceof", 1149 | "interface", "is", "keyof", "let", "module", "namespace", "never", "new", "null", "number", "object", "package", 1150 | 
"private", "protected", "public", "readonly", "require", "global", "return", "set", "static", "string", 1151 | "super", "switch", "symbol", "this", "throw", "true", "try", "type", "typeof", "undefined", "unique", "unknown", 1152 | "var", "void", "while", "with", "yield", 1153 | ]); 1154 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1155 | result.keyword("struct", "class\\s+([A-Za-z0-9_]+)"); 1156 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1157 | bulk_add(&mut result, "function", &[ 1158 | "function\\s+([a-z_][A-Za-z0-9_]*)", 1159 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1160 | "\\.([a-z_][A-Za-z0-9_]*)\\s*", 1161 | ]); 1162 | bulk_add(&mut result, "operator", &[ 1163 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1164 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1165 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1166 | ]); 1167 | result 1168 | }) 1169 | } 1170 | 1171 | fn dart_syntax_highlighter() -> &'static Highlighter { 1172 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1173 | HIGHLIGHTER.get_or_init(|| { 1174 | let mut result = Highlighter::new(4); 1175 | result.bounded("comment", r"/\*", r"\*/", false); 1176 | result.keyword("comment", "//.*$"); 1177 | result.bounded("string", "\"\"\"", "\"\"\"", true); 1178 | result.bounded("string", "\'\'\'", "\'\'\'", true); 1179 | result.bounded_interp("string", "\"", "\"", "\\$\\{", "\\}", true); 1180 | result.bounded("string", "\'", "\'", true); 1181 | add_keywords(&mut result, &[ 1182 | "abstract", "as", "assert", "async", "await", "break", "case", "catch", "class", "const", "continue", "covariant", "default", 1183 | "deferred", "do", "dynamic", "else", "enum", "export", "extends", "extension", "external", "factory", "false", "final", "finally", 1184 | "for", "Function", "get", "hide", "if", "implements", "import", "in", "inout", "interface", "is", "late", "library", "mixin", 1185 | "new", "null", "on", "operator", "out", "part", "required", "rethrow", "return", "set", "show", "static", "super", "switch", 1186 | "sync", "this", "throw", "true", "try", "typedef", "var", "void", "while", "with", "yield", "int", "double", "num", "string", 1187 | ]); 1188 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1189 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]+)"); 1190 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1191 | bulk_add(&mut result, "function", &[ 1192 | "\\b([a-z_][A-Za-z0-9_]*)(?:<[A-Za-z_]*>)?\\s*\\(", 1193 | "\\.([a-z_][A-Za-z0-9_]*)\\s*", 1194 | ]); 1195 | bulk_add(&mut result, "operator", &[ 1196 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1197 | r"(\-=)", r"(\*=)", r"(\\=)", "~/", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", 1198 | r"(>)", "\\?", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", "\\?\\?", 1199 | ]); 1200 | result 1201 | }) 1202 | } 1203 | 1204 | fn c_syntax_highlighter() -> &'static Highlighter { 1205 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1206 | HIGHLIGHTER.get_or_init(|| { 1207 | let mut result = Highlighter::new(4); 1208 | result.bounded("comment", r"/\*", r"\*/", false); 1209 | result.keyword("comment", "(//.*)$"); 1210 | result.bounded("string", "\"", "\"", true); 1211 | add_keywords(&mut result, &[ 1212 | "auto", "break", "case", "char", "const", "continue", "default", "do", "double", 1213 | "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", 1214 | "return", "short", "signed", "sizeof", "static", 
"struct", "switch", "typedef", 1215 | "union", "unsigned", "void", "volatile", "while", "printf", "fscanf", "scanf", 1216 | "fputsf", "exit", "stderr", "malloc", "calloc", "bool", "realloc", "free", 1217 | "strlen", "size_t", 1218 | ]); 1219 | result.keyword("struct", "\\}\\s+([A-Za-z0-9_]+)\\s*"); 1220 | result.keyword("attribute", "^\\s*(#.*?)\\s"); 1221 | result.keyword("header", "(<.*?>)"); 1222 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f|))"]); 1223 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1224 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1225 | bulk_add(&mut result, "function", &[ 1226 | "(int|bool|void|char|double|long|short|size_t)\\s+([a-z_][A-Za-z0-9_]*)\\s*\\(", 1227 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1228 | ]); 1229 | bulk_add(&mut result, "operator", &[ 1230 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(%)", r"(\+=)", r"(\-=)", 1231 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", 1232 | r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", 1233 | ]); 1234 | result 1235 | }) 1236 | } 1237 | 1238 | fn cpp_syntax_highlighter() -> &'static Highlighter { 1239 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1240 | HIGHLIGHTER.get_or_init(|| { 1241 | let mut result = Highlighter::new(4); 1242 | result.bounded("comment", r"/\*", r"\*/", false); 1243 | result.keyword("comment", "(//.*)$"); 1244 | result.bounded("string", "\"", "\"", true); 1245 | add_keywords(&mut result, &[ 1246 | "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", 1247 | "catch", "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept", "const", "consteval", "constexpr", 1248 | "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype", "default", 1249 | "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", "float", 1250 | "for", "friend", "goto", "if", "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "not", "not_eq", 1251 | "nullptr", "operator", "or", "or_eq", "private", "protected", "public", "register", "reinterpret_cast", "requires", "return", 1252 | "short", "signed", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", "template", "this", 1253 | "thread_local", "throw", "true", "try", "typedef", "typeid", "typename", "union", "unsigned", "using", "virtual", 1254 | "void", "volatile", "wchar_t", "while", "xor", "xor_eq", "std", "string", 1255 | ]); 1256 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)"); 1257 | result.keyword("attribute", "^\\s*(#[a-zA-Z_]+)\\s*"); 1258 | bulk_add(&mut result, "operator", &[ 1259 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(%)", r"(\+=)", r"(\-=)", 1260 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", 1261 | r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(|)", r"(&)", r"(^)", r"(~)", 1262 | ]); 1263 | result.keyword("header", "(<.*?>)"); 1264 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f|))"]); 1265 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1266 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1267 | bulk_add(&mut result, "function", &[ 1268 | "(int|bool|void|char|double|long|short|size_t)\\s+([a-z_][A-Za-z0-9_]*)\\s*\\(", 1269 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1270 | ]); 1271 | result 1272 | }) 1273 | } 1274 | 1275 | fn cs_syntax_highlighter() -> &'static Highlighter { 1276 | 
static HIGHLIGHTER: OnceLock = OnceLock::new(); 1277 | HIGHLIGHTER.get_or_init(|| { 1278 | let mut result = Highlighter::new(4); 1279 | result.bounded("comment", r"/\*", r"\*/", false); 1280 | result.keyword("comment", "(//.*)$"); 1281 | result.bounded("string", "\"", "\"", true); 1282 | add_keywords(&mut result, &[ 1283 | "abstract", "as", "base", "bool", "break", "byte", "case", "catch", "char", "checked", 1284 | "class", "const", "continue", "decimal", "default", "delegate", "do", "double", "else", 1285 | "enum", "event", "explicit", "extern", "false", "finally", "fixed", "float", "for", 1286 | "foreach", "goto", "if", "implicit", "in", "int", "interface", "internal", "is", "lock", 1287 | "long", "namespace", "new", "null", "object", "operator", "out", "override", "params", 1288 | "private", "protected", "public", "readonly", "ref", "return", "sbyte", "sealed", 1289 | "short", "sizeof", "stackalloc", "static", "string", "struct", "switch", "this", "throw", 1290 | "true", "try", "typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using", 1291 | "using", "static", "virtual", "void", "volatile", "while", "add", "alias", "ascending", "async", 1292 | "await", "by", "descending", "dynamic", "equals", "from", "get", "global", "group", 1293 | "into", "join", "let", "nameof", "on", "orderby", "partial", "remove", "select", "set", 1294 | "unmanaged", "value", "var", "when", "where", "with", "yield", 1295 | ]); 1296 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)"); 1297 | bulk_add(&mut result, "operator", &[ 1298 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"(%)", r"(\+=)", r"(\-=)", 1299 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", 1300 | r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(|)", r"(&)", r"(^)", r"(~)", 1301 | ]); 1302 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f|m|))"]); 1303 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1304 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1305 | bulk_add(&mut result, "function", &[ 1306 | "(int|bool|void|char|double|long|short|size_t)\\s+([a-z_][A-Za-z0-9_]*)\\s*\\(", 1307 | "\\b([a-z_][A-Za-z0-9_]*)\\s*\\(", 1308 | ]); 1309 | result 1310 | }) 1311 | } 1312 | 1313 | fn swift_syntax_highlighter() -> &'static Highlighter { 1314 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1315 | HIGHLIGHTER.get_or_init(|| { 1316 | let mut result = Highlighter::new(4); 1317 | result.bounded("comment", r"/\*", r"\*/", false); 1318 | result.keyword("comment", "(//.*)$"); 1319 | result.bounded_interp("string", "#\"", "\"#", "\\\\#?\\(", "\\)", true); 1320 | result.bounded("string", "\"\"\"", "\"\"\"", true); 1321 | result.bounded_interp("string", "\"", "\"", "\\\\\\(", "\\)", true); 1322 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1323 | add_keywords(&mut result, &[ 1324 | "associatedtype", "class", "deinit", "enum", "extension", "fileprivate", "func", 1325 | "import", "init", "inout", "internal", "let", "open", "operator", "private", 1326 | "protocol", "public", "static", "struct", "subscript", "typealias", "var", "break", 1327 | "case", "continue", "default", "defer", "do", "else", "fallthrough", "for", "guard", 1328 | "if", "in", "repeat", "return", "switch", "where", "while", "as", "catch", "throw", 1329 | "try", "Any", "false", "is", "nil", "super", "self", "Self", "true", "associativity", 1330 | "convenience", "dynamic", "didSet", "final", "get", "infix", "indirect", "lazy", "left", 1331 | "mutating", "none", "nonmutating", 
"optional", "override", "postfix", "precedence", "prefix", 1332 | "Protocol", "required", "right", "set", "Type", "unowned", "weak", "willSet", "Int", 1333 | "String", "Double", "Optional", "endif", 1334 | ]); 1335 | bulk_add(&mut result, "operator", &[ 1336 | "=", "\\+", "\\-", "\\*", "[^/](/)[^/]", "\\+=", "\\-=", "\\*=", "\\\\=", "==", 1337 | "!=", "\\?", ">=", "<=", "<", ">", "!", 1338 | ]); 1339 | bulk_add(&mut result, "digit", &["\\b(\\d+.\\d+|\\d+)", "\\b(\\d+.\\d+(?:f32|f64))"]); 1340 | bulk_add(&mut result, "boolean", &["\\b(true)\\b", "\\b(false)\\b"]); 1341 | bulk_add(&mut result, "function", &[ 1342 | "func\\s+([a-z_][A-Za-z0-9_]*)\\s*(?:\\(|<)", 1343 | "\\.([a-z_][A-Za-z0-9_]*)\\s*\\(", 1344 | "([a-z_][A-Za-z0-9_]*)\\s*\\(", 1345 | ]); 1346 | result 1347 | }) 1348 | } 1349 | 1350 | fn json_syntax_highlighter() -> &'static Highlighter { 1351 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1352 | HIGHLIGHTER.get_or_init(|| { 1353 | let mut result = Highlighter::new(4); 1354 | result.bounded("string", "\"", "\"", true); 1355 | result.keyword("keyword", r"\b(null)\b"); 1356 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1357 | result.keyword("boolean", "\\b(true|false)\\b"); 1358 | result 1359 | }) 1360 | } 1361 | 1362 | fn kotlin_syntax_highlighter() -> &'static Highlighter { 1363 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1364 | HIGHLIGHTER.get_or_init(|| { 1365 | let mut result = Highlighter::new(4); 1366 | result.bounded("comment", r"/\*", r"\*/", false); 1367 | result.keyword("comment", "(//.*)$"); 1368 | result.bounded("string", "\"\"\"", "\"\"\"", true); 1369 | result.bounded("string", "\"", "\"", true); 1370 | result.keyword("attribute", r"@\w+"); 1371 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1372 | result.keyword("boolean", "\\b(true|false)\\b"); 1373 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1374 | bulk_add(&mut result, "operator", &[ 1375 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1376 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1377 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1378 | ]); 1379 | add_keywords(&mut result, &[ 1380 | "abstract", "actual", "annotation", "companion", "constructor", "enum", "external", "expect", 1381 | "final", "fun", "inline", "inner", "interface", "internal", "private", "protected", "public", 1382 | "sealed", "suspend", "tailrec", "vararg", "as", "break", "class", "continue", "do", "else", 1383 | "false", "for", "if", "in", "is", "null", "object", "infix", "package", "return", "super", "this", 1384 | "throw", "true", "try", "data", "typealias", "typeof", "val", "when", "while", "var", "operator", 1385 | "override", 1386 | ]); 1387 | bulk_add(&mut result, "function", &[ 1388 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1389 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1390 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\{", 1391 | ]); 1392 | result 1393 | }) 1394 | } 1395 | 1396 | fn java_syntax_highlighter() -> &'static Highlighter { 1397 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1398 | HIGHLIGHTER.get_or_init(|| { 1399 | let mut result = Highlighter::new(4); 1400 | result.bounded("comment", r"/\*", r"\*/", false); 1401 | result.keyword("comment", "(//.*)$"); 1402 | result.bounded("string", "\"", "\"", true); 1403 | result.keyword("attribute", r"@\w+"); 1404 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1405 | result.keyword("boolean", "\\b(true|false)\\b"); 1406 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1407 | 
bulk_add(&mut result, "operator", &[ 1408 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1409 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1410 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1411 | ]); 1412 | add_keywords(&mut result, &[ 1413 | "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class", "const", "continue", 1414 | "default", "do", "double", "else", "enum", "extends", "final", "finally", "float", "for", "if", "goto", 1415 | "implements", "import", "instanceof", "int", "interface", "long", "native", "new", "package", "private", 1416 | "protected", "public", "return", "short", "static", "strictfp", "super", "switch", "synchronized", "this", 1417 | "throw", "throws", "transient", "try", "var", "void", "volatile", "while", "null", 1418 | ]); 1419 | bulk_add(&mut result, "function", &[ 1420 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1421 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1422 | ]); 1423 | result 1424 | }) 1425 | } 1426 | 1427 | fn vb_syntax_highlighter() -> &'static Highlighter { 1428 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1429 | HIGHLIGHTER.get_or_init(|| { 1430 | let mut result = Highlighter::new(4); 1431 | result.keyword("comment", "('.*)$"); 1432 | result.bounded("string", "\"", "\"", true); 1433 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1434 | bulk_add(&mut result, "function", &["\\b([A-Za-z0-9_\\?!]*)\\s*\\("]); 1435 | bulk_add(&mut result, "operator", &[ 1436 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1437 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1438 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1439 | ]); 1440 | add_keywords(&mut result, &[ 1441 | "AddHandler", "AddressOf", "Alias", "And", "AndAlso", "Ansi", "As", "Assembly", "Auto", "Boolean", 1442 | "ByRef", "Byte", "ByVal", "Call", "Case", "Catch", "CBool", "CByte", "CChar", "CDate", "CDec", "CDbl", 1443 | "Char", "CInt", "Class", "CLng", "CObj", "Const", "CShort", "CSng", "CStr", "CType", "Date", "Decimal", 1444 | "Declare", "Default", "Delegate", "Dim", "DirectCast", "Do", "Double", "Each", "Else", "ElseIf", "End", 1445 | "Enum", "Erase", "Error", "Event", "Exit", "False", "Finally", "For", "Friend", "Function", "Get", "GetType", 1446 | "GoSub", "GoTo", "Handles", "If", "Implements", "Imports", "In", "Inherits", "Integer", "Interface", 1447 | "Is", "IsNot", "Let", "Lib", "Like", "Long", "Loop", "Me", "Mod", "Module", "MustInherit", "MustOverride", 1448 | "MyBase", "MyClass", "Namespace", "Narrowing", "New", "Next", "Not", "Nothing", "NotInheritable", 1449 | "NotOverridable", "Object", "Of", "On", "Operator", "Option", "Optional", "Or", "OrElse", "Out", "Overloads", 1450 | "Overridable", "Overrides", "ParamArray", "Partial", "Private", "Property", "Protected", "Public", "RaiseEvent", 1451 | "ReadOnly", "ReDim", "REM", "RemoveHandler", "Resume", "Return", "SByte", "Select", "Set", "Shadows", "Shared", 1452 | "Short", "Single", "Static", "Step", "Stop", "String", "Structure", "Sub", "SyncLock", "Then", "Throw", "To", 1453 | "True", "Try", "TryCast", "TypeOf", "UInteger", "ULong", "UShort", "Using", "Variant", "Wend", "When", "While", 1454 | "Widening", "With", "WithEvents", "WriteOnly", "Xor", "Console", 1455 | ]); 1456 | result 1457 | }) 1458 | } 1459 | 1460 | fn m_syntax_highlighter() -> &'static Highlighter { 1461 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1462 | HIGHLIGHTER.get_or_init(|| { 1463 | let 
mut result = Highlighter::new(4); 1464 | result.bounded("comment", "%\\{", "%\\}", true); 1465 | result.keyword("comment", "(%.*)$"); 1466 | result.bounded("string", "\'", "\'", true); 1467 | result.keyword("boolean", "\\b(true|false)\\b"); 1468 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1469 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1470 | bulk_add(&mut result, "operator", &[ 1471 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", 1472 | r"(\*=)", r"(\\=)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", 1473 | r"(\&\&)", r"(\|\|)", r"(!)\S", 1474 | ]); 1475 | add_keywords(&mut result, &[ 1476 | "break", "case", "catch", "classdef", "continue", "else", "elseif", "end", "for", "function", 1477 | "global", "if", "otherwise", "parfor", "persistent", "return", "spmd", "switch", "try", "while", 1478 | "inf", "nan", "int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64", "single", 1479 | "double", "char", "string", "cell", "struct", "table", "datetime", "properties", "NaN", "max", 1480 | "min", "length", "sort", "sum", "prod", "mode", "median", "mean", "std", "pi", "randi", "randn", 1481 | "rand", "clf", "shg", "close", "path", "addpath", "rmpath", "cd", "grid", "on", "axis", "square", 1482 | "equal", "off", "hold", "help", "doc", "lookfor", "profile", "viewer", "clc", "diary", "ctrl-c", "who", 1483 | "whos", "clear", "load", "format", "short", "long", "bank", 1484 | ]); 1485 | bulk_add(&mut result, "function", &[ 1486 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1487 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1488 | ]); 1489 | result 1490 | }) 1491 | } 1492 | 1493 | fn php_syntax_highlighter() -> &'static Highlighter { 1494 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1495 | HIGHLIGHTER.get_or_init(|| { 1496 | let mut result = Highlighter::new(4); 1497 | result.bounded("comment", r"/\*", r"\*/", false); 1498 | result.keyword("comment", "(//.*)$"); 1499 | result.keyword("comment", "(#.*)$"); 1500 | result.bounded_interp("string", "\"", "\"", "\\{", "\\}", true); 1501 | result.bounded_interp("string", "\"", "\"", "\\$\\{", "\\}", true); 1502 | result.bounded("string", "\'", "\'", true); 1503 | result.keyword("boolean", "\\b(true|false|TRUE|FALSE)\\b"); 1504 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1505 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1506 | bulk_add(&mut result, "function", &[ 1507 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1508 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1509 | ]); 1510 | add_keywords(&mut result, &[ 1511 | "__halt_compiler", "abstract", "and", "array", "as", "break", "callable", "case", 1512 | "catch", "class", "clone", "const", "continue", "declare", "default", "die", "do", 1513 | "echo", "else", "elseif", "empty", "enddeclare", "endfor", "endforeach", "endif", 1514 | "endswitch", "endwhile", "eval", "exit", "extends", "final", "finally", "for", 1515 | "foreach", "function", "global", "goto", "if", "implements", "include", "include_once", 1516 | "instanceof", "insteadof", "interface", "isset", "list", "namespace", "new", "or", 1517 | "print", "private", "protected", "public", "require", "require_once", "return", "static", 1518 | "switch", "throw", "trait", "try", "unset", "use", "var", "while", "xor", 1519 | "__CLASS__", "__DIR__", "__FILE__", "__FUNCTION__", "__LINE__", "__METHOD__", 1520 | "__NAMESPACE__", "__TRAIT__", "null", 1521 | ]); 1522 | result.keyword("keyword", r"<\?php"); 1523 | result.keyword("keyword", r"\?>"); 1524 | bulk_add(&mut result, 
"operator", &[ 1525 | r"(->)", r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1526 | r"(\-=)", r"(\*=)", r"(\\=)", r"(\?)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", 1527 | r"(>)", r"(\$)", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(\.)", 1528 | ]); 1529 | result 1530 | }) 1531 | } 1532 | 1533 | fn scala_syntax_highlighter() -> &'static Highlighter { 1534 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1535 | HIGHLIGHTER.get_or_init(|| { 1536 | let mut result = Highlighter::new(4); 1537 | result.bounded("comment", r"/\*", r"\*/", false); 1538 | result.keyword("comment", "(//.*)$"); 1539 | result.bounded_interp("string", "f\"", "\"", "\\$\\{", "\\}", true); 1540 | result.bounded_interp("string", "s\"", "\"", "\\$\\{", "\\}", true); 1541 | result.bounded("string", "\"\"\"", "\"\"\"", true); 1542 | result.bounded("string", "raw\"", "\"", true); 1543 | result.bounded("string", "\"", "\"", true); 1544 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1545 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1546 | result.keyword("boolean", "\\b(true|false)\\b"); 1547 | bulk_add(&mut result, "operator", &[ 1548 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", r"(\*=)", r"(\\=)", 1549 | r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", 1550 | ]); 1551 | add_keywords(&mut result, &[ 1552 | "abstract", "case", "catch", "class", "def", "do", "else", "extends", "false", "final", "finally", 1553 | "for", "forSome", "if", "implicit", "import", "lazy", "macro", "match", "new", "null", "object", 1554 | "override", "package", "private", "protected", "return", "sealed", "super", "this", "throw", "trait", 1555 | "try", "true", "type", "val", "var", "while", "with", "yield", "Boolean", "Byte", "Char", "Double", 1556 | "Float", "Int", "Long", "Short", "String", "Unit", "Any", "AnyVal", "AnyRef", "Nothing", "Null", 1557 | "foreach", "map", "println", "to", "by", 1558 | ]); 1559 | bulk_add(&mut result, "function", &[ 1560 | "\\.([a-z_][A-Za-z0-9_\\?!]*)\\s*", 1561 | "\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\(", 1562 | ]); 1563 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1564 | result 1565 | }) 1566 | } 1567 | 1568 | fn prolog_syntax_highlighter() -> &'static Highlighter { 1569 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1570 | HIGHLIGHTER.get_or_init(|| { 1571 | let mut result = Highlighter::new(4); 1572 | result.keyword("comment", "(\\%.*)$"); 1573 | result.bounded("string", "\"", "\"", true); 1574 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1575 | result.keyword("boolean", "\\b(true|false)\\b"); 1576 | result.keyword("struct", "\\b([A-Z][A-Za-z0-9_]*)\\b"); 1577 | add_keywords_no_boundary(&mut result, &[ 1578 | ":-", "\\,", "\\.", ";", "\\->", "\\+", "=", "is", "not", "fail", "!", "repeat", "call", "cut", 1579 | "assert", "asserta", "assertz", "retract", "abolish", "dynamic", "consult", "listing", "op", 1580 | "assertions", "clauses", "predicate", "query", "rule", "fact", "variable", "atom", "number", 1581 | "list", "compound", "ground", "callable", "atom", "number", "integer", "float", "variable", 1582 | "list", "compound", 1583 | ]); 1584 | bulk_add(&mut result, "operator", &[ 1585 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(<)", r"(>)", 1586 | ]); 1587 | bulk_add(&mut result, "function", &["\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\("]); 1588 | result 1589 | }) 1590 | } 1591 | 1592 | fn haskell_syntax_highlighter() -> &'static 
Highlighter { 1593 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1594 | HIGHLIGHTER.get_or_init(|| { 1595 | let mut result = Highlighter::new(4); 1596 | result.keyword("comment", "(\\-\\-.*)$"); 1597 | result.bounded("comment", "\\{-", "-\\}", true); 1598 | result.bounded("string", "\"", "\"", true); 1599 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1600 | result.keyword("boolean", "\\b(True|False)\\b"); 1601 | bulk_add(&mut result, "character", &[r"'[^\\]'", "'\\\\.'"]); 1602 | bulk_add(&mut result, "operator", &[ 1603 | "->", "\\$", "`.*`", "<-", "<", ">", "&&", "\\|\\|", "\\\\", "\\:", 1604 | "=", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1605 | r"(\-=)", r"(\*=)", r"(/=)", "!", "\\.", "\\|", r"(==)", r"(!=)", r"(>=)", 1606 | r"(<=)", "_", r"(<<)", r"(>>)", r"(!)\S", "\\band\\b", "\\bor\\b", "\\bnot\\b", 1607 | ]); 1608 | add_keywords(&mut result, &[ 1609 | "module", "import", "as", "qualified", "hiding", "do", "case", "of", "let", "in", "if", "then", "else", 1610 | "data", "type", "newtype", "deriving", "class", "instance", "where", "foreign", "export", "ccall", 1611 | "stdcall", "capi", "prim", "safe", "unsafe", "otherwise", "head", "tail", "last", "init", "null", 1612 | "length", "return", "map", "filter", "foldl", "foldr", "zip", "zipWith", "take", "drop", "reverse", 1613 | "concat", "concatMap", "maximum", "minimum", "elem", "notElem", "sum", "array", "product", "scanl", 1614 | "scanr", "replicate", "cycle", "repeat", "iterate", "fst", "snd", "id", "Maybe", "Either", "Bool", 1615 | "Char", "String", "putStrLn", "getLine", "Just", "Nothing", "for", "Int", "Integer", "Float", 1616 | "Double", "Ordering", "IO", "Functor", "Applicative", "Monad", 1617 | ]); 1618 | result.keyword("function", "^[a-z][a-zA-Z0-9]*"); 1619 | result 1620 | }) 1621 | } 1622 | 1623 | fn css_syntax_highlighter() -> &'static Highlighter { 1624 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1625 | HIGHLIGHTER.get_or_init(|| { 1626 | let mut result = Highlighter::new(4); 1627 | result.bounded("comment", r"/\*", r"\*/", false); 1628 | result.bounded("string", "\"", "\"", true); 1629 | add_keywords(&mut result, &["from", "to", "rotate", "none"]); 1630 | result.keyword("digit", r"\#[0-9a-fA-F]+"); 1631 | result.keyword("digit", "((?:\\d+.\\d+|\\d+)(?:%|deg|px|em|rem)?)"); 1632 | result.keyword("boolean", "\\b(true|false)\\b"); 1633 | result.keyword("attribute", r"\.[a-zA-Z0-9\-]*"); 1634 | result.keyword("attribute", r"\:[a-zA-Z0-9\-]*"); 1635 | result.keyword("attribute", r"\::[a-zA-Z0-9\-]*"); 1636 | result.keyword("attribute", r"@\w+"); 1637 | add_keywords(&mut result, &[ 1638 | "a", "abbr", "address", "area", "article", "aside", "audio", "b", "base", "bdi", "bdo", "blockquote", 1639 | "body", "br", "button", "canvas", "caption", "cite", "code", "col", "colgroup", "data", "datalist", 1640 | "dd", "del", "details", "dfn", "dialog", "div", "dl", "dt", "em", "embed", "fieldset", "figcaption", 1641 | "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", 1642 | "html", "i", "iframe", "img", "input", "ins", "kbd", "label", "legend", "li", "link", "main", "map", 1643 | "mark", "meta", "meter", "nav", "noscript", "object", "ol", "optgroup", "option", "output", "p", 1644 | "param", "picture", "pre", "progress", "q", "rb", "rp", "rt", "rtc", "ruby", "s", "samp", "script", 1645 | "section", "select", "slot", "small", "source", "span", "strong", "style", "sub", "summary", "sup", 1646 | "table", "tbody", "td", "template", "textarea", "tfoot", 
"th", "thead", "time", "title", "tr", "track", 1647 | "u", "ul", "var", "video", "wbr", "svg", 1648 | ]); 1649 | add_keywords(&mut result, &[ 1650 | "-webkit-touch-callout", "-webkit-user-select", "-moz-user-select", "-ms-user-select", 1651 | "user-select", "transform", "border-radius", "border-right", "border-left", "border-top", 1652 | "border-bottom", "border", "content", "display", "height", "width", "margin-top", "margin-bottom", 1653 | "margin-left", "margin-right", "margin", "pointer-events", "position", "top", "transform-origin", 1654 | "-moz-appearance", "-webkit-appearance", "cursor", "flex-grow", "flex-shrink", "font-size", 1655 | "max-height", "max-width", "min-height", "min-width", "outline", "vertical-align", "background-color", 1656 | "background-image", "background-position", "background-repeat", "background-size", "background", 1657 | "animation", "border-(?:left|right|top|bottom)-color", "border-(?:left|right|top|bottom)-radius", 1658 | "border-(?:left|right|top|bottom)-width", "border-(?:left|right|top|bottom)-style", "align-items", 1659 | "box-shadow", "justify-content", "line-height", "padding", "padding-(?:left|bottom|right|top)", "font-weight", 1660 | "list-style", "box-sizing", "text-align", "bottom", "overflow-x", "overflow-y", "text-rendering", 1661 | "-moz-osx-font-smoothing", "-webkit-font-smoothing", "text-size-adjust", "font-family", "color", 1662 | "text-decoration", "font-style", "word-wrap", "white-space", "-webkit-overflow-scrolling", 1663 | "clear", "float", "overflow", "!important", "text-transform", "clip", "visibility", "border-color", 1664 | "opacity", "flex-wrap", "border-(?:top|bottom)-(?:left|right)-radius", "z-index", "word-break", "letter-spacing", 1665 | "text-transform", "resize", "flex-direction", "order", "border-style", "border-width", "text-overflow", 1666 | "flex-basis", "-ms-overflow-y", "-ms-overflow-x", "transition-duration", "transition-property", 1667 | "transition-timing-function", "(flex)[^-]", "-webkit-text-decoration-style", "-apple-system", "sans-serif", 1668 | "left", "right", "bottom", "top", "font", "tab-size", "text-shadow", 1669 | ]); 1670 | result 1671 | }) 1672 | } 1673 | 1674 | fn html_syntax_highlighter() -> &'static Highlighter { 1675 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1676 | HIGHLIGHTER.get_or_init(|| { 1677 | let mut result = Highlighter::new(4); 1678 | result.bounded("comment", "", false); 1679 | result.bounded("string", "\"", "\"", true); 1680 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1681 | result.keyword("boolean", "\\b(true|false)\\b"); 1682 | result.keyword("operator", "="); 1683 | bulk_add(&mut result, "tag", &["", ">", " &'static Highlighter { 1706 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1707 | HIGHLIGHTER.get_or_init(|| { 1708 | let mut result = Highlighter::new(4); 1709 | result.bounded("comment", "", false); 1710 | result.keyword("heading", "(#.*)$"); 1711 | result.keyword("quote", "^(>.*)$"); 1712 | result.bounded("bold", "\\*\\*", "\\*\\*", true); 1713 | result.bounded("italic", "\\*", "\\*", true); 1714 | result.bounded("strikethrough", "~~", "~~", true); 1715 | result.bounded("image", "!\\[", "\\]", true); 1716 | result.bounded("link", "\\[", "\\]", true); 1717 | result.bounded("math", "\\$\\$", "\\$\\$", false); 1718 | result.bounded("math", "\\$", "\\$", false); 1719 | result.bounded("block", "```", "```", false); 1720 | result.bounded("block", "`", "`", true); 1721 | result.keyword("link", r"\b(?:https?://|www\.)\S+\b"); 1722 | result.keyword("linebreak", 
"^\\s*-{3}"); 1723 | result.keyword("list", "[0-9]+\\."); 1724 | result.keyword("list", "^\\s*-"); 1725 | result.keyword("list", "^\\s*\\+"); 1726 | result 1727 | }) 1728 | } 1729 | 1730 | fn toml_syntax_highlighter() -> &'static Highlighter { 1731 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1732 | HIGHLIGHTER.get_or_init(|| { 1733 | let mut result = Highlighter::new(4); 1734 | result.bounded("string", "\"", "\"", true); 1735 | result.bounded("string", "\'", "\'", true); 1736 | result.keyword("comment", "(#.*)$"); 1737 | result.keyword("boolean", "\\b(true|false)\\b"); 1738 | result.keyword("table", r"^(\[.*\])"); 1739 | bulk_add(&mut result, "digit", &[ 1740 | r"(?:=|\[|,)\s*(0x[a-fA-F]+)", 1741 | r"(?:=|\[|,)\s*(0o[0-7]+)", 1742 | r"(?:=|\[|,)\s*(0b[0-1]+)", 1743 | r"(?:=|\[|,)\s*((?:\+|-)?[0-9]+(?:\.[0-9]+)?(?:e|E)(?:\+|-)?[0-9]+)", 1744 | r"(?:=|\[|,)\s*((?:\+|-)?[0-9_]+(?:\.[0-9]+)?)", 1745 | ]); 1746 | add_keywords(&mut result, &["inf", "nan"]); 1747 | result 1748 | }) 1749 | } 1750 | 1751 | fn yaml_syntax_highlighter() -> &'static Highlighter { 1752 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1753 | HIGHLIGHTER.get_or_init(|| { 1754 | let mut result = Highlighter::new(4); 1755 | result.bounded("string", "\"", "\"", true); 1756 | result.bounded("string", "\'", "\'", true); 1757 | result.keyword("comment", "(#.*)$"); 1758 | result.keyword("key", r"^\s*[ \.a-zA-Z_-]+:"); 1759 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1760 | result.keyword("tag", "!!(?:bool|int|float|str|timestamp|null|binary)"); 1761 | add_keywords(&mut result, &["No", "Yes", "no", "yes", "true", "false", "null"]); 1762 | result 1763 | }) 1764 | } 1765 | 1766 | fn csv_syntax_highlighter() -> &'static Highlighter { 1767 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1768 | HIGHLIGHTER.get_or_init(|| { 1769 | let mut result = Highlighter::new(4); 1770 | result.keyword("keyword", ","); 1771 | result 1772 | }) 1773 | } 1774 | 1775 | fn shell_syntax_highlighter() -> &'static Highlighter { 1776 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1777 | HIGHLIGHTER.get_or_init(|| { 1778 | let mut result = Highlighter::new(4); 1779 | result.bounded_interp("string", "\"", "\"", "\\$\\(", "\\)", true); 1780 | result.bounded("string", "\'", "\'", true); 1781 | result.bounded("string", "EOF", "EOF", true); 1782 | result.keyword("comment", "(#.*)$"); 1783 | result.keyword("boolean", "\\b(true|false)\\b"); 1784 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1785 | bulk_add(&mut result, "operator", &[ 1786 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", r"(\-=)", r"(\*=)", 1787 | r"(\\=)", r"(\{)", r"(\})", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", r"(\$)", r"(\.\.)", 1788 | r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(\.)", r"(&)", 1789 | ]); 1790 | add_keywords(&mut result, &[ 1791 | "if", "then", "else", "elif", "fi", "case", "esac", "for", "while", "until", "do", "done", 1792 | "in", "function", "select", "continue", "break", "return", "exit", "source", "declare", "readonly", 1793 | "local", "export", "ls", "cd", "pwd", "cp", "mv", "rm", "mkdir", "rmdir", "touch", "chmod", 1794 | "chown", "grep", "awk", "sed", "cat", "head", "tail", "sort", "uniq", "wc", "cut", "paste", 1795 | "find", "tar", "gzip", "gunzip", "zip", "unzip", "ssh", "scp", "rsync", "curl", "wget", "ping", 1796 | "traceroute", "netstat", "ps", "kill", "top", "df", "du", "date", "cal", "history", "alias", 1797 | "source", "source", "exec", "exit", "help", "man", "info", "echo", "fgrep", 
"apropos", 1798 | "whoami", "python", "bg", "fg", "sleep", "jobs", "read", "trap", "clear", "sh", "bash", 1799 | ]); 1800 | bulk_add(&mut result, "function", &["\\b([a-z_][A-Za-z0-9_\\?!]*)\\s*\\("]); 1801 | result 1802 | }) 1803 | } 1804 | 1805 | fn sql_syntax_highlighter() -> &'static Highlighter { 1806 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1807 | HIGHLIGHTER.get_or_init(|| { 1808 | let mut result = Highlighter::new(4); 1809 | result.keyword("comment", "(--.*)$"); 1810 | result.bounded("string", "\"", "\"", true); 1811 | result.bounded("string", "\'", "\'", true); 1812 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1813 | bulk_add(&mut result, "operator", &[ 1814 | r"\+", "-", r"\*", "/", "%", "=", "<>", "!=", "<", ">", "<=", ">=", "&", "|", "^", 1815 | "~", "||", "=", 1816 | ]); 1817 | add_keywords(&mut result, &[ 1818 | "ADD", "ALL", "ALTER", "AND", "AS", "ASC", "BETWEEN", "BY", "CASE", "CHECK", 1819 | "COLUMN", "CONSTRAINT", "CREATE", "DATABASE", "DEFAULT", "DELETE", "DESC", 1820 | "DISTINCT", "DROP", "ELSE", "END", "EXISTS", "FOREIGN", "FROM", "FULL", "GROUP", 1821 | "HAVING", "IN", "INDEX", "INNER", "INSERT", "INTO", "IS", "JOIN", "LEFT", "LIKE", 1822 | "LIMIT", "NOT", "NULL", "ON", "OR", "ORDER", "OUTER", "PRIMARY", "REFERENCES", 1823 | "RIGHT", "SELECT", "SET", "TABLE", "TOP", "TRUNCATE", "UNION", "UNIQUE", "UPDATE", 1824 | "VALUES", "VIEW", "WHERE", "SHOW", "USE", "VARCHAR" 1825 | ]); 1826 | result 1827 | }) 1828 | } 1829 | 1830 | fn xml_syntax_highlighter() -> &'static Highlighter { 1831 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1832 | HIGHLIGHTER.get_or_init(|| { 1833 | let mut result = Highlighter::new(4); 1834 | result.bounded("comment", "", false); 1835 | result.bounded("string", "\"", "\"", true); 1836 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1837 | result.keyword("boolean", "\\b(true|false)\\b"); 1838 | result.keyword("operator", "="); 1839 | bulk_add(&mut result, "tag", &["<[A-Za-z0-9_]+>?", "", "", ">", " &'static Highlighter { 1846 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1847 | HIGHLIGHTER.get_or_init(|| { 1848 | let mut result = Highlighter::new(4); 1849 | result.bounded("string", "\"", "\"", true); 1850 | result.bounded("string", "'", "'", true); 1851 | result.keyword("comment", "(#.*)$"); 1852 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1853 | bulk_add(&mut result, "operator", &[ 1854 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(%)", r"(\+=)", 1855 | r"(\-=)", r"(\*=)", r"(\\=)", r"(\{)", r"(\})", r"(==)", r"(!=)", r"(>=)", 1856 | r"(<=)", r"(<)", r"(>)", r"(\$)", r"(\.\.)", r"(<<)", r"(>>)", r"(\&\&)", 1857 | r"(\|\|)", r"(!)\S", r"(\.)", r"(&)", r"(\|)" 1858 | ]); 1859 | add_keywords(&mut result, &[ 1860 | "alias", "append", "build-string", "cd", "config", "cp", "debug", "def", "do", 1861 | "each", "echo", "else", "empty?", "enter", "every", "exit", "export", "filter", 1862 | "first", "flatten", "for", "format", "from", "get", "group-by", "help", "history", 1863 | "if", "insert", "keep", "last", "let", "ls", "math", "merge", "metadata", "move", 1864 | "mut", "open", "parse", "pivot", "plugin", "post", "pre", "prune", "reduce", "reject", 1865 | "rename", "rm", "save", "select", "skip", "sort-by", "source", "split", "str", "table", 1866 | "to", "touch", "uniq", "update", "url", "use", "where", "with-env", "drop", "complete", 1867 | "load-env", "exec", "mkdir", "du", "glob", "mktemp", "mv", "ps", "run-external", "start", 1868 | "sys", "uname", "watch", "which", "nu-check", "nu-highlight", "print", 
"decode", "char", 1869 | "encode", "detect", "url", "dexit", "shells", "random", "gstat", "ansi", "input", 1870 | "keybindings", "kill", "sleep", "term", "ulimit", "whoami", "is-terminal", "clear", "path", 1871 | "http", "query", "port", "tutor", "math", "polars", "hash", "cal", "generate", "seq", 1872 | "columns", "collect", "compact", "flatten", "group", "headers", "transpose", "enumerate", 1873 | "catch", "try", "find", "upsert", "string", "pattern", "fill", 1874 | ]); 1875 | result 1876 | }) 1877 | } 1878 | 1879 | fn tex_syntax_highlighter() -> &'static Highlighter { 1880 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1881 | HIGHLIGHTER.get_or_init(|| { 1882 | let mut result = Highlighter::new(4); 1883 | result.bounded("string", "\\$", "\\$", true); 1884 | result.keyword("comment", r"([^\\]%.*)$"); 1885 | result.keyword("comment", r"^(%.*)$"); 1886 | result.keyword("digit", "\\b(\\d+.\\d+|\\d+)"); 1887 | bulk_add(&mut result, "keyword", &[ 1888 | r"\\addbibresource\b", r"\\author\b", r"\\begin\b", r"\\caption\b", 1889 | r"\\centering\b", r"\\date\b", r"\\end\b", r"\\geometry\b", r"\\hline\b", 1890 | r"\\includegraphics\b", r"\\item\b", r"\\label\b", r"\\maketitle\b", r"\\paragraph\b", 1891 | r"\\parindent\b", r"\\parskip\b", r"\\printbibliography\b", r"\\section\b", r"\\setlength\b", 1892 | r"\\subsection\b", r"\\tableofcontents\b", r"\\textbf\b", r"\\textit\b", r"\\texttt\b", 1893 | r"\\title\b", r"\\today\b", r"\\underline\b", r"\\usepackage\b", r"\\ref\b", 1894 | r"\\cite\b", r"\\pageref\b", r"\\include\b", r"\\input\b", r"\\bibliographystyle\b", 1895 | r"\\newcommand\b", r"\\renewcommand\b", r"\\renewenvironment\b", r"\\newenvironment\b", 1896 | r"\\footnote\b", r"\\hline\b", r"\\vspace\b", r"\\hspace\b", r"\\newline\b", r"\\frac\b", 1897 | r"\\textbackslash\b", r"\\documentclass\b", 1898 | ]); 1899 | bulk_add(&mut result, "operator", &[ 1900 | r"(=)", r"(\+)", r"(\-)", r"(\*)", r"(\s/\s)", r"\s(//)\s", r"(#)", r"(\+=)", r"(\-=)", 1901 | r"(\*=)", r"(\\=)", r"(\^)", r"(%)", r"(==)", r"(!=)", r"(>=)", r"(<=)", r"(<)", r"(>)", 1902 | r"(\$)", r"(\.\.)", r"(<<)", r"(>>)", r"(\&\&)", r"(\|\|)", r"(!)\S", r"(&)", r"(\|)", 1903 | ]); 1904 | result 1905 | }) 1906 | } 1907 | 1908 | fn diff_syntax_highlighter() -> &'static Highlighter { 1909 | static HIGHLIGHTER: OnceLock = OnceLock::new(); 1910 | HIGHLIGHTER.get_or_init(|| { 1911 | let mut result = Highlighter::new(4); 1912 | result.keyword("insertion", r"^(\+(?:[^+]|$).*)$"); 1913 | result.keyword("deletion", r"^\-(?:[^-]|$).*$"); 1914 | result.keyword("comment", r"@@.*@@"); 1915 | result 1916 | }) 1917 | } 1918 | -------------------------------------------------------------------------------- /src/lib_old.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::ops::Range; 3 | use regex::Regex; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct RangeLoc { 7 | pub y: usize, 8 | pub x: Range, 9 | } 10 | 11 | #[derive(Debug, Clone, Copy)] 12 | pub struct Loc { 13 | pub y: usize, 14 | pub x: usize, 15 | } 16 | 17 | #[derive(Debug, Clone)] 18 | pub struct Keyword { 19 | pub kind: String, 20 | pub loc: RangeLoc, 21 | } 22 | 23 | #[derive(Debug)] 24 | pub struct BoundedDef { 25 | pub name: String, 26 | pub start: String, 27 | pub end: String, 28 | } 29 | 30 | #[derive(Debug, Clone, PartialEq)] 31 | pub enum PatternKind { 32 | Start, 33 | End, 34 | Hybrid, 35 | } 36 | 37 | #[derive(Debug, Clone)] 38 | pub struct Pattern { 39 | pub of: String, 40 | pub kind: PatternKind, 
pub loc: RangeLoc, 42 | pub token: Option<usize>, 43 | } 44 | 45 | #[derive(Debug, Clone)] 46 | pub struct TokenSpan { 47 | kind: String, 48 | // References to patterns 49 | start: usize, 50 | end: Option<usize>, 51 | } 52 | 53 | #[derive(Debug)] 54 | pub enum Token { 55 | Start(String), 56 | Text(String), 57 | End(String), 58 | } 59 | 60 | pub struct Highlighter { 61 | pub patterns: Vec<Pattern>, 62 | pub tokens: Vec<TokenSpan>, 63 | pub keywords: Vec<Vec<Keyword>>, 64 | pub line_ref: Vec<Vec<usize>>, 65 | pub bounded_rules: HashMap<String, BoundedDef>, 66 | pub keyword_rules: HashMap<String, Vec<Regex>>, 67 | pub modified: Vec<bool>, 68 | } 69 | 70 | impl Highlighter { 71 | pub fn new() -> Self { 72 | Self { 73 | patterns: vec![], 74 | tokens: vec![], 75 | keywords: vec![], 76 | modified: vec![], 77 | line_ref: vec![], 78 | bounded_rules: HashMap::default(), 79 | keyword_rules: HashMap::default(), 80 | } 81 | } 82 | 83 | pub fn bounded<S: Into<String>>(&mut self, name: S, start: S, end: S) { 84 | let (name, start, end) = (name.into(), start.into(), end.into()); 85 | self.bounded_rules.insert(name.clone(), BoundedDef { name, start, end }); 86 | } 87 | 88 | pub fn keyword<S: Into<String>>(&mut self, name: S, pattern: S) { 89 | let (name, pattern) = (name.into(), pattern.into()); 90 | let regex = Regex::new(&pattern).expect("Invalid regex pattern"); 91 | if let Some(v) = self.keyword_rules.get_mut(&name) { 92 | v.push(regex); 93 | } else { 94 | self.keyword_rules.insert(name, vec![regex]); 95 | } 96 | } 97 | 98 | // This will clone each line, potentially optimise using pointers? 99 | pub fn line(&mut self, idx: usize, contents: &String) -> Vec<Token> { 100 | // Get the tokens that appear on this line 101 | let mut tokens: Vec<(RangeLoc, Option<RangeLoc>, TokenSpan)> = self.line_ref[idx].iter() 102 | // Clone the token 103 | .map(|i| self.tokens[*i].clone()) 104 | // Attach starting and ending information 105 | .map(|t| { 106 | // Obtain the start index from the pattern from the token 107 | let start_pattern = &self.patterns[t.start]; 108 | let start = start_pattern.loc.clone(); 109 | // Obtain the end pattern from the token 110 | let end_pattern = t.end.and_then(|t| Some(&self.patterns[t])); 111 | let end = end_pattern.and_then(|t| Some(t.loc.clone())); 112 | // Compose together into a tuple 113 | (start, end, t) 114 | }) 115 | .collect(); 116 | // Trim to fit 117 | if let Some((start, _, _)) = tokens.first_mut() { 118 | // Token starts on a different line? 119 | if start.y != idx { 120 | start.x = 0..0; 121 | start.y = idx; 122 | } 123 | } 124 | if let Some((_, end, _)) = tokens.last_mut() { 125 | // Token ends on a different line?
126 | if end.is_none() || end.as_ref().unwrap().y != idx { 127 | let len = contents.len(); 128 | *end = Some(RangeLoc { x: len..len, y: idx }); 129 | } 130 | } 131 | // Obtain keywords if necessary 132 | if self.modified[idx] { 133 | *self.keywords.get_mut(idx).unwrap() = self.find_keywords(contents, idx); 134 | self.modified[idx] = false; 135 | } 136 | // Create hashmap for easier detection (keywords) 137 | let kws: HashMap = self.keywords[idx].iter() 138 | .map(|k| (k.loc.x.start, (k.loc.clone(), &k.kind))) 139 | .collect(); 140 | // Create hashmap for easier detection (bounded) 141 | let tokens: HashMap = tokens.iter() 142 | .map(|(start, end, tok)| (start.x.start, (start.clone(), end.clone().unwrap(), &tok.kind))) 143 | .collect(); 144 | // Run through the whole line, making sure everything is accounted for 145 | let mut result = vec![]; 146 | let mut x = 0; 147 | while x < contents.len() { 148 | if tokens.contains_key(&x) { 149 | // There is a bounded token here 150 | let (start, end, name) = &tokens[&x]; 151 | result.push(Token::Start(name.to_string())); 152 | result.push(Token::Text(contents[start.x.start..end.x.end].to_string())); 153 | result.push(Token::End(name.to_string())); 154 | x = end.x.end; 155 | } else if kws.contains_key(&x) { 156 | // There is a keyword token here 157 | let (range, name) = &kws[&x]; 158 | result.push(Token::Start(name.to_string())); 159 | result.push(Token::Text(contents[range.x.start..range.x.end].to_string())); 160 | result.push(Token::End(name.to_string())); 161 | x = range.x.end; 162 | } else { 163 | // There is no bounded token here, append to text 164 | let ch = contents.chars().nth(x).unwrap(); 165 | if let Some(Token::Text(ref mut text)) = result.last_mut() { 166 | text.push(ch); 167 | } else { 168 | result.push(Token::Text(ch.to_string())); 169 | } 170 | x += 1; 171 | } 172 | } 173 | result 174 | } 175 | 176 | /// Initially highlight lines, additional lines can be added through append 177 | pub fn run(&mut self, lines: &Vec) { 178 | // Locate patterns (starting from line 0) 179 | let mut patterns = self.find_patterns(0, lines); 180 | // Form tokens from patterns 181 | let tokens = Self::form_tokens(&mut patterns); 182 | // Add to highlighter 183 | self.patterns = patterns; 184 | self.tokens = tokens; 185 | // Build line references 186 | self.build_line_ref(lines.len()); 187 | // Build keyword information 188 | self.modified = (0..lines.len()).map(|_| true).collect(); 189 | self.keywords = (0..lines.len()).map(|_| vec![]).collect(); 190 | } 191 | 192 | /// Add an additional line to this highlighter 193 | pub fn append(&mut self, line: &String) { 194 | let line_number = self.line_ref.len(); 195 | let lines = vec![line.clone()]; 196 | // Locate patterns 197 | let mut patterns = self.find_patterns(line_number, &lines); 198 | // Append to highlighter 199 | self.patterns.append(&mut patterns); 200 | self.line_ref.push(vec![]); 201 | self.modified.push(true); 202 | self.keywords.push(vec![]); 203 | // Perform update 204 | self.retokenize(); 205 | } 206 | 207 | pub fn insert(&mut self, loc: Loc, line: &String) { 208 | self.modified[loc.y] = true; 209 | let ch = line.chars().nth(loc.x).unwrap(); 210 | // Shift up patterns past a certain x 211 | let mut idx = self.patterns.iter().enumerate() 212 | .find(|(_, p)| loc.y < p.loc.y || (loc.y == p.loc.y && loc.x <= p.loc.x.start)) 213 | .and_then(|(n, _)| Some(n)) 214 | .unwrap_or(self.patterns.len()); 215 | self.patterns.iter_mut() 216 | .skip(idx) 217 | .filter(|p| loc.y == p.loc.y) 218 | .for_each(|p| { 
219 | p.loc.x.end += 1; 220 | p.loc.x.start += 1; 221 | }); 222 | // Check for any pattern being destroyed 223 | let mut delete = false; 224 | if let Some(previous_pattern) = &self.patterns.get(idx.saturating_sub(1)) { 225 | if previous_pattern.loc.y == loc.y { 226 | if previous_pattern.loc.x.contains(&loc.x) { 227 | self.patterns.remove(idx.saturating_sub(1)); 228 | idx -= 1; 229 | delete = true; 230 | } 231 | } 232 | } 233 | // Check for new start or end pattern 234 | for kind in vec![PatternKind::Start, PatternKind::End, PatternKind::Hybrid] { 235 | let is = match kind { 236 | PatternKind::Start => self.is_new_start(loc, ch, line), 237 | PatternKind::End => self.is_new_end(loc, ch, line), 238 | PatternKind::Hybrid => self.is_new_hybrid(loc, ch, line), 239 | }; 240 | if let Some((s, def)) = is { 241 | // Get the length of the pattern 242 | let len = match kind { 243 | PatternKind::Start | PatternKind::Hybrid => def.start.len(), 244 | PatternKind::End => def.end.len(), 245 | }; 246 | // Register the pattern 247 | let pattern = Pattern { 248 | token: None, 249 | loc: RangeLoc { y: loc.y, x: s..(s + len) }, 250 | kind, 251 | of: def.name.to_string(), 252 | }; 253 | self.patterns.insert(idx, pattern); 254 | // Retokenize to correct any dodgy tokens 255 | self.retokenize(); 256 | return; 257 | } 258 | } 259 | // If this insertion only deleted a token, then manually retokenize 260 | if delete { 261 | self.retokenize(); 262 | } 263 | } 264 | 265 | pub fn remove(&mut self, loc: Loc, line: &String) { 266 | self.modified[loc.y] = true; 267 | // Find idx of next pattern 268 | let mut idx = self.patterns.iter().enumerate() 269 | .find(|(_, p)| loc.y < p.loc.y || (loc.y == p.loc.y && loc.x <= p.loc.x.end)) 270 | .and_then(|(n, _)| Some(n)) 271 | .unwrap_or(self.patterns.len()); 272 | let mut modified = false; 273 | // Check to see if any patterns have been destroyed 274 | let in_pattern = self.patterns.iter().enumerate() 275 | .find(|(_, p)| loc.y == p.loc.y && p.loc.x.contains(&loc.x)) 276 | .and_then(|(n, _)| Some(n)); 277 | if let Some(pattern_idx) = in_pattern { 278 | self.patterns.remove(pattern_idx); 279 | modified = true; 280 | } 281 | // Check to see if any patterns have been created as a result 282 | if let Some(joined_char) = line.chars().nth(loc.x + 1) { 283 | let mut line = line.clone(); 284 | line.remove(loc.x); 285 | let mut result: Option<(usize, usize, &String, PatternKind)> = None; 286 | // Find out if any new patterns have been created 287 | if let Some((s, def)) = self.is_new_start(loc, joined_char, &line) { 288 | // A new start pattern has been created 289 | result = Some((s, def.start.len(), &def.name, PatternKind::Start)); 290 | } else if let Some((s, def)) = self.is_new_end(loc, joined_char, &line) { 291 | // A new end pattern has been created 292 | result = Some((s, def.end.len(), &def.name, PatternKind::End)); 293 | } else if let Some((s, def)) = self.is_new_hybrid(loc, joined_char, &line) { 294 | // A new hybrid pattern has been created 295 | result = Some((s, def.start.len(), &def.name, PatternKind::Hybrid)); 296 | } 297 | // If so, register 298 | if let Some((s, len, name, kind)) = result { 299 | let double_start = self.bounded_rules[name].start.len() > 1; 300 | let double_end = self.bounded_rules[name].end.len() > 1; 301 | let double = (kind == PatternKind::Start && double_start) || 302 | (kind == PatternKind::End && double_end) || 303 | (kind == PatternKind::Hybrid && double_start); 304 | if double { 305 | let pattern = Pattern { 306 | token: None, 307 | loc: RangeLoc { 
y: loc.y, x: s..(s + len) }, 308 | kind, 309 | of: name.to_string(), 310 | }; 311 | self.patterns.insert(idx, pattern); 312 | modified = true; 313 | idx += 1; 314 | } 315 | } 316 | } 317 | // Shift back patterns before a certain x 318 | self.patterns.iter_mut() 319 | .skip(idx) 320 | .filter(|p| loc.y == p.loc.y) 321 | .for_each(|p| { 322 | p.loc.x.end -= 1; 323 | p.loc.x.start -= 1; 324 | }); 325 | // Retokenize if necessary 326 | if modified { 327 | self.retokenize(); 328 | } 329 | } 330 | 331 | pub fn insert_line(&mut self, y: usize) { 332 | self.patterns.iter_mut() 333 | .filter(|p| p.loc.y > y) 334 | .for_each(|p| p.loc.y += 1); 335 | self.line_ref.insert(y, vec![]); 336 | self.keywords.insert(y, vec![]); 337 | self.modified.insert(y, true); 338 | } 339 | 340 | pub fn remove_line(&mut self, y: usize) { 341 | self.patterns.iter_mut() 342 | .filter(|p| p.loc.y > y) 343 | .for_each(|p| p.loc.y -= 1); 344 | self.line_ref.remove(y); 345 | self.keywords.remove(y); 346 | self.modified.remove(y); 347 | } 348 | 349 | pub fn split_down(&mut self, loc: Loc) { 350 | // Inside a pattern: kill off the pattern 351 | let pattern_chop = self.patterns.iter().enumerate() 352 | .filter(|(_, p)| p.loc.y == loc.y) 353 | .find(|(_, p)| ((p.loc.x.start + 1)..p.loc.x.end).contains(&loc.x)) 354 | .and_then(|(n, _)| Some(n)); 355 | if let Some(idx) = pattern_chop { 356 | self.patterns.remove(idx); 357 | self.retokenize(); 358 | } 359 | // Adjust keywords 360 | self.modified[loc.y] = true; 361 | // Adjust patterns 362 | self.insert_line(loc.y); 363 | self.patterns.iter_mut() 364 | .filter(|p| p.loc.y == loc.y && loc.x <= p.loc.x.start) 365 | .for_each(|p| { 366 | p.loc.y += 1; 367 | p.loc.x.start -= loc.x; 368 | p.loc.x.end -= loc.x; 369 | }); 370 | self.build_line_ref(self.line_ref.len()); 371 | } 372 | 373 | pub fn splice_up(&mut self, loc: Loc, line: &String) { 374 | let idx = self.patterns.iter().enumerate() 375 | .find(|(_, p)| p.loc.y >= loc.y + 1) 376 | .and_then(|(n, _)| Some(n)) 377 | .unwrap_or(self.patterns.len()); 378 | let mut modified = false; 379 | // Adjust keywords 380 | self.modified[loc.y] = true; 381 | // Adjust patterns 382 | self.patterns.iter_mut() 383 | .filter(|p| p.loc.y == loc.y + 1) 384 | .for_each(|p| { 385 | p.loc.y -= 1; 386 | p.loc.x.start += loc.x; 387 | p.loc.x.end += loc.x; 388 | }); 389 | self.remove_line(loc.y + 1); 390 | // Check to see if any patterns have been created as a result 391 | if let Some(joined_char) = line.chars().nth(loc.x) { 392 | let line = line.clone(); 393 | let mut result: Option<(usize, usize, &String, PatternKind)> = None; 394 | // Find out if any new patterns have been created 395 | //println!("{loc:?} {joined_char:?} {line:?}"); 396 | if let Some((s, def)) = self.is_new_start(loc, joined_char, &line) { 397 | // A new start pattern has been created 398 | result = Some((s, def.start.len(), &def.name, PatternKind::Start)); 399 | } else if let Some((s, def)) = self.is_new_end(loc, joined_char, &line) { 400 | // A new end pattern has been created 401 | result = Some((s, def.end.len(), &def.name, PatternKind::End)); 402 | } else if let Some((s, def)) = self.is_new_hybrid(loc, joined_char, &line) { 403 | // A new hybrid pattern has been created 404 | result = Some((s, def.start.len(), &def.name, PatternKind::Hybrid)); 405 | } 406 | // If so, register 407 | if let Some((s, len, name, kind)) = result { 408 | let double_start = self.bounded_rules[name].start.len() > 1; 409 | let double_end = self.bounded_rules[name].end.len() > 1; 410 | let double = (kind == 
PatternKind::Start && double_start) || 411 | (kind == PatternKind::End && double_end) || 412 | (kind == PatternKind::Hybrid && double_start); 413 | if double { 414 | let pattern = Pattern { 415 | token: None, 416 | loc: RangeLoc { y: loc.y, x: s..(s + len) }, 417 | kind, 418 | of: name.to_string(), 419 | }; 420 | self.patterns.insert(idx, pattern); 421 | modified = true; 422 | } 423 | } 424 | } 425 | if modified { 426 | self.retokenize(); 427 | } else { 428 | self.build_line_ref(self.line_ref.len()); 429 | } 430 | } 431 | 432 | fn retokenize(&mut self) { 433 | let patterns = &mut self.patterns; 434 | self.tokens = Self::form_tokens(patterns); 435 | self.build_line_ref(self.line_ref.len()); 436 | } 437 | 438 | fn is_new_start(&self, loc: Loc, ch: char, line: &String) -> Option<(usize, &BoundedDef)> { 439 | self.is_new_pattern(loc, ch, line, PatternKind::Start) 440 | } 441 | 442 | fn is_new_end(&self, loc: Loc, ch: char, line: &String) -> Option<(usize, &BoundedDef)> { 443 | self.is_new_pattern(loc, ch, line, PatternKind::End) 444 | } 445 | 446 | fn is_new_hybrid(&self, loc: Loc, ch: char, line: &String) -> Option<(usize, &BoundedDef)> { 447 | self.is_new_pattern(loc, ch, line, PatternKind::Hybrid) 448 | } 449 | 450 | fn is_new_pattern(&self, loc: Loc, ch: char, line: &String, kind: PatternKind) -> Option<(usize, &BoundedDef)> { 451 | // Get all non-hybrid rules 452 | let rules = self.bounded_rules.values(); 453 | let mut result = None; 454 | // Return a match if there is one 455 | for def in rules { 456 | let pattern = match kind { 457 | PatternKind::Start => &def.start, 458 | PatternKind::End => &def.end, 459 | PatternKind::Hybrid => &def.start, 460 | }; 461 | let hybrid = def.start == def.end; 462 | // Determine if a start or end token has actually been created 463 | result = pattern.chars().enumerate() 464 | // Find locations within the start or end pattern where this character could be 465 | .filter(|(_, i)| *i == ch) 466 | // For each one, work out where the pattern would theoretically start 467 | .map(|(n, _)| loc.x.saturating_sub(n)) 468 | // Attach a corresponding end location 469 | .map(|pattern_start| (pattern_start, pattern_start + pattern.len())) 470 | // Find out if any of these candidates are actually start or end patterns 471 | .map(|(start, end)| (start, &line[start..end] == pattern)) 472 | .find(|(_, is_match)| *is_match && (!hybrid || kind == PatternKind::Hybrid)) 473 | // Link in definition 474 | .and_then(|(pattern_start, _)| Some((pattern_start, def))); 475 | if result.is_some() { 476 | break; 477 | } 478 | } 479 | result 480 | } 481 | 482 | /// Finds patterns in the provided lines 483 | /// offset will add to the y axis (useful for when you're appending lines) 484 | fn find_patterns(&mut self, offset: usize, lines: &Vec<String>) -> Vec<Pattern> { 485 | let mut result = vec![]; 486 | // For each line 487 | for (mut y, line) in lines.iter().enumerate() { 488 | // Offset y tokens 489 | y += offset; 490 | // For each character 491 | let mut x = 0; 492 | while x < line.len() { 493 | // Set up line and position info 494 | let line = &line[x..]; 495 | let loc = Loc { y, x }; 496 | // Work out if there is a pattern here 497 | let pattern = self.bounded_rules.values() 498 | // Find whether this pattern is a start pattern or end pattern 499 | .map(|def| (&def.name, line.starts_with(&def.start), line.starts_with(&def.end))) 500 | // Find one that is either a start or end pattern 501 | .find(|(_, starts, ends)| *starts || *ends); 502 | // If there is, register the pattern 503 | if let Some((name,
starts, ends)) = pattern { 504 | // Form the pattern 505 | let def = &self.bounded_rules[name]; 506 | let kind = match (starts, ends) { 507 | // Start pattern 508 | (true, false) => PatternKind::Start, 509 | // End pattern 510 | (false, true) => PatternKind::End, 511 | // Hybrid pattern 512 | (true, true) => PatternKind::Hybrid, 513 | // No pattern here 514 | (false, false) => unreachable!(), 515 | }; 516 | let of = def.name.clone(); 517 | let x_range = loc.x..(loc.x + def.end.len()); 518 | let range = RangeLoc { y: loc.y, x: x_range }; 519 | let pattern = Pattern { token: None, kind, loc: range, of }; 520 | result.push(pattern); 521 | // Keep searching forward 522 | x += if starts { def.start.len() } else { def.end.len() }; 523 | } else { 524 | x += 1; 525 | } 526 | } 527 | } 528 | result 529 | } 530 | 531 | /// Forms tokens based on patterns 532 | /// Ensure patterns are correctly registered before running this 533 | fn form_tokens(patterns: &mut Vec<Pattern>) -> Vec<TokenSpan> { 534 | let mut result = vec![]; 535 | let mut registering = false; 536 | let mut registering_kind = "".to_string(); 537 | // Run through patterns 538 | for (n, pattern) in patterns.iter_mut().enumerate() { 539 | let Pattern { of, kind, ref mut token, .. } = pattern; 540 | let len = result.len(); 541 | match (kind, registering) { 542 | // New start token 543 | (PatternKind::Start, false) => { 544 | registering = true; 545 | registering_kind = of.clone(); 546 | // Make pattern active 547 | *token = Some(len); 548 | // Put on token 549 | result.push(TokenSpan { 550 | kind: of.clone(), 551 | start: n, 552 | end: None, 553 | }); 554 | } 555 | // Corresponding end token 556 | (PatternKind::End, true) => { 557 | if *of == registering_kind { 558 | if let Some(this) = result.last_mut() { 559 | registering = false; 560 | registering_kind = "".to_string(); 561 | // Make pattern active 562 | *token = Some(len - 1); 563 | // Update end pattern in token 564 | this.end = Some(n); 565 | } 566 | } 567 | } 568 | // Opportunity to end a hybrid token 569 | (PatternKind::Hybrid, true) => { 570 | if let Some(this) = result.last_mut() { 571 | // Tokens are of the same type?
572 | if *of == this.kind { 573 | // They are, terminate this hybrid token 574 | registering = false; 575 | registering_kind = "".to_string(); 576 | // Make pattern active 577 | *token = Some(len - 1); 578 | // Update end pattern in token 579 | this.end = Some(n); 580 | } 581 | } 582 | } 583 | // Opportunity to start a new hybrid token 584 | (PatternKind::Hybrid, false) => { 585 | registering = true; 586 | registering_kind = of.clone(); 587 | // Make pattern active 588 | *token = Some(len); 589 | // Push on token 590 | result.push(TokenSpan { 591 | kind: of.clone(), 592 | start: n, 593 | end: None, 594 | }); 595 | } 596 | _ => (), 597 | } 598 | } 599 | result 600 | } 601 | 602 | fn find_keywords(&self, line: &String, y: usize) -> Vec<Keyword> { 603 | let mut result = vec![]; 604 | for (name, group) in &self.keyword_rules { 605 | for exp in group { 606 | result.append(&mut exp.find_iter(line) 607 | .map(|s| Keyword { 608 | loc: RangeLoc { x: s.start()..s.end(), y }, 609 | kind: name.to_string(), 610 | }) 611 | .collect()); 612 | } 613 | } 614 | result 615 | } 616 | 617 | fn build_line_ref(&mut self, max: usize) { 618 | // Refresh line reference 619 | self.line_ref = vec![]; 620 | (0..max).for_each(|_| self.line_ref.push(vec![])); 621 | // Register tokens according to the lines they span 622 | for (n, token) in self.tokens.iter().enumerate() { 623 | // Obtain start and end positions 624 | let start = self.patterns[token.start].loc.y; 625 | let end = match token.end { 626 | // Find y position of end pattern 627 | Some(end) => self.patterns[end].loc.y, 628 | // This token is a hanging token, max it out 629 | None => max - 1, 630 | }; 631 | for y in start..=end { 632 | self.line_ref[y].push(n); 633 | } 634 | } 635 | } 636 | } 637 | --------------------------------------------------------------------------------
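
For orientation, here is a minimal sketch of how the legacy `lib_old.rs` API above fits together, based solely on the signatures in that file (`new`, `bounded`, `keyword`, `run`, `line`, and the `Token` enum). It is illustrative only: `lib_old.rs` is not exported by the published crate, and the rule names and sample line are invented for the example.

```rust
// Illustrative sketch only: assumes it sits in the same module as the
// legacy types above (Highlighter, Token), which the crate does not export.
fn legacy_demo() {
    let mut h = Highlighter::new();
    // One bounded rule (start/end delimiters) and one keyword rule.
    h.bounded("string", "\"", "\"");
    h.keyword("keyword", r"\b(fn|let|pub)\b");
    // Initial pass over the whole buffer.
    let lines: Vec<String> = vec!["let greeting = \"hi\";".to_string()];
    h.run(&lines);
    // Per-line token stream: Start/End markers wrapping highlighted text,
    // with plain Text runs in between.
    for (y, line) in lines.iter().enumerate() {
        for token in h.line(y, line) {
            match token {
                Token::Start(kind) => print!("[{kind}:"),
                Token::Text(text) => print!("{text}"),
                Token::End(_) => print!("]"),
            }
        }
        println!();
    }
}
```

Note the contrast with the current API in `src/lib.rs`: the legacy constructor takes no tab-width argument and `bounded` has no escaping flag, whereas the current highlighters above are built with `Highlighter::new(4)` and `bounded(name, start, end, escaping)`.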