65 | int main() {
66 | std::cout << \"Hello World!\" << std::endl;
67 | }";
68 |
69 | let sr_cpp = ss.find_syntax_by_extension("cpp").unwrap();
70 | let mut cpp_html_generator =
71 | ClassedHTMLGenerator::new_with_class_style(sr_cpp, &ss, ClassStyle::Spaced);
72 | for line in LinesWithEndings::from(code_cpp) {
73 | cpp_html_generator
74 | .parse_html_for_line_which_includes_newline(line)
75 | .unwrap();
76 | }
77 | let html_cpp = cpp_html_generator.finalize();
78 |
79 | writeln!(html, "")?;
80 | writeln!(html, "{}", html_cpp)?;
81 | writeln!(html, "")?;
82 |
83 | // write html end
84 | writeln!(html, " ")?;
85 | writeln!(html, "")?;
86 |
87 | // ---------------------------------------------------------------------------------------------
88 | // generate css
89 | let css = "@import url(\"theme-light.css\") (prefers-color-scheme: light);
90 | @import url(\"theme-dark.css\") (prefers-color-scheme: dark);
91 |
92 | @media (prefers-color-scheme: dark) {
93 | body {
94 | background-color: gray;
95 | }
96 | }
97 | @media (prefers-color-scheme: light) {
98 | body {
99 | background-color: lightgray;
100 | }
101 | }";
102 |
103 | let css_file = File::create(Path::new("synhtml-css-classes.css"))?;
104 | let mut css_writer = BufWriter::new(&css_file);
105 |
106 | writeln!(css_writer, "{}", css)?;
107 |
108 | // ---------------------------------------------------------------------------------------------
109 | // generate css files for themes
110 | let ts = ThemeSet::load_defaults();
111 |
112 | // create dark color scheme css
113 | let dark_theme = &ts.themes["Solarized (dark)"];
114 | let css_dark_file = File::create(Path::new("theme-dark.css"))?;
115 | let mut css_dark_writer = BufWriter::new(&css_dark_file);
116 |
117 | let css_dark = css_for_theme_with_class_style(dark_theme, ClassStyle::Spaced).unwrap();
118 | writeln!(css_dark_writer, "{}", css_dark)?;
119 |
120 | // create light color scheme css
121 | let light_theme = &ts.themes["Solarized (light)"];
122 | let css_light_file = File::create(Path::new("theme-light.css"))?;
123 | let mut css_light_writer = BufWriter::new(&css_light_file);
124 |
125 | let css_light = css_for_theme_with_class_style(light_theme, ClassStyle::Spaced).unwrap();
126 | writeln!(css_light_writer, "{}", css_light)?;
127 |
128 | Ok(())
129 | }
130 |
--------------------------------------------------------------------------------
/testdata/test3.html:
--------------------------------------------------------------------------------
1 |
2 | <script type="text/javascript">
3 | var lol = "JS nesting";
4 | class WithES6 extends THREE.Mesh {
5 | static highQuality() { // such classes
6 | return this.toString();
7 | }
8 | }
9 | <%
10 | # The outer syntax is HTML (Rails) detected from the .erb extension
11 | puts "Ruby #{'nesting' * 2}"
12 | here = <<-WOWCOOL + CORRECTLY_DOES_NOT_HIGHLIGHT_REST_OF_LINE
13 | high quality parsing even supports custom heredoc endings
14 | #{
15 | nested = 5 * <<-ZOMG
16 | nested heredocs! (no highlighting: 5 * 6, yes highlighting: #{5 * 6})
17 | ZOMG
18 | }
19 | WOWCOOL
20 | sql = <<-SQL
21 | select * from heredocs where there_are_special_heredoc_names = true
22 | SQL
23 | %>
24 | </script>
25 | <style type="text/css">
26 | /* the HTML syntax also supports CSS of course */
27 | .stuff #wow {
28 | border: 5px #ffffff;
29 | background: url("wow");
30 | }
31 | </style>
32 |
33 |
--------------------------------------------------------------------------------
/testdata/JSON.sublime-syntax:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | name: JSON
4 | file_extensions:
5 | - json
6 | - sublime-settings
7 | - sublime-menu
8 | - sublime-keymap
9 | - sublime-mousemap
10 | - sublime-theme
11 | - sublime-build
12 | - sublime-project
13 | - sublime-completions
14 | - sublime-commands
15 | - sublime-macro
16 | - sublime-color-scheme
17 | - ipynb
18 | - Pipfile.lock
19 | scope: source.json
20 | contexts:
21 | prototype:
22 | - include: comments
23 | main:
24 | - include: value
25 | value:
26 | - include: constant
27 | - include: number
28 | - include: string
29 | - include: array
30 | - include: object
31 | array:
32 | - match: '\['
33 | scope: punctuation.section.sequence.begin.json
34 | push:
35 | - meta_scope: meta.sequence.json
36 | - match: '\]'
37 | scope: punctuation.section.sequence.end.json
38 | pop: true
39 | - include: value
40 | - match: ","
41 | scope: punctuation.separator.sequence.json
42 | - match: '[^\s\]]'
43 | scope: invalid.illegal.expected-sequence-separator.json
44 | comments:
45 | - match: /\*\*(?!/)
46 | scope: punctuation.definition.comment.json
47 | push:
48 | - meta_scope: comment.block.documentation.json
49 | - meta_include_prototype: false
50 | - match: \*/
51 | pop: true
52 | - match: ^\s*(\*)(?!/)
53 | captures:
54 | 1: punctuation.definition.comment.json
55 | - match: /\*
56 | scope: punctuation.definition.comment.json
57 | push:
58 | - meta_scope: comment.block.json
59 | - meta_include_prototype: false
60 | - match: \*/
61 | pop: true
62 | - match: (//).*$\n?
63 | scope: comment.line.double-slash.js
64 | captures:
65 | 1: punctuation.definition.comment.json
66 | constant:
67 | - match: \b(?:true|false|null)\b
68 | scope: constant.language.json
69 | number:
70 | # handles integer and decimal numbers
71 | - match: -?(?:0|[1-9]\d*)(?:(?:(\.)\d+)(?:[eE][-+]?\d+)?|(?:[eE][-+]?\d+))
72 | scope: constant.numeric.float.decimal.json
73 | captures:
74 | 1: punctuation.separator.decimal.json
75 | - match: -?(?:0|[1-9]\d*)
76 | scope: constant.numeric.integer.decimal.json
77 | object:
78 | # a JSON object
79 | - match: '\{'
80 | scope: punctuation.section.mapping.begin.json
81 | push:
82 | - meta_scope: meta.mapping.json
83 | - match: '\}'
84 | scope: punctuation.section.mapping.end.json
85 | pop: true
86 | - match: '"'
87 | scope: punctuation.definition.string.begin.json
88 | push:
89 | - clear_scopes: 1
90 | - meta_scope: meta.mapping.key.json string.quoted.double.json
91 | - meta_include_prototype: false
92 | - include: inside-string
93 | - match: ":"
94 | scope: punctuation.separator.mapping.key-value.json
95 | push:
96 | - match: ',|\s?(?=\})'
97 | scope: invalid.illegal.expected-mapping-value.json
98 | pop: true
99 | - match: (?=\S)
100 | set:
101 | - clear_scopes: 1
102 | - meta_scope: meta.mapping.value.json
103 | - include: value
104 | - match: ''
105 | set:
106 | - match: ','
107 | scope: punctuation.separator.mapping.pair.json
108 | pop: true
109 | - match: \s*(?=\})
110 | pop: true
111 | - match: \s(?!/[/*])(?=[^\s,])|[^\s,]
112 | scope: invalid.illegal.expected-mapping-separator.json
113 | pop: true
114 | - match: '[^\s\}]'
115 | scope: invalid.illegal.expected-mapping-key.json
116 | string:
117 | - match: '"'
118 | scope: punctuation.definition.string.begin.json
119 | push: inside-string
120 | inside-string:
121 | - meta_scope: string.quoted.double.json
122 | - meta_include_prototype: false
123 | - match: '"'
124 | scope: punctuation.definition.string.end.json
125 | pop: true
126 | - include: string-escape
127 | - match: $\n?
128 | scope: invalid.illegal.unclosed-string.json
129 | pop: true
130 | string-escape:
131 | - match: |-
132 | (?x: # turn on extended mode
133 | \\ # a literal backslash
134 | (?: # ...followed by...
135 | ["\\/bfnrt] # one of these characters
136 | | # ...or...
137 | u # a u
138 | [0-9a-fA-F]{4} # and four hex digits
139 | )
140 | )
141 | scope: constant.character.escape.json
142 | - match: \\.
143 | scope: invalid.illegal.unrecognized-string-escape.json
144 |
--------------------------------------------------------------------------------
/examples/syncat.rs:
--------------------------------------------------------------------------------
1 | use getopts::Options;
2 | use std::borrow::Cow;
3 | use std::io::BufRead;
4 | use std::path::Path;
5 | use syntect::dumps::{dump_to_file, from_dump_file};
6 | use syntect::easy::HighlightFile;
7 | use syntect::highlighting::{Style, Theme, ThemeSet};
8 | use syntect::parsing::SyntaxSet;
9 | use syntect::util::as_24_bit_terminal_escaped;
10 |
11 | fn load_theme(tm_file: &str, enable_caching: bool) -> Theme {
12 | let tm_path = Path::new(tm_file);
13 |
14 | if enable_caching {
15 | let tm_cache = tm_path.with_extension("tmdump");
16 |
17 | if tm_cache.exists() {
18 | from_dump_file(tm_cache).unwrap()
19 | } else {
20 | let theme = ThemeSet::get_theme(tm_path).unwrap();
21 | dump_to_file(&theme, tm_cache).unwrap();
22 | theme
23 | }
24 | } else {
25 | ThemeSet::get_theme(tm_path).unwrap()
26 | }
27 | }
28 |
29 | fn main() {
30 | let args: Vec = std::env::args().collect();
31 | let mut opts = Options::new();
32 | opts.optflag("l", "list-file-types", "Lists supported file types");
33 | opts.optflag(
34 | "L",
35 | "list-embedded-themes",
36 | "Lists themes present in the executable",
37 | );
38 | opts.optopt("t", "theme-file", "THEME_FILE", "Theme file to use. May be a path, or an embedded theme. Embedded themes will take precendence. Default: base16-ocean.dark");
39 | opts.optopt(
40 | "s",
41 | "extra-syntaxes",
42 | "SYNTAX_FOLDER",
43 | "Additional folder to search for .sublime-syntax files in.",
44 | );
45 | opts.optflag(
46 | "e",
47 | "no-default-syntaxes",
48 | "Doesn't load default syntaxes, intended for use with --extra-syntaxes.",
49 | );
50 | opts.optflag(
51 | "n",
52 | "no-newlines",
53 | "Uses the no newlines versions of syntaxes and dumps.",
54 | );
55 | opts.optflag("c", "cache-theme", "Cache the parsed theme file.");
56 |
57 | let matches = match opts.parse(&args[1..]) {
58 | Ok(m) => m,
59 | Err(f) => {
60 | panic!("{}", f.to_string())
61 | }
62 | };
63 |
64 | let no_newlines = matches.opt_present("no-newlines");
65 | let mut ss = if matches.opt_present("no-default-syntaxes") {
66 | SyntaxSet::new()
67 | } else if no_newlines {
68 | SyntaxSet::load_defaults_nonewlines()
69 | } else {
70 | SyntaxSet::load_defaults_newlines()
71 | };
72 |
73 | if let Some(folder) = matches.opt_str("extra-syntaxes") {
74 | let mut builder = ss.into_builder();
75 | builder.add_from_folder(folder, !no_newlines).unwrap();
76 | ss = builder.build();
77 | }
78 |
79 | let ts = ThemeSet::load_defaults();
80 |
81 | if matches.opt_present("list-file-types") {
82 | println!("Supported file types:");
83 |
84 | for sd in ss.syntaxes() {
85 | println!("- {} (.{})", sd.name, sd.file_extensions.join(", ."));
86 | }
87 | } else if matches.opt_present("list-embedded-themes") {
88 | println!("Embedded themes:");
89 |
90 | for t in ts.themes.keys() {
91 | println!("- {}", t);
92 | }
93 | } else if matches.free.is_empty() {
94 | let brief = format!("USAGE: {} [options] FILES", args[0]);
95 | println!("{}", opts.usage(&brief));
96 | } else {
97 | let theme_file: String = matches
98 | .opt_str("theme-file")
99 | .unwrap_or_else(|| "base16-ocean.dark".to_string());
100 |
101 | let theme = ts
102 | .themes
103 | .get(&theme_file)
104 | .map(Cow::Borrowed)
105 | .unwrap_or_else(|| {
106 | Cow::Owned(load_theme(&theme_file, matches.opt_present("cache-theme")))
107 | });
108 |
109 | for src in &matches.free[..] {
110 | if matches.free.len() > 1 {
111 | println!("==> {} <==", src);
112 | }
113 |
114 | let mut highlighter = HighlightFile::new(src, &ss, &theme).unwrap();
115 |
116 | // We use read_line instead of `for line in highlighter.reader.lines()` because that
117 | // doesn't return strings with a `\n`, and including the `\n` gets us more robust highlighting.
118 | // See the documentation for `SyntaxSetBuilder::add_from_folder`.
119 | // It also allows re-using the line buffer, which should be a tiny bit faster.
120 | let mut line = String::new();
121 | while highlighter.reader.read_line(&mut line).unwrap() > 0 {
122 | if no_newlines && line.ends_with('\n') {
123 | let _ = line.pop();
124 | }
125 |
126 | {
127 | let regions: Vec<(Style, &str)> = highlighter
128 | .highlight_lines
129 | .highlight_line(&line, &ss)
130 | .unwrap();
131 | print!("{}", as_24_bit_terminal_escaped(®ions[..], true));
132 | }
133 | line.clear();
134 |
135 | if no_newlines {
136 | println!();
137 | }
138 | }
139 |
140 | // Clear the formatting
141 | println!("\x1b[0m");
142 | }
143 | }
144 | }
145 |
--------------------------------------------------------------------------------
/DESIGN.md:
--------------------------------------------------------------------------------
1 | # Optimization/Design notes
2 |
3 | This is my scratch pad for optimization ideas. Some of this I will implement, some I have implemented, some are just speculative.
4 |
5 | # Scopes
6 |
7 | ## Representation ideas:
8 |
9 | - Normal arrays of strings
10 | - array of 32-bit or 64-bit atoms (maybe using Servo's atom library)
11 | - Atoms packed into one or two u64s
12 | - fast equality checking
13 | - potentially fast prefix checking
14 | - needs unsafe code
15 |
16 | ## Potential packings:
17 |
18 | - variable width atoms, either 7 bits and a tag bit for top 128 or 13 bits and 3 tagging bits for rest
19 | - can fit all but 33 of the scopes present
20 | - tagged pointer (taking advantage of alignment), either a pointer to a slow path, or the first 4 bits set then a packed representation, one of others mentioned
21 | - 6 10-bit atoms referencing unique things by position (see by-position stats below)
22 | - 5 11-bit atoms and one 8-bit one for the first atom (2^11 = 2048, 2^8 = 256), one remaining bit for tag marker
23 |
24 | ## Stats:
25 |
26 | - 7000 scopes referenced in sublime, 3537 unique ones, all stats after this are based on non-unique data
27 | - all but 33 scopes in default packages could fit in 64 bits with a combination of 8-bit and 16-bit atom encoding
28 | - there are only 1219 unique atoms in the default package set
29 | - the top 128 atoms make up ~90% of all unique atoms referenced in syntax files
30 | - there are 26 unique first atoms, 145 unique last atoms
31 | - every position (1st atom, 2nd atom, ...) has under 878 possibilities, only the 2nd, 3rd and 4th have >256
32 | - 99.8% of scopes have 6 or fewer atoms, 97% have 5 or fewer, 70% have 4 or fewer
33 | - for unique scopes: {2=>81, 4=>1752, 3=>621, 5=>935, 7=>8, 6=>140} ----> 95% of uniques <= 6
34 | - for non-unique scopes: {2=>125, 4=>3383, 3=>1505, 5=>1891, 7=>9, 6=>202}
35 |
36 | # Checking prefix
37 |
38 | operation: `fn extent_matched(potential_prefix: Scope, s: Scope) -> u8`
39 | idea: any differences are beyond the length of the prefix.
40 | Figure this out by XORing the two values, then taking ctz/clz and comparing the result to the prefix length (however that works).
41 |
42 | ```bash
43 | XXXXYYYY00000000 # prefix
44 | XXXXYYYYZZZZ0000 # testee
45 | 00000000ZZZZ0000 # = xored
46 |
47 | XXXXYYYYQQQQ0000 # non-prefix
48 | XXXXYYYYZZZZ0000 # testee
49 | 00000000GGGG0000 # = xored
50 |
51 | XXXXQQQQ00000000 # non-prefix
52 | XXXXYYYYZZZZ0000 # testee
53 | 0000BBBBZZZZ0000 # = xored
54 | ```
55 |
56 | # Parsing
57 |
58 | * Problem: need to reduce number of regex search calls
59 | * Solution: cache better
60 |
61 | ## Stats
62 |
63 | ```bash
64 | # On stats branch
65 | $cargo run --release --example syncat testdata/jquery.js | grep cmiss | wc -l
66 | Running `target/release/examples/syncat testdata/jquery.js`
67 | 61266
68 | $cargo run --release --example syncat testdata/jquery.js | grep ptoken | wc -l
69 | Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
70 | Running `target/release/examples/syncat testdata/jquery.js`
71 | 98714
72 | $wc -l testdata/jquery.js
73 | 9210 testdata/jquery.js
74 | $cargo run --release --example syncat testdata/jquery.js | grep cclear | wc -l
75 | Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
76 | Running `target/release/examples/syncat testdata/jquery.js`
77 | 71302
78 | $cargo run --release --example syncat testdata/jquery.js | grep freshcachetoken | wc -l
79 | Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
80 | Running `target/release/examples/syncat testdata/jquery.js`
81 | 80512
82 | # On stats-2 branch
83 | $cargo run --example syncat testdata/jquery.js | grep cachehit | wc -l
84 | Running `target/debug/examples/syncat testdata/jquery.js`
85 | 527774
86 | $cargo run --example syncat testdata/jquery.js | grep regsearch | wc -l
87 | Running `target/debug/examples/syncat testdata/jquery.js`
88 | 2862948
89 | $cargo run --example syncat testdata/jquery.js | grep regmatch | wc -l
90 | Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
91 | Running `target/debug/examples/syncat testdata/jquery.js`
92 | 296127
93 | $cargo run --example syncat testdata/jquery.js | grep leastmatch | wc -l
94 | Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
95 | Running `target/debug/examples/syncat testdata/jquery.js`
96 | 137842
97 | # With search caching
98 | $cargo run --example syncat testdata/jquery.js | grep searchcached | wc -l
99 | Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
100 | Running `target/debug/examples/syncat testdata/jquery.js`
101 | 2440527
102 | $cargo run --example syncat testdata/jquery.js | grep regsearch | wc -l
103 | Running `target/debug/examples/syncat testdata/jquery.js`
104 | 950195
105 | ```
106 |
107 | Average unique regexes per line is 87.58, average non-unique is regsearch/lines = 317
108 |
109 | Ideally we should have only a couple fresh cache searches per line, not `~10` like the stats show (freshcachetoken/linecount).
110 |
111 | In a fantabulous world these stats mean a possible 10x speed improvement, but since caching does have a cost and we can't always cache, it will likely be nice but not that high.
112 |
113 | ## Issues
114 |
115 | - Stack transitions always bust cache, even when for example JS just pushes another group
116 | - Doesn't cache actual matches, only whether it matched or not
117 |
118 | ## Attacks
119 |
120 | - cache based on actual context, only search if it is a prototype we haven't searched before
121 | - hash maps based on casting RC ref to pointer and hashing? (there is a Hash impl for pointers)
122 | - for new searches, store matched regexes for context in BTreeMap like textmate
123 | - for subsequent tokens in same context, just pop off btreemap and re-search if before curpos
124 | - cache per Regex
125 |
--------------------------------------------------------------------------------
/src/highlighting/theme.rs:
--------------------------------------------------------------------------------
1 | // Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/theme.rs
2 | // released under the MIT license by @defuz
3 | use super::selector::*;
4 | use super::style::*;
5 | use serde_derive::{Deserialize, Serialize};
6 |
7 | /// A theme parsed from a `.tmTheme` file.
8 | ///
9 | /// This contains additional fields useful for a theme list as well as `settings` for styling your editor.
10 | #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
11 | pub struct Theme {
12 | pub name: Option,
13 | pub author: Option,
14 | /// External settings for the editor using this theme
15 | pub settings: ThemeSettings,
16 | /// The styling rules for the viewed text
17 | pub scopes: Vec,
18 | }
19 |
20 | /// Properties for styling the UI of a text editor
21 | ///
22 | /// This essentially consists of the styles that aren't directly applied to the text being viewed.
23 | /// `ThemeSettings` are intended to be used to make the UI of the editor match the styling of the
24 | /// text itself.
25 | #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
26 | pub struct ThemeSettings {
27 | /// The default color for text.
28 | pub foreground: Option,
29 | /// The default backgound color of the view.
30 | pub background: Option,
31 | /// Color of the caret.
32 | pub caret: Option,
33 | /// Color of the line the caret is in.
34 | /// Only used when the `highlight_line` setting is set to `true`.
35 | pub line_highlight: Option,
36 |
37 | /// The color to use for the squiggly underline drawn under misspelled words.
38 | pub misspelling: Option,
39 | /// The color of the border drawn around the viewport area of the minimap.
40 | /// Only used when the `draw_minimap_border` setting is enabled.
41 | pub minimap_border: Option,
42 | /// A color made available for use by the theme.
43 | pub accent: Option,
44 | /// CSS passed to popups.
45 | pub popup_css: Option,
46 | /// CSS passed to phantoms.
47 | pub phantom_css: Option,
48 |
49 | /// Color of bracketed sections of text when the caret is in a bracketed section.
50 | /// Only applied when the `match_brackets` setting is set to `true`.
51 | pub bracket_contents_foreground: Option,
52 | /// Controls certain options when the caret is in a bracket section.
53 | /// Only applied when the `match_brackets` setting is set to `true`.
54 | pub bracket_contents_options: Option,
55 | /// Foreground color of the brackets when the caret is next to a bracket.
56 | /// Only applied when the `match_brackets` setting is set to `true`.
57 | pub brackets_foreground: Option,
58 | /// Background color of the brackets when the caret is next to a bracket.
59 | /// Only applied when the `match_brackets` setting is set to `true`.
60 | pub brackets_background: Option,
61 | /// Controls certain options when the caret is next to a bracket.
62 | /// Only applied when the `match_brackets` setting is set to `true`.
63 | pub brackets_options: Option,
64 |
65 | /// Color of tags when the caret is next to a tag.
66 | /// Only used when the `match_tags` setting is set to `true`.
67 | pub tags_foreground: Option,
68 | /// Controls certain options when the caret is next to a tag.
69 | /// Only applied when the `match_tags` setting is set to `true`.
70 | pub tags_options: Option,
71 |
72 | /// The border color for "other" matches.
73 | pub highlight: Option,
74 | /// Background color of regions matching the current search.
75 | pub find_highlight: Option,
76 | /// Text color of regions matching the current search.
77 | pub find_highlight_foreground: Option,
78 |
79 | /// Background color of the gutter.
80 | pub gutter: Option,
81 | /// Foreground color of the gutter.
82 | pub gutter_foreground: Option,
83 |
84 | /// The background color of selected text.
85 | pub selection: Option,
86 | /// A color that will override the scope-based text color of the selection.
87 | pub selection_foreground: Option,
88 |
89 | /// Color of the selection regions border.
90 | pub selection_border: Option,
91 | /// The background color of a selection in a view that is not currently focused.
92 | pub inactive_selection: Option,
93 | /// A color that will override the scope-based text color of the selection
94 | /// in a view that is not currently focused.
95 | pub inactive_selection_foreground: Option,
96 |
97 | /// Color of the guides displayed to indicate nesting levels.
98 | pub guide: Option,
99 | /// Color of the guide lined up with the caret.
100 | /// Only applied if the `indent_guide_options` setting is set to `draw_active`.
101 | pub active_guide: Option,
102 | /// Color of the current guide’s parent guide level.
103 | /// Only used if the `indent_guide_options` setting is set to `draw_active`.
104 | pub stack_guide: Option,
105 |
106 | /// The color of the shadow used when a text area can be horizontally scrolled.
107 | pub shadow: Option,
108 | }
109 |
/// A component of a theme meant to highlight a specific thing (e.g string literals)
/// in a certain way.
///
/// Pairs a scope selector with the style modification to use for it.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct ThemeItem {
    /// Target scope name.
    pub scope: ScopeSelectors,
    /// The style to use for this component
    pub style: StyleModifier,
}
119 |
/// The available underline styles.
///
/// `None` is the default variant.
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
pub enum UnderlineOption {
    /// No underline (the default).
    #[default]
    None,
    /// Underline.
    Underline,
    /// Stippled underline.
    StippledUnderline,
    /// Squiggly underline.
    SquigglyUnderline,
}
128 |
--------------------------------------------------------------------------------
/examples/synstats.rs:
--------------------------------------------------------------------------------
1 | //! An example of using syntect for code analysis.
2 | //! Basically a fancy lines of code count program that works
3 | //! for all languages Sublime Text supports and also counts things
4 | //! like number of functions and number of types defined.
5 | //!
6 | //! Another thing it does that other line count programs can't always
7 | //! do is properly count comments in embedded syntaxes. For example
8 | //! JS, CSS and Ruby comments embedded in ERB files.
9 | use syntect::easy::ScopeRegionIterator;
10 | use syntect::highlighting::{ScopeSelector, ScopeSelectors};
11 | use syntect::parsing::{ParseState, ScopeStack, ScopeStackOp, SyntaxSet};
12 |
13 | use std::fs::File;
14 | use std::io::{BufRead, BufReader};
15 | use std::path::Path;
16 | use std::str::FromStr;
17 | use walkdir::{DirEntry, WalkDir};
18 |
/// The scope selectors used to classify each token while counting.
#[derive(Debug)]
struct Selectors {
    /// Matches tokens that count as comments.
    comment: ScopeSelector,
    /// Matches tokens that count as documentation comments.
    doc_comment: ScopeSelectors,
    /// Matches tokens counted as function definitions.
    function: ScopeSelector,
    /// Matches tokens counted as type definitions.
    types: ScopeSelectors,
}
26 |
27 | impl Default for Selectors {
28 | fn default() -> Selectors {
29 | Selectors {
30 | comment: ScopeSelector::from_str("comment - comment.block.attribute").unwrap(),
31 | doc_comment: ScopeSelectors::from_str(
32 | "comment.line.documentation, comment.block.documentation",
33 | )
34 | .unwrap(),
35 | function: ScopeSelector::from_str("entity.name.function").unwrap(),
36 | types: ScopeSelectors::from_str(
37 | "entity.name.class, entity.name.struct, entity.name.enum, entity.name.type",
38 | )
39 | .unwrap(),
40 | }
41 | }
42 | }
43 |
/// Running totals accumulated over every file that is counted.
#[derive(Debug, Default)]
struct Stats {
    /// Selectors used to classify tokens.
    selectors: Selectors,
    /// Number of files for which a syntax was found.
    files: usize,
    /// Number of tokens matching the function selector.
    functions: usize,
    /// Number of tokens matching the type selectors.
    types: usize,
    /// Total number of lines read.
    lines: usize,
    /// Sum of the lengths of all lines, as reported by `len()`.
    chars: usize,
    /// Lines containing at least one non-whitespace token outside a comment.
    code_lines: usize,
    /// Lines containing comment text but no code.
    comment_lines: usize,
    /// Sum of the lengths of all comment tokens, as reported by `len()`.
    comment_chars: usize,
    /// Words counted inside comment tokens.
    comment_words: usize,
    /// Lines containing a documentation comment token.
    doc_comment_lines: usize,
    /// Words counted inside documentation comment tokens.
    doc_comment_words: usize,
}
59 |
60 | fn print_stats(stats: &Stats) {
61 | println!();
62 | println!("################## Stats ###################");
63 | println!("File count: {:>6}", stats.files);
64 | println!("Total characters: {:>6}", stats.chars);
65 | println!();
66 | println!(
67 | "Function count: {:>6}",
68 | stats.functions
69 | );
70 | println!("Type count (structs, enums, classes): {:>6}", stats.types);
71 | println!();
72 | println!(
73 | "Code lines (traditional SLOC): {:>6}",
74 | stats.code_lines
75 | );
76 | println!("Total lines (w/ comments & blanks): {:>6}", stats.lines);
77 | println!(
78 | "Comment lines (comment but no code): {:>6}",
79 | stats.comment_lines
80 | );
81 | println!(
82 | "Blank lines (lines-blank-comment): {:>6}",
83 | stats.lines - stats.code_lines - stats.comment_lines
84 | );
85 | println!();
86 | println!(
87 | "Lines with a documentation comment: {:>6}",
88 | stats.doc_comment_lines
89 | );
90 | println!(
91 | "Total words written in doc comments: {:>6}",
92 | stats.doc_comment_words
93 | );
94 | println!(
95 | "Total words written in all comments: {:>6}",
96 | stats.comment_words
97 | );
98 | println!(
99 | "Characters of comment: {:>6}",
100 | stats.comment_chars
101 | );
102 | }
103 |
104 | fn is_ignored(entry: &DirEntry) -> bool {
105 | entry
106 | .file_name()
107 | .to_str()
108 | .map(|s| s.starts_with('.') && s.len() > 1 || s.ends_with(".md"))
109 | .unwrap_or(false)
110 | }
111 |
112 | fn count_line(
113 | ops: &[(usize, ScopeStackOp)],
114 | line: &str,
115 | stack: &mut ScopeStack,
116 | stats: &mut Stats,
117 | ) {
118 | stats.lines += 1;
119 |
120 | let mut line_has_comment = false;
121 | let mut line_has_doc_comment = false;
122 | let mut line_has_code = false;
123 | for (s, op) in ScopeRegionIterator::new(ops, line) {
124 | stack.apply(op).unwrap();
125 | if s.is_empty() {
126 | // in this case we don't care about blank tokens
127 | continue;
128 | }
129 | if stats
130 | .selectors
131 | .comment
132 | .does_match(stack.as_slice())
133 | .is_some()
134 | {
135 | let words = s
136 | .split_whitespace()
137 | .filter(|w| {
138 | w.chars()
139 | .all(|c| c.is_alphanumeric() || c == '.' || c == '\'')
140 | })
141 | .count();
142 | if stats
143 | .selectors
144 | .doc_comment
145 | .does_match(stack.as_slice())
146 | .is_some()
147 | {
148 | line_has_doc_comment = true;
149 | stats.doc_comment_words += words;
150 | }
151 | stats.comment_chars += s.len();
152 | stats.comment_words += words;
153 | line_has_comment = true;
154 | } else if !s.chars().all(|c| c.is_whitespace()) {
155 | line_has_code = true;
156 | }
157 | if stats
158 | .selectors
159 | .function
160 | .does_match(stack.as_slice())
161 | .is_some()
162 | {
163 | stats.functions += 1;
164 | }
165 | if stats.selectors.types.does_match(stack.as_slice()).is_some() {
166 | stats.types += 1;
167 | }
168 | }
169 | if line_has_comment && !line_has_code {
170 | stats.comment_lines += 1;
171 | }
172 | if line_has_doc_comment {
173 | stats.doc_comment_lines += 1;
174 | }
175 | if line_has_code {
176 | stats.code_lines += 1;
177 | }
178 | }
179 |
180 | fn count(ss: &SyntaxSet, path: &Path, stats: &mut Stats) {
181 | let syntax = match ss.find_syntax_for_file(path).unwrap_or(None) {
182 | Some(syntax) => syntax,
183 | None => return,
184 | };
185 | stats.files += 1;
186 | let mut state = ParseState::new(syntax);
187 |
188 | let f = File::open(path).unwrap();
189 | let mut reader = BufReader::new(f);
190 | let mut line = String::new();
191 | let mut stack = ScopeStack::new();
192 | while reader.read_line(&mut line).unwrap() > 0 {
193 | {
194 | let ops = state.parse_line(&line, ss).unwrap();
195 | stats.chars += line.len();
196 | count_line(&ops, &line, &mut stack, stats);
197 | }
198 | line.clear();
199 | }
200 | }
201 |
202 | fn main() {
203 | let ss = SyntaxSet::load_defaults_newlines(); // note we load the version with newlines
204 |
205 | let args: Vec = std::env::args().collect();
206 | let path = if args.len() < 2 { "." } else { &args[1] };
207 |
208 | println!("################## Files ###################");
209 | let mut stats = Stats::default();
210 | let walker = WalkDir::new(path).into_iter();
211 | for entry in walker.filter_entry(|e| !is_ignored(e)) {
212 | let entry = entry.unwrap();
213 | if entry.file_type().is_file() {
214 | println!("{}", entry.path().display());
215 | count(&ss, entry.path(), &mut stats);
216 | }
217 | }
218 |
219 | // println!("{:?}", stats);
220 | print_stats(&stats);
221 | }
222 |
--------------------------------------------------------------------------------
/src/parsing/regex.rs:
--------------------------------------------------------------------------------
1 | use once_cell::sync::OnceCell;
2 | use serde::de::{Deserialize, Deserializer};
3 | use serde::ser::{Serialize, Serializer};
4 | use std::error::Error;
5 |
6 | /// An abstraction for regex patterns.
7 | ///
8 | /// * Allows swapping out the regex implementation because it's only in this module.
9 | /// * Makes regexes serializable and deserializable using just the pattern string.
10 | /// * Lazily compiles regexes on first use to improve initialization time.
11 | #[derive(Debug)]
12 | pub struct Regex {
13 | regex_str: String,
14 | regex: OnceCell,
15 | }
16 |
/// A region contains text positions for capture groups in a match result.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Region {
    /// The region type of the underlying regex implementation.
    region: regex_impl::Region,
}
22 |
23 | impl Regex {
24 | /// Create a new regex from the pattern string.
25 | ///
26 | /// Note that the regex compilation happens on first use, which is why this method does not
27 | /// return a result.
28 | pub fn new(regex_str: String) -> Self {
29 | Self {
30 | regex_str,
31 | regex: OnceCell::new(),
32 | }
33 | }
34 |
35 | /// Check whether the pattern compiles as a valid regex or not.
36 | pub fn try_compile(regex_str: &str) -> Option> {
37 | regex_impl::Regex::new(regex_str).err()
38 | }
39 |
40 | /// Return the regex pattern.
41 | pub fn regex_str(&self) -> &str {
42 | &self.regex_str
43 | }
44 |
45 | /// Check if the regex matches the given text.
46 | pub fn is_match(&self, text: &str) -> bool {
47 | self.regex().is_match(text)
48 | }
49 |
50 | /// Search for the pattern in the given text from begin/end positions.
51 | ///
52 | /// If a region is passed, it is used for storing match group positions. The argument allows
53 | /// the [`Region`] to be reused between searches, which makes a significant performance
54 | /// difference.
55 | ///
56 | /// [`Region`]: struct.Region.html
57 | pub fn search(
58 | &self,
59 | text: &str,
60 | begin: usize,
61 | end: usize,
62 | region: Option<&mut Region>,
63 | ) -> bool {
64 | self.regex()
65 | .search(text, begin, end, region.map(|r| &mut r.region))
66 | }
67 |
68 | fn regex(&self) -> ®ex_impl::Regex {
69 | self.regex.get_or_init(|| {
70 | regex_impl::Regex::new(&self.regex_str).expect("regex string should be pre-tested")
71 | })
72 | }
73 | }
74 |
75 | impl Clone for Regex {
76 | fn clone(&self) -> Self {
77 | Regex {
78 | regex_str: self.regex_str.clone(),
79 | regex: OnceCell::new(),
80 | }
81 | }
82 | }
83 |
84 | impl PartialEq for Regex {
85 | fn eq(&self, other: &Regex) -> bool {
86 | self.regex_str == other.regex_str
87 | }
88 | }
89 |
90 | impl Eq for Regex {}
91 |
92 | impl Serialize for Regex {
93 | fn serialize(&self, serializer: S) -> Result
94 | where
95 | S: Serializer,
96 | {
97 | serializer.serialize_str(&self.regex_str)
98 | }
99 | }
100 |
101 | impl<'de> Deserialize<'de> for Regex {
102 | fn deserialize(deserializer: D) -> Result
103 | where
104 | D: Deserializer<'de>,
105 | {
106 | let regex_str = String::deserialize(deserializer)?;
107 | Ok(Regex::new(regex_str))
108 | }
109 | }
110 |
111 | impl Region {
112 | pub fn new() -> Self {
113 | Self {
114 | region: regex_impl::new_region(),
115 | }
116 | }
117 |
118 | /// Get the start/end positions of the capture group with given index.
119 | ///
120 | /// If there is no match for that group or the index does not correspond to a group, `None` is
121 | /// returned. The index 0 returns the whole match.
122 | pub fn pos(&self, index: usize) -> Option<(usize, usize)> {
123 | self.region.pos(index)
124 | }
125 | }
126 |
127 | impl Default for Region {
128 | fn default() -> Self {
129 | Self::new()
130 | }
131 | }
132 |
#[cfg(feature = "regex-onig")]
mod regex_impl {
    pub use onig::Region;
    use onig::{MatchParam, RegexOptions, SearchOptions, Syntax};
    use std::error::Error;

    /// Regex backend built on the `onig` crate.
    #[derive(Debug)]
    pub struct Regex {
        regex: onig::Regex,
    }

    /// Create a new region with an initial capacity of 8.
    pub fn new_region() -> Region {
        Region::with_capacity(8)
    }

    impl Regex {
        /// Compile `regex_str` with the capture-group option and the default syntax.
        ///
        /// Returns the boxed onig error when the pattern does not compile.
        pub fn new(regex_str: &str) -> Result<Regex, Box<dyn Error + Send + Sync + 'static>> {
            let result = onig::Regex::with_options(
                regex_str,
                RegexOptions::REGEX_OPTION_CAPTURE_GROUP,
                Syntax::default(),
            );
            match result {
                Ok(regex) => Ok(Regex { regex }),
                Err(error) => Err(Box::new(error)),
            }
        }

        /// Check whether the regex matches `text`, using `match_with_options` at offset 0.
        pub fn is_match(&self, text: &str) -> bool {
            self.regex
                .match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None)
                .is_some()
        }

        /// Search `text` between `begin` and `end`, optionally recording group positions in
        /// `region`.
        ///
        /// Returns `true` only when the search succeeds and finds a match.
        pub fn search(
            &self,
            text: &str,
            begin: usize,
            end: usize,
            region: Option<&mut Region>,
        ) -> bool {
            let matched = self.regex.search_with_param(
                text,
                begin,
                end,
                SearchOptions::SEARCH_OPTION_NONE,
                region,
                MatchParam::default(),
            );

            // If there's an error during search, treat it as non-matching.
            // For example, in case of catastrophic backtracking, onig should
            // fail with a "retry-limit-in-match over" error eventually.
            matches!(matched, Ok(Some(_)))
        }
    }
}
190 |
191 | // If both regex-fancy and regex-onig are requested, this condition makes regex-onig win.
192 | #[cfg(all(feature = "regex-fancy", not(feature = "regex-onig")))]
193 | mod regex_impl {
194 | use std::error::Error;
195 |
196 | #[derive(Debug)]
197 | pub struct Regex {
198 | regex: fancy_regex::Regex,
199 | }
200 |
201 | #[derive(Clone, Debug, Eq, PartialEq)]
202 | pub struct Region {
203 | positions: Vec