├── testdata ├── issue30.tex ├── issue25.c ├── issue28.rs ├── clear_scopes_test.asa ├── test_first_line.test ├── minimized_tests │ ├── syntax_test_scalamini.scala │ └── syntax_test_aspmini.asp ├── issue33.rs ├── known_syntest_failures.txt ├── known_syntest_failures_fancy.txt ├── testing-syntax.testsyntax ├── DefaultPackage │ ├── Indentation Rules - Comments.tmPreferences │ └── Indentation Rules.tmPreferences ├── test1.html ├── test2.html ├── highlight_test.erb ├── embed_escape_test.sublime-syntax ├── Testing.sublime-syntax ├── test5.html ├── test4.html ├── parser_tests.sublime-syntax ├── test3.html └── JSON.sublime-syntax ├── scripts └── id_rsa.enc ├── .git-blame-ignore-revs ├── codecov.yml ├── assets ├── default.themedump ├── default_metadata.packdump ├── default_newlines.packdump └── default_nonewlines.packdump ├── .gitignore ├── src ├── highlighting │ ├── Readme.md │ ├── mod.rs │ ├── settings.rs │ ├── theme_set.rs │ ├── theme.rs │ ├── theme_load.rs │ └── style.rs ├── utils.rs ├── parsing │ ├── mod.rs │ ├── regex.rs │ └── syntax_definition.rs ├── escape.rs ├── lib.rs ├── dumps.rs ├── util.rs └── easy.rs ├── .gitmodules ├── benches ├── highlight_utils │ └── mod.rs ├── utils │ └── mod.rs ├── load_and_highlight.rs ├── parsing.rs ├── loading.rs └── highlighting.rs ├── tests ├── public_api.rs └── error_handling.rs ├── examples ├── latex-demo.rs ├── synhtml.rs ├── parsyncat.rs ├── gendata.rs ├── synhtml-css-classes.rs ├── syncat.rs ├── synstats.rs └── syntest.rs ├── LICENSE.txt ├── .gitattributes ├── Makefile ├── .github └── workflows │ ├── Release.yml │ └── CI.yml ├── Cargo.toml └── DESIGN.md /testdata/issue30.tex: -------------------------------------------------------------------------------- 1 | \title{} 2 | -------------------------------------------------------------------------------- /testdata/issue25.c: -------------------------------------------------------------------------------- 1 | struct{estruct 2 | 
-------------------------------------------------------------------------------- /testdata/issue28.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std; 3 | -------------------------------------------------------------------------------- /scripts/id_rsa.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trishume/syntect/HEAD/scripts/id_rsa.enc -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # initial cargo fmt 2 | cc41c3803b20b79147fa606f950658bc12e50dc2 3 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # Make codecov not add verbose comments to pull requests 2 | comment: false 3 | -------------------------------------------------------------------------------- /assets/default.themedump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trishume/syntect/HEAD/assets/default.themedump -------------------------------------------------------------------------------- /testdata/clear_scopes_test.asa: -------------------------------------------------------------------------------- 1 | Sub wot 2 | 5 3 | %> 4 | Sub 5 | <%=? 
6 | 5 "wow" 7 | -------------------------------------------------------------------------------- /testdata/test_first_line.test: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | def blah 4 | test = "wow" 5 | puts test 6 | end 7 | -------------------------------------------------------------------------------- /assets/default_metadata.packdump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trishume/syntect/HEAD/assets/default_metadata.packdump -------------------------------------------------------------------------------- /assets/default_newlines.packdump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trishume/syntect/HEAD/assets/default_newlines.packdump -------------------------------------------------------------------------------- /assets/default_nonewlines.packdump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trishume/syntect/HEAD/assets/default_nonewlines.packdump -------------------------------------------------------------------------------- /testdata/minimized_tests/syntax_test_scalamini.scala: -------------------------------------------------------------------------------- 1 | // SYNTAX TEST "Packages/Scala/Scala.sublime-syntax" 2 | 3 | class Foo[A](a: Bar) 4 | // ^^^^^^^^^ meta.class.identifier.scala 5 | -------------------------------------------------------------------------------- /testdata/issue33.rs: -------------------------------------------------------------------------------- 1 | // issue #33: it prints this line and then hangs on the next one, hang time increases super-linearly with line length 2 | impl ApplicationPreferenceseeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee { 3 | } 4 | -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | # macOS 2 | .DS_Store 3 | Thumbs.db 4 | 5 | # Windows 6 | [Dd]esktop.ini 7 | 8 | # cargo 9 | target/ 10 | 11 | # IDEA 12 | .idea/ 13 | *.iml 14 | 15 | # Sublime Text 16 | *.sublime-workspace 17 | 18 | # VS Code 19 | .vscode/ 20 | -------------------------------------------------------------------------------- /testdata/known_syntest_failures.txt: -------------------------------------------------------------------------------- 1 | loading syntax definitions from testdata/Packages 2 | FAILED testdata/Packages/C#/tests/syntax_test_Strings.cs: 38 3 | FAILED testdata/Packages/LaTeX/syntax_test_latex.tex: 1 4 | FAILED testdata/Packages/Makefile/syntax_test_makefile.mak: 6 5 | exiting with code 1 6 | -------------------------------------------------------------------------------- /testdata/known_syntest_failures_fancy.txt: -------------------------------------------------------------------------------- 1 | loading syntax definitions from testdata/Packages 2 | FAILED testdata/Packages/C#/tests/syntax_test_Strings.cs: 38 3 | FAILED testdata/Packages/LaTeX/syntax_test_latex.tex: 1 4 | FAILED testdata/Packages/Markdown/syntax_test_markdown.md: 11 5 | exiting with code 1 6 | -------------------------------------------------------------------------------- /testdata/testing-syntax.testsyntax: -------------------------------------------------------------------------------- 1 | hi lol 2 | wow zoom 3 | html 4 |
5 | troll wow lol 6 | htmout 7 | inline 8 | hi zoom lol wow bamf 9 | inout 10 | troll 11 | wow lol 12 | zoom lol bamf doopadoop 13 | out 14 | zoom lol bamf 15 | nested 16 | wow zoom lol bamf doopadoop 17 | outnested 18 | zout 19 | zoom lol bamf 20 | -------------------------------------------------------------------------------- /testdata/DefaultPackage/Indentation Rules - Comments.tmPreferences: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | scope 6 | comment 7 | settings 8 | 9 | preserveIndent 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /testdata/minimized_tests/syntax_test_aspmini.asp: -------------------------------------------------------------------------------- 1 | ' SYNTAX TEST "Packages/ASP/HTML-ASP.sublime-syntax" 2 | <% 3 | Class TestClass2 Public Sub TestSub () Response.Write("wow") End Sub End Class 4 | '^^^^^ meta.class.asp meta.class.identifier.asp storage.type.asp 5 | ' ^ meta.class.asp meta.class.identifier.asp 6 | ' ^ meta.class.asp meta.class.body.asp 7 | %> 8 |

foobar

9 | '^^^ text.html.asp meta.tag.block.any.html - source.asp.embedded.html 10 | -------------------------------------------------------------------------------- /src/highlighting/Readme.md: -------------------------------------------------------------------------------- 1 | # Attribution 2 | 3 | Much of the code in this module/folder is heavily based on and largely copy-pasted from the 4 | [sublimate](https://github.com/defuz/sublimate) project by 5 | [Ivan Ivashchenko a.k.a @defuz](https://github.com/defuz). The project was released under the MIT license. 6 | 7 | I needed to copy-paste the code here because it required some adaptations to work with the other 8 | parts of syntect. One example modification is using my bit-packed `Scope` type instead of the 9 | original string-based one. 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "testdata/Packages"] 2 | branch = st3 # TODO: this line should be removed once st4 functionality has been implemented 3 | path = testdata/Packages 4 | url = https://github.com/sublimehq/Packages 5 | [submodule "testdata/InspiredGitHub.tmtheme"] 6 | path = testdata/InspiredGitHub.tmtheme 7 | url = https://github.com/sethlopezme/InspiredGitHub.tmtheme.git 8 | [submodule "testdata/Solarized"] 9 | path = testdata/Solarized 10 | url = https://github.com/braver/Solarized.git 11 | [submodule "testdata/spacegray"] 12 | path = testdata/spacegray 13 | url = https://github.com/kkga/spacegray.git 14 | -------------------------------------------------------------------------------- /benches/highlight_utils/mod.rs: -------------------------------------------------------------------------------- 1 | use syntect::easy::HighlightLines; 2 | use syntect::highlighting::Theme; 3 | use syntect::parsing::{SyntaxReference, SyntaxSet}; 4 | 5 | /// Common helper for benchmarking highlighting. 
6 | pub fn do_highlight( 7 | s: &str, 8 | syntax_set: &SyntaxSet, 9 | syntax: &SyntaxReference, 10 | theme: &Theme, 11 | ) -> usize { 12 | let mut h = HighlightLines::new(syntax, theme); 13 | let mut count = 0; 14 | for line in s.lines() { 15 | let regions = h.highlight_line(line, syntax_set).unwrap(); 16 | count += regions.len(); 17 | } 18 | count 19 | } 20 | -------------------------------------------------------------------------------- /testdata/DefaultPackage/Indentation Rules.tmPreferences: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | scope 6 | source 7 | settings 8 | 9 | decreaseIndentPattern 10 | ^(.*\*/)?\s*\}[;\s]*$ 11 | increaseIndentPattern 12 | ^.*(\{[^}"']*)$ 13 | disableIndentNextLinePattern 14 | ^\s*\{[\]})]*\s*$ 15 | indentParens 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /benches/utils/mod.rs: -------------------------------------------------------------------------------- 1 | /// To be able to keep the same Criterion benchmark names as before (for the 2 | /// `--baseline` feature of Criterion) we use one level of indirection to 3 | /// map file name to file path. 
4 | pub fn get_test_file_path(file: &str) -> &str { 5 | match file { 6 | "highlight_test.erb" => "testdata/highlight_test.erb", 7 | "InspiredGitHub.tmTheme" => "testdata/InspiredGitHub.tmtheme/InspiredGitHub.tmTheme", 8 | "Ruby.sublime-syntax" => "testdata/Packages/Ruby/Ruby.sublime-syntax", 9 | "jquery.js" => "testdata/jquery.js", 10 | "parser.rs" => "testdata/parser.rs", 11 | "scope.rs" => "src/parsing/scope.rs", 12 | _ => panic!("Unknown test file {}", file), 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /testdata/test1.html: -------------------------------------------------------------------------------- 1 | [w](t.co) *hi* **five** 2 | -------------------------------------------------------------------------------- /tests/public_api.rs: -------------------------------------------------------------------------------- 1 | #[test] 2 | fn public_api() { 3 | // Install a compatible nightly toolchain if it is missing 4 | rustup_toolchain::install(public_api::MINIMUM_NIGHTLY_RUST_VERSION).unwrap(); 5 | 6 | // Build rustdoc JSON 7 | let rustdoc_json = rustdoc_json::Builder::default() 8 | .toolchain(public_api::MINIMUM_NIGHTLY_RUST_VERSION) 9 | .build() 10 | .unwrap(); 11 | 12 | // Derive the public API from the rustdoc JSON 13 | let public_api = public_api::Builder::from_rustdoc_json(rustdoc_json) 14 | .omit_blanket_impls(true) 15 | .build() 16 | .unwrap(); 17 | 18 | // Assert that the public API matches the latest snapshot. 19 | // Run with env var `UPDATE_SNAPSHOTS=yes` to update the snapshot. 20 | public_api.assert_eq_or_update("./tests/snapshots/public-api.txt"); 21 | } 22 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | //! Private library utilities that are not exposed to clients since we don't 2 | //! 
want to make semver guarantees about them 3 | 4 | use std::path::Path; 5 | 6 | use walkdir::WalkDir; 7 | 8 | /// Private helper to walk a dir and also follow symbolic links. 9 | pub fn walk_dir>(folder: P) -> WalkDir { 10 | WalkDir::new(folder).follow_links(true) 11 | } 12 | 13 | #[cfg(all(test, feature = "parsing"))] 14 | pub mod testdata { 15 | use std::sync::LazyLock; 16 | 17 | use crate::parsing::SyntaxSet; 18 | 19 | /// The [`SyntaxSet`] loaded from the `testdata/Packages` folder 20 | /// 21 | /// Shared here to avoid re-doing a particularly costly construction in various tests 22 | pub static PACKAGES_SYN_SET: LazyLock = 23 | LazyLock::new(|| SyntaxSet::load_from_folder("testdata/Packages").unwrap()); 24 | } 25 | -------------------------------------------------------------------------------- /src/highlighting/mod.rs: -------------------------------------------------------------------------------- 1 | //! Everything having to do with turning parsed text into styled text. 2 | //! 3 | //! You might want to check out [`Theme`] for its handy text-editor related settings like selection 4 | //! color, [`ThemeSet`] for loading themes, as well as things starting with `Highlight` for how to 5 | //! highlight text. 6 | //! 7 | //! [`Theme`]: struct.Theme.html 8 | //! 
[`ThemeSet`]: struct.ThemeSet.html 9 | mod highlighter; 10 | mod selector; 11 | #[cfg(feature = "plist-load")] 12 | pub(crate) mod settings; 13 | mod style; 14 | mod theme; 15 | #[cfg(feature = "plist-load")] 16 | mod theme_load; 17 | mod theme_set; 18 | 19 | pub use self::highlighter::*; 20 | pub use self::selector::*; 21 | #[cfg(feature = "plist-load")] 22 | pub use self::settings::SettingsError; 23 | pub use self::style::*; 24 | pub use self::theme::*; 25 | #[cfg(feature = "plist-load")] 26 | pub use self::theme_load::*; 27 | pub use self::theme_set::*; 28 | -------------------------------------------------------------------------------- /examples/latex-demo.rs: -------------------------------------------------------------------------------- 1 | use syntect::easy::HighlightLines; 2 | use syntect::highlighting::{Style, ThemeSet}; 3 | use syntect::parsing::SyntaxSet; 4 | use syntect::util::{as_latex_escaped, LinesWithEndings}; 5 | 6 | fn main() { 7 | // Load these once at the start of your program 8 | let ps = SyntaxSet::load_defaults_newlines(); 9 | let ts = ThemeSet::load_defaults(); 10 | 11 | let syntax = ps.find_syntax_by_extension("rs").unwrap(); 12 | let s = "pub struct Wow { hi: u64 }\nfn blah() -> u64 {}\n"; 13 | 14 | let mut h = HighlightLines::new(syntax, &ts.themes["InspiredGitHub"]); 15 | for line in LinesWithEndings::from(s) { 16 | // LinesWithEndings enables use of newlines mode 17 | let ranges: Vec<(Style, &str)> = h.highlight_line(line, &ps).unwrap(); 18 | let escaped = as_latex_escaped(&ranges[..]); 19 | println!("\n{:?}", line); 20 | println!("\n{}", escaped); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /testdata/test2.html: -------------------------------------------------------------------------------- 1 | [w]t.co *hi* **five** 2 | -------------------------------------------------------------------------------- /testdata/highlight_test.erb: 
-------------------------------------------------------------------------------- 1 | 24 | 31 | -------------------------------------------------------------------------------- /src/highlighting/settings.rs: -------------------------------------------------------------------------------- 1 | /// Code based on 2 | /// released under the MIT license by @defuz 3 | use plist::Error as PlistError; 4 | use std::io::{Read, Seek}; 5 | 6 | pub use serde_json::Value as Settings; 7 | 8 | pub trait ParseSettings: Sized { 9 | type Error; 10 | fn parse_settings(settings: Settings) -> Result; 11 | } 12 | 13 | /// An error parsing a settings file 14 | #[derive(Debug, thiserror::Error)] 15 | #[non_exhaustive] 16 | pub enum SettingsError { 17 | /// Incorrect Plist syntax 18 | #[error("Incorrect Plist syntax: {0}")] 19 | Plist(PlistError), 20 | } 21 | 22 | impl From for SettingsError { 23 | fn from(error: PlistError) -> SettingsError { 24 | SettingsError::Plist(error) 25 | } 26 | } 27 | 28 | pub fn read_plist(reader: R) -> Result { 29 | let settings = plist::from_reader(reader)?; 30 | Ok(settings) 31 | } 32 | -------------------------------------------------------------------------------- /src/parsing/mod.rs: -------------------------------------------------------------------------------- 1 | //! Everything about parsing text into text annotated with scopes. 2 | //! 3 | //! The most important struct here is [`SyntaxSet`], check out the docs for that. 4 | //! 5 | //! 
[`SyntaxSet`]: struct.SyntaxSet.html 6 | 7 | #[cfg(feature = "metadata")] 8 | pub mod metadata; 9 | #[cfg(feature = "parsing")] 10 | mod parser; 11 | #[cfg(feature = "parsing")] 12 | pub mod syntax_definition; 13 | #[cfg(feature = "parsing")] 14 | mod syntax_set; 15 | #[cfg(all(feature = "parsing", feature = "yaml-load"))] 16 | mod yaml_load; 17 | 18 | #[cfg(any(feature = "parsing", feature = "yaml-load", feature = "metadata"))] 19 | mod regex; 20 | mod scope; 21 | 22 | #[cfg(feature = "metadata")] 23 | pub use self::metadata::*; 24 | #[cfg(feature = "parsing")] 25 | pub use self::parser::*; 26 | #[cfg(feature = "parsing")] 27 | pub use self::syntax_definition::SyntaxDefinition; 28 | #[cfg(feature = "parsing")] 29 | pub use self::syntax_set::*; 30 | #[cfg(all(feature = "parsing", feature = "yaml-load"))] 31 | pub use self::yaml_load::*; 32 | 33 | #[cfg(any(feature = "parsing", feature = "yaml-load", feature = "metadata"))] 34 | pub use self::regex::*; 35 | 36 | pub use self::scope::*; 37 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Tristan Hume, Keith Hall, Google Inc and other contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /testdata/embed_escape_test.sublime-syntax: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | name: Embed_Escape Used by tests in src/parsing/parser.rs 4 | scope: source.embed-test 5 | contexts: 6 | main: 7 | - match: (") 8 | scope: meta.attribute-with-value.style.html string.quoted.double punctuation.definition.string.begin.html 9 | embed: embedded_context 10 | embed_scope: meta.attribute-with-value.style.html source.css 11 | escape: '\1' 12 | escape_captures: 13 | 0: meta.attribute-with-value.style.html string.quoted.double punctuation.definition.string.end.html 14 | - match: '(>)\s*' 15 | captures: 16 | 1: meta.tag.style.begin.html punctuation.definition.tag.end.html 17 | embed: embedded_context 18 | embed_scope: source.css.embedded.html 19 | escape: (?i)(?=' 21 | - match: foobar 22 | scope: top-level.test 23 | 24 | embedded_context: 25 | - match: a 26 | scope: a 27 | push: # prove that multiple context levels can be "escape"d 28 | - match: b 29 | push: 30 | - match: c 31 | push: 32 | - match: 'test' 33 | scope: test.embedded 34 | -------------------------------------------------------------------------------- /examples/synhtml.rs: -------------------------------------------------------------------------------- 1 | //! Prints highlighted HTML for a file to stdout. 2 | //! 
Basically just wraps a body around `highlighted_html_for_file` 3 | use syntect::highlighting::{Color, ThemeSet}; 4 | use syntect::html::highlighted_html_for_file; 5 | use syntect::parsing::SyntaxSet; 6 | 7 | fn main() { 8 | let ss = SyntaxSet::load_defaults_newlines(); 9 | let ts = ThemeSet::load_defaults(); 10 | 11 | let args: Vec = std::env::args().collect(); 12 | if args.len() < 2 { 13 | println!("Please pass in a file to highlight"); 14 | return; 15 | } 16 | 17 | let style = " 18 | pre { 19 | font-size:13px; 20 | font-family: Consolas, \"Liberation Mono\", Menlo, Courier, monospace; 21 | }"; 22 | println!( 23 | "{}", 24 | &args[1], style 25 | ); 26 | let theme = &ts.themes["base16-ocean.dark"]; 27 | let c = theme.settings.background.unwrap_or(Color::WHITE); 28 | println!( 29 | "\n", 30 | c.r, c.g, c.b 31 | ); 32 | let html = highlighted_html_for_file(&args[1], &ss, theme).unwrap(); 33 | println!("{}", html); 34 | println!(""); 35 | } 36 | -------------------------------------------------------------------------------- /benches/load_and_highlight.rs: -------------------------------------------------------------------------------- 1 | mod highlight_utils; 2 | mod utils; 3 | 4 | /// Measures the time it takes to run the whole pipeline: 5 | /// 1. Load assets 6 | /// 2. Parse 7 | /// 3. 
Highlight 8 | fn run(b: &mut criterion::Bencher, file: &str) { 9 | let path = utils::get_test_file_path(file); 10 | 11 | b.iter(|| { 12 | let ss = syntect::parsing::SyntaxSet::load_defaults_nonewlines(); 13 | let ts = syntect::highlighting::ThemeSet::load_defaults(); 14 | 15 | let syntax = ss.find_syntax_for_file(path).unwrap().unwrap(); 16 | let s = std::fs::read_to_string(path).unwrap(); 17 | 18 | highlight_utils::do_highlight(&s, &ss, syntax, &ts.themes["base16-ocean.dark"]); 19 | }) 20 | } 21 | 22 | fn load_and_highlight_benchmark(c: &mut criterion::Criterion) { 23 | let mut group = c.benchmark_group("load_and_highlight"); 24 | for input in &[ 25 | "highlight_test.erb", 26 | "InspiredGitHub.tmTheme", 27 | "Ruby.sublime-syntax", 28 | "parser.rs", 29 | ] { 30 | group.bench_with_input(format!("\"{}\"", input), input, |b, s| run(b, s)); 31 | } 32 | group.finish(); 33 | } 34 | 35 | criterion::criterion_group! { 36 | name = benches; 37 | config = criterion::Criterion::default().sample_size(50); 38 | targets = load_and_highlight_benchmark 39 | } 40 | criterion::criterion_main!(benches); 41 | -------------------------------------------------------------------------------- /benches/parsing.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Bencher, Criterion}; 2 | use std::time::Duration; 3 | use syntect::parsing::{ParseState, SyntaxReference, SyntaxSet}; 4 | 5 | mod utils; 6 | 7 | fn do_parse(s: &str, ss: &SyntaxSet, syntax: &SyntaxReference) -> usize { 8 | let mut state = ParseState::new(syntax); 9 | let mut count = 0; 10 | for line in s.lines() { 11 | let ops = state.parse_line(line, ss).unwrap(); 12 | count += ops.len(); 13 | } 14 | count 15 | } 16 | 17 | fn parse_file(b: &mut Bencher, file: &str) { 18 | let path = utils::get_test_file_path(file); 19 | 20 | // don't load from dump so we don't count lazy regex compilation time 21 | let ss = SyntaxSet::load_defaults_nonewlines(); 22 
| 23 | let syntax = ss.find_syntax_for_file(path).unwrap().unwrap(); 24 | let s = std::fs::read_to_string(path).unwrap(); 25 | 26 | b.iter(|| do_parse(&s, &ss, syntax)); 27 | } 28 | 29 | fn parsing_benchmark(c: &mut Criterion) { 30 | let mut parse = c.benchmark_group("parse"); 31 | for input in &[ 32 | "highlight_test.erb", 33 | "InspiredGitHub.tmTheme", 34 | "Ruby.sublime-syntax", 35 | "jquery.js", 36 | "parser.rs", 37 | "scope.rs", 38 | ] { 39 | parse.bench_with_input(format!("\"{}\"", input), input, |b, s| parse_file(b, s)); 40 | } 41 | parse.finish(); 42 | } 43 | 44 | criterion_group! { 45 | name = benches; 46 | config = Criterion::default().sample_size(50).warm_up_time(Duration::from_secs(30)); 47 | targets = parsing_benchmark 48 | } 49 | criterion_main!(benches); 50 | -------------------------------------------------------------------------------- /testdata/Testing.sublime-syntax: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | name: Sublime Syntax Testing 4 | scope: source.thumetesting 5 | file_extensions: 6 | - testsyntax 7 | hidden: false 8 | contexts: 9 | prototype: 10 | - match: lol 11 | scope: storage 12 | - include: zoom 13 | main: 14 | - meta_include_prototype: false 15 | - match: =testset 16 | push: testset 17 | - match: hi 18 | scope: comment 19 | - match: troll 20 | push: wow 21 | - match: inline 22 | push: 23 | - match: testing 24 | scope: constant 25 | - match: inout 26 | pop: true 27 | - match: html 28 | push: scope:text.html.basic 29 | with_prototype: 30 | - match: htmout 31 | pop: true 32 | - include: wow 33 | wow: 34 | - meta_scope: wow 35 | - match: wow 36 | scope: string 37 | - match: out 38 | set: zoom 39 | zoom: 40 | - meta_scope: zoom 41 | - match: zoom 42 | scope: constant 43 | - match: zout 44 | set: bamf 45 | - match: nested 46 | push: 47 | - meta_scope: nested 48 | - match: doopadoop 49 | scope: comment 50 | - match: outnested 51 | pop: true 52 | - include: bamf 53 | bamf: 54 
| - meta_scope: bamf 55 | - match: bamf 56 | scope: keyword 57 | testset: 58 | - meta_scope: constant.testset.meta 59 | - meta_content_scope: string.testset.content 60 | - match: =doset 61 | set: setto 62 | setto: 63 | - clear_scopes: 1 64 | - meta_scope: constant.setto.meta 65 | - meta_content_scope: comment.setto.content 66 | - match: =endset 67 | pop: true 68 | -------------------------------------------------------------------------------- /testdata/test5.html: -------------------------------------------------------------------------------- 1 |
 2 | hi lol
 3 | wow zoom
 4 | html
 5 |   <br style="color: #555;"/>
 6 |   troll wow lol
 7 | htmout
 8 | inline
 9 |   hi zoom lol wow bamf
10 | inout
11 | troll
12 |   wow lol
13 |   zoom lol bamf doopadoop
14 | out
15 |   zoom lol bamf
16 |   nested
17 |     wow zoom lol bamf doopadoop
18 |   outnested
19 | zout
20 |   zoom lol bamf
21 | 
22 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Sublime Text 2 | 3 | # tabs indentation, no trailing 4 | *.stTheme eol=lf whitespace=-tab-in-indent,trailing-space,tabwidth=4 5 | # spaces indentation, no trailing 6 | *.sublime-color-scheme eol=lf whitespace=tab-in-indent,trailing-space,tabwidth=4 7 | *.hidden-color-scheme eol=lf whitespace=tab-in-indent,trailing-space,tabwidth=4 8 | *.sublime-settings eol=lf whitespace=tab-in-indent,trailing-space,tabwidth=4 9 | *.sublime-syntax eol=lf whitespace=tab-in-indent,trailing-space,tabwidth=2 10 | *.sublime-theme eol=lf whitespace=tab-in-indent,trailing-space,tabwidth=4 11 | 12 | # TextMate 13 | 14 | # tabs indentation, no trailing 15 | *.tmLanguage eol=lf whitespace=-tab-in-indent,trailing-space,tabwidth=4 16 | *.hidden-tmLanguage eol=lf whitespace=-tab-in-indent,trailing-space,tabwidth=4 17 | *.tmPreferences eol=lf whitespace=-tab-in-indent,trailing-space,tabwidth=4 18 | *.tmTheme eol=lf whitespace=-tab-in-indent,trailing-space,tabwidth=4 19 | *.hidden-tmTheme eol=lf whitespace=-tab-in-indent,trailing-space,tabwidth=4 20 | 21 | # syntect 22 | 23 | testdata/* linguist-vendored 24 | 25 | # General 26 | 27 | # spaces indentation, no trailing 28 | *.md eol=lf whitespace=tab-in-indent,trailing-space,tabwidth=4 29 | # binary 30 | *.png binary 31 | *.tex diff=tex 32 | *.pdf binary diff=astextplain 33 | *.snap linguist-language=txt 34 | -------------------------------------------------------------------------------- /benches/loading.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Bencher, Criterion}; 2 | use syntect::highlighting::ThemeSet; 3 | use syntect::parsing::{SyntaxSet, SyntaxSetBuilder}; 4 | 5 | fn bench_load_internal_dump(b: &mut Bencher) { 6 | b.iter(|| SyntaxSet::load_defaults_newlines()); 
7 | } 8 | 9 | fn bench_load_internal_themes(b: &mut Bencher) { 10 | b.iter(|| ThemeSet::load_defaults()); 11 | } 12 | 13 | fn bench_load_theme(b: &mut Bencher) { 14 | b.iter(|| ThemeSet::get_theme("testdata/spacegray/base16-ocean.dark.tmTheme")); 15 | } 16 | 17 | fn bench_add_from_folder(b: &mut Bencher) { 18 | b.iter(|| { 19 | let mut builder = SyntaxSetBuilder::new(); 20 | builder.add_from_folder("testdata/Packages", false).unwrap() 21 | }); 22 | } 23 | 24 | fn bench_link_syntaxes(b: &mut Bencher) { 25 | let mut builder = SyntaxSetBuilder::new(); 26 | builder.add_from_folder("testdata/Packages", false).unwrap(); 27 | b.iter(|| { 28 | builder.clone().build(); 29 | }); 30 | } 31 | 32 | fn bench_from_dump_file(b: &mut Bencher) { 33 | b.iter(|| { 34 | let _: SyntaxSet = 35 | syntect::dumps::from_uncompressed_dump_file("assets/default_newlines.packdump") 36 | .unwrap(); 37 | }) 38 | } 39 | 40 | fn loading_benchmark(c: &mut Criterion) { 41 | c.bench_function("load_internal_dump", bench_load_internal_dump); 42 | c.bench_function("load_internal_themes", bench_load_internal_themes); 43 | c.bench_function("load_theme", bench_load_theme); 44 | c.bench_function("add_from_folder", bench_add_from_folder); 45 | c.bench_function("link_syntaxes", bench_link_syntaxes); 46 | c.bench_function("from_dump_file", bench_from_dump_file); 47 | } 48 | 49 | criterion_group! 
{ 50 | name = benches; 51 | config = Criterion::default().sample_size(50); 52 | targets = loading_benchmark 53 | } 54 | criterion_main!(benches); 55 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SUBMODULES = testdata/Packages/.git 2 | 3 | info: 4 | $(info Targets) 5 | $(info -----------------------------------------------------------------------) 6 | $(info assets | generate default theme packs and syntax) 7 | $(info - OTHER TARGETS -------------------------------------------------------) 8 | $(info themes | generate default theme pack) 9 | $(info packs | generate default syntax pack) 10 | $(info syntest | run syntax test summary) 11 | 12 | 13 | $(SUBMODULES): 14 | git submodule update --init --recursive 15 | 16 | assets: packs themes 17 | 18 | packs: $(SUBMODULES) 19 | cargo run --features=metadata --example gendata -- synpack testdata/Packages assets/default_newlines.packdump assets/default_nonewlines.packdump assets/default_metadata.packdump testdata/DefaultPackage 20 | 21 | themes: $(SUBMODULES) 22 | cargo run --example gendata -- themepack testdata assets/default.themedump 23 | 24 | syntest: $(SUBMODULES) 25 | @echo Tip: Run make update-known-failures to update the known failures file. 26 | cargo run --release --example syntest -- testdata/Packages testdata/Packages --summary | diff -U 1000000 testdata/known_syntest_failures.txt - 27 | @echo No new failures! 28 | 29 | syntest-fancy: $(SUBMODULES) 30 | @echo Tip: Run make update-known-failures to update the known failures file. 31 | cargo run --features default-fancy --no-default-features --release --example syntest -- testdata/Packages testdata/Packages --summary | diff -U 1000000 testdata/known_syntest_failures_fancy.txt - 32 | @echo No new failures! 
33 | 34 | update-known-failures: $(SUBMODULES) 35 | cargo run --release --example syntest -- testdata/Packages testdata/Packages --summary | tee testdata/known_syntest_failures.txt 36 | 37 | update-known-failures-fancy: $(SUBMODULES) 38 | cargo run --features default-fancy --no-default-features --release --example syntest -- testdata/Packages testdata/Packages --summary | tee testdata/known_syntest_failures_fancy.txt 39 | -------------------------------------------------------------------------------- /testdata/test4.html: -------------------------------------------------------------------------------- 1 |
  2 | %YAML 1.2
  3 | ---
  4 | # http://www.sublimetext.com/docs/3/syntax.html
  5 | name: Cargo Build Results  # syntax definition for build-output text; three flat rules, no push/pop
  6 | scope: source.build_results  # base scope given to the whole document
  7 | hidden: true  # NOTE(review): presumably keeps this syntax out of user-facing syntax lists - confirm against the docs linked above
  8 | contexts:
  9 |   main:  # the only context in this definition
 10 |     - match: '^(..[^:\n]*):([0-9]+):?([0-9]+)?:? '  # line-leading "<path>:<line>", optional ":<column>", optional ":", then one space; the first two characters may be anything (presumably to tolerate prefixes such as "C:" - confirm)
 11 |       scope: entity.name.filename  # no `captures` key here, so the three groups get no scopes of their own
 12 |     - match: '\berror: '  # literal "error: " starting at a word boundary, anywhere in the line
 13 |       scope: message.error
 14 |     - match: '^\[.+\]$'  # an entire line wrapped in square brackets
 15 |       scope: comment
16 | 
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// HTML escaping: this file provides a single wrapper type, `Escape`, whose
// `Display` implementation writes the HTML-escaped form of a string so it can
// be dropped straight into a format string.

use std::fmt;

/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string.
///
/// The five characters `<`, `>`, `&`, `'` and `"` are replaced by the
/// entities `&lt;`, `&gt;`, `&amp;`, `&#39;` and `&quot;`; every other
/// character is written through unchanged.
pub struct Escape<'a>(pub &'a str);

impl fmt::Display for Escape<'_> {
    /// Writes the wrapped string to `fmt`, substituting an HTML entity for
    /// each special character.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the underlying formatter.
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Because the internet is always right, turns out there's not that many
        // characters to escape: http://stackoverflow.com/questions/7381974
        let Escape(s) = *self;

        // Start of the pending run of bytes that need no escaping.
        let mut last = 0;
        for (i, byte) in s.bytes().enumerate() {
            let entity = match byte {
                b'>' => "&gt;",
                b'<' => "&lt;",
                b'&' => "&amp;",
                b'\'' => "&#39;",
                b'"' => "&quot;",
                _ => continue,
            };
            // Every escaped byte is ASCII, so `last` and `i` always fall on
            // UTF-8 character boundaries and the slice below cannot panic.
            fmt.write_str(&s[last..i])?;
            fmt.write_str(entity)?;
            last = i + 1;
        }

        // Flush whatever follows the final escaped character.
        if last < s.len() {
            fmt.write_str(&s[last..])?;
        }
        Ok(())
    }
}
Highlights the files given on the command line, in parallel. 2 | //! Prints the highlighted output to stdout. 3 | 4 | use rayon::prelude::*; 5 | use syntect::easy::HighlightFile; 6 | use syntect::highlighting::{Style, ThemeSet}; 7 | use syntect::parsing::SyntaxSet; 8 | 9 | use std::fs::File; 10 | use std::io::{BufRead, BufReader}; 11 | 12 | fn main() { 13 | let files: Vec = std::env::args().skip(1).collect(); 14 | 15 | if files.is_empty() { 16 | println!("Please provide some files to highlight."); 17 | return; 18 | } 19 | 20 | let syntax_set = SyntaxSet::load_defaults_newlines(); 21 | let theme_set = ThemeSet::load_defaults(); 22 | 23 | // We first collect the contents of the files... 24 | let contents: Vec> = files 25 | .par_iter() 26 | .map(|filename| { 27 | let mut lines = Vec::new(); 28 | // We use `String::new()` and `read_line()` instead of `BufRead::lines()` 29 | // in order to preserve the newlines and get better highlighting. 30 | let mut line = String::new(); 31 | let mut reader = BufReader::new(File::open(filename).unwrap()); 32 | while reader.read_line(&mut line).unwrap() > 0 { 33 | lines.push(line); 34 | line = String::new(); 35 | } 36 | lines 37 | }) 38 | .collect(); 39 | 40 | // ...so that the highlighted regions have valid lifetimes... 41 | let regions: Vec> = files 42 | .par_iter() 43 | .zip(&contents) 44 | .map(|(filename, contents)| { 45 | let mut regions = Vec::new(); 46 | let theme = &theme_set.themes["base16-ocean.dark"]; 47 | let mut highlighter = HighlightFile::new(filename, &syntax_set, theme).unwrap(); 48 | 49 | for line in contents { 50 | for region in highlighter 51 | .highlight_lines 52 | .highlight_line(line, &syntax_set) 53 | .unwrap() 54 | { 55 | regions.push(region); 56 | } 57 | } 58 | 59 | regions 60 | }) 61 | .collect(); 62 | 63 | // ...and then print them all out. 
64 | for file_regions in regions { 65 | print!( 66 | "{}", 67 | syntect::util::as_24_bit_terminal_escaped(&file_regions[..], true) 68 | ); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /benches/highlighting.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Bencher, Criterion}; 2 | use std::str::FromStr; 3 | use syntect::highlighting::ThemeSet; 4 | use syntect::html::highlighted_html_for_string; 5 | use syntect::parsing::{ScopeStack, SyntaxSet}; 6 | 7 | mod highlight_utils; 8 | mod utils; 9 | 10 | fn highlight_file(b: &mut Bencher, file: &str) { 11 | let path = utils::get_test_file_path(file); 12 | 13 | // don't load from dump so we don't count lazy regex compilation time 14 | let ss = SyntaxSet::load_defaults_nonewlines(); 15 | let ts = ThemeSet::load_defaults(); 16 | 17 | let syntax = ss.find_syntax_for_file(path).unwrap().unwrap(); 18 | let s = std::fs::read_to_string(path).unwrap(); 19 | 20 | b.iter(|| highlight_utils::do_highlight(&s, &ss, syntax, &ts.themes["base16-ocean.dark"])); 21 | } 22 | 23 | fn stack_matching(b: &mut Bencher) { 24 | let s = "source.js meta.group.js meta.group.js meta.block.js meta.function-call.method.js meta.group.js meta.object-literal.js meta.block.js meta.function-call.method.js meta.group.js variable.other.readwrite.js"; 25 | let stack = ScopeStack::from_str(s).unwrap(); 26 | let selector = ScopeStack::from_str("source meta.function-call.method").unwrap(); 27 | b.iter(|| selector.does_match(stack.as_slice())); 28 | } 29 | 30 | fn highlight_html(b: &mut Bencher) { 31 | let ss = SyntaxSet::load_defaults_newlines(); 32 | let ts = ThemeSet::load_defaults(); 33 | 34 | let path = "testdata/parser.rs"; 35 | let syntax = ss.find_syntax_for_file(path).unwrap().unwrap(); 36 | let s = std::fs::read_to_string(path).unwrap(); 37 | 38 | b.iter(|| highlighted_html_for_string(&s, &ss, syntax, 
&ts.themes["base16-ocean.dark"])); 39 | } 40 | 41 | fn highlighting_benchmark(c: &mut Criterion) { 42 | c.bench_function("stack_matching", stack_matching); 43 | c.bench_function("highlight_html", highlight_html); 44 | let mut highlight = c.benchmark_group("highlight"); 45 | for input in &[ 46 | "highlight_test.erb", 47 | "InspiredGitHub.tmTheme", 48 | "Ruby.sublime-syntax", 49 | "jquery.js", 50 | "parser.rs", 51 | "scope.rs", 52 | ] { 53 | highlight.bench_with_input(format!("\"{}\"", input), input, |b, s| highlight_file(b, s)); 54 | } 55 | highlight.finish(); 56 | } 57 | 58 | criterion_group! { 59 | name = benches; 60 | config = Criterion::default().sample_size(10); 61 | targets = highlighting_benchmark 62 | } 63 | criterion_main!(benches); 64 | -------------------------------------------------------------------------------- /tests/error_handling.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | error::Error, 3 | fmt::Display, 4 | io::{Error as IoError, ErrorKind}, 5 | }; 6 | 7 | use syntect::{ 8 | parsing::{ParseScopeError, ParseSyntaxError}, 9 | LoadingError, 10 | }; 11 | 12 | #[test] 13 | fn loading_error_bad_path_display() { 14 | assert_display(LoadingError::BadPath, "Invalid path"); 15 | } 16 | 17 | #[test] 18 | fn loading_error_parse_syntax_display() { 19 | assert_display( 20 | LoadingError::ParseSyntax( 21 | ParseSyntaxError::MissingMandatoryKey("main"), 22 | String::from("file.sublime-syntax"), 23 | ), 24 | "file.sublime-syntax: Missing mandatory key in YAML file: main", 25 | ); 26 | } 27 | 28 | #[test] 29 | fn loading_error_io_source() { 30 | let io_error_source = IoError::new(ErrorKind::Other, "this is an error string"); 31 | assert_display( 32 | LoadingError::Io(io_error_source).source().unwrap(), 33 | "this is an error string", 34 | ); 35 | } 36 | 37 | #[test] 38 | fn parse_syntax_error_missing_mandatory_key_display() { 39 | assert_display( 40 | ParseSyntaxError::MissingMandatoryKey("mandatory_key"), 
41 | "Missing mandatory key in YAML file: mandatory_key", 42 | ); 43 | } 44 | 45 | #[test] 46 | fn parse_syntax_error_regex_compile_error_display() { 47 | assert_display( 48 | ParseSyntaxError::RegexCompileError("[a-Z]".to_owned(), LoadingError::BadPath.into()), 49 | "Error while compiling regex '[a-Z]': Invalid path", 50 | ); 51 | } 52 | 53 | #[test] 54 | fn parse_scope_error_display() { 55 | assert_display( 56 | ParseScopeError::TooLong, 57 | "Too long scope. Scopes can be at most 8 atoms long.", 58 | ) 59 | } 60 | 61 | #[test] 62 | fn parse_syntax_error_regex_compile_error_source() { 63 | let error = ParseSyntaxError::RegexCompileError( 64 | "[[[[[[[[[[[[[[[".to_owned(), 65 | LoadingError::BadPath.into(), 66 | ); 67 | assert_display(error.source().unwrap(), "Invalid path"); 68 | } 69 | 70 | #[test] 71 | fn loading_error_parse_syntax_source() { 72 | let error = LoadingError::ParseSyntax( 73 | ParseSyntaxError::RegexCompileError("[a-Z]".to_owned(), LoadingError::BadPath.into()), 74 | String::from("any-file.sublime-syntax"), 75 | ); 76 | assert_display( 77 | error.source().unwrap(), 78 | "Error while compiling regex '[a-Z]': Invalid path", 79 | ) 80 | } 81 | 82 | /// Helper to assert that a given implementation of [Display] generates the 83 | /// expected string. 
84 | fn assert_display(display: impl Display, expected_display: &str) { 85 | assert_eq!(format!("{}", display), String::from(expected_display)); 86 | } 87 | -------------------------------------------------------------------------------- /testdata/parser_tests.sublime-syntax: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | name: Used by tests in src/parsing/parser.rs 4 | scope: source.test 5 | contexts: 6 | main: 7 | - include: test_does_not_crash_on_unlinkable_context 8 | - match: '#infinite_seeming_loop_test' 9 | scope: keyword.test 10 | push: infinite_seeming_loop_c 11 | - match: '(?=#infinite_loop_test)' 12 | push: infinite_loop_test_pop_if_not_whitespace 13 | - match: \' 14 | scope: punctuation.definition.string.begin.example 15 | push: cleared_scopes_string_test 16 | - match: '\d+' 17 | scope: constant.numeric.test 18 | 19 | infinite_loop_test_pop_if_not_whitespace: 20 | - match: '(?=\S)' 21 | pop: true 22 | infinite_seeming_loop_a: 23 | - meta_content_scope: test 24 | - match: 'h' 25 | scope: string.unquoted.test 26 | - match: 'ello' 27 | scope: keyword.control.test 28 | infinite_seeming_loop_b: 29 | - match: '' 30 | pop: true 31 | - match: '(?=.)' 32 | pop: true 33 | - match: '(?=h)' 34 | pop: true 35 | - match: 'h' 36 | scope: entity.name.function.test 37 | - match: 'e' 38 | scope: storage.type.test 39 | infinite_seeming_loop_c: 40 | - match: '' 41 | push: [infinite_seeming_loop_a, infinite_seeming_loop_b] 42 | cleared_scopes_string_test: 43 | - meta_scope: string.quoted.single.example 44 | - match: '#too_many_cleared_scopes_test' 45 | scope: example.pushes-clear-scopes.example 46 | push: 47 | - clear_scopes: 10 48 | - meta_scope: example.meta-scope.after-clear-scopes.example 49 | - match: 'test' 50 | scope: example.pops-clear-scopes.example 51 | pop: true 52 | - match: '#simple_cleared_scopes_test' 53 | scope: example.pushes-clear-scopes.example 54 | push: 55 | - clear_scopes: 1 56 | - meta_scope: 
example.meta-scope.after-clear-scopes.example 57 | - match: 'test' 58 | scope: example.pops-clear-scopes.example 59 | pop: true 60 | - match: '#nested_clear_scopes_test' 61 | scope: example.pushes-clear-scopes.example 62 | push: 63 | - clear_scopes: 1 64 | - meta_scope: example.meta-scope.after-clear-scopes.example 65 | - match: 'foo' 66 | scope: foo 67 | push: 68 | - clear_scopes: 1 69 | - meta_scope: example.meta-scope.cleared-previous-meta-scope.example 70 | - match: 'bar' 71 | scope: bar 72 | pop: true 73 | - match: 'test' 74 | scope: example.pops-clear-scopes.example 75 | pop: true 76 | - match: '\\.' 77 | scope: constant.character.escape.example 78 | - match: \' 79 | scope: punctuation.definition.string.end.example 80 | pop: true 81 | -------------------------------------------------------------------------------- /.github/workflows/Release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | # To make a release: 4 | # 5 | # 1. Update Cargo.toml version and CHANGELOG.md on master 6 | # 2. Run workflow https://github.com/trishume/syntect/actions/workflows/Release.yml on master 7 | # 3. Done! 8 | 9 | on: 10 | workflow_dispatch: # This workflow can only be triggered manually. 11 | inputs: 12 | one_time_crates_io_token_secret: 13 | description: "A one-time crates.io token (delete it after first use)" 14 | required: true 15 | type: string 16 | 17 | env: 18 | CARGO_TERM_COLOR: always 19 | 20 | jobs: 21 | # Make sure regular CI passes before we make a release. 22 | ci: 23 | uses: ./.github/workflows/CI.yml 24 | with: 25 | one_time_crates_io_token_secret: masked 26 | 27 | # After regular CI passes we publish to crates.io and push a git tag. 28 | publish-and-tag: 29 | needs: ci 30 | runs-on: ubuntu-latest 31 | permissions: 32 | contents: write # So we can push a tag. 
33 | outputs: 34 | VERSION: ${{ steps.version.outputs.VERSION }} 35 | TAG_NAME: ${{ steps.version.outputs.TAG_NAME }} 36 | steps: 37 | - run: | 38 | # See https://github.com/actions/runner/issues/643#issuecomment-708468716 39 | # See https://github.com/actions/runner/issues/475#issuecomment-635775403 40 | masked_secret=$(jq -r '.inputs.one_time_crates_io_token_secret' $GITHUB_EVENT_PATH) 41 | echo "::add-mask::$masked_secret" 42 | - uses: actions/checkout@v4 43 | - run: cargo publish -p syntect 44 | env: 45 | CARGO_REGISTRY_TOKEN: ${{ inputs.one_time_crates_io_token_secret }} 46 | - name: version 47 | id: version 48 | run: | 49 | version=$(cargo read-manifest --manifest-path Cargo.toml | jq --raw-output .version) 50 | echo "VERSION=${version}" >> $GITHUB_OUTPUT 51 | echo "TAG_NAME=v${version}" >> $GITHUB_OUTPUT 52 | - name: push tag 53 | run: | 54 | git tag ${{ steps.version.outputs.TAG_NAME }} 55 | git push origin ${{ steps.version.outputs.TAG_NAME }} 56 | 57 | # Lastly, create a GitHub release. 58 | release: 59 | needs: publish-and-tag 60 | runs-on: ubuntu-latest 61 | permissions: 62 | contents: write # So we can create a release. 63 | steps: 64 | - uses: actions/checkout@v4 65 | - run: cargo install parse-changelog@0.6.4 --locked 66 | - name: create release 67 | env: 68 | GH_TOKEN: ${{ github.token }} 69 | run: | 70 | notes="$(parse-changelog CHANGELOG.md ${{ needs.publish-and-tag.outputs.VERSION }})" 71 | title="${{ needs.publish-and-tag.outputs.TAG_NAME }}" 72 | gh release create --title "$title" --notes "$notes" ${{ needs.publish-and-tag.outputs.TAG_NAME }} 73 | -------------------------------------------------------------------------------- /examples/gendata.rs: -------------------------------------------------------------------------------- 1 | //! This program is mainly intended for generating the dumps that are compiled in to 2 | //! syntect, not as a helpful example for beginners. 3 | //! 
Although it is a valid example for serializing syntaxes, you probably won't need 4 | //! to do this yourself unless you want to cache your own compiled grammars. 5 | //! 6 | //! An example of how this script is used to generate the pack files included 7 | //! with syntect can be found under `make packs` in the Makefile. 8 | use std::env; 9 | use syntect::dumps::*; 10 | use syntect::highlighting::ThemeSet; 11 | use syntect::parsing::SyntaxSetBuilder; 12 | 13 | fn usage_and_exit() -> ! { 14 | println!( 15 | "USAGE: gendata synpack source-dir \ 16 | newlines.packdump nonewlines.packdump \ 17 | [metadata.packdump] [metadata extra-source-dir]\n \ 18 | gendata themepack source-dir themepack.themedump" 19 | ); 20 | ::std::process::exit(2); 21 | } 22 | 23 | fn main() { 24 | let mut a = env::args().skip(1); 25 | match (a.next(), a.next(), a.next(), a.next(), a.next(), a.next()) { 26 | ( 27 | Some(ref cmd), 28 | Some(ref package_dir), 29 | Some(ref packpath_newlines), 30 | Some(ref packpath_nonewlines), 31 | ref _option_metapath, 32 | ref _option_metasource, 33 | ) if cmd == "synpack" => { 34 | let mut builder = SyntaxSetBuilder::new(); 35 | builder.add_plain_text_syntax(); 36 | builder.add_from_folder(package_dir, true).unwrap(); 37 | let ss = builder.build(); 38 | dump_to_uncompressed_file(&ss, packpath_newlines).unwrap(); 39 | 40 | let mut builder_nonewlines = SyntaxSetBuilder::new(); 41 | builder_nonewlines.add_plain_text_syntax(); 42 | builder_nonewlines 43 | .add_from_folder(package_dir, false) 44 | .unwrap(); 45 | 46 | #[cfg(feature = "metadata")] 47 | { 48 | if let Some(metasource) = _option_metasource { 49 | builder_nonewlines 50 | .add_from_folder(metasource, false) 51 | .unwrap(); 52 | } 53 | } 54 | 55 | let ss_nonewlines = builder_nonewlines.build(); 56 | dump_to_uncompressed_file(&ss_nonewlines, packpath_nonewlines).unwrap(); 57 | 58 | #[cfg(feature = "metadata")] 59 | { 60 | if let Some(metapath) = _option_metapath { 61 | dump_to_file(&ss_nonewlines.metadata(), 
metapath).unwrap(); 62 | } 63 | } 64 | } 65 | (Some(ref s), Some(ref theme_dir), Some(ref packpath), ..) if s == "themepack" => { 66 | let ts = ThemeSet::load_from_folder(theme_dir).unwrap(); 67 | dump_to_file(&ts, packpath).unwrap(); 68 | } 69 | _ => usage_and_exit(), 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "syntect" 3 | description = "library for high quality syntax highlighting and code intelligence using Sublime Text's grammars" 4 | documentation = "https://docs.rs/syntect" 5 | repository = "https://github.com/trishume/syntect" 6 | keywords = ["syntax", "highlighting", "highlighter", "colouring", "parsing"] 7 | categories = ["parser-implementations", "parsing", "text-processing"] 8 | readme = "Readme.md" 9 | license = "MIT" 10 | version = "5.3.0" # remember to update html_root_url 11 | authors = ["Tristan Hume "] 12 | edition = "2021" 13 | exclude = [ 14 | "testdata/*", 15 | "/scripts/*", 16 | "/Makefile", 17 | "/codecov.yml" 18 | ] 19 | 20 | [package.metadata.docs.rs] 21 | # Toggle on extra features that aren't on by default 22 | features = ["metadata"] 23 | 24 | [dependencies] 25 | yaml-rust = { package = "yaml-rust2", version = "0.10.4", optional = true, default-features = false } 26 | onig = { version = "6.5.1", optional = true, default-features = false } 27 | fancy-regex = { version = "0.16.2", optional = true } 28 | walkdir = "2.0" 29 | regex-syntax = { version = "0.8", optional = true } 30 | plist = { version = "1.3", optional = true } 31 | bincode = { version = "1.0", optional = true } 32 | flate2 = { version = "1.0", optional = true } 33 | fnv = { version = "1.0", optional = true } 34 | serde = "1.0" 35 | serde_derive = "1.0" 36 | serde_json = { version = "1.0", optional = true } 37 | once_cell = "1.8" 38 | thiserror = "2.0.12" 39 | 40 | [dev-dependencies] 41 | criterion = { 
version = "0.3", features = [ "html_reports" ] } 42 | rayon = "1.0.0" 43 | regex = "1.0" 44 | getopts = "0.2" 45 | pretty_assertions = "0.6" 46 | rustup-toolchain = "0.1.5" 47 | rustdoc-json = "0.9.7" 48 | public-api = "0.50.1" 49 | serde_json = "1.0" 50 | 51 | [features] 52 | 53 | # Dump loading using flate2 54 | dump-load = ["dep:flate2", "dep:bincode"] 55 | # Dump creation using flate2 56 | dump-create = ["dep:flate2", "dep:bincode"] 57 | 58 | regex-fancy = ["dep:fancy-regex"] 59 | regex-onig = ["dep:onig"] 60 | 61 | parsing = ["dep:regex-syntax", "dep:fnv", "dump-create", "dump-load"] 62 | 63 | # Support for .tmPreferenes metadata files (indentation, comment syntax, etc) 64 | metadata = ["parsing", "plist-load", "dep:serde_json"] 65 | 66 | # Enables inclusion of the default syntax packages. 67 | default-syntaxes = ["parsing", "dump-load"] 68 | # Enables inclusion of the default theme packages. 69 | default-themes = ["dump-load"] 70 | 71 | html = ["parsing"] 72 | # Support for parsing .tmTheme files and .tmPreferences files 73 | plist-load = ["dep:plist", "dep:serde_json"] 74 | # Support for parsing .sublime-syntax files 75 | yaml-load = ["dep:yaml-rust", "parsing"] 76 | 77 | default-onig = ["parsing", "default-syntaxes", "default-themes", "html", "plist-load", "yaml-load", "dump-load", "dump-create", "regex-onig"] 78 | # In order to switch to the fancy-regex engine, disable default features then add the default-fancy feature 79 | default-fancy = ["parsing", "default-syntaxes", "default-themes", "html", "plist-load", "yaml-load", "dump-load", "dump-create", "regex-fancy"] 80 | default = ["default-onig"] 81 | 82 | # [profile.release] 83 | # debug = true 84 | 85 | [profile.dev.package] 86 | aho-corasick.opt-level = 2 87 | fancy-regex.opt-level = 2 88 | regex-automata.opt-level = 2 89 | regex-syntax.opt-level = 2 90 | 91 | [lib] 92 | bench = false 93 | 94 | [[bench]] 95 | name = "highlighting" 96 | harness = false 97 | 98 | [[bench]] 99 | name = 
"load_and_highlight" 100 | harness = false 101 | 102 | [[bench]] 103 | name = "loading" 104 | harness = false 105 | 106 | [[bench]] 107 | name = "parsing" 108 | harness = false 109 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Welcome to the syntect docs. 2 | //! 3 | //! Much more info about syntect is available on the [Github Page](https://github.com/trishume/syntect). 4 | //! 5 | //! May I suggest that you start by reading the `Readme.md` file in the main repo. 6 | //! Once you're done with that you can look at the docs for [`parsing::SyntaxSet`] 7 | //! and for the [`easy`] module. 8 | //! 9 | //! Almost everything in syntect is divided up into either the [`parsing`] module 10 | //! for turning text into text annotated with scopes, and the [`highlighting`] module 11 | //! for turning annotated text into styled/colored text. 12 | //! 13 | //! Some docs have example code but a good place to look is the `syncat` example as 14 | //! well as the source code for the [`easy`] module in `easy.rs` as that shows how to 15 | //! plug the various parts together for common use cases. 16 | //! 17 | //! [`parsing::SyntaxSet`]: parsing/struct.SyntaxSet.html 18 | //! [`easy`]: easy/index.html 19 | //! [`parsing`]: parsing/index.html 20 | //! 
[`highlighting`]: highlighting/index.html 21 | 22 | #![doc(html_root_url = "https://docs.rs/syntect/5.3.0")] 23 | 24 | #[cfg(test)] 25 | #[macro_use] 26 | extern crate pretty_assertions; 27 | 28 | #[cfg(any(feature = "dump-load", feature = "dump-create"))] 29 | pub mod dumps; 30 | #[cfg(feature = "parsing")] 31 | pub mod easy; 32 | #[cfg(feature = "html")] 33 | mod escape; 34 | pub mod highlighting; 35 | #[cfg(feature = "html")] 36 | pub mod html; 37 | pub mod parsing; 38 | pub mod util; 39 | mod utils; 40 | 41 | use std::io::Error as IoError; 42 | 43 | #[cfg(feature = "plist-load")] 44 | use crate::highlighting::{ParseThemeError, SettingsError}; 45 | 46 | /// An error enum for all things that can go wrong within syntect. 47 | #[derive(Debug, thiserror::Error)] 48 | #[non_exhaustive] 49 | pub enum Error { 50 | /// An error occurred while loading a syntax or theme 51 | #[error("Loading error: {0}")] 52 | LoadingError(#[from] LoadingError), 53 | /// An error occurred while parsing 54 | #[cfg(feature = "parsing")] 55 | #[error("Parsing error: {0}")] 56 | ParsingError(#[from] crate::parsing::ParsingError), 57 | /// Scope error 58 | #[error("Scope error: {0}")] 59 | ScopeError(#[from] crate::parsing::ScopeError), 60 | /// Formatting error 61 | #[error("Formatting error: {0}")] 62 | Fmt(#[from] std::fmt::Error), 63 | /// IO Error 64 | #[error("IO Error: {0}")] 65 | Io(#[from] IoError), 66 | } 67 | 68 | /// Common error type used by syntax and theme loading 69 | #[derive(Debug, thiserror::Error)] 70 | #[non_exhaustive] 71 | pub enum LoadingError { 72 | /// error finding all the files in a directory 73 | #[error("error finding all the files in a directory: {0}")] 74 | WalkDir(#[from] walkdir::Error), 75 | /// error reading a file 76 | #[error("error reading a file: {0}")] 77 | Io(#[from] IoError), 78 | /// a syntax file was invalid in some way 79 | #[cfg(all(feature = "yaml-load", feature = "parsing"))] 80 | #[error("{1}: {0}")] 81 | ParseSyntax(#[source] 
crate::parsing::ParseSyntaxError, String), 82 | /// a metadata file was invalid in some way 83 | #[cfg(feature = "metadata")] 84 | #[error("Failed to parse JSON")] 85 | ParseMetadata(#[from] serde_json::Error), 86 | /// a theme file was invalid in some way 87 | #[cfg(feature = "plist-load")] 88 | #[error("Invalid syntax theme")] 89 | ParseTheme(#[from] ParseThemeError), 90 | /// a theme's Plist syntax was invalid in some way 91 | #[cfg(feature = "plist-load")] 92 | #[error("Invalid syntax theme settings")] 93 | ReadSettings(#[from] SettingsError), 94 | /// A path given to a method was invalid. 95 | /// Possibly because it didn't reference a file or wasn't UTF-8. 96 | #[error("Invalid path")] 97 | BadPath, 98 | } 99 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | workflow_call: # From .github/workflows/Release.yml 5 | workflow_dispatch: 6 | push: 7 | branches: [ master ] 8 | pull_request: 9 | branches: [ master ] 10 | 11 | env: 12 | CARGO_TERM_COLOR: always 13 | 14 | jobs: 15 | min_version: 16 | name: Minimum supported rust version 17 | runs-on: ubuntu-24.04 18 | steps: 19 | - uses: actions/checkout@v4 20 | - uses: dtolnay/rust-toolchain@master 21 | with: 22 | toolchain: stable minus 2 releases # MSRV policy = last three versions of stable 23 | components: clippy, rustfmt 24 | 25 | - name: Run cargo fmt --check 26 | run: cargo fmt -- --check 27 | 28 | - name: Run cargo clippy 29 | run: | 30 | # Must run before `cargo check` until we use Rust 1.52 31 | # See https://github.com/rust-lang/rust-clippy/issues/4612 32 | cargo clippy --all-targets --all-features -- \ 33 | --allow clippy::unknown_clippy_lints \ 34 | --allow clippy::unnecessary_cast \ 35 | --allow clippy::block_in_if_condition_stmt 36 | # Prevent regressions of https://github.com/trishume/syntect/issues/98 37 | cargo clippy --all-features 
--lib -- --deny clippy::panic 38 | - name: Run cargo check 39 | run: | 40 | cargo check --all-features --all-targets 41 | # Check that if some other crate in the downstream dependency tree 42 | # enables serde's "derive" feature, syntect still builds. 43 | cargo check --all-features --features serde/derive 44 | 45 | documentation: 46 | name: Documentation checks 47 | runs-on: ubuntu-latest 48 | steps: 49 | - uses: actions/checkout@v4 50 | - run: RUSTDOCFLAGS='--deny warnings' cargo doc --no-deps --document-private-items --all-features 51 | 52 | bat-tests: 53 | name: Run bat syntax regression tests 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/checkout@v4 57 | with: 58 | path: 'syntect' 59 | - uses: actions/checkout@v4 60 | with: 61 | repository: 'sharkdp/bat' 62 | path: 'bat' 63 | ref: master 64 | submodules: true 65 | - name: bat/tests/syntax-tests/regression_test.sh 66 | run: | 67 | cd bat 68 | sed -i 's%\[dependencies.syntect\]%[dependencies.syntect]\npath = "../syntect"%' Cargo.toml 69 | cargo build --release # Build bat so we can update the assets 70 | PATH=target/release:$PATH ./assets/create.sh # Update assets with newly built bat 71 | cargo build --release # Build bat using the newly updated assets 72 | PATH=./target/release:$PATH tests/syntax-tests/regression_test.sh 73 | 74 | build-and-test: 75 | name: Build and test 76 | runs-on: ubuntu-latest 77 | steps: 78 | - uses: actions/checkout@v4 79 | with: 80 | submodules: true 81 | - uses: dtolnay/rust-toolchain@stable 82 | - name: Build 83 | run: | 84 | cargo build 85 | - name: Run tests 86 | run: | 87 | cargo test --features metadata 88 | - name: Run tests with fancy 89 | run: | 90 | # Run these tests in release mode since they're slow as heck otherwise 91 | cargo test --features default-fancy --no-default-features --release 92 | - name: Ensure highlight works without 'plist-load' and 'yaml-load' features 93 | run: | 94 | cargo run --example synhtml --no-default-features --features 
html,default-syntaxes,default-themes,regex-onig -- examples/synhtml.rs 95 | - name: Run tests with 'default-syntaxes' but without 'default-themes' 96 | run: | 97 | cargo test --lib --example synstats --no-default-features --features default-syntaxes,yaml-load,regex-onig 98 | - name: Run tests without default features 99 | run: | 100 | cargo test --lib --no-default-features 101 | - name: make stuff 102 | run: | 103 | make assets 104 | make syntest 105 | make syntest-fancy 106 | - name: Docs 107 | run: | 108 | cargo doc 109 | 110 | check-feature-powerset: 111 | name: Check feature powerset 112 | runs-on: ubuntu-latest 113 | steps: 114 | - uses: actions/checkout@v4 115 | - uses: dtolnay/rust-toolchain@stable 116 | - uses: taiki-e/install-action@v2 117 | with: 118 | tool: cargo-hack 119 | # Ensure that `cargo check` works across many different feature sets 120 | - name: Check feature powerset 121 | run: | 122 | cargo hack --feature-powerset --depth=2 --features=regex-fancy --exclude-features=regex-onig check 123 | -------------------------------------------------------------------------------- /src/highlighting/theme_set.rs: -------------------------------------------------------------------------------- 1 | use super::super::LoadingError; 2 | #[cfg(feature = "plist-load")] 3 | use super::settings::*; 4 | use super::theme::Theme; 5 | use serde_derive::{Deserialize, Serialize}; 6 | use std::collections::BTreeMap; 7 | use std::path::{Path, PathBuf}; 8 | 9 | #[derive(Debug, Default, Serialize, Deserialize)] 10 | pub struct ThemeSet { 11 | // This is a `BTreeMap` because they're faster than hashmaps on small sets 12 | pub themes: BTreeMap, 13 | } 14 | 15 | /// A set of themes, includes convenient methods for loading and discovering themes. 
16 | impl ThemeSet { 17 | /// Creates an empty set 18 | pub fn new() -> ThemeSet { 19 | ThemeSet::default() 20 | } 21 | 22 | /// Returns all the themes found in a folder 23 | /// 24 | /// This is good for enumerating before loading one with [`get_theme`](#method.get_theme) 25 | pub fn discover_theme_paths>(folder: P) -> Result, LoadingError> { 26 | let mut themes = Vec::new(); 27 | for entry in crate::utils::walk_dir(folder) { 28 | let entry = entry.map_err(LoadingError::WalkDir)?; 29 | if entry.path().is_file() 30 | && entry 31 | .path() 32 | .extension() 33 | .is_some_and(|e| e.eq_ignore_ascii_case("tmTheme")) 34 | { 35 | themes.push(entry.path().to_owned()); 36 | } 37 | } 38 | Ok(themes) 39 | } 40 | 41 | /// Loads a theme given a path to a .tmTheme file 42 | #[cfg(feature = "plist-load")] 43 | pub fn get_theme>(path: P) -> Result { 44 | let file = std::fs::File::open(path)?; 45 | let mut file = std::io::BufReader::new(file); 46 | Self::load_from_reader(&mut file) 47 | } 48 | 49 | /// Loads a theme given a readable stream 50 | #[cfg(feature = "plist-load")] 51 | pub fn load_from_reader( 52 | r: &mut R, 53 | ) -> Result { 54 | Ok(Theme::parse_settings(read_plist(r)?)?) 
55 | } 56 | 57 | /// Generate a `ThemeSet` from all themes in a folder 58 | #[cfg(feature = "plist-load")] 59 | pub fn load_from_folder>(folder: P) -> Result { 60 | let mut theme_set = Self::new(); 61 | theme_set.add_from_folder(folder)?; 62 | Ok(theme_set) 63 | } 64 | 65 | /// Load all the themes in the folder into this `ThemeSet` 66 | #[cfg(feature = "plist-load")] 67 | pub fn add_from_folder>(&mut self, folder: P) -> Result<(), LoadingError> { 68 | let paths = Self::discover_theme_paths(folder)?; 69 | for p in &paths { 70 | let theme = Self::get_theme(p)?; 71 | let basename = p 72 | .file_stem() 73 | .and_then(|x| x.to_str()) 74 | .ok_or(LoadingError::BadPath)?; 75 | self.themes.insert(basename.to_owned(), theme); 76 | } 77 | 78 | Ok(()) 79 | } 80 | } 81 | 82 | #[cfg(test)] 83 | mod tests { 84 | use crate::highlighting::{Color, ThemeSet}; 85 | #[cfg(feature = "plist-load")] 86 | #[test] 87 | fn can_parse_common_themes() { 88 | let themes = ThemeSet::load_from_folder("testdata").unwrap(); 89 | let all_themes: Vec<&str> = themes.themes.keys().map(|x| &**x).collect(); 90 | assert!(all_themes.contains(&"base16-ocean.dark")); 91 | 92 | println!("{:?}", all_themes); 93 | 94 | let theme = ThemeSet::get_theme("testdata/spacegray/base16-ocean.dark.tmTheme").unwrap(); 95 | assert_eq!(theme.name.unwrap(), "Base16 Ocean Dark"); 96 | assert_eq!( 97 | theme.settings.selection.unwrap(), 98 | Color { 99 | r: 0x4f, 100 | g: 0x5b, 101 | b: 0x66, 102 | a: 0xff, 103 | } 104 | ); 105 | assert_eq!( 106 | theme.scopes[0].style.foreground.unwrap(), 107 | Color { 108 | r: 0xc0, 109 | g: 0xc5, 110 | b: 0xce, 111 | a: 0xff, 112 | } 113 | ); 114 | assert_eq!( 115 | theme.settings.gutter_foreground.unwrap(), 116 | Color { 117 | r: 0x65, 118 | g: 0x73, 119 | b: 0x7e, 120 | a: 0xff, 121 | } 122 | ); 123 | assert_eq!( 124 | theme.settings.gutter.unwrap(), 125 | Color { 126 | r: 0x34, 127 | g: 0x3d, 128 | b: 0x46, 129 | a: 0xff, 130 | } 131 | ); 132 | // unreachable!(); 133 | } 134 | } 135 | 
-------------------------------------------------------------------------------- /examples/synhtml-css-classes.rs: -------------------------------------------------------------------------------- 1 | //! Generates highlighted HTML with CSS classes for a Rust and a C++ source. 2 | //! Run with ```cargo run --example synhtml-css-classes``` 3 | //! 4 | //! will generate 4 files as usage example 5 | //! * synhtml-css-classes.html 6 | //! * synhtml-css-classes.css 7 | //! * theme-dark.css 8 | //! * theme-light.css 9 | //! 10 | //! You can open the html with a web browser and change between light and dark 11 | //! mode. 12 | use syntect::highlighting::ThemeSet; 13 | use syntect::html::css_for_theme_with_class_style; 14 | use syntect::html::{ClassStyle, ClassedHTMLGenerator}; 15 | use syntect::parsing::SyntaxSet; 16 | use syntect::util::LinesWithEndings; 17 | 18 | use std::fs::File; 19 | use std::io::{BufWriter, Write}; 20 | use std::path::Path; 21 | 22 | fn main() -> Result<(), std::io::Error> { 23 | // --------------------------------------------------------------------------------------------- 24 | // generate html 25 | let ss = SyntaxSet::load_defaults_newlines(); 26 | 27 | let html_file = File::create(Path::new("synhtml-css-classes.html"))?; 28 | let mut html = BufWriter::new(&html_file); 29 | 30 | // write html header 31 | writeln!(html, "")?; 32 | writeln!(html, "")?; 33 | writeln!(html, " ")?; 34 | writeln!(html, " synhtml-css-classes.rs")?; 35 | writeln!( 36 | html, 37 | " " 38 | )?; 39 | writeln!(html, " ")?; 40 | writeln!(html, " ")?; 41 | 42 | // Rust 43 | let code_rs = "// Rust source 44 | fn main() { 45 | println!(\"Hello World!\"); 46 | }"; 47 | 48 | let sr_rs = ss.find_syntax_by_extension("rs").unwrap(); 49 | let mut rs_html_generator = 50 | ClassedHTMLGenerator::new_with_class_style(sr_rs, &ss, ClassStyle::Spaced); 51 | for line in LinesWithEndings::from(code_rs) { 52 | rs_html_generator 53 | .parse_html_for_line_which_includes_newline(line) 54 | 
.unwrap(); 55 | } 56 | let html_rs = rs_html_generator.finalize(); 57 | 58 | writeln!(html, "
")?;
 59 |     writeln!(html, "{}", html_rs)?;
 60 |     writeln!(html, "
")?; 61 | 62 | // C++ 63 | let code_cpp = "/* C++ source */ 64 | #include 65 | int main() { 66 | std::cout << \"Hello World!\" << std::endl; 67 | }"; 68 | 69 | let sr_cpp = ss.find_syntax_by_extension("cpp").unwrap(); 70 | let mut cpp_html_generator = 71 | ClassedHTMLGenerator::new_with_class_style(sr_cpp, &ss, ClassStyle::Spaced); 72 | for line in LinesWithEndings::from(code_cpp) { 73 | cpp_html_generator 74 | .parse_html_for_line_which_includes_newline(line) 75 | .unwrap(); 76 | } 77 | let html_cpp = cpp_html_generator.finalize(); 78 | 79 | writeln!(html, "
")?;
 80 |     writeln!(html, "{}", html_cpp)?;
 81 |     writeln!(html, "
")?; 82 | 83 | // write html end 84 | writeln!(html, " ")?; 85 | writeln!(html, "")?; 86 | 87 | // --------------------------------------------------------------------------------------------- 88 | // generate css 89 | let css = "@import url(\"theme-light.css\") (prefers-color-scheme: light); 90 | @import url(\"theme-dark.css\") (prefers-color-scheme: dark); 91 | 92 | @media (prefers-color-scheme: dark) { 93 | body { 94 | background-color: gray; 95 | } 96 | } 97 | @media (prefers-color-scheme: light) { 98 | body { 99 | background-color: lightgray; 100 | } 101 | }"; 102 | 103 | let css_file = File::create(Path::new("synhtml-css-classes.css"))?; 104 | let mut css_writer = BufWriter::new(&css_file); 105 | 106 | writeln!(css_writer, "{}", css)?; 107 | 108 | // --------------------------------------------------------------------------------------------- 109 | // generate css files for themes 110 | let ts = ThemeSet::load_defaults(); 111 | 112 | // create dark color scheme css 113 | let dark_theme = &ts.themes["Solarized (dark)"]; 114 | let css_dark_file = File::create(Path::new("theme-dark.css"))?; 115 | let mut css_dark_writer = BufWriter::new(&css_dark_file); 116 | 117 | let css_dark = css_for_theme_with_class_style(dark_theme, ClassStyle::Spaced).unwrap(); 118 | writeln!(css_dark_writer, "{}", css_dark)?; 119 | 120 | // create light color scheme css 121 | let light_theme = &ts.themes["Solarized (light)"]; 122 | let css_light_file = File::create(Path::new("theme-light.css"))?; 123 | let mut css_light_writer = BufWriter::new(&css_light_file); 124 | 125 | let css_light = css_for_theme_with_class_style(light_theme, ClassStyle::Spaced).unwrap(); 126 | writeln!(css_light_writer, "{}", css_light)?; 127 | 128 | Ok(()) 129 | } 130 | -------------------------------------------------------------------------------- /testdata/test3.html: -------------------------------------------------------------------------------- 1 |
 2 | <script type="text/javascript">
 3 |   var lol = "JS nesting";
 4 |   class WithES6 extends THREE.Mesh {
 5 |     static highQuality() { // such classes
 6 |       return this.toString();
 7 |     }
 8 |   }
 9 |   <%
10 |     # The outer syntax is HTML (Rails) detected from the .erb extension
11 |     puts "Ruby #{'nesting' * 2}"
12 |     here = <<-WOWCOOL + CORRECTLY_DOES_NOT_HIGHLIGHT_REST_OF_LINE
13 |       high quality parsing even supports custom heredoc endings
14 |       #{
15 |       nested = 5 * <<-ZOMG
16 |         nested heredocs! (no highlighting: 5 * 6, yes highlighting: #{5 * 6})
17 |       ZOMG
18 |       }
19 |     WOWCOOL
20 |     sql = <<-SQL
21 |       select * from heredocs where there_are_special_heredoc_names = true
22 |     SQL
23 |   %>
24 | </script>
25 | <style type="text/css">
26 |   /* the HTML syntax also supports CSS of course */
27 |   .stuff #wow {
28 |     border: 5px #ffffff;
29 |     background: url("wow");
30 |   }
31 | </style>
32 | 
33 | -------------------------------------------------------------------------------- /testdata/JSON.sublime-syntax: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | name: JSON 4 | file_extensions: 5 | - json 6 | - sublime-settings 7 | - sublime-menu 8 | - sublime-keymap 9 | - sublime-mousemap 10 | - sublime-theme 11 | - sublime-build 12 | - sublime-project 13 | - sublime-completions 14 | - sublime-commands 15 | - sublime-macro 16 | - sublime-color-scheme 17 | - ipynb 18 | - Pipfile.lock 19 | scope: source.json 20 | contexts: 21 | prototype: 22 | - include: comments 23 | main: 24 | - include: value 25 | value: 26 | - include: constant 27 | - include: number 28 | - include: string 29 | - include: array 30 | - include: object 31 | array: 32 | - match: '\[' 33 | scope: punctuation.section.sequence.begin.json 34 | push: 35 | - meta_scope: meta.sequence.json 36 | - match: '\]' 37 | scope: punctuation.section.sequence.end.json 38 | pop: true 39 | - include: value 40 | - match: "," 41 | scope: punctuation.separator.sequence.json 42 | - match: '[^\s\]]' 43 | scope: invalid.illegal.expected-sequence-separator.json 44 | comments: 45 | - match: /\*\*(?!/) 46 | scope: punctuation.definition.comment.json 47 | push: 48 | - meta_scope: comment.block.documentation.json 49 | - meta_include_prototype: false 50 | - match: \*/ 51 | pop: true 52 | - match: ^\s*(\*)(?!/) 53 | captures: 54 | 1: punctuation.definition.comment.json 55 | - match: /\* 56 | scope: punctuation.definition.comment.json 57 | push: 58 | - meta_scope: comment.block.json 59 | - meta_include_prototype: false 60 | - match: \*/ 61 | pop: true 62 | - match: (//).*$\n? 
63 | scope: comment.line.double-slash.js 64 | captures: 65 | 1: punctuation.definition.comment.json 66 | constant: 67 | - match: \b(?:true|false|null)\b 68 | scope: constant.language.json 69 | number: 70 | # handles integer and decimal numbers 71 | - match: -?(?:0|[1-9]\d*)(?:(?:(\.)\d+)(?:[eE][-+]?\d+)?|(?:[eE][-+]?\d+)) 72 | scope: constant.numeric.float.decimal.json 73 | captures: 74 | 1: punctuation.separator.decimal.json 75 | - match: -?(?:0|[1-9]\d*) 76 | scope: constant.numeric.integer.decimal.json 77 | object: 78 | # a JSON object 79 | - match: '\{' 80 | scope: punctuation.section.mapping.begin.json 81 | push: 82 | - meta_scope: meta.mapping.json 83 | - match: '\}' 84 | scope: punctuation.section.mapping.end.json 85 | pop: true 86 | - match: '"' 87 | scope: punctuation.definition.string.begin.json 88 | push: 89 | - clear_scopes: 1 90 | - meta_scope: meta.mapping.key.json string.quoted.double.json 91 | - meta_include_prototype: false 92 | - include: inside-string 93 | - match: ":" 94 | scope: punctuation.separator.mapping.key-value.json 95 | push: 96 | - match: ',|\s?(?=\})' 97 | scope: invalid.illegal.expected-mapping-value.json 98 | pop: true 99 | - match: (?=\S) 100 | set: 101 | - clear_scopes: 1 102 | - meta_scope: meta.mapping.value.json 103 | - include: value 104 | - match: '' 105 | set: 106 | - match: ',' 107 | scope: punctuation.separator.mapping.pair.json 108 | pop: true 109 | - match: \s*(?=\}) 110 | pop: true 111 | - match: \s(?!/[/*])(?=[^\s,])|[^\s,] 112 | scope: invalid.illegal.expected-mapping-separator.json 113 | pop: true 114 | - match: '[^\s\}]' 115 | scope: invalid.illegal.expected-mapping-key.json 116 | string: 117 | - match: '"' 118 | scope: punctuation.definition.string.begin.json 119 | push: inside-string 120 | inside-string: 121 | - meta_scope: string.quoted.double.json 122 | - meta_include_prototype: false 123 | - match: '"' 124 | scope: punctuation.definition.string.end.json 125 | pop: true 126 | - include: string-escape 127 | - 
match: $\n? 128 | scope: invalid.illegal.unclosed-string.json 129 | pop: true 130 | string-escape: 131 | - match: |- 132 | (?x: # turn on extended mode 133 | \\ # a literal backslash 134 | (?: # ...followed by... 135 | ["\\/bfnrt] # one of these characters 136 | | # ...or... 137 | u # a u 138 | [0-9a-fA-F]{4} # and four hex digits 139 | ) 140 | ) 141 | scope: constant.character.escape.json 142 | - match: \\. 143 | scope: invalid.illegal.unrecognized-string-escape.json 144 | -------------------------------------------------------------------------------- /examples/syncat.rs: -------------------------------------------------------------------------------- 1 | use getopts::Options; 2 | use std::borrow::Cow; 3 | use std::io::BufRead; 4 | use std::path::Path; 5 | use syntect::dumps::{dump_to_file, from_dump_file}; 6 | use syntect::easy::HighlightFile; 7 | use syntect::highlighting::{Style, Theme, ThemeSet}; 8 | use syntect::parsing::SyntaxSet; 9 | use syntect::util::as_24_bit_terminal_escaped; 10 | 11 | fn load_theme(tm_file: &str, enable_caching: bool) -> Theme { 12 | let tm_path = Path::new(tm_file); 13 | 14 | if enable_caching { 15 | let tm_cache = tm_path.with_extension("tmdump"); 16 | 17 | if tm_cache.exists() { 18 | from_dump_file(tm_cache).unwrap() 19 | } else { 20 | let theme = ThemeSet::get_theme(tm_path).unwrap(); 21 | dump_to_file(&theme, tm_cache).unwrap(); 22 | theme 23 | } 24 | } else { 25 | ThemeSet::get_theme(tm_path).unwrap() 26 | } 27 | } 28 | 29 | fn main() { 30 | let args: Vec = std::env::args().collect(); 31 | let mut opts = Options::new(); 32 | opts.optflag("l", "list-file-types", "Lists supported file types"); 33 | opts.optflag( 34 | "L", 35 | "list-embedded-themes", 36 | "Lists themes present in the executable", 37 | ); 38 | opts.optopt("t", "theme-file", "THEME_FILE", "Theme file to use. May be a path, or an embedded theme. Embedded themes will take precendence. 
Default: base16-ocean.dark"); 39 | opts.optopt( 40 | "s", 41 | "extra-syntaxes", 42 | "SYNTAX_FOLDER", 43 | "Additional folder to search for .sublime-syntax files in.", 44 | ); 45 | opts.optflag( 46 | "e", 47 | "no-default-syntaxes", 48 | "Doesn't load default syntaxes, intended for use with --extra-syntaxes.", 49 | ); 50 | opts.optflag( 51 | "n", 52 | "no-newlines", 53 | "Uses the no newlines versions of syntaxes and dumps.", 54 | ); 55 | opts.optflag("c", "cache-theme", "Cache the parsed theme file."); 56 | 57 | let matches = match opts.parse(&args[1..]) { 58 | Ok(m) => m, 59 | Err(f) => { 60 | panic!("{}", f.to_string()) 61 | } 62 | }; 63 | 64 | let no_newlines = matches.opt_present("no-newlines"); 65 | let mut ss = if matches.opt_present("no-default-syntaxes") { 66 | SyntaxSet::new() 67 | } else if no_newlines { 68 | SyntaxSet::load_defaults_nonewlines() 69 | } else { 70 | SyntaxSet::load_defaults_newlines() 71 | }; 72 | 73 | if let Some(folder) = matches.opt_str("extra-syntaxes") { 74 | let mut builder = ss.into_builder(); 75 | builder.add_from_folder(folder, !no_newlines).unwrap(); 76 | ss = builder.build(); 77 | } 78 | 79 | let ts = ThemeSet::load_defaults(); 80 | 81 | if matches.opt_present("list-file-types") { 82 | println!("Supported file types:"); 83 | 84 | for sd in ss.syntaxes() { 85 | println!("- {} (.{})", sd.name, sd.file_extensions.join(", .")); 86 | } 87 | } else if matches.opt_present("list-embedded-themes") { 88 | println!("Embedded themes:"); 89 | 90 | for t in ts.themes.keys() { 91 | println!("- {}", t); 92 | } 93 | } else if matches.free.is_empty() { 94 | let brief = format!("USAGE: {} [options] FILES", args[0]); 95 | println!("{}", opts.usage(&brief)); 96 | } else { 97 | let theme_file: String = matches 98 | .opt_str("theme-file") 99 | .unwrap_or_else(|| "base16-ocean.dark".to_string()); 100 | 101 | let theme = ts 102 | .themes 103 | .get(&theme_file) 104 | .map(Cow::Borrowed) 105 | .unwrap_or_else(|| { 106 | 
Cow::Owned(load_theme(&theme_file, matches.opt_present("cache-theme"))) 107 | }); 108 | 109 | for src in &matches.free[..] { 110 | if matches.free.len() > 1 { 111 | println!("==> {} <==", src); 112 | } 113 | 114 | let mut highlighter = HighlightFile::new(src, &ss, &theme).unwrap(); 115 | 116 | // We use read_line instead of `for line in highlighter.reader.lines()` because that 117 | // doesn't return strings with a `\n`, and including the `\n` gets us more robust highlighting. 118 | // See the documentation for `SyntaxSetBuilder::add_from_folder`. 119 | // It also allows re-using the line buffer, which should be a tiny bit faster. 120 | let mut line = String::new(); 121 | while highlighter.reader.read_line(&mut line).unwrap() > 0 { 122 | if no_newlines && line.ends_with('\n') { 123 | let _ = line.pop(); 124 | } 125 | 126 | { 127 | let regions: Vec<(Style, &str)> = highlighter 128 | .highlight_lines 129 | .highlight_line(&line, &ss) 130 | .unwrap(); 131 | print!("{}", as_24_bit_terminal_escaped(®ions[..], true)); 132 | } 133 | line.clear(); 134 | 135 | if no_newlines { 136 | println!(); 137 | } 138 | } 139 | 140 | // Clear the formatting 141 | println!("\x1b[0m"); 142 | } 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /DESIGN.md: -------------------------------------------------------------------------------- 1 | # Optimization/Design notes 2 | 3 | This is my scratch pad for optimization ideas. Some of this I will implement, some I have implemented, some are just speculative. 
4 | 5 | # Scopes 6 | 7 | ## Representation ideas: 8 | 9 | - Normal arrays of strings 10 | - array of 32-bit or 64-bit atoms (maybe using Servo's atom library) 11 | - Atoms packed into one or two u64s 12 | - fast equality checking 13 | - potentially fast prefix checking 14 | - needs unsafe code 15 | 16 | ## Potential packings: 17 | 18 | - variable width atoms, either 7 bits and a tag bit for top 128 or 13 bits and 3 tagging bits for rest 19 | - can fit all but 33 of the scopes present 20 | - tagged pointer (taking advantage of alignment), either a pointer to a slow path, or the first 4 bits set then a packed representation, one of others mentioned 21 | - 6 10-bit atoms referencing unique things by position (see by-position stats below) 22 | - 5 11-bit atoms and one 8-bit one for the first atom (2^11 = 2048, 2^8 = 256), one remaining bit for tag marker 23 | 24 | ## Stats: 25 | 26 | - 7000 scopes referenced in sublime, 3537 unique ones, all stats after this are based on non-unique data 27 | - all but 33 scopes in default packages could fit in 64 with combination 8bit or 16bit atom encoding 28 | - there are only 1219 unique atoms in the default package set 29 | - the top 128 atoms make up ~90% of all unique atoms referenced in syntax files 30 | - there are 26 unique first atoms, 145 unique last atoms 31 | - every position (1st atom, 2nd atom, ...) has under 878 possibilities, only 2nd,3rd and 4th have >256 32 | - 99.8% of scopes have 6 or fewer atoms, 97% have 5 or fewer, 70% have 4 or fewer 33 | - for unique scopes: {2=>81, 4=>1752, 3=>621, 5=>935, 7=>8, 6=>140} ----> 95% of uniques <= 6 34 | - for non-unique scopes: {2=>125, 4=>3383, 3=>1505, 5=>1891, 7=>9, 6=>202} 35 | 36 | # Checking prefix 37 | 38 | operation: `fn extent_matched(potential_prefix: Scope, s: Scope) -> u8` 39 | idea: any differences are beyond the length of the prefix. 40 | figure this out by xor and then ctz/clz then a compare to the length (however that works). 
41 | 42 | ```bash 43 | XXXXYYYY00000000 # prefix 44 | XXXXYYYYZZZZ0000 # testee 45 | 00000000ZZZZ0000 # = xored 46 | 47 | XXXXYYYYQQQQ0000 # non-prefix 48 | XXXXYYYYZZZZ0000 # testee 49 | 00000000GGGG0000 # = xored 50 | 51 | XXXXQQQQ00000000 # non-prefix 52 | XXXXYYYYZZZZ0000 # testee 53 | 0000BBBBZZZZ0000 # = xored 54 | ``` 55 | 56 | # Parsing 57 | 58 | * Problem: need to reduce number of regex search calls 59 | * Solution: cache better 60 | 61 | ## Stats 62 | 63 | ```bash 64 | # On stats branch 65 | $cargo run --release --example syncat testdata/jquery.js | grep cmiss | wc -l 66 | Running `target/release/examples/syncat testdata/jquery.js` 67 | 61266 68 | $cargo run --release --example syncat testdata/jquery.js | grep ptoken | wc -l 69 | Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect) 70 | Running `target/release/examples/syncat testdata/jquery.js` 71 | 98714 72 | $wc -l testdata/jquery.js 73 | 9210 testdata/jquery.js 74 | $cargo run --release --example syncat testdata/jquery.js | grep cclear | wc -l 75 | Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect) 76 | Running `target/release/examples/syncat testdata/jquery.js` 77 | 71302 78 | $cargo run --release --example syncat testdata/jquery.js | grep freshcachetoken | wc -l 79 | Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect) 80 | Running `target/release/examples/syncat testdata/jquery.js` 81 | 80512 82 | # On stats-2 branch 83 | $cargo run --example syncat testdata/jquery.js | grep cachehit | wc -l 84 | Running `target/debug/examples/syncat testdata/jquery.js` 85 | 527774 86 | $cargo run --example syncat testdata/jquery.js | grep regsearch | wc -l 87 | Running `target/debug/examples/syncat testdata/jquery.js` 88 | 2862948 89 | $cargo run --example syncat testdata/jquery.js | grep regmatch | wc -l 90 | Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect) 91 | Running `target/debug/examples/syncat testdata/jquery.js` 92 | 
296127 93 | $cargo run --example syncat testdata/jquery.js | grep leastmatch | wc -l 94 | Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect) 95 | Running `target/debug/examples/syncat testdata/jquery.js` 96 | 137842 97 | # With search caching 98 | $cargo run --example syncat testdata/jquery.js | grep searchcached | wc -l 99 | Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect) 100 | Running `target/debug/examples/syncat testdata/jquery.js` 101 | 2440527 102 | $cargo run --example syncat testdata/jquery.js | grep regsearch | wc -l 103 | Running `target/debug/examples/syncat testdata/jquery.js` 104 | 950195 105 | ``` 106 | 107 | Average unique regexes per line is 87.58, average non-unique is regsearch/lines = 317 108 | 109 | Ideally we should have only a couple of fresh cache searches per line, not `~10` like the stats show (freshcachetoken/linecount). 110 | 111 | In a fantabulous world these stats mean a possible 10x speed improvement, but since caching does have a cost and we can't always cache, the real gain will likely be nice but not that high. 112 | 113 | ## Issues 114 | 115 | - Stack transitions always bust cache, even when for example JS just pushes another group 116 | - Doesn't cache actual matches, only whether it matched or not 117 | 118 | ## Attacks 119 | 120 | - cache based on actual context, only search if it is a prototype we haven't searched before 121 | - hash maps based on casting RC ref to pointer and hashing? 
(there is a Hash impl for pointers) 122 | - for new searches, store matched regexes for context in BTreeMap like textmate 123 | - for subsequent tokens in same context, just pop off btreemap and re-search if before curpos 124 | - cache per Regex 125 | -------------------------------------------------------------------------------- /src/highlighting/theme.rs: -------------------------------------------------------------------------------- 1 | // Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/theme.rs 2 | // released under the MIT license by @defuz 3 | use super::selector::*; 4 | use super::style::*; 5 | use serde_derive::{Deserialize, Serialize}; 6 | 7 | /// A theme parsed from a `.tmTheme` file. 8 | /// 9 | /// This contains additional fields useful for a theme list as well as `settings` for styling your editor. 10 | #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] 11 | pub struct Theme { 12 | pub name: Option, 13 | pub author: Option, 14 | /// External settings for the editor using this theme 15 | pub settings: ThemeSettings, 16 | /// The styling rules for the viewed text 17 | pub scopes: Vec, 18 | } 19 | 20 | /// Properties for styling the UI of a text editor 21 | /// 22 | /// This essentially consists of the styles that aren't directly applied to the text being viewed. 23 | /// `ThemeSettings` are intended to be used to make the UI of the editor match the styling of the 24 | /// text itself. 25 | #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] 26 | pub struct ThemeSettings { 27 | /// The default color for text. 28 | pub foreground: Option, 29 | /// The default background color of the view. 30 | pub background: Option, 31 | /// Color of the caret. 32 | pub caret: Option, 33 | /// Color of the line the caret is in. 34 | /// Only used when the `highlight_line` setting is set to `true`. 
35 | pub line_highlight: Option, 36 | 37 | /// The color to use for the squiggly underline drawn under misspelled words. 38 | pub misspelling: Option, 39 | /// The color of the border drawn around the viewport area of the minimap. 40 | /// Only used when the `draw_minimap_border` setting is enabled. 41 | pub minimap_border: Option, 42 | /// A color made available for use by the theme. 43 | pub accent: Option, 44 | /// CSS passed to popups. 45 | pub popup_css: Option, 46 | /// CSS passed to phantoms. 47 | pub phantom_css: Option, 48 | 49 | /// Color of bracketed sections of text when the caret is in a bracketed section. 50 | /// Only applied when the `match_brackets` setting is set to `true`. 51 | pub bracket_contents_foreground: Option, 52 | /// Controls certain options when the caret is in a bracket section. 53 | /// Only applied when the `match_brackets` setting is set to `true`. 54 | pub bracket_contents_options: Option, 55 | /// Foreground color of the brackets when the caret is next to a bracket. 56 | /// Only applied when the `match_brackets` setting is set to `true`. 57 | pub brackets_foreground: Option, 58 | /// Background color of the brackets when the caret is next to a bracket. 59 | /// Only applied when the `match_brackets` setting is set to `true`. 60 | pub brackets_background: Option, 61 | /// Controls certain options when the caret is next to a bracket. 62 | /// Only applied when the `match_brackets` setting is set to `true`. 63 | pub brackets_options: Option, 64 | 65 | /// Color of tags when the caret is next to a tag. 66 | /// Only used when the `match_tags` setting is set to `true`. 67 | pub tags_foreground: Option, 68 | /// Controls certain options when the caret is next to a tag. 69 | /// Only applied when the `match_tags` setting is set to `true`. 70 | pub tags_options: Option, 71 | 72 | /// The border color for "other" matches. 73 | pub highlight: Option, 74 | /// Background color of regions matching the current search. 
75 | pub find_highlight: Option, 76 | /// Text color of regions matching the current search. 77 | pub find_highlight_foreground: Option, 78 | 79 | /// Background color of the gutter. 80 | pub gutter: Option, 81 | /// Foreground color of the gutter. 82 | pub gutter_foreground: Option, 83 | 84 | /// The background color of selected text. 85 | pub selection: Option, 86 | /// A color that will override the scope-based text color of the selection. 87 | pub selection_foreground: Option, 88 | 89 | /// Color of the selection regions border. 90 | pub selection_border: Option, 91 | /// The background color of a selection in a view that is not currently focused. 92 | pub inactive_selection: Option, 93 | /// A color that will override the scope-based text color of the selection 94 | /// in a view that is not currently focused. 95 | pub inactive_selection_foreground: Option, 96 | 97 | /// Color of the guides displayed to indicate nesting levels. 98 | pub guide: Option, 99 | /// Color of the guide lined up with the caret. 100 | /// Only applied if the `indent_guide_options` setting is set to `draw_active`. 101 | pub active_guide: Option, 102 | /// Color of the current guide’s parent guide level. 103 | /// Only used if the `indent_guide_options` setting is set to `draw_active`. 104 | pub stack_guide: Option, 105 | 106 | /// The color of the shadow used when a text area can be horizontally scrolled. 107 | pub shadow: Option, 108 | } 109 | 110 | /// A component of a theme meant to highlight a specific thing (e.g string literals) 111 | /// in a certain way. 112 | #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] 113 | pub struct ThemeItem { 114 | /// Target scope name. 
115 | pub scope: ScopeSelectors, 116 | /// The style to use for this component 117 | pub style: StyleModifier, 118 | } 119 | 120 | #[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)] 121 | pub enum UnderlineOption { 122 | #[default] 123 | None, 124 | Underline, 125 | StippledUnderline, 126 | SquigglyUnderline, 127 | } 128 | -------------------------------------------------------------------------------- /examples/synstats.rs: -------------------------------------------------------------------------------- 1 | //! An example of using syntect for code analysis. 2 | //! Basically a fancy lines of code count program that works 3 | //! for all languages Sublime Text supports and also counts things 4 | //! like number of functions and number of types defined. 5 | //! 6 | //! Another thing it does that other line count programs can't always 7 | //! do is properly count comments in embedded syntaxes. For example 8 | //! JS, CSS and Ruby comments embedded in ERB files. 9 | use syntect::easy::ScopeRegionIterator; 10 | use syntect::highlighting::{ScopeSelector, ScopeSelectors}; 11 | use syntect::parsing::{ParseState, ScopeStack, ScopeStackOp, SyntaxSet}; 12 | 13 | use std::fs::File; 14 | use std::io::{BufRead, BufReader}; 15 | use std::path::Path; 16 | use std::str::FromStr; 17 | use walkdir::{DirEntry, WalkDir}; 18 | 19 | #[derive(Debug)] 20 | struct Selectors { 21 | comment: ScopeSelector, 22 | doc_comment: ScopeSelectors, 23 | function: ScopeSelector, 24 | types: ScopeSelectors, 25 | } 26 | 27 | impl Default for Selectors { 28 | fn default() -> Selectors { 29 | Selectors { 30 | comment: ScopeSelector::from_str("comment - comment.block.attribute").unwrap(), 31 | doc_comment: ScopeSelectors::from_str( 32 | "comment.line.documentation, comment.block.documentation", 33 | ) 34 | .unwrap(), 35 | function: ScopeSelector::from_str("entity.name.function").unwrap(), 36 | types: ScopeSelectors::from_str( 37 | "entity.name.class, entity.name.struct, entity.name.enum, 
entity.name.type", 38 | ) 39 | .unwrap(), 40 | } 41 | } 42 | } 43 | 44 | #[derive(Debug, Default)] 45 | struct Stats { 46 | selectors: Selectors, 47 | files: usize, 48 | functions: usize, 49 | types: usize, 50 | lines: usize, 51 | chars: usize, 52 | code_lines: usize, 53 | comment_lines: usize, 54 | comment_chars: usize, 55 | comment_words: usize, 56 | doc_comment_lines: usize, 57 | doc_comment_words: usize, 58 | } 59 | 60 | fn print_stats(stats: &Stats) { 61 | println!(); 62 | println!("################## Stats ###################"); 63 | println!("File count: {:>6}", stats.files); 64 | println!("Total characters: {:>6}", stats.chars); 65 | println!(); 66 | println!( 67 | "Function count: {:>6}", 68 | stats.functions 69 | ); 70 | println!("Type count (structs, enums, classes): {:>6}", stats.types); 71 | println!(); 72 | println!( 73 | "Code lines (traditional SLOC): {:>6}", 74 | stats.code_lines 75 | ); 76 | println!("Total lines (w/ comments & blanks): {:>6}", stats.lines); 77 | println!( 78 | "Comment lines (comment but no code): {:>6}", 79 | stats.comment_lines 80 | ); 81 | println!( 82 | "Blank lines (lines-blank-comment): {:>6}", 83 | stats.lines - stats.code_lines - stats.comment_lines 84 | ); 85 | println!(); 86 | println!( 87 | "Lines with a documentation comment: {:>6}", 88 | stats.doc_comment_lines 89 | ); 90 | println!( 91 | "Total words written in doc comments: {:>6}", 92 | stats.doc_comment_words 93 | ); 94 | println!( 95 | "Total words written in all comments: {:>6}", 96 | stats.comment_words 97 | ); 98 | println!( 99 | "Characters of comment: {:>6}", 100 | stats.comment_chars 101 | ); 102 | } 103 | 104 | fn is_ignored(entry: &DirEntry) -> bool { 105 | entry 106 | .file_name() 107 | .to_str() 108 | .map(|s| s.starts_with('.') && s.len() > 1 || s.ends_with(".md")) 109 | .unwrap_or(false) 110 | } 111 | 112 | fn count_line( 113 | ops: &[(usize, ScopeStackOp)], 114 | line: &str, 115 | stack: &mut ScopeStack, 116 | stats: &mut Stats, 117 | ) { 118 | 
stats.lines += 1; 119 | 120 | let mut line_has_comment = false; 121 | let mut line_has_doc_comment = false; 122 | let mut line_has_code = false; 123 | for (s, op) in ScopeRegionIterator::new(ops, line) { 124 | stack.apply(op).unwrap(); 125 | if s.is_empty() { 126 | // in this case we don't care about blank tokens 127 | continue; 128 | } 129 | if stats 130 | .selectors 131 | .comment 132 | .does_match(stack.as_slice()) 133 | .is_some() 134 | { 135 | let words = s 136 | .split_whitespace() 137 | .filter(|w| { 138 | w.chars() 139 | .all(|c| c.is_alphanumeric() || c == '.' || c == '\'') 140 | }) 141 | .count(); 142 | if stats 143 | .selectors 144 | .doc_comment 145 | .does_match(stack.as_slice()) 146 | .is_some() 147 | { 148 | line_has_doc_comment = true; 149 | stats.doc_comment_words += words; 150 | } 151 | stats.comment_chars += s.len(); 152 | stats.comment_words += words; 153 | line_has_comment = true; 154 | } else if !s.chars().all(|c| c.is_whitespace()) { 155 | line_has_code = true; 156 | } 157 | if stats 158 | .selectors 159 | .function 160 | .does_match(stack.as_slice()) 161 | .is_some() 162 | { 163 | stats.functions += 1; 164 | } 165 | if stats.selectors.types.does_match(stack.as_slice()).is_some() { 166 | stats.types += 1; 167 | } 168 | } 169 | if line_has_comment && !line_has_code { 170 | stats.comment_lines += 1; 171 | } 172 | if line_has_doc_comment { 173 | stats.doc_comment_lines += 1; 174 | } 175 | if line_has_code { 176 | stats.code_lines += 1; 177 | } 178 | } 179 | 180 | fn count(ss: &SyntaxSet, path: &Path, stats: &mut Stats) { 181 | let syntax = match ss.find_syntax_for_file(path).unwrap_or(None) { 182 | Some(syntax) => syntax, 183 | None => return, 184 | }; 185 | stats.files += 1; 186 | let mut state = ParseState::new(syntax); 187 | 188 | let f = File::open(path).unwrap(); 189 | let mut reader = BufReader::new(f); 190 | let mut line = String::new(); 191 | let mut stack = ScopeStack::new(); 192 | while reader.read_line(&mut line).unwrap() > 0 { 193 | 
{ 194 | let ops = state.parse_line(&line, ss).unwrap(); 195 | stats.chars += line.len(); 196 | count_line(&ops, &line, &mut stack, stats); 197 | } 198 | line.clear(); 199 | } 200 | } 201 | 202 | fn main() { 203 | let ss = SyntaxSet::load_defaults_newlines(); // note we load the version with newlines 204 | 205 | let args: Vec = std::env::args().collect(); 206 | let path = if args.len() < 2 { "." } else { &args[1] }; 207 | 208 | println!("################## Files ###################"); 209 | let mut stats = Stats::default(); 210 | let walker = WalkDir::new(path).into_iter(); 211 | for entry in walker.filter_entry(|e| !is_ignored(e)) { 212 | let entry = entry.unwrap(); 213 | if entry.file_type().is_file() { 214 | println!("{}", entry.path().display()); 215 | count(&ss, entry.path(), &mut stats); 216 | } 217 | } 218 | 219 | // println!("{:?}", stats); 220 | print_stats(&stats); 221 | } 222 | -------------------------------------------------------------------------------- /src/parsing/regex.rs: -------------------------------------------------------------------------------- 1 | use once_cell::sync::OnceCell; 2 | use serde::de::{Deserialize, Deserializer}; 3 | use serde::ser::{Serialize, Serializer}; 4 | use std::error::Error; 5 | 6 | /// An abstraction for regex patterns. 7 | /// 8 | /// * Allows swapping out the regex implementation because it's only in this module. 9 | /// * Makes regexes serializable and deserializable using just the pattern string. 10 | /// * Lazily compiles regexes on first use to improve initialization time. 11 | #[derive(Debug)] 12 | pub struct Regex { 13 | regex_str: String, 14 | regex: OnceCell, 15 | } 16 | 17 | /// A region contains text positions for capture groups in a match result. 18 | #[derive(Clone, Debug, Eq, PartialEq)] 19 | pub struct Region { 20 | region: regex_impl::Region, 21 | } 22 | 23 | impl Regex { 24 | /// Create a new regex from the pattern string. 
25 | /// 26 | /// Note that the regex compilation happens on first use, which is why this method does not 27 | /// return a result. 28 | pub fn new(regex_str: String) -> Self { 29 | Self { 30 | regex_str, 31 | regex: OnceCell::new(), 32 | } 33 | } 34 | 35 | /// Check whether the pattern compiles as a valid regex or not. 36 | pub fn try_compile(regex_str: &str) -> Option> { 37 | regex_impl::Regex::new(regex_str).err() 38 | } 39 | 40 | /// Return the regex pattern. 41 | pub fn regex_str(&self) -> &str { 42 | &self.regex_str 43 | } 44 | 45 | /// Check if the regex matches the given text. 46 | pub fn is_match(&self, text: &str) -> bool { 47 | self.regex().is_match(text) 48 | } 49 | 50 | /// Search for the pattern in the given text from begin/end positions. 51 | /// 52 | /// If a region is passed, it is used for storing match group positions. The argument allows 53 | /// the [`Region`] to be reused between searches, which makes a significant performance 54 | /// difference. 55 | /// 56 | /// [`Region`]: struct.Region.html 57 | pub fn search( 58 | &self, 59 | text: &str, 60 | begin: usize, 61 | end: usize, 62 | region: Option<&mut Region>, 63 | ) -> bool { 64 | self.regex() 65 | .search(text, begin, end, region.map(|r| &mut r.region)) 66 | } 67 | 68 | fn regex(&self) -> &regex_impl::Regex { 69 | self.regex.get_or_init(|| { 70 | regex_impl::Regex::new(&self.regex_str).expect("regex string should be pre-tested") 71 | }) 72 | } 73 | } 74 | 75 | impl Clone for Regex { 76 | fn clone(&self) -> Self { 77 | Regex { 78 | regex_str: self.regex_str.clone(), 79 | regex: OnceCell::new(), 80 | } 81 | } 82 | } 83 | 84 | impl PartialEq for Regex { 85 | fn eq(&self, other: &Regex) -> bool { 86 | self.regex_str == other.regex_str 87 | } 88 | } 89 | 90 | impl Eq for Regex {} 91 | 92 | impl Serialize for Regex { 93 | fn serialize(&self, serializer: S) -> Result 94 | where 95 | S: Serializer, 96 | { 97 | serializer.serialize_str(&self.regex_str) 98 | } 99 | } 100 | 101 | impl<'de> 
Deserialize<'de> for Regex { 102 | fn deserialize(deserializer: D) -> Result 103 | where 104 | D: Deserializer<'de>, 105 | { 106 | let regex_str = String::deserialize(deserializer)?; 107 | Ok(Regex::new(regex_str)) 108 | } 109 | } 110 | 111 | impl Region { 112 | pub fn new() -> Self { 113 | Self { 114 | region: regex_impl::new_region(), 115 | } 116 | } 117 | 118 | /// Get the start/end positions of the capture group with given index. 119 | /// 120 | /// If there is no match for that group or the index does not correspond to a group, `None` is 121 | /// returned. The index 0 returns the whole match. 122 | pub fn pos(&self, index: usize) -> Option<(usize, usize)> { 123 | self.region.pos(index) 124 | } 125 | } 126 | 127 | impl Default for Region { 128 | fn default() -> Self { 129 | Self::new() 130 | } 131 | } 132 | 133 | #[cfg(feature = "regex-onig")] 134 | mod regex_impl { 135 | pub use onig::Region; 136 | use onig::{MatchParam, RegexOptions, SearchOptions, Syntax}; 137 | use std::error::Error; 138 | 139 | #[derive(Debug)] 140 | pub struct Regex { 141 | regex: onig::Regex, 142 | } 143 | 144 | pub fn new_region() -> Region { 145 | Region::with_capacity(8) 146 | } 147 | 148 | impl Regex { 149 | pub fn new(regex_str: &str) -> Result> { 150 | let result = onig::Regex::with_options( 151 | regex_str, 152 | RegexOptions::REGEX_OPTION_CAPTURE_GROUP, 153 | Syntax::default(), 154 | ); 155 | match result { 156 | Ok(regex) => Ok(Regex { regex }), 157 | Err(error) => Err(Box::new(error)), 158 | } 159 | } 160 | 161 | pub fn is_match(&self, text: &str) -> bool { 162 | self.regex 163 | .match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None) 164 | .is_some() 165 | } 166 | 167 | pub fn search( 168 | &self, 169 | text: &str, 170 | begin: usize, 171 | end: usize, 172 | region: Option<&mut Region>, 173 | ) -> bool { 174 | let matched = self.regex.search_with_param( 175 | text, 176 | begin, 177 | end, 178 | SearchOptions::SEARCH_OPTION_NONE, 179 | region, 180 | 
MatchParam::default(), 181 | ); 182 | 183 | // If there's an error during search, treat it as non-matching. 184 | // For example, in case of catastrophic backtracking, onig should 185 | // fail with a "retry-limit-in-match over" error eventually. 186 | matches!(matched, Ok(Some(_))) 187 | } 188 | } 189 | } 190 | 191 | // If both regex-fancy and regex-onig are requested, this condition makes regex-onig win. 192 | #[cfg(all(feature = "regex-fancy", not(feature = "regex-onig")))] 193 | mod regex_impl { 194 | use std::error::Error; 195 | 196 | #[derive(Debug)] 197 | pub struct Regex { 198 | regex: fancy_regex::Regex, 199 | } 200 | 201 | #[derive(Clone, Debug, Eq, PartialEq)] 202 | pub struct Region { 203 | positions: Vec>, 204 | } 205 | 206 | pub fn new_region() -> Region { 207 | Region { 208 | positions: Vec::with_capacity(8), 209 | } 210 | } 211 | 212 | impl Regex { 213 | pub fn new(regex_str: &str) -> Result> { 214 | let result = fancy_regex::RegexBuilder::new(regex_str) 215 | .oniguruma_mode(true) 216 | .build(); 217 | match result { 218 | Ok(regex) => Ok(Regex { regex }), 219 | Err(error) => Err(Box::new(error)), 220 | } 221 | } 222 | 223 | pub fn is_match(&self, text: &str) -> bool { 224 | // Errors are treated as non-matches 225 | self.regex.is_match(text).unwrap_or(false) 226 | } 227 | 228 | pub fn search( 229 | &self, 230 | text: &str, 231 | begin: usize, 232 | end: usize, 233 | region: Option<&mut Region>, 234 | ) -> bool { 235 | // If there's an error during search, treat it as non-matching. 236 | // For example, in case of catastrophic backtracking, fancy-regex should 237 | // fail with an error eventually. 
238 | if let Ok(Some(captures)) = self.regex.captures_from_pos(&text[..end], begin) { 239 | if let Some(region) = region { 240 | region.init_from_captures(&captures); 241 | } 242 | true 243 | } else { 244 | false 245 | } 246 | } 247 | } 248 | 249 | impl Region { 250 | fn init_from_captures(&mut self, captures: &fancy_regex::Captures) { 251 | self.positions.clear(); 252 | for i in 0..captures.len() { 253 | let pos = captures.get(i).map(|m| (m.start(), m.end())); 254 | self.positions.push(pos); 255 | } 256 | } 257 | 258 | pub fn pos(&self, i: usize) -> Option<(usize, usize)> { 259 | if i < self.positions.len() { 260 | self.positions[i] 261 | } else { 262 | None 263 | } 264 | } 265 | } 266 | } 267 | 268 | #[cfg(test)] 269 | mod tests { 270 | use super::*; 271 | 272 | #[test] 273 | fn caches_compiled_regex() { 274 | let regex = Regex::new(String::from(r"\w+")); 275 | 276 | assert!(regex.regex.get().is_none()); 277 | assert!(regex.is_match("test")); 278 | assert!(regex.regex.get().is_some()); 279 | } 280 | 281 | #[test] 282 | fn serde_as_string() { 283 | let pattern: Regex = serde_json::from_str("\"just a string\"").unwrap(); 284 | assert_eq!(pattern.regex_str(), "just a string"); 285 | let back_to_str = serde_json::to_string(&pattern).unwrap(); 286 | assert_eq!(back_to_str, "\"just a string\""); 287 | } 288 | } 289 | -------------------------------------------------------------------------------- /src/dumps.rs: -------------------------------------------------------------------------------- 1 | //! Methods for dumping serializable structs to a compressed binary format, 2 | //! used to allow fast startup times 3 | //! 4 | //! Currently syntect serializes [`SyntaxSet`] structs with [`dump_to_uncompressed_file`] 5 | //! into `.packdump` files and likewise [`ThemeSet`] structs to `.themedump` files with [`dump_to_file`]. 6 | //! 7 | //! You can use these methods to manage your own caching of compiled syntaxes and 8 | //! themes. 
And even your own `serde::Serialize` structures if you want to 9 | //! be consistent with your format. 10 | //! 11 | //! [`SyntaxSet`]: ../parsing/struct.SyntaxSet.html 12 | //! [`dump_to_uncompressed_file`]: fn.dump_to_uncompressed_file.html 13 | //! [`ThemeSet`]: ../highlighting/struct.ThemeSet.html 14 | //! [`dump_to_file`]: fn.dump_to_file.html 15 | #[cfg(feature = "default-themes")] 16 | use crate::highlighting::ThemeSet; 17 | #[cfg(feature = "default-syntaxes")] 18 | use crate::parsing::SyntaxSet; 19 | #[cfg(feature = "dump-load")] 20 | use bincode::deserialize_from; 21 | #[cfg(feature = "dump-create")] 22 | use bincode::serialize_into; 23 | use bincode::Result; 24 | #[cfg(feature = "dump-load")] 25 | use flate2::bufread::ZlibDecoder; 26 | #[cfg(feature = "dump-create")] 27 | use flate2::write::ZlibEncoder; 28 | #[cfg(feature = "dump-create")] 29 | use flate2::Compression; 30 | #[cfg(feature = "dump-load")] 31 | use serde::de::DeserializeOwned; 32 | #[cfg(feature = "dump-create")] 33 | use serde::ser::Serialize; 34 | use std::fs::File; 35 | #[cfg(feature = "dump-load")] 36 | use std::io::BufRead; 37 | #[cfg(feature = "dump-create")] 38 | use std::io::{BufWriter, Write}; 39 | use std::path::Path; 40 | 41 | /// Dumps an object to the given writer in a compressed binary format 42 | /// 43 | /// The writer is encoded with the `bincode` crate and compressed with `flate2`. 
44 | #[cfg(feature = "dump-create")] 45 | pub fn dump_to_writer(to_dump: &T, output: W) -> Result<()> { 46 | serialize_to_writer_impl(to_dump, output, true) 47 | } 48 | 49 | /// Dumps an object to a binary array in the same format as [`dump_to_writer`] 50 | /// 51 | /// [`dump_to_writer`]: fn.dump_to_writer.html 52 | #[cfg(feature = "dump-create")] 53 | pub fn dump_binary(o: &T) -> Vec { 54 | let mut v = Vec::new(); 55 | dump_to_writer(o, &mut v).unwrap(); 56 | v 57 | } 58 | 59 | /// Dumps an encodable object to a file at a given path, in the same format as [`dump_to_writer`] 60 | /// 61 | /// If a file already exists at that path it will be overwritten. The files created are encoded with 62 | /// the `bincode` crate and then compressed with the `flate2` crate. 63 | /// 64 | /// [`dump_to_writer`]: fn.dump_to_writer.html 65 | #[cfg(feature = "dump-create")] 66 | pub fn dump_to_file>(o: &T, path: P) -> Result<()> { 67 | let out = BufWriter::new(File::create(path)?); 68 | dump_to_writer(o, out) 69 | } 70 | 71 | /// A helper function for decoding and decompressing data from a reader 72 | #[cfg(feature = "dump-load")] 73 | pub fn from_reader(input: R) -> Result { 74 | deserialize_from_reader_impl(input, true) 75 | } 76 | 77 | /// Returns a fully loaded object from a binary dump. 78 | /// 79 | /// This function panics if the dump is invalid. 80 | #[cfg(feature = "dump-load")] 81 | pub fn from_binary(v: &[u8]) -> T { 82 | from_reader(v).unwrap() 83 | } 84 | 85 | /// Returns a fully loaded object from a binary dump file. 86 | #[cfg(feature = "dump-load")] 87 | pub fn from_dump_file>(path: P) -> Result { 88 | let contents = std::fs::read(path)?; 89 | from_reader(&contents[..]) 90 | } 91 | 92 | /// To be used when serializing a [`SyntaxSet`] to a file. A [`SyntaxSet`] 93 | /// itself shall not be compressed, because the data for its lazy-loaded 94 | /// syntaxes are already compressed. Compressing another time just results in 95 | /// bad performance. 
96 | #[cfg(feature = "dump-create")] 97 | pub fn dump_to_uncompressed_file>(o: &T, path: P) -> Result<()> { 98 | let out = BufWriter::new(File::create(path)?); 99 | serialize_to_writer_impl(o, out, false) 100 | } 101 | 102 | /// To be used when deserializing a [`SyntaxSet`] that was previously written to 103 | /// file using [dump_to_uncompressed_file]. 104 | #[cfg(feature = "dump-load")] 105 | pub fn from_uncompressed_dump_file>(path: P) -> Result { 106 | let contents = std::fs::read(path)?; 107 | deserialize_from_reader_impl(&contents[..], false) 108 | } 109 | 110 | /// To be used when deserializing a [`SyntaxSet`] from raw data, for example 111 | /// data that has been embedded in your own binary with the [`include_bytes!`] 112 | /// macro. 113 | #[cfg(feature = "dump-load")] 114 | pub fn from_uncompressed_data(v: &[u8]) -> Result { 115 | deserialize_from_reader_impl(v, false) 116 | } 117 | 118 | /// Private low level helper function used to implement the public API. 119 | #[cfg(feature = "dump-create")] 120 | fn serialize_to_writer_impl( 121 | to_dump: &T, 122 | output: W, 123 | use_compression: bool, 124 | ) -> Result<()> { 125 | if use_compression { 126 | let mut encoder = std::io::BufWriter::new(ZlibEncoder::new(output, Compression::best())); 127 | serialize_into(&mut encoder, to_dump) 128 | } else { 129 | serialize_into(output, to_dump) 130 | } 131 | } 132 | 133 | /// Private low level helper function used to implement the public API. 134 | #[cfg(feature = "dump-load")] 135 | fn deserialize_from_reader_impl( 136 | input: R, 137 | use_compression: bool, 138 | ) -> Result { 139 | if use_compression { 140 | let mut decoder = ZlibDecoder::new(input); 141 | deserialize_from(&mut decoder) 142 | } else { 143 | deserialize_from(input) 144 | } 145 | } 146 | 147 | #[cfg(feature = "default-syntaxes")] 148 | impl SyntaxSet { 149 | /// Instantiates a new syntax set from a binary dump of Sublime Text's default open source 150 | /// syntax definitions. 
151 | /// 152 | /// These dumps are included in this library's binary for convenience. 153 | /// 154 | /// This method loads the version for parsing line strings with no `\n` characters at the end. 155 | /// If you're able to efficiently include newlines at the end of strings, use 156 | /// [`load_defaults_newlines`] since it works better. See [`SyntaxSetBuilder::add_from_folder`] 157 | /// for more info on this issue. 158 | /// 159 | /// This is the recommended way of creating a syntax set for non-advanced use cases. It is also 160 | /// significantly faster than loading the YAML files. 161 | /// 162 | /// Note that you can load additional syntaxes after doing this. If you want you can even use 163 | /// the fact that SyntaxDefinitions are serializable with the bincode crate to cache dumps of 164 | /// additional syntaxes yourself. 165 | /// 166 | /// [`load_defaults_newlines`]: #method.load_defaults_newlines 167 | /// [`SyntaxSetBuilder::add_from_folder`]: struct.SyntaxSetBuilder.html#method.add_from_folder 168 | pub fn load_defaults_nonewlines() -> SyntaxSet { 169 | #[cfg(feature = "metadata")] 170 | { 171 | let mut ps: SyntaxSet = 172 | from_uncompressed_data(include_bytes!("../assets/default_nonewlines.packdump")) 173 | .unwrap(); 174 | let metadata = from_binary(include_bytes!("../assets/default_metadata.packdump")); 175 | ps.metadata = metadata; 176 | ps 177 | } 178 | #[cfg(not(feature = "metadata"))] 179 | { 180 | from_uncompressed_data(include_bytes!("../assets/default_nonewlines.packdump")).unwrap() 181 | } 182 | } 183 | 184 | /// Same as [`load_defaults_nonewlines`] but for parsing line strings with newlines at the end. 185 | /// 186 | /// These are separate methods because thanks to linker garbage collection, only the serialized 187 | /// dumps for the method(s) you call will be included in the binary (each is ~200kb for now). 
188 | /// 189 | /// [`load_defaults_nonewlines`]: #method.load_defaults_nonewlines 190 | pub fn load_defaults_newlines() -> SyntaxSet { 191 | #[cfg(feature = "metadata")] 192 | { 193 | let mut ps: SyntaxSet = 194 | from_uncompressed_data(include_bytes!("../assets/default_newlines.packdump")) 195 | .unwrap(); 196 | let metadata = from_binary(include_bytes!("../assets/default_metadata.packdump")); 197 | ps.metadata = metadata; 198 | ps 199 | } 200 | #[cfg(not(feature = "metadata"))] 201 | { 202 | from_uncompressed_data(include_bytes!("../assets/default_newlines.packdump")).unwrap() 203 | } 204 | } 205 | } 206 | 207 | #[cfg(feature = "default-themes")] 208 | impl ThemeSet { 209 | /// Loads the set of default themes 210 | /// Currently includes (these are the keys for the map): 211 | /// 212 | /// - `base16-ocean.dark`,`base16-eighties.dark`,`base16-mocha.dark`,`base16-ocean.light` 213 | /// - `InspiredGitHub` from [here](https://github.com/sethlopezme/InspiredGitHub.tmtheme) 214 | /// - `Solarized (dark)` and `Solarized (light)` 215 | pub fn load_defaults() -> ThemeSet { 216 | from_binary(include_bytes!("../assets/default.themedump")) 217 | } 218 | } 219 | 220 | #[cfg(test)] 221 | mod tests { 222 | #[cfg(all( 223 | feature = "yaml-load", 224 | feature = "dump-create", 225 | feature = "dump-load", 226 | feature = "parsing" 227 | ))] 228 | #[test] 229 | fn can_dump_and_load() { 230 | use super::*; 231 | use crate::utils::testdata; 232 | 233 | let ss = &*testdata::PACKAGES_SYN_SET; 234 | 235 | let bin = dump_binary(&ss); 236 | println!("{:?}", bin.len()); 237 | let ss2: SyntaxSet = from_binary(&bin[..]); 238 | assert_eq!(ss.syntaxes().len(), ss2.syntaxes().len()); 239 | } 240 | 241 | #[cfg(all(feature = "yaml-load", feature = "dump-create", feature = "dump-load"))] 242 | #[test] 243 | fn dump_is_deterministic() { 244 | use super::*; 245 | use crate::parsing::SyntaxSetBuilder; 246 | use crate::utils::testdata; 247 | 248 | let ss1 = &*testdata::PACKAGES_SYN_SET; 249 | let 
bin1 = dump_binary(&ss1); 250 | 251 | let mut builder2 = SyntaxSetBuilder::new(); 252 | builder2 253 | .add_from_folder("testdata/Packages", false) 254 | .unwrap(); 255 | let ss2 = builder2.build(); 256 | let bin2 = dump_binary(&ss2); 257 | // This is redundant, but assert_eq! can be really slow on a large 258 | // vector, so check the length first to fail faster. 259 | assert_eq!(bin1.len(), bin2.len()); 260 | assert_eq!(bin1, bin2); 261 | } 262 | 263 | #[cfg(feature = "default-themes")] 264 | #[test] 265 | fn has_default_themes() { 266 | use crate::highlighting::ThemeSet; 267 | let themes = ThemeSet::load_defaults(); 268 | assert!(themes.themes.len() > 4); 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/parsing/syntax_definition.rs: -------------------------------------------------------------------------------- 1 | //! Data structures for representing syntax definitions 2 | //! 3 | //! Everything here is public because I want this library to be useful in super integrated cases 4 | //! like text editors and I have no idea what kind of monkeying you might want to do with the data. 5 | //! Perhaps parsing your own syntax format into this data structure? 6 | 7 | use super::regex::{Regex, Region}; 8 | use super::{scope::*, ParsingError}; 9 | use crate::parsing::syntax_set::SyntaxSet; 10 | use regex_syntax::escape; 11 | use serde::ser::{Serialize, Serializer}; 12 | use serde_derive::{Deserialize, Serialize}; 13 | use std::collections::{BTreeMap, HashMap}; 14 | use std::hash::Hash; 15 | 16 | pub type CaptureMapping = Vec<(usize, Vec)>; 17 | 18 | /// An opaque ID for a [`Context`]. 
19 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] 20 | pub struct ContextId { 21 | /// Index into [`SyntaxSet::syntaxes`] 22 | pub(crate) syntax_index: usize, 23 | 24 | /// Index into [`crate::parsing::LazyContexts::contexts`] for the [`Self::syntax_index`] syntax 25 | pub(crate) context_index: usize, 26 | } 27 | 28 | /// The main data structure representing a syntax definition loaded from a 29 | /// `.sublime-syntax` file 30 | /// 31 | /// You'll probably only need these as references to be passed around to parsing code. 32 | /// 33 | /// Some useful public fields are the `name` field which is a human readable name to display in 34 | /// syntax lists, and the `hidden` field which means hide this syntax from any lists because it is 35 | /// for internal use. 36 | #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] 37 | pub struct SyntaxDefinition { 38 | pub name: String, 39 | pub file_extensions: Vec, 40 | pub scope: Scope, 41 | pub first_line_match: Option, 42 | pub hidden: bool, 43 | #[serde(serialize_with = "ordered_map")] 44 | pub variables: HashMap, 45 | #[serde(serialize_with = "ordered_map")] 46 | pub contexts: HashMap, 47 | } 48 | 49 | #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] 50 | pub struct Context { 51 | pub meta_scope: Vec, 52 | pub meta_content_scope: Vec, 53 | /// This being set false in the syntax file implies this field being set false, 54 | /// but it can also be set false for contexts that don't include the prototype for other reasons 55 | pub meta_include_prototype: bool, 56 | pub clear_scopes: Option, 57 | /// This is filled in by the linker at link time 58 | /// for contexts that have `meta_include_prototype==true` 59 | /// and are not included from the prototype. 
60 | pub prototype: Option, 61 | pub uses_backrefs: bool, 62 | 63 | pub patterns: Vec, 64 | } 65 | 66 | impl Context { 67 | pub fn new(meta_include_prototype: bool) -> Context { 68 | Context { 69 | meta_scope: Vec::new(), 70 | meta_content_scope: Vec::new(), 71 | meta_include_prototype, 72 | clear_scopes: None, 73 | uses_backrefs: false, 74 | patterns: Vec::new(), 75 | prototype: None, 76 | } 77 | } 78 | } 79 | 80 | #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] 81 | pub enum Pattern { 82 | Match(MatchPattern), 83 | Include(ContextReference), 84 | } 85 | 86 | /// Used to iterate over all the match patterns in a context 87 | /// 88 | /// Basically walks the tree of patterns and include directives in the correct order. 89 | #[derive(Debug)] 90 | pub struct MatchIter<'a> { 91 | syntax_set: &'a SyntaxSet, 92 | ctx_stack: Vec<&'a Context>, 93 | index_stack: Vec, 94 | } 95 | 96 | #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] 97 | pub struct MatchPattern { 98 | pub has_captures: bool, 99 | pub regex: Regex, 100 | pub scope: Vec, 101 | pub captures: Option, 102 | pub operation: MatchOperation, 103 | pub with_prototype: Option, 104 | } 105 | 106 | #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] 107 | #[non_exhaustive] 108 | pub enum ContextReference { 109 | #[non_exhaustive] 110 | Named(String), 111 | #[non_exhaustive] 112 | ByScope { 113 | scope: Scope, 114 | sub_context: Option, 115 | /// `true` if this reference by scope is part of an `embed` for which 116 | /// there is an `escape`. In other words a reference for a context for 117 | /// which there "always is a way out". Enables falling back to `Plain 118 | /// Text` syntax in case the referenced scope is missing. 119 | with_escape: bool, 120 | }, 121 | #[non_exhaustive] 122 | File { 123 | name: String, 124 | sub_context: Option, 125 | /// Same semantics as for [`Self::ByScope::with_escape`]. 
126 | with_escape: bool, 127 | }, 128 | #[non_exhaustive] 129 | Inline(String), 130 | #[non_exhaustive] 131 | Direct(ContextId), 132 | } 133 | 134 | #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] 135 | pub enum MatchOperation { 136 | Push(Vec), 137 | Set(Vec), 138 | Pop(usize), 139 | None, 140 | } 141 | 142 | impl<'a> Iterator for MatchIter<'a> { 143 | type Item = (&'a Context, usize); 144 | 145 | fn next(&mut self) -> Option<(&'a Context, usize)> { 146 | loop { 147 | if self.ctx_stack.is_empty() { 148 | return None; 149 | } 150 | // uncomment for debugging infinite recursion 151 | // println!("{:?}", self.index_stack); 152 | // use std::thread::sleep_ms; 153 | // sleep_ms(500); 154 | let last_index = self.ctx_stack.len() - 1; 155 | let context = self.ctx_stack[last_index]; 156 | let index = self.index_stack[last_index]; 157 | self.index_stack[last_index] = index + 1; 158 | if index < context.patterns.len() { 159 | match context.patterns[index] { 160 | Pattern::Match(_) => { 161 | return Some((context, index)); 162 | } 163 | Pattern::Include(ref ctx_ref) => { 164 | let ctx_ptr = match *ctx_ref { 165 | ContextReference::Direct(ref context_id) => { 166 | self.syntax_set.get_context(context_id).unwrap() 167 | } 168 | _ => return self.next(), // skip this and move onto the next one 169 | }; 170 | self.ctx_stack.push(ctx_ptr); 171 | self.index_stack.push(0); 172 | } 173 | } 174 | } else { 175 | self.ctx_stack.pop(); 176 | self.index_stack.pop(); 177 | } 178 | } 179 | } 180 | } 181 | 182 | /// Returns an iterator over all the match patterns in this context. 183 | /// 184 | /// It recursively follows include directives. Can only be run on contexts that have already been 185 | /// linked up. 
186 | pub fn context_iter<'a>(syntax_set: &'a SyntaxSet, context: &'a Context) -> MatchIter<'a> { 187 | MatchIter { 188 | syntax_set, 189 | ctx_stack: vec![context], 190 | index_stack: vec![0], 191 | } 192 | } 193 | 194 | impl Context { 195 | /// Returns the match pattern at an index 196 | pub fn match_at(&self, index: usize) -> Result<&MatchPattern, ParsingError> { 197 | match self.patterns[index] { 198 | Pattern::Match(ref match_pat) => Ok(match_pat), 199 | _ => Err(ParsingError::BadMatchIndex(index)), 200 | } 201 | } 202 | } 203 | 204 | impl ContextReference { 205 | /// find the pointed to context 206 | pub fn resolve<'a>(&self, syntax_set: &'a SyntaxSet) -> Result<&'a Context, ParsingError> { 207 | match *self { 208 | ContextReference::Direct(ref context_id) => syntax_set.get_context(context_id), 209 | _ => Err(ParsingError::UnresolvedContextReference(self.clone())), 210 | } 211 | } 212 | 213 | /// get the context ID this reference points to 214 | pub fn id(&self) -> Result { 215 | match *self { 216 | ContextReference::Direct(ref context_id) => Ok(*context_id), 217 | _ => Err(ParsingError::UnresolvedContextReference(self.clone())), 218 | } 219 | } 220 | } 221 | 222 | pub(crate) fn substitute_backrefs_in_regex(regex_str: &str, substituter: F) -> String 223 | where 224 | F: Fn(usize) -> Option, 225 | { 226 | let mut reg_str = String::with_capacity(regex_str.len()); 227 | 228 | let mut last_was_escape = false; 229 | for c in regex_str.chars() { 230 | if last_was_escape && c.is_ascii_digit() { 231 | let val = c.to_digit(10).unwrap() as usize; 232 | if let Some(sub) = substituter(val) { 233 | reg_str.push_str(&sub); 234 | } 235 | } else if last_was_escape { 236 | reg_str.push('\\'); 237 | reg_str.push(c); 238 | } else if c != '\\' { 239 | reg_str.push(c); 240 | } 241 | 242 | last_was_escape = c == '\\' && !last_was_escape; 243 | } 244 | if last_was_escape { 245 | reg_str.push('\\'); 246 | } 247 | reg_str 248 | } 249 | 250 | impl MatchPattern { 251 | pub fn new( 252 
| has_captures: bool, 253 | regex_str: String, 254 | scope: Vec, 255 | captures: Option, 256 | operation: MatchOperation, 257 | with_prototype: Option, 258 | ) -> MatchPattern { 259 | MatchPattern { 260 | has_captures, 261 | regex: Regex::new(regex_str), 262 | scope, 263 | captures, 264 | operation, 265 | with_prototype, 266 | } 267 | } 268 | 269 | /// Used by the parser to compile a regex which needs to reference 270 | /// regions from another matched pattern. 271 | pub fn regex_with_refs(&self, region: &Region, text: &str) -> Regex { 272 | let new_regex = substitute_backrefs_in_regex(self.regex.regex_str(), |i| { 273 | region.pos(i).map(|(start, end)| escape(&text[start..end])) 274 | }); 275 | 276 | Regex::new(new_regex) 277 | } 278 | 279 | pub fn regex(&self) -> &Regex { 280 | &self.regex 281 | } 282 | } 283 | 284 | /// Serialize the provided map in natural key order, so that it's deterministic when dumping. 285 | pub(crate) fn ordered_map(map: &HashMap, serializer: S) -> Result 286 | where 287 | S: Serializer, 288 | K: Eq + Hash + Ord + Serialize, 289 | V: Serialize, 290 | { 291 | let ordered: BTreeMap<_, _> = map.iter().collect(); 292 | ordered.serialize(serializer) 293 | } 294 | 295 | #[cfg(test)] 296 | mod tests { 297 | use super::*; 298 | 299 | #[test] 300 | fn can_compile_refs() { 301 | let pat = MatchPattern { 302 | has_captures: true, 303 | regex: Regex::new(r"lol \\ \2 \1 '\9' \wz".into()), 304 | scope: vec![], 305 | captures: None, 306 | operation: MatchOperation::None, 307 | with_prototype: None, 308 | }; 309 | let r = Regex::new(r"(\\\[\]\(\))(b)(c)(d)(e)".into()); 310 | let s = r"\[]()bcde"; 311 | let mut region = Region::new(); 312 | let matched = r.search(s, 0, s.len(), Some(&mut region)); 313 | assert!(matched); 314 | 315 | let regex_with_refs = pat.regex_with_refs(&region, s); 316 | assert_eq!(regex_with_refs.regex_str(), r"lol \\ b \\\[\]\(\) '' \wz"); 317 | } 318 | } 319 | 
-------------------------------------------------------------------------------- /src/highlighting/theme_load.rs: -------------------------------------------------------------------------------- 1 | // Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/theme.rs 2 | // released under the MIT license by @defuz 3 | 4 | use std::str::FromStr; 5 | 6 | use super::selector::*; 7 | use super::settings::{ParseSettings, Settings}; 8 | use super::style::*; 9 | use super::theme::*; 10 | use crate::parsing::ParseScopeError; 11 | 12 | use self::ParseThemeError::*; 13 | 14 | #[derive(Debug, thiserror::Error)] 15 | #[non_exhaustive] 16 | pub enum ParseThemeError { 17 | #[error("Incorrect underline option")] 18 | IncorrectUnderlineOption, 19 | #[error("Incorrect font style: {0}")] 20 | IncorrectFontStyle(String), 21 | #[error("Incorrect color")] 22 | IncorrectColor, 23 | #[error("Incorrect syntax")] 24 | IncorrectSyntax, 25 | #[error("Incorrect settings")] 26 | IncorrectSettings, 27 | #[error("Undefined settings")] 28 | UndefinedSettings, 29 | #[error("Undefined scope settings: {0}")] 30 | UndefinedScopeSettings(String), 31 | #[error("Color sheme scope is not object")] 32 | ColorShemeScopeIsNotObject, 33 | #[error("Color sheme settings is not object")] 34 | ColorShemeSettingsIsNotObject, 35 | #[error("Scope selector is not string: {0}")] 36 | ScopeSelectorIsNotString(String), 37 | #[error("Duplicate settings")] 38 | DuplicateSettings, 39 | #[error("Scope parse error: {0}")] 40 | ScopeParse(#[from] ParseScopeError), 41 | } 42 | 43 | impl FromStr for UnderlineOption { 44 | type Err = ParseThemeError; 45 | 46 | fn from_str(s: &str) -> Result { 47 | Ok(match s { 48 | "underline" => UnderlineOption::Underline, 49 | "stippled_underline" => UnderlineOption::StippledUnderline, 50 | "squiggly_underline" => UnderlineOption::SquigglyUnderline, 51 | _ => return Err(IncorrectUnderlineOption), 52 | }) 53 | } 54 | } 55 | 56 | impl ParseSettings for UnderlineOption { 57 | 
type Error = ParseThemeError; 58 | 59 | fn parse_settings(settings: Settings) -> Result { 60 | match settings { 61 | Settings::String(value) => UnderlineOption::from_str(&value), 62 | _ => Err(IncorrectUnderlineOption), 63 | } 64 | } 65 | } 66 | 67 | impl FromStr for FontStyle { 68 | type Err = ParseThemeError; 69 | 70 | fn from_str(s: &str) -> Result { 71 | let mut font_style = FontStyle::empty(); 72 | for i in s.split_whitespace() { 73 | font_style.insert(match i { 74 | "bold" => FontStyle::BOLD, 75 | "underline" => FontStyle::UNDERLINE, 76 | "italic" => FontStyle::ITALIC, 77 | "normal" | "regular" => FontStyle::empty(), 78 | s => return Err(IncorrectFontStyle(s.to_owned())), 79 | }) 80 | } 81 | Ok(font_style) 82 | } 83 | } 84 | 85 | impl ParseSettings for FontStyle { 86 | type Error = ParseThemeError; 87 | 88 | fn parse_settings(settings: Settings) -> Result { 89 | match settings { 90 | Settings::String(value) => FontStyle::from_str(&value), 91 | c => Err(IncorrectFontStyle(c.to_string())), 92 | } 93 | } 94 | } 95 | 96 | impl FromStr for Color { 97 | type Err = ParseThemeError; 98 | 99 | fn from_str(s: &str) -> Result { 100 | let mut chars = s.chars(); 101 | if chars.next() != Some('#') { 102 | return Err(IncorrectColor); 103 | } 104 | let mut d = Vec::new(); 105 | for char in chars { 106 | d.push(char.to_digit(16).ok_or(IncorrectColor)? 
as u8); 107 | } 108 | Ok(match d.len() { 109 | 3 => Color { 110 | r: d[0], 111 | g: d[1], 112 | b: d[2], 113 | a: 255, 114 | }, 115 | 6 => Color { 116 | r: d[0] * 16 + d[1], 117 | g: d[2] * 16 + d[3], 118 | b: d[4] * 16 + d[5], 119 | a: 255, 120 | }, 121 | 8 => Color { 122 | r: d[0] * 16 + d[1], 123 | g: d[2] * 16 + d[3], 124 | b: d[4] * 16 + d[5], 125 | a: d[6] * 16 + d[7], 126 | }, 127 | _ => return Err(IncorrectColor), 128 | }) 129 | } 130 | } 131 | 132 | impl ParseSettings for Color { 133 | type Error = ParseThemeError; 134 | 135 | fn parse_settings(settings: Settings) -> Result { 136 | match settings { 137 | Settings::String(value) => Color::from_str(&value), 138 | _ => Err(IncorrectColor), 139 | } 140 | } 141 | } 142 | 143 | impl ParseSettings for StyleModifier { 144 | type Error = ParseThemeError; 145 | 146 | fn parse_settings(settings: Settings) -> Result { 147 | let mut obj = match settings { 148 | Settings::Object(obj) => obj, 149 | _ => return Err(ColorShemeScopeIsNotObject), 150 | }; 151 | let font_style = match obj.remove("fontStyle") { 152 | Some(Settings::String(value)) => Some(FontStyle::from_str(&value)?), 153 | None => None, 154 | Some(c) => return Err(IncorrectFontStyle(c.to_string())), 155 | }; 156 | let foreground = match obj.remove("foreground") { 157 | Some(Settings::String(value)) => Some(Color::from_str(&value)?), 158 | None => None, 159 | _ => return Err(IncorrectColor), 160 | }; 161 | let background = match obj.remove("background") { 162 | Some(Settings::String(value)) => Some(Color::from_str(&value)?), 163 | None => None, 164 | _ => return Err(IncorrectColor), 165 | }; 166 | 167 | Ok(StyleModifier { 168 | foreground, 169 | background, 170 | font_style, 171 | }) 172 | } 173 | } 174 | 175 | impl ParseSettings for ThemeItem { 176 | type Error = ParseThemeError; 177 | 178 | fn parse_settings(settings: Settings) -> Result { 179 | let mut obj = match settings { 180 | Settings::Object(obj) => obj, 181 | _ => return 
Err(ColorShemeScopeIsNotObject), 182 | }; 183 | let scope = match obj.remove("scope") { 184 | Some(Settings::String(value)) => ScopeSelectors::from_str(&value)?, 185 | _ => return Err(ScopeSelectorIsNotString(format!("{:?}", obj))), 186 | }; 187 | let style = match obj.remove("settings") { 188 | Some(settings) => StyleModifier::parse_settings(settings)?, 189 | None => return Err(IncorrectSettings), 190 | }; 191 | Ok(ThemeItem { scope, style }) 192 | } 193 | } 194 | 195 | impl ParseSettings for ThemeSettings { 196 | type Error = ParseThemeError; 197 | 198 | fn parse_settings(json: Settings) -> Result { 199 | let mut settings = ThemeSettings::default(); 200 | 201 | let obj = match json { 202 | Settings::Object(obj) => obj, 203 | _ => return Err(ColorShemeSettingsIsNotObject), 204 | }; 205 | 206 | for (key, value) in obj { 207 | match &key[..] { 208 | "foreground" => settings.foreground = Color::parse_settings(value).ok(), 209 | "background" => settings.background = Color::parse_settings(value).ok(), 210 | "caret" => settings.caret = Color::parse_settings(value).ok(), 211 | "lineHighlight" => settings.line_highlight = Color::parse_settings(value).ok(), 212 | "misspelling" => settings.misspelling = Color::parse_settings(value).ok(), 213 | "minimapBorder" => settings.minimap_border = Color::parse_settings(value).ok(), 214 | "accent" => settings.accent = Color::parse_settings(value).ok(), 215 | 216 | "popupCss" => settings.popup_css = value.as_str().map(|s| s.to_owned()), 217 | "phantomCss" => settings.phantom_css = value.as_str().map(|s| s.to_owned()), 218 | 219 | "bracketContentsForeground" => { 220 | settings.bracket_contents_foreground = Color::parse_settings(value).ok() 221 | } 222 | "bracketContentsOptions" => { 223 | settings.bracket_contents_options = UnderlineOption::parse_settings(value).ok() 224 | } 225 | "bracketsForeground" => { 226 | settings.brackets_foreground = Color::parse_settings(value).ok() 227 | } 228 | "bracketsBackground" => { 229 | 
settings.brackets_background = Color::parse_settings(value).ok() 230 | } 231 | "bracketsOptions" => { 232 | settings.brackets_options = UnderlineOption::parse_settings(value).ok() 233 | } 234 | "tagsForeground" => settings.tags_foreground = Color::parse_settings(value).ok(), 235 | "tagsOptions" => { 236 | settings.tags_options = UnderlineOption::parse_settings(value).ok() 237 | } 238 | "highlight" => settings.highlight = Color::parse_settings(value).ok(), 239 | "findHighlight" => settings.find_highlight = Color::parse_settings(value).ok(), 240 | "findHighlightForeground" => { 241 | settings.find_highlight_foreground = Color::parse_settings(value).ok() 242 | } 243 | "gutter" => settings.gutter = Color::parse_settings(value).ok(), 244 | "gutterForeground" => { 245 | settings.gutter_foreground = Color::parse_settings(value).ok() 246 | } 247 | "selection" => settings.selection = Color::parse_settings(value).ok(), 248 | "selectionForeground" => { 249 | settings.selection_foreground = Color::parse_settings(value).ok() 250 | } 251 | "selectionBorder" => settings.selection_border = Color::parse_settings(value).ok(), 252 | "inactiveSelection" => { 253 | settings.inactive_selection = Color::parse_settings(value).ok() 254 | } 255 | "inactiveSelectionForeground" => { 256 | settings.inactive_selection_foreground = Color::parse_settings(value).ok() 257 | } 258 | "guide" => settings.guide = Color::parse_settings(value).ok(), 259 | "activeGuide" => settings.active_guide = Color::parse_settings(value).ok(), 260 | "stackGuide" => settings.stack_guide = Color::parse_settings(value).ok(), 261 | "shadow" => settings.shadow = Color::parse_settings(value).ok(), 262 | _ => (), // E.g. 
"shadowWidth" and "invisibles" are ignored 263 | } 264 | } 265 | Ok(settings) 266 | } 267 | } 268 | 269 | impl ParseSettings for Theme { 270 | type Error = ParseThemeError; 271 | 272 | fn parse_settings(settings: Settings) -> Result { 273 | let mut obj = match settings { 274 | Settings::Object(obj) => obj, 275 | _ => return Err(IncorrectSyntax), 276 | }; 277 | let name = match obj.remove("name") { 278 | Some(Settings::String(name)) => Some(name), 279 | None => None, 280 | _ => return Err(IncorrectSyntax), 281 | }; 282 | let author = match obj.remove("author") { 283 | Some(Settings::String(author)) => Some(author), 284 | None => None, 285 | _ => return Err(IncorrectSyntax), 286 | }; 287 | let items = match obj.remove("settings") { 288 | Some(Settings::Array(items)) => items, 289 | _ => return Err(IncorrectSyntax), 290 | }; 291 | let mut iter = items.into_iter(); 292 | let mut settings = match iter.next() { 293 | Some(Settings::Object(mut obj)) => match obj.remove("settings") { 294 | Some(settings) => ThemeSettings::parse_settings(settings)?, 295 | None => return Err(UndefinedSettings), 296 | }, 297 | _ => return Err(UndefinedSettings), 298 | }; 299 | if let Some(Settings::Object(obj)) = obj.remove("gutterSettings") { 300 | for (key, value) in obj { 301 | let color = Color::parse_settings(value).ok(); 302 | match &key[..] 
{ 303 | "background" => settings.gutter = settings.gutter.or(color), 304 | "foreground" => { 305 | settings.gutter_foreground = settings.gutter_foreground.or(color) 306 | } 307 | _ => (), 308 | } 309 | } 310 | } 311 | let mut scopes = Vec::new(); 312 | for json in iter { 313 | // TODO option to disable best effort parsing and bubble up warnings 314 | if let Ok(item) = ThemeItem::parse_settings(json) { 315 | scopes.push(item); 316 | } 317 | } 318 | Ok(Theme { 319 | name, 320 | author, 321 | settings, 322 | scopes, 323 | }) 324 | } 325 | } 326 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | //! Convenient helper functions for common use cases: 2 | //! * Printing to terminal 3 | //! * Iterating lines with `\n`s 4 | //! * Modifying ranges of highlighted output 5 | 6 | use crate::highlighting::{Color, Style, StyleModifier}; 7 | #[cfg(feature = "parsing")] 8 | use crate::parsing::ScopeStackOp; 9 | use std::fmt::Write; 10 | use std::ops::Range; 11 | 12 | #[inline] 13 | fn blend_fg_color(fg: Color, bg: Color) -> Color { 14 | if fg.a == 0xff { 15 | return fg; 16 | } 17 | let ratio = fg.a as u32; 18 | let r = (fg.r as u32 * ratio + bg.r as u32 * (255 - ratio)) / 255; 19 | let g = (fg.g as u32 * ratio + bg.g as u32 * (255 - ratio)) / 255; 20 | let b = (fg.b as u32 * ratio + bg.b as u32 * (255 - ratio)) / 255; 21 | Color { 22 | r: r as u8, 23 | g: g as u8, 24 | b: b as u8, 25 | a: 255, 26 | } 27 | } 28 | 29 | /// Formats the styled fragments using 24-bit color terminal escape codes. 30 | /// Meant for debugging and testing. 31 | /// 32 | /// This function is currently fairly inefficient in its use of escape codes. 33 | /// 34 | /// Note that this does not currently ever un-set the color so that the end of a line will also get 35 | /// highlighted with the background. 
This means if you might want to use `println!("\x1b[0m");` 36 | /// after to clear the coloring. 37 | /// 38 | /// If `bg` is true then the background is also set 39 | pub fn as_24_bit_terminal_escaped(v: &[(Style, &str)], bg: bool) -> String { 40 | let mut s: String = String::new(); 41 | for &(ref style, text) in v.iter() { 42 | if bg { 43 | write!( 44 | s, 45 | "\x1b[48;2;{};{};{}m", 46 | style.background.r, style.background.g, style.background.b 47 | ) 48 | .unwrap(); 49 | } 50 | let fg = blend_fg_color(style.foreground, style.background); 51 | write!(s, "\x1b[38;2;{};{};{}m{}", fg.r, fg.g, fg.b, text).unwrap(); 52 | } 53 | // s.push_str("\x1b[0m"); 54 | s 55 | } 56 | 57 | const LATEX_REPLACE: [(&str, &str); 3] = [("\\", "\\\\"), ("{", "\\{"), ("}", "\\}")]; 58 | 59 | /// Formats the styled fragments using LaTeX textcolor directive. 60 | /// 61 | /// Usage is similar to the `as_24_bit_terminal_escaped` function: 62 | /// 63 | /// ``` 64 | /// use syntect::easy::HighlightLines; 65 | /// use syntect::parsing::SyntaxSet; 66 | /// use syntect::highlighting::{ThemeSet,Style}; 67 | /// use syntect::util::{as_latex_escaped,LinesWithEndings}; 68 | /// 69 | /// // Load these once at the start of your program 70 | /// let ps = SyntaxSet::load_defaults_newlines(); 71 | /// let ts = ThemeSet::load_defaults(); 72 | /// 73 | /// let syntax = ps.find_syntax_by_extension("rs").unwrap(); 74 | /// let s = "pub struct Wow { hi: u64 }\nfn blah() -> u64 {}\n"; 75 | /// 76 | /// let mut h = HighlightLines::new(syntax, &ts.themes["InspiredGitHub"]); 77 | /// for line in LinesWithEndings::from(s) { // LinesWithEndings enables use of newlines mode 78 | /// let ranges: Vec<(Style, &str)> = h.highlight_line(line, &ps).unwrap(); 79 | /// let escaped = as_latex_escaped(&ranges[..]); 80 | /// println!("{}", escaped); 81 | /// } 82 | /// ``` 83 | /// 84 | /// Returned content is intended to be placed inside a fancyvrb 85 | /// Verbatim environment: 86 | /// 87 | /// ```latex 88 | /// 
\usepackage{fancyvrb} 89 | /// \usepackage{xcolor} 90 | /// % ... 91 | /// % enable comma-separated arguments inside \textcolor 92 | /// \makeatletter 93 | /// \def\verbatim@nolig@list{\do\`\do\<\do\>\do\'\do\-} 94 | /// \makeatother 95 | /// % ... 96 | /// \begin{Verbatim}[commandchars=\\\{\}] 97 | /// % content goes here 98 | /// \end{Verbatim} 99 | /// ``` 100 | /// 101 | /// Background color is ignored. 102 | pub fn as_latex_escaped(v: &[(Style, &str)]) -> String { 103 | let mut s: String = String::new(); 104 | let mut prev_style: Option