├── testdata
    ├── issue30.tex
    ├── issue25.c
    ├── issue28.rs
    ├── clear_scopes_test.asa
    ├── test_first_line.test
    ├── minimized_tests
    │   ├── syntax_test_scalamini.scala
    │   └── syntax_test_aspmini.asp
    ├── issue33.rs
    ├── known_syntest_failures.txt
    ├── known_syntest_failures_fancy.txt
    ├── testing-syntax.testsyntax
    ├── DefaultPackage
    │   ├── Indentation Rules - Comments.tmPreferences
    │   └── Indentation Rules.tmPreferences
    ├── test1.html
    ├── test2.html
    ├── highlight_test.erb
    ├── embed_escape_test.sublime-syntax
    ├── Testing.sublime-syntax
    ├── test5.html
    ├── test4.html
    ├── parser_tests.sublime-syntax
    ├── test3.html
    └── JSON.sublime-syntax
├── scripts
    └── id_rsa.enc
├── .git-blame-ignore-revs
├── codecov.yml
├── assets
    ├── default.themedump
    ├── default_metadata.packdump
    ├── default_newlines.packdump
    └── default_nonewlines.packdump
├── .gitignore
├── src
    ├── highlighting
    │   ├── Readme.md
    │   ├── mod.rs
    │   ├── settings.rs
    │   ├── theme_set.rs
    │   ├── theme.rs
    │   ├── theme_load.rs
    │   └── style.rs
    ├── utils.rs
    ├── parsing
    │   ├── mod.rs
    │   ├── regex.rs
    │   └── syntax_definition.rs
    ├── escape.rs
    ├── lib.rs
    ├── dumps.rs
    ├── util.rs
    └── easy.rs
├── .gitmodules
├── benches
    ├── highlight_utils
    │   └── mod.rs
    ├── utils
    │   └── mod.rs
    ├── load_and_highlight.rs
    ├── parsing.rs
    ├── loading.rs
    └── highlighting.rs
├── tests
    ├── public_api.rs
    └── error_handling.rs
├── examples
    ├── latex-demo.rs
    ├── synhtml.rs
    ├── parsyncat.rs
    ├── gendata.rs
    ├── synhtml-css-classes.rs
    ├── syncat.rs
    ├── synstats.rs
    └── syntest.rs
├── LICENSE.txt
├── .gitattributes
├── Makefile
├── .github
    └── workflows
    │   ├── Release.yml
    │   └── CI.yml
├── Cargo.toml
└── DESIGN.md


/testdata/issue30.tex:
--------------------------------------------------------------------------------
1 | \title{}
2 | 


--------------------------------------------------------------------------------
/testdata/issue25.c:
--------------------------------------------------------------------------------
1 | struct{estruct
2 | 


--------------------------------------------------------------------------------
/testdata/issue28.rs:
--------------------------------------------------------------------------------
1 | use std::fs::File;
2 | use std;
3 | 


--------------------------------------------------------------------------------
/scripts/id_rsa.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trishume/syntect/HEAD/scripts/id_rsa.enc


--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # initial cargo fmt
2 | cc41c3803b20b79147fa606f950658bc12e50dc2 
3 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | # Make codecov not add verbose comments to pull requests
2 | comment: false
3 | 


--------------------------------------------------------------------------------
/assets/default.themedump:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trishume/syntect/HEAD/assets/default.themedump


--------------------------------------------------------------------------------
/testdata/clear_scopes_test.asa:
--------------------------------------------------------------------------------
1 | Sub wot
2 |   5
3 | %>
4 | <a href="5">Sub</a>
5 | <%=?
6 | 5 "wow"
7 | 


--------------------------------------------------------------------------------
/testdata/test_first_line.test:
--------------------------------------------------------------------------------
1 | #!/usr/bin/ruby
2 | 
3 | def blah
4 |   test = "wow"
5 |   puts test
6 | end
7 | 


--------------------------------------------------------------------------------
/assets/default_metadata.packdump:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trishume/syntect/HEAD/assets/default_metadata.packdump


--------------------------------------------------------------------------------
/assets/default_newlines.packdump:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trishume/syntect/HEAD/assets/default_newlines.packdump


--------------------------------------------------------------------------------
/assets/default_nonewlines.packdump:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trishume/syntect/HEAD/assets/default_nonewlines.packdump


--------------------------------------------------------------------------------
/testdata/minimized_tests/syntax_test_scalamini.scala:
--------------------------------------------------------------------------------
1 | // SYNTAX TEST "Packages/Scala/Scala.sublime-syntax"
2 | 
3 |    class Foo[A](a: Bar)
4 | // ^^^^^^^^^ meta.class.identifier.scala
5 | 


--------------------------------------------------------------------------------
/testdata/issue33.rs:
--------------------------------------------------------------------------------
1 | // issue #33: it prints this line and then hangs on the next one, hang time increases super-linearly with line length
2 | impl ApplicationPreferenceseeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee {
3 | }
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # macOS
 2 | .DS_Store
 3 | Thumbs.db
 4 | 
 5 | # Windows
 6 | [Dd]esktop.ini
 7 | 
 8 | # cargo
 9 | target/
10 | 
11 | # IDEA
12 | .idea/
13 | *.iml
14 | 
15 | # Sublime Text
16 | *.sublime-workspace
17 | 
18 | # VS Code
19 | .vscode/
20 | 


--------------------------------------------------------------------------------
/testdata/known_syntest_failures.txt:
--------------------------------------------------------------------------------
1 | loading syntax definitions from testdata/Packages
2 | FAILED testdata/Packages/C#/tests/syntax_test_Strings.cs: 38
3 | FAILED testdata/Packages/LaTeX/syntax_test_latex.tex: 1
4 | FAILED testdata/Packages/Makefile/syntax_test_makefile.mak: 6
5 | exiting with code 1
6 | 


--------------------------------------------------------------------------------
/testdata/known_syntest_failures_fancy.txt:
--------------------------------------------------------------------------------
1 | loading syntax definitions from testdata/Packages
2 | FAILED testdata/Packages/C#/tests/syntax_test_Strings.cs: 38
3 | FAILED testdata/Packages/LaTeX/syntax_test_latex.tex: 1
4 | FAILED testdata/Packages/Markdown/syntax_test_markdown.md: 11
5 | exiting with code 1
6 | 


--------------------------------------------------------------------------------
/testdata/testing-syntax.testsyntax:
--------------------------------------------------------------------------------
 1 | hi lol
 2 | wow zoom
 3 | html
 4 |   <br style="color: #555;"/>
 5 |   troll wow lol
 6 | htmout
 7 | inline
 8 |   hi zoom lol wow bamf
 9 | inout
10 | troll
11 |   wow lol
12 |   zoom lol bamf doopadoop
13 | out
14 |   zoom lol bamf
15 |   nested
16 |     wow zoom lol bamf doopadoop
17 |   outnested
18 | zout
19 |   zoom lol bamf
20 | 


--------------------------------------------------------------------------------
/testdata/DefaultPackage/Indentation Rules - Comments.tmPreferences:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 |     <key>scope</key>
 6 |     <string>comment</string>
 7 |     <key>settings</key>
 8 |     <dict>
 9 |         <key>preserveIndent</key>
10 |         <true/>
11 |     </dict>
12 | </dict>
13 | </plist>
14 | 


--------------------------------------------------------------------------------
/testdata/minimized_tests/syntax_test_aspmini.asp:
--------------------------------------------------------------------------------
 1 | ' SYNTAX TEST "Packages/ASP/HTML-ASP.sublime-syntax"
 2 | <%
 3 |  Class TestClass2 Public Sub TestSub () Response.Write("wow") End Sub End Class
 4 | '^^^^^ meta.class.asp meta.class.identifier.asp storage.type.asp
 5 | '      ^ meta.class.asp meta.class.identifier.asp
 6 | '                ^ meta.class.asp meta.class.body.asp
 7 | %>
 8 |  <p>foobar</p>
 9 | '^^^ text.html.asp meta.tag.block.any.html - source.asp.embedded.html
10 | 


--------------------------------------------------------------------------------
/src/highlighting/Readme.md:
--------------------------------------------------------------------------------
 1 | # Attribution
 2 | 
 3 | Much of the code in this module/folder is heavily based on and largely copy-pasted from the
 4 | [sublimate](https://github.com/defuz/sublimate) project by
 5 | [Ivan Ivashchenko a.k.a @defuz](https://github.com/defuz). The project was released under the MIT license.
 6 | 
 7 | I needed to copy-paste the code here because it required some adaptations to work with the other
 8 | parts of syntect. One example modification is using my bit-packed `Scope` type instead of the
 9 | original string-based one.
10 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "testdata/Packages"]
 2 | 	branch = st3 # TODO: this line should be removed once st4 functionality has been implemented
 3 | 	path = testdata/Packages
 4 | 	url = https://github.com/sublimehq/Packages
 5 | [submodule "testdata/InspiredGitHub.tmtheme"]
 6 | 	path = testdata/InspiredGitHub.tmtheme
 7 | 	url = https://github.com/sethlopezme/InspiredGitHub.tmtheme.git
 8 | [submodule "testdata/Solarized"]
 9 | 	path = testdata/Solarized
10 | 	url = https://github.com/braver/Solarized.git
11 | [submodule "testdata/spacegray"]
12 | 	path = testdata/spacegray
13 | 	url = https://github.com/kkga/spacegray.git
14 | 


--------------------------------------------------------------------------------
/benches/highlight_utils/mod.rs:
--------------------------------------------------------------------------------
 1 | use syntect::easy::HighlightLines;
 2 | use syntect::highlighting::Theme;
 3 | use syntect::parsing::{SyntaxReference, SyntaxSet};
 4 | 
 5 | /// Common helper for benchmarking highlighting.
 6 | pub fn do_highlight(
 7 |     s: &str,
 8 |     syntax_set: &SyntaxSet,
 9 |     syntax: &SyntaxReference,
10 |     theme: &Theme,
11 | ) -> usize {
12 |     let mut h = HighlightLines::new(syntax, theme);
13 |     let mut count = 0;
14 |     for line in s.lines() {
15 |         let regions = h.highlight_line(line, syntax_set).unwrap();
16 |         count += regions.len();
17 |     }
18 |     count
19 | }
20 | 


--------------------------------------------------------------------------------
/testdata/DefaultPackage/Indentation Rules.tmPreferences:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>scope</key>
 6 | 	<string>source</string>
 7 | 	<key>settings</key>
 8 | 	<dict>
 9 | 		<key>decreaseIndentPattern</key>
10 | 		<string>^(.*\*/)?\s*\}[;\s]*$</string>
11 | 		<key>increaseIndentPattern</key>
12 | 		<string>^.*(\{[^}"']*)$</string>
13 | 		<key>disableIndentNextLinePattern</key>
14 | 		<string>^\s*\{[\]})]*\s*$</string>
15 | 		<key>indentParens</key>
16 | 		<true/>
17 | 	</dict>
18 | </dict>
19 | </plist>
20 | 


--------------------------------------------------------------------------------
/benches/utils/mod.rs:
--------------------------------------------------------------------------------
 1 | /// To be able to keep the same Criterion benchmark names as before (for the
 2 | /// `--baseline` feature of Criterion) we use one level of indirection to
 3 | /// map file name to file path.
 4 | pub fn get_test_file_path(file: &str) -> &str {
 5 |     match file {
 6 |         "highlight_test.erb" => "testdata/highlight_test.erb",
 7 |         "InspiredGitHub.tmTheme" => "testdata/InspiredGitHub.tmtheme/InspiredGitHub.tmTheme",
 8 |         "Ruby.sublime-syntax" => "testdata/Packages/Ruby/Ruby.sublime-syntax",
 9 |         "jquery.js" => "testdata/jquery.js",
10 |         "parser.rs" => "testdata/parser.rs",
11 |         "scope.rs" => "src/parsing/scope.rs",
12 |         _ => panic!("Unknown test file {}", file),
13 |     }
14 | }
15 | 


--------------------------------------------------------------------------------
/testdata/test1.html:
--------------------------------------------------------------------------------
1 | <span style="background-color:#ffffff;color:#a71d5d;">[</span><span style="background-color:#ffffff;color:#4183c4;">w</span><span style="background-color:#ffffff;color:#a71d5d;">](</span><span style="background-color:#ffffff;font-style:italic;color:#4183c4;">t.co</span><span style="background-color:#ffffff;color:#a71d5d;">) </span><span style="background-color:#ffffff;font-style:italic;color:#a71d5d;">*</span><span style="background-color:#ffffff;font-style:italic;color:#323232;">hi</span><span style="background-color:#ffffff;font-style:italic;color:#a71d5d;">* </span><span style="background-color:#ffffff;font-weight:bold;color:#a71d5d;">**</span><span style="background-color:#ffffff;font-weight:bold;color:#323232;">five</span><span style="background-color:#ffffff;font-weight:bold;color:#a71d5d;">**</span>
2 | 


--------------------------------------------------------------------------------
/tests/public_api.rs:
--------------------------------------------------------------------------------
 1 | #[test]
 2 | fn public_api() {
 3 |     // Install a compatible nightly toolchain if it is missing
 4 |     rustup_toolchain::install(public_api::MINIMUM_NIGHTLY_RUST_VERSION).unwrap();
 5 | 
 6 |     // Build rustdoc JSON
 7 |     let rustdoc_json = rustdoc_json::Builder::default()
 8 |         .toolchain(public_api::MINIMUM_NIGHTLY_RUST_VERSION)
 9 |         .build()
10 |         .unwrap();
11 | 
12 |     // Derive the public API from the rustdoc JSON
13 |     let public_api = public_api::Builder::from_rustdoc_json(rustdoc_json)
14 |         .omit_blanket_impls(true)
15 |         .build()
16 |         .unwrap();
17 | 
18 |     // Assert that the public API matches the latest snapshot.
19 |     // Run with env var `UPDATE_SNAPSHOTS=yes` to update the snapshot.
20 |     public_api.assert_eq_or_update("./tests/snapshots/public-api.txt");
21 | }
22 | 


--------------------------------------------------------------------------------
/src/utils.rs:
--------------------------------------------------------------------------------
 1 | //! Private library utilities that are not exposed to clients since we don't
 2 | //! want to make semver guarantees about them
 3 | 
 4 | use std::path::Path;
 5 | 
 6 | use walkdir::WalkDir;
 7 | 
 8 | /// Private helper to walk a dir and also follow symbolic links.
 9 | pub fn walk_dir<P: AsRef<Path>>(folder: P) -> WalkDir {
10 |     WalkDir::new(folder).follow_links(true)
11 | }
12 | 
13 | #[cfg(all(test, feature = "parsing"))]
14 | pub mod testdata {
15 |     use std::sync::LazyLock;
16 | 
17 |     use crate::parsing::SyntaxSet;
18 | 
19 |     /// The [`SyntaxSet`] loaded from the `testdata/Packages` folder
20 |     ///
21 |     /// Shared here to avoid re-doing a particularly costly construction in various tests
22 |     pub static PACKAGES_SYN_SET: LazyLock<SyntaxSet> =
23 |         LazyLock::new(|| SyntaxSet::load_from_folder("testdata/Packages").unwrap());
24 | }
25 | 


--------------------------------------------------------------------------------
/src/highlighting/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Everything having to do with turning parsed text into styled text.
 2 | //!
 3 | //! You might want to check out [`Theme`] for its handy text-editor related settings like selection
 4 | //! color, [`ThemeSet`] for loading themes, as well as things starting with `Highlight` for how to
 5 | //! highlight text.
 6 | //!
 7 | //! [`Theme`]: struct.Theme.html
 8 | //! [`ThemeSet`]: struct.ThemeSet.html
 9 | mod highlighter;
10 | mod selector;
11 | #[cfg(feature = "plist-load")]
12 | pub(crate) mod settings;
13 | mod style;
14 | mod theme;
15 | #[cfg(feature = "plist-load")]
16 | mod theme_load;
17 | mod theme_set;
18 | 
19 | pub use self::highlighter::*;
20 | pub use self::selector::*;
21 | #[cfg(feature = "plist-load")]
22 | pub use self::settings::SettingsError;
23 | pub use self::style::*;
24 | pub use self::theme::*;
25 | #[cfg(feature = "plist-load")]
26 | pub use self::theme_load::*;
27 | pub use self::theme_set::*;
28 | 


--------------------------------------------------------------------------------
/examples/latex-demo.rs:
--------------------------------------------------------------------------------
 1 | use syntect::easy::HighlightLines;
 2 | use syntect::highlighting::{Style, ThemeSet};
 3 | use syntect::parsing::SyntaxSet;
 4 | use syntect::util::{as_latex_escaped, LinesWithEndings};
 5 | 
 6 | fn main() {
 7 |     // Load these once at the start of your program
 8 |     let ps = SyntaxSet::load_defaults_newlines();
 9 |     let ts = ThemeSet::load_defaults();
10 | 
11 |     let syntax = ps.find_syntax_by_extension("rs").unwrap();
12 |     let s = "pub struct Wow { hi: u64 }\nfn blah() -> u64 {}\n";
13 | 
14 |     let mut h = HighlightLines::new(syntax, &ts.themes["InspiredGitHub"]);
15 |     for line in LinesWithEndings::from(s) {
16 |         // LinesWithEndings enables use of newlines mode
17 |         let ranges: Vec<(Style, &str)> = h.highlight_line(line, &ps).unwrap();
18 |         let escaped = as_latex_escaped(&ranges[..]);
19 |         println!("\n{:?}", line);
20 |         println!("\n{}", escaped);
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/testdata/test2.html:
--------------------------------------------------------------------------------
1 | <span class="text html markdown"><span class="meta paragraph markdown"><span class="meta link inline markdown"><span class="punctuation definition link begin markdown">[</span></span><span class="meta link inline description markdown">w</span><span class="meta link inline markdown"><span class="punctuation definition link end markdown">]</span></span><span class="meta link inline markdown"><span class="punctuation definition metadata begin markdown">(</span><span class="markup underline link markdown">t.co</span><span class="punctuation definition metadata end markdown">)</span></span> <span class="markup italic markdown"><span class="punctuation definition italic begin markdown">*</span>hi<span class="punctuation definition italic end markdown">*</span></span> <span class="markup bold markdown"><span class="punctuation definition bold begin markdown">**</span>five<span class="punctuation definition bold end markdown">**</span></span>
2 | 


--------------------------------------------------------------------------------
/testdata/highlight_test.erb:
--------------------------------------------------------------------------------
 1 | <script type="text/javascript">
 2 |   var lol = "JS nesting";
 3 |   class WithES6 extends THREE.Mesh {
 4 |     static highQuality() { // such classes
 5 |       return this.toString();
 6 |     }
 7 |   }
 8 |   <%
 9 |     # The outer syntax is HTML (Rails) detected from the .erb extension
10 |     puts "Ruby #{'nesting' * 2}"
11 |     here = <<-WOWCOOL + CORRECTLY_DOES_NOT_HIGHLIGHT_REST_OF_LINE
12 |       high quality parsing even supports custom heredoc endings
13 |       #{
14 |       nested = 5 * <<-ZOMG
15 |         nested heredocs! (no highlighting: 5 * 6, yes highlighting: #{5 * 6})
16 |       ZOMG
17 |       }
18 |     WOWCOOL
19 |     sql = <<-SQL
20 |       select * from heredocs where there_are_special_heredoc_names = true
21 |     SQL
22 |   %>
23 | </script>
24 | <style type="text/css">
25 |   /* the HTML syntax also supports CSS of course */
26 |   .stuff #wow {
27 |     border: 5px #ffffff;
28 |     background: url("wow");
29 |   }
30 | </style>
31 | 


--------------------------------------------------------------------------------
/src/highlighting/settings.rs:
--------------------------------------------------------------------------------
 1 | /// Code based on <https://github.com/defuz/sublimate/blob/master/src/core/settings.rs>
 2 | /// released under the MIT license by @defuz
 3 | use plist::Error as PlistError;
 4 | use std::io::{Read, Seek};
 5 | 
 6 | pub use serde_json::Value as Settings;
 7 | 
 8 | pub trait ParseSettings: Sized {
 9 |     type Error;
10 |     fn parse_settings(settings: Settings) -> Result<Self, Self::Error>;
11 | }
12 | 
13 | /// An error parsing a settings file
14 | #[derive(Debug, thiserror::Error)]
15 | #[non_exhaustive]
16 | pub enum SettingsError {
17 |     /// Incorrect Plist syntax
18 |     #[error("Incorrect Plist syntax: {0}")]
19 |     Plist(PlistError),
20 | }
21 | 
22 | impl From<PlistError> for SettingsError {
23 |     fn from(error: PlistError) -> SettingsError {
24 |         SettingsError::Plist(error)
25 |     }
26 | }
27 | 
28 | pub fn read_plist<R: Read + Seek>(reader: R) -> Result<Settings, SettingsError> {
29 |     let settings = plist::from_reader(reader)?;
30 |     Ok(settings)
31 | }
32 | 


--------------------------------------------------------------------------------
/src/parsing/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Everything about parsing text into text annotated with scopes.
 2 | //!
 3 | //! The most important struct here is [`SyntaxSet`], check out the docs for that.
 4 | //!
 5 | //! [`SyntaxSet`]: struct.SyntaxSet.html
 6 | 
 7 | #[cfg(feature = "metadata")]
 8 | pub mod metadata;
 9 | #[cfg(feature = "parsing")]
10 | mod parser;
11 | #[cfg(feature = "parsing")]
12 | pub mod syntax_definition;
13 | #[cfg(feature = "parsing")]
14 | mod syntax_set;
15 | #[cfg(all(feature = "parsing", feature = "yaml-load"))]
16 | mod yaml_load;
17 | 
18 | #[cfg(any(feature = "parsing", feature = "yaml-load", feature = "metadata"))]
19 | mod regex;
20 | mod scope;
21 | 
22 | #[cfg(feature = "metadata")]
23 | pub use self::metadata::*;
24 | #[cfg(feature = "parsing")]
25 | pub use self::parser::*;
26 | #[cfg(feature = "parsing")]
27 | pub use self::syntax_definition::SyntaxDefinition;
28 | #[cfg(feature = "parsing")]
29 | pub use self::syntax_set::*;
30 | #[cfg(all(feature = "parsing", feature = "yaml-load"))]
31 | pub use self::yaml_load::*;
32 | 
33 | #[cfg(any(feature = "parsing", feature = "yaml-load", feature = "metadata"))]
34 | pub use self::regex::*;
35 | 
36 | pub use self::scope::*;
37 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Tristan Hume, Keith Hall, Google Inc and other contributors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/testdata/embed_escape_test.sublime-syntax:
--------------------------------------------------------------------------------
 1 | %YAML 1.2
 2 | ---
 3 | name: Embed_Escape Used by tests in src/parsing/parser.rs
 4 | scope: source.embed-test
 5 | contexts:
 6 |   main:
 7 |     - match: (")
 8 |       scope: meta.attribute-with-value.style.html string.quoted.double punctuation.definition.string.begin.html
 9 |       embed: embedded_context
10 |       embed_scope: meta.attribute-with-value.style.html source.css
11 |       escape: '\1'
12 |       escape_captures:
13 |         0: meta.attribute-with-value.style.html string.quoted.double punctuation.definition.string.end.html
14 |     - match: '(>)\s*'
15 |       captures:
16 |         1: meta.tag.style.begin.html punctuation.definition.tag.end.html
17 |       embed: embedded_context
18 |       embed_scope: source.css.embedded.html
19 |       escape: (?i)(?=</style)
20 |     - match: '</style>'
21 |     - match: foobar
22 |       scope: top-level.test
23 | 
24 |   embedded_context:
25 |     - match: a
26 |       scope: a
27 |       push: # prove that multiple context levels can be "escape"d
28 |         - match: b
29 |           push:
30 |             - match: c
31 |               push:
32 |                 - match: 'test'
33 |                   scope: test.embedded
34 | 


--------------------------------------------------------------------------------
/examples/synhtml.rs:
--------------------------------------------------------------------------------
 1 | //! Prints highlighted HTML for a file to stdout.
 2 | //! Basically just wraps a body around `highlighted_html_for_file`
 3 | use syntect::highlighting::{Color, ThemeSet};
 4 | use syntect::html::highlighted_html_for_file;
 5 | use syntect::parsing::SyntaxSet;
 6 | 
 7 | fn main() {
 8 |     let ss = SyntaxSet::load_defaults_newlines();
 9 |     let ts = ThemeSet::load_defaults();
10 | 
11 |     let args: Vec<String> = std::env::args().collect();
12 |     if args.len() < 2 {
13 |         println!("Please pass in a file to highlight");
14 |         return;
15 |     }
16 | 
17 |     let style = "
18 |         pre {
19 |             font-size:13px;
20 |             font-family: Consolas, \"Liberation Mono\", Menlo, Courier, monospace;
21 |         }";
22 |     println!(
23 |         "<head><title>{}</title><style>{}</style></head>",
24 |         &args[1], style
25 |     );
26 |     let theme = &ts.themes["base16-ocean.dark"];
27 |     let c = theme.settings.background.unwrap_or(Color::WHITE);
28 |     println!(
29 |         "<body style=\"background-color:#{:02x}{:02x}{:02x};\">\n",
30 |         c.r, c.g, c.b
31 |     );
32 |     let html = highlighted_html_for_file(&args[1], &ss, theme).unwrap();
33 |     println!("{}", html);
34 |     println!("</body>");
35 | }
36 | 


--------------------------------------------------------------------------------
/benches/load_and_highlight.rs:
--------------------------------------------------------------------------------
 1 | mod highlight_utils;
 2 | mod utils;
 3 | 
 4 | /// Measures the time it takes to run the whole pipeline:
 5 | ///  1. Load assets
 6 | ///  2. Parse
 7 | ///  3. Highlight
 8 | fn run(b: &mut criterion::Bencher, file: &str) {
 9 |     let path = utils::get_test_file_path(file);
10 | 
11 |     b.iter(|| {
12 |         let ss = syntect::parsing::SyntaxSet::load_defaults_nonewlines();
13 |         let ts = syntect::highlighting::ThemeSet::load_defaults();
14 | 
15 |         let syntax = ss.find_syntax_for_file(path).unwrap().unwrap();
16 |         let s = std::fs::read_to_string(path).unwrap();
17 | 
18 |         highlight_utils::do_highlight(&s, &ss, syntax, &ts.themes["base16-ocean.dark"]);
19 |     })
20 | }
21 | 
22 | fn load_and_highlight_benchmark(c: &mut criterion::Criterion) {
23 |     let mut group = c.benchmark_group("load_and_highlight");
24 |     for input in &[
25 |         "highlight_test.erb",
26 |         "InspiredGitHub.tmTheme",
27 |         "Ruby.sublime-syntax",
28 |         "parser.rs",
29 |     ] {
30 |         group.bench_with_input(format!("\"{}\"", input), input, |b, s| run(b, s));
31 |     }
32 |     group.finish();
33 | }
34 | 
35 | criterion::criterion_group! {
36 |     name = benches;
37 |     config = criterion::Criterion::default().sample_size(50);
38 |     targets = load_and_highlight_benchmark
39 | }
40 | criterion::criterion_main!(benches);
41 | 


--------------------------------------------------------------------------------
/benches/parsing.rs:
--------------------------------------------------------------------------------
 1 | use criterion::{criterion_group, criterion_main, Bencher, Criterion};
 2 | use std::time::Duration;
 3 | use syntect::parsing::{ParseState, SyntaxReference, SyntaxSet};
 4 | 
 5 | mod utils;
 6 | 
 7 | fn do_parse(s: &str, ss: &SyntaxSet, syntax: &SyntaxReference) -> usize {
 8 |     let mut state = ParseState::new(syntax);
 9 |     let mut count = 0;
10 |     for line in s.lines() {
11 |         let ops = state.parse_line(line, ss).unwrap();
12 |         count += ops.len();
13 |     }
14 |     count
15 | }
16 | 
17 | fn parse_file(b: &mut Bencher, file: &str) {
18 |     let path = utils::get_test_file_path(file);
19 | 
20 |     // don't load from dump so we don't count lazy regex compilation time
21 |     let ss = SyntaxSet::load_defaults_nonewlines();
22 | 
23 |     let syntax = ss.find_syntax_for_file(path).unwrap().unwrap();
24 |     let s = std::fs::read_to_string(path).unwrap();
25 | 
26 |     b.iter(|| do_parse(&s, &ss, syntax));
27 | }
28 | 
29 | fn parsing_benchmark(c: &mut Criterion) {
30 |     let mut parse = c.benchmark_group("parse");
31 |     for input in &[
32 |         "highlight_test.erb",
33 |         "InspiredGitHub.tmTheme",
34 |         "Ruby.sublime-syntax",
35 |         "jquery.js",
36 |         "parser.rs",
37 |         "scope.rs",
38 |     ] {
39 |         parse.bench_with_input(format!("\"{}\"", input), input, |b, s| parse_file(b, s));
40 |     }
41 |     parse.finish();
42 | }
43 | 
44 | criterion_group! {
45 |     name = benches;
46 |     config = Criterion::default().sample_size(50).warm_up_time(Duration::from_secs(30));
47 |     targets = parsing_benchmark
48 | }
49 | criterion_main!(benches);
50 | 


--------------------------------------------------------------------------------
/testdata/Testing.sublime-syntax:
--------------------------------------------------------------------------------
 1 | %YAML 1.2
 2 | ---
 3 | name: Sublime Syntax Testing
 4 | scope: source.thumetesting
 5 | file_extensions:
 6 |   - testsyntax
 7 | hidden: false
 8 | contexts:
 9 |   prototype:
10 |     - match: lol
11 |       scope: storage
12 |     - include: zoom
13 |   main:
14 |     - meta_include_prototype: false
15 |     - match: =testset
16 |       push: testset
17 |     - match: hi
18 |       scope: comment
19 |     - match: troll
20 |       push: wow
21 |     - match: inline
22 |       push:
23 |         - match: testing
24 |           scope: constant
25 |         - match: inout
26 |           pop: true
27 |     - match: html
28 |       push: scope:text.html.basic
29 |       with_prototype:
30 |         - match: htmout
31 |           pop: true
32 |     - include: wow
33 |   wow:
34 |     - meta_scope: wow
35 |     - match: wow
36 |       scope: string
37 |     - match: out
38 |       set: zoom
39 |   zoom:
40 |     - meta_scope: zoom
41 |     - match: zoom
42 |       scope: constant
43 |     - match: zout
44 |       set: bamf
45 |     - match: nested
46 |       push:
47 |         - meta_scope: nested
48 |         - match: doopadoop
49 |           scope: comment
50 |         - match: outnested
51 |           pop: true
52 |     - include: bamf
53 |   bamf:
54 |     - meta_scope: bamf
55 |     - match: bamf
56 |       scope: keyword
57 |   testset:
58 |     - meta_scope: constant.testset.meta
59 |     - meta_content_scope: string.testset.content
60 |     - match: =doset
61 |       set: setto
62 |   setto:
63 |     - clear_scopes: 1
64 |     - meta_scope: constant.setto.meta
65 |     - meta_content_scope: comment.setto.content
66 |     - match: =endset
67 |       pop: true
68 | 


--------------------------------------------------------------------------------
/testdata/test5.html:
--------------------------------------------------------------------------------
 1 | <pre style="background-color:#2b303b;">
 2 | <span style="color:#65737e;">hi</span><span style="color:#c0c5ce;"> lol
 3 | </span><span style="color:#a3be8c;">wow</span><span style="color:#c0c5ce;"> zoom
 4 | </span><span style="color:#c0c5ce;">html
 5 | </span><span style="color:#c0c5ce;">  &lt;</span><span style="color:#bf616a;">br </span><span style="color:#d08770;">style</span><span style="color:#c0c5ce;">=&quot;color: </span><span style="color:#96b5b4;">#555</span><span style="color:#c0c5ce;">;&quot;/&gt;
 6 | </span><span style="color:#c0c5ce;">  troll wow lol
 7 | </span><span style="color:#c0c5ce;">htmout
 8 | </span><span style="color:#c0c5ce;">inline
 9 | </span><span style="color:#c0c5ce;">  hi </span><span style="color:#d08770;">zoom </span><span style="color:#b48ead;">lol</span><span style="color:#c0c5ce;"> wow </span><span style="color:#b48ead;">bamf
10 | </span><span style="color:#c0c5ce;">inout
11 | </span><span style="color:#c0c5ce;">troll
12 | </span><span style="color:#c0c5ce;">  </span><span style="color:#a3be8c;">wow </span><span style="color:#b48ead;">lol
13 | </span><span style="color:#c0c5ce;">  </span><span style="color:#d08770;">zoom </span><span style="color:#b48ead;">lol bamf</span><span style="color:#c0c5ce;"> doopadoop
14 | </span><span style="color:#c0c5ce;">out
15 | </span><span style="color:#c0c5ce;">  </span><span style="color:#d08770;">zoom</span><span style="color:#c0c5ce;"> lol </span><span style="color:#b48ead;">bamf
16 | </span><span style="color:#c0c5ce;">  nested
17 | </span><span style="color:#c0c5ce;">    wow zoom lol bamf </span><span style="color:#65737e;">doopadoop
18 | </span><span style="color:#c0c5ce;">  outnested
19 | </span><span style="color:#c0c5ce;">zout
20 | </span><span style="color:#c0c5ce;">  zoom lol </span><span style="color:#b48ead;">bamf
21 | </span></pre>
22 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Sublime Text
 2 | 
 3 | #                                       tabs indentation,         no trailing
 4 | *.stTheme                     eol=lf    whitespace=-tab-in-indent,trailing-space,tabwidth=4
 5 | #                                       spaces indentation,      no trailing
 6 | *.sublime-color-scheme        eol=lf    whitespace=tab-in-indent,trailing-space,tabwidth=4
 7 | *.hidden-color-scheme         eol=lf    whitespace=tab-in-indent,trailing-space,tabwidth=4
 8 | *.sublime-settings            eol=lf    whitespace=tab-in-indent,trailing-space,tabwidth=4
 9 | *.sublime-syntax              eol=lf    whitespace=tab-in-indent,trailing-space,tabwidth=2
10 | *.sublime-theme               eol=lf    whitespace=tab-in-indent,trailing-space,tabwidth=4
11 | 
12 | # TextMate
13 | 
14 | #                                       tabs indentation,         no trailing
15 | *.tmLanguage                  eol=lf    whitespace=-tab-in-indent,trailing-space,tabwidth=4
16 | *.hidden-tmLanguage           eol=lf    whitespace=-tab-in-indent,trailing-space,tabwidth=4
17 | *.tmPreferences               eol=lf    whitespace=-tab-in-indent,trailing-space,tabwidth=4
18 | *.tmTheme                     eol=lf    whitespace=-tab-in-indent,trailing-space,tabwidth=4
19 | *.hidden-tmTheme              eol=lf    whitespace=-tab-in-indent,trailing-space,tabwidth=4
20 | 
21 | # syntect
22 | 
23 | testdata/*                              linguist-vendored
24 | 
25 | # General
26 | 
27 | #                                       spaces indentation,      no trailing
28 | *.md                          eol=lf    whitespace=tab-in-indent,trailing-space,tabwidth=4
29 | #                             binary
30 | *.png                         binary
31 | *.tex                                   diff=tex
32 | *.pdf                         binary    diff=astextplain
33 | *.snap                                  linguist-language=txt
34 | 


--------------------------------------------------------------------------------
/benches/loading.rs:
--------------------------------------------------------------------------------
 1 | use criterion::{criterion_group, criterion_main, Bencher, Criterion};
 2 | use syntect::highlighting::ThemeSet;
 3 | use syntect::parsing::{SyntaxSet, SyntaxSetBuilder};
 4 | 
 5 | /// Times `SyntaxSet::load_defaults_newlines()`.
 6 | fn bench_load_internal_dump(b: &mut Bencher) {
 7 |     b.iter(SyntaxSet::load_defaults_newlines);
 8 | }
 9 | 
10 | /// Times `ThemeSet::load_defaults()`.
11 | fn bench_load_internal_themes(b: &mut Bencher) {
12 |     b.iter(ThemeSet::load_defaults);
13 | }
14 | 
15 | /// Times `ThemeSet::get_theme` on one `.tmTheme` file under `testdata`.
16 | fn bench_load_theme(b: &mut Bencher) {
17 |     let theme_path = "testdata/spacegray/base16-ocean.dark.tmTheme";
18 |     b.iter(|| ThemeSet::get_theme(theme_path));
19 | }
20 | 
21 | /// Times filling a brand-new builder from `testdata/Packages`.
22 | fn bench_add_from_folder(b: &mut Bencher) {
23 |     b.iter(|| {
24 |         let mut fresh = SyntaxSetBuilder::new();
25 |         fresh.add_from_folder("testdata/Packages", false).unwrap()
26 |     });
27 | }
28 | 
29 | /// Times `build()` on clones of a builder that is populated outside the
30 | /// measured closure.
31 | fn bench_link_syntaxes(b: &mut Bencher) {
32 |     let mut populated = SyntaxSetBuilder::new();
33 |     populated.add_from_folder("testdata/Packages", false).unwrap();
34 |     b.iter(|| {
35 |         populated.clone().build();
36 |     });
37 | }
38 | 
39 | /// Times `from_uncompressed_dump_file` on the newlines pack dump in `assets`.
40 | fn bench_from_dump_file(b: &mut Bencher) {
41 |     let dump_path = "assets/default_newlines.packdump";
42 |     b.iter(|| {
43 |         let _: SyntaxSet = syntect::dumps::from_uncompressed_dump_file(dump_path).unwrap();
44 |     })
45 | }
46 | 
47 | /// Registers every loading benchmark under its own id, in a fixed order.
48 | fn loading_benchmark(c: &mut Criterion) {
49 |     let routines: [(&str, fn(&mut Bencher)); 6] = [
50 |         ("load_internal_dump", bench_load_internal_dump),
51 |         ("load_internal_themes", bench_load_internal_themes),
52 |         ("load_theme", bench_load_theme),
53 |         ("add_from_folder", bench_add_from_folder),
54 |         ("link_syntaxes", bench_link_syntaxes),
55 |         ("from_dump_file", bench_from_dump_file),
56 |     ];
57 |     for (id, routine) in routines.iter() {
58 |         c.bench_function(id, *routine);
59 |     }
60 | }
61 | 
62 | // Harness configuration: 50 samples per benchmark.
63 | criterion_group! {
64 |     name = benches;
65 |     config = Criterion::default().sample_size(50);
66 |     targets = loading_benchmark
67 | }
68 | criterion_main!(benches);
69 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | SUBMODULES = testdata/Packages/.git
 2 | 
 3 | info:
 4 | 	$(info Targets)
 5 | 	$(info -----------------------------------------------------------------------)
 6 | 	$(info assets      | generate default theme packs and syntax)
 7 | 	$(info - OTHER TARGETS -------------------------------------------------------)
 8 | 	$(info themes      | generate default theme pack)
 9 | 	$(info packs       | generate default syntax pack)
10 | 	$(info syntest     | run syntax test summary)
11 | 
12 | 
13 | $(SUBMODULES):
14 | 	git submodule update --init --recursive
15 | 
16 | assets: packs themes
17 | 
18 | packs: $(SUBMODULES)
19 | 	cargo run --features=metadata --example gendata -- synpack testdata/Packages assets/default_newlines.packdump assets/default_nonewlines.packdump assets/default_metadata.packdump testdata/DefaultPackage
20 | 
21 | themes: $(SUBMODULES)
22 | 	cargo run --example gendata -- themepack testdata assets/default.themedump
23 | 
24 | syntest: $(SUBMODULES)
25 | 	@echo Tip: Run make update-known-failures to update the known failures file.
26 | 	cargo run --release --example syntest -- testdata/Packages testdata/Packages --summary | diff -U 1000000 testdata/known_syntest_failures.txt -
27 | 	@echo No new failures!
28 | 
29 | syntest-fancy: $(SUBMODULES)
30 | 	@echo Tip: Run make update-known-failures to update the known failures file.
31 | 	cargo run --features default-fancy --no-default-features --release --example syntest -- testdata/Packages testdata/Packages --summary | diff -U 1000000 testdata/known_syntest_failures_fancy.txt -
32 | 	@echo No new failures!
33 | 
34 | update-known-failures: $(SUBMODULES)
35 | 	cargo run --release --example syntest -- testdata/Packages testdata/Packages --summary | tee testdata/known_syntest_failures.txt
36 | 
37 | update-known-failures-fancy: $(SUBMODULES)
38 | 	cargo run --features default-fancy --no-default-features --release --example syntest -- testdata/Packages testdata/Packages --summary | tee testdata/known_syntest_failures_fancy.txt
39 | 


--------------------------------------------------------------------------------
/testdata/test4.html:
--------------------------------------------------------------------------------
 1 | <pre style="background-color:#ffffff;">
 2 | <span style="color:#323232;">%</span><span style="font-weight:bold;color:#a71d5d;">YAML </span><span style="color:#0086b3;">1.2
 3 | </span><span style="color:#323232;">---
 4 | </span><span style="font-style:italic;color:#969896;"># http://www.sublimetext.com/docs/3/syntax.html
 5 | </span><span style="color:#63a35c;">name</span><span style="color:#323232;">: </span><span style="color:#183691;">Cargo Build Results
 6 | </span><span style="color:#63a35c;">scope</span><span style="color:#323232;">: </span><span style="color:#183691;">source.build_results
 7 | </span><span style="color:#63a35c;">hidden</span><span style="color:#323232;">: </span><span style="color:#0086b3;">true
 8 | </span><span style="color:#63a35c;">contexts</span><span style="color:#323232;">:
 9 | </span><span style="color:#323232;">  </span><span style="color:#63a35c;">main</span><span style="color:#323232;">:
10 | </span><span style="color:#323232;">    - </span><span style="color:#63a35c;">match</span><span style="color:#323232;">: </span><span style="color:#183691;">&#39;^(..[^:\n]*):([0-9]+):?([0-9]+)?:? &#39;
11 | </span><span style="color:#323232;">      </span><span style="color:#63a35c;">scope</span><span style="color:#323232;">: </span><span style="color:#183691;">entity.name.filename
12 | </span><span style="color:#323232;">    - </span><span style="color:#63a35c;">match</span><span style="color:#323232;">: </span><span style="color:#183691;">&#39;\berror: &#39;
13 | </span><span style="color:#323232;">      </span><span style="color:#63a35c;">scope</span><span style="color:#323232;">: </span><span style="color:#183691;">message.error
14 | </span><span style="color:#323232;">    - </span><span style="color:#63a35c;">match</span><span style="color:#323232;">: </span><span style="color:#183691;">&#39;^\[.+\]$&#39;
15 | </span><span style="color:#323232;">      </span><span style="color:#63a35c;">scope</span><span style="color:#323232;">: </span><span style="color:#183691;">comment
16 | </span></pre>
17 | 


--------------------------------------------------------------------------------
/src/escape.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
 2 | // file at the top-level directory of this distribution and at
 3 | // http://rust-lang.org/COPYRIGHT.
 4 | //
 5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 8 | // option. This file may not be copied, modified, or distributed
 9 | // except according to those terms.
10 | 
11 | //! HTML Escaping
12 | //!
13 | //! This module contains one tuple struct which can be used to HTML-escape a
14 | //! string of text (for use in a format string).
15 | 
16 | use std::fmt;
17 | 
18 | /// Wrapper around a string slice whose [`fmt::Display`] output is the
19 | /// HTML-escaped form of that string (`<`, `>`, `&`, `'`, `"` become entities).
20 | pub struct Escape<'a>(pub &'a str);
21 | 
22 | impl fmt::Display for Escape<'_> {
23 |     /// Writes the wrapped string with `<`, `>`, `&`, `'` and `"` replaced by
24 |     /// their HTML entities; every other byte is passed through unchanged.
25 |     fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
26 |         // Only a handful of characters need escaping, see
27 |         // http://stackoverflow.com/questions/7381974
28 |         let text = self.0;
29 |         // Start of the run of plain text that has not been written yet.
30 |         let mut pending_from = 0;
31 |         for (idx, byte) in text.bytes().enumerate() {
32 |             let entity = match byte {
33 |                 b'>' => "&gt;",
34 |                 b'<' => "&lt;",
35 |                 b'&' => "&amp;",
36 |                 b'\'' => "&#39;",
37 |                 b'"' => "&quot;",
38 |                 _ => continue,
39 |             };
40 |             // Every matched byte is ASCII, so `idx` and `idx + 1` are always
41 |             // char boundaries and the slices below cannot split a character.
42 |             fmt.write_str(&text[pending_from..idx])?;
43 |             fmt.write_str(entity)?;
44 |             pending_from = idx + 1;
45 |         }
46 | 
47 |         // Flush whatever follows the last escaped character.
48 |         if pending_from < text.len() {
49 |             fmt.write_str(&text[pending_from..])?;
50 |         }
51 |         Ok(())
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/examples/parsyncat.rs:
--------------------------------------------------------------------------------
 1 | //! Highlights the files given on the command line, in parallel.
 2 | //! Prints the highlighted output to stdout.
 3 | 
 4 | use rayon::prelude::*;
 5 | use syntect::easy::HighlightFile;
 6 | use syntect::highlighting::{Style, ThemeSet};
 7 | use syntect::parsing::SyntaxSet;
 8 | 
 9 | use std::fs::File;
10 | use std::io::{BufRead, BufReader};
11 | 
12 | /// Reads `filename` into a vector of lines, each keeping its line ending.
13 | ///
14 | /// `read_line()` is used instead of `BufRead::lines()` in order to preserve
15 | /// the newlines and get better highlighting.
16 | fn read_lines_with_newlines(filename: &str) -> Vec<String> {
17 |     let mut reader = BufReader::new(File::open(filename).unwrap());
18 |     let mut lines = Vec::new();
19 |     loop {
20 |         let mut line = String::new();
21 |         if reader.read_line(&mut line).unwrap() == 0 {
22 |             break;
23 |         }
24 |         lines.push(line);
25 |     }
26 |     lines
27 | }
28 | 
29 | /// Highlights every file named on the command line in parallel and prints
30 | /// the results to stdout in argument order.
31 | fn main() {
32 |     let files: Vec<String> = std::env::args().skip(1).collect();
33 | 
34 |     if files.is_empty() {
35 |         println!("Please provide some files to highlight.");
36 |         return;
37 |     }
38 | 
39 |     let syntax_set = SyntaxSet::load_defaults_newlines();
40 |     let theme_set = ThemeSet::load_defaults();
41 | 
42 |     // Step 1: load all file contents up front...
43 |     let contents: Vec<Vec<String>> = files
44 |         .par_iter()
45 |         .map(|filename| read_lines_with_newlines(filename))
46 |         .collect();
47 | 
48 |     // Step 2: ...so the highlighted regions can borrow from `contents`...
49 |     let regions: Vec<Vec<(Style, &str)>> = files
50 |         .par_iter()
51 |         .zip(&contents)
52 |         .map(|(filename, file_lines)| {
53 |             let theme = &theme_set.themes["base16-ocean.dark"];
54 |             let mut highlighter = HighlightFile::new(filename, &syntax_set, theme).unwrap();
55 | 
56 |             let mut regions = Vec::new();
57 |             for line in file_lines {
58 |                 let styled = highlighter
59 |                     .highlight_lines
60 |                     .highlight_line(line, &syntax_set)
61 |                     .unwrap();
62 |                 regions.extend(styled);
63 |             }
64 |             regions
65 |         })
66 |         .collect();
67 | 
68 |     // Step 3: ...and finally print each file's regions.
69 |     for file_regions in &regions {
70 |         let escaped = syntect::util::as_24_bit_terminal_escaped(file_regions, true);
71 |         print!("{}", escaped);
72 |     }
73 | }
74 | 


--------------------------------------------------------------------------------
/benches/highlighting.rs:
--------------------------------------------------------------------------------
 1 | use criterion::{criterion_group, criterion_main, Bencher, Criterion};
 2 | use std::str::FromStr;
 3 | use syntect::highlighting::ThemeSet;
 4 | use syntect::html::highlighted_html_for_string;
 5 | use syntect::parsing::{ScopeStack, SyntaxSet};
 6 | 
 7 | mod highlight_utils;
 8 | mod utils;
 9 | 
10 | /// Benchmarks `highlight_utils::do_highlight` on the test file named `file`.
11 | fn highlight_file(b: &mut Bencher, file: &str) {
12 |     let test_path = utils::get_test_file_path(file);
13 | 
14 |     // Original note: "don't load from dump so we don't count lazy regex
15 |     // compilation time". NOTE(review): confirm `load_defaults_nonewlines`
16 |     // still matches that intent.
17 |     let syntax_set = SyntaxSet::load_defaults_nonewlines();
18 |     let theme_set = ThemeSet::load_defaults();
19 | 
20 |     let syntax_ref = syntax_set.find_syntax_for_file(test_path).unwrap().unwrap();
21 |     let contents = std::fs::read_to_string(test_path).unwrap();
22 | 
23 |     // The theme lookup stays inside the closure, so it is part of the routine.
24 |     b.iter(|| {
25 |         let theme = &theme_set.themes["base16-ocean.dark"];
26 |         highlight_utils::do_highlight(&contents, &syntax_set, syntax_ref, theme)
27 |     });
28 | }
29 | 
30 | /// Benchmarks matching a two-scope selector against a long JavaScript scope stack.
31 | fn stack_matching(b: &mut Bencher) {
32 |     let scopes = "source.js meta.group.js meta.group.js meta.block.js meta.function-call.method.js meta.group.js meta.object-literal.js meta.block.js meta.function-call.method.js meta.group.js variable.other.readwrite.js";
33 |     let stack = ScopeStack::from_str(scopes).unwrap();
34 |     let selector = ScopeStack::from_str("source meta.function-call.method").unwrap();
35 |     b.iter(|| selector.does_match(stack.as_slice()));
36 | }
37 | 
38 | /// Benchmarks `highlighted_html_for_string` on `testdata/parser.rs`.
39 | fn highlight_html(b: &mut Bencher) {
40 |     let syntax_set = SyntaxSet::load_defaults_newlines();
41 |     let theme_set = ThemeSet::load_defaults();
42 | 
43 |     let source_path = "testdata/parser.rs";
44 |     let syntax_ref = syntax_set.find_syntax_for_file(source_path).unwrap().unwrap();
45 |     let source = std::fs::read_to_string(source_path).unwrap();
46 | 
47 |     b.iter(|| {
48 |         let theme = &theme_set.themes["base16-ocean.dark"];
49 |         highlighted_html_for_string(&source, &syntax_set, syntax_ref, theme)
50 |     });
51 | }
52 | 
53 | /// Registers the two stand-alone benchmarks, then one [`highlight_file`]
54 | /// benchmark per test file in the "highlight" group.
55 | fn highlighting_benchmark(c: &mut Criterion) {
56 |     const INPUTS: [&str; 6] = [
57 |         "highlight_test.erb",
58 |         "InspiredGitHub.tmTheme",
59 |         "Ruby.sublime-syntax",
60 |         "jquery.js",
61 |         "parser.rs",
62 |         "scope.rs",
63 |     ];
64 | 
65 |     c.bench_function("stack_matching", stack_matching);
66 |     c.bench_function("highlight_html", highlight_html);
67 | 
68 |     let mut group = c.benchmark_group("highlight");
69 |     for name in INPUTS.iter() {
70 |         // Benchmark ids are the file name wrapped in double quotes.
71 |         let id = format!("\"{}\"", name);
72 |         group.bench_with_input(id, name, |b, s| highlight_file(b, s));
73 |     }
74 |     group.finish();
75 | }
76 | 
77 | // Harness configuration: 10 samples per benchmark.
78 | criterion_group! {
79 |     name = benches;
80 |     config = Criterion::default().sample_size(10);
81 |     targets = highlighting_benchmark
82 | }
83 | criterion_main!(benches);
84 | 


--------------------------------------------------------------------------------
/tests/error_handling.rs:
--------------------------------------------------------------------------------
 1 | use std::{
 2 |     error::Error,
 3 |     fmt::Display,
 4 |     io::{Error as IoError, ErrorKind},
 5 | };
 6 | 
 7 | use syntect::{
 8 |     parsing::{ParseScopeError, ParseSyntaxError},
 9 |     LoadingError,
10 | };
11 | 
12 | // `LoadingError::BadPath` renders as a fixed message.
13 | #[test]
14 | fn loading_error_bad_path_display() {
15 |     let error = LoadingError::BadPath;
16 |     assert_display(error, "Invalid path");
17 | }
18 | 
19 | // A syntax parse error is prefixed with the name of the offending file.
20 | #[test]
21 | fn loading_error_parse_syntax_display() {
22 |     let cause = ParseSyntaxError::MissingMandatoryKey("main");
23 |     let error = LoadingError::ParseSyntax(cause, String::from("file.sublime-syntax"));
24 |     assert_display(
25 |         error,
26 |         "file.sublime-syntax: Missing mandatory key in YAML file: main",
27 |     );
28 | }
29 | 
30 | // The wrapped I/O error is exposed through `Error::source`.
31 | #[test]
32 | fn loading_error_io_source() {
33 |     let io_error_source = IoError::new(ErrorKind::Other, "this is an error string");
34 |     let error = LoadingError::Io(io_error_source);
35 |     assert_display(error.source().unwrap(), "this is an error string");
36 | }
37 | 
38 | // The missing key name is appended to the message.
39 | #[test]
40 | fn parse_syntax_error_missing_mandatory_key_display() {
41 |     let error = ParseSyntaxError::MissingMandatoryKey("mandatory_key");
42 |     assert_display(error, "Missing mandatory key in YAML file: mandatory_key");
43 | }
44 | 
45 | // Both the regex text and the underlying error appear in the message.
46 | #[test]
47 | fn parse_syntax_error_regex_compile_error_display() {
48 |     let error =
49 |         ParseSyntaxError::RegexCompileError("[a-Z]".to_owned(), LoadingError::BadPath.into());
50 |     assert_display(error, "Error while compiling regex '[a-Z]': Invalid path");
51 | }
52 | 
53 | // `ParseScopeError::TooLong` renders as a fixed message.
54 | #[test]
55 | fn parse_scope_error_display() {
56 |     let error = ParseScopeError::TooLong;
57 |     assert_display(error, "Too long scope. Scopes can be at most 8 atoms long.")
58 | }
59 | 
60 | // The boxed cause of a regex compile error is exposed through `Error::source`.
61 | #[test]
62 | fn parse_syntax_error_regex_compile_error_source() {
63 |     let pattern = "[[[[[[[[[[[[[[[".to_owned();
64 |     let error = ParseSyntaxError::RegexCompileError(pattern, LoadingError::BadPath.into());
65 |     assert_display(error.source().unwrap(), "Invalid path");
66 | }
67 | 
68 | // The wrapped syntax error is exposed through `Error::source`.
69 | #[test]
70 | fn loading_error_parse_syntax_source() {
71 |     let cause =
72 |         ParseSyntaxError::RegexCompileError("[a-Z]".to_owned(), LoadingError::BadPath.into());
73 |     let error = LoadingError::ParseSyntax(cause, String::from("any-file.sublime-syntax"));
74 |     assert_display(
75 |         error.source().unwrap(),
76 |         "Error while compiling regex '[a-Z]': Invalid path",
77 |     )
78 | }
79 | 
80 | /// Asserts that the [`Display`] output of `display` is exactly
81 | /// `expected_display`.
82 | fn assert_display(display: impl Display, expected_display: &str) {
83 |     assert_eq!(display.to_string(), expected_display);
84 | }
85 | 


--------------------------------------------------------------------------------
/testdata/parser_tests.sublime-syntax:
--------------------------------------------------------------------------------
 1 | %YAML 1.2
 2 | ---
 3 | name: Used by tests in src/parsing/parser.rs
 4 | scope: source.test
 5 | contexts:
 6 |   main:
 7 |     - include: test_does_not_crash_on_unlinkable_context
 8 |     - match: '#infinite_seeming_loop_test'
 9 |       scope: keyword.test
10 |       push: infinite_seeming_loop_c
11 |     - match: '(?=#infinite_loop_test)'
12 |       push: infinite_loop_test_pop_if_not_whitespace
13 |     - match: \'
14 |       scope: punctuation.definition.string.begin.example
15 |       push: cleared_scopes_string_test
16 |     - match: '\d+'
17 |       scope: constant.numeric.test
18 | 
19 |   infinite_loop_test_pop_if_not_whitespace:
20 |     - match: '(?=\S)'
21 |       pop: true
22 |   infinite_seeming_loop_a:
23 |     - meta_content_scope: test
24 |     - match: 'h'
25 |       scope: string.unquoted.test
26 |     - match: 'ello'
27 |       scope: keyword.control.test
28 |   infinite_seeming_loop_b:
29 |     - match: ''
30 |       pop: true
31 |     - match: '(?=.)'
32 |       pop: true
33 |     - match: '(?=h)'
34 |       pop: true
35 |     - match: 'h'
36 |       scope: entity.name.function.test
37 |     - match: 'e'
38 |       scope: storage.type.test
39 |   infinite_seeming_loop_c:
40 |     - match: ''
41 |       push: [infinite_seeming_loop_a, infinite_seeming_loop_b]
42 |   cleared_scopes_string_test:
43 |     - meta_scope: string.quoted.single.example
44 |     - match: '#too_many_cleared_scopes_test'
45 |       scope: example.pushes-clear-scopes.example
46 |       push:
47 |         - clear_scopes: 10
48 |         - meta_scope: example.meta-scope.after-clear-scopes.example
49 |         - match: 'test'
50 |           scope: example.pops-clear-scopes.example
51 |           pop: true
52 |     - match: '#simple_cleared_scopes_test'
53 |       scope: example.pushes-clear-scopes.example
54 |       push:
55 |         - clear_scopes: 1
56 |         - meta_scope: example.meta-scope.after-clear-scopes.example
57 |         - match: 'test'
58 |           scope: example.pops-clear-scopes.example
59 |           pop: true
60 |     - match: '#nested_clear_scopes_test'
61 |       scope: example.pushes-clear-scopes.example
62 |       push:
63 |         - clear_scopes: 1
64 |         - meta_scope: example.meta-scope.after-clear-scopes.example
65 |         - match: 'foo'
66 |           scope: foo
67 |           push:
68 |             - clear_scopes: 1
69 |             - meta_scope: example.meta-scope.cleared-previous-meta-scope.example
70 |             - match: 'bar'
71 |               scope: bar
72 |               pop: true
73 |         - match: 'test'
74 |           scope: example.pops-clear-scopes.example
75 |           pop: true
76 |     - match: '\\.'
77 |       scope: constant.character.escape.example
78 |     - match: \'
79 |       scope: punctuation.definition.string.end.example
80 |       pop: true
81 | 


--------------------------------------------------------------------------------
/.github/workflows/Release.yml:
--------------------------------------------------------------------------------
 1 | name: Release
 2 | 
 3 | # To make a release:
 4 | #
 5 | # 1. Update Cargo.toml version and CHANGELOG.md on master
 6 | # 2. Run workflow https://github.com/trishume/syntect/actions/workflows/Release.yml on master
 7 | # 3. Done!
 8 | 
 9 | on:
10 |   workflow_dispatch: # This workflow can only be triggered manually.
11 |     inputs:
12 |       one_time_crates_io_token_secret:
13 |         description: "A one-time crates.io token (delete it after first use)"
14 |         required: true
15 |         type: string
16 | 
17 | env:
18 |   CARGO_TERM_COLOR: always
19 | 
20 | jobs:
21 |   # Make sure regular CI passes before we make a release.
22 |   ci:
23 |     uses: ./.github/workflows/CI.yml
24 |     with:
25 |       one_time_crates_io_token_secret: masked
26 | 
27 |   # After regular CI passes we publish to crates.io and push a git tag.
28 |   publish-and-tag:
29 |     needs: ci
30 |     runs-on: ubuntu-latest
31 |     permissions:
32 |       contents: write # So we can push a tag.
33 |     outputs:
34 |       VERSION: ${{ steps.version.outputs.VERSION }}
35 |       TAG_NAME: ${{ steps.version.outputs.TAG_NAME }}
36 |     steps:
37 |       - run: |
38 |           # See https://github.com/actions/runner/issues/643#issuecomment-708468716
39 |           # See https://github.com/actions/runner/issues/475#issuecomment-635775403
40 |           masked_secret=$(jq -r '.inputs.one_time_crates_io_token_secret' $GITHUB_EVENT_PATH)
41 |           echo "::add-mask::$masked_secret"
42 |       - uses: actions/checkout@v4
43 |       - run: cargo publish -p syntect
44 |         env:
45 |           CARGO_REGISTRY_TOKEN: ${{ inputs.one_time_crates_io_token_secret }}
46 |       - name: version
47 |         id: version
48 |         run: |
49 |           version=$(cargo read-manifest --manifest-path Cargo.toml | jq --raw-output .version)
50 |           echo "VERSION=${version}" >> $GITHUB_OUTPUT
51 |           echo "TAG_NAME=v${version}" >> $GITHUB_OUTPUT
52 |       - name: push tag
53 |         run: |
54 |           git tag ${{ steps.version.outputs.TAG_NAME }}
55 |           git push origin ${{ steps.version.outputs.TAG_NAME }}
56 | 
57 |   # Lastly, create a GitHub release.
58 |   release:
59 |     needs: publish-and-tag
60 |     runs-on: ubuntu-latest
61 |     permissions:
62 |       contents: write # So we can create a release.
63 |     steps:
64 |       - uses: actions/checkout@v4
65 |       - run: cargo install parse-changelog@0.6.4 --locked
66 |       - name: create release
67 |         env:
68 |           GH_TOKEN: ${{ github.token }}
69 |         run: |
70 |           notes="$(parse-changelog CHANGELOG.md ${{ needs.publish-and-tag.outputs.VERSION }})"
71 |           title="${{ needs.publish-and-tag.outputs.TAG_NAME }}"
72 |           gh release create --title "$title" --notes "$notes" ${{ needs.publish-and-tag.outputs.TAG_NAME }}
73 | 


--------------------------------------------------------------------------------
/examples/gendata.rs:
--------------------------------------------------------------------------------
 1 | //! This program is mainly intended for generating the dumps that are compiled into
 2 | //! syntect, not as a helpful example for beginners.
 3 | //! Although it is a valid example for serializing syntaxes, you probably won't need
 4 | //! to do this yourself unless you want to cache your own compiled grammars.
 5 | //!
 6 | //! An example of how this script is used to generate the pack files included
 7 | //! with syntect can be found under `make packs` in the Makefile.
 8 | use std::env;
 9 | use syntect::dumps::*;
10 | use syntect::highlighting::ThemeSet;
11 | use syntect::parsing::SyntaxSetBuilder;
12 | 
13 | /// Prints the command-line usage to stdout and exits with status 2.
14 | fn usage_and_exit() -> ! {
15 |     println!(
16 |         "USAGE: gendata synpack source-dir \
17 |               newlines.packdump nonewlines.packdump \
18 |               [metadata.packdump] [metadata extra-source-dir]\n       \
19 |               gendata themepack source-dir themepack.themedump"
20 |     );
21 |     std::process::exit(2);
22 | }
23 | 
24 | /// Dispatches on the first argument: `synpack` writes the syntax pack dumps,
25 | /// `themepack` writes the theme dump, anything else prints the usage.
26 | fn main() {
27 |     // Only the first six arguments are ever inspected.
28 |     let args: Vec<String> = env::args().skip(1).take(6).collect();
29 |     match args.as_slice() {
30 |         [cmd, package_dir, packpath_newlines, packpath_nonewlines, optional @ ..]
31 |             if cmd == "synpack" =>
32 |         {
33 |             // The two trailing arguments are only read with the `metadata` feature.
34 |             let _option_metapath = optional.first();
35 |             let _option_metasource = optional.get(1);
36 | 
37 |             // Pack built with `true` as the second `add_from_folder` argument.
38 |             let mut builder = SyntaxSetBuilder::new();
39 |             builder.add_plain_text_syntax();
40 |             builder.add_from_folder(package_dir, true).unwrap();
41 |             let ss = builder.build();
42 |             dump_to_uncompressed_file(&ss, packpath_newlines).unwrap();
43 | 
44 |             // Pack built with `false` as the second `add_from_folder` argument.
45 |             let mut builder_nonewlines = SyntaxSetBuilder::new();
46 |             builder_nonewlines.add_plain_text_syntax();
47 |             builder_nonewlines
48 |                 .add_from_folder(package_dir, false)
49 |                 .unwrap();
50 | 
51 |             #[cfg(feature = "metadata")]
52 |             {
53 |                 if let Some(metasource) = _option_metasource {
54 |                     builder_nonewlines
55 |                         .add_from_folder(metasource, false)
56 |                         .unwrap();
57 |                 }
58 |             }
59 | 
60 |             let ss_nonewlines = builder_nonewlines.build();
61 |             dump_to_uncompressed_file(&ss_nonewlines, packpath_nonewlines).unwrap();
62 | 
63 |             #[cfg(feature = "metadata")]
64 |             {
65 |                 if let Some(metapath) = _option_metapath {
66 |                     dump_to_file(&ss_nonewlines.metadata(), metapath).unwrap();
67 |                 }
68 |             }
69 |         }
70 |         [cmd, theme_dir, packpath, ..] if cmd == "themepack" => {
71 |             let ts = ThemeSet::load_from_folder(theme_dir).unwrap();
72 |             dump_to_file(&ts, packpath).unwrap();
73 |         }
74 |         _ => usage_and_exit(),
75 |     }
76 | }
77 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
  1 | [package]
  2 | name = "syntect"
  3 | description = "library for high quality syntax highlighting and code intelligence using Sublime Text's grammars"
  4 | documentation = "https://docs.rs/syntect"
  5 | repository = "https://github.com/trishume/syntect"
  6 | keywords = ["syntax", "highlighting", "highlighter", "colouring", "parsing"]
  7 | categories = ["parser-implementations", "parsing", "text-processing"]
  8 | readme = "Readme.md"
  9 | license = "MIT"
 10 | version = "5.3.0" # remember to update html_root_url
 11 | authors = ["Tristan Hume <tristan@thume.ca>"]
 12 | edition = "2021"
 13 | exclude = [
 14 |     "testdata/*",
 15 |     "/scripts/*",
 16 |     "/Makefile",
 17 |     "/codecov.yml"
 18 | ]
 19 | 
 20 | [package.metadata.docs.rs]
 21 | # Toggle on extra features that aren't on by default
 22 | features = ["metadata"]
 23 | 
 24 | [dependencies]
 25 | yaml-rust = { package = "yaml-rust2", version = "0.10.4", optional = true, default-features = false }
 26 | onig = { version = "6.5.1", optional = true, default-features = false }
 27 | fancy-regex = { version = "0.16.2", optional = true }
 28 | walkdir = "2.0"
 29 | regex-syntax = { version = "0.8", optional = true }
 30 | plist = { version = "1.3", optional = true }
 31 | bincode = { version = "1.0", optional = true }
 32 | flate2 = { version = "1.0", optional = true }
 33 | fnv = { version = "1.0", optional = true }
 34 | serde = "1.0"
 35 | serde_derive = "1.0"
 36 | serde_json = { version = "1.0", optional = true }
 37 | once_cell = "1.8"
 38 | thiserror = "2.0.12"
 39 | 
 40 | [dev-dependencies]
 41 | criterion = { version = "0.3", features = [ "html_reports" ] }
 42 | rayon = "1.0.0"
 43 | regex = "1.0"
 44 | getopts = "0.2"
 45 | pretty_assertions = "0.6"
 46 | rustup-toolchain = "0.1.5"
 47 | rustdoc-json = "0.9.7"
 48 | public-api = "0.50.1"
 49 | serde_json = "1.0"
 50 | 
 51 | [features]
 52 | 
 53 | # Dump loading using flate2
 54 | dump-load = ["dep:flate2", "dep:bincode"]
 55 | # Dump creation using flate2
 56 | dump-create = ["dep:flate2", "dep:bincode"]
 57 | 
 58 | regex-fancy = ["dep:fancy-regex"]
 59 | regex-onig = ["dep:onig"]
 60 | 
 61 | parsing = ["dep:regex-syntax", "dep:fnv", "dump-create", "dump-load"]
 62 | 
 63 | # Support for .tmPreferences metadata files (indentation, comment syntax, etc)
 64 | metadata = ["parsing", "plist-load", "dep:serde_json"]
 65 | 
 66 | # Enables inclusion of the default syntax packages.
 67 | default-syntaxes = ["parsing", "dump-load"]
 68 | # Enables inclusion of the default theme packages.
 69 | default-themes = ["dump-load"]
 70 | 
 71 | html = ["parsing"]
 72 | # Support for parsing .tmTheme files and .tmPreferences files
 73 | plist-load = ["dep:plist", "dep:serde_json"]
 74 | # Support for parsing .sublime-syntax files
 75 | yaml-load = ["dep:yaml-rust", "parsing"]
 76 | 
 77 | default-onig = ["parsing", "default-syntaxes", "default-themes", "html", "plist-load", "yaml-load", "dump-load", "dump-create", "regex-onig"]
 78 | # In order to switch to the fancy-regex engine, disable default features then add the default-fancy feature
 79 | default-fancy = ["parsing", "default-syntaxes", "default-themes", "html", "plist-load", "yaml-load", "dump-load", "dump-create", "regex-fancy"]
 80 | default = ["default-onig"]
 81 | 
 82 | # [profile.release]
 83 | # debug = true
 84 | 
 85 | [profile.dev.package]
 86 | aho-corasick.opt-level = 2
 87 | fancy-regex.opt-level = 2
 88 | regex-automata.opt-level = 2
 89 | regex-syntax.opt-level = 2
 90 | 
 91 | [lib]
 92 | bench = false
 93 | 
 94 | [[bench]]
 95 | name = "highlighting"
 96 | harness = false
 97 | 
 98 | [[bench]]
 99 | name = "load_and_highlight"
100 | harness = false
101 | 
102 | [[bench]]
103 | name = "loading"
104 | harness = false
105 | 
106 | [[bench]]
107 | name = "parsing"
108 | harness = false
109 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! Welcome to the syntect docs.
 2 | //!
 3 | //! Much more info about syntect is available on the [GitHub page](https://github.com/trishume/syntect).
 4 | //!
 5 | //! May I suggest that you start by reading the `Readme.md` file in the main repo.
 6 | //! Once you're done with that you can look at the docs for [`parsing::SyntaxSet`]
 7 | //! and for the [`easy`] module.
 8 | //!
 9 | //! Almost everything in syntect is divided up into either the [`parsing`] module
10 | //! for turning text into text annotated with scopes, and the [`highlighting`] module
11 | //! for turning annotated text into styled/colored text.
12 | //!
13 | //! Some docs have example code but a good place to look is the `syncat` example as
14 | //! well as the source code for the [`easy`] module in `easy.rs` as that shows how to
15 | //! plug the various parts together for common use cases.
16 | //!
17 | //! [`parsing::SyntaxSet`]: parsing/struct.SyntaxSet.html
18 | //! [`easy`]: easy/index.html
19 | //! [`parsing`]: parsing/index.html
20 | //! [`highlighting`]: highlighting/index.html
21 | 
22 | #![doc(html_root_url = "https://docs.rs/syntect/5.3.0")]
23 | 
24 | #[cfg(test)]
25 | #[macro_use]
26 | extern crate pretty_assertions;
27 | 
28 | #[cfg(any(feature = "dump-load", feature = "dump-create"))]
29 | pub mod dumps;
30 | #[cfg(feature = "parsing")]
31 | pub mod easy;
32 | #[cfg(feature = "html")]
33 | mod escape;
34 | pub mod highlighting;
35 | #[cfg(feature = "html")]
36 | pub mod html;
37 | pub mod parsing;
38 | pub mod util;
39 | mod utils;
40 | 
41 | use std::io::Error as IoError;
42 | 
43 | #[cfg(feature = "plist-load")]
44 | use crate::highlighting::{ParseThemeError, SettingsError};
45 | 
46 | /// An error enum for all things that can go wrong within syntect.
47 | #[derive(Debug, thiserror::Error)]
48 | #[non_exhaustive]
49 | pub enum Error {
50 |     /// An error occurred while loading a syntax or theme
51 |     #[error("Loading error: {0}")]
52 |     LoadingError(#[from] LoadingError),
53 |     /// An error occurred while parsing (variant only exists with the `parsing` feature)
54 |     #[cfg(feature = "parsing")]
55 |     #[error("Parsing error: {0}")]
56 |     ParsingError(#[from] crate::parsing::ParsingError),
57 |     /// A scope error, converted from `crate::parsing::ScopeError`
58 |     #[error("Scope error: {0}")]
59 |     ScopeError(#[from] crate::parsing::ScopeError),
60 |     /// A formatting error, converted from `std::fmt::Error`
61 |     #[error("Formatting error: {0}")]
62 |     Fmt(#[from] std::fmt::Error),
63 |     /// An IO error, converted from `std::io::Error`
64 |     #[error("IO Error: {0}")]
65 |     Io(#[from] IoError),
66 | }
67 | 
68 | /// Common error type used by syntax and theme loading
69 | #[derive(Debug, thiserror::Error)]
70 | #[non_exhaustive]
71 | pub enum LoadingError {
72 |     /// Error finding all the files in a directory
73 |     #[error("error finding all the files in a directory: {0}")]
74 |     WalkDir(#[from] walkdir::Error),
75 |     /// Error reading a file
76 |     #[error("error reading a file: {0}")]
77 |     Io(#[from] IoError),
78 |     /// A syntax file was invalid in some way; displayed as `<String field>: <parse error>`
79 |     #[cfg(all(feature = "yaml-load", feature = "parsing"))]
80 |     #[error("{1}: {0}")]
81 |     ParseSyntax(#[source] crate::parsing::ParseSyntaxError, String),
82 |     /// A metadata file was invalid in some way (its JSON could not be parsed)
83 |     #[cfg(feature = "metadata")]
84 |     #[error("Failed to parse JSON")]
85 |     ParseMetadata(#[from] serde_json::Error),
86 |     /// A theme file was invalid in some way
87 |     #[cfg(feature = "plist-load")]
88 |     #[error("Invalid syntax theme")]
89 |     ParseTheme(#[from] ParseThemeError),
90 |     /// A theme's Plist syntax was invalid in some way
91 |     #[cfg(feature = "plist-load")]
92 |     #[error("Invalid syntax theme settings")]
93 |     ReadSettings(#[from] SettingsError),
94 |     /// A path given to a method was invalid.
95 |     /// Possibly because it didn't reference a file or wasn't UTF-8.
96 |     #[error("Invalid path")]
97 |     BadPath,
98 | }
99 | 


--------------------------------------------------------------------------------
/.github/workflows/CI.yml:
--------------------------------------------------------------------------------
  1 | name: CI
  2 | 
  3 | on:
  4 |   workflow_call: # From .github/workflows/Release.yml
  5 |   workflow_dispatch:
  6 |   push:
  7 |     branches: [ master ]
  8 |   pull_request:
  9 |     branches: [ master ]
 10 | 
 11 | env:
 12 |   CARGO_TERM_COLOR: always
 13 | 
 14 | jobs:
 15 |   min_version:
 16 |     name: Minimum supported rust version
 17 |     runs-on: ubuntu-24.04
 18 |     steps:
 19 |     - uses: actions/checkout@v4
 20 |     - uses: dtolnay/rust-toolchain@master
 21 |       with:
 22 |         toolchain: stable minus 2 releases # MSRV policy = last three versions of stable
 23 |         components: clippy, rustfmt
 24 | 
 25 |     - name: Run cargo fmt --check
 26 |       run: cargo fmt -- --check
 27 | 
 28 |     - name: Run cargo clippy
 29 |       run: |
 30 |         # Must run before `cargo check` until we use Rust 1.52
 31 |         # See https://github.com/rust-lang/rust-clippy/issues/4612
 32 |         cargo clippy --all-targets --all-features -- \
 33 |           --allow clippy::unknown_clippy_lints \
 34 |           --allow clippy::unnecessary_cast \
 35 |           --allow clippy::block_in_if_condition_stmt
 36 |         # Prevent regressions of https://github.com/trishume/syntect/issues/98
 37 |         cargo clippy --all-features --lib -- --deny clippy::panic
 38 |     - name: Run cargo check
 39 |       run: |
 40 |         cargo check --all-features --all-targets
 41 |         # Check that if some other crate in the downstream dependency tree
 42 |         # enables serde's "derive" feature, syntect still builds.
 43 |         cargo check --all-features --features serde/derive
 44 | 
 45 |   documentation:
 46 |     name: Documentation checks
 47 |     runs-on: ubuntu-latest
 48 |     steps:
 49 |     - uses: actions/checkout@v4
 50 |     - run: RUSTDOCFLAGS='--deny warnings' cargo doc --no-deps --document-private-items --all-features
 51 | 
 52 |   bat-tests:
 53 |     name: Run bat syntax regression tests
 54 |     runs-on: ubuntu-latest
 55 |     steps:
 56 |     - uses: actions/checkout@v4
 57 |       with:
 58 |         path: 'syntect'
 59 |     - uses: actions/checkout@v4
 60 |       with:
 61 |         repository: 'sharkdp/bat'
 62 |         path: 'bat'
 63 |         ref: master
 64 |         submodules: true
 65 |     - name: bat/tests/syntax-tests/regression_test.sh
 66 |       run: |
 67 |         cd bat
 68 |         sed -i 's%\[dependencies.syntect\]%[dependencies.syntect]\npath = "../syntect"%' Cargo.toml
 69 |         cargo build --release # Build bat so we can update the assets
 70 |         PATH=target/release:$PATH ./assets/create.sh # Update assets with newly built bat
 71 |         cargo build --release # Build bat using the newly updated assets
 72 |         PATH=./target/release:$PATH tests/syntax-tests/regression_test.sh
 73 | 
 74 |   build-and-test:
 75 |     name: Build and test
 76 |     runs-on: ubuntu-latest
 77 |     steps:
 78 |     - uses: actions/checkout@v4
 79 |       with:
 80 |         submodules: true
 81 |     - uses: dtolnay/rust-toolchain@stable
 82 |     - name: Build
 83 |       run: |
 84 |         cargo build
 85 |     - name: Run tests
 86 |       run: |
 87 |         cargo test --features metadata
 88 |     - name: Run tests with fancy
 89 |       run: |
 90 |         # Run these tests in release mode since they're slow as heck otherwise
 91 |         cargo test --features default-fancy --no-default-features --release
 92 |     - name: Ensure highlight works without 'plist-load' and 'yaml-load' features
 93 |       run: |
 94 |         cargo run --example synhtml --no-default-features --features html,default-syntaxes,default-themes,regex-onig -- examples/synhtml.rs
 95 |     - name: Run tests with 'default-syntaxes' but without 'default-themes'
 96 |       run: |
 97 |         cargo test --lib --example synstats --no-default-features --features default-syntaxes,yaml-load,regex-onig
 98 |     - name: Run tests without default features
 99 |       run: |
100 |         cargo test --lib --no-default-features
101 |     - name: make stuff
102 |       run: |
103 |         make assets
104 |         make syntest
105 |         make syntest-fancy
106 |     - name: Docs
107 |       run: |
108 |         cargo doc
109 | 
110 |   check-feature-powerset:
111 |     name: Check feature powerset
112 |     runs-on: ubuntu-latest
113 |     steps:
114 |     - uses: actions/checkout@v4
115 |     - uses: dtolnay/rust-toolchain@stable
116 |     - uses: taiki-e/install-action@v2
117 |       with:
118 |         tool: cargo-hack
119 |     # Ensure that `cargo check` works across many different feature sets
120 |     - name: Check feature powerset
121 |       run: |
122 |         cargo hack --feature-powerset --depth=2 --features=regex-fancy --exclude-features=regex-onig check
123 | 


--------------------------------------------------------------------------------
/src/highlighting/theme_set.rs:
--------------------------------------------------------------------------------
  1 | use super::super::LoadingError;
  2 | #[cfg(feature = "plist-load")]
  3 | use super::settings::*;
  4 | use super::theme::Theme;
  5 | use serde_derive::{Deserialize, Serialize};
  6 | use std::collections::BTreeMap;
  7 | use std::path::{Path, PathBuf};
  8 | 
  9 | /// A set of themes, includes convenient methods for loading and discovering themes.
 10 | #[derive(Debug, Default, Serialize, Deserialize)]
 11 | pub struct ThemeSet {
 12 |     // This is a `BTreeMap` because they're faster than hashmaps on small sets
 13 |     pub themes: BTreeMap<String, Theme>,
 14 | }
 15 | 
 16 | impl ThemeSet {
 17 |     /// Creates an empty set
 18 |     pub fn new() -> ThemeSet {
 19 |         ThemeSet::default()
 20 |     }
 21 | 
 22 |     /// Returns the paths of all files in a folder whose extension is `tmTheme` (ASCII case-insensitive)
 23 |     ///
 24 |     /// This is good for enumerating before loading one with [`get_theme`](#method.get_theme)
 25 |     pub fn discover_theme_paths<P: AsRef<Path>>(folder: P) -> Result<Vec<PathBuf>, LoadingError> {
 26 |         let mut themes = Vec::new();
 27 |         for entry in crate::utils::walk_dir(folder) {
 28 |             let entry = entry.map_err(LoadingError::WalkDir)?;
 29 |             if entry.path().is_file()
 30 |                 && entry
 31 |                     .path()
 32 |                     .extension()
 33 |                     .is_some_and(|e| e.eq_ignore_ascii_case("tmTheme"))
 34 |             {
 35 |                 themes.push(entry.path().to_owned());
 36 |             }
 37 |         }
 38 |         Ok(themes)
 39 |     }
 40 | 
 41 |     /// Loads a theme given a path to a .tmTheme file
 42 |     #[cfg(feature = "plist-load")]
 43 |     pub fn get_theme<P: AsRef<Path>>(path: P) -> Result<Theme, LoadingError> {
 44 |         let file = std::fs::File::open(path)?;
 45 |         let mut file = std::io::BufReader::new(file);
 46 |         Self::load_from_reader(&mut file)
 47 |     }
 48 | 
 49 |     /// Loads a theme given a readable stream
 50 |     #[cfg(feature = "plist-load")]
 51 |     pub fn load_from_reader<R: std::io::BufRead + std::io::Seek>(
 52 |         r: &mut R,
 53 |     ) -> Result<Theme, LoadingError> {
 54 |         Ok(Theme::parse_settings(read_plist(r)?)?)
 55 |     }
 56 | 
 57 |     /// Generate a `ThemeSet` from all themes in a folder
 58 |     #[cfg(feature = "plist-load")]
 59 |     pub fn load_from_folder<P: AsRef<Path>>(folder: P) -> Result<ThemeSet, LoadingError> {
 60 |         let mut theme_set = Self::new();
 61 |         theme_set.add_from_folder(folder)?;
 62 |         Ok(theme_set)
 63 |     }
 64 | 
 65 |     /// Load all the themes in the folder into this `ThemeSet`, keyed by each file's stem
 66 |     #[cfg(feature = "plist-load")]
 67 |     pub fn add_from_folder<P: AsRef<Path>>(&mut self, folder: P) -> Result<(), LoadingError> {
 68 |         let paths = Self::discover_theme_paths(folder)?;
 69 |         for p in &paths {
 70 |             let theme = Self::get_theme(p)?;
 71 |             let basename = p
 72 |                 .file_stem()
 73 |                 .and_then(|x| x.to_str())
 74 |                 .ok_or(LoadingError::BadPath)?;
 75 |             self.themes.insert(basename.to_owned(), theme);
 76 |         }
 77 | 
 78 |         Ok(())
 79 |     }
 80 | }
 81 | 
 82 | #[cfg(test)]
 83 | mod tests {
 84 |     /// Loads every theme under `testdata` and spot-checks the parsed Base16 Ocean Dark theme.
 85 |     #[cfg(feature = "plist-load")]
 86 |     #[test]
 87 |     fn can_parse_common_themes() {
 88 |         // Imported here rather than at module level so that builds without
 89 |         // `plist-load` (where this test is compiled out) have no unused imports.
 90 |         use crate::highlighting::{Color, ThemeSet};
 91 | 
 92 |         let themes = ThemeSet::load_from_folder("testdata").unwrap();
 93 |         let all_themes: Vec<&str> = themes.themes.keys().map(|x| &**x).collect();
 94 |         assert!(all_themes.contains(&"base16-ocean.dark"));
 95 | 
 96 |         println!("{:?}", all_themes);
 97 | 
 98 |         let theme = ThemeSet::get_theme("testdata/spacegray/base16-ocean.dark.tmTheme").unwrap();
 99 |         assert_eq!(theme.name.unwrap(), "Base16 Ocean Dark");
100 |         assert_eq!(
101 |             theme.settings.selection.unwrap(),
102 |             Color {
103 |                 r: 0x4f,
104 |                 g: 0x5b,
105 |                 b: 0x66,
106 |                 a: 0xff,
107 |             }
108 |         );
109 |         assert_eq!(
110 |             theme.scopes[0].style.foreground.unwrap(),
111 |             Color {
112 |                 r: 0xc0,
113 |                 g: 0xc5,
114 |                 b: 0xce,
115 |                 a: 0xff,
116 |             }
117 |         );
118 |         assert_eq!(
119 |             theme.settings.gutter_foreground.unwrap(),
120 |             Color {
121 |                 r: 0x65,
122 |                 g: 0x73,
123 |                 b: 0x7e,
124 |                 a: 0xff,
125 |             }
126 |         );
127 |         assert_eq!(
128 |             theme.settings.gutter.unwrap(),
129 |             Color {
130 |                 r: 0x34,
131 |                 g: 0x3d,
132 |                 b: 0x46,
133 |                 a: 0xff,
134 |             }
135 |         );
136 |     }
137 | }
138 | 


--------------------------------------------------------------------------------
/examples/synhtml-css-classes.rs:
--------------------------------------------------------------------------------
  1 | //! Generates highlighted HTML with CSS classes for a Rust and a C++ source.
  2 | //! Run with ```cargo run --example synhtml-css-classes```
  3 | //!
  4 | //! Running it generates four files as a usage example:
  5 | //! * synhtml-css-classes.html
  6 | //! * synhtml-css-classes.css
  7 | //! * theme-dark.css
  8 | //! * theme-light.css
  9 | //!
 10 | //! You can open the html with a web browser and change between light and dark
 11 | //! mode.
 12 | use syntect::highlighting::ThemeSet;
 13 | use syntect::html::css_for_theme_with_class_style;
 14 | use syntect::html::{ClassStyle, ClassedHTMLGenerator};
 15 | use syntect::parsing::SyntaxSet;
 16 | use syntect::util::LinesWithEndings;
 17 | 
 18 | use std::fs::File;
 19 | use std::io::{BufWriter, Write};
 20 | use std::path::Path;
 21 | 
 22 | /// Writes the example HTML page, its stylesheet and the two theme stylesheets into
 23 | /// the current working directory.
 24 | ///
 25 | /// Every `BufWriter` is flushed explicitly: dropping a `BufWriter` discards any error
 26 | /// from its final write, which would let a failed write go unnoticed.
 27 | fn main() -> Result<(), std::io::Error> {
 28 |     // ---------------------------------------------------------------------------------------------
 29 |     // generate html
 30 |     let ss = SyntaxSet::load_defaults_newlines();
 31 | 
 32 |     let html_file = File::create(Path::new("synhtml-css-classes.html"))?;
 33 |     let mut html = BufWriter::new(&html_file);
 34 | 
 35 |     // write html header
 36 |     writeln!(html, "<!DOCTYPE html>")?;
 37 |     writeln!(html, "<html>")?;
 38 |     writeln!(html, "  <head>")?;
 39 |     writeln!(html, "    <title>synhtml-css-classes.rs</title>")?;
 40 |     writeln!(
 41 |         html,
 42 |         "    <link rel=\"stylesheet\" href=\"synhtml-css-classes.css\">"
 43 |     )?;
 44 |     writeln!(html, "  </head>")?;
 45 |     writeln!(html, "  <body>")?;
 46 | 
 47 |     // Rust
 48 |     let code_rs = "// Rust source
 49 | fn main() {
 50 |     println!(\"Hello World!\");
 51 | }";
 52 | 
 53 |     let sr_rs = ss.find_syntax_by_extension("rs").unwrap();
 54 |     let mut rs_html_generator =
 55 |         ClassedHTMLGenerator::new_with_class_style(sr_rs, &ss, ClassStyle::Spaced);
 56 |     for line in LinesWithEndings::from(code_rs) {
 57 |         rs_html_generator
 58 |             .parse_html_for_line_which_includes_newline(line)
 59 |             .unwrap();
 60 |     }
 61 |     let html_rs = rs_html_generator.finalize();
 62 | 
 63 |     writeln!(html, "<pre class=\"code\">")?;
 64 |     writeln!(html, "{}", html_rs)?;
 65 |     writeln!(html, "</pre>")?;
 66 | 
 67 |     // C++
 68 |     let code_cpp = "/* C++ source */
 69 | #include <iostream>
 70 | int main() {
 71 |     std::cout << \"Hello World!\" << std::endl;
 72 | }";
 73 | 
 74 |     let sr_cpp = ss.find_syntax_by_extension("cpp").unwrap();
 75 |     let mut cpp_html_generator =
 76 |         ClassedHTMLGenerator::new_with_class_style(sr_cpp, &ss, ClassStyle::Spaced);
 77 |     for line in LinesWithEndings::from(code_cpp) {
 78 |         cpp_html_generator
 79 |             .parse_html_for_line_which_includes_newline(line)
 80 |             .unwrap();
 81 |     }
 82 |     let html_cpp = cpp_html_generator.finalize();
 83 | 
 84 |     writeln!(html, "<pre class=\"code\">")?;
 85 |     writeln!(html, "{}", html_cpp)?;
 86 |     writeln!(html, "</pre>")?;
 87 | 
 88 |     // write html end
 89 |     writeln!(html, "  </body>")?;
 90 |     writeln!(html, "</html>")?;
 91 |     html.flush()?;
 92 | 
 93 |     // ---------------------------------------------------------------------------------------------
 94 |     // generate css
 95 |     let css = "@import url(\"theme-light.css\") (prefers-color-scheme: light);
 96 |     @import url(\"theme-dark.css\") (prefers-color-scheme: dark);
 97 | 
 98 |     @media (prefers-color-scheme: dark) {
 99 |       body {
100 |         background-color: gray;
101 |       }
102 |     }
103 |     @media (prefers-color-scheme: light) {
104 |       body {
105 |         background-color: lightgray;
106 |       }
107 |     }";
108 | 
109 |     let css_file = File::create(Path::new("synhtml-css-classes.css"))?;
110 |     let mut css_writer = BufWriter::new(&css_file);
111 | 
112 |     writeln!(css_writer, "{}", css)?;
113 |     css_writer.flush()?;
114 | 
115 |     // ---------------------------------------------------------------------------------------------
116 |     // generate css files for themes
117 |     let ts = ThemeSet::load_defaults();
118 | 
119 |     // create dark color scheme css
120 |     let dark_theme = &ts.themes["Solarized (dark)"];
121 |     let css_dark_file = File::create(Path::new("theme-dark.css"))?;
122 |     let mut css_dark_writer = BufWriter::new(&css_dark_file);
123 | 
124 |     let css_dark = css_for_theme_with_class_style(dark_theme, ClassStyle::Spaced).unwrap();
125 |     writeln!(css_dark_writer, "{}", css_dark)?;
126 |     css_dark_writer.flush()?;
127 | 
128 |     // create light color scheme css
129 |     let light_theme = &ts.themes["Solarized (light)"];
130 |     let css_light_file = File::create(Path::new("theme-light.css"))?;
131 |     let mut css_light_writer = BufWriter::new(&css_light_file);
132 | 
133 |     let css_light = css_for_theme_with_class_style(light_theme, ClassStyle::Spaced).unwrap();
134 |     writeln!(css_light_writer, "{}", css_light)?;
135 |     css_light_writer.flush()?;
136 | 
137 |     Ok(())
138 | }
139 | 


--------------------------------------------------------------------------------
/testdata/test3.html:
--------------------------------------------------------------------------------
 1 | <pre style="background-color:#2b303b;">
 2 | <span style="color:#c0c5ce;">&lt;</span><span style="color:#bf616a;">script </span><span style="color:#d08770;">type</span><span style="color:#c0c5ce;">=&quot;</span><span style="color:#a3be8c;">text/javascript</span><span style="color:#c0c5ce;">&quot;&gt;
 3 | </span><span style="color:#c0c5ce;">  </span><span style="color:#b48ead;">var </span><span style="color:#bf616a;">lol </span><span style="color:#c0c5ce;">= &quot;</span><span style="color:#a3be8c;">JS nesting</span><span style="color:#c0c5ce;">&quot;;
 4 | </span><span style="color:#c0c5ce;">  </span><span style="color:#b48ead;">class </span><span style="color:#ebcb8b;">WithES6 </span><span style="color:#b48ead;">extends </span><span style="color:#a3be8c;">THREE</span><span style="color:#eff1f5;">.</span><span style="color:#a3be8c;">Mesh </span><span style="color:#eff1f5;">{
 5 | </span><span style="color:#eff1f5;">    </span><span style="color:#b48ead;">static </span><span style="color:#8fa1b3;">highQuality</span><span style="color:#eff1f5;">() { </span><span style="color:#65737e;">// such classes
 6 | </span><span style="color:#eff1f5;">      </span><span style="color:#b48ead;">return </span><span style="color:#bf616a;">this</span><span style="color:#eff1f5;">.</span><span style="color:#8fa1b3;">toString</span><span style="color:#eff1f5;">();
 7 | </span><span style="color:#eff1f5;">    }
 8 | </span><span style="color:#eff1f5;">  }
 9 | </span><span style="color:#c0c5ce;">  </span><span style="color:#ab7967;">&lt;%
10 | </span><span style="color:#c0c5ce;">    </span><span style="color:#65737e;"># The outer syntax is HTML (Rails) detected from the .erb extension
11 | </span><span style="color:#c0c5ce;">    </span><span style="color:#96b5b4;">puts </span><span style="color:#c0c5ce;">&quot;</span><span style="color:#a3be8c;">Ruby </span><span style="color:#ab7967;">#{</span><span style="color:#c0c5ce;">&#39;</span><span style="color:#a3be8c;">nesting</span><span style="color:#c0c5ce;">&#39; * </span><span style="color:#d08770;">2</span><span style="color:#ab7967;">}</span><span style="color:#c0c5ce;">&quot;
12 | </span><span style="color:#c0c5ce;">    here = &lt;&lt;-WOWCOOL + </span><span style="color:#bf616a;">CORRECTLY_DOES_NOT_HIGHLIGHT_REST_OF_LINE
13 | </span><span style="color:#a3be8c;">      high quality parsing even supports custom heredoc endings
14 | </span><span style="color:#a3be8c;">      </span><span style="color:#ab7967;">#{
15 | </span><span style="color:#a3be8c;">      nested </span><span style="color:#c0c5ce;">= </span><span style="color:#d08770;">5 </span><span style="color:#c0c5ce;">* &lt;&lt;-ZOMG
16 | </span><span style="color:#a3be8c;">        nested heredocs! (no highlighting: 5 * 6, yes highlighting: </span><span style="color:#ab7967;">#{</span><span style="color:#d08770;">5 </span><span style="color:#c0c5ce;">* </span><span style="color:#d08770;">6</span><span style="color:#ab7967;">}</span><span style="color:#a3be8c;">)
17 | </span><span style="color:#c0c5ce;">      ZOMG
18 | </span><span style="color:#a3be8c;">      </span><span style="color:#ab7967;">}
19 | </span><span style="color:#c0c5ce;">    WOWCOOL
20 | </span><span style="color:#c0c5ce;">    sql = &lt;&lt;-SQL
21 | </span><span style="color:#a3be8c;">      </span><span style="color:#b48ead;">select </span><span style="color:#c0c5ce;">* </span><span style="color:#b48ead;">from</span><span style="color:#a3be8c;"> heredocs </span><span style="color:#b48ead;">where</span><span style="color:#a3be8c;"> there_are_special_heredoc_names </span><span style="color:#c0c5ce;">= </span><span style="color:#d08770;">true
22 | </span><span style="color:#c0c5ce;">    SQL
23 | </span><span style="color:#c0c5ce;">  </span><span style="color:#ab7967;">%&gt;
24 | </span><span style="color:#c0c5ce;">&lt;/</span><span style="color:#bf616a;">script</span><span style="color:#c0c5ce;">&gt;
25 | </span><span style="color:#c0c5ce;">&lt;</span><span style="color:#bf616a;">style </span><span style="color:#d08770;">type</span><span style="color:#c0c5ce;">=&quot;</span><span style="color:#a3be8c;">text/css</span><span style="color:#c0c5ce;">&quot;&gt;
26 | </span><span style="color:#c0c5ce;">  </span><span style="color:#65737e;">/* the HTML syntax also supports CSS of course */
27 | </span><span style="color:#b48ead;">  </span><span style="color:#8fa1b3;">.</span><span style="color:#d08770;">stuff </span><span style="color:#8fa1b3;">#wow </span><span style="color:#c0c5ce;">{
28 | </span><span style="color:#c0c5ce;">    border: </span><span style="color:#d08770;">5px </span><span style="color:#96b5b4;">#ffffff</span><span style="color:#c0c5ce;">;
29 | </span><span style="color:#c0c5ce;">    background: </span><span style="color:#96b5b4;">url</span><span style="color:#c0c5ce;">(&quot;</span><span style="color:#a3be8c;">wow</span><span style="color:#c0c5ce;">&quot;);
30 | </span><span style="color:#c0c5ce;">  }
31 | </span><span style="color:#c0c5ce;">&lt;/</span><span style="color:#bf616a;">style</span><span style="color:#c0c5ce;">&gt;
32 | </span></pre>
33 | 


--------------------------------------------------------------------------------
/testdata/JSON.sublime-syntax:
--------------------------------------------------------------------------------
  1 | %YAML 1.2
  2 | ---
  3 | name: JSON
  4 | file_extensions:
  5 |   - json
  6 |   - sublime-settings
  7 |   - sublime-menu
  8 |   - sublime-keymap
  9 |   - sublime-mousemap
 10 |   - sublime-theme
 11 |   - sublime-build
 12 |   - sublime-project
 13 |   - sublime-completions
 14 |   - sublime-commands
 15 |   - sublime-macro
 16 |   - sublime-color-scheme
 17 |   - ipynb
 18 |   - Pipfile.lock
 19 | scope: source.json
 20 | contexts:
 21 |   prototype:
 22 |     - include: comments
 23 |   main:
 24 |     - include: value
 25 |   value:
 26 |     - include: constant
 27 |     - include: number
 28 |     - include: string
 29 |     - include: array
 30 |     - include: object
 31 |   array:
 32 |     - match: '\['
 33 |       scope: punctuation.section.sequence.begin.json
 34 |       push:
 35 |         - meta_scope: meta.sequence.json
 36 |         - match: '\]'
 37 |           scope: punctuation.section.sequence.end.json
 38 |           pop: true
 39 |         - include: value
 40 |         - match: ","
 41 |           scope: punctuation.separator.sequence.json
 42 |         - match: '[^\s\]]'
 43 |           scope: invalid.illegal.expected-sequence-separator.json
 44 |   comments:
 45 |     - match: /\*\*(?!/)
 46 |       scope: punctuation.definition.comment.json
 47 |       push:
 48 |         - meta_scope: comment.block.documentation.json
 49 |         - meta_include_prototype: false
 50 |         - match: \*/
 51 |           pop: true
 52 |         - match: ^\s*(\*)(?!/)
 53 |           captures:
 54 |             1: punctuation.definition.comment.json
 55 |     - match: /\*
 56 |       scope: punctuation.definition.comment.json
 57 |       push:
 58 |         - meta_scope: comment.block.json
 59 |         - meta_include_prototype: false
 60 |         - match: \*/
 61 |           pop: true
 62 |     - match: (//).*$\n?
 63 |       scope: comment.line.double-slash.js
 64 |       captures:
 65 |         1: punctuation.definition.comment.json
 66 |   constant:
 67 |     - match: \b(?:true|false|null)\b
 68 |       scope: constant.language.json
 69 |   number:
 70 |     # handles integer and decimal numbers
 71 |     - match: -?(?:0|[1-9]\d*)(?:(?:(\.)\d+)(?:[eE][-+]?\d+)?|(?:[eE][-+]?\d+))
 72 |       scope: constant.numeric.float.decimal.json
 73 |       captures:
 74 |         1: punctuation.separator.decimal.json
 75 |     - match: -?(?:0|[1-9]\d*)
 76 |       scope: constant.numeric.integer.decimal.json
 77 |   object:
 78 |     # a JSON object
 79 |     - match: '\{'
 80 |       scope: punctuation.section.mapping.begin.json
 81 |       push:
 82 |         - meta_scope: meta.mapping.json
 83 |         - match: '\}'
 84 |           scope: punctuation.section.mapping.end.json
 85 |           pop: true
 86 |         - match: '"'
 87 |           scope: punctuation.definition.string.begin.json
 88 |           push:
 89 |             - clear_scopes: 1
 90 |             - meta_scope: meta.mapping.key.json string.quoted.double.json
 91 |             - meta_include_prototype: false
 92 |             - include: inside-string
 93 |         - match: ":"
 94 |           scope: punctuation.separator.mapping.key-value.json
 95 |           push:
 96 |             - match: ',|\s?(?=\})'
 97 |               scope: invalid.illegal.expected-mapping-value.json
 98 |               pop: true
 99 |             - match: (?=\S)
100 |               set:
101 |                 - clear_scopes: 1
102 |                 - meta_scope: meta.mapping.value.json
103 |                 - include: value
104 |                 - match: ''
105 |                   set:
106 |                     - match: ','
107 |                       scope: punctuation.separator.mapping.pair.json
108 |                       pop: true
109 |                     - match: \s*(?=\})
110 |                       pop: true
111 |                     - match: \s(?!/[/*])(?=[^\s,])|[^\s,]
112 |                       scope: invalid.illegal.expected-mapping-separator.json
113 |                       pop: true
114 |         - match: '[^\s\}]'
115 |           scope: invalid.illegal.expected-mapping-key.json
116 |   string:
117 |     - match: '"'
118 |       scope: punctuation.definition.string.begin.json
119 |       push: inside-string
120 |   inside-string:
121 |     - meta_scope: string.quoted.double.json
122 |     - meta_include_prototype: false
123 |     - match: '"'
124 |       scope: punctuation.definition.string.end.json
125 |       pop: true
126 |     - include: string-escape
127 |     - match: $\n?
128 |       scope: invalid.illegal.unclosed-string.json
129 |       pop: true
130 |   string-escape:
131 |     - match: |-
132 |         (?x:                # turn on extended mode
133 |           \\                # a literal backslash
134 |           (?:               # ...followed by...
135 |             ["\\/bfnrt]     # one of these characters
136 |             |               # ...or...
137 |             u               # a u
138 |             [0-9a-fA-F]{4}  # and four hex digits
139 |           )
140 |         )
141 |       scope: constant.character.escape.json
142 |     - match: \\.
143 |       scope: invalid.illegal.unrecognized-string-escape.json
144 | 


--------------------------------------------------------------------------------
/examples/syncat.rs:
--------------------------------------------------------------------------------
  1 | use getopts::Options;
  2 | use std::borrow::Cow;
  3 | use std::io::BufRead;
  4 | use std::path::Path;
  5 | use syntect::dumps::{dump_to_file, from_dump_file};
  6 | use syntect::easy::HighlightFile;
  7 | use syntect::highlighting::{Style, Theme, ThemeSet};
  8 | use syntect::parsing::SyntaxSet;
  9 | use syntect::util::as_24_bit_terminal_escaped;
 10 | 
 11 | /// Loads the theme stored at `tm_file`. With `enable_caching`, a `.tmdump` file next to the
 12 | /// theme is read if it exists and is written after parsing otherwise.
 13 | fn load_theme(tm_file: &str, enable_caching: bool) -> Theme {
 14 |     let theme_path = Path::new(tm_file);
 15 |     if !enable_caching {
 16 |         return ThemeSet::get_theme(theme_path).unwrap();
 17 |     }
 18 | 
 19 |     let cache_path = theme_path.with_extension("tmdump");
 20 |     if cache_path.exists() {
 21 |         return from_dump_file(cache_path).unwrap();
 22 |     }
 23 |     // Cache miss: parse the theme and store the dump for the next run.
 24 |     let parsed = ThemeSet::get_theme(theme_path).unwrap();
 25 |     dump_to_file(&parsed, cache_path).unwrap();
 26 |     parsed
 27 | }
 28 | 
 29 | /// Prints the given files to the terminal with 24-bit colour syntax highlighting.
 30 | /// Without file arguments (and without a list flag) it prints the usage text instead.
 31 | fn main() {
 32 |     let args: Vec<String> = std::env::args().collect();
 33 |     let mut opts = Options::new();
 34 |     opts.optflag("l", "list-file-types", "Lists supported file types");
 35 |     opts.optflag(
 36 |         "L",
 37 |         "list-embedded-themes",
 38 |         "Lists themes present in the executable",
 39 |     );
 40 |     opts.optopt("t", "theme-file", "THEME_FILE", "Theme file to use. May be a path, or an embedded theme. Embedded themes will take precedence. Default: base16-ocean.dark");
 41 |     opts.optopt(
 42 |         "s",
 43 |         "extra-syntaxes",
 44 |         "SYNTAX_FOLDER",
 45 |         "Additional folder to search for .sublime-syntax files in.",
 46 |     );
 47 |     opts.optflag(
 48 |         "e",
 49 |         "no-default-syntaxes",
 50 |         "Doesn't load default syntaxes, intended for use with --extra-syntaxes.",
 51 |     );
 52 |     opts.optflag(
 53 |         "n",
 54 |         "no-newlines",
 55 |         "Uses the no newlines versions of syntaxes and dumps.",
 56 |     );
 57 |     opts.optflag("c", "cache-theme", "Cache the parsed theme file.");
 58 | 
 59 |     let matches = match opts.parse(&args[1..]) {
 60 |         Ok(m) => m,
 61 |         Err(f) => panic!("{}", f),
 62 |     };
 63 | 
 64 |     let no_newlines = matches.opt_present("no-newlines");
 65 |     let mut ss = if matches.opt_present("no-default-syntaxes") {
 66 |         SyntaxSet::new()
 67 |     } else if no_newlines {
 68 |         SyntaxSet::load_defaults_nonewlines()
 69 |     } else {
 70 |         SyntaxSet::load_defaults_newlines()
 71 |     };
 72 | 
 73 |     if let Some(folder) = matches.opt_str("extra-syntaxes") {
 74 |         let mut builder = ss.into_builder();
 75 |         builder.add_from_folder(folder, !no_newlines).unwrap();
 76 |         ss = builder.build();
 77 |     }
 78 | 
 79 |     let ts = ThemeSet::load_defaults();
 80 | 
 81 |     if matches.opt_present("list-file-types") {
 82 |         println!("Supported file types:");
 83 | 
 84 |         for sd in ss.syntaxes() {
 85 |             println!("- {} (.{})", sd.name, sd.file_extensions.join(", ."));
 86 |         }
 87 |     } else if matches.opt_present("list-embedded-themes") {
 88 |         println!("Embedded themes:");
 89 | 
 90 |         for t in ts.themes.keys() {
 91 |             println!("- {}", t);
 92 |         }
 93 |     } else if matches.free.is_empty() {
 94 |         let brief = format!("USAGE: {} [options] FILES", args[0]);
 95 |         println!("{}", opts.usage(&brief));
 96 |     } else {
 97 |         let theme_file: String = matches
 98 |             .opt_str("theme-file")
 99 |             .unwrap_or_else(|| "base16-ocean.dark".to_string());
100 | 
101 |         let theme = ts
102 |             .themes
103 |             .get(&theme_file)
104 |             .map(Cow::Borrowed)
105 |             .unwrap_or_else(|| {
106 |                 Cow::Owned(load_theme(&theme_file, matches.opt_present("cache-theme")))
107 |             });
108 | 
109 |         for src in &matches.free[..] {
110 |             if matches.free.len() > 1 {
111 |                 println!("==> {} <==", src);
112 |             }
113 | 
114 |             let mut highlighter = HighlightFile::new(src, &ss, &theme).unwrap();
115 | 
116 |             // We use read_line instead of `for line in highlighter.reader.lines()` because that
117 |             // doesn't return strings with a `\n`, and including the `\n` gets us more robust highlighting.
118 |             // See the documentation for `SyntaxSetBuilder::add_from_folder`.
119 |             // It also allows re-using the line buffer, which should be a tiny bit faster.
120 |             let mut line = String::new();
121 |             while highlighter.reader.read_line(&mut line).unwrap() > 0 {
122 |                 if no_newlines && line.ends_with('\n') {
123 |                     let _ = line.pop();
124 |                 }
125 | 
126 |                 {
127 |                     let regions: Vec<(Style, &str)> = highlighter
128 |                         .highlight_lines
129 |                         .highlight_line(&line, &ss)
130 |                         .unwrap();
131 |                     print!("{}", as_24_bit_terminal_escaped(&regions[..], true));
132 |                 }
133 |                 line.clear();
134 | 
135 |                 if no_newlines {
136 |                     println!();
137 |                 }
138 |             }
139 | 
140 |             // Clear the formatting
141 |             println!("\x1b[0m");
142 |         }
143 |     }
144 | }
145 | 


--------------------------------------------------------------------------------
/DESIGN.md:
--------------------------------------------------------------------------------
  1 | # Optimization/Design notes
  2 | 
  3 | This is my scratch pad for optimization ideas. Some of this I will implement, some I have implemented, some are just speculative.
  4 | 
  5 | # Scopes
  6 | 
  7 | ## Representation ideas:
  8 | 
  9 | - Normal arrays of strings
 10 | - array of 32-bit or 64-bit atoms (maybe using Servo's atom library)
 11 | - Atoms packed into one or two u64s
 12 |   - fast equality checking
 13 |   - potentially fast prefix checking
 14 |   - needs unsafe code
 15 | 
 16 | ## Potential packings:
 17 | 
 18 | - variable width atoms, either 7 bits and a tag bit for top 128 or 13 bits and 3 tagging bits for rest
 19 |   - can fit all but 33 of the scopes present
 20 | - tagged pointer (taking advantage of alignment), either a pointer to a slow path, or the first 4 bits set then a packed representation, one of others mentioned
 21 | - 6 10-bit atoms referencing unique things by position (see by-position stats below)
 22 | - 5 11-bit atoms and one 8-bit one for the first atom (2^11 = 2048, 2^8 = 256), one remaining bit for tag marker
 23 | 
 24 | ## Stats:
 25 | 
 26 | - 7000 scopes referenced in sublime, 3537 unique ones, all stats after this are based on non-unique data
 27 | - all but 33 scopes in default packages could fit in 64 bits with a combination of 8-bit and 16-bit atom encodings
 28 | - there are only 1219 unique atoms in the default package set
 29 | - the top 128 atoms make up ~90% of all unique atoms referenced in syntax files
 30 | - there are 26 unique first atoms, 145 unique last atoms
 31 | - every position (1st atom, 2nd atom, ...) has under 878 possibilities, only the 2nd, 3rd and 4th have >256
 32 | - 99.8% of scopes have 6 or fewer atoms, 97% have 5 or fewer, 70% have 4 or fewer
 33 |   - for unique scopes: {2=>81, 4=>1752, 3=>621, 5=>935, 7=>8, 6=>140} ----> 95% of uniques <= 6
 34 |   - for non-unique scopes: {2=>125, 4=>3383, 3=>1505, 5=>1891, 7=>9, 6=>202}
 35 | 
 36 | # Checking prefix
 37 | 
 38 | operation: `fn extent_matched(potential_prefix: Scope, s: Scope) -> u8`
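 39 | idea: for a real prefix, any differences lie beyond the length of the prefix.
 40 | figure this out by xor-ing the two scopes, then using ctz/clz to locate the first differing bit and comparing that position to the prefix length (exact details still to be worked out).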
 41 | 
 42 | ```bash
 43 | XXXXYYYY00000000 # prefix
 44 | XXXXYYYYZZZZ0000 # testee
 45 | 00000000ZZZZ0000 # = xored
 46 | 
 47 | XXXXYYYYQQQQ0000 # non-prefix
 48 | XXXXYYYYZZZZ0000 # testee
 49 | 00000000GGGG0000 # = xored
 50 | 
 51 | XXXXQQQQ00000000 # non-prefix
 52 | XXXXYYYYZZZZ0000 # testee
 53 | 0000BBBBZZZZ0000 # = xored
 54 | ```
 55 | 
 56 | # Parsing
 57 | 
 58 | * Problem: need to reduce number of regex search calls
 59 | * Solution: cache better
 60 | 
 61 | ## Stats
 62 | 
 63 | ```bash
 64 | # On stats branch
 65 | $cargo run --release --example syncat testdata/jquery.js | grep cmiss | wc -l
 66 |      Running `target/release/examples/syncat testdata/jquery.js`
 67 |    61266
 68 | $cargo run --release --example syncat testdata/jquery.js | grep ptoken | wc -l
 69 |    Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
 70 |      Running `target/release/examples/syncat testdata/jquery.js`
 71 |    98714
 72 | $wc -l testdata/jquery.js
 73 |     9210 testdata/jquery.js
 74 | $cargo run --release --example syncat testdata/jquery.js | grep cclear | wc -l
 75 |    Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
 76 |      Running `target/release/examples/syncat testdata/jquery.js`
 77 |    71302
 78 | $cargo run --release --example syncat testdata/jquery.js | grep freshcachetoken | wc -l
 79 |     Compiling syntect v0.1.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
 80 |       Running `target/release/examples/syncat testdata/jquery.js`
 81 |    80512
 82 | # On stats-2 branch
 83 | $cargo run --example syncat testdata/jquery.js | grep cachehit | wc -l
 84 |      Running `target/debug/examples/syncat testdata/jquery.js`
 85 |   527774
 86 | $cargo run --example syncat testdata/jquery.js | grep regsearch | wc -l
 87 |      Running `target/debug/examples/syncat testdata/jquery.js`
 88 |  2862948
 89 | $cargo run --example syncat testdata/jquery.js | grep regmatch | wc -l
 90 |    Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
 91 |      Running `target/debug/examples/syncat testdata/jquery.js`
 92 |   296127
 93 | $cargo run --example syncat testdata/jquery.js | grep leastmatch | wc -l
 94 |    Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
 95 |      Running `target/debug/examples/syncat testdata/jquery.js`
 96 |   137842
 97 | # With search caching
 98 | $cargo run --example syncat testdata/jquery.js | grep searchcached | wc -l
 99 |    Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
100 |      Running `target/debug/examples/syncat testdata/jquery.js`
101 |  2440527
102 | $cargo run --example syncat testdata/jquery.js | grep regsearch | wc -l
103 |      Running `target/debug/examples/syncat testdata/jquery.js`
104 |   950195
105 | ```
106 | 
107 | Average unique regexes per line is 87.58, average non-unique is regsearch/lines = 317
108 | 
109 | Ideally we should have only a couple fresh cache searches per line, not `~10` like the stats show (freshcachetoken/linecount).
110 | 
111 | In a fantabulous world these stats mean a possible 10x speed improvement, but since caching does have a cost and we can't always cache, the gain will likely be nice but not that high.
112 | 
113 | ## Issues
114 | 
115 | - Stack transitions always bust cache, even when for example JS just pushes another group
116 | - Doesn't cache actual matches, only if it matched or not
117 | 
118 | ## Attacks
119 | 
120 | - cache based on actual context, only search if it is a prototype we haven't searched before
121 |   - hash maps based on casting RC ref to pointer and hashing? (there is a Hash impl for pointers)
122 | - for new searches, store matched regexes for context in BTreeMap like textmate
123 |   - for subsequent tokens in same context, just pop off btreemap and re-search if before curpos
124 | - cache per Regex
125 | 


--------------------------------------------------------------------------------
/src/highlighting/theme.rs:
--------------------------------------------------------------------------------
  1 | // Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/theme.rs
  2 | // released under the MIT license by @defuz
  3 | use super::selector::*;
  4 | use super::style::*;
  5 | use serde_derive::{Deserialize, Serialize};
  6 | 
  7 | /// A theme parsed from a `.tmTheme` file.
  8 | ///
  9 | /// Besides the styling rules, it holds list-friendly metadata and `settings` for styling your editor.
 10 | #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
 11 | pub struct Theme {
 12 |     pub name: Option<String>,
 13 |     pub author: Option<String>,
 14 |     /// External settings for the editor using this theme.
 15 |     pub settings: ThemeSettings,
 16 |     /// The styling rules for the viewed text.
 17 |     pub scopes: Vec<ThemeItem>,
 18 | }
 19 | 
 20 | /// Properties for styling the UI of a text editor
 21 | ///
 22 | /// This essentially consists of the styles that aren't directly applied to the text being viewed.
 23 | /// `ThemeSettings` are intended to be used to make the UI of the editor match the styling of the
 24 | /// text itself.
 25 | #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
 26 | pub struct ThemeSettings {
 27 |     /// The default color for text.
 28 |     pub foreground: Option<Color>,
 29 |     /// The default background color of the view.
 30 |     pub background: Option<Color>,
 31 |     /// Color of the caret.
 32 |     pub caret: Option<Color>,
 33 |     /// Color of the line the caret is in.
 34 |     /// Only used when the `highlight_line` setting is set to `true`.
 35 |     pub line_highlight: Option<Color>,
 36 | 
 37 |     /// The color to use for the squiggly underline drawn under misspelled words.
 38 |     pub misspelling: Option<Color>,
 39 |     /// The color of the border drawn around the viewport area of the minimap.
 40 |     /// Only used when the `draw_minimap_border` setting is enabled.
 41 |     pub minimap_border: Option<Color>,
 42 |     /// A color made available for use by the theme.
 43 |     pub accent: Option<Color>,
 44 |     /// CSS passed to popups.
 45 |     pub popup_css: Option<String>,
 46 |     /// CSS passed to phantoms.
 47 |     pub phantom_css: Option<String>,
 48 | 
 49 |     /// Color of bracketed sections of text when the caret is in a bracketed section.
 50 |     /// Only applied when the `match_brackets` setting is set to `true`.
 51 |     pub bracket_contents_foreground: Option<Color>,
 52 |     /// Controls certain options when the caret is in a bracket section.
 53 |     /// Only applied when the `match_brackets` setting is set to `true`.
 54 |     pub bracket_contents_options: Option<UnderlineOption>,
 55 |     /// Foreground color of the brackets when the caret is next to a bracket.
 56 |     /// Only applied when the `match_brackets` setting is set to `true`.
 57 |     pub brackets_foreground: Option<Color>,
 58 |     /// Background color of the brackets when the caret is next to a bracket.
 59 |     /// Only applied when the `match_brackets` setting is set to `true`.
 60 |     pub brackets_background: Option<Color>,
 61 |     /// Controls certain options when the caret is next to a bracket.
 62 |     /// Only applied when the `match_brackets` setting is set to `true`.
 63 |     pub brackets_options: Option<UnderlineOption>,
 64 | 
 65 |     /// Color of tags when the caret is next to a tag.
 66 |     /// Only used when the `match_tags` setting is set to `true`.
 67 |     pub tags_foreground: Option<Color>,
 68 |     /// Controls certain options when the caret is next to a tag.
 69 |     /// Only applied when the `match_tags` setting is set to `true`.
 70 |     pub tags_options: Option<UnderlineOption>,
 71 | 
 72 |     /// The border color for "other" matches.
 73 |     pub highlight: Option<Color>,
 74 |     /// Background color of regions matching the current search.
 75 |     pub find_highlight: Option<Color>,
 76 |     /// Text color of regions matching the current search.
 77 |     pub find_highlight_foreground: Option<Color>,
 78 | 
 79 |     /// Background color of the gutter.
 80 |     pub gutter: Option<Color>,
 81 |     /// Foreground color of the gutter.
 82 |     pub gutter_foreground: Option<Color>,
 83 | 
 84 |     /// The background color of selected text.
 85 |     pub selection: Option<Color>,
 86 |     /// A color that will override the scope-based text color of the selection.
 87 |     pub selection_foreground: Option<Color>,
 88 | 
 89 |     /// Color of the selection regions border.
 90 |     pub selection_border: Option<Color>,
 91 |     /// The background color of a selection in a view that is not currently focused.
 92 |     pub inactive_selection: Option<Color>,
 93 |     /// A color that will override the scope-based text color of the selection
 94 |     /// in a view that is not currently focused.
 95 |     pub inactive_selection_foreground: Option<Color>,
 96 | 
 97 |     /// Color of the guides displayed to indicate nesting levels.
 98 |     pub guide: Option<Color>,
 99 |     /// Color of the guide lined up with the caret.
100 |     /// Only applied if the `indent_guide_options` setting is set to `draw_active`.
101 |     pub active_guide: Option<Color>,
102 |     /// Color of the current guide’s parent guide level.
103 |     /// Only used if the `indent_guide_options` setting is set to `draw_active`.
104 |     pub stack_guide: Option<Color>,
105 | 
106 |     /// The color of the shadow used when a text area can be horizontally scrolled.
107 |     pub shadow: Option<Color>,
108 | }
109 | 
110 | /// A component of a theme meant to highlight a specific thing (e.g. string literals)
111 | /// in a certain way.
112 | #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
113 | pub struct ThemeItem {
114 |     /// Target scope name.
115 |     pub scope: ScopeSelectors,
116 |     /// The style to use for this component.
117 |     pub style: StyleModifier,
118 | }
119 | /// Underline style selectable through the `*_options` fields of `ThemeSettings`.
120 | #[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
121 | pub enum UnderlineOption {
122 |     #[default]
123 |     None,
124 |     Underline,
125 |     StippledUnderline,
126 |     SquigglyUnderline,
127 | }
128 | 


--------------------------------------------------------------------------------
/examples/synstats.rs:
--------------------------------------------------------------------------------
  1 | //! An example of using syntect for code analysis.
  2 | //! Basically a fancy lines of code count program that works
  3 | //! for all languages Sublime Text supports and also counts things
  4 | //! like number of functions and number of types defined.
  5 | //!
  6 | //! Another thing it does that other line count programs can't always
  7 | //! do is properly count comments in embedded syntaxes. For example
  8 | //! JS, CSS and Ruby comments embedded in ERB files.
  9 | use syntect::easy::ScopeRegionIterator;
 10 | use syntect::highlighting::{ScopeSelector, ScopeSelectors};
 11 | use syntect::parsing::{ParseState, ScopeStack, ScopeStackOp, SyntaxSet};
 12 | 
 13 | use std::fs::File;
 14 | use std::io::{BufRead, BufReader};
 15 | use std::path::Path;
 16 | use std::str::FromStr;
 17 | use walkdir::{DirEntry, WalkDir};
 18 | /// Scope selectors used by `count_line` to classify each region of a line.
 19 | #[derive(Debug)]
 20 | struct Selectors {
 21 |     comment: ScopeSelector,
 22 |     doc_comment: ScopeSelectors,
 23 |     function: ScopeSelector,
 24 |     types: ScopeSelectors,
 25 | }
 26 | // The selector strings are literals, so `unwrap` only panics if one of them is malformed.
 27 | impl Default for Selectors {
 28 |     fn default() -> Selectors {
 29 |         Selectors {
 30 |             comment: ScopeSelector::from_str("comment - comment.block.attribute").unwrap(),
 31 |             doc_comment: ScopeSelectors::from_str(
 32 |                 "comment.line.documentation, comment.block.documentation",
 33 |             )
 34 |             .unwrap(),
 35 |             function: ScopeSelector::from_str("entity.name.function").unwrap(),
 36 |             types: ScopeSelectors::from_str(
 37 |                 "entity.name.class, entity.name.struct, entity.name.enum, entity.name.type",
 38 |             )
 39 |             .unwrap(),
 40 |         }
 41 |     }
 42 | }
 43 | /// Totals accumulated over every counted file, plus the selectors used to classify regions.
 44 | #[derive(Debug, Default)]
 45 | struct Stats {
 46 |     selectors: Selectors,
 47 |     files: usize,
 48 |     functions: usize,
 49 |     types: usize,
 50 |     lines: usize,
 51 |     chars: usize,
 52 |     code_lines: usize,
 53 |     comment_lines: usize,
 54 |     comment_chars: usize,
 55 |     comment_words: usize,
 56 |     doc_comment_lines: usize,
 57 |     doc_comment_words: usize,
 58 | }
 59 | 
 60 | /// Prints the accumulated totals to stdout, each value right-aligned in a six-wide column.
 61 | /// Blank lines are not tracked separately; they are derived from the other line counters.
 62 | fn print_stats(stats: &Stats) {
 63 |     println!();
 64 |     println!("################## Stats ###################");
 65 |     println!("File count:                           {:>6}", stats.files);
 66 |     println!("Total characters:                     {:>6}", stats.chars);
 67 |     println!();
 68 |     println!(
 69 |         "Function count:                       {:>6}",
 70 |         stats.functions
 71 |     );
 72 |     println!("Type count (structs, enums, classes): {:>6}", stats.types);
 73 |     println!();
 74 |     println!(
 75 |         "Code lines (traditional SLOC):        {:>6}",
 76 |         stats.code_lines
 77 |     );
 78 |     println!("Total lines (w/ comments & blanks):   {:>6}", stats.lines);
 79 |     println!(
 80 |         "Comment lines (comment but no code):  {:>6}",
 81 |         stats.comment_lines
 82 |     );
 83 |     let blank_lines = stats.lines - stats.code_lines - stats.comment_lines;
 84 |     println!("Blank lines (lines-blank-comment):    {:>6}", blank_lines);
 85 |     println!();
 86 |     println!(
 87 |         "Lines with a documentation comment:   {:>6}",
 88 |         stats.doc_comment_lines
 89 |     );
 90 |     println!(
 91 |         "Total words written in doc comments:  {:>6}",
 92 |         stats.doc_comment_words
 93 |     );
 94 |     println!(
 95 |         "Total words written in all comments:  {:>6}",
 96 |         stats.comment_words
 97 |     );
 98 |     println!(
 99 |         "Characters of comment:                {:>6}",
100 |         stats.comment_chars
101 |     );
102 | }
103 | 
104 | /// Returns `true` for Markdown files and for dot-prefixed entries other than a bare `.`.
105 | fn is_ignored(entry: &DirEntry) -> bool {
106 |     // Names that are not valid UTF-8 are never ignored.
107 |     entry.file_name().to_str().map_or(false, |name| {
108 |         (name.len() > 1 && name.starts_with('.')) || name.ends_with(".md")
109 |     })
110 | }
111 | 
112 | /// Tallies one parsed line into `stats`.
113 | ///
114 | /// `ops` are the scope operations for `line`; they are applied to `stack` region by region so
115 | /// that each piece of text is classified against the scopes active at that point. Text inside
116 | /// a comment scope feeds the comment counters (and the doc-comment ones when it also matches a
117 | /// documentation selector); any other non-whitespace text marks the line as code. Function and
118 | /// type name matches are counted once per matching region.
119 | fn count_line(
120 |     ops: &[(usize, ScopeStackOp)],
121 |     line: &str,
122 |     stack: &mut ScopeStack,
123 |     stats: &mut Stats,
124 | ) {
125 |     stats.lines += 1;
126 | 
127 |     let mut saw_comment = false;
128 |     let mut saw_doc_comment = false;
129 |     let mut saw_code = false;
130 |     for (text, op) in ScopeRegionIterator::new(ops, line) {
131 |         stack.apply(op).unwrap();
132 |         // Zero-length regions carry no text to classify.
133 |         if text.is_empty() {
134 |             continue;
135 |         }
136 | 
137 |         // Borrow only the selectors so the counters on `stats` remain writable.
138 |         let selectors = &stats.selectors;
139 |         let scopes = stack.as_slice();
140 |         if selectors.comment.does_match(scopes).is_some() {
141 |             // A word is a whitespace-separated token of letters, digits, `.` and `'` only.
142 |             let word_char = |c: char| c.is_alphanumeric() || c == '.' || c == '\'';
143 |             let words = text
144 |                 .split_whitespace()
145 |                 .filter(|w| w.chars().all(word_char))
146 |                 .count();
147 |             if selectors.doc_comment.does_match(scopes).is_some() {
148 |                 saw_doc_comment = true;
149 |                 stats.doc_comment_words += words;
150 |             }
151 |             // `len()` is in bytes, the same unit `count` uses for `stats.chars`.
152 |             stats.comment_chars += text.len();
153 |             stats.comment_words += words;
154 |             saw_comment = true;
155 |         } else if text.chars().any(|c| !c.is_whitespace()) {
156 |             saw_code = true;
157 |         }
158 | 
159 |         // Definitions are tallied whether the region was classified as comment or code.
160 |         if selectors.function.does_match(scopes).is_some() {
161 |             stats.functions += 1;
162 |         }
163 |         if selectors.types.does_match(scopes).is_some() {
164 |             stats.types += 1;
165 |         }
166 |     }
167 | 
168 |     // A line holding both code and a comment counts as a code line only.
169 |     if saw_code {
170 |         stats.code_lines += 1;
171 |     } else if saw_comment {
172 |         stats.comment_lines += 1;
173 |     }
174 | 
175 |     if saw_doc_comment {
176 |         stats.doc_comment_lines += 1;
177 |     }
178 | }
179 | 
180 | /// Parses the file at `path` line by line and folds the results into `stats`.
181 | /// Files without a matching syntax are skipped and not counted in `stats.files`.
182 | fn count(ss: &SyntaxSet, path: &Path, stats: &mut Stats) {
183 |     // A failed lookup is handled like "no syntax found": the file is skipped.
184 |     let syntax = match ss.find_syntax_for_file(path) {
185 |         Ok(Some(found)) => found,
186 |         Ok(None) | Err(_) => return,
187 |     };
188 |     stats.files += 1;
189 |     let mut parser = ParseState::new(syntax);
190 |     let mut reader = BufReader::new(File::open(path).unwrap());
191 |     let mut scopes = ScopeStack::new();
192 |     let mut buf = String::new();
193 |     // `read_line` keeps the trailing `\n`; the buffer is reused for every line.
194 |     while reader.read_line(&mut buf).unwrap() > 0 {
195 |         let ops = parser.parse_line(&buf, ss).unwrap();
196 |         stats.chars += buf.len();
197 |         count_line(&ops, &buf, &mut scopes, stats);
198 |         buf.clear();
199 |     }
200 | }
201 | 
202 | /// Walks the directory given as the first argument (`.` by default) and prints code statistics.
203 | fn main() {
204 |     // The newline-aware syntaxes are loaded because `count` parses lines with `\n` kept.
205 |     let ss = SyntaxSet::load_defaults_newlines();
206 |     let args: Vec<String> = std::env::args().collect();
207 |     let root = args.get(1).map_or(".", String::as_str);
208 |     println!("################## Files ###################");
209 |     let mut stats = Stats::default();
210 |     let walker = WalkDir::new(root).into_iter();
211 |     for entry in walker.filter_entry(|e| !is_ignored(e)) {
212 |         let entry = entry.unwrap();
213 |         if !entry.file_type().is_file() {
214 |             continue;
215 |         }
216 |         println!("{}", entry.path().display());
217 |         count(&ss, entry.path(), &mut stats);
218 |     }
219 | 
220 |     print_stats(&stats);
221 | }
222 | 


--------------------------------------------------------------------------------
/src/parsing/regex.rs:
--------------------------------------------------------------------------------
  1 | use once_cell::sync::OnceCell;
  2 | use serde::de::{Deserialize, Deserializer};
  3 | use serde::ser::{Serialize, Serializer};
  4 | use std::error::Error;
  5 | 
  6 | /// An abstraction for regex patterns.
  7 | ///
  8 | /// * Allows swapping out the regex implementation, since the backend is only used in this module.
  9 | /// * Makes regexes serializable and deserializable using just the pattern string.
 10 | /// * Lazily compiles regexes on first use to improve initialization time.
 11 | #[derive(Debug)]
 12 | pub struct Regex {
 13 |     regex_str: String,
 14 |     regex: OnceCell<regex_impl::Regex>,
 15 | }
 16 | 
 17 | /// Holds the text positions of the capture groups from a match result.
 18 | #[derive(Clone, Debug, Eq, PartialEq)]
 19 | pub struct Region {
 20 |     region: regex_impl::Region,
 21 | }
 22 | 
 23 | impl Regex {
 24 |     /// Create a new regex from the pattern string.
 25 |     ///
 26 |     /// Note that the regex compilation happens on first use, which is why this method does not
 27 |     /// return a result.
 28 |     pub fn new(regex_str: String) -> Self {
 29 |         Self {
 30 |             regex_str,
 31 |             regex: OnceCell::new(),
 32 |         }
 33 |     }
 34 | 
 35 |     /// Return the compile error if the pattern is not a valid regex, or `None` if it compiles.
 36 |     pub fn try_compile(regex_str: &str) -> Option<Box<dyn Error + Send + Sync + 'static>> {
 37 |         regex_impl::Regex::new(regex_str).err()
 38 |     }
 39 | 
 40 |     /// Return the regex pattern.
 41 |     pub fn regex_str(&self) -> &str {
 42 |         &self.regex_str
 43 |     }
 44 | 
 45 |     /// Check if the regex matches the given text.
 46 |     pub fn is_match(&self, text: &str) -> bool {
 47 |         self.regex().is_match(text)
 48 |     }
 49 | 
 50 |     /// Search for the pattern in the given text from begin/end positions.
 51 |     ///
 52 |     /// If a region is passed, it is used for storing match group positions. The argument allows
 53 |     /// the [`Region`] to be reused between searches, which makes a significant performance
 54 |     /// difference.
 55 |     ///
 56 |     /// [`Region`]: struct.Region.html
 57 |     pub fn search(
 58 |         &self,
 59 |         text: &str,
 60 |         begin: usize,
 61 |         end: usize,
 62 |         region: Option<&mut Region>,
 63 |     ) -> bool {
 64 |         self.regex()
 65 |             .search(text, begin, end, region.map(|r| &mut r.region))
 66 |     }
 67 | 
 68 |     fn regex(&self) -> &regex_impl::Regex {
 69 |         self.regex.get_or_init(|| { // compiles on first call; panics if the pattern is invalid
 70 |             regex_impl::Regex::new(&self.regex_str).expect("regex string should be pre-tested")
 71 |         })
 72 |     }
 73 | }
 74 | 
 75 | impl Clone for Regex {
 76 |     fn clone(&self) -> Self {
 77 |         Regex {
 78 |             regex_str: self.regex_str.clone(),
 79 |             regex: OnceCell::new(), // the clone starts uncompiled and compiles on first use
 80 |         }
 81 |     }
 82 | }
 83 | 
 84 | impl PartialEq for Regex {
 85 |     fn eq(&self, other: &Regex) -> bool {
 86 |         self.regex_str == other.regex_str
 87 |     }
 88 | }
 89 | 
// Equality is plain `String` equality on the pattern text, so it is a full equivalence relation.
impl Eq for Regex {}
 91 | 
 92 | impl Serialize for Regex {
 93 |     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
 94 |     where
 95 |         S: Serializer,
 96 |     {
 97 |         serializer.serialize_str(&self.regex_str)
 98 |     }
 99 | }
100 | 
101 | impl<'de> Deserialize<'de> for Regex {
102 |     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
103 |     where
104 |         D: Deserializer<'de>,
105 |     {
106 |         let regex_str = String::deserialize(deserializer)?;
107 |         Ok(Regex::new(regex_str))
108 |     }
109 | }
110 | 
111 | impl Region {
112 |     pub fn new() -> Self {
113 |         Self {
114 |             region: regex_impl::new_region(),
115 |         }
116 |     }
117 | 
118 |     /// Get the start/end positions of the capture group with given index.
119 |     ///
120 |     /// If there is no match for that group or the index does not correspond to a group, `None` is
121 |     /// returned. The index 0 returns the whole match.
122 |     pub fn pos(&self, index: usize) -> Option<(usize, usize)> {
123 |         self.region.pos(index)
124 |     }
125 | }
126 | 
127 | impl Default for Region {
128 |     fn default() -> Self {
129 |         Self::new()
130 |     }
131 | }
132 | 
#[cfg(feature = "regex-onig")]
mod regex_impl {
    pub use onig::Region;
    use onig::{MatchParam, RegexOptions, SearchOptions, Syntax};
    use std::error::Error;

    /// Regex backend built on the `onig` crate.
    #[derive(Debug)]
    pub struct Regex {
        regex: onig::Regex,
    }

    /// Create a region with an initial capacity of 8.
    pub fn new_region() -> Region {
        Region::with_capacity(8)
    }

    impl Regex {
        /// Compile `regex_str` with the capture-group option and the default syntax.
        pub fn new(regex_str: &str) -> Result<Regex, Box<dyn Error + Send + Sync + 'static>> {
            let regex = onig::Regex::with_options(
                regex_str,
                RegexOptions::REGEX_OPTION_CAPTURE_GROUP,
                Syntax::default(),
            )?;
            Ok(Regex { regex })
        }

        /// Check for a match anchored at position 0 of `text`.
        pub fn is_match(&self, text: &str) -> bool {
            let matched =
                self.regex
                    .match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None);
            matched.is_some()
        }

        /// Search `text` between `begin` and `end`, optionally filling `region`.
        pub fn search(
            &self,
            text: &str,
            begin: usize,
            end: usize,
            region: Option<&mut Region>,
        ) -> bool {
            let outcome = self.regex.search_with_param(
                text,
                begin,
                end,
                SearchOptions::SEARCH_OPTION_NONE,
                region,
                MatchParam::default(),
            );

            // If there's an error during search, treat it as non-matching.
            // For example, in case of catastrophic backtracking, onig should
            // fail with a "retry-limit-in-match over" error eventually.
            match outcome {
                Ok(Some(_)) => true,
                Ok(None) | Err(_) => false,
            }
        }
    }
}
190 | 
// If both regex-fancy and regex-onig are requested, this condition makes regex-onig win.
#[cfg(all(feature = "regex-fancy", not(feature = "regex-onig")))]
mod regex_impl {
    use std::error::Error;

    /// Regex backend built on the `fancy_regex` crate.
    #[derive(Debug)]
    pub struct Regex {
        regex: fancy_regex::Regex,
    }

    /// Capture group positions of the most recent successful search.
    #[derive(Clone, Debug, Eq, PartialEq)]
    pub struct Region {
        positions: Vec<Option<(usize, usize)>>,
    }

    /// Create an empty region with room for 8 groups before reallocating.
    pub fn new_region() -> Region {
        let positions = Vec::with_capacity(8);
        Region { positions }
    }

    impl Regex {
        /// Compile `regex_str` with oniguruma mode enabled.
        pub fn new(regex_str: &str) -> Result<Regex, Box<dyn Error + Send + Sync + 'static>> {
            let regex = fancy_regex::RegexBuilder::new(regex_str)
                .oniguruma_mode(true)
                .build()?;
            Ok(Regex { regex })
        }

        /// Report whether the regex matches `text`.
        pub fn is_match(&self, text: &str) -> bool {
            // Errors are treated as non-matches
            matches!(self.regex.is_match(text), Ok(true))
        }

        /// Search `text[..end]` starting at `begin`, optionally filling `region`.
        pub fn search(
            &self,
            text: &str,
            begin: usize,
            end: usize,
            region: Option<&mut Region>,
        ) -> bool {
            // If there's an error during search, treat it as non-matching.
            // For example, in case of catastrophic backtracking, fancy-regex should
            // fail with an error eventually.
            match self.regex.captures_from_pos(&text[..end], begin) {
                Ok(Some(captures)) => {
                    if let Some(region) = region {
                        region.init_from_captures(&captures);
                    }
                    true
                }
                _ => false,
            }
        }
    }

    impl Region {
        /// Replace the stored positions with those of every group in `captures`.
        fn init_from_captures(&mut self, captures: &fancy_regex::Captures) {
            self.positions.clear();
            let groups =
                (0..captures.len()).map(|i| captures.get(i).map(|m| (m.start(), m.end())));
            self.positions.extend(groups);
        }

        /// Positions of group `i`, or `None` if it is out of range or did not match.
        pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
            self.positions.get(i).copied().flatten()
        }
    }
}
267 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// The compiled regex must only show up in the cache after the first real use.
    #[test]
    fn caches_compiled_regex() {
        let pattern = String::from(r"\w+");
        let regex = Regex::new(pattern);

        assert!(regex.regex.get().is_none());
        assert!(regex.is_match("test"));
        assert!(regex.regex.get().is_some());
    }

    /// A regex round-trips through serde as a bare string.
    #[test]
    fn serde_as_string() {
        let json = "\"just a string\"";
        let pattern: Regex = serde_json::from_str(json).unwrap();
        assert_eq!(pattern.regex_str(), "just a string");
        let round_tripped = serde_json::to_string(&pattern).unwrap();
        assert_eq!(round_tripped, "\"just a string\"");
    }
}
289 | 


--------------------------------------------------------------------------------
/src/dumps.rs:
--------------------------------------------------------------------------------
  1 | //! Methods for dumping serializable structs to a compressed binary format,
  2 | //! used to allow fast startup times
  3 | //!
  4 | //! Currently syntect serializes [`SyntaxSet`] structs with [`dump_to_uncompressed_file`]
  5 | //! into `.packdump` files and likewise [`ThemeSet`] structs to `.themedump` files with [`dump_to_file`].
  6 | //!
  7 | //! You can use these methods to manage your own caching of compiled syntaxes and
  8 | //! themes. And even your own `serde::Serialize` structures if you want to
  9 | //! be consistent with your format.
 10 | //!
 11 | //! [`SyntaxSet`]: ../parsing/struct.SyntaxSet.html
 12 | //! [`dump_to_uncompressed_file`]: fn.dump_to_uncompressed_file.html
 13 | //! [`ThemeSet`]: ../highlighting/struct.ThemeSet.html
 14 | //! [`dump_to_file`]: fn.dump_to_file.html
 15 | #[cfg(feature = "default-themes")]
 16 | use crate::highlighting::ThemeSet;
 17 | #[cfg(feature = "default-syntaxes")]
 18 | use crate::parsing::SyntaxSet;
 19 | #[cfg(feature = "dump-load")]
 20 | use bincode::deserialize_from;
 21 | #[cfg(feature = "dump-create")]
 22 | use bincode::serialize_into;
 23 | use bincode::Result;
 24 | #[cfg(feature = "dump-load")]
 25 | use flate2::bufread::ZlibDecoder;
 26 | #[cfg(feature = "dump-create")]
 27 | use flate2::write::ZlibEncoder;
 28 | #[cfg(feature = "dump-create")]
 29 | use flate2::Compression;
 30 | #[cfg(feature = "dump-load")]
 31 | use serde::de::DeserializeOwned;
 32 | #[cfg(feature = "dump-create")]
 33 | use serde::ser::Serialize;
 34 | use std::fs::File;
 35 | #[cfg(feature = "dump-load")]
 36 | use std::io::BufRead;
 37 | #[cfg(feature = "dump-create")]
 38 | use std::io::{BufWriter, Write};
 39 | use std::path::Path;
 40 | 
 41 | /// Dumps an object to the given writer in a compressed binary format
 42 | ///
 43 | /// The writer is encoded with the `bincode` crate and compressed with `flate2`.
 44 | #[cfg(feature = "dump-create")]
 45 | pub fn dump_to_writer<T: Serialize, W: Write>(to_dump: &T, output: W) -> Result<()> {
 46 |     serialize_to_writer_impl(to_dump, output, true)
 47 | }
 48 | 
 49 | /// Dumps an object to a binary array in the same format as [`dump_to_writer`]
 50 | ///
 51 | /// [`dump_to_writer`]: fn.dump_to_writer.html
 52 | #[cfg(feature = "dump-create")]
 53 | pub fn dump_binary<T: Serialize>(o: &T) -> Vec<u8> {
 54 |     let mut v = Vec::new();
 55 |     dump_to_writer(o, &mut v).unwrap();
 56 |     v
 57 | }
 58 | 
 59 | /// Dumps an encodable object to a file at a given path, in the same format as [`dump_to_writer`]
 60 | ///
 61 | /// If a file already exists at that path it will be overwritten. The files created are encoded with
 62 | /// the `bincode` crate and then compressed with the `flate2` crate.
 63 | ///
 64 | /// [`dump_to_writer`]: fn.dump_to_writer.html
 65 | #[cfg(feature = "dump-create")]
 66 | pub fn dump_to_file<T: Serialize, P: AsRef<Path>>(o: &T, path: P) -> Result<()> {
 67 |     let out = BufWriter::new(File::create(path)?);
 68 |     dump_to_writer(o, out)
 69 | }
 70 | 
 71 | /// A helper function for decoding and decompressing data from a reader
 72 | #[cfg(feature = "dump-load")]
 73 | pub fn from_reader<T: DeserializeOwned, R: BufRead>(input: R) -> Result<T> {
 74 |     deserialize_from_reader_impl(input, true)
 75 | }
 76 | 
 77 | /// Returns a fully loaded object from a binary dump.
 78 | ///
 79 | /// This function panics if the dump is invalid.
 80 | #[cfg(feature = "dump-load")]
 81 | pub fn from_binary<T: DeserializeOwned>(v: &[u8]) -> T {
 82 |     from_reader(v).unwrap()
 83 | }
 84 | 
 85 | /// Returns a fully loaded object from a binary dump file.
 86 | #[cfg(feature = "dump-load")]
 87 | pub fn from_dump_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
 88 |     let contents = std::fs::read(path)?;
 89 |     from_reader(&contents[..])
 90 | }
 91 | 
 92 | /// To be used when serializing a [`SyntaxSet`] to a file. A [`SyntaxSet`]
 93 | /// itself shall not be compressed, because the data for its lazy-loaded
 94 | /// syntaxes are already compressed. Compressing another time just results in
 95 | /// bad performance.
 96 | #[cfg(feature = "dump-create")]
 97 | pub fn dump_to_uncompressed_file<T: Serialize, P: AsRef<Path>>(o: &T, path: P) -> Result<()> {
 98 |     let out = BufWriter::new(File::create(path)?);
 99 |     serialize_to_writer_impl(o, out, false)
100 | }
101 | 
/// To be used when deserializing a [`SyntaxSet`] that was previously written to
/// file using [`dump_to_uncompressed_file`].
///
/// The whole file is read into memory before it is decoded.
#[cfg(feature = "dump-load")]
pub fn from_uncompressed_dump_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
    let bytes = std::fs::read(path)?;
    let use_compression = false;
    deserialize_from_reader_impl(bytes.as_slice(), use_compression)
}
109 | 
/// To be used when deserializing a [`SyntaxSet`] from raw, uncompressed data,
/// for example data that has been embedded in your own binary with the
/// [`include_bytes!`] macro.
#[cfg(feature = "dump-load")]
pub fn from_uncompressed_data<T: DeserializeOwned>(v: &[u8]) -> Result<T> {
    let use_compression = false;
    deserialize_from_reader_impl(v, use_compression)
}
117 | 
/// Private low level helper function used to implement the public API.
///
/// Encodes `to_dump` with `bincode` into `output`, optionally wrapping the output in a zlib
/// encoder first.
///
/// The zlib stream is finished and every buffer is flushed explicitly before returning.
/// Relying on `Drop` for that would silently discard any I/O error hit while writing the
/// final bytes, reporting success for a truncated dump.
#[cfg(feature = "dump-create")]
fn serialize_to_writer_impl<T: Serialize, W: Write>(
    to_dump: &T,
    output: W,
    use_compression: bool,
) -> Result<()> {
    let mut output = if use_compression {
        let mut encoder = BufWriter::new(ZlibEncoder::new(output, Compression::best()));
        serialize_into(&mut encoder, to_dump)?;
        // Drain the buffer into the encoder, then write the zlib trailer.
        let zlib = encoder.into_inner().map_err(std::io::Error::from)?;
        zlib.finish()?
    } else {
        let mut output = output;
        serialize_into(&mut output, to_dump)?;
        output
    };
    // The caller's writer may itself be buffered (e.g. a `BufWriter<File>`).
    output.flush()?;
    Ok(())
}
132 | 
/// Private low level helper function used to implement the public API.
///
/// Decodes a `bincode` value from `input`, running the input through a zlib decoder first
/// when `use_compression` is set.
#[cfg(feature = "dump-load")]
fn deserialize_from_reader_impl<T: DeserializeOwned, R: BufRead>(
    input: R,
    use_compression: bool,
) -> Result<T> {
    if !use_compression {
        return deserialize_from(input);
    }
    let mut zlib = ZlibDecoder::new(input);
    deserialize_from(&mut zlib)
}
146 | 
#[cfg(feature = "default-syntaxes")]
impl SyntaxSet {
    /// Instantiates a new syntax set from a binary dump of Sublime Text's default open source
    /// syntax definitions.
    ///
    /// These dumps are included in this library's binary for convenience.
    ///
    /// This method loads the version for parsing line strings with no `\n` characters at the end.
    /// If you're able to efficiently include newlines at the end of strings, use
    /// [`load_defaults_newlines`] since it works better. See [`SyntaxSetBuilder::add_from_folder`]
    /// for more info on this issue.
    ///
    /// This is the recommended way of creating a syntax set for non-advanced use cases. It is also
    /// significantly faster than loading the YAML files.
    ///
    /// Note that you can load additional syntaxes after doing this. If you want you can even use
    /// the fact that SyntaxDefinitions are serializable with the bincode crate to cache dumps of
    /// additional syntaxes yourself.
    ///
    /// # Panics
    ///
    /// Panics if the embedded dump cannot be deserialized.
    ///
    /// [`load_defaults_newlines`]: #method.load_defaults_newlines
    /// [`SyntaxSetBuilder::add_from_folder`]: struct.SyntaxSetBuilder.html#method.add_from_folder
    pub fn load_defaults_nonewlines() -> SyntaxSet {
        // With the `metadata` feature the separately stored metadata dump is attached as well.
        #[cfg(feature = "metadata")]
        {
            let mut ps: SyntaxSet =
                from_uncompressed_data(include_bytes!("../assets/default_nonewlines.packdump"))
                    .unwrap();
            let metadata = from_binary(include_bytes!("../assets/default_metadata.packdump"));
            ps.metadata = metadata;
            ps
        }
        #[cfg(not(feature = "metadata"))]
        {
            from_uncompressed_data(include_bytes!("../assets/default_nonewlines.packdump")).unwrap()
        }
    }

    /// Same as [`load_defaults_nonewlines`] but for parsing line strings with newlines at the end.
    ///
    /// These are separate methods because thanks to linker garbage collection, only the serialized
    /// dumps for the method(s) you call will be included in the binary (each is ~200kb for now).
    ///
    /// # Panics
    ///
    /// Panics if the embedded dump cannot be deserialized.
    ///
    /// [`load_defaults_nonewlines`]: #method.load_defaults_nonewlines
    pub fn load_defaults_newlines() -> SyntaxSet {
        // With the `metadata` feature the separately stored metadata dump is attached as well.
        #[cfg(feature = "metadata")]
        {
            let mut ps: SyntaxSet =
                from_uncompressed_data(include_bytes!("../assets/default_newlines.packdump"))
                    .unwrap();
            let metadata = from_binary(include_bytes!("../assets/default_metadata.packdump"));
            ps.metadata = metadata;
            ps
        }
        #[cfg(not(feature = "metadata"))]
        {
            from_uncompressed_data(include_bytes!("../assets/default_newlines.packdump")).unwrap()
        }
    }
}
206 | 
#[cfg(feature = "default-themes")]
impl ThemeSet {
    /// Loads the set of default themes from the theme dump embedded in the library.
    ///
    /// Currently includes (these are the keys for the map):
    ///
    /// - `base16-ocean.dark`,`base16-eighties.dark`,`base16-mocha.dark`,`base16-ocean.light`
    /// - `InspiredGitHub` from [here](https://github.com/sethlopezme/InspiredGitHub.tmtheme)
    /// - `Solarized (dark)` and `Solarized (light)`
    pub fn load_defaults() -> ThemeSet {
        let dump = include_bytes!("../assets/default.themedump");
        from_binary(dump)
    }
}
219 | 
#[cfg(test)]
mod tests {
    /// A syntax set survives a dump/load round trip with the same number of syntaxes.
    #[cfg(all(
        feature = "yaml-load",
        feature = "dump-create",
        feature = "dump-load",
        feature = "parsing"
    ))]
    #[test]
    fn can_dump_and_load() {
        use super::*;
        use crate::utils::testdata;

        let original = &*testdata::PACKAGES_SYN_SET;

        let dumped = dump_binary(&original);
        println!("{:?}", dumped.len());
        let reloaded: SyntaxSet = from_binary(&dumped[..]);
        assert_eq!(original.syntaxes().len(), reloaded.syntaxes().len());
    }

    /// Two independently built syntax sets must produce byte-identical dumps.
    #[cfg(all(feature = "yaml-load", feature = "dump-create", feature = "dump-load"))]
    #[test]
    fn dump_is_deterministic() {
        use super::*;
        use crate::parsing::SyntaxSetBuilder;
        use crate::utils::testdata;

        let first_set = &*testdata::PACKAGES_SYN_SET;
        let first_dump = dump_binary(&first_set);

        let mut builder = SyntaxSetBuilder::new();
        builder
            .add_from_folder("testdata/Packages", false)
            .unwrap();
        let second_set = builder.build();
        let second_dump = dump_binary(&second_set);
        // This is redundant, but assert_eq! can be really slow on a large
        // vector, so check the length first to fail faster.
        assert_eq!(first_dump.len(), second_dump.len());
        assert_eq!(first_dump, second_dump);
    }

    /// The embedded theme dump loads and contains more than four themes.
    #[cfg(feature = "default-themes")]
    #[test]
    fn has_default_themes() {
        use crate::highlighting::ThemeSet;
        let theme_set = ThemeSet::load_defaults();
        assert!(theme_set.themes.len() > 4);
    }
}
271 | 


--------------------------------------------------------------------------------
/src/parsing/syntax_definition.rs:
--------------------------------------------------------------------------------
  1 | //! Data structures for representing syntax definitions
  2 | //!
//! Everything here is public because I want this library to be useful in super integrated cases
  4 | //! like text editors and I have no idea what kind of monkeying you might want to do with the data.
  5 | //! Perhaps parsing your own syntax format into this data structure?
  6 | 
  7 | use super::regex::{Regex, Region};
  8 | use super::{scope::*, ParsingError};
  9 | use crate::parsing::syntax_set::SyntaxSet;
 10 | use regex_syntax::escape;
 11 | use serde::ser::{Serialize, Serializer};
 12 | use serde_derive::{Deserialize, Serialize};
 13 | use std::collections::{BTreeMap, HashMap};
 14 | use std::hash::Hash;
 15 | 
/// A list of `(usize, scopes)` pairs, used as the type of [`MatchPattern::captures`].
///
/// NOTE(review): the `usize` is presumably the capture group index the scopes apply to —
/// confirm against the parser.
pub type CaptureMapping = Vec<(usize, Vec<Scope>)>;
 17 | 
/// An opaque ID for a [`Context`].
///
/// It is a pair of indices: first the syntax, then the context within that syntax.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct ContextId {
    /// Index into [`SyntaxSet::syntaxes`]
    pub(crate) syntax_index: usize,

    /// Index into [`crate::parsing::LazyContexts::contexts`] for the [`Self::syntax_index`] syntax
    pub(crate) context_index: usize,
}
 27 | 
/// The main data structure representing a syntax definition loaded from a
/// `.sublime-syntax` file
///
/// You'll probably only need these as references to be passed around to parsing code.
///
/// Some useful public fields are the `name` field which is a human readable name to display in
/// syntax lists, and the `hidden` field which means hide this syntax from any lists because it is
/// for internal use.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct SyntaxDefinition {
    pub name: String,
    pub file_extensions: Vec<String>,
    pub scope: Scope,
    pub first_line_match: Option<String>,
    pub hidden: bool,
    // Both maps are serialized through `ordered_map`, i.e. in key order, so that dumps are
    // deterministic despite `HashMap`'s unspecified iteration order.
    #[serde(serialize_with = "ordered_map")]
    pub variables: HashMap<String, String>,
    #[serde(serialize_with = "ordered_map")]
    pub contexts: HashMap<String, Context>,
}
 48 | 
/// A context of a syntax definition: an ordered list of patterns plus its scope settings.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct Context {
    pub meta_scope: Vec<Scope>,
    pub meta_content_scope: Vec<Scope>,
    /// This being set false in the syntax file implies this field being set false,
    /// but it can also be set false for contexts that don't include the prototype for other reasons
    pub meta_include_prototype: bool,
    pub clear_scopes: Option<ClearAmount>,
    /// This is filled in by the linker at link time
    /// for contexts that have `meta_include_prototype==true`
    /// and are not included from the prototype.
    pub prototype: Option<ContextId>,
    pub uses_backrefs: bool,

    /// The match and include patterns of this context, in order.
    pub patterns: Vec<Pattern>,
}
 65 | 
 66 | impl Context {
 67 |     pub fn new(meta_include_prototype: bool) -> Context {
 68 |         Context {
 69 |             meta_scope: Vec::new(),
 70 |             meta_content_scope: Vec::new(),
 71 |             meta_include_prototype,
 72 |             clear_scopes: None,
 73 |             uses_backrefs: false,
 74 |             patterns: Vec::new(),
 75 |             prototype: None,
 76 |         }
 77 |     }
 78 | }
 79 | 
/// One entry in a [`Context`]'s pattern list.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum Pattern {
    /// A regex match pattern.
    Match(MatchPattern),
    /// A reference to another context; [`MatchIter`] walks into it in place when the reference
    /// is a `ContextReference::Direct`.
    Include(ContextReference),
}
 85 | 
/// Used to iterate over all the match patterns in a context
///
/// Basically walks the tree of patterns and include directives in the correct order.
#[derive(Debug)]
pub struct MatchIter<'a> {
    /// Used to look up the contexts behind `Direct` include references.
    syntax_set: &'a SyntaxSet,
    /// Contexts currently being walked; the last entry is the innermost include.
    ctx_stack: Vec<&'a Context>,
    /// Next pattern index to visit for each entry of `ctx_stack` (same length, same order).
    index_stack: Vec<usize>,
}
 95 | 
/// A regex pattern of a context together with its scope settings and the operation attached
/// to it.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct MatchPattern {
    // NOTE(review): presumably whether `regex` contains references to captures of another
    // match (see `regex_with_refs`) — confirm against the loader.
    pub has_captures: bool,
    /// The lazily compiled regex for this pattern.
    pub regex: Regex,
    pub scope: Vec<Scope>,
    pub captures: Option<CaptureMapping>,
    pub operation: MatchOperation,
    pub with_prototype: Option<ContextReference>,
}
105 | 
/// A reference to a [`Context`].
///
/// Only the `Direct` variant can be resolved: `resolve` and `id` return
/// `ParsingError::UnresolvedContextReference` for every other variant.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ContextReference {
    #[non_exhaustive]
    Named(String),
    #[non_exhaustive]
    ByScope {
        scope: Scope,
        sub_context: Option<String>,
        /// `true` if this reference by scope is part of an `embed` for which
        /// there is an `escape`. In other words a reference for a context for
        /// which there "always is a way out". Enables falling back to `Plain
        /// Text` syntax in case the referenced scope is missing.
        with_escape: bool,
    },
    #[non_exhaustive]
    File {
        name: String,
        sub_context: Option<String>,
        /// Same semantics as for [`Self::ByScope::with_escape`].
        with_escape: bool,
    },
    #[non_exhaustive]
    Inline(String),
    /// A reference that already carries the ID of the target context.
    #[non_exhaustive]
    Direct(ContextId),
}
133 | 
/// The operation attached to a [`MatchPattern`].
///
/// NOTE(review): the variants presumably mirror the `push`/`set`/`pop` actions of the
/// `.sublime-syntax` format, with `Pop` carrying the number of contexts to pop — confirm
/// against the parser.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum MatchOperation {
    Push(Vec<ContextReference>),
    Set(Vec<ContextReference>),
    Pop(usize),
    None,
}
141 | 
142 | impl<'a> Iterator for MatchIter<'a> {
143 |     type Item = (&'a Context, usize);
144 | 
145 |     fn next(&mut self) -> Option<(&'a Context, usize)> {
146 |         loop {
147 |             if self.ctx_stack.is_empty() {
148 |                 return None;
149 |             }
150 |             // uncomment for debugging infinite recursion
151 |             // println!("{:?}", self.index_stack);
152 |             // use std::thread::sleep_ms;
153 |             // sleep_ms(500);
154 |             let last_index = self.ctx_stack.len() - 1;
155 |             let context = self.ctx_stack[last_index];
156 |             let index = self.index_stack[last_index];
157 |             self.index_stack[last_index] = index + 1;
158 |             if index < context.patterns.len() {
159 |                 match context.patterns[index] {
160 |                     Pattern::Match(_) => {
161 |                         return Some((context, index));
162 |                     }
163 |                     Pattern::Include(ref ctx_ref) => {
164 |                         let ctx_ptr = match *ctx_ref {
165 |                             ContextReference::Direct(ref context_id) => {
166 |                                 self.syntax_set.get_context(context_id).unwrap()
167 |                             }
168 |                             _ => return self.next(), // skip this and move onto the next one
169 |                         };
170 |                         self.ctx_stack.push(ctx_ptr);
171 |                         self.index_stack.push(0);
172 |                     }
173 |                 }
174 |             } else {
175 |                 self.ctx_stack.pop();
176 |                 self.index_stack.pop();
177 |             }
178 |         }
179 |     }
180 | }
181 | 
182 | /// Returns an iterator over all the match patterns in this context.
183 | ///
184 | /// It recursively follows include directives. Can only be run on contexts that have already been
185 | /// linked up.
186 | pub fn context_iter<'a>(syntax_set: &'a SyntaxSet, context: &'a Context) -> MatchIter<'a> {
187 |     MatchIter {
188 |         syntax_set,
189 |         ctx_stack: vec![context],
190 |         index_stack: vec![0],
191 |     }
192 | }
193 | 
194 | impl Context {
195 |     /// Returns the match pattern at an index
196 |     pub fn match_at(&self, index: usize) -> Result<&MatchPattern, ParsingError> {
197 |         match self.patterns[index] {
198 |             Pattern::Match(ref match_pat) => Ok(match_pat),
199 |             _ => Err(ParsingError::BadMatchIndex(index)),
200 |         }
201 |     }
202 | }
203 | 
204 | impl ContextReference {
205 |     /// find the pointed to context
206 |     pub fn resolve<'a>(&self, syntax_set: &'a SyntaxSet) -> Result<&'a Context, ParsingError> {
207 |         match *self {
208 |             ContextReference::Direct(ref context_id) => syntax_set.get_context(context_id),
209 |             _ => Err(ParsingError::UnresolvedContextReference(self.clone())),
210 |         }
211 |     }
212 | 
213 |     /// get the context ID this reference points to
214 |     pub fn id(&self) -> Result<ContextId, ParsingError> {
215 |         match *self {
216 |             ContextReference::Direct(ref context_id) => Ok(*context_id),
217 |             _ => Err(ParsingError::UnresolvedContextReference(self.clone())),
218 |         }
219 |     }
220 | }
221 | 
/// Replace single-digit backreferences (`\0` to `\9`) in `regex_str`.
///
/// Each backreference is replaced by whatever `substituter` returns for that digit, or
/// removed if it returns `None`. Any other escape sequence, including an escaped backslash,
/// is copied through unchanged, as is a trailing lone backslash.
pub(crate) fn substitute_backrefs_in_regex<F>(regex_str: &str, substituter: F) -> String
where
    F: Fn(usize) -> Option<String>,
{
    let mut output = String::with_capacity(regex_str.len());

    // Set while the previous char was a backslash that has not been emitted yet.
    let mut pending_escape = false;
    for ch in regex_str.chars() {
        if pending_escape {
            pending_escape = false;
            match ch.to_digit(10) {
                Some(group) => {
                    if let Some(replacement) = substituter(group as usize) {
                        output.push_str(&replacement);
                    }
                }
                None => {
                    output.push('\\');
                    output.push(ch);
                }
            }
        } else if ch == '\\' {
            pending_escape = true;
        } else {
            output.push(ch);
        }
    }

    if pending_escape {
        output.push('\\');
    }
    output
}
249 | 
250 | impl MatchPattern {
251 |     pub fn new(
252 |         has_captures: bool,
253 |         regex_str: String,
254 |         scope: Vec<Scope>,
255 |         captures: Option<CaptureMapping>,
256 |         operation: MatchOperation,
257 |         with_prototype: Option<ContextReference>,
258 |     ) -> MatchPattern {
259 |         MatchPattern {
260 |             has_captures,
261 |             regex: Regex::new(regex_str),
262 |             scope,
263 |             captures,
264 |             operation,
265 |             with_prototype,
266 |         }
267 |     }
268 | 
269 |     /// Used by the parser to compile a regex which needs to reference
270 |     /// regions from another matched pattern.
271 |     pub fn regex_with_refs(&self, region: &Region, text: &str) -> Regex {
272 |         let new_regex = substitute_backrefs_in_regex(self.regex.regex_str(), |i| {
273 |             region.pos(i).map(|(start, end)| escape(&text[start..end]))
274 |         });
275 | 
276 |         Regex::new(new_regex)
277 |     }
278 | 
279 |     pub fn regex(&self) -> &Regex {
280 |         &self.regex
281 |     }
282 | }
283 | 
284 | /// Serialize the provided map in natural key order, so that it's deterministic when dumping.
285 | pub(crate) fn ordered_map<K, V, S>(map: &HashMap<K, V>, serializer: S) -> Result<S::Ok, S::Error>
286 | where
287 |     S: Serializer,
288 |     K: Eq + Hash + Ord + Serialize,
289 |     V: Serialize,
290 | {
291 |     let ordered: BTreeMap<_, _> = map.iter().collect();
292 |     ordered.serialize(serializer)
293 | }
294 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Backreferences are replaced by the escaped text of the referenced groups.
    #[test]
    fn can_compile_refs() {
        let pat = MatchPattern::new(
            true,
            String::from(r"lol \\ \2 \1 '\9' \wz"),
            vec![],
            None,
            MatchOperation::None,
            None,
        );
        let source_regex = Regex::new(String::from(r"(\\\[\]\(\))(b)(c)(d)(e)"));
        let s = r"\[]()bcde";
        let mut region = Region::new();
        assert!(source_regex.search(s, 0, s.len(), Some(&mut region)));

        let regex_with_refs = pat.regex_with_refs(&region, s);
        assert_eq!(regex_with_refs.regex_str(), r"lol \\ b \\\[\]\(\) '' \wz");
    }
}
319 | 


--------------------------------------------------------------------------------
/src/highlighting/theme_load.rs:
--------------------------------------------------------------------------------
  1 | // Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/theme.rs
  2 | // released under the MIT license by @defuz
  3 | 
  4 | use std::str::FromStr;
  5 | 
  6 | use super::selector::*;
  7 | use super::settings::{ParseSettings, Settings};
  8 | use super::style::*;
  9 | use super::theme::*;
 10 | use crate::parsing::ParseScopeError;
 11 | 
 12 | use self::ParseThemeError::*;
 13 | 
/// Errors produced while converting parsed theme settings into theme data structures.
///
/// The enum is `#[non_exhaustive]`, so code matching on it outside this crate needs a
/// wildcard arm.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ParseThemeError {
    /// An underline option was not a string, or was a string other than `underline`,
    /// `stippled_underline` or `squiggly_underline`.
    #[error("Incorrect underline option")]
    IncorrectUnderlineOption,
    /// A font style contained an unrecognised word (the payload is that word), or the
    /// setting was not a string (the payload is the setting's string form).
    #[error("Incorrect font style: {0}")]
    IncorrectFontStyle(String),
    /// A color was not a string, did not start with `#`, contained a non-hexadecimal
    /// character, or did not have 3, 6 or 8 hex digits.
    #[error("Incorrect color")]
    IncorrectColor,
    /// The top level of a theme was malformed: it was not an object, `name` / `author`
    /// was present but not a string, or `settings` was missing or not an array.
    #[error("Incorrect syntax")]
    IncorrectSyntax,
    /// A theme item had no `settings` entry.
    #[error("Incorrect settings")]
    IncorrectSettings,
    /// The first element of the theme's `settings` array was missing, was not an object,
    /// or had no `settings` entry of its own.
    #[error("Undefined settings")]
    UndefinedSettings,
    /// Not constructed anywhere in this module.
    #[error("Undefined scope settings: {0}")]
    UndefinedScopeSettings(String),
    /// The value given for a theme item, or for a theme item's style settings, was not an
    /// object.
    #[error("Color sheme scope is not object")]
    ColorShemeScopeIsNotObject,
    /// The value given for the theme-wide settings was not an object.
    #[error("Color sheme settings is not object")]
    ColorShemeSettingsIsNotObject,
    /// A theme item's `scope` entry was missing or not a string. The payload is the debug
    /// rendering of the item's remaining entries.
    #[error("Scope selector is not string: {0}")]
    ScopeSelectorIsNotString(String),
    /// Not constructed anywhere in this module.
    #[error("Duplicate settings")]
    DuplicateSettings,
    /// A scope failed to parse; created from a `ParseScopeError` via the `#[from]`
    /// conversion (e.g. when `?` is applied to such an error).
    #[error("Scope parse error: {0}")]
    ScopeParse(#[from] ParseScopeError),
}
 42 | 
 43 | impl FromStr for UnderlineOption {
 44 |     type Err = ParseThemeError;
 45 | 
 46 |     fn from_str(s: &str) -> Result<UnderlineOption, Self::Err> {
 47 |         Ok(match s {
 48 |             "underline" => UnderlineOption::Underline,
 49 |             "stippled_underline" => UnderlineOption::StippledUnderline,
 50 |             "squiggly_underline" => UnderlineOption::SquigglyUnderline,
 51 |             _ => return Err(IncorrectUnderlineOption),
 52 |         })
 53 |     }
 54 | }
 55 | 
 56 | impl ParseSettings for UnderlineOption {
 57 |     type Error = ParseThemeError;
 58 | 
 59 |     fn parse_settings(settings: Settings) -> Result<UnderlineOption, Self::Error> {
 60 |         match settings {
 61 |             Settings::String(value) => UnderlineOption::from_str(&value),
 62 |             _ => Err(IncorrectUnderlineOption),
 63 |         }
 64 |     }
 65 | }
 66 | 
 67 | impl FromStr for FontStyle {
 68 |     type Err = ParseThemeError;
 69 | 
 70 |     fn from_str(s: &str) -> Result<FontStyle, Self::Err> {
 71 |         let mut font_style = FontStyle::empty();
 72 |         for i in s.split_whitespace() {
 73 |             font_style.insert(match i {
 74 |                 "bold" => FontStyle::BOLD,
 75 |                 "underline" => FontStyle::UNDERLINE,
 76 |                 "italic" => FontStyle::ITALIC,
 77 |                 "normal" | "regular" => FontStyle::empty(),
 78 |                 s => return Err(IncorrectFontStyle(s.to_owned())),
 79 |             })
 80 |         }
 81 |         Ok(font_style)
 82 |     }
 83 | }
 84 | 
 85 | impl ParseSettings for FontStyle {
 86 |     type Error = ParseThemeError;
 87 | 
 88 |     fn parse_settings(settings: Settings) -> Result<FontStyle, Self::Error> {
 89 |         match settings {
 90 |             Settings::String(value) => FontStyle::from_str(&value),
 91 |             c => Err(IncorrectFontStyle(c.to_string())),
 92 |         }
 93 |     }
 94 | }
 95 | 
 96 | impl FromStr for Color {
 97 |     type Err = ParseThemeError;
 98 | 
 99 |     fn from_str(s: &str) -> Result<Color, Self::Err> {
100 |         let mut chars = s.chars();
101 |         if chars.next() != Some('#') {
102 |             return Err(IncorrectColor);
103 |         }
104 |         let mut d = Vec::new();
105 |         for char in chars {
106 |             d.push(char.to_digit(16).ok_or(IncorrectColor)? as u8);
107 |         }
108 |         Ok(match d.len() {
109 |             3 => Color {
110 |                 r: d[0],
111 |                 g: d[1],
112 |                 b: d[2],
113 |                 a: 255,
114 |             },
115 |             6 => Color {
116 |                 r: d[0] * 16 + d[1],
117 |                 g: d[2] * 16 + d[3],
118 |                 b: d[4] * 16 + d[5],
119 |                 a: 255,
120 |             },
121 |             8 => Color {
122 |                 r: d[0] * 16 + d[1],
123 |                 g: d[2] * 16 + d[3],
124 |                 b: d[4] * 16 + d[5],
125 |                 a: d[6] * 16 + d[7],
126 |             },
127 |             _ => return Err(IncorrectColor),
128 |         })
129 |     }
130 | }
131 | 
132 | impl ParseSettings for Color {
133 |     type Error = ParseThemeError;
134 | 
135 |     fn parse_settings(settings: Settings) -> Result<Color, Self::Error> {
136 |         match settings {
137 |             Settings::String(value) => Color::from_str(&value),
138 |             _ => Err(IncorrectColor),
139 |         }
140 |     }
141 | }
142 | 
143 | impl ParseSettings for StyleModifier {
144 |     type Error = ParseThemeError;
145 | 
146 |     fn parse_settings(settings: Settings) -> Result<StyleModifier, Self::Error> {
147 |         let mut obj = match settings {
148 |             Settings::Object(obj) => obj,
149 |             _ => return Err(ColorShemeScopeIsNotObject),
150 |         };
151 |         let font_style = match obj.remove("fontStyle") {
152 |             Some(Settings::String(value)) => Some(FontStyle::from_str(&value)?),
153 |             None => None,
154 |             Some(c) => return Err(IncorrectFontStyle(c.to_string())),
155 |         };
156 |         let foreground = match obj.remove("foreground") {
157 |             Some(Settings::String(value)) => Some(Color::from_str(&value)?),
158 |             None => None,
159 |             _ => return Err(IncorrectColor),
160 |         };
161 |         let background = match obj.remove("background") {
162 |             Some(Settings::String(value)) => Some(Color::from_str(&value)?),
163 |             None => None,
164 |             _ => return Err(IncorrectColor),
165 |         };
166 | 
167 |         Ok(StyleModifier {
168 |             foreground,
169 |             background,
170 |             font_style,
171 |         })
172 |     }
173 | }
174 | 
175 | impl ParseSettings for ThemeItem {
176 |     type Error = ParseThemeError;
177 | 
178 |     fn parse_settings(settings: Settings) -> Result<ThemeItem, Self::Error> {
179 |         let mut obj = match settings {
180 |             Settings::Object(obj) => obj,
181 |             _ => return Err(ColorShemeScopeIsNotObject),
182 |         };
183 |         let scope = match obj.remove("scope") {
184 |             Some(Settings::String(value)) => ScopeSelectors::from_str(&value)?,
185 |             _ => return Err(ScopeSelectorIsNotString(format!("{:?}", obj))),
186 |         };
187 |         let style = match obj.remove("settings") {
188 |             Some(settings) => StyleModifier::parse_settings(settings)?,
189 |             None => return Err(IncorrectSettings),
190 |         };
191 |         Ok(ThemeItem { scope, style })
192 |     }
193 | }
194 | 
195 | impl ParseSettings for ThemeSettings {
196 |     type Error = ParseThemeError;
197 | 
198 |     fn parse_settings(json: Settings) -> Result<ThemeSettings, Self::Error> {
199 |         let mut settings = ThemeSettings::default();
200 | 
201 |         let obj = match json {
202 |             Settings::Object(obj) => obj,
203 |             _ => return Err(ColorShemeSettingsIsNotObject),
204 |         };
205 | 
206 |         for (key, value) in obj {
207 |             match &key[..] {
208 |                 "foreground" => settings.foreground = Color::parse_settings(value).ok(),
209 |                 "background" => settings.background = Color::parse_settings(value).ok(),
210 |                 "caret" => settings.caret = Color::parse_settings(value).ok(),
211 |                 "lineHighlight" => settings.line_highlight = Color::parse_settings(value).ok(),
212 |                 "misspelling" => settings.misspelling = Color::parse_settings(value).ok(),
213 |                 "minimapBorder" => settings.minimap_border = Color::parse_settings(value).ok(),
214 |                 "accent" => settings.accent = Color::parse_settings(value).ok(),
215 | 
216 |                 "popupCss" => settings.popup_css = value.as_str().map(|s| s.to_owned()),
217 |                 "phantomCss" => settings.phantom_css = value.as_str().map(|s| s.to_owned()),
218 | 
219 |                 "bracketContentsForeground" => {
220 |                     settings.bracket_contents_foreground = Color::parse_settings(value).ok()
221 |                 }
222 |                 "bracketContentsOptions" => {
223 |                     settings.bracket_contents_options = UnderlineOption::parse_settings(value).ok()
224 |                 }
225 |                 "bracketsForeground" => {
226 |                     settings.brackets_foreground = Color::parse_settings(value).ok()
227 |                 }
228 |                 "bracketsBackground" => {
229 |                     settings.brackets_background = Color::parse_settings(value).ok()
230 |                 }
231 |                 "bracketsOptions" => {
232 |                     settings.brackets_options = UnderlineOption::parse_settings(value).ok()
233 |                 }
234 |                 "tagsForeground" => settings.tags_foreground = Color::parse_settings(value).ok(),
235 |                 "tagsOptions" => {
236 |                     settings.tags_options = UnderlineOption::parse_settings(value).ok()
237 |                 }
238 |                 "highlight" => settings.highlight = Color::parse_settings(value).ok(),
239 |                 "findHighlight" => settings.find_highlight = Color::parse_settings(value).ok(),
240 |                 "findHighlightForeground" => {
241 |                     settings.find_highlight_foreground = Color::parse_settings(value).ok()
242 |                 }
243 |                 "gutter" => settings.gutter = Color::parse_settings(value).ok(),
244 |                 "gutterForeground" => {
245 |                     settings.gutter_foreground = Color::parse_settings(value).ok()
246 |                 }
247 |                 "selection" => settings.selection = Color::parse_settings(value).ok(),
248 |                 "selectionForeground" => {
249 |                     settings.selection_foreground = Color::parse_settings(value).ok()
250 |                 }
251 |                 "selectionBorder" => settings.selection_border = Color::parse_settings(value).ok(),
252 |                 "inactiveSelection" => {
253 |                     settings.inactive_selection = Color::parse_settings(value).ok()
254 |                 }
255 |                 "inactiveSelectionForeground" => {
256 |                     settings.inactive_selection_foreground = Color::parse_settings(value).ok()
257 |                 }
258 |                 "guide" => settings.guide = Color::parse_settings(value).ok(),
259 |                 "activeGuide" => settings.active_guide = Color::parse_settings(value).ok(),
260 |                 "stackGuide" => settings.stack_guide = Color::parse_settings(value).ok(),
261 |                 "shadow" => settings.shadow = Color::parse_settings(value).ok(),
262 |                 _ => (), // E.g. "shadowWidth" and "invisibles" are ignored
263 |             }
264 |         }
265 |         Ok(settings)
266 |     }
267 | }
268 | 
269 | impl ParseSettings for Theme {
270 |     type Error = ParseThemeError;
271 | 
272 |     fn parse_settings(settings: Settings) -> Result<Theme, Self::Error> {
273 |         let mut obj = match settings {
274 |             Settings::Object(obj) => obj,
275 |             _ => return Err(IncorrectSyntax),
276 |         };
277 |         let name = match obj.remove("name") {
278 |             Some(Settings::String(name)) => Some(name),
279 |             None => None,
280 |             _ => return Err(IncorrectSyntax),
281 |         };
282 |         let author = match obj.remove("author") {
283 |             Some(Settings::String(author)) => Some(author),
284 |             None => None,
285 |             _ => return Err(IncorrectSyntax),
286 |         };
287 |         let items = match obj.remove("settings") {
288 |             Some(Settings::Array(items)) => items,
289 |             _ => return Err(IncorrectSyntax),
290 |         };
291 |         let mut iter = items.into_iter();
292 |         let mut settings = match iter.next() {
293 |             Some(Settings::Object(mut obj)) => match obj.remove("settings") {
294 |                 Some(settings) => ThemeSettings::parse_settings(settings)?,
295 |                 None => return Err(UndefinedSettings),
296 |             },
297 |             _ => return Err(UndefinedSettings),
298 |         };
299 |         if let Some(Settings::Object(obj)) = obj.remove("gutterSettings") {
300 |             for (key, value) in obj {
301 |                 let color = Color::parse_settings(value).ok();
302 |                 match &key[..] {
303 |                     "background" => settings.gutter = settings.gutter.or(color),
304 |                     "foreground" => {
305 |                         settings.gutter_foreground = settings.gutter_foreground.or(color)
306 |                     }
307 |                     _ => (),
308 |                 }
309 |             }
310 |         }
311 |         let mut scopes = Vec::new();
312 |         for json in iter {
313 |             // TODO option to disable best effort parsing and bubble up warnings
314 |             if let Ok(item) = ThemeItem::parse_settings(json) {
315 |                 scopes.push(item);
316 |             }
317 |         }
318 |         Ok(Theme {
319 |             name,
320 |             author,
321 |             settings,
322 |             scopes,
323 |         })
324 |     }
325 | }
326 | 


--------------------------------------------------------------------------------
/src/util.rs:
--------------------------------------------------------------------------------
  1 | //! Convenient helper functions for common use cases:
  2 | //! * Printing to terminal
  3 | //! * Iterating lines with `\n`s
  4 | //! * Modifying ranges of highlighted output
  5 | 
  6 | use crate::highlighting::{Color, Style, StyleModifier};
  7 | #[cfg(feature = "parsing")]
  8 | use crate::parsing::ScopeStackOp;
  9 | use std::fmt::Write;
 10 | use std::ops::Range;
 11 | 
 12 | #[inline]
 13 | fn blend_fg_color(fg: Color, bg: Color) -> Color {
 14 |     if fg.a == 0xff {
 15 |         return fg;
 16 |     }
 17 |     let ratio = fg.a as u32;
 18 |     let r = (fg.r as u32 * ratio + bg.r as u32 * (255 - ratio)) / 255;
 19 |     let g = (fg.g as u32 * ratio + bg.g as u32 * (255 - ratio)) / 255;
 20 |     let b = (fg.b as u32 * ratio + bg.b as u32 * (255 - ratio)) / 255;
 21 |     Color {
 22 |         r: r as u8,
 23 |         g: g as u8,
 24 |         b: b as u8,
 25 |         a: 255,
 26 |     }
 27 | }
 28 | 
 29 | /// Formats the styled fragments using 24-bit color terminal escape codes.
 30 | /// Meant for debugging and testing.
 31 | ///
 32 | /// This function is currently fairly inefficient in its use of escape codes.
 33 | ///
 34 | /// Note that this does not currently ever un-set the color so that the end of a line will also get
 35 | /// highlighted with the background.  This means if you might want to use `println!("\x1b[0m");`
 36 | /// after to clear the coloring.
 37 | ///
 38 | /// If `bg` is true then the background is also set
 39 | pub fn as_24_bit_terminal_escaped(v: &[(Style, &str)], bg: bool) -> String {
 40 |     let mut s: String = String::new();
 41 |     for &(ref style, text) in v.iter() {
 42 |         if bg {
 43 |             write!(
 44 |                 s,
 45 |                 "\x1b[48;2;{};{};{}m",
 46 |                 style.background.r, style.background.g, style.background.b
 47 |             )
 48 |             .unwrap();
 49 |         }
 50 |         let fg = blend_fg_color(style.foreground, style.background);
 51 |         write!(s, "\x1b[38;2;{};{};{}m{}", fg.r, fg.g, fg.b, text).unwrap();
 52 |     }
 53 |     // s.push_str("\x1b[0m");
 54 |     s
 55 | }
 56 | 
 57 | const LATEX_REPLACE: [(&str, &str); 3] = [("\\", "\\\\"), ("{", "\\{"), ("}", "\\}")];
 58 | 
 59 | /// Formats the styled fragments using LaTeX textcolor directive.
 60 | ///
 61 | /// Usage is similar to the `as_24_bit_terminal_escaped` function:
 62 | ///
 63 | /// ```
 64 | /// use syntect::easy::HighlightLines;
 65 | /// use syntect::parsing::SyntaxSet;
 66 | /// use syntect::highlighting::{ThemeSet,Style};
 67 | /// use syntect::util::{as_latex_escaped,LinesWithEndings};
 68 | ///
 69 | /// // Load these once at the start of your program
 70 | /// let ps = SyntaxSet::load_defaults_newlines();
 71 | /// let ts = ThemeSet::load_defaults();
 72 | ///
 73 | /// let syntax = ps.find_syntax_by_extension("rs").unwrap();
 74 | /// let s = "pub struct Wow { hi: u64 }\nfn blah() -> u64 {}\n";
 75 | ///
 76 | /// let mut h = HighlightLines::new(syntax, &ts.themes["InspiredGitHub"]);
 77 | /// for line in LinesWithEndings::from(s) { // LinesWithEndings enables use of newlines mode
 78 | ///     let ranges: Vec<(Style, &str)> = h.highlight_line(line, &ps).unwrap();
 79 | ///     let escaped = as_latex_escaped(&ranges[..]);
 80 | ///     println!("{}", escaped);
 81 | /// }
 82 | /// ```
 83 | ///
 84 | /// Returned content is intended to be placed inside a fancyvrb
 85 | /// Verbatim environment:
 86 | ///
 87 | /// ```latex
 88 | /// \usepackage{fancyvrb}
 89 | /// \usepackage{xcolor}
 90 | /// % ...
 91 | /// % enable comma-separated arguments inside \textcolor
 92 | /// \makeatletter
 93 | /// \def\verbatim@nolig@list{\do\`\do\<\do\>\do\'\do\-}
 94 | /// \makeatother
 95 | /// % ...
 96 | /// \begin{Verbatim}[commandchars=\\\{\}]
 97 | /// % content goes here
 98 | /// \end{Verbatim}
 99 | /// ```
100 | ///
101 | /// Background color is ignored.
102 | pub fn as_latex_escaped(v: &[(Style, &str)]) -> String {
103 |     let mut s: String = String::new();
104 |     let mut prev_style: Option<Style> = None;
105 |     let mut content: String;
106 |     fn textcolor(style: &Style, first: bool) -> String {
107 |         format!(
108 |             "{}\\textcolor[RGB]{{{},{},{}}}{{",
109 |             if first { "" } else { "}" },
110 |             style.foreground.r,
111 |             style.foreground.b,
112 |             style.foreground.g
113 |         )
114 |     }
115 |     for &(style, text) in v.iter() {
116 |         if let Some(ps) = prev_style {
117 |             match text {
118 |                 " " => {
119 |                     s.push(' ');
120 |                     continue;
121 |                 }
122 |                 "\n" => continue,
123 |                 _ => (),
124 |             }
125 |             if style != ps {
126 |                 write!(s, "{}", textcolor(&style, false)).unwrap();
127 |             }
128 |         } else {
129 |             write!(s, "{}", textcolor(&style, true)).unwrap();
130 |         }
131 |         content = text.to_string();
132 |         for &(old, new) in LATEX_REPLACE.iter() {
133 |             content = content.replace(old, new);
134 |         }
135 |         write!(s, "{}", &content).unwrap();
136 |         prev_style = Some(style);
137 |     }
138 |     s.push('}');
139 |     s
140 | }
141 | 
/// Prints every scope stack operation underneath a copy of `line`, with a marker
/// indented to the operation's offset in the line. Meant purely for debugging.
#[cfg(feature = "parsing")]
pub fn debug_print_ops(line: &str, ops: &[(usize, ScopeStackOp)]) {
    let shown = line.trim_end();
    for (offset, op) in ops {
        println!("{}", shown);
        // Pad with `offset` spaces so the marker lines up with the position in `line`.
        print!("{:width$}", "", width = *offset);
        let description = match op {
            ScopeStackOp::Push(scope) => format!("^ +{}", scope),
            ScopeStackOp::Pop(count) => format!("^ pop {}", count),
            ScopeStackOp::Clear(amount) => format!("^ clear {:?}", amount),
            ScopeStackOp::Restore => String::from("^ restore"),
            ScopeStackOp::Noop => String::from("noop"),
        };
        println!("{}", description);
    }
}
164 | 
/// Iterator that yields the lines of a string together with their line endings.
///
/// It behaves like the standard library's `lines` method on `str`, except that each
/// yielded line keeps its trailing newline character(s). Only `\n` terminates a line; a
/// preceding `\r` simply stays part of the line.
///
/// Use it when you're parsing/highlighting text that you have as a string. It lets you
/// use the "newlines" variant of syntax definitions, which is recommended.
///
/// # Examples
///
/// ```
/// use syntect::util::LinesWithEndings;
///
/// let mut lines = LinesWithEndings::from("foo\nbar\nbaz");
///
/// assert_eq!(Some("foo\n"), lines.next());
/// assert_eq!(Some("bar\n"), lines.next());
/// assert_eq!(Some("baz"), lines.next());
///
/// assert_eq!(None, lines.next());
/// ```
pub struct LinesWithEndings<'a> {
    // The part of the original string that has not been yielded yet.
    input: &'a str,
}

impl<'a> LinesWithEndings<'a> {
    /// Creates an iterator over the lines of `input`.
    pub fn from(input: &'a str) -> LinesWithEndings<'a> {
        LinesWithEndings { input }
    }
}

impl<'a> Iterator for LinesWithEndings<'a> {
    type Item = &'a str;

    /// Yields the next line including its `\n`, or the unterminated remainder, or `None`
    /// once the input is exhausted.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        if self.input.is_empty() {
            return None;
        }
        // End of the line: one past the newline, or the end of the input if none is left.
        let end = match self.input.find('\n') {
            Some(newline) => newline + 1,
            None => self.input.len(),
        };
        let (line, remainder) = self.input.split_at(end);
        self.input = remainder;
        Some(line)
    }
}
215 | 
/// Split a highlighted line at a byte index in the line into a before and
/// after component.
///
/// This is just a helper that does the somewhat tricky logic including splitting
/// a span if the index falls inside it rather than on a span boundary.
///
/// If the index falls inside a multibyte character, the split point is moved back to the
/// start of that character. If that leaves nothing of the span before the split point,
/// the span goes to the after component whole; no empty span is produced. An index past
/// the end of the line puts everything into the before component.
///
/// This can be used to extract a chunk of the line out for special treatment
/// like wrapping it in an HTML tag for extra styling.
///
/// Generic for testing purposes and fancier use cases, but intended for use with
/// the `Vec<(Style, &str)>` returned by `highlight` methods. Look at the source
/// code for `modify_range` for an example usage.
#[allow(clippy::type_complexity)]
pub fn split_at<'a, A: Clone>(
    v: &[(A, &'a str)],
    split_i: usize,
) -> (Vec<(A, &'a str)>, Vec<(A, &'a str)>) {
    // Count the leading spans that lie entirely before the split point. `remaining` ends
    // up as the split offset relative to the first span that was not consumed.
    let mut remaining = split_i;
    let mut whole = 0;
    for &(_, text) in v {
        if text.len() > remaining {
            break;
        }
        remaining -= text.len();
        whole += 1;
    }
    let (head, mut tail) = v.split_at(whole);
    let mut before = head.to_vec();

    let mut after = Vec::with_capacity(tail.len() + 1);
    if remaining > 0 {
        if let Some(&(ref attr, text)) = tail.first() {
            // Slicing in the middle of a multibyte character would panic, so back up to
            // the previous character boundary. Offset 0 is always a boundary, which
            // bounds the loop.
            let mut cut = remaining;
            while !text.is_char_boundary(cut) {
                cut -= 1;
            }
            // `cut == 0` means the index was inside the span's first character: leave the
            // span intact for `after` instead of emitting an empty piece.
            if cut > 0 {
                let (left, right) = text.split_at(cut);
                before.push((attr.clone(), left));
                after.push((attr.clone(), right));
                tail = &tail[1..];
            }
        }
    }
    after.extend_from_slice(tail);

    (before, after)
}
269 | 
270 | /// Modify part of a highlighted line using a style modifier, useful for highlighting sections of a line.
271 | ///
272 | /// # Examples
273 | ///
274 | /// ```
275 | /// use syntect::util::modify_range;
276 | /// use syntect::highlighting::{Style, StyleModifier, FontStyle};
277 | ///
278 | /// let plain = Style::default();
279 | /// let boldmod = StyleModifier { foreground: None, background: None, font_style: Some(FontStyle::BOLD) };
280 | /// let bold = plain.apply(boldmod);
281 | ///
282 | /// let l = &[(plain, "abc"), (plain, "def"), (plain, "ghi")];
283 | /// let l2 = modify_range(l, 1..6, boldmod);
284 | /// assert_eq!(l2, &[(plain, "a"), (bold, "bc"), (bold, "def"), (plain, "ghi")]);
285 | /// ```
286 | pub fn modify_range<'a>(
287 |     v: &[(Style, &'a str)],
288 |     r: Range<usize>,
289 |     modifier: StyleModifier,
290 | ) -> Vec<(Style, &'a str)> {
291 |     let (mut result, in_and_after) = split_at(v, r.start);
292 |     let (inside, mut after) = split_at(&in_and_after, r.end - r.start);
293 | 
294 |     result.extend(inside.iter().map(|(style, s)| (style.apply(modifier), *s)));
295 |     result.append(&mut after);
296 |     result
297 | }
298 | 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::highlighting::FontStyle;

    /// `LinesWithEndings` must keep `\n` / `\r\n` on each yielded line and yield nothing
    /// for an empty string.
    #[test]
    fn test_lines_with_endings() {
        fn lines(s: &str) -> Vec<&str> {
            LinesWithEndings::from(s).collect()
        }

        assert!(lines("").is_empty());
        assert_eq!(lines("f"), vec!["f"]);
        assert_eq!(lines("foo"), vec!["foo"]);
        assert_eq!(lines("foo\n"), vec!["foo\n"]);
        assert_eq!(lines("foo\nbar"), vec!["foo\n", "bar"]);
        assert_eq!(lines("foo\nbar\n"), vec!["foo\n", "bar\n"]);
        assert_eq!(lines("foo\r\nbar"), vec!["foo\r\n", "bar"]);
        assert_eq!(lines("foo\r\nbar\r\n"), vec!["foo\r\n", "bar\r\n"]);
        assert_eq!(lines("\nfoo"), vec!["\n", "foo"]);
        assert_eq!(lines("\n\n\n"), vec!["\n", "\n", "\n"]);
    }

    /// Exercises `split_at` at the start, inside a token, between tokens, at and past the
    /// end, and with multibyte text.
    #[test]
    fn test_split_at() {
        let l: &[(u8, &str)] = &[];
        let (before, after) = split_at(l, 0); // empty
        assert_eq!((&before[..], &after[..]), (&[][..], &[][..]));

        let l = &[(0u8, "abc"), (1u8, "def"), (2u8, "ghi")];

        let (before, after) = split_at(l, 0); // at start
        assert_eq!(
            (&before[..], &after[..]),
            (&[][..], &[(0u8, "abc"), (1u8, "def"), (2u8, "ghi")][..])
        );

        let (before, after) = split_at(l, 4); // inside token
        assert_eq!(
            (&before[..], &after[..]),
            (
                &[(0u8, "abc"), (1u8, "d")][..],
                &[(1u8, "ef"), (2u8, "ghi")][..]
            )
        );

        let (before, after) = split_at(l, 3); // between tokens
        assert_eq!(
            (&before[..], &after[..]),
            (&[(0u8, "abc")][..], &[(1u8, "def"), (2u8, "ghi")][..])
        );

        let (before, after) = split_at(l, 9); // just after last token
        assert_eq!(
            (&before[..], &after[..]),
            (&[(0u8, "abc"), (1u8, "def"), (2u8, "ghi")][..], &[][..])
        );

        let (before, after) = split_at(l, 10); // out of bounds
        assert_eq!(
            (&before[..], &after[..]),
            (&[(0u8, "abc"), (1u8, "def"), (2u8, "ghi")][..], &[][..])
        );

        // Multibyte text: every character of the first token is three bytes long.
        let l = &[(0u8, "こんにちは"), (1u8, "世界"), (2u8, "！")];

        // Index 3 is exactly the boundary after the first character.
        let (before, after) = split_at(l, 3);

        assert_eq!(
            (&before[..], &after[..]),
            (
                &[(0u8, "こ")][..],
                &[(0u8, "んにちは"), (1u8, "世界"), (2u8, "！")][..]
            )
        );

        // Splitting inside a multibyte character would panic, so an index that falls
        // inside one is moved back to the start of that character. Index 4 therefore
        // gives the same result as index 3.
        let (before, after) = split_at(l, 4);

        assert_eq!(
            (&before[..], &after[..]),
            (
                &[(0u8, "こ")][..],
                &[(0u8, "んにちは"), (1u8, "世界"), (2u8, "！")][..]
            )
        );
    }

    /// Checks the escape sequences written with and without background, and that a
    /// translucent foreground is blended with the background.
    #[test]
    fn test_as_24_bit_terminal_escaped() {
        let style = Style {
            foreground: Color::WHITE,
            background: Color::BLACK,
            font_style: FontStyle::default(),
        };

        // With background
        let s = as_24_bit_terminal_escaped(&[(style, "hello")], true);
        assert_eq!(s, "\x1b[48;2;0;0;0m\x1b[38;2;255;255;255mhello");

        // Without background
        let s = as_24_bit_terminal_escaped(&[(style, "hello")], false);
        assert_eq!(s, "\x1b[38;2;255;255;255mhello");

        // Blend alpha
        let mut foreground = Color::WHITE;
        foreground.a = 128;
        let style = Style {
            foreground,
            background: Color::BLACK,
            font_style: FontStyle::default(),
        };
        let s = as_24_bit_terminal_escaped(&[(style, "hello")], true);
        assert_eq!(s, "\x1b[48;2;0;0;0m\x1b[38;2;128;128;128mhello");
    }
}
417 | 


--------------------------------------------------------------------------------
/src/highlighting/style.rs:
--------------------------------------------------------------------------------
  1 | // Code based on [https://github.com/defuz/sublimate/blob/master/src/core/syntax/scope.rs](https://github.com/defuz/sublimate/blob/master/src/core/syntax/scope.rs)
  2 | // released under the MIT license by @defuz
  3 | use serde_derive::{Deserialize, Serialize};
  4 | use std::{fmt, ops};
  5 | 
/// Foreground and background colors, with font style
///
/// This is a small `Copy` value, so it can be passed around and compared freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Style {
    /// Foreground color
    pub foreground: Color,
    /// Background color
    pub background: Color,
    /// Style of the font
    pub font_style: FontStyle,
}
 16 | 
/// A change to a [`Style`] applied incrementally by a theme rule
///
/// Fields left empty (as `None`) will not modify the corresponding field on a `Style`.
/// The derived `Default` has every field set to `None`.
///
/// [`Style`]: struct.Style.html
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct StyleModifier {
    /// Foreground color, or `None` to leave the foreground unchanged
    pub foreground: Option<Color>,
    /// Background color, or `None` to leave the background unchanged
    pub background: Option<Color>,
    /// Style of the font, or `None` to leave the font style unchanged
    pub font_style: Option<FontStyle>,
}
 31 | 
/// RGBA color, directly from the theme
///
/// Because these numbers come directly from the theme, you might have to do your own color space
/// conversion if you're outputting a different color space from the theme. This can be a problem
/// because some Sublime themes use sRGB and some don't. This is specified in an attribute syntect
/// doesn't parse yet.
///
/// `Debug` is implemented by hand rather than derived, to give a compact one-line
/// representation.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Color {
    /// Red component
    pub r: u8,
    /// Green component
    pub g: u8,
    /// Blue component
    pub b: u8,
    /// Alpha (transparency) component
    pub a: u8,
}
 49 | 
 50 | // More compact alternate debug representation by not using a separate line for each color field,
 51 | // also adapts the default debug representation to match.
 52 | impl std::fmt::Debug for Color {
 53 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 54 |         let Color { r, g, b, a } = self;
 55 |         if f.alternate() {
 56 |             // when formatted with "{:#?}"
 57 |             write!(
 58 |                 f,
 59 |                 "Color {{ r/g/b/a: {: >3}/{: >3}/{: >3}/{: >3} }}",
 60 |                 r, g, b, a
 61 |             )
 62 |         } else {
 63 |             // when formatted with "{:?}"
 64 |             write!(f, "Color {{ r/g/b/a: {}/{}/{}/{} }}", r, g, b, a)
 65 |         }
 66 |     }
 67 | }
 68 | 
/// The color-independent styling of a font - i.e. bold, italicized, and/or underlined
///
/// This is a set of bit flags stored in a single byte; the individual flags are
/// exposed as the associated constants `BOLD`, `UNDERLINE` and `ITALIC`.
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct FontStyle {
    // Raw flag bits; the derived `Default` is 0, i.e. no flags set.
    bits: u8,
}
 74 | 
 75 | impl FontStyle {
 76 |     /// Bold font style
 77 |     pub const BOLD: Self = Self { bits: 1 };
 78 |     /// Underline font style
 79 |     pub const UNDERLINE: Self = Self { bits: 2 };
 80 |     /// Italic font style
 81 |     pub const ITALIC: Self = Self { bits: 4 };
 82 | 
 83 |     /// Returns an empty set of flags.
 84 |     pub const fn empty() -> Self {
 85 |         Self { bits: 0 }
 86 |     }
 87 | 
 88 |     /// Returns the set containing all flags.
 89 |     pub const fn all() -> Self {
 90 |         let bits = Self::BOLD.bits | Self::UNDERLINE.bits | Self::ITALIC.bits;
 91 |         Self { bits }
 92 |     }
 93 | 
 94 |     /// Returns the raw value of the flags currently stored.
 95 |     pub const fn bits(&self) -> u8 {
 96 |         self.bits
 97 |     }
 98 | 
 99 |     /// Convert from underlying bit representation, unless that
100 |     /// representation contains bits that do not correspond to a flag.
101 |     pub const fn from_bits(bits: u8) -> Option<Self> {
102 |         if (bits & !Self::all().bits()) == 0 {
103 |             Some(Self { bits })
104 |         } else {
105 |             None
106 |         }
107 |     }
108 | 
109 |     /// Convert from underlying bit representation, dropping any bits
110 |     /// that do not correspond to flags.
111 |     pub const fn from_bits_truncate(bits: u8) -> Self {
112 |         let bits = bits & Self::all().bits;
113 |         Self { bits }
114 |     }
115 | 
116 |     /// Convert from underlying bit representation, preserving all
117 |     /// bits (even those not corresponding to a defined flag).
118 |     ///
119 |     /// # Safety
120 |     ///
121 |     /// The caller of the `bitflags!` macro can chose to allow or
122 |     /// disallow extra bits for their bitflags type.
123 |     ///
124 |     /// The caller of `from_bits_unchecked()` has to ensure that
125 |     /// all bits correspond to a defined flag or that extra bits
126 |     /// are valid for this bitflags type.
127 |     pub const unsafe fn from_bits_unchecked(bits: u8) -> Self {
128 |         Self { bits }
129 |     }
130 | 
131 |     /// Returns `true` if no flags are currently stored.
132 |     pub const fn is_empty(&self) -> bool {
133 |         self.bits() == Self::empty().bits()
134 |     }
135 | 
136 |     /// Returns `true` if all flags are currently set.
137 |     pub const fn is_all(&self) -> bool {
138 |         self.bits() == Self::all().bits()
139 |     }
140 | 
141 |     /// Returns `true` if there are flags common to both `self` and `other`.
142 |     pub const fn intersects(&self, other: Self) -> bool {
143 |         let bits = self.bits & other.bits;
144 |         !(Self { bits }).is_empty()
145 |     }
146 | 
147 |     /// Returns `true` if all of the flags in `other` are contained within `self`.
148 |     pub const fn contains(&self, other: Self) -> bool {
149 |         (self.bits & other.bits) == other.bits
150 |     }
151 | 
152 |     /// Inserts the specified flags in-place.
153 |     pub fn insert(&mut self, other: Self) {
154 |         self.bits |= other.bits;
155 |     }
156 | 
157 |     /// Removes the specified flags in-place.
158 |     pub fn remove(&mut self, other: Self) {
159 |         self.bits &= !other.bits;
160 |     }
161 | 
162 |     /// Toggles the specified flags in-place.
163 |     pub fn toggle(&mut self, other: Self) {
164 |         self.bits ^= other.bits;
165 |     }
166 | 
167 |     /// Inserts or removes the specified flags depending on the passed value.
168 |     pub fn set(&mut self, other: Self, value: bool) {
169 |         if value {
170 |             self.insert(other);
171 |         } else {
172 |             self.remove(other);
173 |         }
174 |     }
175 | 
176 |     /// Returns the intersection between the flags in `self` and
177 |     /// `other`.
178 |     ///
179 |     /// Specifically, the returned set contains only the flags which are
180 |     /// present in *both* `self` *and* `other`.
181 |     ///
182 |     /// This is equivalent to using the `&` operator (e.g.
183 |     /// [`ops::BitAnd`]), as in `flags & other`.
184 |     ///
185 |     /// [`ops::BitAnd`]: https://doc.rust-lang.org/std/ops/trait.BitAnd.html
186 |     #[must_use]
187 |     pub const fn intersection(self, other: Self) -> Self {
188 |         let bits = self.bits & other.bits;
189 |         Self { bits }
190 |     }
191 | 
192 |     /// Returns the union of between the flags in `self` and `other`.
193 |     ///
194 |     /// Specifically, the returned set contains all flags which are
195 |     /// present in *either* `self` *or* `other`, including any which are
196 |     /// present in both (see [`Self::symmetric_difference`] if that
197 |     /// is undesirable).
198 |     ///
199 |     /// This is equivalent to using the `|` operator (e.g.
200 |     /// [`ops::BitOr`]), as in `flags | other`.
201 |     ///
202 |     /// [`ops::BitOr`]: https://doc.rust-lang.org/std/ops/trait.BitOr.html
203 |     #[must_use]
204 |     pub const fn union(self, other: Self) -> Self {
205 |         let bits = self.bits | other.bits;
206 |         Self { bits }
207 |     }
208 | 
209 |     /// Returns the difference between the flags in `self` and `other`.
210 |     ///
211 |     /// Specifically, the returned set contains all flags present in
212 |     /// `self`, except for the ones present in `other`.
213 |     ///
214 |     /// It is also conceptually equivalent to the "bit-clear" operation:
215 |     /// `flags & !other` (and this syntax is also supported).
216 |     ///
217 |     /// This is equivalent to using the `-` operator (e.g.
218 |     /// [`ops::Sub`]), as in `flags - other`.
219 |     ///
220 |     /// [`ops::Sub`]: https://doc.rust-lang.org/std/ops/trait.Sub.html
221 |     pub const fn difference(self, other: Self) -> Self {
222 |         let bits = self.bits & !other.bits;
223 |         Self { bits }
224 |     }
225 | 
226 |     /// Returns the [symmetric difference][sym-diff] between the flags
227 |     /// in `self` and `other`.
228 |     ///
229 |     /// Specifically, the returned set contains the flags present which
230 |     /// are present in `self` or `other`, but that are not present in
231 |     /// both. Equivalently, it contains the flags present in *exactly
232 |     /// one* of the sets `self` and `other`.
233 |     ///
234 |     /// This is equivalent to using the `^` operator (e.g.
235 |     /// [`ops::BitXor`]), as in `flags ^ other`.
236 |     ///
237 |     /// [sym-diff]: https://en.wikipedia.org/wiki/Symmetric_difference
238 |     /// [`ops::BitXor`]: https://doc.rust-lang.org/std/ops/trait.BitXor.html
239 |     #[must_use]
240 |     pub const fn symmetric_difference(self, other: Self) -> Self {
241 |         let bits = self.bits ^ other.bits;
242 |         Self { bits }
243 |     }
244 | 
245 |     /// Returns the complement of this set of flags.
246 |     ///
247 |     /// Specifically, the returned set contains all the flags which are
248 |     /// not set in `self`, but which are allowed for this type.
249 |     ///
250 |     /// Alternatively, it can be thought of as the set difference
251 |     /// between [`Self::all()`] and `self` (e.g. `Self::all() - self`)
252 |     ///
253 |     /// This is equivalent to using the `!` operator (e.g.
254 |     /// [`ops::Not`]), as in `!flags`.
255 |     ///
256 |     /// [`Self::all()`]: Self::all
257 |     /// [`ops::Not`]: https://doc.rust-lang.org/std/ops/trait.Not.html
258 |     #[must_use]
259 |     pub const fn complement(self) -> Self {
260 |         Self::from_bits_truncate(!self.bits)
261 |     }
262 | }
263 | 
264 | impl fmt::Debug for FontStyle {
265 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
266 |         let mut empty = true;
267 | 
268 |         let pairs = [
269 |             (Self::BOLD, "BOLD"),
270 |             (Self::UNDERLINE, "UNDERLINE"),
271 |             (Self::ITALIC, "ITALIC"),
272 |         ];
273 |         for (flag, flag_str) in pairs {
274 |             if self.contains(flag) {
275 |                 if !std::mem::take(&mut empty) {
276 |                     f.write_str(" | ")?;
277 |                 }
278 |                 f.write_str(flag_str)?;
279 |             }
280 |         }
281 | 
282 |         let extra_bits = self.bits & !Self::all().bits();
283 |         if extra_bits != 0 {
284 |             if !std::mem::take(&mut empty) {
285 |                 f.write_str(" | ")?;
286 |             }
287 |             f.write_str("0x")?;
288 |             fmt::LowerHex::fmt(&extra_bits, f)?;
289 |         }
290 | 
291 |         if empty {
292 |             f.write_str("(empty)")?;
293 |         }
294 | 
295 |         Ok(())
296 |     }
297 | }
298 | 
299 | impl fmt::Binary for FontStyle {
300 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
301 |         fmt::Binary::fmt(&self.bits, f)
302 |     }
303 | }
304 | 
305 | impl fmt::Octal for FontStyle {
306 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
307 |         fmt::Octal::fmt(&self.bits, f)
308 |     }
309 | }
310 | 
311 | impl fmt::LowerHex for FontStyle {
312 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
313 |         fmt::LowerHex::fmt(&self.bits, f)
314 |     }
315 | }
316 | 
317 | impl fmt::UpperHex for FontStyle {
318 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
319 |         fmt::UpperHex::fmt(&self.bits, f)
320 |     }
321 | }
322 | 
323 | impl ops::BitOr for FontStyle {
324 |     type Output = Self;
325 |     /// Returns the union of the two sets of flags.
326 |     fn bitor(self, other: FontStyle) -> Self {
327 |         let bits = self.bits | other.bits;
328 |         Self { bits }
329 |     }
330 | }
331 | 
332 | impl ops::BitOrAssign for FontStyle {
333 |     /// Adds the set of flags.
334 |     fn bitor_assign(&mut self, other: Self) {
335 |         self.bits |= other.bits;
336 |     }
337 | }
338 | 
339 | impl ops::BitXor for FontStyle {
340 |     type Output = Self;
341 |     /// Returns the left flags, but with all the right flags toggled.
342 |     fn bitxor(self, other: Self) -> Self {
343 |         let bits = self.bits ^ other.bits;
344 |         Self { bits }
345 |     }
346 | }
347 | 
348 | impl ops::BitXorAssign for FontStyle {
349 |     /// Toggles the set of flags.
350 |     fn bitxor_assign(&mut self, other: Self) {
351 |         self.bits ^= other.bits;
352 |     }
353 | }
354 | 
355 | impl ops::BitAnd for FontStyle {
356 |     type Output = Self;
357 |     /// Returns the intersection between the two sets of flags.
358 |     fn bitand(self, other: Self) -> Self {
359 |         let bits = self.bits & other.bits;
360 |         Self { bits }
361 |     }
362 | }
363 | 
364 | impl ops::BitAndAssign for FontStyle {
365 |     /// Disables all flags disabled in the set.
366 |     fn bitand_assign(&mut self, other: Self) {
367 |         self.bits &= other.bits;
368 |     }
369 | }
370 | 
371 | impl ops::Sub for FontStyle {
372 |     type Output = Self;
373 |     /// Returns the set difference of the two sets of flags.
374 |     fn sub(self, other: Self) -> Self {
375 |         let bits = self.bits & !other.bits;
376 |         Self { bits }
377 |     }
378 | }
379 | 
380 | impl ops::SubAssign for FontStyle {
381 |     /// Disables all flags enabled in the set.
382 |     fn sub_assign(&mut self, other: Self) {
383 |         self.bits &= !other.bits;
384 |     }
385 | }
386 | 
387 | impl ops::Not for FontStyle {
388 |     type Output = Self;
389 |     /// Returns the complement of this set of flags.
390 |     fn not(self) -> Self {
391 |         Self { bits: !self.bits } & Self::all()
392 |     }
393 | }
394 | 
395 | impl Extend<FontStyle> for FontStyle {
396 |     fn extend<T: IntoIterator<Item = Self>>(&mut self, iterator: T) {
397 |         for item in iterator {
398 |             self.insert(item)
399 |         }
400 |     }
401 | }
402 | 
403 | impl FromIterator<FontStyle> for FontStyle {
404 |     fn from_iter<T: IntoIterator<Item = Self>>(iterator: T) -> Self {
405 |         let mut result = Self::empty();
406 |         result.extend(iterator);
407 |         result
408 |     }
409 | }
410 | 
411 | impl Color {
412 |     /// The color black (`#000000`)
413 |     pub const BLACK: Color = Color {
414 |         r: 0x00,
415 |         g: 0x00,
416 |         b: 0x00,
417 |         a: 0xFF,
418 |     };
419 | 
420 |     /// The color white (`#FFFFFF`)
421 |     pub const WHITE: Color = Color {
422 |         r: 0xFF,
423 |         g: 0xFF,
424 |         b: 0xFF,
425 |         a: 0xFF,
426 |     };
427 | }
428 | 
429 | impl Style {
430 |     /// Applies a change to this style, yielding a new changed style
431 |     pub fn apply(&self, modifier: StyleModifier) -> Style {
432 |         Style {
433 |             foreground: modifier.foreground.unwrap_or(self.foreground),
434 |             background: modifier.background.unwrap_or(self.background),
435 |             font_style: modifier.font_style.unwrap_or(self.font_style),
436 |         }
437 |     }
438 | }
439 | 
440 | impl Default for Style {
441 |     fn default() -> Style {
442 |         Style {
443 |             foreground: Color::BLACK,
444 |             background: Color::WHITE,
445 |             font_style: FontStyle::empty(),
446 |         }
447 |     }
448 | }
449 | 
450 | impl StyleModifier {
451 |     /// Applies the other modifier to this one, creating a new modifier.
452 |     ///
453 |     /// Values in `other` are preferred.
454 |     pub fn apply(&self, other: StyleModifier) -> StyleModifier {
455 |         StyleModifier {
456 |             foreground: other.foreground.or(self.foreground),
457 |             background: other.background.or(self.background),
458 |             font_style: other.font_style.or(self.font_style),
459 |         }
460 |     }
461 | }
462 | 


--------------------------------------------------------------------------------
/src/easy.rs:
--------------------------------------------------------------------------------
  1 | //! API wrappers for common use cases like highlighting strings and
  2 | //! files without caring about intermediate semantic representation
  3 | //! and caching.
  4 | 
  5 | use crate::highlighting::{HighlightIterator, HighlightState, Highlighter, Style, Theme};
  6 | use crate::parsing::{ParseState, ScopeStack, ScopeStackOp, SyntaxReference, SyntaxSet};
  7 | use crate::Error;
  8 | use std::fs::File;
  9 | use std::io::{self, BufReader};
 10 | use std::path::Path;
 11 | // use util::debug_print_ops;
 12 | 
/// Simple way to go directly from lines of text to colored tokens.
///
/// Depending on how you load the syntaxes (see the [`SyntaxSet`] docs), this can either take
/// strings with trailing `\n`s or without.
///
/// [`SyntaxSet`]: ../parsing/struct.SyntaxSet.html
///
/// # Examples
///
/// Prints colored lines of a string to the terminal
///
/// ```
/// use syntect::easy::HighlightLines;
/// use syntect::parsing::SyntaxSet;
/// use syntect::highlighting::{ThemeSet, Style};
/// use syntect::util::{as_24_bit_terminal_escaped, LinesWithEndings};
///
/// // Load these once at the start of your program
/// let ps = SyntaxSet::load_defaults_newlines();
/// let ts = ThemeSet::load_defaults();
///
/// let syntax = ps.find_syntax_by_extension("rs").unwrap();
/// let mut h = HighlightLines::new(syntax, &ts.themes["base16-ocean.dark"]);
/// let s = "pub struct Wow { hi: u64 }\nfn blah() -> u64 {}";
/// for line in LinesWithEndings::from(s) { // LinesWithEndings enables use of newlines mode
///     let ranges: Vec<(Style, &str)> = h.highlight_line(line, &ps).unwrap();
///     let escaped = as_24_bit_terminal_escaped(&ranges[..], true);
///     print!("{}", escaped);
/// }
/// ```
pub struct HighlightLines<'a> {
    // Built from the theme handed to `new` / `from_state`.
    highlighter: Highlighter<'a>,
    // Parser state, advanced by each `highlight_line` call.
    parse_state: ParseState,
    // Highlighting state that is threaded through each `highlight_line` call.
    highlight_state: HighlightState,
}
 48 | 
 49 | impl<'a> HighlightLines<'a> {
 50 |     pub fn new(syntax: &SyntaxReference, theme: &'a Theme) -> HighlightLines<'a> {
 51 |         let highlighter = Highlighter::new(theme);
 52 |         let highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
 53 |         HighlightLines {
 54 |             highlighter,
 55 |             parse_state: ParseState::new(syntax),
 56 |             highlight_state,
 57 |         }
 58 |     }
 59 | 
 60 |     #[deprecated(
 61 |         since = "5.0.0",
 62 |         note = "Renamed to `highlight_line` to make it clear it should be passed a single line at a time"
 63 |     )]
 64 |     pub fn highlight<'b>(
 65 |         &mut self,
 66 |         line: &'b str,
 67 |         syntax_set: &SyntaxSet,
 68 |     ) -> Vec<(Style, &'b str)> {
 69 |         self.highlight_line(line, syntax_set)
 70 |             .expect("`highlight` is deprecated, use `highlight_line` instead")
 71 |     }
 72 | 
 73 |     /// Highlights a line of a file
 74 |     pub fn highlight_line<'b>(
 75 |         &mut self,
 76 |         line: &'b str,
 77 |         syntax_set: &SyntaxSet,
 78 |     ) -> Result<Vec<(Style, &'b str)>, Error> {
 79 |         // println!("{}", self.highlight_state.path);
 80 |         let ops = self.parse_state.parse_line(line, syntax_set)?;
 81 |         // use util::debug_print_ops;
 82 |         // debug_print_ops(line, &ops);
 83 |         let iter =
 84 |             HighlightIterator::new(&mut self.highlight_state, &ops[..], line, &self.highlighter);
 85 |         Ok(iter.collect())
 86 |     }
 87 | 
 88 |     /// This starts again from a previous state, useful for highlighting a file incrementally for
 89 |     /// which you've cached the highlight and parse state.
 90 |     pub fn from_state(
 91 |         theme: &'a Theme,
 92 |         highlight_state: HighlightState,
 93 |         parse_state: ParseState,
 94 |     ) -> HighlightLines<'a> {
 95 |         HighlightLines {
 96 |             highlighter: Highlighter::new(theme),
 97 |             parse_state,
 98 |             highlight_state,
 99 |         }
100 |     }
101 | 
102 |     /// Returns the current highlight and parse states, useful for caching and incremental highlighting.
103 |     pub fn state(self) -> (HighlightState, ParseState) {
104 |         (self.highlight_state, self.parse_state)
105 |     }
106 | }
107 | 
/// Convenience struct containing everything you need to highlight a file
///
/// Use the `reader` to get the lines of the file and the `highlight_lines` to highlight them. See
/// the [`new`] method docs for more information.
///
/// [`new`]: #method.new
pub struct HighlightFile<'a> {
    /// Buffered reader over the opened file
    pub reader: BufReader<File>,
    /// Line highlighter set up with the syntax found for the file and the given theme
    pub highlight_lines: HighlightLines<'a>,
}
118 | 
119 | impl<'a> HighlightFile<'a> {
120 |     /// Constructs a file reader and a line highlighter to get you reading files as fast as possible.
121 |     ///
122 |     /// This auto-detects the syntax from the extension and constructs a [`HighlightLines`] with the
123 |     /// correct syntax and theme.
124 |     ///
125 |     /// [`HighlightLines`]: struct.HighlightLines.html
126 |     ///
127 |     /// # Examples
128 |     ///
129 |     /// Using the `newlines` mode is a bit involved but yields more robust and glitch-free highlighting,
130 |     /// as well as being slightly faster since it can re-use a line buffer.
131 |     ///
132 |     /// ```
133 |     /// use syntect::parsing::SyntaxSet;
134 |     /// use syntect::highlighting::{ThemeSet, Style};
135 |     /// use syntect::util::as_24_bit_terminal_escaped;
136 |     /// use syntect::easy::HighlightFile;
137 |     /// use std::io::BufRead;
138 |     ///
139 |     /// # use std::io;
140 |     /// # fn foo() -> io::Result<()> {
141 |     /// let ss = SyntaxSet::load_defaults_newlines();
142 |     /// let ts = ThemeSet::load_defaults();
143 |     ///
144 |     /// let mut highlighter = HighlightFile::new("testdata/highlight_test.erb", &ss, &ts.themes["base16-ocean.dark"]).unwrap();
145 |     /// let mut line = String::new();
146 |     /// while highlighter.reader.read_line(&mut line)? > 0 {
147 |     ///     {
148 |     ///         let regions: Vec<(Style, &str)> = highlighter.highlight_lines.highlight_line(&line, &ss).unwrap();
149 |     ///         print!("{}", as_24_bit_terminal_escaped(&regions[..], true));
150 |     ///     } // until NLL this scope is needed so we can clear the buffer after
151 |     ///     line.clear(); // read_line appends so we need to clear between lines
152 |     /// }
153 |     /// # Ok(())
154 |     /// # }
155 |     /// ```
156 |     ///
157 |     /// This example uses `reader.lines()` to get lines without a newline character, it's simpler but may break on rare tricky cases.
158 |     ///
159 |     /// ```
160 |     /// use syntect::parsing::SyntaxSet;
161 |     /// use syntect::highlighting::{ThemeSet, Style};
162 |     /// use syntect::util::as_24_bit_terminal_escaped;
163 |     /// use syntect::easy::HighlightFile;
164 |     /// use std::io::BufRead;
165 |     ///
166 |     /// let ss = SyntaxSet::load_defaults_nonewlines();
167 |     /// let ts = ThemeSet::load_defaults();
168 |     ///
169 |     /// let mut highlighter = HighlightFile::new("testdata/highlight_test.erb", &ss, &ts.themes["base16-ocean.dark"]).unwrap();
170 |     /// for maybe_line in highlighter.reader.lines() {
171 |     ///     let line = maybe_line.unwrap();
172 |     ///     let regions: Vec<(Style, &str)> = highlighter.highlight_lines.highlight_line(&line, &ss).unwrap();
173 |     ///     println!("{}", as_24_bit_terminal_escaped(&regions[..], true));
174 |     /// }
175 |     /// ```
176 |     pub fn new<P: AsRef<Path>>(
177 |         path_obj: P,
178 |         ss: &SyntaxSet,
179 |         theme: &'a Theme,
180 |     ) -> io::Result<HighlightFile<'a>> {
181 |         let path: &Path = path_obj.as_ref();
182 |         let f = File::open(path)?;
183 |         let syntax = ss
184 |             .find_syntax_for_file(path)?
185 |             .unwrap_or_else(|| ss.find_syntax_plain_text());
186 | 
187 |         Ok(HighlightFile {
188 |             reader: BufReader::new(f),
189 |             highlight_lines: HighlightLines::new(syntax, theme),
190 |         })
191 |     }
192 | }
193 | 
/// Iterator over the ranges of a line to which a given operation from the parser applies.
///
/// Use [`ScopeRegionIterator`] to obtain directly regions (`&str`s) from the line.
///
/// To use, just keep your own [`ScopeStack`] and then `ScopeStack.apply(op)` the operation that is
/// yielded at the top of your `for` loop over this iterator. Now you have a substring of the line
/// and the scope stack for that token.
///
/// See the `synstats.rs` example for an example of using this iterator.
///
/// **Note:** This will often return empty ranges, just `continue` after applying the op if you
/// don't want them.
///
/// [`ScopeStack`]: ../parsing/struct.ScopeStack.html
/// [`ScopeRegionIterator`]: ./struct.ScopeRegionIterator.html
#[derive(Debug)]
pub struct ScopeRangeIterator<'a> {
    // Operations for the line, each paired with the index in `line` at which it applies.
    ops: &'a [(usize, ScopeStackOp)],
    // The line the operations refer to.
    line: &'a str,
    // Position in `ops` of the operation that ends the next range to yield.
    index: usize,
    // End of the previously yielded range, i.e. start of the next one.
    last_str_index: usize,
}
216 | 
217 | impl<'a> ScopeRangeIterator<'a> {
218 |     pub fn new(ops: &'a [(usize, ScopeStackOp)], line: &'a str) -> ScopeRangeIterator<'a> {
219 |         ScopeRangeIterator {
220 |             ops,
221 |             line,
222 |             index: 0,
223 |             last_str_index: 0,
224 |         }
225 |     }
226 | }
227 | 
// No-op operation handed out by `ScopeRangeIterator` for the first range of a
// line, which precedes every real operation.
static NOOP_OP: ScopeStackOp = ScopeStackOp::Noop;
229 | 
230 | impl<'a> Iterator for ScopeRangeIterator<'a> {
231 |     type Item = (std::ops::Range<usize>, &'a ScopeStackOp);
232 |     fn next(&mut self) -> Option<Self::Item> {
233 |         if self.index > self.ops.len() {
234 |             return None;
235 |         }
236 | 
237 |         // region extends up to next operation (ops[index]) or string end if there is none
238 |         // note the next operation may be at, last_str_index, in which case the region is empty
239 |         let next_str_i = if self.index == self.ops.len() {
240 |             self.line.len()
241 |         } else {
242 |             self.ops[self.index].0
243 |         };
244 |         let range = self.last_str_index..next_str_i;
245 |         self.last_str_index = next_str_i;
246 | 
247 |         // the first region covers everything before the first op, which may be empty
248 |         let op = if self.index == 0 {
249 |             &NOOP_OP
250 |         } else {
251 |             &self.ops[self.index - 1].1
252 |         };
253 | 
254 |         self.index += 1;
255 |         Some((range, op))
256 |     }
257 | }
258 | 
/// A convenience wrapper over [`ScopeRangeIterator`] to return `&str`s directly.
///
/// To use, just keep your own [`ScopeStack`] and then `ScopeStack.apply(op)` the operation that is
/// yielded at the top of your `for` loop over this iterator. Now you have a substring of the line
/// and the scope stack for that token.
///
/// See the `synstats.rs` example for an example of using this iterator.
///
/// **Note:** This will often return empty regions, just `continue` after applying the op if you
/// don't want them.
///
/// [`ScopeStack`]: ../parsing/struct.ScopeStack.html
/// [`ScopeRangeIterator`]: ./struct.ScopeRangeIterator.html
#[derive(Debug)]
pub struct ScopeRegionIterator<'a> {
    // Underlying range iterator; its ranges are used to slice its `line`.
    range_iter: ScopeRangeIterator<'a>,
}
276 | 
277 | impl<'a> ScopeRegionIterator<'a> {
278 |     pub fn new(ops: &'a [(usize, ScopeStackOp)], line: &'a str) -> ScopeRegionIterator<'a> {
279 |         ScopeRegionIterator {
280 |             range_iter: ScopeRangeIterator::new(ops, line),
281 |         }
282 |     }
283 | }
284 | 
285 | impl<'a> Iterator for ScopeRegionIterator<'a> {
286 |     type Item = (&'a str, &'a ScopeStackOp);
287 |     fn next(&mut self) -> Option<Self::Item> {
288 |         let (range, op) = self.range_iter.next()?;
289 |         Some((&self.range_iter.line[range], op))
290 |     }
291 | }
292 | 
// Unit tests for the wrappers in this module. Each test is gated on the
// feature(s) that provide the bundled syntaxes and/or themes it loads.
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "default-themes")]
    use crate::highlighting::ThemeSet;
    use crate::parsing::{ParseState, ScopeStack, SyntaxSet};
    use std::str::FromStr;

    // Highlighting one Rust line yields more than four styled ranges.
    #[cfg(all(feature = "default-syntaxes", feature = "default-themes"))]
    #[test]
    fn can_highlight_lines() {
        let ss = SyntaxSet::load_defaults_nonewlines();
        let ts = ThemeSet::load_defaults();
        let syntax = ss.find_syntax_by_extension("rs").unwrap();
        let mut h = HighlightLines::new(syntax, &ts.themes["base16-ocean.dark"]);
        let ranges = h
            .highlight_line("pub struct Wow { hi: u64 }", &ss)
            .expect("#[cfg(test)]");
        assert!(ranges.len() > 4);
    }

    // Constructing a `HighlightFile` for an existing test file succeeds.
    #[cfg(all(feature = "default-syntaxes", feature = "default-themes"))]
    #[test]
    fn can_highlight_file() {
        let ss = SyntaxSet::load_defaults_nonewlines();
        let ts = ThemeSet::load_defaults();
        HighlightFile::new(
            "testdata/highlight_test.erb",
            &ss,
            &ts.themes["base16-ocean.dark"],
        )
        .unwrap();
    }

    // Walks the regions of a Ruby line, checking the scope stack and text of
    // the second non-empty region and the total number of non-empty regions.
    #[cfg(feature = "default-syntaxes")]
    #[test]
    fn can_find_regions() {
        let ss = SyntaxSet::load_defaults_nonewlines();
        let mut state = ParseState::new(ss.find_syntax_by_extension("rb").unwrap());
        let line = "lol =5+2";
        let ops = state.parse_line(line, &ss).expect("#[cfg(test)]");

        let mut stack = ScopeStack::new();
        let mut token_count = 0;
        for (s, op) in ScopeRegionIterator::new(&ops, line) {
            stack.apply(op).expect("#[cfg(test)]");
            if s.is_empty() {
                // in this case we don't care about blank tokens
                continue;
            }
            if token_count == 1 {
                assert_eq!(
                    stack,
                    ScopeStack::from_str("source.ruby keyword.operator.assignment.ruby").unwrap()
                );
                assert_eq!(s, "=");
            }
            token_count += 1;
            println!("{:?} {}", s, stack);
        }
        assert_eq!(token_count, 5);
    }

    // With newline-terminated lines, the region iterator must hand out every
    // parsed operation exactly once, plus the leading no-op.
    #[cfg(feature = "default-syntaxes")]
    #[test]
    fn can_find_regions_with_trailing_newline() {
        let ss = SyntaxSet::load_defaults_newlines();
        let mut state = ParseState::new(ss.find_syntax_by_extension("rb").unwrap());
        let lines = ["# hello world\n", "lol=5+2\n"];
        let mut stack = ScopeStack::new();

        for line in lines.iter() {
            let ops = state.parse_line(line, &ss).expect("#[cfg(test)]");
            println!("{:?}", ops);

            let mut iterated_ops: Vec<&ScopeStackOp> = Vec::new();
            for (_, op) in ScopeRegionIterator::new(&ops, line) {
                stack.apply(op).expect("#[cfg(test)]");
                iterated_ops.push(op);
                println!("{:?}", op);
            }

            let all_ops = ops.iter().map(|t| &t.1);
            assert_eq!(all_ops.count(), iterated_ops.len() - 1); // -1 because we want to ignore the NOOP
        }
    }

    // State extracted with `state()` and restored with `from_state()` must
    // carry over: the second line is still inside the docstring opened on
    // the first line.
    #[cfg(all(feature = "default-syntaxes", feature = "default-themes"))]
    #[test]
    fn can_start_again_from_previous_state() {
        let ss = SyntaxSet::load_defaults_nonewlines();
        let ts = ThemeSet::load_defaults();
        let mut highlighter = HighlightLines::new(
            ss.find_syntax_by_extension("py").unwrap(),
            &ts.themes["base16-ocean.dark"],
        );

        let lines = ["\"\"\"", "def foo():", "\"\"\""];

        let highlighted_first_line = highlighter
            .highlight_line(lines[0], &ss)
            .expect("#[cfg(test)]");

        let (highlight_state, parse_state) = highlighter.state();

        let mut other_highlighter = HighlightLines::from_state(
            &ts.themes["base16-ocean.dark"],
            highlight_state,
            parse_state,
        );

        let highlighted_second_line = other_highlighter
            .highlight_line(lines[1], &ss)
            .expect("#[cfg(test)]");

        // Check that the second line is highlighted correctly (i.e. as a docstring)
        // using the first line's previous state
        assert!(highlighted_second_line.len() == 1);
        assert!(highlighted_second_line[0].0 == highlighted_first_line[0].0);
    }
}
414 | 


--------------------------------------------------------------------------------
/examples/syntest.rs:
--------------------------------------------------------------------------------
  1 | //! An example of using syntect for testing syntax definitions.
  2 | //! Basically exactly the same as what Sublime Text can do,
  3 | //! but without needing ST installed
  4 | // To run tests only for a particular package, while showing the operations, you could use:
  5 | // cargo run --example syntest -- --debug testdata/Packages/Makefile/
  6 | // to specify that the syntax definitions should be parsed instead of loaded from the dump file,
  7 | // you can tell it where to parse them from - the following will execute only 1 syntax test after
  8 | // parsing the sublime-syntax files in the JavaScript folder:
  9 | // cargo run --example syntest testdata/Packages/JavaScript/syntax_test_json.json testdata/Packages/JavaScript/
 10 | 
 11 | use syntect::easy::ScopeRegionIterator;
 12 | use syntect::highlighting::ScopeSelectors;
 13 | use syntect::parsing::{ParseState, Scope, ScopeStack, SyntaxSet, SyntaxSetBuilder};
 14 | 
 15 | use std::cmp::{max, min};
 16 | use std::fs::File;
 17 | use std::io::{BufRead, BufReader};
 18 | use std::path::Path;
 19 | use std::str::FromStr;
 20 | use std::time::Instant;
 21 | 
 22 | use getopts::Options;
 23 | use once_cell::sync::Lazy;
 24 | use regex::Regex;
 25 | use walkdir::{DirEntry, WalkDir};
 26 | 
/// Reasons a syntax test file is rejected before any of its assertions are checked.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestHeaderError {
    /// The file is empty, or its first line does not match `SYNTAX_TEST_HEADER_PATTERN`.
    MalformedHeader,
    /// The syntax definition named in the header could not be found in the `SyntaxSet`.
    SyntaxDefinitionNotFound,
}
 32 | 
/// Outcome of running all the assertions found in one syntax test file.
///
/// Counts are measured in asserted columns, not in assertion lines: every
/// assertion contributes the width of its range.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestFileResult {
    /// At least one assertion failed: `(failed columns, total asserted columns)`.
    FailedAssertions(usize, usize),
    /// Every assertion passed: `(total asserted columns)`.
    Success(usize),
}
 38 | 
/// Matches the header line of a syntax test file.
///
/// Named groups:
/// - `testtoken_start`: optional leading whitespace followed by the run of
///   non-whitespace characters that precedes `SYNTAX TEST`
/// - `syntax_file`: the text between the double quotes
/// - `testtoken_end`: an optional run of non-whitespace characters after the
///   closing quote
pub static SYNTAX_TEST_HEADER_PATTERN: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r#"(?xm)
        ^(?P<testtoken_start>\s*\S+)
        \s+SYNTAX\sTEST\s+
        "(?P<syntax_file>[^"]+)"
        \s*(?P<testtoken_end>\S+)?$
    "#,
    )
    .unwrap()
});
/// Matches the assertion part of a test line.
///
/// Capture groups:
/// - 1 / `begin_of_token`: the `<-` marker
/// - 2 / `range`: a run of one or more `^` characters
/// - 3: everything after the marker up to the end of the line
///
/// Exactly one of groups 1 and 2 participates in a match. The pattern has no
/// leading anchor.
pub static SYNTAX_TEST_ASSERTION_PATTERN: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r#"(?xm)
    \s*(?:
        (?P<begin_of_token><-)|(?P<range>\^+)
    )(.*)$"#,
    )
    .unwrap()
});
 59 | 
/// Flags controlling what gets printed while the tests run.
#[derive(Clone, Copy)]
struct OutputOptions {
    /// Print how many milliseconds each test file took.
    time: bool,
    /// Print the scope stack, parse state and parse operations for each parsed line.
    debug: bool,
    /// Print only a `FAILED` line per failing file instead of the detailed output.
    summary: bool,
}
 66 | 
/// A single assertion extracted from a line of a syntax test file.
#[derive(Debug)]
struct AssertionRange<'a> {
    /// Position where the asserted range begins (inclusive).
    begin_char: usize,
    /// Position where the asserted range ends (exclusive).
    end_char: usize,
    /// The scope selector that the asserted range is expected to match.
    scope_selector_text: &'a str,
    /// Whether the line holds nothing but the assertion.
    is_pure_assertion_line: bool,
}
 74 | 
/// A piece of the line being tested together with the scopes that apply to it.
#[derive(Debug)]
struct ScopedText {
    /// Snapshot of the scope stack for this piece of text.
    scope: Vec<Scope>,
    /// Column (counted in chars) at which this piece of text starts.
    char_start: usize,
    /// Length of this piece of text, counted in chars.
    text_len: usize,
}
 81 | 
/// Result of checking an assertion's selector against one scoped piece of text.
#[derive(Debug)]
struct RangeTestResult {
    /// First column covered by this result.
    column_begin: usize,
    /// Column just past the last one covered by this result.
    column_end: usize,
    /// Whether the selector matched the scopes over this column range.
    success: bool,
}
 88 | 
 89 | fn get_line_assertion_details<'a>(
 90 |     testtoken_start: &str,
 91 |     testtoken_end: Option<&str>,
 92 |     line: &'a str,
 93 | ) -> Option<AssertionRange<'a>> {
 94 |     // if the test start token specified in the test file's header is on the line
 95 |     if let Some(index) = line.find(testtoken_start) {
 96 |         let (before_token_start, token_and_rest_of_line) = line.split_at(index);
 97 | 
 98 |         if let Some(captures) =
 99 |             SYNTAX_TEST_ASSERTION_PATTERN.captures(&token_and_rest_of_line[testtoken_start.len()..])
100 |         {
101 |             let mut sst = captures.get(3).unwrap().as_str(); // get the scope selector text
102 |             let mut only_whitespace_after_token_end = true;
103 | 
104 |             if let Some(token) = testtoken_end {
105 |                 // if there is an end token defined in the test file header
106 |                 if let Some(end_token_pos) = sst.find(token) {
107 |                     // and there is an end token in the line
108 |                     let (ss, after_token_end) = sst.split_at(end_token_pos); // the scope selector text ends at the end token
109 |                     sst = ss;
110 |                     only_whitespace_after_token_end = after_token_end.trim_end().is_empty();
111 |                 }
112 |             }
113 |             return Some(AssertionRange {
114 |                 begin_char: index
115 |                     + if captures.get(2).is_some() {
116 |                         testtoken_start.len() + captures.get(2).unwrap().start()
117 |                     } else {
118 |                         0
119 |                     },
120 |                 end_char: index
121 |                     + if captures.get(2).is_some() {
122 |                         testtoken_start.len() + captures.get(2).unwrap().end()
123 |                     } else {
124 |                         1
125 |                     },
126 |                 scope_selector_text: sst,
127 |                 is_pure_assertion_line: before_token_start.trim_start().is_empty()
128 |                     && only_whitespace_after_token_end, // if only whitespace surrounds the test tokens on the line, then it is a pure assertion line
129 |             });
130 |         }
131 |     }
132 |     None
133 | }
134 | 
135 | fn process_assertions(
136 |     assertion: &AssertionRange<'_>,
137 |     test_against_line_scopes: &[ScopedText],
138 | ) -> Vec<RangeTestResult> {
139 |     // format the scope selector to include a space at the beginning, because, currently, ScopeSelector expects excludes to begin with " -"
140 |     // and they are sometimes in the syntax test as ^^^-comment, for example
141 |     let selector =
142 |         ScopeSelectors::from_str(&format!(" {}", &assertion.scope_selector_text)).unwrap();
143 |     // find the scope at the specified start column, and start matching the selector through the rest of the tokens on the line from there until the end column is reached
144 |     let mut results = Vec::new();
145 |     for scoped_text in test_against_line_scopes
146 |         .iter()
147 |         .skip_while(|s| s.char_start + s.text_len <= assertion.begin_char)
148 |         .take_while(|s| s.char_start < assertion.end_char)
149 |     {
150 |         let match_value = selector.does_match(scoped_text.scope.as_slice());
151 |         let result = RangeTestResult {
152 |             column_begin: max(scoped_text.char_start, assertion.begin_char),
153 |             column_end: min(
154 |                 scoped_text.char_start + scoped_text.text_len,
155 |                 assertion.end_char,
156 |             ),
157 |             success: match_value.is_some(),
158 |         };
159 |         results.push(result);
160 |     }
161 |     // don't ignore assertions after the newline, they should be treated as though they are asserting against the newline
162 |     let last = test_against_line_scopes.last().unwrap();
163 |     if last.char_start + last.text_len < assertion.end_char {
164 |         let match_value = selector.does_match(last.scope.as_slice());
165 |         let result = RangeTestResult {
166 |             column_begin: max(last.char_start + last.text_len, assertion.begin_char),
167 |             column_end: assertion.end_char,
168 |             success: match_value.is_some(),
169 |         };
170 |         results.push(result);
171 |     }
172 |     results
173 | }
174 | 
/// Runs every assertion in the syntax test file at `path` and reports the outcome.
///
/// The first line of the file must be a syntax test header; it names the syntax
/// definition to parse with and the token(s) that mark assertion lines. Each
/// assertion is checked against the scopes of the most recent line that carried
/// no assertion.
///
/// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed
///
/// Returns `Err(MalformedHeader)` when the file is empty or the header does not
/// match, and `Err(SyntaxDefinitionNotFound)` when the referenced syntax is not
/// in `ss`. Otherwise returns `Success` or `FailedAssertions` with the counts.
/// The result is also printed, in a form that depends on `out_opts.summary`.
///
/// Panics if the file cannot be opened or read, or if parsing a line or applying
/// a scope operation fails.
fn test_file(
    ss: &SyntaxSet,
    path: &Path,
    parse_test_lines: bool,
    out_opts: OutputOptions,
) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
    use syntect::util::debug_print_ops;
    let f = File::open(path).unwrap();
    let mut reader = BufReader::new(f);
    let mut line = String::new();

    // read the first line from the file - if we have reached EOF already, it's an invalid file
    if reader.read_line(&mut line).unwrap() == 0 {
        return Err(SyntaxTestHeaderError::MalformedHeader);
    }

    // drop carriage returns so that CRLF files behave like LF files
    line = line.replace('\r', "");

    // parse the syntax test header in the first line of the file
    // (a copy is matched against because `line` is reused for every following line)
    let header_line = line.clone();
    let search_result = SYNTAX_TEST_HEADER_PATTERN.captures(&header_line);
    let captures = search_result.ok_or(SyntaxTestHeaderError::MalformedHeader)?;

    let testtoken_start = captures.name("testtoken_start").unwrap().as_str();
    let testtoken_end = captures.name("testtoken_end").map(|c| c.as_str());
    let syntax_file = captures.name("syntax_file").unwrap().as_str();

    // find the relevant syntax definition to parse the file with - case is important!
    if !out_opts.summary {
        println!(
            "The test file references syntax definition file: {}",
            syntax_file
        );
    }
    let syntax = ss
        .find_syntax_by_path(syntax_file)
        .ok_or(SyntaxTestHeaderError::SyntaxDefinitionNotFound)?;

    // iterate over the lines of the file, testing them
    let mut state = ParseState::new(syntax);
    let mut stack = ScopeStack::new();

    // `current_line_number` is the line being read; `test_against_line_number`,
    // `scopes_on_line_being_tested` and `previous_non_assertion_line` describe the
    // most recent line without an assertion, which is what assertions test against
    let mut current_line_number = 1;
    let mut test_against_line_number = 1;
    let mut scopes_on_line_being_tested = Vec::new();
    let mut previous_non_assertion_line = line.to_string();

    // both counters are measured in asserted columns
    let mut assertion_failures: usize = 0;
    let mut total_assertions: usize = 0;

    loop {
        // over lines of file, starting with the header line
        let mut line_only_has_assertion = false;
        let mut line_has_assertion = false;
        if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) {
            let result = process_assertions(&assertion, &scopes_on_line_being_tested);
            total_assertions += assertion.end_char - assertion.begin_char;
            for failure in result.iter().filter(|r| !r.success) {
                let length = failure.column_end - failure.column_begin;
                // the text of the tested line that the failed column range covers
                let text: String = previous_non_assertion_line
                    .chars()
                    .skip(failure.column_begin)
                    .take(length)
                    .collect();
                if !out_opts.summary {
                    println!(
                        "  Assertion selector {:?} \
                        from line {:?} failed against line {:?}, column range {:?}-{:?} \
                        (with text {:?}) \
                        has scope {:?}",
                        assertion.scope_selector_text.trim(),
                        current_line_number,
                        test_against_line_number,
                        failure.column_begin,
                        failure.column_end,
                        text,
                        // the scopes of the piece of text containing the first failed
                        // column, or of the last piece when the failure is past the end
                        scopes_on_line_being_tested
                            .iter()
                            .find(|s| s.char_start + s.text_len > failure.column_begin)
                            .unwrap_or_else(|| scopes_on_line_being_tested.last().unwrap())
                            .scope
                    );
                }
                assertion_failures += failure.column_end - failure.column_begin;
            }
            line_only_has_assertion = assertion.is_pure_assertion_line;
            line_has_assertion = true;
        }
        // parse the line, unless it is a pure assertion line and those are to be skipped
        if !line_only_has_assertion || parse_test_lines {
            if !line_has_assertion {
                // ST seems to ignore lines that have assertions when calculating which line the assertion tests against
                scopes_on_line_being_tested.clear();
                test_against_line_number = current_line_number;
                previous_non_assertion_line = line.to_string();
            }
            if out_opts.debug && !line_only_has_assertion {
                println!(
                    "-- debugging line {} -- scope stack: {:?}, -- parse state: {:?}",
                    current_line_number, stack, state
                );
            }
            let ops = state.parse_line(&line, ss).unwrap();
            if out_opts.debug && !line_only_has_assertion {
                if ops.is_empty() && !line.is_empty() {
                    println!("no operations for this line...");
                } else {
                    debug_print_ops(&line, &ops);
                }
            }
            // column (in chars) at which the next recorded piece of text starts
            let mut col: usize = 0;
            for (s, op) in ScopeRegionIterator::new(&ops, &line) {
                // the operation is always applied so that the scope stack stays up to date
                stack.apply(op).unwrap();
                if s.is_empty() {
                    // in this case we don't care about blank tokens
                    continue;
                }
                if !line_has_assertion {
                    // if the line has no assertions on it, remember the scopes on the line so we can test against them later
                    let len = s.chars().count();
                    scopes_on_line_being_tested.push(ScopedText {
                        char_start: col,
                        text_len: len,
                        scope: stack.as_slice().to_vec(),
                    });
                    // TODO: warn when there are duplicate adjacent (non-meta?) scopes, as it is almost always undesired
                    col += len;
                }
            }
        }

        // move on to the next line, stopping at EOF
        line.clear();
        current_line_number += 1;
        if reader.read_line(&mut line).unwrap() == 0 {
            break;
        }
        line = line.replace('\r', "");
    }
    let res = if assertion_failures > 0 {
        Ok(SyntaxTestFileResult::FailedAssertions(
            assertion_failures,
            total_assertions,
        ))
    } else {
        Ok(SyntaxTestFileResult::Success(total_assertions))
    };

    if out_opts.summary {
        if let Ok(SyntaxTestFileResult::FailedAssertions(failures, _)) = res {
            // Don't print total assertion count so that diffs don't pick up new succeeding tests
            println!("FAILED {}: {}", path.display(), failures);
        }
    } else {
        println!("{:?}", res);
    }

    res
}
333 | 
334 | fn main() {
335 |     let args: Vec<String> = std::env::args().collect();
336 |     let mut opts = Options::new();
337 |     opts.optflag("d", "debug", "Show parsing results for each test line");
338 |     opts.optflag(
339 |         "t",
340 |         "time",
341 |         "Time execution as a more broad-ranging benchmark",
342 |     );
343 |     opts.optflag("s", "summary", "Print only summary of test failures");
344 | 
345 |     let matches = match opts.parse(&args[1..]) {
346 |         Ok(m) => m,
347 |         Err(f) => {
348 |             panic!("{}", f.to_string())
349 |         }
350 |     };
351 | 
352 |     let tests_path = if matches.free.is_empty() {
353 |         "."
354 |     } else {
355 |         &args[1]
356 |     };
357 | 
358 |     let syntaxes_path = if matches.free.len() < 2 { "" } else { &args[2] };
359 | 
360 |     // load the syntaxes from disk if told to
361 |     // (as opposed to from the binary dumps)
362 |     // this helps to ensure that a recompile isn't needed
363 |     // when using this for syntax development
364 |     let mut ss = if syntaxes_path.is_empty() {
365 |         SyntaxSet::load_defaults_newlines() // note we load the version with newlines
366 |     } else {
367 |         SyntaxSet::new()
368 |     };
369 |     if !syntaxes_path.is_empty() {
370 |         println!("loading syntax definitions from {}", syntaxes_path);
371 |         let mut builder = SyntaxSetBuilder::new();
372 |         builder.add_from_folder(syntaxes_path, true).unwrap(); // note that we load the version with newlines
373 |         ss = builder.build();
374 |     }
375 | 
376 |     let out_opts = OutputOptions {
377 |         debug: matches.opt_present("debug"),
378 |         time: matches.opt_present("time"),
379 |         summary: matches.opt_present("summary"),
380 |     };
381 | 
382 |     let exit_code = recursive_walk(&ss, tests_path, out_opts);
383 |     println!("exiting with code {}", exit_code);
384 |     std::process::exit(exit_code);
385 | }
386 | 
387 | fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: OutputOptions) -> i32 {
388 |     let mut exit_code: i32 = 0; // exit with code 0 by default, if all tests pass
389 |     let walker = WalkDir::new(path).into_iter();
390 | 
391 |     // accumulate and sort for consistency of diffs across machines
392 |     let mut files = Vec::new();
393 |     for entry in walker.filter_entry(|e| e.file_type().is_dir() || is_a_syntax_test_file(e)) {
394 |         let entry = entry.unwrap();
395 |         if entry.file_type().is_file() {
396 |             files.push(entry.path().to_owned());
397 |         }
398 |     }
399 |     files.sort();
400 | 
401 |     for path in &files {
402 |         if !out_opts.summary {
403 |             println!("Testing file {}", path.display());
404 |         }
405 |         let start = Instant::now();
406 |         let result = test_file(ss, path, true, out_opts);
407 |         let elapsed = start.elapsed();
408 |         if out_opts.time {
409 |             let ms = (elapsed.as_secs() * 1_000) + elapsed.subsec_millis() as u64;
410 |             println!("{} ms for file {}", ms, path.display());
411 |         }
412 |         if exit_code != 2 {
413 |             // leave exit code 2 if there was an error
414 |             if result.is_err() {
415 |                 // set exit code 2 if there was an error
416 |                 exit_code = 2;
417 |             } else if let Ok(SyntaxTestFileResult::FailedAssertions(_, _)) = result {
418 |                 exit_code = 1; // otherwise, if there were failures, exit with code 1
419 |             }
420 |         }
421 |     }
422 | 
423 |     exit_code
424 | }
425 | 
426 | fn is_a_syntax_test_file(entry: &DirEntry) -> bool {
427 |     entry
428 |         .file_name()
429 |         .to_str()
430 |         .map(|s| s.starts_with("syntax_test_"))
431 |         .unwrap_or(false)
432 | }
433 | 


--------------------------------------------------------------------------------