├── CHANGELOG.md
├── PerformanceTestResults.html
├── README.md
├── docs
├── .project
├── apidocs
│ ├── allclasses-index.html
│ ├── allpackages-index.html
│ ├── constant-values.html
│ ├── de
│ │ └── unkrig
│ │ │ ├── lfr
│ │ │ └── core
│ │ │ │ ├── CharacterClass.html
│ │ │ │ ├── Grapheme.html
│ │ │ │ ├── IntPredicate.html
│ │ │ │ ├── Matcher.CompiledReplacement.html
│ │ │ │ ├── Matcher.html
│ │ │ │ ├── Pattern.html
│ │ │ │ ├── PatternFactory.html
│ │ │ │ ├── ReverseCharSequence.html
│ │ │ │ ├── Sequences.LiteralString.html
│ │ │ │ ├── Sequences.html
│ │ │ │ ├── package-summary.html
│ │ │ │ └── package-tree.html
│ │ │ └── ref4j
│ │ │ ├── Matcher.html
│ │ │ ├── Pattern.html
│ │ │ ├── PatternFactory.html
│ │ │ ├── package-summary.html
│ │ │ └── package-tree.html
│ ├── element-list
│ ├── help-doc.html
│ ├── index-all.html
│ ├── index.html
│ ├── jquery-ui.overrides.css
│ ├── legal
│ │ ├── ADDITIONAL_LICENSE_INFO
│ │ ├── ASSEMBLY_EXCEPTION
│ │ ├── LICENSE
│ │ ├── jquery.md
│ │ └── jqueryUI.md
│ ├── member-search-index.js
│ ├── module-search-index.js
│ ├── overview-summary.html
│ ├── overview-tree.html
│ ├── package-search-index.js
│ ├── resources
│ │ ├── glass.png
│ │ └── x.png
│ ├── script-dir
│ │ ├── images
│ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png
│ │ │ ├── ui-bg_glass_65_dadada_1x400.png
│ │ │ ├── ui-bg_glass_75_dadada_1x400.png
│ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png
│ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png
│ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png
│ │ │ ├── ui-icons_222222_256x240.png
│ │ │ ├── ui-icons_2e83ff_256x240.png
│ │ │ ├── ui-icons_454545_256x240.png
│ │ │ ├── ui-icons_888888_256x240.png
│ │ │ └── ui-icons_cd0a0a_256x240.png
│ │ ├── jquery-3.5.1.min.js
│ │ ├── jquery-ui.min.css
│ │ ├── jquery-ui.min.js
│ │ └── jquery-ui.structure.min.css
│ ├── script.js
│ ├── search.js
│ ├── serialized-form.html
│ ├── stylesheet.css
│ ├── tag-search-index.js
│ └── type-search-index.js
└── generate_javadoc.sh
├── lfr-core
├── .checkstyle
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ ├── org.eclipse.jdt.ui.prefs
│ ├── org.eclipse.m2e.core.prefs
│ └── org.jboss.ide.eclipse.as.core.prefs
├── launch
│ ├── lfr-core jre11.launch
│ ├── lfr-core jre17.launch
│ └── lfr-core jre8.launch
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── de
│ │ │ └── unkrig
│ │ │ └── lfr
│ │ │ └── core
│ │ │ ├── CharacterClass.java
│ │ │ ├── CharacterClasses.java
│ │ │ ├── CompositeSequence.java
│ │ │ ├── Grapheme.java
│ │ │ ├── IntPredicate.java
│ │ │ ├── Matcher.java
│ │ │ ├── MatcherImpl.java
│ │ │ ├── MultivalentCharClass.java
│ │ │ ├── MultivalentCharacterClass.java
│ │ │ ├── MultivalentSequence.java
│ │ │ ├── Pattern.java
│ │ │ ├── PatternFactory.java
│ │ │ ├── ReverseCharSequence.java
│ │ │ ├── Sequence.java
│ │ │ ├── Sequences.java
│ │ │ └── package-info.java
│ ├── javadoc
│ │ ├── overview.html
│ │ ├── package-lists
│ │ │ └── de.unkrig.commons
│ │ │ │ └── commons-text
│ │ │ │ └── package-list
│ │ └── stylesheet.css
│ └── resources
│ │ └── META-INF
│ │ └── services
│ │ └── de.unkrig.ref4j.PatternFactory
│ └── test
│ └── java
│ └── test
│ ├── FunctionalityEquivalencePatternFactory.java
│ ├── Misc.java
│ ├── OracleEssentials.java
│ ├── OracleEssentialsTest.java
│ ├── ParameterizedWithPatternFactory.java
│ ├── PatternTest.java
│ ├── PerformanceMeasurement.java
│ ├── PerformanceMeasurementPatternFactory.java
│ ├── PerformanceTests.java
│ ├── Sampler.java
│ └── package-info.java
├── lfr-parent
├── .keep
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ └── org.eclipse.m2e.core.prefs
└── pom.xml
├── openjdk15_regex_tests
├── .classpath
├── .keep
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── launch
│ ├── openjdk15_regex_tests (JRE 11).launch
│ ├── openjdk15_regex_tests (JRE 17).launch
│ └── openjdk15_regex_tests (JRE 8).launch
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── .keep
│ └── resources
│ │ └── .keep
│ └── test
│ ├── java
│ ├── GraphemeTest.java.txt
│ ├── NegativeArraySize.java.txt
│ ├── POSIX_ASCII.java
│ ├── POSIX_Unicode.java
│ ├── PatternStreamTest.java.txt
│ ├── RegExTest.java
│ └── RegExTest.java.orig
│ └── resources
│ ├── BMPTestCases.txt
│ ├── GraphemeTestCases.txt
│ ├── SupplementaryTestCases.txt
│ └── TestCases.txt
├── openjdk8_regex_tests
├── .checkstyle
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── README.md
├── launch
│ ├── openjdk8_regex_tests (JRE 11).launch
│ ├── openjdk8_regex_tests (JRE 17).launch
│ └── openjdk8_regex_tests (JRE 8).launch
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── .empty
│ └── resources
│ │ └── .empty
│ └── test
│ ├── java
│ ├── POSIX_ASCII.java
│ ├── POSIX_Unicode.java
│ └── RegExTest.java
│ └── resources
│ ├── BMPTestCases.txt
│ ├── SupplementaryTestCases.txt
│ └── TestCases.txt
└── ref4j
├── .checkstyle
├── .classpath
├── .gitignore
├── .project
├── .settings
├── org.eclipse.core.resources.prefs
├── org.eclipse.jdt.core.prefs
└── org.eclipse.m2e.core.prefs
├── foo.txt
├── pom.xml
└── src
├── main
├── java
│ └── de
│ │ └── unkrig
│ │ └── ref4j
│ │ ├── Matcher.java
│ │ ├── Pattern.java
│ │ ├── PatternFactory.java
│ │ ├── jur
│ │ ├── PatternFactory.java
│ │ └── package-info.java
│ │ └── package-info.java
└── resources
│ └── META-INF
│ └── services
│ └── de.unkrig.ref4j.PatternFactory
└── test
└── java
└── ref4j
├── Ref4JTests.java
└── package-info.java
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ### Version 1.2.0:
2 |
3 | * Added missing JRE 8 methods to "Pattern" for JRE 8-17 compatibility.
4 | * Gave up on JRE 6 compatibility - minimum JRE version is now 8.
5 | * Fixed some scanning rules in "comment mode".
6 | * Catch infinite quantities of zero-width operand (creates an endless loop otherwise).
7 | * Fixed the backtracking of "\r\n" sequences (may pose *two* (!) line breaks!).
8 | * Fixed allowed characters in capturing group names.
9 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Lightning-fast Regular Expressions for Java
2 |
3 | Lightning-fast Regular Expressions ("LFR") is a 99.9%-complete reimplementation of `java.util.regex` ("JUR") with better `matches()` and `find()` performance. Yet the design is much cleaner and easier to understand and extend.
4 |
5 | LFR is (successfully) tested against the official OpenJDK 15 regex regression test suite.
6 |
7 | ## Differences between LFR and JUR
8 |
9 | ### FUNCTIONAL DIFFERENCES
10 |
11 | All features of JUR are available and functionally identical, except for the following differences:
12 |
13 | Minus:
14 |
15 | * `Pattern.CANON_EQ` (a really obscure, hopefully rarely used feature) is not implemented. You get an `IllegalArgumentException` when you invoke LFR `Pattern.compile()` with this flag.
16 |
17 | Plus:
18 |
19 | * Lookbehinds are no longer limited to fixed-length expressions.
20 |
21 | * LFR's `Matcher.replaceFirst/All()` methods can not only replace with numbered group (`$1`) or named group (`${name}`), but also with a Java-like expression; e.g.
22 |
23 | `PatternFactory.INSTANCE.compile("(?<grp>a)").matcher("abc").replaceAll("${3 + 4 + grp + m.groupCount()}")`
24 |
25 | returns
26 |
27 | `"7a1bc"`
28 |
29 | The expression syntax is described [here](https://aunkrig.github.io/lfr/apidocs/de/unkrig/lfr/core/Matcher.html#compileReplacement-java.lang.String-).
30 |
31 | ### API DIFFERENCES
32 |
33 | The classes `Pattern` and `Matcher` were duplicated from the JUR (package `java.util.regex`) to LFR (package `de.unkrig.lfr.core`) with identical fields and methods.
34 |
35 | The JUR `MatchResult` and `PatternSyntaxException` were re-used instead of being duplicated.
36 |
37 | There are the following differences in the API:
38 |
39 | Minus:
40 |
41 | * Some JRE classes use JUR internally, and cannot be retrofitted to use LFR. However, all these methods use `Pattern.compile()`, so you don't want to use them in performance-critical applications. Examples: `String.matches(regex)`, `String.replaceFirst(regex, replacement)`, `String.replaceAll(regex, replacement)`, `String.split(regex[, limit])`, `java.util.Scanner.next(Pattern)` (Actually not a minus in the LFR API, but in the JRE APIs.)
42 |
43 | Plus:
44 |
45 | * The LFR `Pattern` class has three additional methods `matches(CharSequence subject[, regionStart[, regionEnd]])`, which are particularly fast because they do not expose the `Matcher` object and can thus save some overhead.
46 |
47 | * The LFR `Pattern` class has an additional method `sequenceToString()` which returns a human-readable form of the compiled regex. For example, `compile("A.*abcdefghijklmn", DOTALL).sequenceToString()` returns
48 |
49 | `'A' . greedyQuantifierOnAnyCharAndLiteralString(min=0, max=infinite, ls=boyerMooreHorspool("abcdefghijklmn"))`
50 |
51 | This is useful for testing how a regex compiled, and especially which optimizations have taken place.
52 |
53 | * LFR only requires JRE 1.8+, but makes some later features available for earlier JREs:
54 | * JUR features that appeared in JRE 1.9:
55 | * Named Unicode characters, e.g. `\N{LATIN SMALL LETTER O}` (only if executed in a JRE 9+)
56 | * (Unicode extended graphemes -- are not (yet) supported.)
57 | * JUR features that appeared in JREs 10, 11, 12, 13, 14, 15, 16 and 17:
58 | * (None.)
59 |
60 | * Although LFR requires only JRE 8+, the methods that were added later (namely with Java 9: `Matcher.replaceFirst(Function)`, `Matcher.replaceAll(Function)`, `Matcher.results()`) are always available.
61 |
62 | ## Performance
63 |
64 | Minus:
65 |
66 | * Regex compilation performance was not measured and is probably quite slow (as with JUR). There is surely a lot of room for optimization in this area, if someone needs it.
67 |
68 | Plus:
69 |
70 | * Regex evaluation (`Matcher.matches()`, `find()`, `lookingAt()`, ...) is roughly [four times as fast as with JUR](https://gitcdn.link/cdn/aunkrig/lfr/master/PerformanceTestResults.html). This was measured with the LFR test case suite and [Performance comparison of regular expression engines](https://zherczeg.github.io/sljit/regex_perf.html). Other use cases (other regexes, other subjects, other API calls, ...) may yield different results.
71 |
72 | * LFR specifically improves the evaluation performance for the following special cases:
73 |
74 | * Patterns that start with literal characters (or character classes, or alternatives) (for `Matcher.find()`)
75 |
76 | * Patterns that contain a greedy or reluctant quantifier of ANY, followed by literal characters (or character classes, or alternatives); e.g. `"xxx.*ABCDEFGHIJKLMNOPxxx"` (or `"(?i)xxx.+foobar"`, or `"xxx.{4,39}?(?:alpha|beta|gamma)"`)
77 |
78 | * Patterns that contain a possessive quantifier of ANY; e.g. `"xxx.++xxx"`
79 |
80 | "ANY" means the "." pattern, and the DOTALL flag being active. ("." *without* the DOTALL flag being active means "any character except a line terminator".)
81 |
82 | ## Facade
83 |
84 | If you want to switch between JUR and LFR (and other, not yet written RE implementations) at *runtime*, you can use "`de.unkrig.ref4j`", the "regular expressions facade for Java":
85 |
86 | de.unkrig.ref4j.PatternFactory pf = PatternFactory.get(); // Gets the PF designated by the system property "de.unkrig.ref4j.PatternFactory", or the first PF on the classpath
87 | de.unkrig.ref4j.Pattern p = pf.compile(regex);
88 | ...
89 |
90 | ## Integration
91 |
92 | All versions of LFR are available on [MAVEN CENTRAL](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22de.unkrig.lfr%22); download the latest JAR file from there, or add it as a MAVEN dependency.
93 |
94 | JAVADOC can be found [here](https://aunkrig.github.io/lfr/apidocs/de/unkrig/lfr/core/package-summary.html).
95 |
96 | ## License
97 |
98 | de.unkrig.lfr - A super-fast regular expression evaluator
99 |
100 | Copyright (c) 2017, Arno Unkrig
101 | All rights reserved.
102 |
103 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
104 |
105 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
106 |
107 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
108 |
109 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
110 |
111 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
112 |
--------------------------------------------------------------------------------
/docs/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | docs
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/docs/apidocs/allpackages-index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | All Packages
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
21 |
24 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/docs/apidocs/jquery-ui.overrides.css:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 | *
5 | * This code is free software; you can redistribute it and/or modify it
6 | * under the terms of the GNU General Public License version 2 only, as
7 | * published by the Free Software Foundation. Oracle designates this
8 | * particular file as subject to the "Classpath" exception as provided
9 | * by Oracle in the LICENSE file that accompanied this code.
10 | *
11 | * This code is distributed in the hope that it will be useful, but WITHOUT
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 | * version 2 for more details (a copy is included in the LICENSE file that
15 | * accompanied this code).
16 | *
17 | * You should have received a copy of the GNU General Public License version
18 | * 2 along with this work; if not, write to the Free Software Foundation,
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 | *
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 | * or visit www.oracle.com if you need additional information or have any
23 | * questions.
24 | */
25 |
26 | .ui-state-active,
27 | .ui-widget-content .ui-state-active,
28 | .ui-widget-header .ui-state-active,
29 | a.ui-button:active,
30 | .ui-button:active,
31 | .ui-button.ui-state-active:hover {
32 | /* Overrides the color of selection used in jQuery UI */
33 | background: #F8981D;
34 | }
35 |
--------------------------------------------------------------------------------
/docs/apidocs/legal/ADDITIONAL_LICENSE_INFO:
--------------------------------------------------------------------------------
1 | Please see ..\java.base\ADDITIONAL_LICENSE_INFO
2 |
--------------------------------------------------------------------------------
/docs/apidocs/legal/ASSEMBLY_EXCEPTION:
--------------------------------------------------------------------------------
1 | Please see ..\java.base\ASSEMBLY_EXCEPTION
2 |
--------------------------------------------------------------------------------
/docs/apidocs/legal/LICENSE:
--------------------------------------------------------------------------------
1 | Please see ..\java.base\LICENSE
2 |
--------------------------------------------------------------------------------
/docs/apidocs/legal/jquery.md:
--------------------------------------------------------------------------------
1 | ## jQuery v3.5.1
2 |
3 | ### jQuery License
4 | ```
5 | jQuery v 3.5.1
6 | Copyright JS Foundation and other contributors, https://js.foundation/
7 |
8 | Permission is hereby granted, free of charge, to any person obtaining
9 | a copy of this software and associated documentation files (the
10 | "Software"), to deal in the Software without restriction, including
11 | without limitation the rights to use, copy, modify, merge, publish,
12 | distribute, sublicense, and/or sell copies of the Software, and to
13 | permit persons to whom the Software is furnished to do so, subject to
14 | the following conditions:
15 |
16 | The above copyright notice and this permission notice shall be
17 | included in all copies or substantial portions of the Software.
18 |
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 |
27 | ******************************************
28 |
29 | The jQuery JavaScript Library v3.5.1 also includes Sizzle.js
30 |
31 | Sizzle.js includes the following license:
32 |
33 | Copyright JS Foundation and other contributors, https://js.foundation/
34 |
35 | This software consists of voluntary contributions made by many
36 | individuals. For exact contribution history, see the revision history
37 | available at https://github.com/jquery/sizzle
38 |
39 | The following license applies to all parts of this software except as
40 | documented below:
41 |
42 | ====
43 |
44 | Permission is hereby granted, free of charge, to any person obtaining
45 | a copy of this software and associated documentation files (the
46 | "Software"), to deal in the Software without restriction, including
47 | without limitation the rights to use, copy, modify, merge, publish,
48 | distribute, sublicense, and/or sell copies of the Software, and to
49 | permit persons to whom the Software is furnished to do so, subject to
50 | the following conditions:
51 |
52 | The above copyright notice and this permission notice shall be
53 | included in all copies or substantial portions of the Software.
54 |
55 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
56 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
57 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
58 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
59 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
60 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
61 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
62 |
63 | ====
64 |
65 | All files located in the node_modules and external directories are
66 | externally maintained libraries used by this software which have their
67 | own licenses; we recommend you read them, as their terms may differ from
68 | the terms above.
69 |
70 | *********************
71 |
72 | ```
73 |
--------------------------------------------------------------------------------
/docs/apidocs/legal/jqueryUI.md:
--------------------------------------------------------------------------------
1 | ## jQuery UI v1.12.1
2 |
3 | ### jQuery UI License
4 | ```
5 | Copyright jQuery Foundation and other contributors, https://jquery.org/
6 |
7 | This software consists of voluntary contributions made by many
8 | individuals. For exact contribution history, see the revision history
9 | available at https://github.com/jquery/jquery-ui
10 |
11 | The following license applies to all parts of this software except as
12 | documented below:
13 |
14 | ====
15 |
16 | Permission is hereby granted, free of charge, to any person obtaining
17 | a copy of this software and associated documentation files (the
18 | "Software"), to deal in the Software without restriction, including
19 | without limitation the rights to use, copy, modify, merge, publish,
20 | distribute, sublicense, and/or sell copies of the Software, and to
21 | permit persons to whom the Software is furnished to do so, subject to
22 | the following conditions:
23 |
24 | The above copyright notice and this permission notice shall be
25 | included in all copies or substantial portions of the Software.
26 |
27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
31 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
32 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
33 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 |
35 | ====
36 |
37 | Copyright and related rights for sample code are waived via CC0. Sample
38 | code is defined as all source code contained within the demos directory.
39 |
40 | CC0: http://creativecommons.org/publicdomain/zero/1.0/
41 |
42 | ====
43 |
44 | All files located in the node_modules and external directories are
45 | externally maintained libraries used by this software which have their
46 | own licenses; we recommend you read them, as their terms may differ from
47 | the terms above.
48 |
49 | ```
50 |
--------------------------------------------------------------------------------
/docs/apidocs/module-search-index.js:
--------------------------------------------------------------------------------
1 | moduleSearchIndex = [];updateSearchResults();
--------------------------------------------------------------------------------
/docs/apidocs/overview-summary.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Generated Documentation (Untitled)
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
17 |
18 |
19 |
20 |
23 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/docs/apidocs/tag-search-index.js:
--------------------------------------------------------------------------------
1 | tagSearchIndex = [{"l":"Constant Field Values","h":"","u":"constant-values.html"},{"l":"Serialized Form","h":"","u":"serialized-form.html"}];updateSearchResults();
--------------------------------------------------------------------------------
/docs/apidocs/type-search-index.js:
--------------------------------------------------------------------------------
1 | typeSearchIndex = [{"l":"All Classes and Interfaces","u":"allclasses-index.html"},{"p":"de.unkrig.lfr.core","l":"CharacterClass"},{"p":"de.unkrig.lfr.core","l":"Matcher.CompiledReplacement"},{"p":"de.unkrig.lfr.core","l":"Grapheme"},{"p":"de.unkrig.lfr.core","l":"IntPredicate"},{"p":"de.unkrig.lfr.core","l":"Sequences.LiteralString"},{"p":"de.unkrig.lfr.core","l":"Matcher"},{"p":"de.unkrig.ref4j","l":"Matcher"},{"p":"de.unkrig.lfr.core","l":"Pattern"},{"p":"de.unkrig.ref4j","l":"Pattern"},{"p":"de.unkrig.lfr.core","l":"PatternFactory"},{"p":"de.unkrig.ref4j","l":"PatternFactory"},{"p":"de.unkrig.lfr.core","l":"ReverseCharSequence"},{"p":"de.unkrig.lfr.core","l":"Sequences"}];updateSearchResults();
--------------------------------------------------------------------------------
/docs/generate_javadoc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Runs the JDK "javadoc" tool over the "lfr-core" and "ref4j" sources and writes the result to "apidocs". All paths are relative, so this is presumably meant to be started from the "docs" directory - confirm.
3 | set -e;
4 | # NOTE(review): Hard-coded, machine-specific JDK location; must be adapted to the local installation.
5 | JAVA_HOME=C:/dev/Java/jdk-17.0.2+8;
6 | # Class path for javadoc: the compiled classes of four "commons-*" projects, joined with ";" into a single string.
7 | cp="\
8 | ../commons-lang/target/classes;\
9 | ../commons-util/target/classes;\
10 | ../commons-nullanalysis/target/classes;\
11 | ../commons-text/target/classes";
12 | # The "-link" option on the next line is disabled. NOTE(review): the proxy properties (localhost:999) passed below presumably keep javadoc from reaching the network - confirm.
13 | # -link https://docs.oracle.com/javase/11/docs/api/ \
14 | $JAVA_HOME/bin/javadoc \
15 | -sourcepath "../lfr-core/src/main/java;../ref4j/src/main/java" \
16 | -classpath "$cp" \
17 | -d apidocs \
18 | -Xdoclint:none \
19 | -J-Dhttp.proxyHost=localhost \
20 | -J-Dhttp.proxyPort=999 \
21 | -J-Dhttps.proxyHost=localhost \
22 | -J-Dhttps.proxyPort=999 \
23 | -subpackages \
24 | de.unkrig.lfr.core \
25 | de.unkrig.ref4j \
26 | ;
27 |
--------------------------------------------------------------------------------
/lfr-core/.checkstyle:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/lfr-core/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/lfr-core/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/lfr-core/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | lfr-core
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | net.sf.eclipsecs.core.CheckstyleBuilder
15 |
16 |
17 |
18 |
19 | org.eclipse.m2e.core.maven2Builder
20 |
21 |
22 |
23 |
24 |
25 | org.eclipse.jdt.core.javanature
26 | org.eclipse.m2e.core.maven2Nature
27 | net.sf.eclipsecs.core.CheckstyleNature
28 |
29 |
30 |
--------------------------------------------------------------------------------
/lfr-core/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/main/resources=UTF-8
4 | encoding//src/test/java=UTF-8
5 | encoding//src/test/resources=UTF-8
6 | encoding/=UTF-8
7 |
--------------------------------------------------------------------------------
/lfr-core/.settings/org.eclipse.jdt.ui.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | formatter_profile=_Eclipse [spaces-only]
3 | formatter_settings_version=12
4 |
--------------------------------------------------------------------------------
/lfr-core/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/lfr-core/.settings/org.jboss.ide.eclipse.as.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.jboss.ide.eclipse.as.core.singledeployable.deployableList=
3 |
--------------------------------------------------------------------------------
/lfr-core/launch/lfr-core jre11.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/lfr-core/launch/lfr-core jre17.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/lfr-core/launch/lfr-core jre8.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/lfr-core/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | lfr-core
5 |
6 | Lightning-fast regular expressions
7 |
8 |
9 |
10 | de.unkrig.commons
11 | commons-text
12 |
13 |
14 | junit
15 | junit
16 | test
17 |
18 |
19 | de.unkrig.lfr
20 | ref4j
21 | ${project.parent.version}
22 |
23 |
24 |
25 |
26 |
27 |
28 | org.apache.maven.plugins
29 | maven-jar-plugin
30 |
31 |
32 |
33 | org.apache.maven.plugins
34 | maven-source-plugin
35 |
36 |
37 |
38 | org.apache.maven.plugins
39 | maven-javadoc-plugin
40 |
41 |
42 |
43 | org.sonatype.plugins
44 | nexus-staging-maven-plugin
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 | have_gpg
54 |
55 |
56 |
57 | org.apache.maven.plugins
58 | maven-gpg-plugin
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 | de.unkrig.lfr
67 | lfr-parent
68 | 1.2.1-SNAPSHOT
69 | ../lfr-parent
70 |
71 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/CharacterClass.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2017, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package de.unkrig.lfr.core;
28 |
29 | import de.unkrig.commons.lang.protocol.Consumer;
30 |
31 | /**
32 |  * A {@link CompositeSequence} that consumes one code point of the subject and accepts it iff {@link #matches(int)}
33 |  * does. Both {@link #matches(MatcherImpl)} and {@link #find(MatcherImpl)} are implemented on top of that predicate.
34 |  */
35 | public abstract
36 | class CharacterClass extends CompositeSequence {
37 |
38 |     /**
39 |      * Bound ranges wider than this are not enumerated by {@link #checkWithoutNext(int, Consumer)}.
40 |      */
41 |     private static final int MAX_ENUMERATED_RANGE = 100;
42 |
43 |     /**
44 |      * Sets up a character class with a match length between one and two characters.
45 |      */
46 |     public
47 |     CharacterClass() { this(1, 2); }
48 |
49 |     /**
50 |      * Sets up a character class with a fixed match length.
51 |      */
52 |     public
53 |     CharacterClass(int matchLengthWithoutNext) { this(matchLengthWithoutNext, matchLengthWithoutNext); }
54 |
55 |     /**
56 |      * Sets up a character class with the given minimum and maximum match length.
57 |      */
58 |     public
59 |     CharacterClass(int minMatchLengthWithoutNext, int maxMatchLengthWithoutNext) {
60 |         super(minMatchLengthWithoutNext, maxMatchLengthWithoutNext);
61 |     }
62 |
63 |     /**
64 |      * Reads the next code point from the matcher and, if it qualifies, continues with the {@link #next} sequence.
65 |      * If it does not qualify, the matcher's offset is restored. If the offset is already at the end of the region,
66 |      * then {@code hitEnd} is flagged on the matcher.
67 |      */
68 |     @Override public boolean
69 |     matches(MatcherImpl matcher) {
70 |
71 |         final int start = matcher.offset;
72 |
73 |         if (start >= matcher.regionEnd) {
74 |             matcher.hitEnd = true;
75 |             return false;
76 |         }
77 |
78 |         if (this.matches(matcher.readChar())) return this.next.matches(matcher);
79 |
80 |         // The code point does not qualify, so "un-read" it.
81 |         matcher.offset = start;
82 |         return false;
83 |     }
84 |
85 |     /**
86 |      * Scans the region, starting at the matcher's offset, for a qualifying code point behind which the {@link
87 |      * #next} sequence matches.
88 |      *
89 |      * @return The offset where the match begins, or -1 (with {@code hitEnd} flagged) if the region is exhausted
90 |      */
91 |     @Override public int
92 |     find(MatcherImpl matcher) {
93 |
94 |         while (matcher.offset < matcher.regionEnd) {
95 |
96 |             final int candidate = matcher.offset;
97 |
98 |             if (!this.matches(matcher.readChar())) continue;
99 |
100 |             // The code point qualifies; see if the rest of the pattern matches, too.
101 |             final int behindCandidate = matcher.offset;
102 |             if (this.next.matches(matcher)) return candidate;
103 |
104 |             // It does not; resume the scan right behind the candidate.
105 |             matcher.offset = behindCandidate;
106 |         }
107 |
108 |         matcher.hitEnd = true;
109 |         return -1;
110 |     }
111 |
112 |     /**
113 |      * @return Whether the <var>codePoint</var> matches this character class
114 |      */
115 |     public abstract boolean
116 |     matches(int codePoint);
117 |
118 |     /**
119 |      * @return A value below which {@link #matches(int)} is guaranteed to return {@code false}; zero by default
120 |      */
121 |     @SuppressWarnings("static-method") public int
122 |     lowerBound() { return 0; }
123 |
124 |     /**
125 |      * @return A value from which on (inclusive) {@link #matches(int)} is guaranteed to return {@code false};
126 |      *         {@link Integer#MAX_VALUE} by default
127 |      */
128 |     @SuppressWarnings("static-method") public int
129 |     upperBound() { return Integer.MAX_VALUE; }
130 |
131 |     /**
132 |      * @return The number of values for which {@link #matches(int)} returns {@code true}, or more; {@link
133 |      *         Integer#MAX_VALUE} by default
134 |      */
135 |     @SuppressWarnings("static-method") public int
136 |     sizeBound() { return Integer.MAX_VALUE; }
137 |
138 |     /**
139 |      * Reports, for each qualifying code point in the bound range and for its upper-case and lower-case variants,
140 |      * the {@code char} at position <var>offset</var> of its UTF-16 encoding (if it has one). If the bound range
141 |      * spans more than 100 values, then -1 is reported instead.
142 |      */
143 |     @Override protected void
144 |     checkWithoutNext(int offset, Consumer result) {
145 |
146 |         if (this.upperBound() - this.lowerBound() > CharacterClass.MAX_ENUMERATED_RANGE) {
147 |             result.consume(-1);
148 |             return;
149 |         }
150 |
151 |         for (int cp = this.lowerBound(); cp < this.upperBound(); cp++) {
152 |
153 |             if (!this.matches(cp)) continue;
154 |
155 |             int[] variants = { cp, Character.toUpperCase(cp), Character.toLowerCase(cp) };
156 |             for (int variant : variants) {
157 |                 char[] chars = Character.toChars(variant);
158 |                 if (offset < chars.length) result.consume((int) chars[offset]);
159 |             }
160 |         }
161 |     }
162 | }
163 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/CompositeSequence.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2017, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package de.unkrig.lfr.core;
28 |
29 | import de.unkrig.commons.lang.protocol.Consumer;
30 | import de.unkrig.commons.util.ArrayUtil;
31 |
32 | /**
33 | * An {@link Sequence} that implements {@link #concat(Sequence)} by setting up a linked list of {@link Sequence}es.
34 | *
35 | * Notice that implementations' {@link #matches(MatcherImpl)} methods must always honor the {@link #next}!
36 | *
37 | */
38 | abstract
39 | class CompositeSequence extends Sequence {
40 |
41 | /**
42 | * Reference to the "next" sequence.
43 | */
44 | Sequence next;
45 |
46 | /**
47 | * This sequence (excluding the {@link #next} sequence) will match at least that many characters. E.g.
48 | * ".{3,5}" has a {@link #minMatchLengthWithoutNext} of three.
49 | */
50 | private final int minMatchLengthWithoutNext;
51 |
52 | /**
53 | * This sequence (excluding the {@link #next} sequence) will match at most that many characters. E.g.
54 | * ".{,3}" has a {@link #maxMatchLengthWithoutNext} of six.
55 | */
56 | private final int maxMatchLengthWithoutNext;
57 |
58 | CompositeSequence(int matchLengthWithoutNext) {
59 | super(matchLengthWithoutNext, matchLengthWithoutNext);
60 | this.next = Sequences.TERMINAL;
61 | this.minMatchLengthWithoutNext = matchLengthWithoutNext;
62 | this.maxMatchLengthWithoutNext = matchLengthWithoutNext;
63 | }
64 |
65 | CompositeSequence(
66 | int minMatchLengthWithoutNext,
67 | int maxMatchLengthWithoutNext
68 | ) {
69 | super(minMatchLengthWithoutNext, maxMatchLengthWithoutNext);
70 | this.next = Sequences.TERMINAL;
71 | this.minMatchLengthWithoutNext = minMatchLengthWithoutNext;
72 | this.maxMatchLengthWithoutNext = maxMatchLengthWithoutNext;
73 | }
74 |
75 | @Override public Sequence
76 | concat(Sequence that) {
77 |
78 | this.next = this.next.concat(that);
79 |
80 | this.minMatchLength = Sequences.add(this.minMatchLengthWithoutNext, this.next.minMatchLength);
81 | this.maxMatchLength = Sequences.add(this.maxMatchLengthWithoutNext, this.next.maxMatchLength);
82 |
83 | // Join adjacent MultivalentSequences.
84 | if (this instanceof MultivalentSequence) {
85 | MultivalentSequence ms1 = (MultivalentSequence) this;
86 | CompositeSequence cs1 = (CompositeSequence) this;
87 |
88 | if (cs1.next instanceof MultivalentSequence) {
89 | MultivalentSequence ms2 = (MultivalentSequence) cs1.next;
90 | CompositeSequence cs2 = (CompositeSequence) cs1.next;
91 |
92 | return Sequences.multivalentSequence(ArrayUtil.append(
93 | ms1.getNeedle(),
94 | ms2.getNeedle()
95 | )).concat(cs2.next);
96 | }
97 | }
98 |
99 | return this;
100 | }
101 |
102 | @Override void
103 | check(int offset, Consumer result) {
104 |
105 | if (offset < this.maxMatchLengthWithoutNext) this.checkWithoutNext(offset, result);
106 |
107 | // int limit = offset - this.minMatchLengthWithoutNext;
108 | if (offset >= this.minMatchLengthWithoutNext) {
109 | for (int i = this.minMatchLengthWithoutNext; i <= this.maxMatchLengthWithoutNext && i <= offset; i++) {
110 | this.next.check(offset - i, result);
111 | }
112 | }
113 | }
114 |
115 | /**
116 | * Same as {@link #check(int, Consumer)}, but ignores the {@link #next} sequence.
117 | *
118 | * @param offset 0 ... {@code (}{@link #maxMatchLengthWithoutNext} {@code - 1)}
119 | */
120 | protected void
121 | checkWithoutNext(int offset, Consumer result) {
122 | if (this.maxMatchLengthWithoutNext > 0) result.consume(-1);
123 | }
124 |
125 | /**
126 | * @return A human-readable form of {@code this} sequence, but without the {@link #next} sequence
127 | */
128 | protected abstract String
129 | toStringWithoutNext();
130 |
131 | @Override public String
132 | toString() {
133 | return (
134 | this.next == Sequences.TERMINAL
135 | ? this.toStringWithoutNext()
136 | : this.toStringWithoutNext() + " . " + this.next.toString()
137 | );
138 | }
139 | }
140 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/Grapheme.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2019, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package de.unkrig.lfr.core;
28 |
29 | import de.unkrig.commons.lang.AssertionUtil;
30 | import de.unkrig.commons.lang.OptionalMethods;
31 | import de.unkrig.commons.lang.OptionalMethods.MethodWrapper2;
32 | /** Static access to {@code java.util.regex.Grapheme.isBoundary(int, int)}, which is looked up by class and method name. */
33 | public final
34 | class Grapheme {
35 | // Presumably force-enables assertions so that the "assert" in isBoundary() is always checked -- TODO confirm.
36 | static { AssertionUtil.enableAssertionsForThisClass(); }
37 | // Not instantiable; this class has only static members.
38 | private Grapheme() {}
39 | // NOTE(review): the type arguments of "MethodWrapper2" below look truncated (unbalanced '>') -- confirm against VCS.
40 | private static final MethodWrapper2, Boolean, Integer, Integer, RuntimeException>
41 | GRAPHEME__IS_BOUNDARY = OptionalMethods.get2(
42 | "Graphemes only available in Java 9+", // message
43 | null, // classLoader
44 | "java.util.regex.Grapheme", // declaringClassName
45 | "isBoundary", // methodName
46 | int.class, // parameterType1
47 | int.class // parameterType2
48 | );
49 | /** Invokes the wrapped {@code isBoundary} method with the two code points and returns its (non-null) result. */
50 | public static boolean
51 | isBoundary(int cp0, int cp1) {
52 | Boolean result = GRAPHEME__IS_BOUNDARY.invoke(null, cp0, cp1);
53 | assert result != null;
54 | return result;
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/IntPredicate.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2017, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package de.unkrig.lfr.core;
28 |
29 | import de.unkrig.commons.lang.protocol.Predicate;
30 |
31 | /**
32 | * An optimization of {@link Predicate Predicate}{@code <Integer>} which saves the overhead of boxing and unboxing.
33 | */
34 | public
35 | interface IntPredicate {
36 |
37 | /**
38 | * Returns {@code true} iff the <var>subject</var> "qualifies", otherwise {@code false}.
39 | */
40 | boolean evaluate(int subject);
41 | }
42 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/MultivalentCharClass.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2017, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package de.unkrig.lfr.core;
28 |
29 | import java.util.Arrays;
30 | import java.util.Set;
31 |
32 | /**
33 |  * A specialization of {@link MultivalentCharacterClass} that is slightly faster because it saves the overhead of
34 |  * decoding surrogate pairs, but works only for BMP (one-char) code points.
35 |  */
36 | class MultivalentCharClass extends MultivalentCharacterClass implements MultivalentSequence {
37 |
38 |     /**
39 |      * @param codePoints The code points that designate a positive match; each is handled as a single {@code char}
40 |      */
41 |     MultivalentCharClass(Set<Integer> codePoints) { super(codePoints); }
42 |
43 |     /**
44 |      * @return A needle with exactly one position, which holds the chars of this class in ascending order
45 |      */
46 |     @Override public char[][]
47 |     getNeedle() {
48 |
49 |         char[] chars = new char[this.codePoints.size()];
50 |
51 |         int i = 0;
52 |         for (int cp : this.codePoints) chars[i++] = (char) cp;
53 |
54 |         Arrays.sort(chars);
55 |
56 |         return new char[][] { chars };
57 |     }
58 |
59 |     /**
60 |      * Optimized version of {@link #matches(MatcherImpl)}: Checks the {@code char} at the matcher's offset
61 |      * directly, without decoding a code point. The matcher's offset is advanced only if that char qualifies.
62 |      */
63 |     @Override public boolean
64 |     matches(MatcherImpl matcher) {
65 |
66 |         final int o = matcher.offset;
67 |
68 |         if (o >= matcher.regionEnd) {
69 |             matcher.hitEnd = true;
70 |             return false;
71 |         }
72 |
73 |         if (!this.matches(matcher.subject.charAt(o))) return false;
74 |
75 |         matcher.offset = o + 1;
76 |         return this.next.matches(matcher);
77 |     }
78 |
79 |     /**
80 |      * Optimized version of {@link #find(MatcherImpl)}
81 |      *
82 |      * @return The offset of the first qualifying char behind which the {@link #next} sequence matches, or -1 (with
83 |      *         {@code hitEnd} flagged) if the region is exhausted
84 |      */
85 |     @Override public int
86 |     find(MatcherImpl matcher) {
87 |
88 |         for (int o = matcher.offset; o < matcher.regionEnd; o++) {
89 |
90 |             // Skip chars that are not in this class.
91 |             if (!this.matches(matcher.subject.charAt(o))) continue;
92 |
93 |             // See if the rest of the pattern matches; if not, continue right behind the character match.
94 |             matcher.offset = o + 1;
95 |             if (this.next.matches(matcher)) return o;
96 |         }
97 |
98 |         matcher.hitEnd = true;
99 |         return -1;
100 |     }
101 |
102 |     /**
103 |      * @return The number of code points in this class. (This method must not report fewer values than {@link
104 |      *         #matches(int)} accepts, so a constant "1" would be wrong for any set with more than one element.)
105 |      */
106 |     @Override public int sizeBound() { return this.codePoints.size(); }
107 |
108 |     @Override protected String
109 |     toStringWithoutNext() { return "oneOfManyChars(" + this.codePoints + ")"; }
110 | }
111 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/MultivalentCharacterClass.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2017, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package de.unkrig.lfr.core;
28 |
29 | import java.util.ArrayList;
30 | import java.util.List;
31 | import java.util.Set;
32 |
33 | import de.unkrig.commons.lang.protocol.Consumer;
34 | import de.unkrig.commons.util.ArrayUtil;
35 |
36 | /**
37 | * A specialization of {@link CharacterClass} that matches a (typically relatively small) set of code points.
38 | */
39 | class MultivalentCharacterClass extends CharacterClass {
40 |
41 | protected final Set codePoints;
42 | protected final int lowerBound, upperBound, sizeBound;
43 |
44 | /**
45 | * @param codePoints The set of code points that designate a positive match e.g. "{ 'a', 'A' }"
46 | */
47 | MultivalentCharacterClass(Set codePoints) {
48 | super(
49 | MultivalentCharacterClass.minCharCountOf(codePoints),
50 | MultivalentCharacterClass.maxCharCountOf(codePoints)
51 | );
52 | this.codePoints = codePoints;
53 |
54 | this.lowerBound = MultivalentCharacterClass.min(codePoints);
55 | this.upperBound = MultivalentCharacterClass.max(codePoints) + 1;
56 | this.sizeBound = codePoints.size();
57 | }
58 |
59 | @Override public boolean
60 | matches(int cp) { return this.codePoints.contains(cp); }
61 |
62 | @Override public int lowerBound() { return this.lowerBound; }
63 | @Override public int upperBound() { return this.upperBound; }
64 | @Override public int sizeBound() { return this.sizeBound; }
65 |
66 | @Override public Sequence
67 | concat(Sequence that) {
68 |
69 | Sequence result = super.concat(that);
70 | if (result != this) return result;
71 |
72 | if (
73 | this.minMatchLength == this.maxMatchLength
74 | && this.next instanceof MultivalentCharacterClass
75 | ) {
76 | MultivalentCharacterClass
77 | next2 = (MultivalentCharacterClass) this.next;
78 |
79 | if (next2.minMatchLength == next2.maxMatchLength) {
80 |
81 | List chars1 = new ArrayList(this.codePoints.size());
82 | for (int cp : this.codePoints) chars1.add(Character.toChars(cp));
83 |
84 | List chars2 = new ArrayList(next2.codePoints.size());
85 | for (int cp : next2.codePoints) chars2.add(Character.toChars(cp));
86 |
87 | return Sequences.multivalentSequence(ArrayUtil.append(
88 | ArrayUtil.mirror(chars1.toArray(new char[chars1.size()][])),
89 | ArrayUtil.mirror(chars2.toArray(new char[chars2.size()][]))
90 | )).concat(next2.next);
91 | }
92 | }
93 |
94 | return this;
95 | }
96 |
97 | @Override protected void
98 | checkWithoutNext(int offset, Consumer result) {
99 | for (int cp : this.codePoints) {
100 | char[] chars = Character.toChars(cp);
101 | if (offset < chars.length) result.consume((int) chars[offset]);
102 | }
103 | }
104 |
105 | @Override protected String
106 | toStringWithoutNext() { return "oneOfManyCodePoints(" + this.codePoints + ")"; }
107 |
108 | private static int
109 | min(Set set) {
110 | int result = Integer.MAX_VALUE;
111 | for (int i : set) {
112 | if (i < result) result = i;
113 | }
114 | return result;
115 | }
116 |
117 | private static int
118 | max(Set set) {
119 | int result = Integer.MIN_VALUE;
120 | for (int i : set) {
121 | if (i > result) result = i;
122 | }
123 | return result;
124 | }
125 |
126 | private static int
127 | minCharCountOf(Set codePoints) {
128 | for (int cp : codePoints) {
129 | if (Character.charCount(cp) == 1) return 1;
130 | }
131 | return 2;
132 | }
133 |
134 | private static int
135 | maxCharCountOf(Set codePoints) {
136 | for (int cp : codePoints) {
137 | if (Character.charCount(cp) == 2) return 2;
138 | }
139 | return 1;
140 | }
141 | }
142 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/MultivalentSequence.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2017, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package de.unkrig.lfr.core;
28 |
29 | /**
30 | * A sequence of fixed length, where each position is linked with a (typically small) set of {@code char}s.
31 | * These sets of chars are also called "the needle", as a metaphor for a needle that is to be found in a haystack.
32 | *
33 | * Examples for multivalent sequences:
34 | *
35 | * <ul>
36 | * <li>"abc"</li>
37 | * <li>"abc", case-insensitive</li>
38 | * <li>"a[bc]d" (match length is 3)</li>
39 | * <li>"\s" (whitespace characters are all in the BMP, and are a small set)</li>
40 | * <li>"abc|def" (both alternatives have the same length)</li>
41 | * <li>"a\x{10000}|def" (both alternatives have length 3)</li>
42 | * </ul>
43 | *
44 | * The following are not multivalent sequences:
45 | *
46 | * <ul>
47 | * <li>"[b\x{10000}]" ("b" has length 1, and "\x{10000}" has length 2)</li>
48 | * <li>"\S" (non-whitespace characters; have different lengths)</li>
63 | * Notice that, other than in the example above, {@code getNeedle()[n].length} is often different for different
64 | * values of {@code n}.
65 | *
66 | */
67 | char[][]
68 | getNeedle();
69 | }
70 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/ReverseCharSequence.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2017, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package de.unkrig.lfr.core;
28 |
29 | /**
30 |  * A {@link CharSequence} that presents another char sequence in "logically reversed" order, i.e. back to front,
31 |  * but with surrogate pairs and CR-LF pairs kept in their original order.
32 |  *
33 |  * @author Arno
34 |  */
35 | public abstract
36 | class ReverseCharSequence implements CharSequence {
37 |
38 |     /**
39 |      * Subjects up to this length are reversed by copying; longer ones are reversed through a view.
40 |      */
41 |     private static final int MAX_COPY_LENGTH = 10;
42 |
43 |     /**
44 |      * @return The original char sequence; actually "the reverse of the reverse"
45 |      */
46 |     protected abstract CharSequence
47 |     original();
48 |
49 |     /**
50 |      * @return {@link #reverseByCopy(CharSequence)} for subjects of up to ten chars, otherwise {@link
51 |      *         #reverseInPlace(CharSequence)}
52 |      */
53 |     public static CharSequence
54 |     reverse(CharSequence subject) {
55 |         if (subject.length() <= ReverseCharSequence.MAX_COPY_LENGTH) {
56 |             return ReverseCharSequence.reverseByCopy(subject);
57 |         }
58 |         return ReverseCharSequence.reverseInPlace(subject);
59 |     }
60 |
61 |     /**
62 |      * Returns a string that is the "logical reverse" of the <var>subject</var>, i.e. surrogate pairs and CR-LF
63 |      * pairs are not reversed.
64 |      */
65 |     public static String
66 |     reverseByCopy(CharSequence subject) {
67 |
68 |         // "StringBuilder.reverse()" already keeps surrogate pairs in their original order.
69 |         StringBuilder sb = new StringBuilder(subject).reverse();
70 |
71 |         // Un-reverse CR-LF sequences, which now appear as LF-CR.
72 |         for (int i = sb.length() - 2; i >= 0; i--) {
73 |             if (sb.charAt(i) == '\n' && sb.charAt(i + 1) == '\r') {
74 |                 sb.setCharAt(i, '\r');
75 |                 sb.setCharAt(i + 1, '\n');
76 |                 i--;
77 |             }
78 |         }
79 |
80 |         return sb.toString();
81 |     }
82 |
83 |     /**
84 |      * Returns a {@link CharSequence} that is the "logical reverse" of the <var>subject</var>, i.e. surrogate pairs
85 |      * and CR-LF pairs are not reversed. The returned {@link CharSequence} is based on the original, thus, the
86 |      * behavior of the returned {@link CharSequence} is undefined if the original changes.
87 |      * <p>
88 |      *   If the <var>subject</var> is itself a {@link ReverseCharSequence}, then its {@link #original()} is returned.
89 |      * </p>
90 |      */
91 |     public static CharSequence
92 |     reverseInPlace(final CharSequence subject) {
93 |
94 |         if (subject instanceof ReverseCharSequence) return ((ReverseCharSequence) subject).original();
95 |
96 |         return new ReverseCharSequence() {
97 |
98 |             final int len   = subject.length();
99 |             final int lenm1 = this.len - 1;
100 |
101 |             // IMPLEMENT ReverseCharSequence
102 |
103 |             @Override protected CharSequence
104 |             original() { return subject; }
105 |
106 |             // IMPLEMENT CharSequence
107 |
108 |             @Override public int
109 |             length() { return this.len; }
110 |
111 |             @Override public char
112 |             charAt(int offset) {
113 |
114 |                 // Translate the offset into an offset in the original.
115 |                 offset = this.lenm1 - offset;
116 |
117 |                 char c = subject.charAt(offset);
118 |
119 |                 // Un-reverse CR-LF sequences.
120 |                 if (c == '\r' && offset < this.lenm1 && subject.charAt(offset + 1) == '\n') return '\n';
121 |                 if (c == '\n' && offset > 0 && subject.charAt(offset - 1) == '\r') return '\r';
122 |
123 |                 // Un-reverse high-surrogate-low-surrogate.
124 |                 char c2;
125 |                 if (
126 |                     Character.isHighSurrogate(c)
127 |                     && offset < this.lenm1
128 |                     && Character.isLowSurrogate((c2 = subject.charAt(offset + 1)))
129 |                 ) return c2;
130 |                 if (
131 |                     Character.isLowSurrogate(c)
132 |                     && offset > 0
133 |                     && Character.isHighSurrogate((c2 = subject.charAt(offset - 1)))
134 |                 ) return c2;
135 |
136 |                 return c;
137 |             }
138 |
139 |             // The subsequence must be taken from THIS (reversed) sequence, not from the original subject;
140 |             // otherwise the result would contain the un-reversed chars.
141 |             @Override public CharSequence
142 |             subSequence(int start, int end) { return ReverseCharSequence.subSequence(this, start, end); }
143 |
144 |             @Override public String
145 |             toString() {
146 |                 StringBuilder sb = new StringBuilder(this.len);
147 |                 for (int i = 0; i < this.len;) {
148 |                     int cp = Character.codePointAt(this, i);
149 |                     sb.appendCodePoint(cp);
150 |                     i += Character.charCount(cp);
151 |                 }
152 |                 return sb.toString();
153 |             }
154 |         };
155 |     }
156 |
157 |     /**
158 |      * Trivial implementation of a "subsequence": A view on the chars <var>start</var> (inclusive) through
159 |      * <var>end</var> (exclusive) of <var>cs</var>. The indexes are not validated.
160 |      */
161 |     private static CharSequence
162 |     subSequence(final CharSequence cs, final int start, final int end) {
163 |
164 |         return new CharSequence() {
165 |
166 |             @Override public int
167 |             length() { return end - start; }
168 |
169 |             @Override public char
170 |             charAt(int index) { return cs.charAt(start + index); }
171 |
172 |             @Override public CharSequence
173 |             subSequence(int start2, int end2) {
174 |                 return ReverseCharSequence.subSequence(cs, start + start2, start + end2);
175 |             }
176 |
177 |             @Override public String
178 |             toString() {
179 |
180 |                 // The target index is relative to this subsequence (0 ... length - 1), the source index is
181 |                 // relative to "cs" (start ... end - 1).
182 |                 char[] ca = new char[end - start];
183 |                 for (int i = 0; i < ca.length; i++) ca[i] = cs.charAt(start + i);
184 |                 return new String(ca);
185 |             }
186 |         };
187 |     }
188 | }
189 |
--------------------------------------------------------------------------------
/lfr-core/src/main/java/de/unkrig/lfr/core/package-info.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2016, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | /**
28 | * A super-fast drop-in replacement for {@code java.util.regex}.
29 | */
30 | @NotNullByDefault
31 | package de.unkrig.lfr.core;
32 |
33 | import de.unkrig.commons.nullanalysis.NotNullByDefault;
34 |
--------------------------------------------------------------------------------
/lfr-core/src/main/javadoc/overview.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Lightning-fast Regular Expressions ("LFR") is a 99.9%-complete reimplementation of java.util.regex
4 | ("JUR") with better matches() and find() performance, and some extra features.
5 |
10 |
11 |
--------------------------------------------------------------------------------
/lfr-core/src/main/javadoc/package-lists/de.unkrig.commons/commons-text/package-list:
--------------------------------------------------------------------------------
1 | de.unkrig.commons.text
2 | de.unkrig.commons.text.expression
3 | de.unkrig.commons.text.json
4 | de.unkrig.commons.text.parser
5 | de.unkrig.commons.text.pattern
6 | de.unkrig.commons.text.scanner
7 | de.unkrig.commons.text.xml
8 |
--------------------------------------------------------------------------------
/lfr-core/src/main/resources/META-INF/services/de.unkrig.ref4j.PatternFactory:
--------------------------------------------------------------------------------
1 |
2 | # The "lightning-fast regular expressions" implementation of de.unkrig.ref4j.PatternFactory:
3 | de.unkrig.lfr.core.PatternFactory
4 | de.unkrig.lfr.core.PatternFactory.instanceField = INSTANCE
5 |
--------------------------------------------------------------------------------
/lfr-core/src/test/java/test/OracleEssentials.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2016, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package test;
28 |
29 | import de.unkrig.commons.lang.ExceptionUtil;
30 | import de.unkrig.commons.nullanalysis.Nullable;
31 | import de.unkrig.ref4j.Matcher;
32 | import de.unkrig.ref4j.Pattern;
33 | import de.unkrig.ref4j.PatternFactory;
34 |
35 | public
36 | class OracleEssentials extends ParameterizedWithPatternFactory {
37 | // Smoke-test helpers: Compile a regex with the configured pattern factory and exercise a matcher on a subject.
38 | public
39 | OracleEssentials(PatternFactory patternFactory) { super(patternFactory); }
40 |
41 | /**
42 | * Shorthand for "{@link #harnessFull(String, String, int, Integer, int, Boolean, Boolean) harnessFull(regex,
43 | * subject, 0, null, 0, null, null)}".
44 | */
45 | public void
46 | harnessFull(String regex, String subject) { this.harnessFull(regex, subject, 0, null, 0, null, null); }
47 |
48 | /**
49 | * Shorthand for "{@link #harnessFull(String, String, int, Integer, int, Boolean, Boolean) harnessFull(regex,
50 | * subject, flags, null, 0, null, null)}".
51 | */
52 | public void
53 | harnessFull(String regex, String subject, int flags) {
54 | this.harnessFull(regex, subject, flags, null, 0, null, null);
55 | }
56 |
57 | /**
58 | * Shorthand for "{@link #harnessFull(String, String, int, Integer, int, Boolean, Boolean) harnessFull(regex,
59 | * subject, flags, regionStart, regionEnd, null, null)}".
60 | */
61 | public void
62 | harnessFull(String regex, String subject, int flags, int regionStart, int regionEnd) {
63 | this.harnessFull(regex, subject, flags, regionStart, regionEnd, null, null);
64 | }
65 |
66 | /**
67 | * Shorthand for "{@link #harnessFull(String, String, int, Integer, int, Boolean, Boolean) harnessFull(regex,
68 | * subject, flags, regionStart, regionEnd, transparentBounds, null)}".
69 | */
70 | public void
71 | harnessFull(String regex, String subject, int flags, int regionStart, int regionEnd, boolean transparentBounds) {
72 | this.harnessFull(regex, subject, flags, regionStart, regionEnd, transparentBounds, null);
73 | }
74 |
75 | /**
76 | * Verifies that {@link PatternFactory#compile(String, int)}, {@link Matcher#lookingAt()}, {@link Matcher#matches()} and
77 | * {@link Matcher#find()} don't throw any exceptions.
78 | *
79 | * @param flags Regex compilation flags, see {@link java.util.regex.Pattern#compile(String, int)}
80 | * @param regionStart Optional: The non-default region to use for the matcher; see {@link
81 | * java.util.regex.Matcher#region(int, int)}
82 | * @param regionEnd Honored only when regionStart {@code != null}
83 | * @param transparentBounds Optional: Call {@link java.util.regex.Matcher#useTransparentBounds(boolean)}
84 | * @param anchoringBounds Optional: Call {@link java.util.regex.Matcher#useAnchoringBounds(boolean)}
85 | */
86 | public void
87 | harnessFull(
88 | String regex,
89 | final String subject,
90 | int flags,
91 | @Nullable Integer regionStart,
92 | int regionEnd,
93 | @Nullable Boolean transparentBounds,
94 | @Nullable Boolean anchoringBounds
95 | ) {
96 |
97 | Pattern pattern = this.patternFactory.compile(regex, flags);
98 |
99 | Matcher m = pattern.matcher(subject);
100 | // Optional matcher settings; "null" means: Leave the matcher's default untouched.
101 | if (regionStart != null) m.region(regionStart, regionEnd);
102 | if (transparentBounds != null) m.useTransparentBounds(transparentBounds);
103 | if (anchoringBounds != null) m.useAnchoringBounds(anchoringBounds);
104 | // The results are ignored; all that matters is that the calls complete without an exception.
105 | m.lookingAt();
106 |
107 | m.matches();
108 |
109 | // "matches()", if unsuccessful, leaves the matcher in a very strange state: The next invocation of "find()"
110 | // (a few lines below) will NOT start at the beginning of the region, but where "lookingAt()" left off!?
111 | // The simple workaround is to reset the matcher here.
112 | m.reset();
113 | // Count the matches, so that a failure can be reported together with the number of the failing "find()".
114 | int matchNumber = 1;
115 | try {
116 | while (m.find()) matchNumber++;
117 | } catch (RuntimeException re) {
118 | throw ExceptionUtil.wrap("find(): Match #" + matchNumber, re);
119 | } catch (Error e) { // SUPPRESS CHECKSTYLE IllegalCatch
120 | throw ExceptionUtil.wrap("find(): Match #" + matchNumber, e);
121 | }
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
/lfr-core/src/test/java/test/ParameterizedWithPatternFactory.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * de.unkrig.lfr - A super-fast regular expression evaluator
4 | *
5 | * Copyright (c) 2019, Arno Unkrig
6 | * All rights reserved.
7 | *
8 | * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
9 | * following conditions are met:
10 | *
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
12 | * following disclaimer.
13 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
14 | * following disclaimer in the documentation and/or other materials provided with the distribution.
15 | * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | * products derived from this software without specific prior written permission.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 |
27 | package test;
28 |
29 | import java.util.ArrayList;
30 | import java.util.List;
31 |
32 | import org.junit.runners.Parameterized.Parameters;
33 |
34 | import de.unkrig.ref4j.PatternFactory;
35 |
36 | public abstract
37 | class ParameterizedWithPatternFactory {
38 |
39 | protected final PatternFactory patternFactory;
40 |
41 | public
42 | ParameterizedWithPatternFactory(PatternFactory patternFactory) { this.patternFactory = patternFactory; }
43 |
44 | @Parameters(name = "PatternFactory={1}") public static Iterable