├── .gitignore ├── .idea ├── ant.xml ├── compiler.xml ├── encodings.xml ├── libraries │ └── Maven__junit_junit_4_7.xml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── README.markdown ├── article ├── build.sh ├── conf.py ├── images │ ├── benchmark.pdf │ ├── benchmark.png │ ├── dfa.pdf │ ├── dfa.png │ ├── ire-dfa-nfa.pdf │ ├── ire-dfa-nfa.png │ ├── ire-fa.pdf │ ├── ire-fa.png │ ├── ire-overview.pdf │ ├── ire-overview.png │ ├── ire-rope.pdf │ ├── ire-rope.png │ ├── memory-overhead.pdf │ ├── memory-overhead.png │ ├── nfa-composition.pdf │ ├── nfa-composition.png │ ├── nfa-ne-all.pdf │ ├── nfa-ne-all.png │ ├── nfa-ne.pdf │ ├── nfa-ne.png │ ├── nfa.pdf │ ├── nfa.png │ ├── rope-append-sharing.pdf │ ├── rope-append-sharing.png │ ├── rope-append.pdf │ ├── rope-append.png │ ├── rope-minmax.pdf │ ├── rope-minmax.png │ ├── rope-nfa.pdf │ ├── rope-nfa.png │ ├── rope-ops.pdf │ ├── rope-ops.png │ ├── split-as-concatenation.odg │ ├── split-as-concatenation.pdf │ ├── split-as-concatenation.png │ ├── split-sum-squares.pdf │ ├── split-sum-squares.png │ ├── tree-split-pred.pdf │ └── tree-split-pred.png ├── index.rst └── themes │ └── ire │ ├── static │ └── ire.css_t │ └── theme.conf ├── ire.iml ├── lib └── annotations.jar ├── pom.xml ├── src ├── main │ └── java │ │ └── org │ │ └── jkff │ │ └── ire │ │ ├── Compiler.java │ │ ├── DFAIndexedString.java │ │ ├── DFAMatcher.java │ │ ├── DFARopePatternSet.java │ │ ├── IndexedString.java │ │ ├── LinearIS.java │ │ ├── Match.java │ │ ├── PatternSet.java │ │ ├── fa │ │ ├── BiDFA.java │ │ ├── DFA.java │ │ ├── IntState.java │ │ ├── IntTable.java │ │ ├── MutableTransferFunction.java │ │ ├── PowerIntState.java │ │ ├── PowerIntTable.java │ │ ├── Sequence.java │ │ ├── State.java │ │ ├── TransferFunction.java │ │ └── TransferTable.java │ │ ├── regex │ │ ├── Alternative.java │ │ ├── CharacterClass.java │ │ ├── Empty.java │ │ ├── Labeled.java │ │ ├── OnceOrMore.java │ │ ├── RegexCompiler.java │ │ ├── RegexParser.java │ │ ├── RxNode.java │ │ 
└── Sequence.java │ │ ├── rope │ │ ├── Rope.java │ │ ├── RopeBasedIS.java │ │ └── RopeFactory.java │ │ └── util │ │ ├── CoarsestPartition.java │ │ ├── CollectionFactory.java │ │ ├── Function.java │ │ ├── Function2.java │ │ ├── Pair.java │ │ ├── Predicate.java │ │ ├── Reducer.java │ │ └── WrappedBitSet.java └── test │ └── java │ └── org │ └── jkff │ └── ire │ ├── DFABuilder.java │ ├── IntegrationTest.java │ ├── LinearISTest.java │ ├── NFABuilder.java │ ├── regex │ ├── RegexCompilerTest.java │ └── RegexParserTest.java │ └── rope │ ├── RopeBasedISTest.java │ └── RopeTest.java └── target └── ire-0.1.jar /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | -------------------------------------------------------------------------------- /.idea/ant.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__junit_junit_4_7.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 26 | 27 | 32 | 33 | 34 | http://www.w3.org/1999/xhtml 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- 
/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | What's this 2 | ============= 3 | ire is a library for incremental regular expression matching, based on further development of the ideas from Dan Piponi's famous blogpost [Fast Incremental Regular Expression Matching with Monoids](http://blog.sigfpe.com/2009/01/fast-incremental-regular-expression.html). 4 | 5 | * **ire** is for "incremental", "regular", "expressions" 6 | * **incremental** means "fast recomputation of results according to changes of input string" (not pattern) 7 | * **regular** means "regular" - i.e. no backrefs or other Perl magic. 8 | 9 | There is an article about it: http://jkff.info/articles/ire 10 | 11 | How to use it? 12 | =============== 13 | 14 | Add the target/ire-VERSION.jar to your classpath. 15 | 16 | import org.jkff.ire.*; 17 | import org.jkff.ire.regex.RegexCompiler; 18 | 19 | // Compile the regexes 20 | String[] regexes = {...}; 21 | PatternSet pat = RegexCompiler.compile(regexes); 22 | 23 | // Index a string (slow) 24 | IndexedString is = pat.match(someString); // or match(someString, blockSize) 25 | 26 | // Get matches (fast) 27 | for(Match m : is.getMatches()) { 28 | int startPos = m.startPos(); 29 | int length = m.length(); 30 | int whichPattern = m.whichPattern(); 31 | } 32 | 33 | // Here's the "incremental" part. Assume 'a' and 'b' are IndexedString's. 34 | // You can cut and recombine string pieces, it will be fast, and getMatches() 35 | // of the resulting strings will be fast. 
36 | IndexedString c = a.append(b); 37 | IndexedString sub = is.subSequence(start, end); 38 | Pair p = is.splitBefore(i); 39 | 40 | How to experiment with it? 41 | ========================== 42 | Open the IDEA project (or create a project in your favourite IDE over it - there's just one library dependency in the "lib" folder) and run the "tests" in `org.jkff.ire.IntegrationTest`. 43 | 44 | Do not forget to run the unit tests after changes. 45 | 46 | Ask me (ekirpichov@gmail.com) if you're interested in something. 47 | 48 | How fast is it? 49 | =============== 50 | 51 | It is much faster than `java.util.regex` in the following case: 52 | 53 | * Not too many patterns 54 | * Not too many occurrences of them 55 | * The input strings are very long 56 | * Incremental operations are dominant 57 | * You have a lot of spare memory (the "block size" parameter is not too large) 58 | 59 | It is much slower in most other cases. 60 | 61 | For example, when finding all occurrences of patterns from the ["regex-dna" benchmark](http://shootout.alioth.debian.org/u32q/performance.php?test=regexdna) 62 | in a 500-kb DNA string with a total of 100 occurrences, with a block size of 16 we're getting nearly 5000 occurrences per second with our library and about 500 per second with java.util.regex. 63 | 64 | However, when solving the same problem for a 50-kb string with 100 occurrences, with a block size of 256 we have exactly the opposite - 500 vs 6000. 65 | 66 | How does it work? 67 | ================== 68 | Read Dan Piponi's aforementioned blogpost; here are the differences: 69 | 70 | * Instead of fingertrees, we use a "rope" data structure that caches sums of values in an arbitrary monoid. The rope data structure is in the `org.jkff.ire.rope` package. It uses a constant-height 2-3 tree of N..2N-1 array chunks. Append and split operations are quite trivial. 71 | * We not only test for a match, but also find match positions. 
This is done by 1 split to find the end of the match and another to find the beginning, with some intricacies for overlapping matches. See `org.jkff.ire.DFAMatcher` class. 72 | * We use NFA instead of DFA, because we care mostly about number of states (we have to compose transition tables) and state blow-up of DFAs is unacceptable. FAs are in `org.jkff.ire.fa` package. 73 | * We do some optimization of the NFA to further reduce states, see `org.jkff.ire.regex.RegexCompiler`. 74 | * We use a compact representation of NFA as a boolean matrix represented as a bitset, with fast multiplication, see `org.jkff.ire.fa.PowerIntTable` 75 | 76 | If you happen to read Russian, then read [an article in fprog.ru](http://fprog.ru/2010/issue6/eugene-kirpichov-incremental-regular-expressions/) 77 | -------------------------------------------------------------------------------- /article/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | sphinx-build -b html -D math-output=mathjax -a -E . ./_build 4 | -------------------------------------------------------------------------------- /article/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is execfile()d with the current directory set to its containing dir. 4 | # 5 | # Note that not all possible configuration values are present in this 6 | # autogenerated file. 7 | # 8 | # All configuration values have a default; values that are commented out 9 | # serve to show the default. 10 | 11 | import sys, os 12 | 13 | # If extensions (or modules to document with autodoc) are in another directory, 14 | # add these directories to sys.path here. If the directory is relative to the 15 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
16 | #sys.path.insert(0, os.path.abspath('.')) 17 | 18 | # -- General configuration ----------------------------------------------------- 19 | 20 | # If your documentation needs a minimal Sphinx version, state it here. 21 | #needs_sphinx = '1.0' 22 | 23 | # Add any Sphinx extension module names here, as strings. They can be extensions 24 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 25 | extensions = ['sphinx.ext.todo', 'sphinx.ext.mathjax'] 26 | 27 | # Add any paths that contain templates here, relative to this directory. 28 | templates_path = ['_templates'] 29 | 30 | # The suffix of source filenames. 31 | source_suffix = '.rst' 32 | 33 | # The encoding of source files. 34 | #source_encoding = 'utf-8-sig' 35 | 36 | # The master toctree document. 37 | master_doc = 'index' 38 | 39 | # General information about the project. 40 | project = u'ire' 41 | copyright = u'2012, Eugene Kirpichov' 42 | 43 | # The version info for the project you're documenting, acts as replacement for 44 | # |version| and |release|, also used in various other places throughout the 45 | # built documents. 46 | # 47 | # The short X.Y version. 48 | # The full version, including alpha/beta/rc tags. 49 | 50 | # The language for content autogenerated by Sphinx. Refer to documentation 51 | # for a list of supported languages. 52 | #language = None 53 | 54 | # There are two options for replacing |today|: either, you set today to some 55 | # non-false value, then it is used: 56 | #today = '' 57 | # Else, today_fmt is used as the format for a strftime call. 58 | #today_fmt = '%B %d, %Y' 59 | 60 | # List of patterns, relative to source directory, that match files and 61 | # directories to ignore when looking for source files. 62 | exclude_patterns = ['_build'] 63 | 64 | # The reST default role (used for this markup: `text`) to use for all documents. 65 | #default_role = None 66 | 67 | # If true, '()' will be appended to :func: etc. cross-reference text. 
68 | #add_function_parentheses = True 69 | 70 | # If true, the current module name will be prepended to all description 71 | # unit titles (such as .. function::). 72 | #add_module_names = True 73 | 74 | # If true, sectionauthor and moduleauthor directives will be shown in the 75 | # output. They are ignored by default. 76 | #show_authors = False 77 | 78 | # The name of the Pygments (syntax highlighting) style to use. 79 | pygments_style = 'sphinx' 80 | 81 | # A list of ignored prefixes for module index sorting. 82 | #modindex_common_prefix = [] 83 | 84 | 85 | # -- Options for HTML output --------------------------------------------------- 86 | 87 | # The theme to use for HTML and HTML Help pages. See the documentation for 88 | # a list of builtin themes. 89 | html_theme = 'ire' 90 | 91 | # Theme options are theme-specific and customize the look and feel of a theme 92 | # further. For a list of options available for each theme, see the 93 | # documentation. 94 | html_theme_options = { 'sidebarwidth': 300 } 95 | 96 | # Add any paths that contain custom themes here, relative to this directory. 97 | html_theme_path = ['themes'] 98 | 99 | # The name for this set of Sphinx documents. If None, it defaults to 100 | # "<project> v<release> documentation". 101 | html_title = 'Incremental regular expressions' 102 | 103 | # A shorter title for the navigation bar. Default is the same as html_title. 104 | #html_short_title = None 105 | 106 | # The name of an image file (relative to this directory) to place at the top 107 | # of the sidebar. 108 | #html_logo = None 109 | 110 | # The name of an image file (within the static path) to use as favicon of the 111 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 112 | # pixels large. 113 | #html_favicon = None 114 | 115 | # Add any paths that contain custom static files (such as style sheets) here, 116 | # relative to this directory. 
They are copied after the builtin static files, 117 | # so a file named "default.css" will overwrite the builtin "default.css". 118 | html_static_path = ['_static'] 119 | 120 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 121 | # using the given strftime format. 122 | #html_last_updated_fmt = '%b %d, %Y' 123 | 124 | # If true, SmartyPants will be used to convert quotes and dashes to 125 | # typographically correct entities. 126 | #html_use_smartypants = True 127 | 128 | # Custom sidebar templates, maps document names to template names. 129 | #html_sidebars = {} 130 | 131 | # Additional templates that should be rendered to pages, maps page names to 132 | # template names. 133 | #html_additional_pages = {} 134 | 135 | # If false, no module index is generated. 136 | #html_domain_indices = True 137 | 138 | # If false, no index is generated. 139 | #html_use_index = True 140 | 141 | # If true, the index is split into individual pages for each letter. 142 | #html_split_index = False 143 | 144 | # If true, links to the reST sources are added to the pages. 145 | #html_show_sourcelink = True 146 | 147 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 148 | #html_show_sphinx = True 149 | 150 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 151 | #html_show_copyright = True 152 | 153 | # If true, an OpenSearch description file will be output, and all pages will 154 | # contain a <link> tag referring to it. The value of this option must be the 155 | # base URL from which the finished HTML is served. 156 | #html_use_opensearch = '' 157 | 158 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 159 | #html_file_suffix = None 160 | 161 | # Output file base name for HTML help builder. 
162 | htmlhelp_basename = 'iredoc' 163 | 164 | 165 | # -- Options for LaTeX output -------------------------------------------------- 166 | 167 | latex_elements = { 168 | # The paper size ('letterpaper' or 'a4paper'). 169 | #'papersize': 'letterpaper', 170 | 171 | # The font size ('10pt', '11pt' or '12pt'). 172 | #'pointsize': '10pt', 173 | 174 | # Additional stuff for the LaTeX preamble. 175 | #'preamble': '', 176 | } 177 | 178 | # The name of an image file (relative to this directory) to place at the top of 179 | # the title page. 180 | #latex_logo = None 181 | 182 | # For "manual" documents, if this is true, then toplevel headings are parts, 183 | # not chapters. 184 | #latex_use_parts = False 185 | 186 | # If true, show page references after internal links. 187 | #latex_show_pagerefs = False 188 | 189 | # If true, show URL addresses after external links. 190 | #latex_show_urls = False 191 | 192 | # Documents to append as an appendix to all manuals. 193 | #latex_appendices = [] 194 | 195 | # If false, no module index is generated. 196 | #latex_domain_indices = True 197 | 198 | 199 | # -- Options for manual page output -------------------------------------------- 200 | 201 | # If true, show URL addresses after external links. 202 | #man_show_urls = False 203 | 204 | 205 | # -- Options for Texinfo output ------------------------------------------------ 206 | 207 | # Documents to append as an appendix to all manuals. 208 | #texinfo_appendices = [] 209 | 210 | # If false, no module index is generated. 211 | #texinfo_domain_indices = True 212 | 213 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
214 | #texinfo_show_urls = 'footnote' 215 | -------------------------------------------------------------------------------- /article/images/benchmark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/benchmark.pdf -------------------------------------------------------------------------------- /article/images/benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/benchmark.png -------------------------------------------------------------------------------- /article/images/dfa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/dfa.pdf -------------------------------------------------------------------------------- /article/images/dfa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/dfa.png -------------------------------------------------------------------------------- /article/images/ire-dfa-nfa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/ire-dfa-nfa.pdf -------------------------------------------------------------------------------- /article/images/ire-dfa-nfa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/ire-dfa-nfa.png -------------------------------------------------------------------------------- /article/images/ire-fa.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/ire-fa.pdf -------------------------------------------------------------------------------- /article/images/ire-fa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/ire-fa.png -------------------------------------------------------------------------------- /article/images/ire-overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/ire-overview.pdf -------------------------------------------------------------------------------- /article/images/ire-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/ire-overview.png -------------------------------------------------------------------------------- /article/images/ire-rope.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/ire-rope.pdf -------------------------------------------------------------------------------- /article/images/ire-rope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/ire-rope.png -------------------------------------------------------------------------------- /article/images/memory-overhead.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/memory-overhead.pdf -------------------------------------------------------------------------------- /article/images/memory-overhead.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/memory-overhead.png -------------------------------------------------------------------------------- /article/images/nfa-composition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/nfa-composition.pdf -------------------------------------------------------------------------------- /article/images/nfa-composition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/nfa-composition.png -------------------------------------------------------------------------------- /article/images/nfa-ne-all.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/nfa-ne-all.pdf -------------------------------------------------------------------------------- /article/images/nfa-ne-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/nfa-ne-all.png -------------------------------------------------------------------------------- /article/images/nfa-ne.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/nfa-ne.pdf 
-------------------------------------------------------------------------------- /article/images/nfa-ne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/nfa-ne.png -------------------------------------------------------------------------------- /article/images/nfa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/nfa.pdf -------------------------------------------------------------------------------- /article/images/nfa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/nfa.png -------------------------------------------------------------------------------- /article/images/rope-append-sharing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-append-sharing.pdf -------------------------------------------------------------------------------- /article/images/rope-append-sharing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-append-sharing.png -------------------------------------------------------------------------------- /article/images/rope-append.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-append.pdf -------------------------------------------------------------------------------- /article/images/rope-append.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-append.png -------------------------------------------------------------------------------- /article/images/rope-minmax.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-minmax.pdf -------------------------------------------------------------------------------- /article/images/rope-minmax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-minmax.png -------------------------------------------------------------------------------- /article/images/rope-nfa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-nfa.pdf -------------------------------------------------------------------------------- /article/images/rope-nfa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-nfa.png -------------------------------------------------------------------------------- /article/images/rope-ops.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-ops.pdf -------------------------------------------------------------------------------- /article/images/rope-ops.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/rope-ops.png -------------------------------------------------------------------------------- /article/images/split-as-concatenation.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/split-as-concatenation.odg -------------------------------------------------------------------------------- /article/images/split-as-concatenation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/split-as-concatenation.pdf -------------------------------------------------------------------------------- /article/images/split-as-concatenation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/split-as-concatenation.png -------------------------------------------------------------------------------- /article/images/split-sum-squares.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/split-sum-squares.pdf -------------------------------------------------------------------------------- /article/images/split-sum-squares.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/split-sum-squares.png -------------------------------------------------------------------------------- /article/images/tree-split-pred.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/tree-split-pred.pdf -------------------------------------------------------------------------------- /article/images/tree-split-pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/article/images/tree-split-pred.png -------------------------------------------------------------------------------- /article/themes/ire/static/ire.css_t: -------------------------------------------------------------------------------- 1 | /* 2 | * nature.css_t 3 | * ~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- nature theme. 6 | * 7 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | 14 | /* -- page layout ----------------------------------------------------------- */ 15 | 16 | body { 17 | font-family: Baskerville, serif; 18 | font-size: 120%; 19 | background-color: #111; 20 | color: #555; 21 | margin: 0; 22 | padding: 0; 23 | } 24 | 25 | div.documentwrapper { 26 | float: left; 27 | width: 100%; 28 | } 29 | 30 | div.bodywrapper { 31 | margin: 0 0 0 {{ theme_sidebarwidth|toint }}px; 32 | } 33 | 34 | hr { 35 | border: 1px solid #B1B4B6; 36 | } 37 | 38 | div.document { 39 | background-color: #eee; 40 | } 41 | 42 | div.body { 43 | background-color: #ffffff; 44 | color: #3E4349; 45 | padding: 0 30px 30px 30px; 46 | width: 900px; 47 | font-size: 0.9em; 48 | } 49 | 50 | div.footer { 51 | color: #555; 52 | width: 100%; 53 | padding: 13px 0; 54 | text-align: center; 55 | font-size: 75%; 56 | } 57 | 58 | div.footer a { 59 | color: #444; 60 | text-decoration: underline; 61 | } 62 | 63 | div.related { 64 | background-color: #6BA81E; 65 | line-height: 32px; 66 | color: #fff; 67 | text-shadow: 0px 1px 0 #444; 68 | font-size: 0.9em; 69 | } 70 | 71 | div.related a { 72 | 
color: #E2F3CC; 73 | } 74 | 75 | div.sphinxsidebar { 76 | font-size: 0.75em; 77 | line-height: 1.5em; 78 | } 79 | 80 | div.sphinxsidebarwrapper{ 81 | padding: 20px 0; 82 | } 83 | 84 | div.sphinxsidebar h3, 85 | div.sphinxsidebar h4 { 86 | font-family: Arial, sans-serif; 87 | color: #222; 88 | font-size: 1.2em; 89 | font-weight: normal; 90 | margin: 0; 91 | padding: 5px 10px; 92 | background-color: #ddd; 93 | text-shadow: 1px 1px 0 white 94 | } 95 | 96 | div.sphinxsidebar h4{ 97 | font-size: 1.1em; 98 | } 99 | 100 | div.sphinxsidebar h3 a { 101 | color: #444; 102 | } 103 | 104 | 105 | div.sphinxsidebar p { 106 | color: #888; 107 | padding: 5px 20px; 108 | } 109 | 110 | div.sphinxsidebar p.topless { 111 | } 112 | 113 | div.sphinxsidebar ul { 114 | margin: 10px 10px; 115 | padding: 0; 116 | color: #000; 117 | } 118 | 119 | div.sphinxsidebar a { 120 | color: #444; 121 | } 122 | 123 | div.sphinxsidebar input { 124 | border: 1px solid #ccc; 125 | font-family: sans-serif; 126 | font-size: 1em; 127 | } 128 | 129 | div.sphinxsidebar input[type=text]{ 130 | margin-left: 20px; 131 | } 132 | 133 | /* -- body styles ----------------------------------------------------------- */ 134 | 135 | a { 136 | color: #005B81; 137 | text-decoration: none; 138 | } 139 | 140 | a:hover { 141 | color: #E32E00; 142 | text-decoration: underline; 143 | } 144 | 145 | div.body h1, 146 | div.body h2, 147 | div.body h3, 148 | div.body h4, 149 | div.body h5, 150 | div.body h6 { 151 | font-family: Gill Sans, Arial, sans-serif; 152 | background-color: #BED4EB; 153 | font-weight: normal; 154 | color: #212224; 155 | margin: 30px 0px 10px 0px; 156 | padding: 5px 0 5px 10px; 157 | text-shadow: 0px 1px 0 white 158 | } 159 | 160 | div.body h1 { border-top: 20px solid white; margin-top: 0; font-size: 200%; } 161 | div.body h2 { font-size: 150%; background-color: #C8D5E3; } 162 | div.body h3 { font-size: 120%; background-color: #D8DEE3; } 163 | div.body h4 { font-size: 110%; background-color: #D8DEE3; } 164 | 
div.body h5 { font-size: 100%; background-color: #D8DEE3; } 165 | div.body h6 { font-size: 100%; background-color: #D8DEE3; } 166 | 167 | a.headerlink { 168 | color: #c60f0f; 169 | font-size: 0.8em; 170 | padding: 0 4px 0 4px; 171 | text-decoration: none; 172 | } 173 | 174 | a.headerlink:hover { 175 | background-color: #c60f0f; 176 | color: white; 177 | } 178 | 179 | div.body p, div.body dd, div.body li { 180 | line-height: 1.5em; 181 | } 182 | 183 | div.admonition p.admonition-title + p { 184 | display: inline; 185 | } 186 | 187 | div.highlight{ 188 | background-color: white; 189 | } 190 | 191 | div.note { 192 | background-color: #eee; 193 | border: 1px solid #ccc; 194 | } 195 | 196 | div.seealso { 197 | background-color: #ffc; 198 | border: 1px solid #ff6; 199 | } 200 | 201 | div.topic { 202 | background-color: #eee; 203 | } 204 | 205 | div.warning { 206 | background-color: #ffe4e4; 207 | border: 1px solid #f66; 208 | } 209 | 210 | p.admonition-title { 211 | display: inline; 212 | } 213 | 214 | p.admonition-title:after { 215 | content: ":"; 216 | } 217 | 218 | pre { 219 | padding: 10px; 220 | background-color: White; 221 | color: #222; 222 | line-height: 1.2em; 223 | border: 1px solid #C6C9CB; 224 | font-size: 1.1em; 225 | margin: 1.5em 0 1.5em 0; 226 | -webkit-box-shadow: 1px 1px 1px #d8d8d8; 227 | -moz-box-shadow: 1px 1px 1px #d8d8d8; 228 | } 229 | 230 | tt { 231 | background-color: #ecf0f3; 232 | color: #222; 233 | /* padding: 1px 2px; */ 234 | font-size: 1.1em; 235 | font-family: monospace; 236 | } 237 | 238 | .viewcode-back { 239 | font-family: Arial, sans-serif; 240 | } 241 | 242 | div.viewcode-block:target { 243 | background-color: #f4debf; 244 | border-top: 1px solid #ac9; 245 | border-bottom: 1px solid #ac9; 246 | } 247 | -------------------------------------------------------------------------------- /article/themes/ire/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = nature 3 | 
stylesheet = ire.css 4 | pygments_style = tango 5 | -------------------------------------------------------------------------------- /ire.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /lib/annotations.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/lib/annotations.jar -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | org.jkff 5 | ire 6 | jar 7 | 0.1 8 | ire 9 | http://maven.apache.org 10 | 11 | 12 | 13 | junit 14 | junit 15 | 4.7 16 | test 17 | 18 | 19 | 20 | org.jetbrains 21 | annotations 22 | 1.0.0 23 | system 24 | ${basedir}/lib/annotations.jar 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/Compiler.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | /** 4 | * Created on: 22.07.2010 23:26:38 5 | */ 6 | public interface Compiler { 7 | PatternSet compile(String[] patterns); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/DFAIndexedString.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.fa.TransferFunction; 4 | 5 | /** 6 | * Created on: 21.08.2010 21:03:13 7 | */ 8 | public interface DFAIndexedString extends IndexedString { 9 | TransferFunction getForward(); 10 | TransferFunction getBackward(); 11 | } 12 | 
-------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/DFAMatcher.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.fa.BiDFA; 4 | import org.jkff.ire.fa.State; 5 | import org.jkff.ire.util.Function2; 6 | import org.jkff.ire.util.Pair; 7 | import org.jkff.ire.util.Predicate; 8 | 9 | import org.jkff.ire.util.WrappedBitSet; 10 | import java.util.List; 11 | 12 | import static org.jkff.ire.util.CollectionFactory.newArrayList; 13 | 14 | /** 15 | * Created on: 31.07.2010 12:19:28 16 | */ 17 | public class DFAMatcher { 18 | @SuppressWarnings("unchecked") 19 | public static 20 | Iterable getMatches( 21 | final BiDFA bidfa, final DFAIndexedString string) 22 | { 23 | final ST initial = bidfa.getForward().getInitialState(); 24 | 25 | Function2, IndexedString, SP> addString = new Function2, IndexedString, SP>() { 26 | public SP applyTo(SP sp, IndexedString s) { 27 | return new SP(((DFAIndexedString) s).getForward().next(sp.state), sp.pos+s.length()); 28 | } 29 | }; 30 | 31 | Function2, Character, SP> addChar = new Function2, Character, SP>() { 32 | public SP applyTo(SP sp, Character c) { 33 | return new SP(bidfa.getForward().transfer(c).next(sp.state), sp.pos+1); 34 | } 35 | }; 36 | 37 | List res = newArrayList(); 38 | 39 | int shift = 0; 40 | 41 | SP matchStartState = new SP(initial, 0); 42 | IndexedString rem = string; 43 | IndexedString seen = string.subSequence(0,0); 44 | 45 | while(true) { 46 | Pair p = rem.splitAfterRise( 47 | matchStartState, addString, addChar, DFAMatcher.hasForwardMatchAfter(shift)); 48 | if(p == null) 49 | break; 50 | 51 | DFAIndexedString matchingPrefix = (DFAIndexedString) p.first; 52 | rem = p.second; 53 | seen = seen.append(matchingPrefix); 54 | 55 | final ST stateAfterMatch = matchingPrefix.getForward().next(matchStartState.state); 56 | WrappedBitSet term = stateAfterMatch.getTerminatedPatterns(); 57 | 
58 | ST backwardInitial = bidfa.getBackward().getInitialState(); 59 | 60 | ST nextMatchStart = stateAfterMatch; 61 | 62 | for(int bit = term.nextSetBit(0); bit >= 0; bit = term.nextSetBit(bit+1)) { 63 | final int bit2 = bit; 64 | 65 | Function2 addStringBack = new Function2() { 66 | public ST applyTo(ST st, IndexedString s) { 67 | return ((DFAIndexedString) s).getBackward().next(st); 68 | } 69 | }; 70 | 71 | Function2 addCharBack = new Function2() { 72 | public ST applyTo(ST st, Character c) { 73 | return bidfa.getBackward().transfer(c).next(st); 74 | } 75 | }; 76 | 77 | Predicate startsThisMatch = new Predicate() { 78 | public boolean isTrueFor(ST state) { 79 | WrappedBitSet tp = state.getTerminatedPatterns(); 80 | return tp!=null && tp.get(bit2); 81 | } 82 | }; 83 | 84 | int len = seen.splitAfterBackRise( 85 | backwardInitial, addStringBack, addCharBack, startsThisMatch).second.length(); 86 | int startPos = seen.length() - len; 87 | res.add(new Match(bit, startPos, len)); 88 | 89 | nextMatchStart = bidfa.getForward().resetTerminatedPattern(nextMatchStart, bit); 90 | } 91 | 92 | matchStartState = new SP(nextMatchStart, matchingPrefix.length() + 1); 93 | } 94 | 95 | return res; 96 | } 97 | 98 | private static Predicate> hasForwardMatchAfter(final int pos) { 99 | return new Predicate>() { 100 | public boolean isTrueFor(SP sp) { 101 | return !sp.state.getTerminatedPatterns().isEmpty() && sp.pos >= pos; 102 | } 103 | }; 104 | } 105 | 106 | // State and position. 
107 | private static class SP { 108 | ST state; 109 | int pos; 110 | 111 | SP(ST state, int pos) { 112 | this.state = state; 113 | this.pos = pos; 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/DFARopePatternSet.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.fa.BiDFA; 4 | import org.jkff.ire.fa.PowerIntState; 5 | import org.jkff.ire.rope.RopeBasedIS; 6 | 7 | /** 8 | * Created on: 01.09.2010 23:44:51 9 | */ 10 | public class DFARopePatternSet implements PatternSet { 11 | private BiDFA bidfa; 12 | 13 | public DFARopePatternSet(BiDFA bidfa) { 14 | this.bidfa = bidfa; 15 | } 16 | 17 | public IndexedString match(String s) { 18 | return new RopeBasedIS(bidfa, s); 19 | } 20 | 21 | public IndexedString match(String s, int blockSize) { 22 | return new RopeBasedIS(bidfa, s, blockSize); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/IndexedString.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.util.Function2; 4 | import org.jkff.ire.util.Pair; 5 | import org.jkff.ire.util.Predicate; 6 | import org.jetbrains.annotations.Nullable; 7 | 8 | /** 9 | * Created on: 22.07.2010 23:20:48 10 | */ 11 | public interface IndexedString extends CharSequence { 12 | Iterable getMatches(); 13 | 14 | Pair splitBefore(int index); 15 | 16 | @Nullable 17 | Pair splitAfterRise( 18 | ST seed, 19 | Function2 addChunk, Function2 addChar, 20 | Predicate toBool); 21 | 22 | /** 23 | * Like splitAfterRise, but we count from the right end. 
24 | * @param addChunk will be given a NON-REVERSED chunk 25 | */ 26 | @Nullable 27 | Pair splitAfterBackRise( 28 | T seed, 29 | Function2 addChunk, Function2 addChar, 30 | Predicate toBool); 31 | 32 | IndexedString append(IndexedString s); 33 | 34 | IndexedString subSequence(int start, int end); 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/LinearIS.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.fa.BiDFA; 4 | import org.jkff.ire.fa.DFA; 5 | import org.jkff.ire.fa.TransferFunction; 6 | import org.jkff.ire.fa.State; 7 | import org.jkff.ire.util.Pair; 8 | import org.jkff.ire.util.Reducer; 9 | import org.jkff.ire.util.Function2; 10 | import org.jkff.ire.util.Predicate; 11 | 12 | /** 13 | * Created on: 23.07.2010 9:23:42 14 | */ 15 | public class LinearIS implements DFAIndexedString { 16 | private CharSequence cs; 17 | private BiDFA bidfa; 18 | private TransferFunction forward; 19 | private TransferFunction backward; 20 | 21 | public LinearIS(CharSequence cs, BiDFA bidfa) { 22 | this(cs, bidfa, transferForward(bidfa, cs), transferBackward(bidfa, cs)); 23 | } 24 | 25 | private LinearIS(CharSequence cs, 26 | BiDFA bidfa, 27 | TransferFunction forward, TransferFunction backward) 28 | { 29 | this.cs = cs; 30 | this.bidfa = bidfa; 31 | this.forward = forward; 32 | this.backward = backward; 33 | } 34 | 35 | public TransferFunction getForward() { 36 | return forward; 37 | } 38 | 39 | public TransferFunction getBackward() { 40 | return backward; 41 | } 42 | 43 | public Iterable getMatches() { 44 | return DFAMatcher.getMatches(bidfa, this); 45 | } 46 | 47 | public int length() { 48 | return cs.length(); 49 | } 50 | 51 | public char charAt(int index) { 52 | return cs.charAt(index); 53 | } 54 | 55 | public String toString() { 56 | return cs.toString(); 57 | } 58 | 59 | public LinearIS subSequence(int start, int 
end) { 60 | return new LinearIS(cs.subSequence(start, end), bidfa); 61 | } 62 | 63 | public Pair splitBefore(int index) { 64 | return Pair.of( 65 | (IndexedString)new LinearIS(cs.subSequence(0, index), bidfa), 66 | (IndexedString)new LinearIS(cs.subSequence(index, cs.length()), bidfa)); 67 | } 68 | 69 | public Pair splitAfterRise( 70 | T seed, 71 | Function2 addChunk, 72 | Function2 addChar, Predicate toBool) 73 | { 74 | T t = seed; 75 | for(int i = 0; i < length(); ++i) { 76 | if(toBool.isTrueFor(t)) 77 | return splitBefore(i); 78 | t = addChar.applyTo(t, this.charAt(i)); 79 | } 80 | if(toBool.isTrueFor(t)) 81 | return splitBefore(length()); 82 | return null; 83 | } 84 | 85 | public Pair splitAfterBackRise( 86 | T seed, 87 | Function2 addChunk, Function2 addChar, 88 | Predicate toBool) 89 | { 90 | T t = seed; 91 | for(int i = length()-1; i >= 0; --i) { 92 | if(toBool.isTrueFor(t)) 93 | return splitBefore(i+1); 94 | t = addChar.applyTo(t, this.charAt(i)); 95 | } 96 | return null; 97 | } 98 | 99 | public IndexedString append(IndexedString other) { 100 | return new LinearIS(cs.toString() + other.toString(), bidfa); 101 | } 102 | 103 | private static TransferFunction transferForward( 104 | BiDFA bidfa, CharSequence cs) 105 | { 106 | DFA dfa = bidfa.getForward(); 107 | Reducer> reducer = dfa.getTransferFunctionsReducer(); 108 | TransferFunction res = null; 109 | for(int i = 0; i < cs.length(); ++i) { 110 | res = reducer.compose(res, dfa.transfer(cs.charAt(i))); 111 | } 112 | return res; 113 | } 114 | 115 | private static TransferFunction transferBackward( 116 | BiDFA bidfa, CharSequence cs) { 117 | DFA dfa = bidfa.getBackward(); 118 | Reducer> reducer = dfa.getTransferFunctionsReducer(); 119 | TransferFunction res = null; 120 | for(int i = cs.length() - 1; i >= 0; --i) { 121 | res = reducer.compose(res, dfa.transfer(cs.charAt(i))); 122 | } 123 | return res; 124 | } 125 | 126 | private static TransferFunction identity() { 127 | return new TransferFunction() { 128 | 
public T next(T x) { 129 | return x; 130 | } 131 | }; 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/Match.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | /** 4 | * Created on: 22.07.2010 23:25:29 5 | */ 6 | public class Match { 7 | private int whichPattern; 8 | private int startPos; 9 | private int length; 10 | 11 | public Match(int whichPattern, int startPos, int length) { 12 | this.whichPattern = whichPattern; 13 | this.startPos = startPos; 14 | this.length = length; 15 | } 16 | 17 | public int whichPattern() { 18 | return whichPattern; 19 | } 20 | 21 | public int startPos() { 22 | return startPos; 23 | } 24 | 25 | public int length() { 26 | return length; 27 | } 28 | 29 | public String toString() { 30 | return "" + whichPattern + "@("+startPos+","+length+")"; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/PatternSet.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | /** 4 | * Created on: 22.07.2010 23:24:31 5 | */ 6 | public interface PatternSet { 7 | IndexedString match(String s); 8 | 9 | IndexedString match(String s, int blockSize); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/BiDFA.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | /** 4 | * Created on: 25.07.2010 13:34:11 5 | */ 6 | public class BiDFA { 7 | private DFA forward; 8 | private DFA backward; 9 | 10 | public BiDFA(DFA forward, DFA backward) { 11 | this.forward = forward; 12 | this.backward = backward; 13 | } 14 | 15 | public DFA getForward() { 16 | return forward; 17 | } 18 | 19 | public DFA getBackward() { 20 | return backward; 21 | } 22 | } 23 
| -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/DFA.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | import org.jkff.ire.util.Reducer; 4 | 5 | /** 6 | * Created on: 22.07.2010 23:54:27 7 | */ 8 | public abstract class DFA { 9 | private TransferTable transfer; 10 | private S initialState; 11 | private Reducer> transferFunctionsReducer; 12 | 13 | public DFA(TransferTable transfer, S initialState, 14 | Reducer> transferFunctionsReducer) 15 | { 16 | this.transfer = transfer; 17 | this.initialState = initialState; 18 | this.transferFunctionsReducer = transferFunctionsReducer; 19 | } 20 | 21 | public S getInitialState() { 22 | return initialState; 23 | } 24 | 25 | public TransferFunction transfer(C token) { 26 | return transfer.forToken(token); 27 | } 28 | 29 | public Reducer> getTransferFunctionsReducer() { 30 | return transferFunctionsReducer; 31 | } 32 | 33 | public abstract S resetTerminatedPattern(S state, int pattern); 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/IntState.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | import org.jkff.ire.util.WrappedBitSet; 4 | 5 | /** 6 | * Created on: 31.07.2010 15:16:46 7 | */ 8 | public class IntState implements State { 9 | private int index; 10 | private WrappedBitSet terminatedPatterns; 11 | 12 | public IntState(int index, WrappedBitSet terminatedPatterns) { 13 | this.index = index; 14 | this.terminatedPatterns = terminatedPatterns; 15 | } 16 | 17 | public int getIndex() { 18 | return index; 19 | } 20 | 21 | public WrappedBitSet getTerminatedPatterns() { 22 | return terminatedPatterns; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/IntTable.java: 
-------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | import org.jkff.ire.util.Reducer; 4 | 5 | /** 6 | * Created on: 22.07.2010 23:49:39 7 | */ 8 | public class IntTable implements TransferFunction { 9 | private IntState[] states; 10 | private int[] table; 11 | 12 | public IntTable(IntState[] states, int[] table) { 13 | this.states = states; 14 | this.table = table; 15 | } 16 | 17 | public static Reducer> REDUCER = new Reducer>() { 18 | public TransferFunction compose( 19 | TransferFunction a, TransferFunction b) 20 | { 21 | if(a == null) 22 | return b; 23 | if(b == null) 24 | return a; 25 | return ((IntTable)a).followedBy((IntTable)b); 26 | } 27 | 28 | public TransferFunction composeAll(Sequence> ts) { 29 | TransferFunction res = ts.get(0); 30 | for(int i = 1; i < ts.length(); ++i) { 31 | res = compose(res, ts.get(i)); 32 | } 33 | return res; 34 | } 35 | }; 36 | 37 | private IntTable followedBy(IntTable other) { 38 | int[] res = new int[table.length]; 39 | for(int i = 0; i < res.length; ++i) { 40 | res[i] = other.table[this.table[i]]; 41 | } 42 | return new IntTable(states, res); 43 | } 44 | 45 | public IntState next(IntState x) { 46 | return states[table[x.getIndex()]]; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/MutableTransferFunction.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | /** 4 | * Created on: 09.09.2010 1:08:03 5 | */ 6 | public interface MutableTransferFunction { 7 | void followInPlaceBy(TransferFunction other); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/PowerIntState.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | import org.jkff.ire.util.WrappedBitSet; 4 | 5 | /** 6 | * 
Created on: 01.08.2010 13:20:58 7 | */ 8 | public class PowerIntState implements State { 9 | private State[] basis; 10 | private WrappedBitSet subset; 11 | 12 | public PowerIntState(State[] basis, WrappedBitSet subset) { 13 | this.basis = basis; 14 | this.subset = subset; 15 | } 16 | 17 | public State[] getBasis() { 18 | return basis; 19 | } 20 | 21 | public WrappedBitSet getSubset() { 22 | return subset; 23 | } 24 | 25 | public WrappedBitSet getTerminatedPatterns() { 26 | WrappedBitSet res = null; 27 | for(int bit = subset.nextSetBit(0); bit >= 0; bit = subset.nextSetBit(bit+1)) { 28 | if(res == null) 29 | res = basis[bit].getTerminatedPatterns().makeCopy(); 30 | else 31 | res.or(basis[bit].getTerminatedPatterns()); 32 | } 33 | return res; 34 | } 35 | 36 | public String toString() { 37 | return subset.toString(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/PowerIntTable.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | import org.jkff.ire.util.Reducer; 4 | import org.jkff.ire.util.WrappedBitSet; 5 | 6 | import java.util.Arrays; 7 | 8 | /** 9 | * Created on: 01.08.2010 13:23:02 10 | */ 11 | public class PowerIntTable implements TransferFunction { 12 | private final int numStates; 13 | private final int blockSize; 14 | private final long[] words; // numStates blocks of ceil(numStates/64) longs 15 | 16 | public PowerIntTable(WrappedBitSet[] state2next) { 17 | this.numStates = state2next.length; 18 | this.blockSize = (63+numStates) / 64; 19 | this.words = new long[numStates * blockSize]; 20 | for(int s = 0; s < numStates; ++s) { 21 | new WrappedBitSet(words, s*blockSize, blockSize, numStates).or(state2next[s]); 22 | } 23 | } 24 | 25 | private PowerIntTable(int numStates, long[] words) { 26 | this.numStates = numStates; 27 | this.blockSize = (63+numStates) / 64; 28 | this.words = words; 29 | } 30 | 31 | public 
static Reducer> REDUCER = new Reducer>() { 32 | public TransferFunction compose( 33 | TransferFunction a, TransferFunction b) 34 | { 35 | if(a == null) 36 | return b; 37 | if(b == null) 38 | return a; 39 | return ((PowerIntTable)a).followedBy((PowerIntTable) b); 40 | } 41 | 42 | public TransferFunction composeAll(Sequence> ts) { 43 | return PowerIntTable.composeAll(ts); 44 | } 45 | }; 46 | 47 | public PowerIntTable followedBy(PowerIntTable other) { 48 | long[] words = new long[this.words.length]; 49 | long[] theirWords = other.words; 50 | for(int state = 0; state < numStates; ++state) { 51 | int ourOffset = state * blockSize; 52 | int bit = WrappedBitSet.nextSetBit(this.words, ourOffset, blockSize, 0); 53 | while (bit >= 0) { 54 | for (int i = 0; i < blockSize; ++i) { 55 | words[ourOffset + i] |= theirWords[bit*blockSize + i]; 56 | } 57 | bit = WrappedBitSet.nextSetBit(this.words, ourOffset, blockSize, bit + 1); 58 | } 59 | } 60 | return new PowerIntTable(numStates, words); 61 | } 62 | 63 | private static String toString(long[] ws) { 64 | StringBuilder sb = new StringBuilder(); 65 | for(long w : ws) sb.append(w).append(" "); 66 | return sb.toString(); 67 | } 68 | 69 | public PowerIntState next(PowerIntState st) { 70 | WrappedBitSet s = st.getSubset(); 71 | WrappedBitSet res = new WrappedBitSet(s.numBits()); 72 | for(int bit = s.nextSetBit(0); bit >= 0; bit = s.nextSetBit(bit+1)) { 73 | res.or(new WrappedBitSet(words, bit*blockSize, blockSize, numStates)); 74 | } 75 | return new PowerIntState(st.getBasis(), res); 76 | } 77 | 78 | public static TransferFunction composeAll(Sequence> fs) { 79 | PowerIntTable first = (PowerIntTable) fs.get(0); 80 | int numWords = first.words.length; 81 | long[] curWords = Arrays.copyOf(first.words, numWords); 82 | long[] newWords = new long[numWords]; 83 | int numStates = first.numStates; 84 | int blockSize = first.blockSize; 85 | 86 | for (int iF = 1; iF < fs.length(); iF++) { 87 | for(int j = 0; j < numWords; ++j) { 88 | newWords[j] = 
0L; 89 | } 90 | long[] nextWords = ((PowerIntTable) fs.get(iF)).words; 91 | for (int state = 0; state < numStates; ++state) { 92 | int ourOffset = state * blockSize; 93 | int bit = WrappedBitSet.nextSetBit(curWords, ourOffset, blockSize, 0); 94 | while (bit >= 0) { 95 | for (int i = 0; i < blockSize; ++i) { 96 | newWords[ourOffset + i] |= nextWords[bit*blockSize + i]; 97 | } 98 | bit = WrappedBitSet.nextSetBit(curWords, ourOffset, blockSize, bit + 1); 99 | } 100 | } 101 | long[] tmp = curWords; 102 | curWords = newWords; 103 | newWords = tmp; 104 | } 105 | 106 | return new PowerIntTable(numStates, curWords); 107 | } 108 | 109 | public String toString() { 110 | StringBuilder sb = new StringBuilder(); 111 | for(int state = 0; state < numStates; ++state) { 112 | int offset = state * blockSize; 113 | sb.append(state).append(" -> ") 114 | .append(new WrappedBitSet(words, offset, blockSize, numStates).toString()) 115 | .append("; "); 116 | } 117 | return sb.toString(); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/Sequence.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | /** 4 | * Created on: 09.09.2010 2:13:48 5 | */ 6 | public interface Sequence { 7 | int length(); 8 | T get(int i); 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/State.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | import org.jkff.ire.util.WrappedBitSet; 4 | 5 | /** 6 | * Created on: 31.07.2010 14:57:34 7 | */ 8 | public interface State { 9 | WrappedBitSet getTerminatedPatterns(); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/TransferFunction.java: 
-------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | /** 4 | * Created on: 22.07.2010 23:48:22 5 | */ 6 | public interface TransferFunction { 7 | T next(T t); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/fa/TransferTable.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.fa; 2 | 3 | /** 4 | * Created on: 31.07.2010 15:18:23 5 | */ 6 | public interface TransferTable { 7 | TransferFunction forToken(C token); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/regex/Alternative.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.regex; 2 | 3 | /** 4 | * Created on: 01.09.2010 23:42:24 5 | */ 6 | public class Alternative implements RxNode { 7 | public final RxNode a; 8 | public final RxNode b; 9 | 10 | public Alternative(RxNode a, RxNode b) { 11 | this.a = a; 12 | this.b = b; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/regex/CharacterClass.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.regex; 2 | 3 | /** 4 | * Created on: 01.09.2010 23:47:14 5 | */ 6 | public abstract class CharacterClass implements RxNode { 7 | public abstract boolean acceptsChar(char c); 8 | 9 | public static CharacterClass ANY_CHAR = new CharacterClass() { 10 | @Override 11 | public boolean acceptsChar(char c) { 12 | return true; 13 | } 14 | 15 | @Override 16 | public boolean intersects(CharacterClass c) { 17 | return true; 18 | } 19 | 20 | public String toString() { 21 | return "."; 22 | } 23 | }; 24 | 25 | public static CharacterClass oneOf(final String s) { 26 | return new OneOf(s); 27 | } 28 | 29 | public abstract boolean 
intersects(CharacterClass c); 30 | 31 | private static class OneOf extends CharacterClass { 32 | private String s; 33 | 34 | public OneOf(String s) { 35 | this.s = s; 36 | } 37 | 38 | @Override 39 | public boolean acceptsChar(char c) { 40 | return s.indexOf(c) > -1; 41 | } 42 | 43 | @Override 44 | public boolean intersects(CharacterClass c) { 45 | if(c instanceof OneOf) { 46 | OneOf other = (OneOf) c; 47 | for(int i = 0; i < s.length(); ++i) { 48 | char ch = s.charAt(i); 49 | if(other.s.indexOf(ch) != -1) { 50 | return true; 51 | } 52 | } 53 | return false; 54 | } else if(c == ANY_CHAR) { 55 | return true; 56 | } else { 57 | throw new UnsupportedOperationException(); 58 | } 59 | } 60 | 61 | public String toString() { 62 | return "[" + s + "]"; 63 | } 64 | 65 | public boolean equals(Object other) { 66 | if(other == this) return true; 67 | if(other == null) return false; 68 | if(!(other instanceof OneOf)) return false; 69 | return s.equals(((OneOf)other).s); 70 | } 71 | 72 | public int hashCode() { 73 | return s.hashCode(); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/regex/Empty.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.regex; 2 | 3 | /** 4 | * Created on: 01.09.2010 23:41:03 5 | */ 6 | public class Empty implements RxNode { 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/regex/Labeled.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.regex; 2 | 3 | /** 4 | * Created on: 04.09.2010 12:14:35 5 | */ 6 | public class Labeled implements RxNode { 7 | public final RxNode a; 8 | public final int patternId; 9 | 10 | public Labeled(RxNode a, int patternId) { 11 | this.a = a; 12 | this.patternId = patternId; 13 | } 14 | } 15 | 
--------------------------------------------------------------------------------
/src/main/java/org/jkff/ire/regex/OnceOrMore.java:
--------------------------------------------------------------------------------
package org.jkff.ire.regex;

/**
 * Regex syntax-tree node for "one or more repetitions" of the wrapped node
 * (what the parser produces for the {@code +} operator).
 *
 * Created on: 01.09.2010 23:41:53
 */
public class OnceOrMore implements RxNode {
    public final RxNode a;

    public OnceOrMore(RxNode a) {
        this.a = a;
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/jkff/ire/regex/RegexCompiler.java:
--------------------------------------------------------------------------------
package org.jkff.ire.regex;

import org.jkff.ire.DFARopePatternSet;
import org.jkff.ire.PatternSet;
import org.jkff.ire.fa.*;
import org.jkff.ire.util.CoarsestPartition;
import org.jkff.ire.util.WrappedBitSet;
import org.jkff.ire.util.Pair;

import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;

import static org.jkff.ire.util.CollectionFactory.*;

/**
 * Turns regex syntax trees into the forward/backward automata pair used for matching.
 *
 * NOTE(review): the type arguments on BiDFA / DFA / TransferTable / TransferFunction
 * below were reconstructed from how those types are used in this file
 * ({@code forToken(Character)}, {@code resetTerminatedPattern(PowerIntState, int)});
 * confirm them against the declarations in org.jkff.ire.fa.
 *
 * Created on: 01.09.2010 23:43:07
 */
public class RegexCompiler {
    /** Compiles already-parsed patterns; the index of a root in the list is its pattern id. */
    public static PatternSet compile(List<RxNode> roots) {
        return new DFARopePatternSet(compileToBiDFA(roots));
    }

    /** Parses every regex with {@link RegexParser} and compiles the resulting trees. */
    public static PatternSet compile(String... regexes) {
        List<RxNode> roots = newArrayList();
        for (String regex : regexes) {
            roots.add(RegexParser.parse(regex));
        }
        return compile(roots);
    }

    /**
     * Builds the two automata: the forward one recognizes {@code .*root.*},
     * the backward one recognizes {@code reverse(root).*}.
     */
    static BiDFA<Character, PowerIntState> compileToBiDFA(List<RxNode> roots) {
        List<RxNode> rootsAnywhere = newArrayList();
        List<RxNode> reversedRoots = newArrayList();
        for (RxNode root : roots) {
            // ".*" expressed with the available node types: (empty | .+)
            Alternative dotStar = new Alternative(new Empty(), new OnceOrMore(CharacterClass.ANY_CHAR));
            rootsAnywhere.add(new Sequence(dotStar, new Sequence(root, dotStar)));
            reversedRoots.add(new Sequence(reverse(root), dotStar));
        }
        return new BiDFA<Character, PowerIntState>(compileToDFA(rootsAnywhere), compileToDFA(reversedRoots));
    }

    /**
     * Labels each pattern with its list index, joins all of them into one
     * alternative and converts that to an automaton.
     *
     * @throws IllegalArgumentException if {@code rxNodes} is empty
     */
    static DFA<Character, PowerIntState> compileToDFA(List<RxNode> rxNodes) {
        if (rxNodes.isEmpty()) {
            throw new IllegalArgumentException("Pattern list can't be empty");
        }
        List<RxNode> labeled = newArrayList();
        for (int i = 0; i < rxNodes.size(); ++i) {
            labeled.add(new Labeled(rxNodes.get(i), i));
        }
        RxNode alt = labeled.get(0);
        for (int i = 1; i < rxNodes.size(); ++i) {
            alt = new Alternative(alt, labeled.get(i));
        }

        return toDFA(reduceNFA(toNFA(alt)), rxNodes.size());
    }

    /**
     * Wraps the optimized NFA as a DFA whose states are subsets (bit sets) of NFA nodes.
     * Per-character transfer functions are computed on first use and cached.
     */
    static DFA<Character, PowerIntState> toDFA(NFA nfa, int numPatterns) {
        Pair<Set<NFA.Node>, NFA.Node> opt = optimize(nfa);

        Set<NFA.Node> allNodes = opt.first;
        NFA.Node newInitial = opt.second;

        final int numStates = allNodes.size();

        final Map<NFA.Node, Integer> node2id = newLinkedHashMap();
        final NFA.Node[] id2node = allNodes.toArray(new NFA.Node[allNodes.size()]);

        for (int i = 0; i < id2node.length; ++i) {
            node2id.put(id2node[i], i);
        }

        // One basis state per NFA node, carrying the patterns that terminate in it.
        final State[] basis = new State[numStates];
        for (int i = 0; i < numStates; ++i) {
            WrappedBitSet terminatedPatterns = new WrappedBitSet(numPatterns);
            for (int pat : id2node[i].patternIds) {
                terminatedPatterns.set(pat);
            }
            basis[i] = new IntState(i, terminatedPatterns);
        }

        TransferTable<Character, PowerIntState> transfer = new TransferTable<Character, PowerIntState>() {
            // Generic array creation is unavoidable here; the array only ever
            // holds values produced by computeTransferFor.
            @SuppressWarnings("unchecked")
            private final TransferFunction<PowerIntState>[] transfer =
                    new TransferFunction[Character.MAX_VALUE + 1];

            public TransferFunction<PowerIntState> forToken(Character token) {
                char t = token;
                TransferFunction<PowerIntState> f = transfer[t];
                if (f == null) {
                    transfer[t] = f = computeTransferFor(t);
                }
                return f;
            }

            private TransferFunction<PowerIntState> computeTransferFor(char token) {
                WrappedBitSet[] state2next = new WrappedBitSet[numStates];
                for (int i = 0; i < numStates; ++i) {
                    WrappedBitSet res = new WrappedBitSet(numStates);
                    NFA.Node node = id2node[i];
                    for (Pair<CharacterClass, NFA.Node> out : node.out) {
                        if (out.first.acceptsChar(token)) {
                            res.set(node2id.get(out.second));
                        }
                    }
                    state2next[i] = res;
                }
                return new PowerIntTable(state2next);
            }
        };

        final WrappedBitSet justInitial = new WrappedBitSet(numStates);
        justInitial.set(node2id.get(newInitial));
        PowerIntState initial = new PowerIntState(basis, justInitial);

        return new DFA<Character, PowerIntState>(transfer, initial, PowerIntTable.REDUCER) {
            @Override
            public PowerIntState resetTerminatedPattern(PowerIntState state, int pattern) {
                // Drop every substate that terminates 'pattern', then re-add the initial one.
                WrappedBitSet reset = new WrappedBitSet(basis.length);
                reset.or(state.getSubset());
                for (int substate = reset.nextSetBit(0); substate != -1; substate = reset.nextSetBit(substate + 1)) {
                    if (basis[substate].getTerminatedPatterns().get(pattern)) {
                        reset.clear(substate);
                    }
                }
                reset.or(justInitial);
                return new PowerIntState(basis, reset);
            }
        };
    }

    /** Removes epsilon transitions, then merges equivalent states in both edge directions. */
    private static Pair<Set<NFA.Node>, NFA.Node> optimize(NFA nfa) {
        Pair<Set<NFA.Node>, NFA.Node> eClosure = computeEClosure(nfa);
        Pair<Set<NFA.Node>, NFA.Node> groupedLeft = groupEquivalentStates(eClosure, true);
        return groupEquivalentStates(groupedLeft, false);
    }

    /**
     * Merges nodes that end up in the same block of a stable partition.
     *
     * @param nfa          node set and initial node of an epsilon-free automaton
     * @param leftNotRight {@code true} feeds edges to the partitioner as (source, target),
     *                     {@code false} as (target, source)
     * @return the merged node set and the merged node containing the old initial node
     */
    private static Pair<Set<NFA.Node>, NFA.Node> groupEquivalentStates(
            Pair<Set<NFA.Node>, NFA.Node> nfa, boolean leftNotRight)
    {
        // See paper "On NFA reductions".
        Set<NFA.Node> nodes = nfa.first;
        NFA.Node initial = nfa.second;

        // Nodes terminating different patterns are different.
        Map<Set<Integer>, Integer> patIds2block = newLinkedHashMap();
        Map<NFA.Node, Integer> node2block = newLinkedHashMap();
        for (NFA.Node node : nodes) {
            Integer block = patIds2block.get(node.patternIds);
            if (block == null) {
                patIds2block.put(node.patternIds, block = patIds2block.size());
            }
            node2block.put(node, block);
        }

        NFA.Node[] id2node = nodes.toArray(new NFA.Node[nodes.size()]);
        Map<NFA.Node, Integer> node2id = newLinkedHashMap();
        for (int i = 0; i < id2node.length; ++i) {
            node2id.put(id2node[i], i);
        }

        int[] p = new int[id2node.length];
        for (int i = 0; i < id2node.length; ++i) {
            p[i] = node2block.get(id2node[i]);
        }

        // Instead of iterating over the whole unicode alphabet,
        // let us iterate over the distinct labels of the automaton.
        Set<CharacterClass> alphabet = newLinkedHashSet();
        for (NFA.Node node : nodes) {
            for (Pair<CharacterClass, NFA.Node> out : node.out) {
                alphabet.add(out.first);
            }
        }

        boolean anythingChanged;
        do {
            anythingChanged = false;
            for (CharacterClass c : alphabet) {
                List<int[]> edges = newArrayList();
                for (int i = 0; i < id2node.length; ++i) {
                    NFA.Node node = id2node[i];
                    for (Pair<CharacterClass, NFA.Node> out : node.out) {
                        // When splitting by a particular label, say, [agc],
                        // we should take into account all edges that might
                        // be triggered by any of the characters accepted
                        // by this label. For example, a "." edge should be used.
                        if (out.first.intersects(c)) {
                            int j = node2id.get(out.second);
                            edges.add(leftNotRight ? new int[] {i, j} : new int[] {j, i});
                        }
                    }
                }
                int[] newP = CoarsestPartition.coarsestStablePartition(p, edges.toArray(new int[edges.size()][]));
                if (!Arrays.equals(p, newP)) {
                    anythingChanged = true;
                    p = newP;
                }
            }
        } while (anythingChanged);

        // Group nodes of the nfa according to 'p'.
        Map<Integer, NFA.Node> block2newNode = newLinkedHashMap();
        Map<Integer, List<Integer>> block2oldNodeIds = newLinkedHashMap();
        for (int i = 0; i < p.length; i++) {
            int b = p[i];
            if (!block2newNode.containsKey(b)) {
                block2newNode.put(b, new NFA.Node());
                block2oldNodeIds.put(b, new ArrayList<Integer>());
            }
            block2oldNodeIds.get(b).add(i);
        }
        for (int b : block2newNode.keySet()) {
            NFA.Node newNode = block2newNode.get(b);
            for (int oldNodeId : block2oldNodeIds.get(b)) {
                NFA.Node oldNode = id2node[oldNodeId];
                newNode.patternIds.addAll(oldNode.patternIds);
                for (Pair<CharacterClass, NFA.Node> out : oldNode.out) {
                    CharacterClass cc = out.first;
                    NFA.Node dest = out.second;
                    NFA.Node newDest = block2newNode.get(p[node2id.get(dest)]);
                    Pair<CharacterClass, NFA.Node> edge = Pair.of(cc, newDest);
                    if (!newNode.out.contains(edge)) {
                        newNode.out.add(edge);
                    }
                }
            }
        }
        NFA.Node newInitial = block2newNode.get(p[node2id.get(initial)]);
        // Insertion-ordered, like every other collection in this file, so that the
        // state numbering derived from this set does not depend on node hash codes.
        Set<NFA.Node> newNodes = new LinkedHashSet<NFA.Node>(block2newNode.values());
        return Pair.of(newNodes, newInitial);
    }

    /**
     * Builds an epsilon-free automaton: each new node stands for a set of original
     * nodes (the union of the epsilon closures of the targets reached over one
     * character class), and carries the pattern ids of all nodes in that set.
     */
    private static Pair<Set<NFA.Node>, NFA.Node> computeEClosure(NFA nfa) {
        final Map<NFA.Node, Set<NFA.Node>> node2closure = newLinkedHashMap();

        Set<NFA.Node> allOldNodes = dfs(nfa.begin, true);
        for (NFA.Node node : allOldNodes) {
            node2closure.put(node, dfs(node, false));
        }

        final Map<NFA.Node, Set<NFA.Node>> newNode2contents = newLinkedHashMap();
        final Map<Set<NFA.Node>, NFA.Node> contents2newNode = newLinkedHashMap();
        Set<NFA.Node> newNodesToVisit = newLinkedHashSet();
        Set<NFA.Node> initialEC = node2closure.get(nfa.begin);
        NFA.Node newInitial = new NFA.Node();
        for (NFA.Node subNode : initialEC) {
            newInitial.patternIds.addAll(subNode.patternIds);
        }
        newNodesToVisit.add(newInitial);
        newNode2contents.put(newInitial, initialEC);
        contents2newNode.put(initialEC, newInitial);
        while (!newNodesToVisit.isEmpty()) {
            NFA.Node newNode = newNodesToVisit.iterator().next();
            newNodesToVisit.remove(newNode);
            Map<CharacterClass, Set<NFA.Node>> class2dest = newLinkedHashMap();
            for (NFA.Node subNode : newNode2contents.get(newNode)) {
                for (Pair<CharacterClass, NFA.Node> out : subNode.out) {
                    if (out.first == null) {
                        // Skip epsilon transitions: we're operating on epsilon closures
                        continue;
                    }
                    Set<NFA.Node> dest = class2dest.get(out.first);
                    if (dest == null) {
                        class2dest.put(out.first, dest = newLinkedHashSet());
                    }
                    dest.addAll(node2closure.get(out.second));
                }
            }
            for (CharacterClass cc : class2dest.keySet()) {
                Set<NFA.Node> dest = class2dest.get(cc);
                NFA.Node newDest = contents2newNode.get(dest);
                if (newDest == null) {
                    newDest = new NFA.Node();
                    for (NFA.Node subNode : dest) {
                        newDest.patternIds.addAll(subNode.patternIds);
                    }
                    newNode2contents.put(newDest, dest);
                    contents2newNode.put(dest, newDest);
                    newNodesToVisit.add(newDest);
                }
                newNode.transition(cc, newDest);
            }
        }

        return Pair.of(newNode2contents.keySet(), newInitial);
    }

    /**
     * Collects the nodes reachable from {@code origin}, in first-visit order.
     *
     * @param acceptNonEps if {@code false}, only epsilon edges (null label) are followed
     */
    static Set<NFA.Node> dfs(NFA.Node origin, boolean acceptNonEps) {
        Set<NFA.Node> res = newLinkedHashSet();
        Deque<NFA.Node> toVisit = new ArrayDeque<NFA.Node>();
        toVisit.push(origin);
        while (!toVisit.isEmpty()) {
            NFA.Node node = toVisit.pop();
            if (!res.add(node)) {
                continue;
            }
            for (Pair<CharacterClass, NFA.Node> out : node.out) {
                if (out.first == null || acceptNonEps) {
                    toVisit.push(out.second);
                }
            }
        }
        return res;
    }

    /** Placeholder: currently returns its argument unchanged. */
    static NFA reduceNFA(NFA nfa) {
        // TODO
        return nfa;
    }

    /**
     * Converts a syntax tree to an NFA fragment with a single begin and end node.
     * Epsilon transitions are edges with a {@code null} label.
     *
     * @throws UnsupportedOperationException for node types not listed below
     */
    static NFA toNFA(RxNode rxNode) {
        if (rxNode instanceof Alternative) {
            Alternative x = (Alternative) rxNode;
            NFA res = new NFA(new NFA.Node(), new NFA.Node());
            NFA a = toNFA(x.a), b = toNFA(x.b);
            res.begin.transition(null, a.begin);
            res.begin.transition(null, b.begin);
            a.end.transition(null, res.end);
            b.end.transition(null, res.end);
            return res;
        } else if (rxNode instanceof CharacterClass) {
            NFA res = new NFA(new NFA.Node(), new NFA.Node());
            res.begin.transition((CharacterClass) rxNode, res.end);
            return res;
        } else if (rxNode instanceof Empty) {
            NFA res = new NFA(new NFA.Node(), new NFA.Node());
            res.begin.transition(null, res.end);
            return res;
        } else if (rxNode instanceof OnceOrMore) {
            OnceOrMore x = (OnceOrMore) rxNode;
            NFA res = toNFA(x.a);
            res.end.transition(null, res.begin);
            return res;
        } else if (rxNode instanceof Sequence) {
            Sequence x = (Sequence) rxNode;
            NFA a = toNFA(x.a), b = toNFA(x.b);
            NFA res = new NFA(a.begin, b.end);
            a.end.transition(null, b.begin);
            return res;
        } else if (rxNode instanceof Labeled) {
            Labeled x = (Labeled) rxNode;
            NFA a = toNFA(x.a);
            a.end.patternIds.add(x.patternId);
            return a;
        } else {
            throw new UnsupportedOperationException("Unsupported node type " + rxNode.getClass());
        }
    }

    /** An NFA fragment identified by its begin and end nodes. */
    static class NFA {
        final Node begin, end;

        public NFA(Node begin, Node end) {
            this.begin = begin;
            this.end = end;
        }

        /** A node with labeled outgoing edges; identity is the sequential {@link #id}. */
        static class Node {
            static AtomicInteger nextId = new AtomicInteger(0);

            final List<Pair<CharacterClass, Node>> out = newArrayList();
            final Set<Integer> patternIds = newLinkedHashSet();
            final int id = nextId.incrementAndGet();

            /** Adds an edge to {@code dest}; a {@code null} class denotes an epsilon edge. */
            void transition(CharacterClass cc, Node dest) {
                out.add(Pair.of(cc, dest));
            }

            @Override
            public String toString() {
                return "" + id;
            }

            @Override
            public boolean equals(Object o) {
                // Guarded so that null or a foreign type compares unequal instead of throwing.
                return o instanceof Node && id == ((Node) o).id;
            }

            @Override
            public int hashCode() {
                return id;
            }
        }
    }

    /** Returns a tree matching the reversed strings of {@code rxNode}. */
    private static RxNode reverse(RxNode rxNode) {
        if (rxNode instanceof Alternative) {
            Alternative x = (Alternative) rxNode;
            return new Alternative(reverse(x.a), reverse(x.b));
        } else if (rxNode instanceof CharacterClass) {
            return rxNode;
        } else if (rxNode instanceof Empty) {
            return rxNode;
        } else if (rxNode instanceof OnceOrMore) {
            OnceOrMore x = (OnceOrMore) rxNode;
            return new OnceOrMore(reverse(x.a));
        } else if (rxNode instanceof Sequence) {
            Sequence x = (Sequence) rxNode;
            // The only interesting case.
            return new Sequence(reverse(x.b), reverse(x.a));
        } else if (rxNode instanceof Labeled) {
            Labeled x = (Labeled) rxNode;
            return new Labeled(reverse(x.a), x.patternId);
        } else {
            throw new UnsupportedOperationException("Unsupported node type " + rxNode.getClass());
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/jkff/ire/regex/RegexParser.java:
--------------------------------------------------------------------------------
package org.jkff.ire.regex;

/**
 * Recursive-descent parser for the supported regex syntax: alternation {@code |},
 * grouping {@code ( )}, postfix {@code + ? *}, {@code .}, character classes
 * {@code [...]} with {@code a-z} ranges, and backslash escapes.
 *
 * Created on: 04.09.2010 13:12:26
 */
public class RegexParser {
    /**
     * Parses a complete regex.
     *
     * @throws IllegalArgumentException if the pattern is malformed: an unbalanced
     *         parenthesis, an unterminated character class or a dangling escape
     */
    public static RxNode parse(String regex) {
        Tokenizer t = new Tokenizer(regex);
        RxNode root = parseAlt(t);
        if (t.peek() != null) {
            // parseAlt only stops early on a ')' that has no matching '('.
            throw new IllegalArgumentException(
                    "Unexpected '" + t.peek() + "' at position " + t.position() + " in regex: " + regex);
        }
        return root;
    }

    /** True if the next unread character is {@code c}; never consumes input. */
    private static boolean expect(Tokenizer t, char c) {
        Character p = t.peek();
        return p != null && p.charValue() == c;
    }

    private static boolean atSequenceEnd(Tokenizer t) {
        return t.peek() == null || expect(t, '|') || expect(t, ')');
    }

    private static RxNode parseAlt(Tokenizer t) {
        RxNode a = parseSequence(t);
        if (!expect(t, '|')) {
            return a;
        }
        t.next();
        RxNode b = parseAlt(t);
        return new Alternative(a, b);
    }

    private static RxNode parseSequence(Tokenizer t) {
        // An empty branch (including one at end of input) matches the empty string.
        if (atSequenceEnd(t)) {
            return new Empty();
        }
        RxNode a = parseUnary(t);
        if (atSequenceEnd(t)) {
            return a;
        }
        RxNode b = parseSequence(t);
        return new Sequence(a, b);
    }

    private static RxNode parseUnary(Tokenizer t) {
        RxNode a = parseAtom(t);
        while (true) {
            if (expect(t, '+')) {
                t.next();
                a = new OnceOrMore(a);
            } else if (expect(t, '?')) {
                t.next();
                a = new Alternative(new Empty(), a);
            } else if (expect(t, '*')) {
                t.next();
                a = new Alternative(new Empty(), new OnceOrMore(a));
            } else {
                return a;
            }
        }
    }

    private static RxNode parseAtom(Tokenizer t) {
        if (expect(t, '(')) {
            t.next();
            return parseParen(t);
        } else if (expect(t, '[')) {
            t.next();
            return parseCharacterRange(t);
        } else if (expect(t, '.')) {
            t.next();
            return CharacterClass.ANY_CHAR;
        } else {
            return CharacterClass.oneOf("" + parseChar(t));
        }
    }

    private static RxNode parseParen(Tokenizer t) {
        RxNode a = parseAlt(t);
        if (!expect(t, ')')) {
            throw new IllegalArgumentException("Expected ')', got " + t.peek());
        }
        t.next();
        return a;
    }

    /** Parses the body of a {@code [...]} class; the opening bracket is already consumed. */
    private static RxNode parseCharacterRange(Tokenizer t) {
        StringBuilder s = new StringBuilder();
        Character last = null;
        while (!expect(t, ']')) {
            if (t.peek() == null) {
                throw new IllegalArgumentException("Unterminated character class: expected ']'");
            }
            char c = parseChar(t);
            if (c == '-' && last != null) {
                c = parseChar(t);
                // int counter: a char counter would wrap around and never exceed '\uffff'.
                for (int i = last.charValue(); i <= c; ++i) {
                    s.append((char) i);
                }
            } else {
                s.append(c);
            }
            last = c;
        }
        t.next();
        return CharacterClass.oneOf(s.toString());
    }

    /**
     * Reads one literal character, skipping a preceding backslash if present.
     *
     * @throws IllegalArgumentException if the input ends where a character is required
     */
    private static char parseChar(Tokenizer t) {
        if (expect(t, '\\')) {
            t.next();
        }
        Character c = t.next();
        if (c == null) {
            throw new IllegalArgumentException("Unexpected end of regex at position " + t.position());
        }
        return c;
    }

    /** Cursor over the pattern; {@code peek}/{@code next} return null at end of input. */
    private static class Tokenizer {
        private final String s;
        private int pos;

        public Tokenizer(String s) {
            this.s = s;
        }

        public Character peek() {
            return (pos < s.length()) ? s.charAt(pos) : null;
        }

        public Character next() {
            return (pos < s.length()) ? s.charAt(pos++) : null;
        }

        public int position() {
            return pos;
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/jkff/ire/regex/RxNode.java:
--------------------------------------------------------------------------------
package org.jkff.ire.regex;

/**
 * Marker interface for regex syntax-tree nodes.
 *
 * Created on: 01.09.2010 21:59:05
 */
public interface RxNode {
}

--------------------------------------------------------------------------------
/src/main/java/org/jkff/ire/regex/Sequence.java:
--------------------------------------------------------------------------------
package org.jkff.ire.regex;

/**
 * Regex syntax-tree node for concatenation: {@link #a} followed by {@link #b}.
 *
 * Created on: 01.09.2010 21:59:15
 */
public class Sequence implements RxNode {
    public final RxNode a;
    public final RxNode b;

    public Sequence(RxNode a, RxNode b) {
        this.a = a;
        this.b = b;
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/jkff/ire/rope/Rope.java:
--------------------------------------------------------------------------------
package org.jkff.ire.rope;

import org.jkff.ire.fa.Sequence;
import org.jetbrains.annotations.NotNull;
import org.jkff.ire.util.Function2;
import org.jkff.ire.util.Pair;
import org.jkff.ire.util.Predicate;
import org.jkff.ire.util.Reducer;

/**
 * Immutable string tree. A node is either a leaf holding a {@code block} of text,
 * or a fork with two or three children of equal height. Every node caches the
 * length of its text and a summary {@code sum} of type {@code M}, obtained from the
 * factory's map/reduce for leaves and from the factory's reducer for forks.
 *
 * Created on: 21.08.2010 17:46:38
 */
public class Rope<M> {
    @NotNull
    private final RopeFactory<M> factory;

    private final M sum;

    private final Rope<M> a, b, c;
    private final int h;
    private final int length;

    private final String block;

    private Rope(Rope<M> a, Rope<M> b, M sum) {
        this(a, b, null, a.factory, null, sum);
    }

    private Rope(Rope<M> a, Rope<M> b, Rope<M> c, M sum) {
        this(a, b, c, a.factory, null, sum);
    }

    private Rope(RopeFactory<M> factory, String block, M sum) {
        this(null, null, null, factory, block, sum);
    }

    private Rope(RopeFactory<M> factory, String block) {
        this(null, null, null, factory, block, sumString(factory, block));
    }

    /** Computes the summary of a leaf's text through the factory's map/reduce. */
    private static <M> M sumString(RopeFactory<M> factory, final String block) {
        return factory.mapReduce(new Sequence<Character>() {
            public int length() {
                return block.length();
            }

            public Character get(int i) {
                return block.charAt(i);
            }
        });
    }

    private Rope(Rope<M> a, Rope<M> b, Rope<M> c, RopeFactory<M> factory, String block, M sum) {
        if (block != null) {
            assert a == null : "Block can't have a child: 'a'";
            assert b == null : "Block can't have a child: 'b'";
            assert c == null : "Block can't have a child: 'c'";
            this.a = this.b = this.c = null;
            this.h = 0;
            this.length = block.length();
            this.block = block;
            this.factory = factory;
            this.sum = sum;
        } else {
            assert a != null : "Fork must have a child: 'a'";
            assert b != null : "Fork must have a child: 'b'";
            assert a.h == b.h : "Fork's 'a' and 'b' children must have same height";
            this.a = a;
            this.b = b;
            this.block = null;
            this.factory = factory;
            this.sum = sum;
            if (c == null) {
                this.c = null;
                this.h = a.h + 1;
                this.length = a.length + b.length;
            } else {
                assert a.h == c.h : "Fork's 'a' and 'c' children must have same height";
                this.c = c;
                this.h = a.h + 1;
                this.length = a.length + b.length + c.length;
            }
        }
    }

    public int length() {
        return length;
    }

    public char charAt(int index) {
        if (block != null)
            return block.charAt(index);
        if (index < a.length())
            return a.charAt(index);
        if (index < a.length() + b.length())
            return b.charAt(index - a.length());
        return c.charAt(index - a.length() - b.length());
    }

    public M getSum() {
        return sum;
    }

    /** Returns a new rope holding this rope's text followed by {@code other}'s. */
    public Rope<M> append(Rope<M> other) {
        return append(this, other);
    }

    private static <M> Rope<M> append(Rope<M> left, Rope<M> right) {
        int blockSize = left.factory.getBlockSize();
        Reducer<M> reducer = left.factory.getReducer();

        // The combined summary is composed only in the branches that build a node
        // from it; the two recursive branches let the nested appends compute theirs.
        if (left.h == right.h) {
            M sum = reducer.compose(left.sum, right.sum);
            if (left.h > 0)
                return new Rope<M>(left, right, sum);
            if (!left.isUnderflownBlock() && !right.isUnderflownBlock())
                return new Rope<M>(left, right, sum);
            String bigBlock = left.block + right.block;
            if (bigBlock.length() <= 2 * blockSize - 1)
                return new Rope<M>(left.factory, bigBlock, sum);
            return new Rope<M>(
                    new Rope<M>(left.factory, bigBlock.substring(0, blockSize)),
                    new Rope<M>(left.factory, bigBlock.substring(blockSize, bigBlock.length())),
                    sum);
        } else if (left.h == right.h + 1) {
            M sum = reducer.compose(left.sum, right.sum);
            if (left.c == null)
                return new Rope<M>(left.a, left.b, right, sum);
            else
                return new Rope<M>(
                        // Optimization opportunity: remember a+b and b+c sums in 3-child nodes
                        new Rope<M>(left.a, left.b, reducer.compose(left.a.sum, left.b.sum)),
                        new Rope<M>(left.c, right, reducer.compose(left.c.sum, right.sum)),
                        sum);
        } else if (right.h == left.h + 1) {
            M sum = reducer.compose(left.sum, right.sum);
            if (right.c == null)
                return new Rope<M>(left, right.a, right.b, sum);
            else
                return new Rope<M>(
                        new Rope<M>(left, right.a, reducer.compose(left.sum, right.a.sum)),
                        // Optimization opportunity: remember a+b and b+c sums in 3-child nodes
                        new Rope<M>(right.b, right.c, reducer.compose(right.b.sum, right.c.sum)),
                        sum);
        } else if (left.h > right.h + 1) {
            if (left.c == null)
                // This would not be well-founded recursion, if not for the two previous cases
                // left.b.append(right) may be at most left.a.h+1 high and this will be handled by them.
                return left.a.append(left.b.append(right));
            else
                // etc.
                return (left.a.append(left.b)).append(left.c.append(right));
        } else { // right.h > left.h + 1
            if (right.c == null)
                return left.append(right.a).append(right.b);
            else
                return (left.append(right.a)).append(right.b.append(right.c));
        }
    }

    /**
     * Fails loudly when a child did not yield the split that its chunk-level
     * summary promised (addChunk and addChar disagree, or the predicate does not
     * stay true once it has become true).
     */
    private static <T> T requireSplit(T split) {
        if (split == null) {
            throw new IllegalStateException(
                    "Predicate holds after a whole chunk but no split point was found inside it");
        }
        return split;
    }

    /**
     * Scans left to right, folding text into a state, and splits at the first
     * position where {@code toBool} holds for the state accumulated so far.
     *
     * @param seed     initial state
     * @param addChunk folds a whole subtree into the state
     * @param addChar  folds a single character into the state
     * @param toBool   the condition to wait for
     * @return the text before and from the split point, or {@code null} if the
     *         condition never holds
     * @throws IllegalStateException if a subtree whose chunk-level state satisfies
     *         the condition yields no split point
     */
    public <S> Pair<Rope<M>, Rope<M>> splitAfterRise(
            S seed,
            Function2<S, Rope<M>, S> addChunk, Function2<S, Character, S> addChar,
            Predicate<S> toBool) {
        if (block != null) {
            S s = seed;
            for (int i = 0; i < block.length(); ++i) {
                if (toBool.isTrueFor(s))
                    return Pair.of(
                            new Rope<M>(this.factory, block.substring(0, i)),
                            new Rope<M>(this.factory, block.substring(i, block.length())));
                s = addChar.applyTo(s, block.charAt(i));
            }
            if (toBool.isTrueFor(s))
                return Pair.of(this, new Rope<M>(this.factory, ""));
            return null;
        } else {
            if (toBool.isTrueFor(seed))
                return Pair.of(new Rope<M>(this.factory, ""), this);
            S afterA = addChunk.applyTo(seed, a);
            if (toBool.isTrueFor(afterA)) {
                Pair<Rope<M>, Rope<M>> sa = requireSplit(a.splitAfterRise(seed, addChunk, addChar, toBool));
                return (c == null)
                        ? Pair.of(sa.first, sa.second.append(b))
                        : Pair.of(sa.first, sa.second.append(b).append(c));
            }
            S afterB = addChunk.applyTo(afterA, b);
            if (toBool.isTrueFor(afterB)) {
                Pair<Rope<M>, Rope<M>> sb = requireSplit(b.splitAfterRise(afterA, addChunk, addChar, toBool));
                return (c == null)
                        ? Pair.of(a.append(sb.first), sb.second)
                        : Pair.of(a.append(sb.first), sb.second.append(c));
            }
            if (c == null)
                return null;
            S afterC = addChunk.applyTo(afterB, c);
            if (toBool.isTrueFor(afterC)) {
                Pair<Rope<M>, Rope<M>> sc = requireSplit(c.splitAfterRise(afterB, addChunk, addChar, toBool));
                return Pair.of(a.append(b).append(sc.first), sc.second);
            }
            return null;
        }
    }

    /**
     * Mirror image of {@link #splitAfterRise}: scans right to left and splits at
     * the first position (from the end) where {@code toBool} holds.
     *
     * @return the text before and from the split point, or {@code null} if the
     *         condition never holds
     * @throws IllegalStateException if a subtree whose chunk-level state satisfies
     *         the condition yields no split point
     */
    public <S> Pair<Rope<M>, Rope<M>> splitAfterBackRise(
            S seed,
            Function2<S, Rope<M>, S> addChunk, Function2<S, Character, S> addChar,
            Predicate<S> toBool) {
        if (block != null) {
            S s = seed;
            for (int i = block.length() - 1; i >= 0; --i) {
                if (toBool.isTrueFor(s))
                    return Pair.of(
                            new Rope<M>(this.factory, block.substring(0, i + 1)),
                            new Rope<M>(this.factory, block.substring(i + 1, block.length())));
                s = addChar.applyTo(s, block.charAt(i));
            }
            if (toBool.isTrueFor(s))
                return Pair.of(new Rope<M>(this.factory, ""), this);
            return null;
        } else {
            if (toBool.isTrueFor(seed))
                return Pair.of(this, new Rope<M>(this.factory, ""));
            S beforeC = seed;
            if (c != null) {
                beforeC = addChunk.applyTo(seed, c);
                if (toBool.isTrueFor(beforeC)) {
                    Pair<Rope<M>, Rope<M>> sc = requireSplit(c.splitAfterBackRise(seed, addChunk, addChar, toBool));
                    return Pair.of(a.append(b).append(sc.first), sc.second);
                }
            }
            S beforeB = addChunk.applyTo(beforeC, b);
            if (toBool.isTrueFor(beforeB)) {
                Pair<Rope<M>, Rope<M>> sb = requireSplit(b.splitAfterBackRise(beforeC, addChunk, addChar, toBool));
                return (c == null)
                        ? Pair.of(a.append(sb.first), sb.second)
                        : Pair.of(a.append(sb.first), sb.second.append(c));
            }
            S beforeA = addChunk.applyTo(beforeB, a);
            if (toBool.isTrueFor(beforeA)) {
                Pair<Rope<M>, Rope<M>> sa = requireSplit(a.splitAfterBackRise(beforeB, addChunk, addChar, toBool));
                return (c == null)
                        ? Pair.of(sa.first, sa.second.append(b))
                        : Pair.of(sa.first, sa.second.append(b).append(c));
            }
            return null;
        }
    }

    /** A leaf shorter than the factory's block size. */
    private boolean isUnderflownBlock() {
        return h == 0 && block.length() < factory.getBlockSize();
    }

    @Override
    public String toString() {
        StringBuilder res = new StringBuilder();
        return toString(res).toString();
    }

    private StringBuilder toString(StringBuilder sb) {
        if (block != null) {
            sb.append(block);
        } else {
            a.toString(sb);
            b.toString(sb);
            if (c != null)
                c.toString(sb);
        }
        return sb;
    }

    /** Builds a rope by appending consecutive chunks of at most {@code 2 * blockSize - 1} characters. */
    public static <M> Rope<M> fromString(RopeFactory<M> factory, String value) {
        Rope<M> res = new Rope<M>(factory, "");
        int step = 2 * factory.getBlockSize() - 1;
        for (int i = 0; i < value.length(); i += step) {
            String block = value.substring(i, Math.min(value.length(), i + step));
            res = res.append(new Rope<M>(factory, block));
        }
        return res;
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/jkff/ire/rope/RopeBasedIS.java:
--------------------------------------------------------------------------------
package org.jkff.ire.rope;

import org.jkff.ire.DFAIndexedString;
import org.jkff.ire.*;
import org.jkff.ire.IndexedString;
import org.jkff.ire.fa.BiDFA;
import org.jkff.ire.fa.Sequence;
import org.jkff.ire.fa.State;
import org.jkff.ire.fa.TransferFunction;
import org.jkff.ire.util.*;

/**
 * Indexed string backed by a {@link Rope} whose per-node summary is the pair of
 * forward and backward transfer functions of the node's text.
 *
 * NOTE(review): the type parameter {@code ST} and the type arguments of
 * DFAIndexedString / BiDFA were reconstructed from usage in this file;
 * confirm them against the declarations of those types.
 *
 * Created on: 21.08.2010 21:10:19
 */
@SuppressWarnings("unchecked")
public class RopeBasedIS<ST extends State> implements DFAIndexedString<ST> {
    private static final int DEFAULT_BLOCK_SIZE = 128;

    private BiDFA<Character, ST> bidfa;
    private Rope<TransferFunctions<ST>> rope;

    public RopeBasedIS(BiDFA<Character, ST> bidfa, String value) {
        this(bidfa, value, DEFAULT_BLOCK_SIZE);
    }

    public RopeBasedIS(BiDFA<Character, ST> bidfa, String value, int blockSize) {
        this(bidfa, Rope.fromString(
                new RopeFactory<TransferFunctions<ST>>(
                        blockSize,
                        new TFProduct<ST>(
                                bidfa.getForward().getTransferFunctionsReducer(),
                                bidfa.getBackward().getTransferFunctionsReducer()),
                        new TFMap<ST>(bidfa)),
                value));
    }

    private RopeBasedIS(BiDFA<Character, ST> bidfa, Rope<TransferFunctions<ST>> rope) {
        this.bidfa = bidfa;
        this.rope = rope;
    }

    public TransferFunction<ST> getForward() {
        return rope.getSum().forward;
    }

    public TransferFunction<ST> getBackward() {
        return rope.getSum().backward;
    }

    public Iterable<Match> getMatches() {
        return DFAMatcher.getMatches(bidfa, this);
    }

    /**
     * Splits into the first {@code index} characters and the remainder.
     *
     * @throws IndexOutOfBoundsException if {@code index} is greater than {@link #length()}
     */
    public Pair<IndexedString, IndexedString> splitBefore(final int index) {
        Function2<Integer, Rope<TransferFunctions<ST>>, Integer> addRopeLength =
                new Function2<Integer, Rope<TransferFunctions<ST>>, Integer>() {
            public Integer applyTo(Integer len, Rope<TransferFunctions<ST>> rope) {
                return len + rope.length();
            }
        };
        Function2<Integer, Character, Integer> inc = new Function2<Integer, Character, Integer>() {
            public Integer applyTo(Integer len, Character c) {
                return len + 1;
            }
        };
        Predicate<Integer> isAfterIndex = new Predicate<Integer>() {
            public boolean isTrueFor(Integer x) {
                return x >= index;
            }
        };
        Pair<Rope<TransferFunctions<ST>>, Rope<TransferFunctions<ST>>> p =
                rope.splitAfterRise(0, addRopeLength, inc, isAfterIndex);
        if (p == null) {
            // The accumulated length never reached 'index'.
            throw new IndexOutOfBoundsException("index " + index + " > length " + rope.length());
        }
        return Pair.of(
                (IndexedString) new RopeBasedIS<ST>(bidfa, p.first),
                (IndexedString) new RopeBasedIS<ST>(bidfa, p.second));
    }

    /** Delegates to {@link Rope#splitAfterRise}; returns {@code null} when the rope finds no split. */
    public <T> Pair<IndexedString, IndexedString> splitAfterRise(
            T seed,
            final Function2<T, IndexedString, T> addChunk,
            Function2<T, Character, T> addChar,
            Predicate<T> toBool)
    {
        Pair<Rope<TransferFunctions<ST>>, Rope<TransferFunctions<ST>>> p = rope.splitAfterRise(
                seed, toRopeAddChunkFun(addChunk), addChar, toBool);
        return (p == null) ? null : Pair.of(
                (IndexedString) new RopeBasedIS<ST>(bidfa, p.first),
                (IndexedString) new RopeBasedIS<ST>(bidfa, p.second));
    }

    /** Delegates to {@link Rope#splitAfterBackRise}; returns {@code null} when the rope finds no split. */
    public <T> Pair<IndexedString, IndexedString> splitAfterBackRise(
            T seed,
            Function2<T, IndexedString, T> addChunk,
            Function2<T, Character, T> addChar, Predicate<T> toBool)
    {
        Pair<Rope<TransferFunctions<ST>>, Rope<TransferFunctions<ST>>> p = rope.splitAfterBackRise(
                seed, toRopeAddChunkFun(addChunk), addChar, toBool);
        return (p == null) ? null : Pair.of(
                (IndexedString) new RopeBasedIS<ST>(bidfa, p.first),
                (IndexedString) new RopeBasedIS<ST>(bidfa, p.second));
    }

    /** Adapts a chunk folder over IndexedString to one over raw ropes by wrapping each rope. */
    private <T> Function2<T, Rope<TransferFunctions<ST>>, T> toRopeAddChunkFun(
            final Function2<T, IndexedString, T> addChunk) {
        return new Function2<T, Rope<TransferFunctions<ST>>, T>() {
            public T applyTo(T st, Rope<TransferFunctions<ST>> r) {
                return addChunk.applyTo(st, new RopeBasedIS<ST>(bidfa, r));
            }
        };
    }

    /** Appends another string, which must itself be a RopeBasedIS (it is cast). */
    public RopeBasedIS<ST> append(IndexedString s) {
        return new RopeBasedIS<ST>(bidfa, rope.append(((RopeBasedIS<ST>) s).rope));
    }

    public RopeBasedIS<ST> subSequence(int start, int end) {
        return (RopeBasedIS<ST>) splitBefore(start).second.splitBefore(end - start).first;
    }

    public int length() {
        return rope.length();
    }

    public char charAt(int index) {
        return rope.charAt(index);
    }

    @Override
    public String toString() {
        return rope.toString();
    }

    /** The forward and backward transfer functions of one piece of text. */
    private static class TransferFunctions<ST> {
        TransferFunction<ST> forward;
        TransferFunction<ST> backward;

        private TransferFunctions(TransferFunction<ST> forward, TransferFunction<ST> backward) {
            this.forward = forward;
            this.backward = backward;
        }
    }

    /**
     * Reducer over {@link TransferFunctions}: forward parts are composed left to
     * right, backward parts right to left. {@code UNIT_TF} is the identity and is
     * short-circuited by reference comparison.
     */
    private static class TFProduct<ST> implements Reducer<TransferFunctions<ST>> {
        private static TransferFunction UNIT_TF = new TransferFunction() {
            public Object next(Object x) {
                return x;
            }
        };
        private static TransferFunctions UNIT = new TransferFunctions(UNIT_TF, UNIT_TF);

        private Reducer<TransferFunction<ST>> forwardReducer;
        private Reducer<TransferFunction<ST>> backwardReducer;

        private TFProduct(
                Reducer<TransferFunction<ST>> forwardReducer,
                Reducer<TransferFunction<ST>> backwardReducer)
        {
            this.forwardReducer = forwardReducer;
            this.backwardReducer = backwardReducer;
        }

        public TransferFunctions<ST> compose(TransferFunctions<ST> a, TransferFunctions<ST> b) {
            return new TransferFunctions<ST>(
                    a.forward == UNIT_TF ? b.forward : b.forward == UNIT_TF ? a.forward :
                            forwardReducer.compose(a.forward, b.forward),
                    a.backward == UNIT_TF ? b.backward : b.backward == UNIT_TF ? a.backward :
                            backwardReducer.compose(b.backward, a.backward));
        }

        public TransferFunctions<ST> composeAll(final Sequence<TransferFunctions<ST>> tfs) {
            if (tfs.length() == 0) {
                return UNIT;
            }

            TransferFunction<ST> sumForward = forwardReducer.composeAll(new Sequence<TransferFunction<ST>>() {
                public int length() {
                    return tfs.length();
                }

                public TransferFunction<ST> get(int i) {
                    return tfs.get(i).forward;
                }
            });

            // The backward functions are fed in reverse order.
            TransferFunction<ST> sumBackward = backwardReducer.composeAll(new Sequence<TransferFunction<ST>>() {
                public int length() {
                    return tfs.length();
                }

                public TransferFunction<ST> get(int i) {
                    return tfs.get(length() - i - 1).backward;
                }
            });

            return new TransferFunctions<ST>(sumForward, sumBackward);
        }

        public TransferFunctions<ST> unit() {
            return UNIT;
        }
    }

    /** Maps a character to its pair of transfer functions, cached per character. */
    private static class TFMap<ST extends State> implements Function<Character, TransferFunctions<ST>> {
        private BiDFA<Character, ST> bidfa;

        private TransferFunctions<ST>[] cache = new TransferFunctions[1 + Character.MAX_VALUE];

        private TFMap(BiDFA<Character, ST> bidfa) {
            this.bidfa = bidfa;
        }

        public TransferFunctions<ST> applyTo(Character ch) {
            char c = ch;
            if (cache[c] == null) {
                cache[c] = new TransferFunctions<ST>(
                        bidfa.getForward().transfer(c),
                        bidfa.getBackward().transfer(c));
            }
            return cache[c];
        }
    }
}

--------------------------------------------------------------------------------
/**
 * Naive computation of the coarsest stable refinement of a partition of the
 * nodes {@code 0 .. p.length-1} of a directed graph.
 *
 * Created on: 13.10.2010 21:44:19
 */
public class CoarsestPartition {
    /**
     * Finds the coarsest refinement 'q' of the partition 'p' such that for
     * every block B1 of 'q', every block B2 of 'q' is either entirely
     * contained in in(B1) or disjoint with it, where in(B) is the set of
     * sources of the edges whose destination lies in B.
     *
     * @param p     the initial partition, given as a mapping from node index
     *              to block id (the array itself is never modified)
     * @param edges the graph, as an array of 2-element arrays {source, destination}
     * @return a freshly allocated array mapping each node index to its block id
     *         in the refined partition; newly created blocks get ids above the
     *         largest id in use (and above 0)
     */
    public static int[] coarsestStablePartition(int[] p, int[][] edges) {
        // An extremely naive and inefficient implementation.
        // There exists an O(m log n) one, described in "Three partition refinement
        // algorithms" by Paige and Tarjan, 1987; it is not implemented here.

        // Predecessor lists depend only on 'edges', so they are built once
        // instead of once per refinement pass.
        Map<Integer, List<Integer>> ins = new LinkedHashMap<Integer, List<Integer>>();
        for (int[] edge : edges) {
            int src = edge[0], dst = edge[1];
            List<Integer> is = ins.get(dst);
            if (is == null) {
                ins.put(dst, is = new ArrayList<Integer>());
            }
            is.add(src);
        }

        boolean anythingChanged;
        do {
            anythingChanged = false;

            // Group the nodes by their current block, in order of first appearance.
            Map<Integer, List<Integer>> pBlocks = new LinkedHashMap<Integer, List<Integer>>();
            for (int i = 0; i < p.length; ++i) {
                List<Integer> block = pBlocks.get(p[i]);
                if (block == null) {
                    pBlocks.put(p[i], block = new ArrayList<Integer>());
                }
                block.add(i);
            }

            // Refine with respect to each block of p (as it was at the start of this pass).
            int[] q = Arrays.copyOf(p, p.length);
            for (List<Integer> block : pBlocks.values()) {
                Set<Integer> in = new LinkedHashSet<Integer>();
                for (int member : block) {
                    List<Integer> curIn = ins.get(member);
                    if (curIn != null) {
                        in.addAll(curIn);
                    }
                }
                int[] newQ = refine(q, in);
                if (!Arrays.equals(q, newQ)) {
                    anythingChanged = true;
                    q = newQ;
                }
            }
            p = q;
        } while (anythingChanged);
        return p;
    }

    /**
     * Splits blocks of 'q' so that every block is either contained in S
     * or doesn't intersect it. A block that is only partially covered by S
     * keeps its id for the part outside S; the part inside S moves to a new block.
     *
     * @param q the partition, as a mapping from node index to block id (not modified)
     * @param s the splitter set of node indices
     * @return a new array holding the refined partition
     */
    private static int[] refine(int[] q, Set<Integer> s) {
        int maxBlock = 0;
        for (int c : q) {
            maxBlock = Math.max(c, maxBlock);
        }

        // Per block: total number of members, and number of members that are in S.
        Map<Integer, Integer> block2size = new LinkedHashMap<Integer, Integer>();
        Map<Integer, Integer> block2covered = new LinkedHashMap<Integer, Integer>();
        for (int i = 0; i < q.length; ++i) {
            int block = q[i];
            Integer size = block2size.get(block);
            block2size.put(block, size == null ? 1 : size + 1);
            if (s.contains(i)) {
                Integer covered = block2covered.get(block);
                block2covered.put(block, covered == null ? 1 : covered + 1);
            }
        }

        int[] res = Arrays.copyOf(q, q.length);

        for (int block : block2size.keySet()) {
            Integer covered = block2covered.get(block);
            if (covered != null && !covered.equals(block2size.get(block))) {
                // Split this block into S and B-S: Move S to a new block.
                int newBlock = ++maxBlock;
                for (int i : s) {
                    if (q[i] == block) {
                        res[i] = newBlock;
                    }
                }
            }
        }

        return res;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/util/Function2.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.util; 2 | 3 | /** 4 | * Created on: 21.08.2010 18:39:24 5 | */ 6 | public interface Function2 { 7 | C applyTo(A a, B b); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/util/Pair.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.util; 2 | 3 | /** 4 | * Created on: 22.07.2010 23:23:43 5 | */ 6 | public class Pair { 7 | public final A first; 8 | public final B second; 9 | 10 | public Pair(A first, B second) { 11 | this.first = first; 12 | this.second = second; 13 | } 14 | 15 | @Override 16 | public boolean equals(Object o) { 17 | if (this == o) return true; 18 | if (o == null || getClass() != o.getClass()) return false; 19 | 20 | Pair pair = (Pair) o; 21 | 22 | if (first != null ? !first.equals(pair.first) : pair.first != null) return false; 23 | if (second != null ? !second.equals(pair.second) : pair.second != null) return false; 24 | 25 | return true; 26 | } 27 | 28 | @Override 29 | public int hashCode() { 30 | int result = first != null ? first.hashCode() : 0; 31 | result = 31 * result + (second != null ? 
second.hashCode() : 0); 32 | return result; 33 | } 34 | 35 | public String toString() { 36 | return "(" + first + ", " + second + ")"; 37 | } 38 | 39 | public static Pair of(A a, B b) { 40 | return new Pair(a, b); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/util/Predicate.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.util; 2 | 3 | /** 4 | * Created on: 25.07.2010 14:10:21 5 | */ 6 | public interface Predicate { 7 | boolean isTrueFor(T t); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/util/Reducer.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.util; 2 | 3 | import org.jkff.ire.fa.Sequence; 4 | 5 | /** 6 | * Contract: null must be a unit. 7 | * 8 | * Created on: 21.08.2010 20:11:35 9 | */ 10 | public interface Reducer { 11 | T compose(T a, T b); 12 | 13 | T composeAll(Sequence ts); 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/org/jkff/ire/util/WrappedBitSet.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.util; 2 | 3 | public class WrappedBitSet implements Cloneable { 4 | public final static int ADDRESS_BITS_PER_WORD = 6; 5 | public final static int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD; 6 | 7 | public static final long WORD_MASK = 0xffffffffffffffffL; 8 | 9 | private long[] words; 10 | private int offset; 11 | private int numWords; 12 | private int numBits; 13 | 14 | public WrappedBitSet(int numBits) { 15 | words = new long[wordIndex(numBits -1) + 1]; 16 | this.offset = 0; 17 | this.numWords = words.length; 18 | this.numBits = numBits; 19 | } 20 | 21 | public WrappedBitSet(long[] words, int offset, int numWords, int numBits) { 22 | this.words = words; 23 
| this.offset = offset; 24 | this.numWords = numWords; 25 | } 26 | 27 | private int wordIndex(int bitIndex) { 28 | return offset + (bitIndex >> ADDRESS_BITS_PER_WORD); 29 | } 30 | 31 | public void set(int bitIndex) { 32 | int wordIndex = wordIndex(bitIndex); 33 | words[wordIndex] |= (1L << bitIndex); 34 | } 35 | 36 | public void clear(int bitIndex) { 37 | int wordIndex = wordIndex(bitIndex); 38 | words[wordIndex] &= ~(1L << bitIndex); 39 | } 40 | 41 | public boolean get(int bitIndex) { 42 | int wordIndex = wordIndex(bitIndex); 43 | return ((words[wordIndex] & (1L << bitIndex)) != 0); 44 | } 45 | 46 | public int nextSetBit(int fromIndex) { 47 | int u = (fromIndex >> ADDRESS_BITS_PER_WORD); 48 | if(u >= numWords) 49 | return -1; 50 | long word = words[offset+u] & (WORD_MASK << fromIndex); 51 | while (true) { 52 | if (word != 0) 53 | return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word); 54 | if (++u == numWords) 55 | return -1; 56 | word = words[offset+u]; 57 | } 58 | } 59 | 60 | public static int nextSetBit(long[] words, int offset, int length, int fromIndex) { 61 | int u = (fromIndex >> ADDRESS_BITS_PER_WORD); 62 | if(u >= length) 63 | return -1; 64 | long word = words[offset + u] & (WORD_MASK << fromIndex); 65 | while (true) { 66 | if (word != 0) 67 | return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word); 68 | if (++u == length) 69 | return -1; 70 | word = words[offset + u]; 71 | } 72 | } 73 | 74 | public int length() { 75 | return numWords; 76 | } 77 | 78 | public boolean isEmpty() { 79 | for(int i = offset; i < offset + numWords; ++i) { 80 | if(words[i] != 0) 81 | return false; 82 | } 83 | return true; 84 | } 85 | 86 | public int cardinality() { 87 | int sum = 0; 88 | for(int i = offset; i < offset + numWords; ++i) { 89 | sum += Long.bitCount(words[i]); 90 | } 91 | return sum; 92 | } 93 | 94 | public void and(WrappedBitSet set) { 95 | for (int i = 0; i < numWords; ++i) 96 | words[offset+i] &= set.words[set.offset+i]; 97 | } 98 | 99 | public void 
or(WrappedBitSet set) { 100 | for (int i = 0; i < numWords; ++i) 101 | words[offset+i] |= set.words[set.offset+i]; 102 | } 103 | 104 | public String toString() { 105 | StringBuilder b = new StringBuilder(); 106 | 107 | b.append('{'); 108 | int i = nextSetBit(0); 109 | if (i != -1) { 110 | b.append(i); 111 | for (i = nextSetBit(i+1); i >= 0; i = nextSetBit(i+1)) { 112 | b.append(", ").append(i); 113 | } 114 | } 115 | 116 | b.append('}'); 117 | return b.toString(); 118 | } 119 | 120 | public WrappedBitSet makeCopy() { 121 | long[] words = new long[numWords]; 122 | System.arraycopy(this.words, offset, words, 0, numWords); 123 | return new WrappedBitSet(words, 0, words.length, numBits); 124 | } 125 | 126 | public int numBits() { 127 | return numBits; 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/test/java/org/jkff/ire/DFABuilder.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.fa.*; 4 | import org.jkff.ire.util.WrappedBitSet; 5 | 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import static org.jkff.ire.util.CollectionFactory.newArrayList; 10 | import static org.jkff.ire.util.CollectionFactory.newLinkedHashMap; 11 | 12 | /** 13 | * Created on: 01.08.2010 13:47:21 14 | */ 15 | public class DFABuilder { 16 | private IntState[] states; 17 | private int numPatterns; 18 | private int initialState; 19 | private List transitions = newArrayList(); 20 | 21 | public DFABuilder(int numStates, int initialState, int numPatterns) { 22 | this.states = new IntState[numStates]; 23 | this.initialState = initialState; 24 | this.numPatterns = numPatterns; 25 | } 26 | 27 | public StateBuilder state(int i, int... 
termPatterns) { 28 | return new StateBuilder(i, termPatterns); 29 | } 30 | 31 | public DFA build() { 32 | final Map char2table = newLinkedHashMap(); 33 | for(Transition t : transitions) { 34 | int[] table = char2table.get(t.c); 35 | if(table == null) 36 | char2table.put(t.c, table = new int[states.length]); 37 | table[t.from] = t.to; 38 | } 39 | final int[][] char2state2next = new int[256][states.length]; 40 | for(Transition t : transitions) { 41 | if(t.c == null) { 42 | for(int i = 0; i < 256; ++i) 43 | char2state2next[i][t.from] = t.to; 44 | } 45 | } 46 | for(Transition t : transitions) { 47 | if(t.c != null) { 48 | char2state2next[t.c][t.from] = t.to; 49 | } 50 | } 51 | TransferTable transfer = new TransferTable() { 52 | public TransferFunction forToken(Character token) { 53 | return new IntTable(states, char2state2next[token]); 54 | } 55 | }; 56 | return new DFA(transfer, states[initialState], IntTable.REDUCER) { 57 | @Override 58 | public IntState resetTerminatedPattern(IntState state, int pattern) { 59 | return states[initialState]; 60 | } 61 | }; 62 | } 63 | 64 | public class StateBuilder { 65 | private int state; 66 | private int[] termPatterns; 67 | 68 | public StateBuilder(int state, int... termPatterns) { 69 | this.state = state; 70 | this.termPatterns = termPatterns; 71 | } 72 | 73 | public void transitions(Object... 
char2state) { 74 | for(int i = 0; i < char2state.length; i += 2) { 75 | transitions.add(new Transition(this.state, (Character)char2state[i], (Integer)char2state[i+1])); 76 | } 77 | WrappedBitSet t = new WrappedBitSet(numPatterns); 78 | for(int tp : termPatterns) 79 | t.set(tp); 80 | states[state] = new IntState(state, t); 81 | } 82 | } 83 | 84 | private static class Transition { 85 | int from; 86 | Character c; 87 | int to; 88 | 89 | private Transition(int from, Character c, int to) { 90 | this.from = from; 91 | this.c = c; 92 | this.to = to; 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/test/java/org/jkff/ire/IntegrationTest.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.regex.RegexCompiler; 4 | import org.junit.Ignore; 5 | import org.junit.Test; 6 | import sun.misc.BASE64Decoder; 7 | 8 | import java.io.ByteArrayInputStream; 9 | import java.io.IOException; 10 | import java.io.InputStreamReader; 11 | import java.io.LineNumberReader; 12 | import java.util.regex.Matcher; 13 | import java.util.regex.Pattern; 14 | import java.util.zip.GZIPInputStream; 15 | 16 | import static junit.framework.Assert.assertEquals; 17 | 18 | /** 19 | * Created on: 04.09.2010 18:10:51 20 | */ 21 | public class IntegrationTest { 22 | private static final String DNA; 23 | 24 | static { 25 | String z = 26 | "H4sIAAAAAAAAAFWcbZZjSYpE18rhBxtg/6dS3Gv+onpmeqojM6QndzDsA3VN/f6np3bq3792//1f" + 27 | "z/a///fvp70z//6suv79U83vX9X9+8N/f9T77397/v3l/f3qvz/+/Xx/vzz3UnP/uL8X+/en+3uf" + 28 | "f3+9fr/47/fn98b1e61/f+H3n/7927+/8+/f9vc2vwfY/v3Vfy//7y1//7P3678fzb+n+L37vz/4" + 29 | "98h7//d769/Pfn/x99L+cO+Nf6/1e9rfT/495X3s31P8e6J7z3v+3y/+/uLvrX/P+nvc+X3w/T36" + 30 | "+ql+D7DlY/IwvOTvp/8e+Peo//7q7y3u4X9/9u+Hv9f8/SUejCP8vfPvhH/H9Pu13/P9juR3FL9/" + 31 | "7nukvb+0dw33PHc4/TuXu6y68yyehV9czuBeepfDaC6Ct79X3fH0ePbKA/0Oo+66/a25c947prrn" + 32 | 
"rrv736f9ncTeefLZ72z6Xrbvn37/gQv9/affLd2J3lX8Puc96q9UrtruyfqO7leTfef0O5G9h6KO" + 33 | "lvO8S5r7K5zz/d7cK92d/D4EpXYHemXze4y6+uEyqil2H96qpS55gPW0fy98T3tHfSfzO7PrjH9/" + 34 | "b+iJX21tU+Z91XIVd+/7++kdPv32+0jXev17nd9B/g79zsO/xHn93p9GmqvCO7XfL3FZVzv3J/cb" + 35 | "1zZ30b9PQTVfMdFDU5zM76PQ0VUUCe+wa/P2Pdw1zu+IxqK4xt07UZ5l7uLvQr3z36H8uu1q8wpk" + 36 | "eK2ihA4gRBra9oDgjrrux4cH95z3qe4Afq98tdPUVqcgV0i50uAiryOWj1K8wa9Win6s+//XDT0W" + 37 | "1FJE0xzGHfHByV4pDv001kpzFocEvwquoupoquvVtjVsB971PpFtR23ScPTy7//TEV1lcR6o3MVa" + 38 | "lDN3G1eLV34CYNlSPOK9ynXR4UMfct8xvZc+mLDneFg67FdKV+VXW331svRiC/sHjRT3wRFNcHjr" + 39 | "JxArfxXGVXW9thlqCZBcar8sPNrdMxi7kQoCEvrQdQSxAWiWl14671oGrCt/g17jL92R3g+uPZoy" + 40 | "PSi94qNwr6IPJPp+JL6t5TfrcKTnr5c4h8PFuculTa5brpBpr23P81qxKT6KPijA4/BmZZEHxm/A" + 41 | "Lk/8ezO6pq8aLQRemDe/36RfrkTuztoBuTTPvcW9/lzRX23OdQBPBKTcyd+wAn/yB3PX/AOE8X65" + 42 | "Su4+xKAASB7joPtatEEoGhseQtc4SSiOm/Qcy5W8jOWg5wqmmRIPLBinEAc/7MFre+y2gbcLih7w" + 43 | "BxEX4sIcSz0Op9pwhaFbPOldf5GRAhOBYLQfNHczYgbodMPo6EN7dtOe3h3XMPdsw3u5hRy9Gudn" + 44 | "Rfdfhx5wH0EZYcQi511lNF5uqoeLGcqnAnW/Qj4CwBMFQATzDVpeed+HvhbdzNJN1Qao7r3hWQy5" + 45 | "g2aq7fjEkR36mhu7Fzxac5/2KsqxyiidO/JJ9d8/D2ANDoM+j5I5IW84U0O1DyHuLmCJPnQmlSyn" + 46 | "g+QDrrfsDybYnIHE+uqT6i/pYMuH6b4ZL59/s/3vWe6XZVFwgrvtbkc0/Od+6w6F2XhEB17Tcqm6" + 47 | "Qrwr4vRv9tyTO8Sc/i2ei0ItrkYY/H40lcq3OCvcC6YzgvPdOSPi6Fg7crsdhj6rvG59Skb5ykX4" + 48 | "VMDMNQSI4tExEdQFc5U5tMmVE1dK1VlVdMaR0puGYDNTlUZixKAxFrrhQ8HPmxb08O5uDxuOsvwQ" + 49 | "5KZJc7JORsBuD1kss/uTI2Q8tw/JcOSloSQ3P6C95WjvjxnBzCnDEXK5nT6CsOqtCSsONC+FY48J" + 50 | "RFdph6YFDwfpQVxH59h1987otnl1NZzdFcndzFjpDNurqGa4jqSp6dGW0x8vt0buaKZlJ7DYd6BM" + 51 | "U4GMprtrOIbTTjHahw+O9HJWSFTL1wg9KJ/rlEloy/3FUHtkweH/9To6hgGynqHjtmzZ+1vAHrOK" + 52 | "V+fuafv77HwqmDKC1T4EQyFvkLRmKMBW4TMH+Pda7QRoqXcYRYWWdZWjN/cMkb9JSAv79E3H0Ks2" + 53 | "wQa0mdMgJPMfslDQjMpvqGYGnKhoqs1Ms7c5XCQ4dPfmSlPGaGME93FiUHE0CRCN0aI3IqdDBdSO" + 54 | "tNb1jfwcVce0w0kYLAoJFbV4XdJv6FGCD92OyjK4nJxHGuduDGa16blj0HCT6ejAeQRE8KWXx1o/" + 55 | 
"kgw1gwxWJN1gm6hbShbBfAGkqTa0XTjjCPh0402+mCi/M7tfsgAVciK9egHCtnBh3o+Bca9L5cA9" + 56 | "qL9SfBf0rjV4FvXqzR+THQVO6fE44ujg/Uj+SSYowCAxK6oqSA5za6d7Wxye8nGsZmSWndp2KQAF" + 57 | "3Wo5DF6VjLnGwlPoP69oMakAcbgwF7teH8jvxGReA1zxBGhLal+vhYrAgkPwZYLRxFcUEJmr20GX" + 58 | "VT0PYnXsuMVVEqzNIiOHyICTHDLMEHjOqL2hAtu5oznh3OrcmedqoSJQcoEeuSuqZJyQqNNyRD3j" + 59 | "brm/o338a3fDcqifVoStHfG5LUqXhxyvUjCA6FVOGgaEJEVKraaQUJFp+zEUrT/AopAuEl29HkCr" + 60 | "goKWHsAzn9LVSIJo3NnYuc0gkwuUYgqtifHWfkoLb1WC6FWdoVVylO6ECgu4HGePFpt3tv2422Zk" + 61 | "r3g/pc1LH8Cfj1HeWTgLUWXzp1nRBlgsJYzGKLrrHfgPvSS4McdUNmoMGVNoCsjRjjobiz8bHaPV" + 62 | "k4jptDqO5dy5z8DcLW5BXTwcxEPtUSv7aSF1MCMOqZWtIAeVORU/YTAC9j3tcG53Ir3PEaUz1Ec+" + 63 | "kcNrQn44/rAEWnsEuA6MUeubcbuCNy7o9Y5eTrwTZkClTuxc+3ajOdbDXAkENjSPjujHyCvkaxQh" + 64 | "7vMz6tpk4S4ewH1MRdwBn0oq15LTMhzQBOrUKVY5xJJ2fpZopw1xzxDicBSNp5Hk+zYd2yPve/7P" + 65 | "tX/rCirHytgA1oNBCoUbXTD6XBOhPxbIiIssP77eutVt1wFVf16J1KPwolD991CL8G3MLsoAGsY1" + 66 | "l523uuRmEYy66/coIR4caqlFIwtv2cbaLvdXNtd1RbU4J9s5+1bd8TE6mrU9xIIHUOjxpUoXfiRf" + 67 | "pXFnr88fR4cLbC3dKCr82DerWg3vHBynaW1FM0X4qVxB8pZgXOvRbHrNiw2pBjXy6Jx9/AOFv66t" + 68 | "Ek9yljO7p78pBgYY8UhUJdfzofNWiJT9t7AvFMwwb6hmqM7rMdR1gOtjJ2gyB+rIiQrJW1bMVizk" + 69 | "lpSqu6cc6ldu+9BQOsFVDrbbBXRPYTcNsj6irw8Sl+yhbW5lshKfAtj4sW9mCjtRnYkWiK5kyvvp" + 70 | "0HoKOLKldbNQd6jqzoe42+16PeKb0lgR3LGPqIuOptxPxCPn5BGYS2jUj3E3zv34mRGrCi2IZ5da" + 71 | "JXfo5IEM4WhBf3Ik87EZ7oHIdPfZiiZzUNDDWwcGWhxxx4VNroLbzXBYxvOaBKGxbtqhoiushk9x" + 72 | "QMxkRbsES57i32j0YH3lXT0nhBxiCIJCBtLRoSG4eipthgdTrM/TnVE8IaIkARKnkslW+jjkVrZq" + 73 | "Lfi4pgU94RYaLk5tSXFrrwjMrVOLd/mFVt6iZJHmT8KMpwD4yWY0W7ffJ5jcjCGLqUM54Xmt2Esa" + 74 | "1zgtJcw3PBBYbh3Q+Rj6rnIUyRejs0BmsgHOTtEkORI5MxW6MgOxmIE5uJMQiuz+UskRiTL7wz6F" + 75 | "Lvp0WvZrItGjd4Zw08mzH3GPfBeRs3BLHxM3+2nN7XEOMqg/Rwqz3UQYbK5JIN9agxtVzrhg5I9q" + 76 | "GeRnSD/Xvpjjf5BsmNcItlYgasdANkcNikSDozyKhbKRSY7ejeK6/kcljftG5LBQGC26cr53iVik" + 77 | "NfeuhkD7HBo6626k/dDrcXWozJ3nRrfhRZHHUSVw2TtuoU+lRfs9o7DVonKR7ohAiqtXJh6kRFUw" + 78 | 
"6WjRTWZVI1HFixxSF/C2tdy3HNYD5WqfksHWkMUMabwVTx/BzWTmecq0gwZZ/S4XKQbinc6gNeKV" + 79 | "xFAYxsYbMsk2hFls8LFmdz6GJfrw+msUZQSJVIcYzwvx+xvG+EhQD6CnFeO1fxIYhr0OkC75Ps+D" + 80 | "DOfuwtp1wJAoLT4zwV/ACzG/Qa3hSaZVPNr2qmi8e8+ZKFNPupR+t9/QkyzOCa5X4iMgALCG6QJA" + 81 | "wp+ty0lJZ7kiVLd39AiAtlOsCCjnluccZU/2TC7dyXpbfw/RA+MAlHWHFoOGibY07oiVPASUXAYn" + 82 | "a3fwQ98AJRxErA2rkleXU2qowa+KG0ZzFswJcY7ER1GtfpczXsRdE54prSDN/W9vyiZ73tuGZ9Lr" + 83 | "JvlYOUcvchabOA2krz8rAmZXBNtvAWTMoOr7DakymNvtbhMkadLbdxEsTnmktFn7JrTHPCkL39SG" + 84 | "eNYzrxPJVZ/r5/ADtuTnivfBcwSDqR3WgqjFg/IY9EmuZG+a6FcsCAk/wEkVnCSFtZnGOq1XZCKd" + 85 | "0Kg0jh4NCMplovKsCM2+ERQcPPSjyyvAWj/fbZDRrVj0w0PPlAG+/mie9B90jj83JA5b4RWWrXtN" + 86 | "93IY+4FJlEdDzEawEVdb46KdO+1KUTJS7Mylg2flJ/s+79XUZwFM2Bfl17q3qFMUdutjPiGQvy3p" + 87 | "HEVSG7AoEEqpGo0OF0k+LD3ACIWrVlJtm9y5jdgGpdmeZMy9i6qxqJWhFVcWzge6rRH8kvAdHebd" + 88 | "YCgRCOYIK6/eidCVEkR5MAQr7tI4MXffjbl6uGoBTSMZRDv9eVV4wH0y4liNUkcfCPHoKKYneiE1" + 89 | "t4YxBsayZ0jisITikMsUgGpAObfCBUCOyg4BE8aHuNncWEr1SnA8HfZ3RrddFrWK4x6jdrwbG7MF" + 90 | "KUE75vXKidx6qSjdzVx71GEqHJb01IEol2QJZvx4Kp6RPTupKDrkEvKwVwNzjTEYk0/JxSKdSTzB" + 91 | "Ao/9hFmWCVYMZyyb9jxF0DUSepBV6k0qboQ2JIRbaM/BzdS5Qc5y0Iazxrsp10JWbwLa6Rk0XAwJ" + 92 | "yobRSgDnuVzGALxZMzRLFnWS5yjealVQlGtEt4Y+HU9LHcoIwXmj1dTES3Tn5gIs5G6sbAEJxEDc" + 93 | "mppyQlklloftq/HyiT21CG4n1QDioUsMn6QcsK7YlhgY8AG4yE6IDj3X/uM8x/djsVFgWhl6bjB8" + 94 | "RJ2G9USjAuntKiYRjE5kRzxBhewXleuE4tHODJeByxizbzT2aG7NEyvaubTCS1/w8Oy981eUY0xq" + 95 | "hu1L2TmemC7SCvp8Ql9cr2JfBH6NuwXzZT2jabZkv6sH4Kz70hQ10/3NGIxhtoy5gSvjcUCWupMY" + 96 | "qxbD7fbbx563O6CDUtIINdC6lds0f+MzTz+O7uhBQmgJYHu0aR9mThNCsP0z4eLAoEEDRkd5Jich" + 97 | "MAxb72/cL+poa2RNHB3kvByt8lFQwOUY7nifNz07nk5XvOJeowjQgCvqcaH1IesKtiba5YnKT3Ju" + 98 | "EQhXCnwshl5tThK/WfbgpIMjbtxMm+lqwu8qCGTTKXyJCt2lWxWRQMOsUz4p1sadR+ZL3EYnX5Yo" + 99 | "HJe0AptkWHzCEWH+i+u+jt3CDERh94QK/ZnuUdij+3WXX9KpVd8q0HHSRLbr+NIVQKTRzJmZ6mJE" + 100 | "ncqgmGjAUMRPd1CMtiiNQVY4TBpB3IklOkYMI2BInfZb/NGRHsrJpR81ZcWJnH5pVvQIT70ZlPzL" + 101 | 
"EGpDoSe7mBXOJYggYszg6OpKlgZNLqVKh6cz0B6TEsMeAR9WCzRv2tWm8YsSbBGN0rQdZFl5eESD" + 102 | "qnrFZdczeRsN6WjwBkRhXZAvxytfZU0jJBVIkA3XZLKO5i4vUhv0PMLnY7XfdIHE6AnGJxq2VpBz" + 103 | "rIfpteHvCUG8REU9wTerrNnsPuh6jxR8s/YAbDvmjCPsCu+OI1uNWSSgxjByR+EmvIMd/QeYd6Nb" + 104 | "ObWtyJ2/o1osS5l/zHQyG3TDiX22DMQWBXzHlgTqfumdA5wgYnqNEffR+qw3IW58WjrIEFrXcSox" + 105 | "1Bi0wD4NfrmNNShYz6IVOJ61rHT1bxi9x/nksKvJlU7cB2uDT+zSnkwcYoGp85EO7GOdmc3Dtw4G" + 106 | "2DxhXVBXDeKeLM3qrk7U8mK7RQ0iXvbbx44N4OgLW0t2Nb1xrKBlpmPEf7q4Tcxt/LW210NuNCWM" + 107 | "L2AdIYBpDhN9bEr9DFSNZu5oD5eCq+VmUBVd5sD3xktYRoPTkXryJVRs+F1GJ7KysZ00eE3wNjG1" + 108 | "/pJ+aeUO3ddwZa0iZu7SFFfs+tBM2keS4rskk7fVfS3Ddk/qdR7cBwmcOhkVnjIFuGWo8r0YHISC" + 109 | "h3dAkmxvJBVmCMrNj3hSIRNfZY3xMeAZepVFgk9gzB9qggyF0GF+81LwTVtH1oqNsjGswehFRr5U" + 110 | "YzU628bMSXD7Sj40r7LF9T6mPsMRL2Tddh0PZd4+9sTGMR5I3+GS8d0IkguOLgOkwqm1J6KyaCyg" + 111 | "DUDsJAIrt6MjWwYq39tYuTk50bYTJmHDyNUURMhoAZ9XowJ7n3nz5x302mCq8uCN6ojDEeX6cK7C" + 112 | "rje7jIwWnlGLBcnj4NtsNEdJXasbnsfxivf5KOmqmbJw2grWkTqXehb2tSIuCePEaTODmsnSLm4o" + 113 | "gQI26V1ovjbA1d25H68HTnn6uxApfGoQ9jt6DEY64WCY53fcjMLvnTebKXhgMA85YUPNdXMc7rJw" + 114 | "QsfGO3FLqRxZ+evILVxZl6iYyRvrc418oMfM8tCrTfisUq6OKU6lYmm0xh3ej3ohJ5dKuvtV3Epa" + 115 | "pXSRCNVRHZzBMP92QtETSLcfFG/ArQy5UomSSpp2zd7dVZ4nFBJth/kX9qBMq4Q24B3CikU2jRUE" + 116 | "gQwR7owr6FdwwXK6m7AIvrf6HpCbxs1wFP3Zx6aNnNdICg4n9upu1g6yIRgtLpkck81F9Cj8Kl4d" + 117 | "myH4ivgD8kb6107dT2kttbsO8A3WkHjnKu9FEXr2aGdiM6J1QCe0ZjVesHYEa+j0my4Qi/E4KUU8" + 118 | "DCbANUKbnIL1+B/oaDlpK52s2XFTQ5bmUhkAZ7PAuEUDrtWAayI5649rDG28fyK8Yvobc+JqMRGi" + 119 | "CWSgGQLTLwUpo7fIq4Kmwqxp+a4ksbJGAyF6T1dFQ1hNVmyeigLoWufKhEwX+yF+NnRn+wUHN2pH" + 120 | "UsgJa99obxrJl6agsGz2kinFWcTp9KzvtphrclNtjnbDojqWPBJ8Os79O5hn68Dwt156bSTMnaub" + 121 | "HjWY3CNl0Bwp/lvnIdT03zgjEjR9rHwG3wRmPW4Errdrl0bceAsQHDjaxMENM10uNyfdb0W3dGCs" + 122 | "s4RlE7aD+svDQtoAfb3U3ihjv1054MKGfCVGgvPpqOHl4krQvq417lMSPcmbx4UsbEOqJ9bHRTqs" + 123 | "KOls1dvHZhHCmoKzls60yFOl1uEgblOAHYXPyY42cEBDVRQqsq6VmLYdblFP/IoN3ajW0o3H/CSt" + 124 
| "LHa1p9/OnD9NdyRNRLyjnGSUpYaryjZDuypneGG/0KSmRxTVhku1OreM5DWOkMI6JdhndlFc3N3A" + 125 | "DgiPy8xlf1Pt6J8pAlngQ41WsY3ZajO5pYFYsaxJcZc65W4ZPaRxYhxjBFkncjaa7Gjfth7h6lH5" + 126 | "HpV0S5nU+sMmxszf9Vebytcdxtf1f3VM1UD17WMjkiTQbXpe4psmPHQVudAGFRoRr16UrzLJVTvJ" + 127 | "RJVR8S1pXUwi/0RzpDRiO44OasuUqOo5IUwqpTMM8qAZtGITUCpWag+FJAQJL5YyTh6AkeoMWEdn" + 128 | "tgoV3Qmz7zHBm5pYTeIErFen+ngX8KzrV6aDOkKQa7ciwk1zOcY0hAb8ZDczeed/+9jlyeWoIiBe" + 129 | "M8WbawOzka4B+JBUaaAcUPiEIOu8Ol+oH4u6Naelt6vCViY4FvYZFVcu4/9rFi7u+ADpLS05nOpU" + 130 | "oxEeuqEr3kq9FaRAu/aeeoQF00ce2qHCpI6nQ6SS5CUWoP04vhXlm+/SX6NDYdeFFB07cmJ92tWR" + 131 | "FF1bZdnJ0BiuLwGAtsNE/0AVybOTpxF/TUqoVoccOtax6N0S7PzgMQKt3cIFrnIjAAtt9Jfmz4gN" + 132 | "s9iSlytsyYvXe91M3nwbZHjSBIQuTMneWlqsUsNASlYhiLcfJG1WujOWLPYtByBUPCjH5SHbg5rV" + 133 | "d5VAKiTM0tkS653/s1EN5JYb64YTWy9ZgkLPlHr2sSITNkT+M8R1Is2juUW20156V9mRQVqwUwkG" + 134 | "MfkambxZmQqgtKroTwebVIQt4b+ZUPGSVJ1HP/7NVW0BAh1A2ucFoH06Sus+UnJQ0gOcEk2jCqvL" + 135 | "NJfQMrVuAi7R35C03KdoL9G3MqUY9abT5Zq4NCKtOQC8BQCUk4YXnG64/u+J2g0WpsiffWzqDrb9" + 136 | "bT5ONDGUh6TtTgN6fPggF0PTO2R0zWCc5XUx4jH9OD1mobr1gsY4xHD9suVHhOBSjquw+wLQWXrE" + 137 | "JefeYN0GuTvo7XztP/vYvn4sSewa9LMx8sjS3KgRrsVQZBPrQl8Qywsx04rorCv72E4puZSrRwvc" + 138 | "jh7TlZJJV9oe5ktNcZaFJd31pAsuA35I0V2Gkmy6OiFik3i1VohoYPVsOEJ6iZGhH+E+NrEkg6t1" + 139 | "XBShjIY2or0PFdNdVsLwIfXQpY0ud6jr/I97m2TkPJo0y+wBxjQ45y5jsIYx6VZDohjETw8pFBu6" + 140 | "1+4QLRDR0vuQMbMRXb162WY+7f7Zx16NwDJnt+N5xsDcsLBy9xJny/1kveneZL+tPupYm2bJB1au" + 141 | "zx7bj4xGGXl8/pqW3v7ZSolZ7u/5v8jqjrUz8RU7lOZP8pM0zvQrCn4RnI8RY88g1v1seCWOI7m/" + 142 | "x1aCmlt9OhwGLpXWlFZuJu8YqspTMQ0rmBi7S/amosAHmxhMQf2KukEqw5SJYEIrIIPAUqaR4xO4" + 143 | "qhiV/9/HLlUTlVwg31vNiUnaYkw/6HxZQaSbcixnlaUorSmkx4wBl/7k/UWrdiTMWI3snCLG21xY" + 144 | "GDcIE8uLwHajEe+sN/vY6zplqkLe0OaehDzrUgg4BvvVpZZubIZCS0kkxXi1UZXmjBLefmKC8s3T" + 145 | "tpNT46e+fWwOl762bsbYiC8W5qBdo4ne1sLGsfHjrUUzL2myeDIjPb2AQtIbMu0C9GM6IRLeKokx" + 146 | "Nw6bfPhjiJbIRC0iDHB4HtuDOJmPg1XuY4PSZUKNv3Ie3Ka03AuaPIqdVIxbpb+fmsZu81mAvzUR" + 
147 | "XPj6mqaisYaxXtG068ChnsVBH57ZNo7h1vHmBud7vP2odibzp36b1ER3xqqLE29ZWT0TsV+xKUDA" + 148 | "8ntM2cvbVJE2mXsgkwXKlTt2ANPUEyu11N2gjmrFqxwV7WDpyRRWf6nFc+VB+4HtasAikCzY4bBa" + 149 | "3i1t9ezSOohL3cFsX41JJr+0+kuynHy7Q1fFpAbKCcefnMQNXlq6VZv2xpea6A+vphqEttyleDjj" + 150 | "m761vzULVwOt9irP094Np6yLVJhNrymZa3d070vkpqetPVX6+fnyvi8vW203NSC+x3BKNEEmKo9k" + 151 | "pxSBgsAe0vfjNCmAO2keo9wyV8wrDiyTFrilWIwDb1c+qcfQn82AtfumMs/q1Ll+PKY6WgcR17ir" + 152 | "WWrR+IjnEua+lj6LgqUn4U6fmRyDc7R0IYzzVlu6lad/1EsZasMg9B4IPeMRwKU2yHAtUJ2foy2f" + 153 | "7eoxrfa/lqsLQOQHxW4cbVsr05vI35JtIN/QKERq5bbcS1m4jj8JJxt5TWkAyFigfqOLT5txO5rJ" + 154 | "OFHicGJLveGh281OWgcCFgX+5CoNmKKQ15WQmA1F841aXSbI3ilOjfIaw0yJHUo78dhkIl8XrR4J" + 155 | "NGcfAo2k8nMK79EfPdKDQQZc76vYNAtQd4PK22hTgZ2+XdPgsIiQm9J+H41/NzgQeUa+Y2Zf9SZS" + 156 | "B4qMCcHb9bdgveQjZgI8UkJc13queD7HVF+TjNtv1+Nllx55Saj/jB9RETHX8pM1X+uIr3zrT4Gt" + 157 | "3y7nvqfhdq6712+80fIMV50eNeJzdFyDiLn04qEMPGcVnXglAYCUE7jCTo8cP8u+0O8aLQ4ZPnoF" + 158 | "dldbhFnH5EaUDXbKiwocqbohqXygriSL5DhQHBdbTBf8xqLobHZVfiabKrZiOF1lDWV0HXEE1xRW" + 159 | "ysdY0LmJubVxUcU3KsuLles5RxBcqAgq1I1Ulijb6mMgvgzjMEPG0c9PM2OgFoNyTC9Ps0gs+bys" + 160 | "Sm48GzZOeHU3lEintLQmjOy5M1pwKmCce20UF1JyFcSDkJtMgCV8KfNZ2KSiko/hpt5mGHCNPGVc" + 161 | "l2/1HgLcVumyPOIOupKBmN9pnd9gsK2SXuArB4WBMItvaQfsV7NM3hNbJph0xCPbHhVuNZ0dXDcy" + 162 | "JJdxhiNbmOVSyXnTTsoA7oup9qpHgOrGUEByaZ/gIMFokgvcFW8K/phCu74ORq9uCfUTl7LiIhXi" + 163 | "apMIlVS3xy2KVBCb56a08oFtiTAxEQShIOUwOuaHdjZ6pPYZV3hPMZw0xbicTQjWGnZOjJKMRDCu" + 164 | "rTSarzhhrUqJz6yZglI7irv7+FA6uNZjMl+rSkJyt1X9LJY1ENvSDykHpdPCtDujZGJlnRTrlBiR" + 165 | "XbwSPr4JFy3NNOdNOln1RwWF0Yp1N3GS4WoxwJ+7/bFSGWappK+g75W6Y7f7LyokyMkTecVWoyey" + 166 | "9ViaALfo/H3uXl5LOrvZC40SxcUobYCOa7a4AcyNwdWybb0PEVQK6/6jc8HtPYUnhBseOSO/28kp" + 167 | "IPf/OENx1uW+MkHt9PBGFUpFCq8Wso6jTASX7Ntg4gZPSUsO5XvlvXCiYLwIIVtxONyRsDT2uO0J" + 168 | "xbU0RrBv1/VUTvy0XlwMuOKtILaLObCx/Sqamw0R3F2/mbRa2cwLeYuY1HpVC6S8wn9LHvqwh/H8" + 169 | "0/vKCmgez3jkXnmwiRvMQ2tfPpGkMkQkg4ZouMqHj1eerOuwONyRqfUxLJl7+a3jeoE4LvYobZiE" 
+ 170 | "TNqS0OoEQIbm1eghrtPUzUFSRqmcSsLrcAowPHqSnBThiNoU/DGNdsqu4l5TayIarNvaGABsIYym" + 171 | "tA68ztq3uCh52qCPlR7WpQBNyYb99gaWtXAk4oB5kruChTMOrC2OMuxQRGX6td+0xiUr881YHhJ+" + 172 | "LXf96arYR5IdQ0d08OinMfu9FQCbFT5xNeqRBi0plaqPbzjF59Er1e0k6vIrxhgXqn8OXiMiyKCW" + 173 | "0ELXsxTL6aEKm5+kLeHm9aSLugOnOVsEyQpeN+Yfx/j17n54TPexQ04sfeP12op3XPK9z0HR7lz5" + 174 | "bcJPWpCOs7bIzx4SpLHrzxZgGXjd5zUOsqZ6ozhAJtRrmeKAoyPYwu97HvluXWn1HNVcEpR71u08" + 175 | "7RqDae1WxWvzXmDQ6BsRWt4ClWB8a3LREaXxQzClZ6ZwcRWomRm2TnZvvC43C2C9G4NPBhiPUssJ" + 176 | "o4rwlbXOzXSjzF6auPpmJcx22J5fAnjpHfyKTxBTymADyeonYB8iKgePJyxOt2PGlTEDOD6q774f" + 177 | "D0S4gl38h3YDyhRH2llasuQBTgn5ZcdzlQZo4TkVKLx2tHIwm8tUcDRW7+CqKQkjOYNDZZzIY1cG" + 178 | "EpWmkh6+g6RIKSIMbeQC2+y7kj2QeDwzajsWMcFSSyRpf1E05FnSe9fcE8NjjQvhmFoXLYN4fUMH" + 179 | "CARthaaBOOldmVQlS5gnvwDYcvaM+Zq87ybOhMkka5p5mYkl2Lr57XcyEFoaFaqYl4i2X+5IpbH4" + 180 | "VI6WzEC6A72LFzOyqZfkeDiqmi8JLGuLoHgyyMt0pKPuCYpYQbC2M0gQBC8VU6gkSZSerb2NBr+W" + 181 | "oGgkR0DDuB/i1evMlXE4CmzNvPfP1ho357ca9BI0UzqzsfU68fq/m9n9zL+oOIrlQz7jPWt8lZfw" + 182 | "6GiMtWks5NEf2LDrvPputs36GXaHq6qa8TOtKpGlL7FNk45tCtVL6d00nxYToPyLJnKSGJGh3BqR" + 183 | "ld5c4/tUWvD9SVC2jpwvKE73c17YUSGIeMG43yTDGrgAnHBLGTMxVux+CScj0QU9JzBKyqepcF8L" + 184 | "ot7pXy0zBNvuMe14yxqro75/lP1h27x3qGi5Aet9kwntnkc1EuZxQaK5WmFI4p6Q24AXLlv1y041" + 185 | "WNq/XJnhvj0q90oeS6s1Hh36OGP6I87rhFd8FjNGfaPBmx/3fEC4HllE4BZ0zk5cTTjWUHWVSAKU" + 186 | "lKKIYC6h+LFRqIZG65eoYR9q0jIZplljslYEDFIjSQeDgQVReqI7xqt+DB9LH+aKt037xrmnOwAs" + 187 | "m4P4Gl2xNbU7XusCMa2s/5OsxpWmajay3Xiad/xkANdfjhk3XfFKeS7ifBF6dR03hjhUjf9agPkU" + 188 | "baWvOFG/EsXCTpexpltuBLq08+PsrV/0vIljE+z/OJHgVs47BmxTsoyoCg9rfUE3g3BtGFzbH0jn" + 189 | "jsl+GRiqgmBlb719bPToxw4rphdteEU/pTlwJUg4OtmNgBJpj2/aBbcKYLAmeDclLJeTyT/+v4J7" + 190 | "hnOvhtCIDxgKz0EP38ONQTiSsxqzAj2u85TPrmzh6ipyY0OCZVl8gr/u/Yqqwi2Dn4S8zZQ+MMSB" + 191 | "xrHruDFQyXK7dPQkwFvkTDzITL82jPjeeWTXhStFrfnhShUh8DwG+gfCFMKIs/rScXoeQF23/tzE" + 192 | 
"xN9zdi6ylQEXPwTPuPgmgXWw3IreRywy3tNqVk3kprAflzOZPw2NPkx9wBSAMEBqFLq7WevQNy4N" + 193 | "gmQqfxIhYHUUHKmg7EARbjvW8Oq4Jtnqqsqm/ID6Wv8z4zcC9c8+ttBYWT6arGSw42aaYLCgyQRq" + 194 | "ydBH7MBdcshI8k1FVxKUbMEL9kgoCYrdQzbDjxNEaErLYEwtezfPqcqsbTFNm7fApoo51+Lq6Di2" + 195 | "ABrx5ShhFUu/XQ6+afD9s30eIrupFUBu9rW4YVmxO9LPnsLZYkpAKTLB9EuH/erOhNEPKc2aFkGr" + 196 | "9NOMfQXbuHvOZ1IYFjf8b+cZg/4B3ayg9b8bqOLsrV6jrtYTXlUWJM/b7gBBcjsp2lY2A7feeDAV" + 197 | "3dR4yRpHulwZNgJzmKBnAMO8uhuJsY9TwbZKDq0Mc+6slmXJCYqUJekVo81UEozHVbsioZnvhnig" + 198 | "ewuocNYTTUe4XPi7dzuSinpn2R69/ozSWX5eMucvvR5fT9KAoIk6k+IwfXwA7AXwgQEycmgQC4M9" + 199 | "u7qMNsk4Vb2dhB/9TrMn86AhRiCcJ6k/BxdUhWYqUEe5GW1E0XNhyWaV1S3ZIO+c2KN05L0EzaCq" + 200 | "gRUyl1xv5rlxmWPAReti8ZegQ50r53Wsad5Sz674hsfDm1XHwEIuxyx5yqkk+nJ5UysEXqt8y31s" + 201 | "EuDST0MQkC50u2MUAjgesraUBgDcDoFte3Flcnqg3T6iFPejFCGWemluDpY0spCGkW/jAzL8OPv+" + 202 | "882hyF4KtzJsarJyVY9x1Lgw6ewvE7Iv/mlf6zl/lM03mEqnn29m84kgtY+CTH1+ydOh8Vjnc6Pq" + 203 | "mQJKKjzLuCe4YHZQ53dYTGFCdYRjSUPHNKUMR/tNTfBnVVmjnhUWveCJDS2hixtO9cPW8vBiAcIN" + 204 | "UcD6JdQPXhafGV2UhNAdq5ywEtBrICxaEZ1+XMeEEknIwL6u2DwFL0vlAHdbkilqX6uldHTCuY2W" + 205 | "9CdxlON8t/pkyS8qIj8l3eOKl9s2ClUYB3s94psGSio9RTPo7zbc+cNkbyQ64zfRrlID9hD7/86A" + 206 | "Ixu9PZkd/Hkzcxgq+E4hRvscsixn6lcQ+PAhjL+vMMag7Um9zSdYn96tS2ap496HNKlQjoROMpbW" + 207 | "8XhlV8Y2HU3Yj+x5ynwAkcdOVfeThTxk+mPjfOmOaV8SEq/KgSq/MtXWFdUzPuR/KcVkI8Ba1wfX" + 208 | "wcNob+hfckskqDFOhH+Iu26pFunzIBCRZZrFReNIY6RIdDd2TOu+DhkA8hW2RwfI87qcczGp7oe6" + 209 | "Wn6tZ8wX1uN/TOo4TtRpfVsdDUp19qBEdHs0LGe0vlXG77mg+rBe3rArXo7oY5njuC64mkqTbZeG" + 210 | "MjFYvp8sSMXhqBjMqzV9j57VEmDw8F1DC/NTucEgJhziI74Er/wp1qt/rAXVT3GOE89oJzbCTLYp" + 211 | "9GrxKZfQ5So3u92r0jswdW7U427Kj9pYRJb3FXC+eik9UQvZUEloJyN33HcyZWd4KeZIf1UOplFH" + 212 | "9cyBbF4nMRowSuqdwX5qRDEOUhK0tfpPVbN+poQc4VzjTi54bWPhKHBUIWOMFHQoecuRbiBCiuYf" + 213 | "ot7jAlBoOlu0mv34OXwjJfwzYSSP90MESiSWg637jyO1BkQxQcpB5BGO9r7cZ75NlnIzlJ7DxSAt" + 214 | "0XCO+9A4ssPSdjIP2wzb2p3UzjanbhfOsYmKDeOp6WCU4K/bEEtGC954g5HEhFvluAqSBnNvJ1zH" + 215 
| "sK8D0/2p/URa80210TbVOiFlWFdADPEoiDHwPdpL3gELxAWjhKRO84cFOow/xUnmANmmx0BNVjj2" + 216 | "z0TS4uNSFDu53qiUfTo936bBocSbFqBafrWJdWBLTNJnpsLC6cN7pPUbAPoxLd66zHfQBH1xAyiY" + 217 | "R0X5HnLwEpCvC9+HlTIo9dFhX0eCWpWlGEkIz7v6sSsbQfBhbzCpumJFbMS2XGu5QMSJn5DN2KwX" + 218 | "QxxGz9KtHSuq404yLvXzZenj5RDZxIx3nNktYAY2Idbc28fmzojoV/d13lGPCoImMs3ROoqmmmC8" + 219 | "wTsCFsNZoH7ELdq4fTykIu1pCIhlFE2/bt3plxbWG0zKfsGtojiTFYSFxttz8icE4s6yjx36hqmv" + 220 | "H4tfmk9OuZUJqq6c7J8AID2z8xSwQU5YhxdHG+Nxd1kRUr525AFriWfr67tWd6ybcXhc0AMX+Q9i" + 221 | "QiFLxj37uYcn4gUG4WQ9QrvFrFVJDyFdHbZPRejBMISedIS6tEMtx7owdNA5cILbY+64bcxVList" + 222 | "zpVH1W8rAhXHZ5540jywjXdH91Vz6Q9zUzANjoHvJHX2okuoHz0n97EZIUe9hBjqB8Mm1gimDsdi" + 223 | "b3A1gmpZ/Pd5G26HvyflDmTJmFVqKsc1LQiHNrdya4o1gRFLFO15WT0F/bX5bhFogmxvRG4peMaB" + 224 | "CP/VJYH8jJwVjhPlKKGXBpcff2RL5Xrk5mfjgvF4bdDdMHSpkMt87xF3wjVw3tSrPIDbiP0n5CkD" + 225 | "xnX7ef8KO+Bxx208fClNiTG0MZGrqMV1kzAl84cNgzJamIBVW3HMQH2OSfM4YWTiIJLXt9n1UEvV" + 226 | "c6TqOUxVsdalrZh23iIOmyZOYxQnM/FAYzYFAwxUhXshIA6xdrSZWAdvEVIbZ8YlHBz1CpPUn3mW" + 227 | "0IZDtANxw1gxzLOZzDoLf9ZJTVSPdwNMBvqx1BYahxp0wnppw4Yp1Ub8r7sqJoDt8GwVhKGGnkHp" + 228 | "SGeLGip31af7Lx/YUUuV9Hozgpx0bIiQ5ZR6ZDp3ohVKcTgM0e9AiqTUp2zDVZgChDsmVb2mbOUK" + 229 | "bKI7htq4GhGihgWOeVDSf11OpD+cmEtMHrFjl4mJFT9EmztODLrfFq/sjctYgAP0uttyaxsyS52c" + 230 | "4vn4w04M9b5qsh92+L5HLFtc9E64N21shUA9LIOfSBVW9+Mmg/PFGU6VO+u0JFyhBKZ9YyqrUgT7" + 231 | "vnblDslzGTaS/+l0rcS1mGXXACkDiH/reWKfMVb2BHa61gRTqsynFHVsjN5AeoySShm8TMtZ/rgW" + 232 | "lOvEuLFIgDX1Gckvyruyw8i88OuD2r8KILFnGIxal9JJegH84cNBOWDK5VFutrlUG/aF+Ia6K2Lp" + 233 | "6rgnpfGn6h6lzXPygH+PmVLOmXFs7pytPto4QC1dsAoYYTr3h/T+p3mD2HfGDeJViStGr2XT25+b" + 234 | "s9aKduSbvipHjZrCqVLtxIMkqaMfHWDMqRC1/sgtI3DnD1mWhEBroyrfQIcBww6DbmqxdeeP8khA" + 235 | "zUVP0vkW0rzCMaMY1NfinGOZWq8jMxmpciUKmzWR7aQ/D46FsXUL2fkYzthWI2fIviSadqImrKuy" + 236 | "YEpDEeNFgt+q4ixomSdC9Wo1YNSZEe0lm0GlpN1bU7TKqCAgVRF1zMGJhxLCvtplG5kN3Ota4EtA" + 237 | "Ysu3gyfoEQIq7RNjnTF1xVUdR4N3KJWLHLCR8kbAKgYFebXqVwN0pDpUdvlv8kluT/Qm+0TNTsVZ" + 
238 | "ydncXzWfRcVJLAW4bpVXsb7C8GxHrqjAp3rK4waOjiBieP3mzcbple3V41YQyzZ5BkHtgI0Rv6ni" + 239 | "zw0UEd+ErBcYru2p3kiSKaJL9fVLOy/5uSqbXuFKuj/eaEj2pIutuTAg/WvYG6eT0H7zxeZ6RgZ0" + 240 | "IM8+KvJyc4CkN5AD+N4r4HS2wlYLdx/eqq27Y+ePh+4MLHMZ5nnDw405J2aW6fXnc25qcVVBzJ+Y" + 241 | "a8EsbahuVXErphvjBqqC1kB7GC4h1Qxr2lwG7VcATYtvCP5XQ8UWGggYjgJHFtwQLlLq1VznPGEd" + 242 | "E0N2N9xg8qdSPYKxntg5KuNOJieoxdSo9mvMkdzZx6aVx0878teNYz3Zxy7jvz/JiSw5spjLXMxe" + 243 | "janY1azs6K244jPj1tF++9js/+ihlmlNP5Wesdmdjz4bGbqCNgDiCRGmnbEJglgqB5Qzf04fFruP" + 244 | "/CxuIxqMgbYds+vt1PDx0TQdwr/B+vrmlPP8JX4SvWNdXq1Kitm4ZSPqPWnG6v65UFmxTlrV50SV" + 245 | "37cfATtk7BgJwnwDsUQNiVLL3LVfmylfGZaKqWdawP3ppmyv1mqACxG1GdQsmfRG4qsJVBPUVbIQ" + 246 | "EmCsbvT7fSo0K0tqdl6yjXDSFW3ZqAdSWqNgQ5w+eriytTOWXcnKkDNEdRNNvaE7AFw5ID5GNpoK" + 247 | "oypn8InBpkofbNmRPVazw4e7vIcwfqIoN2wEhG4WDY/UcPdPuK4dsyrXV4z9B1nL7UCkhHYIWzvG" + 248 | "7YTMPbGHxdWPNDoflwyDIikp1PekuNx8aF6nuVx4C+Kknud1aOxaQLYh5Qmjjnvqej6LbP/sY9fb" + 249 | "oIqCZ1ds3Qr91Dis6W1+oUUdT6Y2W//bx1YUOt1V2KMQ/bMQZI7fGsSJVIJsGQCJ9JNJ3GOMeaKw" + 250 | "7sS2u8zWcgaK4aVtOomAFsB5DBIfYSJzX7K3EjeSu3JbQhVMVvotSLY2kwKofXpmSJTRvCKruAY7" + 251 | "fxb5JCnHhSMl+I2tGCVXsd59gg3OGKz+9rFn/4hjHODKZJG8asxkcLqDwA4po+2bs6XfvnHduX5o" + 252 | "4OS7DxX7YJUL3klPpZq9nXqKf5PNsBhn5DRxTV02Ih9YKUfpNXChoudmcayk9vxXQJhIk32Ct0Jy" + 253 | "KQe7Fbj4e/zy2uIVYMY5f2luyXLiIIn7u1/aBCsqbVgkJA6GTmRhvkEBSgsUzMsmwqSgwsVUK/Wu" + 254 | "Fyb5nEUQMfEiU5WqKMiNnG9SrM4XCKU8h3OdMsR95A6RZJyuoOEi0LMIfY26O51zSp5K5DrkhAFf" + 255 | "55Oo7Tbel0q65Dv5PkDLPifBd6pnZckl3iq+4d+KXzwYRcWd+HOm42ppPrkKJ41i5BpYuByBbYbK" + 256 | "ptrkYfyYD38nv0+LamL2N+HB90omA0qTUbSzcAVGF00zIbHfCV8eQix4wIhiZhPxYB0VfB3OPv6l" + 257 | "itcDsy4mOoaNeiTHs0rE1hXyjZkKG0LxVN/nBOheEVNwoqOzW3LGYkcQhi5hq9BpMEzbfGX4q7YZ" + 258 | "3242LcBsX2zLlrrLxNXAL0oczZZsqUkfxSbWFMG2JG+TP4UeWGQFCjMzAdZ2C9P0US8vWVkWUDgQ" + 259 | "qeq6j31//Wg/3q67XGtrTLw0C6VNbWJ9rv5MhExXvhUw1fnisjYF2mLdkSzTKVD4xoIh0ErP7x6/" + 260 | "7UO1imbFSzUqdu87f6Idbr81xMd5DTdfhe2YlOdEE1YoaxTzbXE0fKD0Z0xTuACWMqtdfIlHrnki" 
+ 261 | "J15V1v/2sTvuW79Fz4pr4wGrnWjR6CxrpJDGUgqdG7naPo4+TCm+JU5QkWSgcluAEJXyuckA8caK" + 262 | "ew5H3jLa9T6aO84IrafDYEEyTf8LQNAQYxfF1s1RzPwxQuwXlIwdVpm1PNX6QhrNarxP3fFzAkxe" + 263 | "ayS5xiKjlGtxFSeqLU207nyNE/nXEl+jVM8auvXXVCDL63heAenC82MEJd0jSlZTEQPyDZ1KTuc+" + 264 | "9oYbsY/tvwMHLSNw7QwsD667n4b7I8vMnMI72PxNpWqpfLOP3XEVjcJQrBW73tO743LXQQ2HEw8g" + 265 | "tExApodXKFNefS/OoPl0GrPIwmR4JAWKYOZaO/dh71yI3pgioJRMOv3ztF1FU+4Ti00CknQwaqJe" + 266 | "4SDlWBohpys2cPb7Ruk+rYJZJpEb97HLsQHT55j+kjJvfgJp2IdrxlgTvI1zEgVqdF3z0LC8fBz+" + 267 | "1jXxVLwcPzMsyr/t5shiJsz8OU0hxZME9Y4jiuebToz3OuYGFabEukGuYLOPHTIBb6vKZodzrQKp" + 268 | "xj8YR/rbyjvO3UyLYD+6rNc/4EPe77UUoqUEA1PSYRIEuM97m/ELm97ifdD2nSmXaN2Ji9Ha8MtE" + 269 | "iCagVWI2jCg0m5SKE7DmTDbWs+F/9mONbx8bZhHkfm4LmqLd+MiOIU88fofTLCT2OkYqpwNcCS4K" + 270 | "wNF5insltZ236gTgIG3EvtzK53Ry1k+Ir2ugyCk5U4Jw+pH73VjvDxTHh+0xSlkUmeYY1s9aiwS5" + 271 | "AnJUbWyJ7U/5fU/JrApnp5/5Uxq9M7FNeY3nDCCYn4XqGNlmy/jc8c1UhqOsaZ2n1vLAVR+J8YYI" + 272 | "+Cdw3DFJHq7ckSz8sZlrzqEyDoeVEEBvpdEL9YDQUnod00QHdILBEKXFVI5VApetNjnRRnpDI0/r" + 273 | "lIWombWoC3KHOKkjoELBbeHV6y93fUQ4XdpSE5ezTYyLhxEsZKty2HpEa2HU6a1gCE1O/w4ui1GD" + 274 | "W9hmVjrETEnwNQCmMdduAJmW3SSVWuoM8dBiMcpVJghKtYrV1ErHAFF22N1Z7Gu6XbaktbKQzuAM" + 275 | "CK+AbuUYuMtURTL66aae56EZg1lg4MDPpX21KYyaj/nunyAvC3hQiftcKFovSauE+aEd35V1YQzx" + 276 | "d9iMhHoH6fYLXptVyzsPs2Hnj4OypW0wYG28v8rd3L0c6xmcBwl2b5YtZKYtnJT5xIYA9eMDIOa4" + 277 | "OfVnHzsOzBpmIRGwREsDZjVgypkxPCOWgas8kX5vVimW9mXAYjJocqWW99ULYBrpibrLDvEoG0Pb" + 278 | "VltE2w2mjuvdFUn2LQsVXk5XFkkfuwKs5EQxLFSWqqyRm2p5L96VJg3RYDJ72c6M3wqE6U2mSZLG" + 279 | "ElPC8kdNNbrBnma7EYm2JKmY4PpIVUUrGuYMvjiUk36pzS+ssvrtY7NvYoAJcAyaGInAZpBxPETS" + 280 | "+NLBTmVgJ7rxIvinw3iz+BibfWwoUrHioMfgfyUYJLk77gkFvaqm19nrPjbEbMsrVpfVn00x6NY+" + 281 | "16Wyj/1FC00P4v/QXfh+BAERW3QAdEopjiMy1hwYv1g0ujmlufahP8KajLRkBDVR3rY/HmR9e5t+" + 282 | "DLwD9VH2sbVj+ZhsVKmTJ6KGPPlt81CKPCa2SVjW6Ongha2NMP5sXLpTGR119Txpn4eeG08lzs3K" + 283 | 
"S7yVaHgm7z2thKskZD47V8lLhBjIz+8xzIv9C+QXKo8pVzCD8vHYSr0gVyjfOll3xaAGLEaJc6gG" + 284 | "vFd2j+rFS3wCUhrgojqeo19Edk9Vpr2mJ8pZEyo8u6dsmLql1S8wmBc7LcAMhEaCHKPbUbV6AxUO" + 285 | "61Sox+oAJewM4aYVN8ZOd55tAEVoo2ZzsgHZGt6Tn7e20c4fMYWmh5IA3pCtTlPrlupuXa12vmGK" + 286 | "if6HbT9HSmaGcsBdQkOQ70POyuLRvVyhRpnwx8VQZFa5PBLuxWm3iDh/UhBMCq3HdR876ViVHFn7" + 287 | "5TXyypjAeKesihjAk7FiZPMrb7MyCpKsAogbYxjmb7IWFsr2U/uOMb0QXuxPiuAVANyVec2fldHJ" + 288 | "KCXYaADG3Nr08y5N5kRgYrThG2Cq4uEtYLdQdTdQdLmjDUTRFnBHf5jfVlmZFaFZ4w47WlqJw6dY" + 289 | "Nnx7XchSY7leko7SBsVQ7ZAKttQOySP6e2WOv3/9B2z4DPRQwwAA"; 290 | try { 291 | DNA = new LineNumberReader( 292 | new InputStreamReader( 293 | new GZIPInputStream( 294 | new ByteArrayInputStream( 295 | new BASE64Decoder().decodeBuffer(z))))).readLine().toLowerCase(); 296 | } catch (IOException e) { 297 | throw new AssertionError(); 298 | } 299 | } 300 | 301 | @Test 302 | public void test007() { 303 | PatternSet pat = RegexCompiler.compile("007", "008"); 304 | IndexedString s1 = pat.match("as00haklsdjhfla00"); 305 | IndexedString s2 = pat.match("7jhd7dsh007dsfa"); 306 | System.out.println(s1.append(s2).getMatches()); 307 | } 308 | 309 | @Test 310 | public void testOverlappingMatches() { 311 | PatternSet pat = RegexCompiler.compile("abra", "braha"); 312 | IndexedString s = pat.match("habrahabr"); 313 | System.out.println(s.getMatches()); 314 | } 315 | 316 | @Test 317 | public void testDNA() throws IOException { 318 | String[] regexes = new String[]{ 319 | "[cgt]gggtaaa|tttaccc[acg]", 320 | "a[act]ggtaaa|tttacc[agt]t", 321 | "ag[act]gtaaa|tttac[agt]ct", 322 | "agg[act]taaa|ttta[agt]cct", 323 | "aggg[acg]aaa|ttt[cgt]ccct", 324 | "agggt[cgt]aa|tt[acg]accct", 325 | "agggta[cgt]a|t[acg]taccct", 326 | "agggtaa[cgt]|[acg]ttaccct", 327 | }; 328 | 329 | PatternSet pat = RegexCompiler.compile(regexes); 330 | 331 | IndexedString idna = pat.match(DNA); 332 | 333 | int[] expectedFreq = new int[regexes.length]; 334 | int[] actualFreq = new int[regexes.length]; 335 | 336 | 
for (Match m : idna.getMatches()) { 337 | ++actualFreq[m.whichPattern()]; 338 | } 339 | 340 | for (int i = 0; i < regexes.length; i++) { 341 | String regex = regexes[i]; 342 | Matcher m = Pattern.compile(regex).matcher(DNA); 343 | for (int s = 0; m.find(s); s = m.start() + 1) { 344 | ++expectedFreq[i]; 345 | } 346 | } 347 | 348 | for (int i = 0; i < regexes.length; ++i) { 349 | assertEquals(expectedFreq[i], actualFreq[i]); 350 | } 351 | } 352 | 353 | @Test 354 | @Ignore("Not really a test") 355 | public void testPerformance() { 356 | String[] regexes = new String[]{ 357 | "[cgt]gggtaaa|tttaccc[acg]", 358 | "a[act]ggtaaa|tttacc[agt]t", 359 | "ag[act]gtaaa|tttac[agt]ct", 360 | "agg[act]taaa|ttta[agt]cct", 361 | "aggg[acg]aaa|ttt[cgt]ccct", 362 | "agggt[cgt]aa|tt[acg]accct", 363 | "agggta[cgt]a|t[acg]taccct", 364 | "agggtaa[cgt]|[acg]ttaccct", 365 | }; 366 | 367 | PatternSet pat = RegexCompiler.compile(regexes); 368 | Pattern[] pats = new Pattern[regexes.length]; 369 | for(int i = 0; i < regexes.length; ++i) { 370 | pats[i] = Pattern.compile(regexes[i]); 371 | } 372 | 373 | for(int n = 1; n < 50; ++n) { 374 | StringBuilder sb = new StringBuilder(); 375 | for(int i = 0; i < n; ++i) { 376 | sb.append(DNA); 377 | } 378 | String dna = sb.toString(); 379 | 380 | int[] expectedFreq = new int[regexes.length]; 381 | int[] actualFreq = new int[regexes.length]; 382 | 383 | long t0 = System.nanoTime(); 384 | IndexedString idna = pat.match(dna); 385 | long ireIndexingTimeMs = (System.nanoTime() - t0)/1000000L; 386 | t0 = System.nanoTime(); 387 | for(Match m : idna.getMatches()) { 388 | ++actualFreq[m.whichPattern()]; 389 | } 390 | long ireMatchingTimeMs = (System.nanoTime() - t0)/1000000L; 391 | 392 | t0 = System.nanoTime(); 393 | for (int i = 0; i < pats.length; i++) { 394 | Pattern p = pats[i]; 395 | Matcher m = p.matcher(dna); 396 | for (int s = 0; m.find(s); s = m.start() + 1) { 397 | ++expectedFreq[i]; 398 | } 399 | } 400 | long javaMatchingTimeMs = (System.nanoTime() - 
t0)/1000000L; 401 | 402 | System.out.println( 403 | n + " " + ireIndexingTimeMs + " " + ireMatchingTimeMs + " " + 404 | (ireIndexingTimeMs + ireMatchingTimeMs) + " " + javaMatchingTimeMs); 405 | } 406 | } 407 | } 408 | -------------------------------------------------------------------------------- /src/test/java/org/jkff/ire/LinearISTest.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.fa.BiDFA; 4 | import org.jkff.ire.fa.IntState; 5 | import org.jkff.ire.fa.PowerIntState; 6 | import org.junit.Test; 7 | 8 | import java.util.List; 9 | 10 | import static org.jkff.ire.util.CollectionFactory.newArrayList; 11 | import static org.junit.Assert.assertEquals; 12 | 13 | /** 14 | * Created on: 31.07.2010 12:16:56 15 | */ 16 | public class LinearISTest { 17 | @Test 18 | public void testABConDFA() { 19 | DFABuilder forward = new DFABuilder(4, 0, 1); 20 | forward.state(0).transitions('a', 1, null, 0); 21 | forward.state(1).transitions('b', 2, null, 0); 22 | forward.state(2).transitions('c', 3, null, 0); 23 | forward.state(3, 0).transitions(null, 3); 24 | 25 | DFABuilder backward = new DFABuilder(4, 0, 1); 26 | backward.state(0).transitions('c', 1, null, 0); 27 | backward.state(1).transitions('b', 2, null, 0); 28 | backward.state(2).transitions('a', 3, null, 0); 29 | backward.state(3, 0).transitions(null, 3); 30 | 31 | BiDFA bidfa = new BiDFA(forward.build(), backward.build()); 32 | LinearIS is = new LinearIS("xxxcabccccc", bidfa); 33 | 34 | List matches = newArrayList(); 35 | for(Match m : is.getMatches()) { 36 | matches.add(m); 37 | } 38 | assertEquals(1, matches.size()); 39 | assertEquals(3, matches.get(0).length()); 40 | assertEquals(0, matches.get(0).whichPattern()); 41 | assertEquals(4, matches.get(0).startPos()); 42 | } 43 | 44 | @Test 45 | public void testABConNFA() { 46 | NFABuilder forward = new NFABuilder(4, 0, 1); 47 | forward.state(0).transitions('a', 1, null, 0); 48 | 
forward.state(1).transitions('b', 2, null, 0); 49 | forward.state(2).transitions('c', 3, null, 0); 50 | forward.state(3, 0).transitions(null, 3); 51 | 52 | NFABuilder backward = new NFABuilder(4, 0, 1); 53 | backward.state(0).transitions('c', 1, null, 0); 54 | backward.state(1).transitions('b', 2, null, 0); 55 | backward.state(2).transitions('a', 3, null, 0); 56 | backward.state(3, 0).transitions(null, 3); 57 | 58 | BiDFA bidfa = new BiDFA( 59 | forward.build(), backward.build()); 60 | LinearIS is = new LinearIS("xxxcabccccc", bidfa); 61 | 62 | List matches = newArrayList(); 63 | for(Match m : is.getMatches()) { 64 | matches.add(m); 65 | } 66 | assertEquals(1, matches.size()); 67 | assertEquals(3, matches.get(0).length()); 68 | assertEquals(0, matches.get(0).whichPattern()); 69 | assertEquals(4, matches.get(0).startPos()); 70 | } 71 | 72 | @Test 73 | public void testABorAConNFA() { 74 | NFABuilder forward = new NFABuilder(5, 0, 2); 75 | forward.state(0).transitions('a', 1, 'a', 3, null, 0); 76 | forward.state(1).transitions('b', 2, null, 0); 77 | forward.state(2, 0).transitions(null, 2); 78 | forward.state(3).transitions('c', 4, null, 0); 79 | forward.state(4, 1).transitions(null, 4); 80 | 81 | NFABuilder backward = new NFABuilder(5, 0, 2); 82 | backward.state(0).transitions('b', 1, 'c', 3, null, 0); 83 | backward.state(1).transitions('a', 2, null, 0); 84 | backward.state(2, 0).transitions(null, 2); 85 | backward.state(3).transitions('a', 4, null, 0); 86 | backward.state(4, 1).transitions(null, 4); 87 | 88 | BiDFA bidfa = new BiDFA( 89 | forward.build(), backward.build()); 90 | LinearIS is = new LinearIS("xxxcabcacccc", bidfa); 91 | 92 | List matches = newArrayList(); 93 | for(Match m : is.getMatches()) { 94 | matches.add(m); 95 | } 96 | assertEquals(2, matches.size()); 97 | assertEquals(2, matches.get(0).length()); 98 | assertEquals(0, matches.get(0).whichPattern()); 99 | assertEquals(4, matches.get(0).startPos()); 100 | 101 | assertEquals(2, 
matches.get(1).length()); 102 | assertEquals(1, matches.get(1).whichPattern()); 103 | assertEquals(7, matches.get(1).startPos()); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/test/java/org/jkff/ire/NFABuilder.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire; 2 | 3 | import org.jkff.ire.fa.*; 4 | import org.jkff.ire.util.WrappedBitSet; 5 | 6 | import java.util.List; 7 | 8 | import static org.jkff.ire.util.CollectionFactory.newArrayList; 9 | 10 | /** 11 | * Created on: 01.08.2010 13:47:21 12 | */ 13 | public class NFABuilder { 14 | private IntState[] basisStates; 15 | private int numPatterns; 16 | private int initialState; 17 | private List transitions = newArrayList(); 18 | 19 | public NFABuilder(int numBasisStates, int initialState, int numPatterns) { 20 | this.basisStates = new IntState[numBasisStates]; 21 | this.initialState = initialState; 22 | this.numPatterns = numPatterns; 23 | } 24 | 25 | public StateBuilder state(int i, int... 
termPatterns) { 26 | return new StateBuilder(i, termPatterns); 27 | } 28 | 29 | public DFA build() { 30 | final WrappedBitSet[][] char2state2next = new WrappedBitSet[256][basisStates.length]; 31 | for(int i = 0; i < 256; ++i) 32 | for(int j = 0; j < basisStates.length; ++j) 33 | char2state2next[i][j] = new WrappedBitSet(basisStates.length); 34 | 35 | for(Transition t : transitions) { 36 | if(t.c != null) { 37 | char2state2next[t.c][t.from].set(t.to); 38 | } 39 | } 40 | for(Transition t : transitions) { 41 | if(t.c == null) { 42 | for(int i = 0; i < 256; ++i) 43 | if(char2state2next[i][t.from].isEmpty()) 44 | char2state2next[i][t.from].set(t.to); 45 | } 46 | } 47 | TransferTable transfer = new TransferTable() { 48 | public TransferFunction forToken(Character token) { 49 | return new PowerIntTable(char2state2next[token]); 50 | } 51 | }; 52 | 53 | final WrappedBitSet justInitial = new WrappedBitSet(basisStates.length); 54 | justInitial.set(initialState); 55 | return new DFA(transfer, 56 | new PowerIntState(basisStates, justInitial), PowerIntTable.REDUCER) 57 | { 58 | @Override 59 | public PowerIntState resetTerminatedPattern(PowerIntState state, int pattern) { 60 | WrappedBitSet reset = new WrappedBitSet(basisStates.length); 61 | reset.or(state.getSubset()); 62 | for(int substate = reset.nextSetBit(0); substate != -1; substate = reset.nextSetBit(substate + 1)) { 63 | if(basisStates[substate].getTerminatedPatterns().get(pattern)) { 64 | reset.clear(substate); 65 | } 66 | } 67 | reset.or(justInitial); 68 | return new PowerIntState(basisStates, reset); 69 | } 70 | }; 71 | } 72 | 73 | public class StateBuilder { 74 | private int state; 75 | private int[] termPatterns; 76 | 77 | public StateBuilder(int state, int... termPatterns) { 78 | this.state = state; 79 | this.termPatterns = termPatterns; 80 | } 81 | 82 | public void transitions(Object... 
char2state) { 83 | for(int i = 0; i < char2state.length; i += 2) { 84 | transitions.add(new Transition(this.state, (Character)char2state[i], (Integer)char2state[i+1])); 85 | } 86 | WrappedBitSet t = new WrappedBitSet(numPatterns); 87 | for(int tp : termPatterns) 88 | t.set(tp); 89 | basisStates[state] = new IntState(state, t); 90 | } 91 | } 92 | 93 | private static class Transition { 94 | int from; 95 | Character c; 96 | int to; 97 | 98 | private Transition(int from, Character c, int to) { 99 | this.from = from; 100 | this.c = c; 101 | this.to = to; 102 | } 103 | } 104 | } -------------------------------------------------------------------------------- /src/test/java/org/jkff/ire/regex/RegexCompilerTest.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.regex; 2 | 3 | import org.jkff.ire.fa.DFA; 4 | import org.jkff.ire.fa.PowerIntState; 5 | import org.jkff.ire.util.WrappedBitSet; 6 | import org.junit.Test; 7 | 8 | import static org.jkff.ire.regex.RegexCompiler.*; 9 | import static org.junit.Assert.*; 10 | 11 | /** 12 | * Created on: 04.09.2010 11:42:12 13 | */ 14 | public class RegexCompilerTest { 15 | @Test 16 | public void testEmpty() { 17 | DFA dfa = toDFA(toNFA(new Labeled(new Empty(), 0)), 1); 18 | assertTerminatesPatterns(dfa, "", true); 19 | assertTerminatesPatterns(dfa, "a", false); 20 | } 21 | 22 | @Test 23 | public void testAnyChar() { 24 | DFA dfa = toDFA(toNFA(new Labeled(CharacterClass.ANY_CHAR, 0)), 1); 25 | assertTerminatesPatterns(dfa, "", false); 26 | assertTerminatesPatterns(dfa, "a", true); 27 | assertTerminatesPatterns(dfa, "ab", false); 28 | } 29 | 30 | @Test 31 | public void testAB() { 32 | DFA dfa = toDFA(toNFA(new Labeled( 33 | new Sequence(CharacterClass.oneOf("a"), CharacterClass.oneOf("b")), 0)), 1); 34 | assertTerminatesPatterns(dfa, "", false); 35 | assertTerminatesPatterns(dfa, "a", false); 36 | assertTerminatesPatterns(dfa, "ab", true); 37 | assertTerminatesPatterns(dfa, 
"abc", false); 38 | } 39 | 40 | @Test 41 | public void testAorB() { 42 | DFA dfa = toDFA(toNFA(new Labeled( 43 | new Alternative(CharacterClass.oneOf("a"), CharacterClass.oneOf("b")), 0)), 1); 44 | assertTerminatesPatterns(dfa, "", false); 45 | assertTerminatesPatterns(dfa, "a", true); 46 | assertTerminatesPatterns(dfa, "b", true); 47 | assertTerminatesPatterns(dfa, "c", false); 48 | assertTerminatesPatterns(dfa, "ab", false); 49 | assertTerminatesPatterns(dfa, "ca", false); 50 | } 51 | 52 | @Test 53 | public void testAorMore() { 54 | DFA dfa = toDFA(toNFA(new Labeled( 55 | new OnceOrMore(CharacterClass.oneOf("a")), 0)), 1); 56 | assertTerminatesPatterns(dfa, "", false); 57 | assertTerminatesPatterns(dfa, "a", true); 58 | assertTerminatesPatterns(dfa, "aa", true); 59 | assertTerminatesPatterns(dfa, "aaa", true); 60 | assertTerminatesPatterns(dfa, "b", false); 61 | assertTerminatesPatterns(dfa, "ba", false); 62 | assertTerminatesPatterns(dfa, "ab", false); 63 | } 64 | 65 | @Test 66 | public void testABorMore() { 67 | DFA dfa = toDFA(toNFA(new Labeled( 68 | new OnceOrMore(new Sequence(CharacterClass.oneOf("a"), CharacterClass.oneOf("b"))), 0)), 1); 69 | assertTerminatesPatterns(dfa, "", false); 70 | assertTerminatesPatterns(dfa, "a", false); 71 | assertTerminatesPatterns(dfa, "ab", true); 72 | assertTerminatesPatterns(dfa, "aba", false); 73 | assertTerminatesPatterns(dfa, "abab", true); 74 | assertTerminatesPatterns(dfa, "ababa", false); 75 | assertTerminatesPatterns(dfa, "aabab", false); 76 | } 77 | 78 | private void assertTerminatesPatterns( 79 | DFA dfa, String input, boolean... terminatesWhichPatterns) 80 | { 81 | PowerIntState s = dfa.getInitialState(); 82 | for(char c : input.toCharArray()) { 83 | s = dfa.transfer(c).next(s); 84 | } 85 | WrappedBitSet bs = s.getTerminatedPatterns(); 86 | for(int i = 0; i < terminatesWhichPatterns.length; ++i) { 87 | assertEquals("Pattern " + i, terminatesWhichPatterns[i], (bs == null) ? 
false : bs.get(i)); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/test/java/org/jkff/ire/regex/RegexParserTest.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.regex; 2 | 3 | import org.junit.Test; 4 | 5 | /** 6 | * Created on: 04.09.2010 13:58:49 7 | */ 8 | public class RegexParserTest { 9 | @Test 10 | public void testParseDoesNotFail() { 11 | RegexParser.parse(""); 12 | RegexParser.parse("a"); 13 | RegexParser.parse("a*"); 14 | RegexParser.parse("a+"); 15 | RegexParser.parse("a?"); 16 | RegexParser.parse("ab"); 17 | RegexParser.parse("a+b"); 18 | RegexParser.parse("a|b"); 19 | RegexParser.parse("abc"); 20 | RegexParser.parse("a|bc"); 21 | RegexParser.parse("ab|c"); 22 | RegexParser.parse("ab|bc"); 23 | RegexParser.parse("[abc]"); 24 | RegexParser.parse("[abc]+"); 25 | RegexParser.parse("[abc]+|[a-z]?"); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/java/org/jkff/ire/rope/RopeBasedISTest.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.rope; 2 | 3 | import org.jkff.ire.DFABuilder; 4 | import org.jkff.ire.Match; 5 | import org.jkff.ire.NFABuilder; 6 | import org.jkff.ire.fa.BiDFA; 7 | import org.jkff.ire.fa.IntState; 8 | import org.jkff.ire.fa.PowerIntState; 9 | import org.junit.Test; 10 | 11 | import java.util.List; 12 | 13 | import static org.jkff.ire.util.CollectionFactory.newArrayList; 14 | import static org.junit.Assert.assertEquals; 15 | 16 | /** 17 | * Created on: 31.08.2010 8:20:00 18 | */ 19 | public class RopeBasedISTest { 20 | @Test 21 | public void testABConDFA() { 22 | DFABuilder forward = new DFABuilder(4, 0, 1); 23 | forward.state(0).transitions('a', 1, null, 0); 24 | forward.state(1).transitions('b', 2, null, 0); 25 | forward.state(2).transitions('c', 3, null, 0); 26 | forward.state(3, 
0).transitions(null, 3); 27 | 28 | DFABuilder backward = new DFABuilder(4, 0, 1); 29 | backward.state(0).transitions('c', 1, null, 0); 30 | backward.state(1).transitions('b', 2, null, 0); 31 | backward.state(2).transitions('a', 3, null, 0); 32 | backward.state(3, 0).transitions(null, 3); 33 | 34 | BiDFA bidfa = new BiDFA(forward.build(), backward.build()); 35 | int blockSize = 3; 36 | RopeBasedIS is = new RopeBasedIS(bidfa, "xxxcabccccc", blockSize); 37 | 38 | List matches = newArrayList(); 39 | for(Match m : is.getMatches()) { 40 | matches.add(m); 41 | } 42 | assertEquals(1, matches.size()); 43 | assertEquals(3, matches.get(0).length()); 44 | assertEquals(0, matches.get(0).whichPattern()); 45 | assertEquals(4, matches.get(0).startPos()); 46 | } 47 | 48 | @Test 49 | public void testABConNFA() { 50 | NFABuilder forward = new NFABuilder(4, 0, 1); 51 | forward.state(0).transitions('a', 1, null, 0); 52 | forward.state(1).transitions('b', 2, null, 0); 53 | forward.state(2).transitions('c', 3, null, 0); 54 | forward.state(3, 0).transitions(null, 3); 55 | 56 | NFABuilder backward = new NFABuilder(4, 0, 1); 57 | backward.state(0).transitions('c', 1, null, 0); 58 | backward.state(1).transitions('b', 2, null, 0); 59 | backward.state(2).transitions('a', 3, null, 0); 60 | backward.state(3, 0).transitions(null, 3); 61 | 62 | BiDFA bidfa = new BiDFA( 63 | forward.build(), backward.build()); 64 | int blockSize = 3; 65 | RopeBasedIS is = new RopeBasedIS(bidfa, "xxxcabccccc", blockSize); 66 | 67 | List matches = newArrayList(); 68 | for(Match m : is.getMatches()) { 69 | matches.add(m); 70 | } 71 | assertEquals(1, matches.size()); 72 | assertEquals(3, matches.get(0).length()); 73 | assertEquals(0, matches.get(0).whichPattern()); 74 | assertEquals(4, matches.get(0).startPos()); 75 | } 76 | 77 | @Test 78 | public void testABorAConNFA() { 79 | NFABuilder forward = new NFABuilder(5, 0, 2); 80 | forward.state(0).transitions('a', 1, 'a', 3, null, 0); 81 | forward.state(1).transitions('b', 
2, null, 0); 82 | forward.state(2, 0).transitions(null, 2); 83 | forward.state(3).transitions('c', 4, null, 0); 84 | forward.state(4, 1).transitions(null, 4); 85 | 86 | NFABuilder backward = new NFABuilder(5, 0, 2); 87 | backward.state(0).transitions('b', 1, 'c', 3, null, 0); 88 | backward.state(1).transitions('a', 2, null, 0); 89 | backward.state(2, 0).transitions(null, 2); 90 | backward.state(3).transitions('a', 4, null, 0); 91 | backward.state(4, 1).transitions(null, 4); 92 | 93 | BiDFA bidfa = new BiDFA( 94 | forward.build(), backward.build()); 95 | int blockSize = 3; 96 | RopeBasedIS is = new RopeBasedIS(bidfa, "xxxcabcacccc", blockSize); 97 | 98 | List matches = newArrayList(); 99 | for(Match m : is.getMatches()) { 100 | matches.add(m); 101 | } 102 | assertEquals(2, matches.size()); 103 | assertEquals(2, matches.get(0).length()); 104 | assertEquals(0, matches.get(0).whichPattern()); 105 | assertEquals(4, matches.get(0).startPos()); 106 | 107 | assertEquals(2, matches.get(1).length()); 108 | assertEquals(1, matches.get(1).whichPattern()); 109 | assertEquals(7, matches.get(1).startPos()); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/test/java/org/jkff/ire/rope/RopeTest.java: -------------------------------------------------------------------------------- 1 | package org.jkff.ire.rope; 2 | 3 | import org.jkff.ire.fa.Sequence; 4 | import org.jkff.ire.util.*; 5 | import org.junit.Test; 6 | 7 | import static org.junit.Assert.assertEquals; 8 | import static org.junit.Assert.assertNull; 9 | 10 | /** 11 | * Created on: 30.08.2010 23:04:07 12 | */ 13 | public class RopeTest { 14 | private static final Reducer CONCAT = new Reducer() { 15 | public String compose(String a, String b) { 16 | return a + b; 17 | } 18 | 19 | public String composeAll(Sequence ss) { 20 | StringBuilder res = new StringBuilder(); 21 | for(int i = 0; i < ss.length(); ++i) { 22 | res.append(ss.get(i)); 23 | } 24 | return res.toString(); 
25 | } 26 | }; 27 | private static final Function SINGLETON_STRING = new Function() { 28 | public String applyTo(Character c) { 29 | return "" + c; 30 | } 31 | }; 32 | private static final Function2,Integer> ADD_LENGTH = new Function2, Integer>() { 33 | public Integer applyTo(Integer s, Rope r) { 34 | return s + r.length(); 35 | } 36 | }; 37 | private static final Function2 INCREMENT = new Function2() { 38 | public Integer applyTo(Integer s, Character character) { 39 | return s + 1; 40 | } 41 | }; 42 | 43 | @Test 44 | public void testToFromString() { 45 | RopeFactory f = new RopeFactory(4, CONCAT, SINGLETON_STRING); 46 | assertEquals("abc", Rope.fromString(f, "abc").toString()); 47 | assertEquals("abc", Rope.fromString(f, "abc").getSum()); 48 | assertEquals("abcd", Rope.fromString(f, "abcd").toString()); 49 | assertEquals("abcd", Rope.fromString(f, "abcd").getSum()); 50 | assertEquals("abcde", Rope.fromString(f, "abcde").toString()); 51 | assertEquals("abcde", Rope.fromString(f, "abcde").getSum()); 52 | assertEquals("abcdefgh", Rope.fromString(f, "abcdefgh").toString()); 53 | assertEquals("abcdefgh", Rope.fromString(f, "abcdefgh").getSum()); 54 | assertEquals("abcdefghij", Rope.fromString(f, "abcdefghij").toString()); 55 | assertEquals("abcdefghij", Rope.fromString(f, "abcdefghij").getSum()); 56 | assertEquals("abcdefghijklmnopqrstuvwxyz", Rope.fromString(f, "abcdefghijklmnopqrstuvwxyz").toString()); 57 | assertEquals("abcdefghijklmnopqrstuvwxyz", Rope.fromString(f, "abcdefghijklmnopqrstuvwxyz").getSum()); 58 | } 59 | 60 | @Test 61 | public void testAppend() { 62 | RopeFactory f = new RopeFactory(4, CONCAT, SINGLETON_STRING); 63 | String s = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; 64 | for (int i = 0; i < s.length(); ++i) { 65 | Rope part1 = Rope.fromString(f, s.substring(0, i)); 66 | Rope part2 = Rope.fromString(f, s.substring(i)); 67 | assertEquals(s, part1.append(part2).toString()); 68 | assertEquals(s, 
part1.append(part2).getSum()); 69 | } 70 | 71 | for (int a = 0; a < s.length(); ++a) { 72 | for (int b = a; b < s.length(); ++b) { 73 | for (int c = b; c < s.length(); ++c) { 74 | String[] parts = new String[]{ 75 | s.substring(0, a), 76 | s.substring(a, b), 77 | s.substring(b, c), 78 | s.substring(c), 79 | }; 80 | Rope[] ropes = new Rope[]{ 81 | Rope.fromString(f, parts[0]), 82 | Rope.fromString(f, parts[1]), 83 | Rope.fromString(f, parts[2]), 84 | Rope.fromString(f, parts[3]), 85 | }; 86 | assertEquals(s, 87 | ropes[0].append(ropes[1]).append(ropes[2]) 88 | .append(ropes[3]).toString()); 89 | assertEquals(s, 90 | ropes[0].append(ropes[1]).append(ropes[2]) 91 | .append(ropes[3]).getSum()); 92 | } 93 | } 94 | } 95 | } 96 | 97 | @Test 98 | public void testSplitAfterRise() { 99 | RopeFactory f = new RopeFactory(4, CONCAT, SINGLETON_STRING); 100 | String s = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; 101 | Rope r = Rope.fromString(f, s); 102 | 103 | for(int i = 0; i < s.length(); ++i) { 104 | Pair,Rope> p = r.splitAfterRise(0, ADD_LENGTH, INCREMENT, greaterThan(i-1)); 105 | assertEquals(i, p.first.length()); 106 | assertEquals(s, p.first.append(p.second).toString()); 107 | } 108 | assertNull(r.splitAfterRise(0, ADD_LENGTH, INCREMENT, greaterThan(s.length()))); 109 | } 110 | 111 | @Test 112 | public void testSplitAfterBackRise() { 113 | RopeFactory f = new RopeFactory(4, CONCAT, SINGLETON_STRING); 114 | String s = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; 115 | Rope r = Rope.fromString(f, s); 116 | 117 | for(int i = 0; i < s.length(); ++i) { 118 | Pair,Rope> p = r.splitAfterBackRise(0, ADD_LENGTH, INCREMENT, greaterThan(i-1)); 119 | assertEquals(i, p.second.length()); 120 | assertEquals(s, p.first.append(p.second).toString()); 121 | } 122 | assertNull(r.splitAfterBackRise(0, ADD_LENGTH, INCREMENT, greaterThan(s.length()))); 123 | } 124 | 125 | private static Predicate greaterThan(final int 
x) { 126 | return new Predicate() { 127 | public boolean isTrueFor(Integer i) { 128 | return i > x; 129 | } 130 | }; 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /target/ire-0.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkff/ire/7fc556f73b0a7f21d059463eb153e6629b90b48a/target/ire-0.1.jar --------------------------------------------------------------------------------