├── .gitignore ├── .settings ├── org.eclipse.core.resources.prefs ├── org.maven.ide.eclipse.prefs └── org.eclipse.jdt.core.prefs ├── .classpath ├── .project ├── LICENSE ├── src ├── main │ ├── resources │ │ └── com │ │ │ └── eaio │ │ │ └── stringsearch │ │ │ └── copying.txt │ └── java │ │ └── com │ │ └── eaio │ │ └── stringsearch │ │ ├── package.html │ │ ├── BNDMCI.java │ │ ├── BNDMWildcardsCI.java │ │ ├── BoyerMooreHorspoolRaita.java │ │ ├── BNDM.java │ │ ├── BNDMWildcards.java │ │ ├── BoyerMooreHorspool.java │ │ ├── ShiftOrMismatches.java │ │ ├── CharIntMap.java │ │ ├── MismatchSearch.java │ │ └── StringSearch.java └── test │ └── java │ └── com │ └── eaio │ └── stringsearch │ ├── BNDMCITest.java │ ├── BNDMWildcardsTest.java │ ├── BNDMWildcardsCITest.java │ ├── ShiftOrMismatchesTest.java │ ├── BoyerMooreHorspoolTest.java │ ├── BoyerMooreHorspoolRaitaTest.java │ ├── BNDMTest.java │ ├── performanceTest │ ├── Index.java │ ├── StringAccess.java │ ├── OneAndTwoBytePatternBenchmark.java │ ├── Benchmark.java │ └── Bits.java │ ├── CharIntMapTest.java │ ├── StringSearchTest.java │ └── AbstractStringSearchTest.java ├── README.md └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | #Fri Mar 20 17:06:11 CET 2009 2 | eclipse.preferences.version=1 3 | encoding/=UTF-8 4 | -------------------------------------------------------------------------------- /.settings/org.maven.ide.eclipse.prefs: -------------------------------------------------------------------------------- 1 | #Fri Mar 20 17:12:18 CET 2009 2 | activeProfiles= 3 | eclipse.preferences.version=1 4 | fullBuildGoals=process-test-resources 5 | includeModules=false 6 | resolveWorkspaceProjects=true 7 | resourceFilterGoals=process-resources 
resources\:testResources 8 | version=1 9 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | com.eaio.stringsearch 4 | StringSearch provides implementations of the Boyer-Moore and the Shift-Or (bit-parallel) algorithms. These algorithms are easily five to ten times faster than the naïve implementation found in java.lang.String. 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | StringSearch - high-performance pattern matching algorithms in Java 2 | Copyright (c) 2003-2015 Johann Burkard () 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a 5 | copy of this software and associated documentation files (the "Software"), 6 | to deal in the Software without restriction, including without limitation 7 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | and/or sell copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 17 | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 18 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 | USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /src/main/resources/com/eaio/stringsearch/copying.txt: -------------------------------------------------------------------------------- 1 | StringSearch - high-performance pattern matching algorithms in Java 2 | Copyright (c) 2003-2015 Johann Burkard () 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a 5 | copy of this software and associated documentation files (the "Software"), 6 | to deal in the Software without restriction, including without limitation 7 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | and/or sell copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 17 | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 18 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 | USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Untitled Document 5 | 6 | 7 |

Implementations of high-performance String searching algorithms.

8 |

Please see {@link com.eaio.stringsearch.StringSearch} for the basics.

9 |

Included algorithms

10 |
    11 |
  • General purpose 12 |
      13 |
    • {@link com.eaio.stringsearch.BNDM}
    • 14 |
    • {@link com.eaio.stringsearch.BoyerMooreHorspool} (fastest for 15 | searching in byte arrays)
    • 16 |
    • {@link com.eaio.stringsearch.BoyerMooreHorspoolRaita} (fastest 17 | for searching in char arrays and Strings)
    • 18 |
    19 |
  • 20 |
  • Searching with wildcards (don't-care-symbols) 21 |
      22 |
    • {@link com.eaio.stringsearch.BNDMWildcards}
    • 23 |
    24 |
  • 25 |
  • Searching with mismatches 26 |
      27 |
    • {@link com.eaio.stringsearch.ShiftOrMismatches}
    • 28 |
    29 |
  • 30 |
  • Case-insensitive searching 31 |
      32 |
    • {@link com.eaio.stringsearch.BNDMCI}
    • 33 |
    34 |
  • 35 |
  • Case-insensitive searching with wildcards (don't-care-symbols) 36 |
      37 |
    • {@link com.eaio.stringsearch.BNDMWildcardsCI}
    • 38 |
    39 |
  • 40 |
41 | 42 | 43 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/BNDMCITest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BNDMCITest.java 3 | * 4 | * Created on 16.10.2004. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * Test case for the {@link BNDMCI} class. 
32 | * 33 | * @author Johann Burkard 34 | * @version $Id: BNDMCITest.java 6675 2015-01-17 21:02:35Z johann $ 35 | */ 36 | public class BNDMCITest extends BNDMTest { 37 | 38 | @Override 39 | protected StringSearch createInstance() { 40 | return new BNDMCI(); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/BNDMWildcardsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BNDMWildcardsTest.java 3 | * 4 | * Created on 19.01.2004. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * Test case for the {@link BNDMWildcards} class. 
32 | * 33 | * @author Johann Burkard 34 | * @version $Id: BNDMWildcardsTest.java 6675 2015-01-17 21:02:35Z johann $ 35 | */ 36 | public class BNDMWildcardsTest extends BNDMTest { 37 | 38 | @Override 39 | protected StringSearch createInstance() { 40 | return new BNDMWildcards(); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/BNDMWildcardsCITest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BNDMWildcardsCITest.java 3 | * 4 | * Created on 19.01.2004. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * Test case for the {@link BNDMWildcardsCI} class. 
32 | * 33 | * @author Johann Burkard 34 | * @version $Id: BNDMWildcardsCITest.java 6675 2015-01-17 21:02:35Z johann $ 35 | */ 36 | public class BNDMWildcardsCITest extends BNDMWildcardsTest { 37 | 38 | @Override 39 | protected StringSearch createInstance() { 40 | return new BNDMWildcardsCI(); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/ShiftOrMismatchesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ShiftOrMismatchesTest.java 3 | * 4 | * Created on 15.11.2003. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * Test case for the {@link ShiftOrMismatches} class. 32 | * 33 | * @author Johann Burkard 34 | * @version $Id: ShiftOrMismatchesTest.java 6675 2015-01-17 21:02:35Z johann $ 35 | */ 36 | public class ShiftOrMismatchesTest extends AbstractStringSearchTest { 37 | 38 | @Override 39 | public StringSearch createInstance() { 40 | return new ShiftOrMismatches(); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/BoyerMooreHorspoolTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BoyerMooreHorspoolTest.java 3 | * 4 | * Created on 12.09.2003 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | import com.eaio.stringsearch.BoyerMooreHorspool; 31 | import com.eaio.stringsearch.StringSearch; 32 | 33 | /** 34 | * Test case for the {@link BoyerMooreHorspool} class. 35 | * 36 | * @author Johann Burkard 37 | * @version $Id: BoyerMooreHorspoolTest.java 6675 2015-01-17 21:02:35Z johann $ 38 | */ 39 | public class BoyerMooreHorspoolTest extends AbstractStringSearchTest { 40 | 41 | @Override 42 | protected StringSearch createInstance() { 43 | return new BoyerMooreHorspool(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/BoyerMooreHorspoolRaitaTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BoyerMooreHorspoolRaitaTest.java 3 | * 4 | * Created on 15.09.2003 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 
18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | import com.eaio.stringsearch.BoyerMooreHorspoolRaita; 31 | import com.eaio.stringsearch.StringSearch; 32 | 33 | /** 34 | * Test case for the {@link BoyerMooreHorspoolRaita} class. 35 | * 36 | * @author Johann Burkard 37 | * @version $Id: BoyerMooreHorspoolRaitaTest.java 6675 2015-01-17 21:02:35Z johann $ 38 | */ 39 | public class BoyerMooreHorspoolRaitaTest extends AbstractStringSearchTest { 40 | 41 | @Override 42 | protected StringSearch createInstance() { 43 | return new BoyerMooreHorspoolRaita(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/BNDMTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BNDMTest.java 3 | * 4 | * Created on 21.10.2003 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 
| * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | import static org.junit.Assert.assertEquals; 31 | 32 | /** 33 | * Test case for the {@link BNDM} class. 34 | * 35 | * @author Johann Burkard 36 | * @version $Id: BNDMTest.java 6675 2015-01-17 21:02:35Z johann $ 37 | */ 38 | public class BNDMTest extends AbstractStringSearchTest { 39 | 40 | @Override 41 | protected StringSearch createInstance() { 42 | return new BNDM(); 43 | } 44 | 45 | public void testLongPatterns() { 46 | StringSearch b = createInstance(); 47 | Object o32 = b.processString(" sublicense, and/or sell copies "); 48 | Object o33 = b.processString(" sublicense, and/or sell copies o"); 49 | assertEquals(o32, o33); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/performanceTest/Index.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Index.java 3 | * 4 | * Created on 10.03.2004. 
5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch.performanceTest; 29 | 30 | /** 31 | * Tests speed of the index method that converts java bytes to their unsigned 32 | * byte values (needed for array access). 
33 | * 34 | * @author Johann Burkard 35 | * @version $Id: Index.java 6675 2015-01-17 21:02:35Z johann $ 36 | */ 37 | public class Index { 38 | 39 | /** 40 | * Shell interface 41 | * 42 | * @param args arguments -- ignored 43 | */ 44 | public static void main(String[] args) { 45 | if (Bits.asString(index((byte) -3)).indexOf(Bits.asString((byte) -3)) == -1) { 46 | throw new Error("index method is probably broken"); 47 | } 48 | long then = System.currentTimeMillis(); 49 | final int max = Integer.MAX_VALUE; 50 | for (int i = 0; i < max; i++) { 51 | // b = /* index((byte) - 3); */ -3 < 0 ? -3 + 256 : -3; 52 | index((byte) -3); 53 | } 54 | System.out.println("Took " + (System.currentTimeMillis() - then) 55 | + " ms."); 56 | } 57 | 58 | private static int index(byte b) { 59 | 60 | /* Sun 1.4.1: 7,5s, BEA 1.3: 2s, IBM 1.3: 2s, Sun 1.3: 7s, BEA 1.4.2: 2,3s */ 61 | // return ((int) b) & 0x000000ff; 62 | /* Sun 1.4.1: 6,7s, BEA 1.3: 9,5s, IBM 1.3: 2s, Sun 1.3: 13s, BEA 1.4.2: 9,9s */ 63 | // return b < 0 ? b + 256 : b; 64 | /* Sun 1.4.1: 7,5s, BEA 1.3: 2s, IBM 1.3: 2s, Sun 1.3: 7s, BEA 1.4.2: 2,3s */ 65 | return b & 0x000000ff; 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StringSearch 2 | ## High-performance pattern matching algorithms in Java 3 | 4 | The Java language lacks fast string searching algorithms. StringSearch provides implementations of the Boyer-Moore and the Shift-Or (bit-parallel) algorithms. These algorithms are easily five to ten times faster than the naïve implementation found in `java.lang.String`. 
5 | 6 | ## Download 7 | 8 | [Download StringSearch 2.2 (JAR)](http://repo.eaio.com/maven2/com/eaio/stringsearch/stringsearch/2.2/stringsearch-2.2.jar) 9 | 10 | [Download StringSearch 2.2 (Source JAR)](http://repo.eaio.com/maven2/com/eaio/stringsearch/stringsearch/2.2/stringsearch-2.2-sources.jar) 11 | 12 | Or get StringSearch through Maven: 13 | 14 | ```XML 15 | <dependencies> 16 | <dependency> 17 | <groupId>com.eaio.stringsearch</groupId> 18 | <artifactId>stringsearch</artifactId> 19 | <version>2.2</version> 20 | </dependency> 21 | </dependencies> 22 | … 23 | <repositories> 24 | <repository> 25 | <id>eaio.com</id> 26 | <url>http://repo.eaio.com/maven2</url> 27 | </repository> 28 | </repositories> 29 | ``` 30 | 31 | ### StringSearch 1.2 32 | 33 | StringSearch 1.2, which includes a native library and a different selection of algorithms, is still available. 34 | 35 | [Download StringSearch 1.2 (ZIP)](http://johannburkard.de/software/stringsearch/stringsearch-1.2.zip) 36 | 37 | [Download StringSearch 1.2 (TAR.GZ)](http://johannburkard.de/software/stringsearch/stringsearch-1.2.tar.gz) 38 | 39 | ## Documentation 40 | 41 | This library contains implementations of the following pattern matching algorithms: 42 | 43 | * General purpose 44 | * [BNDM](http://johannburkard.de/software/stringsearch/site/apidocs/com/eaio/stringsearch/BNDM.html) 45 | * [BoyerMooreHorspool](http://johannburkard.de/software/stringsearch/site/apidocs/com/eaio/stringsearch/BoyerMooreHorspool.html) 46 | * [BoyerMooreHorspoolRaita](http://johannburkard.de/software/stringsearch/site/apidocs/com/eaio/stringsearch/BoyerMooreHorspoolRaita.html) 47 | * Searching with wildcards (don't-care-symbols) 48 | * [BNDMWildcards](http://johannburkard.de/software/stringsearch/site/apidocs/com/eaio/stringsearch/BNDMWildcards.html) 49 | * Searching with mismatches 50 | * [ShiftOrMismatches](http://johannburkard.de/software/stringsearch/site/apidocs/com/eaio/stringsearch/ShiftOrMismatches.html) 51 | * Case-insensitive searching 52 | * [BNDMCI](http://johannburkard.de/software/stringsearch/site/apidocs/com/eaio/stringsearch/BNDMCI.html) 53 | * Case-insensitive searching with wildcards (don't-care-symbols) 54 | * 
[BNDMWildcardsCI](http://johannburkard.de/software/stringsearch/site/apidocs/com/eaio/stringsearch/BNDMWildcardsCI.html) 55 | 56 | ## License 57 | 58 | StringSearch is licensed under the [MIT License](http://johannburkard.de/software/stringsearch/copying.txt) ([OSI certified](http://opensource.org/licenses/mit-license.php)). 59 | 60 | ## Other Resources 61 | 62 | * [StringSearch on johannburkard.de](http://johannburkard.de/software/stringsearch/) 63 | * [Maven-generated Site](http://johannburkard.de/software/stringsearch/site/) 64 | * [APIdoc](http://johannburkard.de/software/stringsearch/site/apidocs/) 65 | 66 | [![Analytics](https://ga-beacon.appspot.com/UA-7427410-89/StringSearch/README.md?pixel)](https://github.com/igrigorik/ga-beacon) -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/CharIntMapTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * CharIntMapTest.java 3 | * 4 | * Created on 24.11.2003. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 
18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | import static com.eaio.stringsearch.AbstractStringSearchTest.assertSerializable; 31 | import static org.junit.Assert.*; 32 | 33 | import org.junit.Test; 34 | 35 | /** 36 | * Test case for the {@link CharIntMap} class. 37 | * 38 | * @author Johann Burkard 39 | * @version $Id: CharIntMapTest.java 6675 2015-01-17 21:02:35Z johann $ 40 | */ 41 | public class CharIntMapTest { 42 | 43 | @Test 44 | public void defaultConstructor() { 45 | CharIntMap m = new CharIntMap(); 46 | assertTrue(m.equals(m)); 47 | assertFalse(m.equals(null)); 48 | assertTrue(m.equals(new CharIntMap())); 49 | assertFalse(m.equals(new CharIntMap(2, (char) 0, 0))); 50 | assertTrue(m.hashCode() == new CharIntMap().hashCode()); 51 | } 52 | 53 | @Test 54 | public void specificConstructor() { 55 | CharIntMap m = new CharIntMap(2, 'a', 0); 56 | assertEquals('a', m.getLowest()); 57 | assertEquals(2, m.getExtent()); 58 | assertEquals('c', m.getHighest()); 59 | m.set('a', 42); 60 | assertEquals(42, m.get('a')); 61 | m.set('b', 1); 62 | m.set('x', 42); 63 | assertEquals(1, m.get('b')); 64 | assertEquals(0, m.get('r')); 65 | } 66 | 67 | @Test 68 | public void specificConstructorAndDefault() { 69 | CharIntMap m = new CharIntMap(2, 'a', 42); 70 | m.set('a', 42); 71 | assertEquals(42, m.get('a')); 72 | m.set('b', 1); 73 | m.set('x', 42); 74 | assertEquals(1, m.get('b')); 75 | assertEquals(42, m.get('r')); 76 | } 77 | 78 | @Test 79 | public void 
isSerializable() throws Exception { 80 | assertSerializable(new CharIntMap()); 81 | assertSerializable(new CharIntMap(2, (char) 0, 0)); 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/performanceTest/StringAccess.java: -------------------------------------------------------------------------------- 1 | /* 2 | * StringAccess.java 3 | * 4 | * Created on 18.03.2005. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch.performanceTest; 29 | 30 | import java.lang.reflect.Field; 31 | 32 | /** 33 | * Tests the speed of accessing the char array in String -- either 34 | * through reflection or by cloning. 
/**
 * Micro-benchmark comparing two ways of getting at the characters of a
 * String: reading a <code>char[]</code> field of <code>java.lang.String</code>
 * through reflection, or copying them with {@link String#toCharArray()}.
 *
 * @author <a href="mailto:johann@johannburkard.de">Johann Burkard</a>
 */
public class StringAccess {

    /** Number of accessor calls timed for every string length. */
    private static final int ITERATIONS = 10000000;

    /**
     * The first <code>char[]</code> field declared by String, or
     * <code>null</code> if there is none or it could not be made accessible.
     */
    private static Field value;

    /**
     * Shell interface. Prints one line per tested string length: the length,
     * a tab, and the elapsed milliseconds.
     *
     * @param args arguments -- ignored
     * @throws IllegalAccessException if the reflective read is refused
     */
    public static void main(String[] args) throws IllegalAccessException {
        value = findCharArrayField();

        System.out.println("Using Reflection");

        if (value == null) {
            // Without this guard the first reflective read would fail with a
            // NullPointerException on JVMs whose String has no char[] field.
            System.out.println("skipped: java.lang.String exposes no char[] field on this JVM");
        }
        else {
            for (int i = 1; i < 1000; i = nextLength(i)) {

                String s = generate(i);

                long then = System.currentTimeMillis();

                for (int x = 0; x < ITERATIONS; ++x) {
                    getUsingReflection(s);
                }

                System.out.println(i + "\t" + (System.currentTimeMillis() - then));

            }
        }

        System.out.println("Using cloning");

        for (int i = 1; i < 1000; i = nextLength(i)) {

            String s = generate(i);

            long then = System.currentTimeMillis();

            for (int x = 0; x < ITERATIONS; ++x) {
                getUsingCloning(s);
            }

            System.out.println(i + "\t" + (System.currentTimeMillis() - then));

        }

    }

    /**
     * Looks up the first <code>char[]</code> field declared by
     * <code>java.lang.String</code> and makes it accessible.
     *
     * @return the field, or <code>null</code> if String declares no such
     * field or the JVM refuses to open it
     */
    private static Field findCharArrayField() {
        Field[] fields = String.class.getDeclaredFields();
        for (int i = 0; i < fields.length; ++i) {
            if (fields[i].getType() == char[].class) {
                try {
                    fields[i].setAccessible(true);
                    return fields[i];
                }
                catch (RuntimeException ex) {
                    // SecurityException, or the module-system refusal newer
                    // JVMs throw, which cannot be named at this source level.
                    System.out.println("Cannot open " + fields[i] + ": " + ex);
                    return null;
                }
            }
        }
        return null;
    }

    /**
     * Computes the next string length to benchmark. The parentheses spell
     * out how the original unparenthesised expression was evaluated: the sum
     * is formed first and then shifted, because <code>+</code> binds tighter
     * than <code>&lt;&lt;</code>.
     *
     * @param i the current length, at least 1
     * @return the next length
     */
    private static int nextLength(int i) {
        return i + ((1 + (int) (Math.log(i) + Math.log(i))) << 2);
    }

    /**
     * Reads the character array of <code>s</code> through {@link #value}.
     *
     * @param s the String to read from
     * @return whatever array the field holds for <code>s</code>
     * @throws IllegalAccessException if the field cannot be read
     */
    private static char[] getUsingReflection(String s)
            throws IllegalAccessException {
        return (char[]) value.get(s);
    }

    /**
     * Copies the characters of <code>s</code> into a new array.
     *
     * @param s the String to copy
     * @return a fresh array with the characters of <code>s</code>
     */
    private static char[] getUsingCloning(String s) {
        return s.toCharArray();
    }

    /**
     * Creates a String of the given length consisting of NUL characters.
     *
     * @param length the number of characters
     * @return the generated String
     */
    private static String generate(int length) {
        char[] buf = new char[length];
        return new String(buf);
    }

}
CEST 2009 2 | eclipse.preferences.version=1 3 | org.eclipse.jdt.core.builder.resourceCopyExclusionFilter=*.launch,*.groovy 4 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 5 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 6 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 7 | org.eclipse.jdt.core.compiler.compliance=1.6 8 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 9 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 10 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 11 | org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning 12 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 13 | org.eclipse.jdt.core.compiler.problem.autoboxing=ignore 14 | org.eclipse.jdt.core.compiler.problem.deprecation=warning 15 | org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled 16 | org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled 17 | org.eclipse.jdt.core.compiler.problem.discouragedReference=warning 18 | org.eclipse.jdt.core.compiler.problem.emptyStatement=warning 19 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 20 | org.eclipse.jdt.core.compiler.problem.fallthroughCase=warning 21 | org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore 22 | org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning 23 | org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning 24 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=error 25 | org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning 26 | org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning 27 | org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=warning 28 | org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=warning 29 | org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore 30 | org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=error 31 | 
org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=warning 32 | org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=warning 33 | org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning 34 | org.eclipse.jdt.core.compiler.problem.noEffectAssignment=error 35 | org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning 36 | org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore 37 | org.eclipse.jdt.core.compiler.problem.nullReference=warning 38 | org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning 39 | org.eclipse.jdt.core.compiler.problem.parameterAssignment=warning 40 | org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=warning 41 | org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore 42 | org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=enabled 43 | org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning 44 | org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled 45 | org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore 46 | org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning 47 | org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning 48 | org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=warning 49 | org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning 50 | org.eclipse.jdt.core.compiler.problem.unnecessaryElse=warning 51 | org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=warning 52 | org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore 53 | org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=warning 54 | org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=enabled 55 | org.eclipse.jdt.core.compiler.problem.unusedImport=warning 56 | org.eclipse.jdt.core.compiler.problem.unusedLabel=warning 57 | org.eclipse.jdt.core.compiler.problem.unusedLocal=warning 58 | 
org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore 59 | org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=enabled 60 | org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=enabled 61 | org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=ignore 62 | org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=ignore 63 | org.eclipse.jdt.core.compiler.source=1.6 64 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/StringSearchTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * StringSearchTest.java 3 | * 4 | * Created 23.11.2006. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | import static org.junit.Assert.*; 31 | 32 | import org.junit.Test; 33 | 34 | /** 35 | * Tests methods in {@link StringSearch}. 36 | * 37 | * @author Johann Burkard 38 | * @version $Id: StringSearchTest.java 6675 2015-01-17 21:02:35Z johann $ 39 | */ 40 | public class StringSearchTest { 41 | 42 | private static String LONG_STRING; 43 | 44 | static { 45 | StringBuffer buf = new StringBuffer(256); 46 | buf.setLength(256); 47 | buf.setCharAt(255, 'a'); 48 | LONG_STRING = buf.toString(); 49 | } 50 | 51 | @Test 52 | public void getChars() { 53 | String a = "abc"; 54 | char[] aChars = StringSearch.getChars(a); 55 | assertEquals(3, aChars.length); 56 | assertEquals('a', aChars[0]); 57 | assertEquals('b', aChars[1]); 58 | assertEquals('c', aChars[2]); 59 | 60 | char[] aChars2 = StringSearch.getChars(a); 61 | if (aChars == aChars2) { 62 | assertTrue(StringSearch.usesReflection()); 63 | } 64 | 65 | String b = a.substring(1); 66 | char[] bChars = StringSearch.getChars(b); 67 | assertEquals(2, bChars.length); 68 | assertEquals('b', bChars[0]); 69 | assertEquals('c', bChars[1]); 70 | 71 | String c = a.substring(0, 2); 72 | char[] cChars = StringSearch.getChars(c); 73 | assertEquals(2, cChars.length); 74 | assertEquals('a', cChars[0]); 75 | assertEquals('b', cChars[1]); 76 | 77 | char[] longChars1 = StringSearch.getChars(LONG_STRING); 78 | char[] longChars2 = StringSearch.getChars(LONG_STRING); 79 | if (StringSearch.usesReflection()) { 80 | assertTrue(longChars1 == longChars2); 81 | } 82 | else { 83 | assertFalse(longChars1 == longChars2); 84 | } 85 | 86 | String shorterString = LONG_STRING.substring(1); 87 | char[] shorterChars1 = StringSearch.getChars(shorterString); 88 | char[] shorterChars2 = StringSearch.getChars(shorterString); 89 | if (StringSearch.usesReflection()) { 90 | assertTrue(shorterChars1 == shorterChars2); 91 | } 92 | else { 93 | assertFalse(shorterChars1 == shorterChars2); 94 | } 95 | 96 | 
shorterString = LONG_STRING.substring(0, 200); 97 | shorterChars1 = StringSearch.getChars(shorterString); 98 | shorterChars2 = StringSearch.getChars(shorterString); 99 | if (StringSearch.usesReflection()) { 100 | assertTrue(shorterChars1 == shorterChars2); 101 | } 102 | else { 103 | assertFalse(shorterChars1 == shorterChars2); 104 | } 105 | } 106 | 107 | @Test 108 | public void createCharIntMap() { 109 | BNDM b = new BNDM(); 110 | CharIntMap c = b.createCharIntMap("abcde".toCharArray(), -1); 111 | c.set('a', 0); 112 | assertEquals(0, c.get('a')); 113 | assertEquals(-1, c.get('A')); 114 | c.set('e', 42); 115 | assertEquals(42, c.get('e')); 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/performanceTest/OneAndTwoBytePatternBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Benchmark.java 3 | * 4 | * Created on 21.10.2003 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch.performanceTest; 29 | 30 | import java.io.FileInputStream; 31 | import java.io.IOException; 32 | 33 | import com.eaio.stringsearch.*; 34 | 35 | /** 36 | * Performance benchmark that tests the performance of searching for patterns of length one and two. 37 | *

38 | * Suggested by someone who doesn't want to be named. 39 | * 40 | * @author Johann Burkard 41 | * @version $Id: OneAndTwoBytePatternBenchmark.java 6675 2015-01-17 21:02:35Z johann $ 42 | */ 43 | public class OneAndTwoBytePatternBenchmark { 44 | 45 | private static final Class[] CLASSES = new Class[] { BoyerMooreHorspool.class, 46 | BoyerMooreHorspoolRaita.class }; 47 | 48 | @SuppressWarnings("deprecation") 49 | public static void main(String[] args) throws Exception { 50 | FileInputStream inStream = null; 51 | byte[] buf = null; 52 | try { 53 | inStream = new FileInputStream("pom.xml"); 54 | buf = new byte[inStream.available()]; 55 | inStream.read(buf); 56 | } 57 | finally { 58 | if (inStream != null) { 59 | try { 60 | inStream.close(); 61 | } 62 | catch (IOException ex) { /* Ignored */ } 63 | } 64 | } 65 | 66 | String doc = new String(buf, 0); 67 | 68 | String query; 69 | 70 | if (args.length > 0) { 71 | query = args[0]; 72 | } 73 | else { 74 | query = "5"; 75 | } 76 | 77 | System.out.println("Searching for '" + query + "'"); 78 | System.out.println(); 79 | 80 | long then; 81 | int x = doc.indexOf(query); 82 | 83 | StringSearch ps = null; 84 | Object o; 85 | 86 | System.out.println("Testing StringSearch searchChars methods"); 87 | System.out.println(); 88 | 89 | char[] sourceArray = doc.toCharArray(); 90 | char[] q1 = query.toCharArray(); 91 | 92 | for (int c = 0; c < CLASSES.length; c++) { 93 | ps = (StringSearch) CLASSES[c].newInstance(); 94 | 95 | o = ps.processChars(q1); 96 | 97 | System.out.print("Testing " + ps.toString()); 98 | 99 | then = System.currentTimeMillis(); 100 | 101 | int res = 0; 102 | 103 | for (int i = 0; i < 200000; i++) { 104 | res = ps.searchChars(sourceArray, q1, o); 105 | } 106 | 107 | System.out.println(" took " + (System.currentTimeMillis() - then) 108 | + " ms."); 109 | 110 | if (res != x) { 111 | System.err.println(res); 112 | } 113 | 114 | } 115 | 116 | System.out.println(); 117 | System.out.println("Testing StringSearch 
searchBytes methods"); 118 | System.out.println(); 119 | 120 | byte[] sourceArrayBytes = buf; 121 | byte[] q1Bytes = query.getBytes(); 122 | 123 | for (int c = 0; c < CLASSES.length; c++) { 124 | ps = (StringSearch) CLASSES[c].newInstance(); 125 | 126 | o = ps.processBytes(q1Bytes); 127 | 128 | System.out.print("Testing " + ps.toString()); 129 | 130 | then = System.currentTimeMillis(); 131 | 132 | int res = 0; 133 | 134 | for (int i = 0; i < 200000; i++) { 135 | res = ps.searchBytes(sourceArrayBytes, q1Bytes, o); 136 | } 137 | 138 | System.out.println(" took " + (System.currentTimeMillis() - then) 139 | + " ms."); 140 | 141 | if (res != x) { 142 | System.err.println(res); 143 | } 144 | 145 | } 146 | 147 | } 148 | 149 | } 150 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.eaio.stringsearch 6 | stringsearch 7 | jar 8 | 2.2 9 | StringSearch 10 | StringSearch provides implementations of the Boyer-Moore and the Shift-Or (bit-parallel) algorithms. These algorithms are easily five to ten times faster than the naïve implementation found in java.lang.String. 
11 | http://johannburkard.de/software/stringsearch/ 12 | 2003 13 | 14 | GitHub 15 | https://github.com/johannburkard/StringSearch/issues 16 | 17 | 18 | scm:git:git@github.com:johannburkard/StringSearch.git 19 | scm:git:git@github.com:johannburkard/StringSearch.git 20 | https://github.com/johannburkard/StringSearch.git 21 | 22 | 23 | 24 | MIT License 25 | http://www.opensource.org/licenses/mit-license.php 26 | repo 27 | 28 | 29 | 30 | 31 | johann 32 | Johann Burkard 33 | johann@johannburkard.de 34 | http://johannburkard.de 35 | Johann Burkard 36 | http://johannburkard.de 37 | 38 | 39 | 40 | Johann Burkard 41 | http://johannburkard.de 42 | 43 | 44 | 3.3.3 45 | 46 | 47 | 48 | 49 | org.apache.maven.plugins 50 | maven-compiler-plugin 51 | 3.0 52 | 53 | UTF-8 54 | 1.6 55 | 1.6 56 | -g 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-source-plugin 62 | 2.2.1 63 | 64 | 65 | attach-sources 66 | verify 67 | 68 | jar 69 | test-jar 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | org.eclipse.m2e 79 | lifecycle-mapping 80 | 1.0.0 81 | 82 | 83 | 84 | 85 | 86 | org.apache.maven.plugins 87 | maven-resources-plugin 88 | [2.6,) 89 | 90 | resources 91 | testResources 92 | 93 | 94 | 95 | 96 | false 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | junit 110 | junit 111 | 4.12 112 | test 113 | 114 | 115 | 116 | 117 | 118 | org.apache.maven.plugins 119 | maven-javadoc-plugin 120 | 2.9 121 | 122 | UTF-8 123 | 124 | 125 | 126 | org.codehaus.mojo 127 | cobertura-maven-plugin 128 | 2.5.2 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/BNDMCI.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BNDMCI.java 3 | * 4 | * Created on 11.10.2004. 
5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * This is a case-insensitive version of the 32 | * {@link com.eaio.stringsearch.BNDM} algorithm. 33 | *

34 | * Because of the bit-parallel {@link com.eaio.stringsearch.BNDM} algorithm, 35 | * there is no speed penalty when using this algorithm -- using the case 36 | * sensitive version is as fast as using the case insensitive version. 37 | * 38 | * @see StringSearch 39 | * – high-performance pattern matching algorithms in Java 40 | * @author Johann Burkard 41 | * @version $Id: BNDMCI.java 6675 2015-01-17 21:02:35Z johann $ 42 | */ 43 | public class BNDMCI extends BNDM { 44 | 45 | /** 46 | * @see com.eaio.stringsearch.StringSearch#processBytes(byte[]) 47 | */ 48 | @Override 49 | public Object processBytes(byte[] pattern) { 50 | int end = pattern.length < 32 ? pattern.length : 32; 51 | 52 | int[] b = new int[256]; 53 | 54 | int j = 1; 55 | for (int i = end - 1; i >= 0; --i, j <<= 1) { 56 | if (pattern[i] > 64 && pattern[i] < 91) { 57 | b[pattern[i]] |= j; 58 | b[pattern[i] + 32] |= j; 59 | } 60 | else if (pattern[i] > 96 && pattern[i] < 123) { 61 | b[pattern[i]] |= j; 62 | b[pattern[i] - 32] |= j; 63 | } 64 | else { 65 | char idx = (char) index(pattern[i]); 66 | 67 | if (idx > 127) { 68 | char c; 69 | if (Character.isUpperCase(idx)) { 70 | c = Character.toLowerCase(idx); 71 | if (c < 256) { 72 | b[c] |= j; 73 | } 74 | } 75 | else if (Character.isLowerCase(idx)) { 76 | c = Character.toUpperCase(idx); 77 | if (c < 256) { 78 | b[c] |= j; 79 | } 80 | } 81 | } 82 | 83 | b[idx] |= j; 84 | } 85 | } 86 | 87 | return b; 88 | } 89 | 90 | /** 91 | * @see com.eaio.stringsearch.BNDM#processChars(char[]) 92 | * @see com.eaio.stringsearch.StringSearch#processChars(char[]) 93 | */ 94 | @Override 95 | public Object processChars(char[] pattern) { 96 | int end = pattern.length < 32 ? pattern.length : 32; 97 | 98 | char t; 99 | 100 | char min = Character.MAX_VALUE; 101 | char max = Character.MIN_VALUE; 102 | for (int i = 0; i < end; i++) { 103 | if (Character.isLetter(pattern[i])) { 104 | min = min < (t = min(Character.toLowerCase(pattern[i]), 105 | Character.toUpperCase(pattern[i]))) ? 
min : t; 106 | max = max > (t = max(Character.toLowerCase(pattern[i]), 107 | Character.toUpperCase(pattern[i]))) ? max : t; 108 | } 109 | else { 110 | max = max > pattern[i] ? max : pattern[i]; 111 | min = min < pattern[i] ? min : pattern[i]; 112 | } 113 | } 114 | CharIntMap b = new CharIntMap(max - min + 1, min, 0); 115 | 116 | int j = 1; 117 | for (int i = end - 1; i >= 0; --i, j <<= 1) { 118 | if (Character.isLetter(pattern[i])) { 119 | t = Character.toLowerCase(pattern[i]); 120 | b.set(t, b.get(t) | j); 121 | t = Character.toUpperCase(t); 122 | b.set(t, b.get(t) | j); 123 | } 124 | else { 125 | b.set(pattern[i], b.get(pattern[i]) | j); 126 | } 127 | } 128 | 129 | return b; 130 | } 131 | 132 | } 133 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/performanceTest/Benchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Benchmark.java 3 | * 4 | * Created on 21.10.2003 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch.performanceTest; 29 | 30 | import java.io.FileInputStream; 31 | import java.io.IOException; 32 | 33 | import com.eaio.stringsearch.*; 34 | 35 | /** 36 | * Performance benchmark. 37 | * 38 | * @author Johann Burkard 39 | * @version $Id: Benchmark.java 6675 2015-01-17 21:02:35Z johann $ 40 | */ 41 | public class Benchmark { 42 | 43 | private static final Class[] CLASSES = new Class[] { BNDM.class, 44 | BoyerMooreHorspool.class, BoyerMooreHorspoolRaita.class, 45 | ShiftOrMismatches.class }; 46 | 47 | @SuppressWarnings("deprecation") 48 | public static void main(String[] args) throws Exception { 49 | FileInputStream inStream = null; 50 | byte[] buf = null; 51 | try { 52 | inStream = new FileInputStream("pom.xml"); 53 | buf = new byte[inStream.available()]; 54 | inStream.read(buf); 55 | } 56 | finally { 57 | if (inStream != null) { 58 | try { 59 | inStream.close(); 60 | } 61 | catch (IOException ex) { /* Ignored */ } 62 | } 63 | } 64 | 65 | String doc = new String(buf, 0); 66 | 67 | String query; 68 | 69 | if (args.length > 0) { 70 | query = args[0]; 71 | } 72 | else { 73 | query = "org.codehaus.mojo"; 74 | } 75 | 76 | System.out.println("Searching for '" + query + "'"); 77 | System.out.println(); 78 | 79 | long then; 80 | int x = -1; 81 | 82 | System.out.print("Testing String#indexOf()"); 83 | 84 | then = System.currentTimeMillis(); 85 | 86 | for (int i = 0; i < 200000; i++) { 87 | x = doc.indexOf(query); 88 | } 89 | 90 | System.out.println(" took " + (System.currentTimeMillis() - then) 91 | + " ms."); 92 | 93 | StringSearch ps = null; 94 | Object o; 95 | 96 | System.out.println(); 97 | System.out.println("Testing StringSearch searchChars 
methods"); 98 | System.out.println(); 99 | 100 | char[] sourceArray = doc.toCharArray(); 101 | char[] q1 = query.toCharArray(); 102 | 103 | for (int c = 0; c < CLASSES.length; c++) { 104 | ps = (StringSearch) CLASSES[c].newInstance(); 105 | 106 | o = ps.processChars(q1); 107 | 108 | System.out.print("Testing " + ps.toString()); 109 | 110 | then = System.currentTimeMillis(); 111 | 112 | int res = 0; 113 | 114 | for (int i = 0; i < 200000; i++) { 115 | res = ps.searchChars(sourceArray, q1, o); 116 | } 117 | 118 | System.out.println(" took " + (System.currentTimeMillis() - then) 119 | + " ms."); 120 | 121 | if (res != x) { 122 | System.err.println(res); 123 | } 124 | 125 | } 126 | 127 | System.out.println(); 128 | System.out.println("Testing StringSearch searchBytes methods"); 129 | System.out.println(); 130 | 131 | byte[] sourceArrayBytes = buf; 132 | byte[] q1Bytes = query.getBytes(); 133 | 134 | for (int c = 0; c < CLASSES.length; c++) { 135 | ps = (StringSearch) CLASSES[c].newInstance(); 136 | 137 | o = ps.processBytes(q1Bytes); 138 | 139 | System.out.print("Testing " + ps.toString()); 140 | 141 | then = System.currentTimeMillis(); 142 | 143 | int res = 0; 144 | 145 | for (int i = 0; i < 200000; i++) { 146 | res = ps.searchBytes(sourceArrayBytes, q1Bytes, o); 147 | } 148 | 149 | System.out.println(" took " + (System.currentTimeMillis() - then) 150 | + " ms."); 151 | 152 | if (res != x) { 153 | System.err.println(res); 154 | } 155 | 156 | } 157 | 158 | } 159 | 160 | } 161 | -------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/BNDMWildcardsCI.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BNDMWildcardsCI.java 3 | * 4 | * Created on 23.10.2004. 
5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * This is a case-insensitive version of the 32 | * {@link com.eaio.stringsearch.BNDMWildcards} algorithm. 33 | *

34 | * Because of the bit-parallel {@link com.eaio.stringsearch.BNDM} algorithm, 35 | * there is no speed penalty when using this algorithm -- using the case 36 | * sensitive version is as fast as using the case insensitive version. 37 | * 38 | * @see StringSearch 39 | * – high-performance pattern matching algorithms in Java 40 | * @see com.eaio.stringsearch.BNDMWildcards#wildcard 41 | * @author Johann Burkard 42 | * @version $Id: BNDMWildcardsCI.java 6675 2015-01-17 21:02:35Z johann $ 43 | */ 44 | public class BNDMWildcardsCI extends BNDMWildcards { 45 | 46 | /** 47 | * Constructor for BNDMWildcardsCI. 48 | */ 49 | public BNDMWildcardsCI() { 50 | super(); 51 | } 52 | 53 | /** 54 | * Constructor for BNDMWildcardsCI. 55 | * 56 | * @param wildcard the wildcard character 57 | */ 58 | public BNDMWildcardsCI(char wildcard) { 59 | super(wildcard); 60 | } 61 | 62 | /** 63 | * @see com.eaio.stringsearch.BNDMWildcards#processBytes(byte[], byte) 64 | */ 65 | @Override 66 | public Object processBytes(byte[] pattern, byte w) { 67 | int j = 0; 68 | int end = pattern.length < 32 ? 
pattern.length : 32; 69 | 70 | for (int i = 0; i < end; ++i) { 71 | if (pattern[i] == w) { 72 | j |= (1 << end - i - 1); 73 | } 74 | } 75 | 76 | int[] b = new int[256]; 77 | 78 | if (j != 0) { 79 | for (int i = 0; i < b.length; i++) { 80 | b[i] = j; 81 | } 82 | } 83 | 84 | j = 1; 85 | for (int i = end - 1; i >= 0; --i, j <<= 1) { 86 | if (pattern[i] > 64 && pattern[i] < 91) { 87 | b[pattern[i]] |= j; 88 | b[pattern[i] + 32] |= j; 89 | } 90 | else if (pattern[i] > 96 && pattern[i] < 123) { 91 | b[pattern[i]] |= j; 92 | b[pattern[i] - 32] |= j; 93 | } 94 | else { 95 | char idx = (char) index(pattern[i]); 96 | 97 | if (idx > 127) { 98 | char c; 99 | if (Character.isUpperCase(idx)) { 100 | c = Character.toLowerCase(idx); 101 | if (c < 256) { 102 | b[c] |= j; 103 | } 104 | } 105 | else if (Character.isLowerCase(idx)) { 106 | c = Character.toUpperCase(idx); 107 | if (c < 256) { 108 | b[c] |= j; 109 | } 110 | } 111 | } 112 | 113 | b[idx] |= j; 114 | } 115 | } 116 | 117 | return b; 118 | } 119 | 120 | /** 121 | * @see com.eaio.stringsearch.BNDMWildcards#processChars(char[], char) 122 | */ 123 | @Override 124 | public Object processChars(char[] pattern, char w) { 125 | int j = 0; 126 | int end = pattern.length < 32 ? pattern.length : 32; 127 | 128 | for (int i = 0; i < end; ++i) { 129 | if (pattern[i] == w) { 130 | j |= (1 << end - i - 1); 131 | } 132 | } 133 | 134 | char t; 135 | 136 | char min = Character.MAX_VALUE; 137 | char max = Character.MIN_VALUE; 138 | for (int i = 0; i < end; i++) { 139 | if (Character.isLetter(pattern[i])) { 140 | min = min < (t = min(Character.toLowerCase(pattern[i]), 141 | Character.toUpperCase(pattern[i]))) ? min : t; 142 | max = max > (t = max(Character.toLowerCase(pattern[i]), 143 | Character.toUpperCase(pattern[i]))) ? max : t; 144 | } 145 | else { 146 | max = max > pattern[i] ? max : pattern[i]; 147 | min = min < pattern[i] ? 
min : pattern[i]; 148 | } 149 | } 150 | CharIntMap b = new CharIntMap(max - min + 1, min, j); 151 | 152 | j = 1; 153 | for (int i = end - 1; i >= 0; --i, j <<= 1) { 154 | if (Character.isLetter(pattern[i])) { 155 | t = Character.toLowerCase(pattern[i]); 156 | b.set(t, b.get(t) | j); 157 | t = Character.toUpperCase(t); 158 | b.set(t, b.get(t) | j); 159 | } 160 | else { 161 | b.set(pattern[i], b.get(pattern[i]) | j); 162 | } 163 | } 164 | 165 | return b; 166 | } 167 | 168 | } 169 | -------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/BoyerMooreHorspoolRaita.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BoyerMooreHorspoolRaita.java 3 | * 4 | * Created on 15.09.2003 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * An implementation of Raita's enhancement to the Boyer-Moore-Horspool String 32 | * searching algorithm. See "Tuning the Boyer-Moore-Horspool string searching 33 | * algorithm" (appeared in Software - Practice & Experience, 34 | * 22(10):879-884). 35 | *

36 | * This algorithm is slightly faster than the 37 | * {@link com.eaio.stringsearch.BoyerMooreHorspool} algorithm for the 38 | * searchChars and searchString methods. It's 39 | * searchBytes methods are slightly slower. 40 | * 41 | * @see StringSearch 42 | * – high-performance pattern matching algorithms in Java 43 | * @see 45 | * http://johannburkard.de/documents/spe787tr.pdf 46 | * @author Johann Burkard 47 | * @version $Id: BoyerMooreHorspoolRaita.java 6675 2015-01-17 21:02:35Z johann $ 48 | */ 49 | public class BoyerMooreHorspoolRaita extends BoyerMooreHorspool { 50 | 51 | /** 52 | * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int, 53 | * byte[], java.lang.Object) 54 | */ 55 | @Override 56 | public int searchBytes(byte[] text, int textStart, int textEnd, 57 | byte[] pattern, Object processed) { 58 | 59 | // Unrolled fast paths for patterns of length 1 and 2. Suggested by someone who doesn't want to be named. 60 | 61 | if (pattern.length == 1) { 62 | final int nLimit = Math.min(text.length, textEnd); 63 | for (int n = textStart; n < nLimit; n++) { 64 | if (text[n] == pattern[0]) 65 | return n; 66 | } 67 | return -1; 68 | } 69 | else if (pattern.length == 2) { 70 | final int nLimit = Math.min(text.length, textEnd) - 1; 71 | for (int n = textStart; n < nLimit; n++) { 72 | if (text[n] == pattern[0]) { 73 | if (text[n + 1] == pattern[1]) 74 | return n; 75 | } 76 | } 77 | return -1; 78 | } 79 | 80 | int[] b = (int[]) processed; 81 | 82 | int i, j, k, mMinusOne; 83 | byte last, first; 84 | 85 | i = pattern.length - 1; 86 | mMinusOne = pattern.length - 2; 87 | 88 | last = pattern[pattern.length - 1]; 89 | first = pattern[0]; 90 | 91 | i += textStart; 92 | 93 | while (i < textEnd) { 94 | 95 | if (text[i] == last && text[i - (pattern.length - 1)] == first) { 96 | 97 | k = i - 1; 98 | j = mMinusOne; 99 | 100 | while (k > -1 && j > -1 && text[k] == pattern[j]) { 101 | --k; 102 | --j; 103 | } 104 | if (j == -1) { 105 | return k + 1; 106 | } 107 | 108 
| } 109 | 110 | i += b[index(text[i])]; 111 | } 112 | 113 | return -1; 114 | } 115 | 116 | /** 117 | * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, 118 | * char[], Object) 119 | */ 120 | @Override 121 | public int searchChars(char[] text, int textStart, int textEnd, 122 | char[] pattern, Object processed) { 123 | 124 | // Unrolled fast paths for patterns of length 1 and 2. Suggested by someone who doesn't want to be named. 125 | 126 | if (pattern.length == 1) { 127 | final int nLimit = Math.min(text.length, textEnd); 128 | for (int n = textStart; n < nLimit; n++) { 129 | if (text[n] == pattern[0]) 130 | return n; 131 | } 132 | return -1; 133 | } 134 | else if (pattern.length == 2) { 135 | final int nLimit = Math.min(text.length, textEnd) - 1; 136 | for (int n = textStart; n < nLimit; n++) { 137 | if (text[n] == pattern[0]) { 138 | if (text[n + 1] == pattern[1]) 139 | return n; 140 | } 141 | } 142 | return -1; 143 | } 144 | 145 | CharIntMap m = (CharIntMap) processed; 146 | 147 | int i, j, k, mMinusOne; 148 | char last, first; 149 | 150 | i = pattern.length - 1; 151 | mMinusOne = i - 1; 152 | 153 | last = pattern[i]; 154 | first = pattern[0]; 155 | 156 | i += textStart; 157 | 158 | while (i < textEnd) { 159 | 160 | if (text[i] == last && text[i - (pattern.length - 1)] == first) { 161 | 162 | k = i - 1; 163 | j = mMinusOne; 164 | 165 | while (k > -1 && j > -1 && text[k] == pattern[j]) { 166 | --k; 167 | --j; 168 | } 169 | if (j == -1) { 170 | return k + 1; 171 | } 172 | 173 | } 174 | i += m.get(text[i]); 175 | } 176 | 177 | return -1; 178 | } 179 | 180 | } 181 | -------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/BNDM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BNDM.java 3 | * 4 | * Created on 21.10.2003 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann 
Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * An implementation of the Backwards Non-deterministic DAWG (Directed acyclic 32 | * word graph) Matching algorithm by Gonzalo Navarro and Mathieu Raffinot. See 33 | * "A Bit-Parallel Approach to Suffix Automata: Fast Extended String Matching" 34 | * (appeared in Proceedings of the 9th Annual Symposium on Combinatorial 35 | * Pattern Matching, 1998). 36 | *

37 | * See 38 | * {@link com.eaio.stringsearch.BNDMWildcards} for searching with wildcards, 39 | * {@link com.eaio.stringsearch.BNDMCI} for case insensitive searching 40 | * and {@link com.eaio.stringsearch.BNDMWildcardsCI} for case 41 | * insensitive searching with wildcards. 42 | *

43 | * This is one of the fastest algorithms, but it does not beat the 44 | * {@link com.eaio.stringsearch.BoyerMooreHorspoolRaita} and the 45 | * {@link com.eaio.stringsearch.BoyerMooreHorspool} algorithms. 46 | * 47 | * @see 48 | * StringSearch – high-performance pattern matching algorithms in Java 49 | * @see 51 | * http://www.dcc.uchile.cl/~gnavarro/ps/cpm98.ps.gz 52 | * 53 | * @see 55 | * http://www-igm.univ-mlv.fr/~raffinot/ftp/cpm98.ps.gz 56 | * 57 | * @see 59 | * http://citeseer.ist.psu.edu/navarro98bitparallel.html 60 | * 61 | * @author Johann Burkard 62 | * @version $Id: BNDM.java 6675 2015-01-17 21:02:35Z johann $ 63 | */ 64 | public class BNDM extends StringSearch { 65 | 66 | /** 67 | * Pre-processing of the pattern. The pattern may not exceed 32 bytes in 68 | * length. If it does, only it's first 32 bytes are processed which 69 | * might lead to unexpected results. Returns an int array which is 70 | * serializable. 71 | * 72 | * @see com.eaio.stringsearch.StringSearch#processBytes(byte[]) 73 | */ 74 | @Override 75 | public Object processBytes(byte[] pattern) { 76 | int end = pattern.length < 32 ? pattern.length : 32; 77 | 78 | int[] b = new int[256]; 79 | 80 | int j = 1; 81 | for (int i = end - 1; i >= 0; --i, j <<= 1) { 82 | b[index(pattern[i])] |= j; 83 | } 84 | 85 | return b; 86 | } 87 | 88 | /** 89 | * Pre-processing of the pattern. The pattern may not exceed 32 bytes in 90 | * length. If it does, only it's first 32 bytes are processed which 91 | * might lead to unexpected results. Returns a {@link CharIntMap} which is 92 | * serializable. 93 | * 94 | * @see com.eaio.stringsearch.StringSearch#processChars(char[]) 95 | */ 96 | @Override 97 | public Object processChars(char[] pattern) { 98 | int end = pattern.length < 32 ? 
pattern.length : 32; 99 | 100 | CharIntMap b = createCharIntMap(pattern, end, 0); 101 | 102 | int j = 1; 103 | for (int i = end - 1; i >= 0; --i, j <<= 1) { 104 | b.set(pattern[i], b.get(pattern[i]) | j); 105 | } 106 | 107 | return b; 108 | } 109 | 110 | /** 111 | * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int, 112 | * byte[], java.lang.Object) 113 | */ 114 | @Override 115 | public int searchBytes(byte[] text, int textStart, int textEnd, 116 | byte[] pattern, Object processed) { 117 | 118 | int[] t = (int[]) processed; 119 | int l = pattern.length < 32 ? pattern.length : 32; 120 | 121 | int d, j, pos, last; 122 | pos = textStart; 123 | while (pos <= textEnd - l) { 124 | j = l - 1; 125 | last = l; 126 | d = -1; 127 | while (d != 0) { 128 | d &= t[index(text[pos + j])]; 129 | if (d != 0) { 130 | if (j == 0) { 131 | return pos; 132 | } 133 | last = j; 134 | } 135 | --j; 136 | d <<= 1; 137 | } 138 | pos += last; 139 | } 140 | 141 | return -1; 142 | } 143 | 144 | /** 145 | * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, char[], Object) 146 | */ 147 | @Override 148 | public int searchChars(char[] text, int textStart, int textEnd, 149 | char[] pattern, Object processed) { 150 | 151 | CharIntMap b = (CharIntMap) processed; 152 | int l = pattern.length < 32 ? pattern.length : 32; 153 | 154 | int d, j, pos, last; 155 | pos = textStart; 156 | while (pos <= textEnd - l) { 157 | j = l - 1; 158 | last = l; 159 | d = -1; 160 | while (d != 0) { 161 | d &= b.get(text[pos + j]); 162 | if (d != 0) { 163 | if (j == 0) { 164 | return pos; 165 | } 166 | last = j; 167 | } 168 | --j; 169 | d <<= 1; 170 | } 171 | pos += last; 172 | } 173 | 174 | return -1; 175 | } 176 | 177 | /** 178 | * Returns the smaller of two chars. 179 | * 180 | * @param one the first char 181 | * @param two the second char 182 | * @return the smaller char 183 | */ 184 | protected final char min(char one, char two) { 185 | return one < two ? 
one : two; 186 | } 187 | 188 | /** 189 | * Returns the larger of two chars. 190 | * 191 | * @param one the first char 192 | * @param two the second char 193 | * @return the larger char 194 | */ 195 | protected final char max(char one, char two) { 196 | return one > two ? one : two; 197 | } 198 | 199 | } 200 | -------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/BNDMWildcards.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BNDMWildcards.java 3 | * 4 | * Created on 19.01.2004. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * An implementation of the {@link BNDM} algorithm with wildcards ("don't care" 32 | * symbols). The wildcard character is initially '.', but any character can be 33 | * used. 34 | *

35 | * Please note that the wildcard character has changed from '?' in 36 | * StringSearch version 1 to '.' in version 2. 37 | * 38 | * @see StringSearch 39 | * – high-performance pattern matching algorithms in Java 40 | * @see BNDMWildcards#BNDMWildcards(char) 41 | * @see #processBytes(byte[], byte) 42 | * @see #processChars(char[], char) 43 | * @see com.eaio.stringsearch.BNDM 44 | * @author Johann Burkard 45 | * @version $Id: BNDMWildcards.java 6675 2015-01-17 21:02:35Z johann $ 46 | */ 47 | public class BNDMWildcards extends BNDM { 48 | 49 | /** 50 | * The wildcard character. 51 | */ 52 | public final char wildcard; 53 | 54 | /** 55 | * Constructor for BNDMWildcards. Uses '.' as the wildcard character. 56 | */ 57 | public BNDMWildcards() { 58 | this('.'); 59 | } 60 | 61 | /** 62 | * 63 | * Constructor for BNDMWildcards. 64 | * 65 | * @param wildcard 66 | */ 67 | public BNDMWildcards(char wildcard) { 68 | super(); 69 | this.wildcard = wildcard; 70 | } 71 | 72 | /** 73 | * Pre-processing of the pattern. The pattern may not exceed 32 bytes in 74 | * length. If it does, only it's first 32 bytes are processed which 75 | * might lead to unexpected results. The wildcard character is obtained 76 | * from the {@link #wildcard} field. Returns an int 77 | * array which is serializable. 78 | * 79 | * @see com.eaio.stringsearch.StringSearch#processBytes(byte[]) 80 | * @see #processBytes(byte[], byte) 81 | */ 82 | @Override 83 | public Object processBytes(byte[] pattern) { 84 | return processBytes(pattern, (byte) wildcard); 85 | } 86 | 87 | /** 88 | * Pre-processes the pattern. The pattern may not exceed 32 characters in 89 | * length. If it does, only it's first 32 bytes are processed which 90 | * might lead to unexpected results. The wildcard character is obtained 91 | * from the {@link #wildcard} field. Returns a {@link CharIntMap} 92 | * which is serializable. 
93 | * 94 | * @param pattern the char array containing the pattern, may 95 | * not be null 96 | * @return a {@link CharIntMap} 97 | * @see StringSearch#processChars(char[]) 98 | * @see #processChars(char[], char) 99 | */ 100 | @Override 101 | public Object processChars(char[] pattern) { 102 | return processChars(pattern, wildcard); 103 | } 104 | 105 | /** 106 | * Pre-processing of the pattern. The pattern may not exceed 32 bytes in 107 | * length. If it does, only it's first 32 bytes are processed which 108 | * might lead to unexpected results. Returns an int array 109 | * which is serializable. 110 | * 111 | * @param pattern the byte array containing the pattern, may 112 | * not be null 113 | * @param w the wildcard byte character 114 | * @return an int array 115 | */ 116 | public Object processBytes(byte[] pattern, byte w) { 117 | int j = 0; 118 | int end = pattern.length < 32 ? pattern.length : 32; 119 | 120 | for (int i = 0; i < end; ++i) { 121 | if (pattern[i] == w) { 122 | j |= (1 << end - i - 1); 123 | } 124 | } 125 | 126 | int[] b = new int[256]; 127 | 128 | if (j != 0) { 129 | for (int i = 0; i < b.length; i++) { 130 | b[i] = j; 131 | } 132 | } 133 | 134 | j = 1; 135 | for (int i = end - 1; i >= 0; --i, j <<= 1) { 136 | b[index(pattern[i])] |= j; 137 | } 138 | 139 | return b; 140 | } 141 | 142 | /** 143 | * Pre-processes the pattern. The pattern may not exceed 32 characters in 144 | * length. If it does, only it's first 32 bytes are processed which 145 | * might lead to unexpected results. Returns a {@link CharIntMap} which is 146 | * serializable. 147 | * 148 | * @param pattern the String array containing the pattern, may not be 149 | * null 150 | * @param w the wildcard character 151 | * @return a {@link CharIntMap}. 152 | */ 153 | public Object processString(String pattern, char w) { 154 | return processChars(getChars(pattern), w); 155 | } 156 | 157 | /** 158 | * Pre-processes the pattern. The pattern may not exceed 32 characters in 159 | * length. 
If it does, only it's first 32 bytes are processed which 160 | * might lead to unexpected results. Returns a {@link CharIntMap}. 161 | * 162 | * @param pattern the char array containing the pattern, may 163 | * not be null 164 | * @param w the wildcard character 165 | * @return a {@link CharIntMap}. 166 | */ 167 | public Object processChars(char[] pattern, char w) { 168 | int j = 0; 169 | int end = pattern.length < 32 ? pattern.length : 32; 170 | 171 | for (int i = 0; i < end; ++i) { 172 | if (pattern[i] == w) { 173 | j |= (1 << end - i - 1); 174 | } 175 | } 176 | 177 | CharIntMap b = createCharIntMap(pattern, end, j); 178 | 179 | j = 1; 180 | for (int i = end - 1; i >= 0; --i, j <<= 1) { 181 | b.set(pattern[i], b.get(pattern[i]) | j); 182 | } 183 | 184 | return b; 185 | } 186 | 187 | @Override 188 | public boolean equals(Object obj) { 189 | return super.equals(obj) && wildcard == ((BNDMWildcards) obj).wildcard; 190 | } 191 | 192 | @Override 193 | public int hashCode() { 194 | return super.hashCode() ^ wildcard; 195 | } 196 | 197 | } 198 | -------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/BoyerMooreHorspool.java: -------------------------------------------------------------------------------- 1 | /* 2 | * BoyerMooreHorspool.java 3 | * 4 | * Created on 12.09.2003 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice 
and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * An implementation of Horspool's improved version of the Boyer-Moore String 32 | * searching algorithm. See "Practical fast searching in strings" (appeared in 33 | * Software - Practice & Experience, 10(6):501-506). Unfortunately, 34 | * there seems to be no on-line version of his paper. 35 | *

36 | * This is the second fastest algorithm in this library for the 37 | * searchChars and searchString. Except for very 38 | * short patterns (< 5 characters), it is always faster than any other 39 | * algorithm except {@link com.eaio.stringsearch.BoyerMooreHorspoolRaita} and 40 | * faster than {@link String#indexOf(String)} by more than 5 times for 41 | * patterns longer than 24 characters. It's searchBytes methods 42 | * are slightly faster than in the 43 | * {@link com.eaio.stringsearch.BoyerMooreHorspoolRaita} algorithm. 44 | *

45 | * This implementation is based on Ricardo Baeza-Yates' implementation. 48 | * 49 | * @see StringSearch 50 | * – high-performance pattern matching algorithms in Java 51 | * @author Johann Burkard 52 | * @version $Id: BoyerMooreHorspool.java 6675 2015-01-17 21:02:35Z johann $ 53 | */ 54 | public class BoyerMooreHorspool extends StringSearch { 55 | 56 | /** 57 | * Returns a int array for patterns longer than 2 characters, null otherwise. 58 | * 59 | * @see com.eaio.stringsearch.StringSearch#processBytes(byte[]) 60 | */ 61 | @Override 62 | public Object processBytes(byte[] pattern) { 63 | if (pattern.length == 1 || pattern.length == 2) { 64 | return null; 65 | } 66 | 67 | int[] skip = new int[256]; 68 | 69 | for (int i = 0; i < skip.length; ++i) { 70 | skip[i] = pattern.length; 71 | } 72 | 73 | for (int i = 0; i < pattern.length - 1; ++i) { 74 | skip[index(pattern[i])] = pattern.length - i - 1; 75 | } 76 | 77 | return skip; 78 | } 79 | 80 | /** 81 | * Returns a {@link CharIntMap} for patterns longer than 2 characters, null otherwise. 82 | * 83 | * @see com.eaio.stringsearch.StringSearch#processChars(char[]) 84 | */ 85 | @Override 86 | public Object processChars(char[] pattern) { 87 | if (pattern.length == 1 || pattern.length == 2) { 88 | return null; 89 | } 90 | 91 | CharIntMap skip = createCharIntMap(pattern, pattern.length); 92 | 93 | for (int i = 0; i < pattern.length - 1; ++i) { 94 | skip.set(pattern[i], pattern.length - i - 1); 95 | } 96 | 97 | return skip; 98 | } 99 | 100 | /** 101 | * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int, 102 | * byte[], java.lang.Object) 103 | */ 104 | @Override 105 | public int searchBytes(byte[] text, int textStart, int textEnd, 106 | byte[] pattern, Object processed) { 107 | 108 | // Unrolled fast paths for patterns of length 1 and 2. Suggested by someone who doesn't want to be named. 
109 | 110 | if (pattern.length == 1) { 111 | final int nLimit = Math.min(text.length, textEnd); 112 | for (int n = textStart; n < nLimit; n++) { 113 | if (text[n] == pattern[0]) 114 | return n; 115 | } 116 | return -1; 117 | } 118 | else if (pattern.length == 2) { 119 | final int nLimit = Math.min(text.length, textEnd) - 1; 120 | for (int n = textStart; n < nLimit; n++) { 121 | if (text[n] == pattern[0]) { 122 | if (text[n + 1] == pattern[1]) 123 | return n; 124 | } 125 | } 126 | return -1; 127 | } 128 | 129 | int[] skip = (int[]) processed; 130 | 131 | int i, j, k; 132 | 133 | final int lengthMinusOne = pattern.length - 1; 134 | 135 | for (k = lengthMinusOne; k < textEnd; k += skip[index(text[k])]) { 136 | for (j = lengthMinusOne, i = k; j >= 0 && text[i] == pattern[j] 137 | && i >= textStart; --j, --i) { 138 | // Blank. 139 | } 140 | if (j == -1) return ++i; 141 | } 142 | 143 | return -1; 144 | 145 | } 146 | 147 | /** 148 | * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, char[], Object) 149 | */ 150 | @Override 151 | public int searchChars(char[] text, int textStart, int textEnd, 152 | char[] pattern, Object processed) { 153 | 154 | // Unrolled fast paths for patterns of length 1 and 2. Suggested by someone who doesn't want to be named. 
155 | 156 | if (pattern.length == 1) { 157 | final int nLimit = Math.min(text.length, textEnd); 158 | for (int n = textStart; n < nLimit; n++) { 159 | if (text[n] == pattern[0]) 160 | return n; 161 | } 162 | return -1; 163 | } 164 | else if (pattern.length == 2) { 165 | final int nLimit = Math.min(text.length, textEnd) - 1; 166 | for (int n = textStart; n < nLimit; n++) { 167 | if (text[n] == pattern[0]) { 168 | if (text[n + 1] == pattern[1]) 169 | return n; 170 | } 171 | } 172 | return -1; 173 | } 174 | 175 | CharIntMap skip = (CharIntMap) processed; 176 | 177 | int i, j, k; 178 | 179 | final int lengthMinusOne = pattern.length - 1; 180 | 181 | for (k = lengthMinusOne; k < textEnd; k += skip.get(text[k])) { 182 | for (j = lengthMinusOne, i = k; j >= 0 && text[i] == pattern[j] 183 | && i >= textStart; --j, --i) { 184 | // Blank. 185 | } 186 | if (j == -1) return ++i; 187 | } 188 | 189 | return -1; 190 | } 191 | 192 | } 193 | -------------------------------------------------------------------------------- /src/test/java/com/eaio/stringsearch/performanceTest/Bits.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Bits.java 3 | * 4 | * Created on 30.05.2003. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 
/**
 * This class allows formatting of number values as binary codes: every value
 * is rendered most significant bit first as a sequence of the ASCII digits
 * '0' and '1', either as a <code>String</code> or as a <code>byte</code>
 * array.
 *
 * @author <a href="mailto:johann@johannburkard.de">Johann Burkard</a>
 * @version $Id: Bits.java 6675 2015-01-17 21:02:35Z johann $
 */
public final class Bits {

    /**
     * No instances needed.
     */
    private Bits() {
        super();
    }

    /**
     * Returns a <code>byte</code> as a String of 8 binary digits.
     *
     * @param b the byte
     * @return a String
     */
    public static String asString(byte b) {
        return digitsToString(asBytes(b));
    }

    /**
     * Returns a <code>short</code> as a String of 16 binary digits.
     *
     * @param s the short
     * @return a String
     */
    public static String asString(short s) {
        return digitsToString(asBytes(s));
    }

    /**
     * Returns a <code>char</code> as a String of 16 binary digits.
     *
     * @param c the char
     * @return a String
     */
    public static String asString(char c) {
        return digitsToString(asBytes(c));
    }

    /**
     * Returns an <code>int</code> as a String of 32 binary digits.
     *
     * @param i the int
     * @return a String
     */
    public static String asString(int i) {
        return digitsToString(asBytes(i));
    }

    /**
     * Returns a <code>long</code> as a String of 64 binary digits.
     *
     * @param l the long
     * @return a String
     */
    public static String asString(long l) {
        return digitsToString(asBytes(l));
    }

    /**
     * Returns a <code>byte</code> array as a String with 8 binary digits per
     * element, in array order.
     *
     * @param b the byte array, may not be <code>null</code>
     * @return a String
     * @throws NullPointerException if <code>b</code> is <code>null</code>
     */
    public static String asString(byte[] b) {
        return digitsToString(asBytes(b));
    }

    /**
     * Returns a <code>byte</code> as a <code>byte</code> array of 8 binary
     * digits.
     *
     * @param b the byte
     * @return a byte array
     */
    public static byte[] asBytes(byte b) {
        return toDigits(b, 8);
    }

    /**
     * Returns a <code>short</code> as a <code>byte</code> array of 16 binary
     * digits.
     *
     * @param s the short
     * @return a byte array
     */
    public static byte[] asBytes(short s) {
        return toDigits(s, 16);
    }

    /**
     * Returns a <code>char</code> as a <code>byte</code> array of 16 binary
     * digits.
     *
     * @param c the char
     * @return a byte array
     */
    public static byte[] asBytes(char c) {
        return toDigits(c, 16);
    }

    /**
     * Returns an <code>int</code> as a <code>byte</code> array of 32 binary
     * digits.
     *
     * @param i the int
     * @return a byte array
     */
    public static byte[] asBytes(int i) {
        return toDigits(i, 32);
    }

    /**
     * Returns a <code>long</code> as a <code>byte</code> array of 64 binary
     * digits.
     *
     * @param l the long
     * @return a byte array
     */
    public static byte[] asBytes(long l) {
        return toDigits(l, 64);
    }

    /**
     * Returns a <code>byte</code> array as a <code>byte</code> array of
     * binary digits, 8 per element, in array order.
     *
     * @param b the byte array, may not be <code>null</code>
     * @return a byte array
     * @throws NullPointerException if <code>b</code> is <code>null</code>
     */
    public static byte[] asBytes(byte[] b) {
        if (b == null) {
            throw new NullPointerException();
        }
        byte[] out = new byte[b.length << 3];
        int i = 0;
        for (int j = 0; j < b.length; j++) {
            for (int k = 7; k > -1; k--) {
                out[i++] = (byte) (((b[j] >> k) & 1) == 1 ? '1' : '0');
            }
        }
        return out;
    }

    /**
     * Writes the lowest <code>width</code> bits of <code>value</code> as
     * ASCII digits, most significant bit first. Sign extension of narrower
     * types is harmless because only the low <code>width</code> bits are
     * read.
     */
    private static byte[] toDigits(long value, int width) {
        byte[] out = new byte[width];
        long in = value;
        for (int i = width - 1; i > -1; i--) {
            out[i] = (byte) ((in & 1L) == 1L ? '1' : '0');
            in >>>= 1;
        }
        return out;
    }

    /**
     * Converts an array of ASCII digits into a String without the deprecated
     * <code>String(byte[], int)</code> constructor. The digits are only '0'
     * and '1', so widening each <code>byte</code> to a <code>char</code>
     * yields the same characters.
     */
    private static String digitsToString(byte[] digits) {
        char[] chars = new char[digits.length];
        for (int i = 0; i < digits.length; i++) {
            chars[i] = (char) digits[i];
        }
        return new String(chars);
    }

}
5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * An implementation of the Shift-Or algorithm with mismatches. 32 | *

33 | * The pattern length may not be larger than 34 | * 31 / ⌈ log2(k + 1) ⌉. If it is, only 35 | * characters to the maximum pattern length will be processed which might 36 | * lead to unexpected results. 37 | *

38 | * 39 | * 40 | * 41 | * 42 | * 43 | * 44 | * 45 | * 46 | * 47 | * 48 | * 49 | * 50 | * 51 | * 52 | * 53 | * 54 | * 55 | * 56 | * 57 | * 58 | * 59 | *
 * Number of mismatches (k) | Maximum pattern length
 * 0 | 31
 * 1 | 15
 * 2-3 | 10
 * 4-5 | 7
60 | *

61 | * Note that the number of mismatches might not be correct if 62 | * k is set higher than half the pattern length. 63 | * 64 | * @see StringSearch 65 | * – high-performance pattern matching algorithms in Java 66 | * @see 67 | * 68 | * ftp://sunsite.dcc.uchile.cl/pub/users/rbaeza/papers/CACM92.ps.gz 69 | * 70 | * @see 71 | * http://citeseer.ist.psu.edu/50265.html 72 | * 73 | * @author Johann Burkard 74 | * @version $Id: ShiftOrMismatches.java 6675 2015-01-17 21:02:35Z johann $ 75 | * @see #processBytes(byte[], int) 76 | * @see #processChars(char[], int) 77 | */ 78 | public class ShiftOrMismatches extends MismatchSearch { 79 | 80 | /** 81 | * The Object returned is serializable. 82 | * 83 | * @throws IllegalArgumentException if the pattern length is larger than 84 | * 31 / ⌈ log2(k + 1) ⌉ 85 | * @see com.eaio.stringsearch.MismatchSearch#processBytes(byte[], int) 86 | */ 87 | @Override 88 | public Object processBytes(byte[] pattern, int k) { 89 | 90 | int b = clog2(k + 1) + 1; 91 | int l = Math.min(pattern.length, 31 / b); 92 | int lim = k << ((l - 1) * b); 93 | int ovmask = 0; 94 | int mask = 1 << (b - 1); 95 | 96 | for (int j = 0; j < l; ++j) { 97 | ovmask |= mask; 98 | mask <<= b; 99 | } 100 | 101 | int tInit = ovmask >> (b - 1); 102 | int[] T = new int[256]; 103 | for (int a = 0; a < T.length; ++a) { 104 | T[a] = tInit; 105 | } 106 | 107 | lim += 1 << ((l - 1) * b); 108 | mask = 1; 109 | 110 | for (int j = 0; j < l; ++j) { 111 | T[index(pattern[j])] &= ~mask; 112 | mask <<= b; 113 | } 114 | 115 | return new Object[] { T, mask - 1, ovmask, lim, b }; 116 | } 117 | 118 | /** 119 | * The Object returned is serializable. 
120 | * 121 | * @throws IllegalArgumentException if the pattern length is larger than 122 | * 31 / ⌈ log2(k + 1) ⌉ 123 | * @see com.eaio.stringsearch.MismatchSearch#processChars(char[], int) 124 | */ 125 | @Override 126 | public Object processChars(char[] pattern, int k) { 127 | 128 | int b = clog2(k + 1) + 1; 129 | int l = Math.min(pattern.length, 31 / b); 130 | int lim = k << ((l - 1) * b); 131 | int ovmask = 0; 132 | int mask = 1 << (b - 1); 133 | 134 | for (int j = 0; j < l; ++j) { 135 | ovmask |= mask; 136 | mask <<= b; 137 | } 138 | 139 | int tInit = ovmask >> (b - 1); 140 | CharIntMap T = createCharIntMap(pattern, l, tInit); 141 | 142 | lim += 1 << ((l - 1) * b); 143 | mask = 1; 144 | 145 | for (int j = 0; j < l; ++j) { 146 | T.set(pattern[j], T.get(pattern[j]) & ~mask); 147 | mask <<= b; 148 | } 149 | 150 | return new Object[] { T, mask - 1, ovmask, lim, b }; 151 | } 152 | 153 | /** 154 | * @see com.eaio.stringsearch.MismatchSearch#searchBytes(byte[], int, int, 155 | * byte[], Object, int) 156 | */ 157 | @Override 158 | public int[] searchBytes(byte[] text, int textStart, int textEnd, 159 | byte[] pattern, Object processed, int k) { 160 | 161 | Object[] o = (Object[]) processed; 162 | int[] T = (int[]) o[0]; 163 | final int mask = (Integer) o[1]; 164 | final int ovmask = (Integer) o[2]; 165 | final int lim = (Integer) o[3]; 166 | final int b = (Integer) o[4]; 167 | final int l = Math.min(pattern.length, 31 / b); 168 | 169 | int s = mask & ~ovmask; 170 | int ov = ovmask; 171 | 172 | for (int i = textStart; i < textEnd; ++i) { 173 | s = ((s << b) + T[index(text[i])]) & mask; 174 | ov = ((ov << b) | (s & ovmask)) & mask; 175 | s &= ~ovmask; 176 | if ((s | ov) < lim) { 177 | return new int[] { i - l + 1, s >> b * (l - 1) }; 178 | } 179 | } 180 | 181 | return new int[] { -1, 0 }; 182 | } 183 | 184 | /** 185 | * @see com.eaio.stringsearch.MismatchSearch#searchChars(char[], int, int, 186 | * char[], Object, int) 187 | */ 188 | @Override 189 | public int[] 
searchChars(char[] text, int textStart, int textEnd, 190 | char[] pattern, Object processed, int k) { 191 | 192 | Object[] o = (Object[]) processed; 193 | CharIntMap T = (CharIntMap) o[0]; 194 | final int mask = (Integer) o[1]; 195 | final int ovmask = (Integer) o[2]; 196 | final int lim = (Integer) o[3]; 197 | final int b = (Integer) o[4]; 198 | final int l = Math.min(pattern.length, 31 / b); 199 | 200 | int s = mask & ~ovmask; 201 | int ov = ovmask; 202 | 203 | for (int i = textStart; i < textEnd; ++i) { 204 | s = ((s << b) + T.get(text[i])) & mask; 205 | ov = ((ov << b) | (s & ovmask)) & mask; 206 | s &= ~ovmask; 207 | if ((s | ov) < lim) { 208 | return new int[] { i - l + 1, s >> b * (l - 1) }; 209 | } 210 | } 211 | 212 | return new int[] { -1, 0 }; 213 | } 214 | 215 | /** 216 | * Ceiling of log2(x). 217 | * 218 | * @param x x 219 | * @return ⌈log2(x)⌉ 220 | */ 221 | private int clog2(int x) { 222 | int i = 0; 223 | while (x > (1 << i)) { 224 | ++i; 225 | } 226 | return i; 227 | } 228 | 229 | } 230 | -------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/CharIntMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * CharIntMap.java 3 | * 4 | * Created on 13.11.2003. 
5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | import java.io.Externalizable; 31 | import java.io.IOException; 32 | import java.io.ObjectInput; 33 | import java.io.ObjectOutput; 34 | import java.util.Arrays; 35 | 36 | /** 37 | * The CharIntMap is a collection to save char to int 38 | * mappings in. The CharIntMap is destined to provide fast access to skip 39 | * tables while being both Unicode-safe and more RAM-effective than a naive 40 | * int array. 41 | *

42 | * The CharIntMap is initialized by specifying the extent between the lowest 43 | * and the highest occuring character. Only 44 | * an array of size highest - lowest + 1 is constructed. 45 | *

/**
 * The CharIntMap is a collection to save <code>char</code> to <code>int</code>
 * mappings in. It is backed by an <code>int</code> array that only covers the
 * characters from the lowest character on; characters outside of that window
 * read as the default value and writes to them are ignored.
 * <p>
 * CharIntMap are created automatically in the pre-processing methods of each
 * {@link com.eaio.stringsearch.StringSearch} instance.
 *
 * @see "StringSearch – high-performance pattern matching algorithms in Java"
 * @see com.eaio.stringsearch.StringSearch#createCharIntMap(char[], int)
 * @author Johann Burkard
 * @version $Id: CharIntMap.java 6675 2015-01-17 21:02:35Z johann $
 */
public class CharIntMap implements Externalizable {

    static final long serialVersionUID = 1351686633123489568L;

    /** Backing array of maps that cannot hold any mapping. */
    private static final int[] NO_SLOTS = new int[0];

    /**
     * The values, indexed by <code>c - lowest</code>. Never <code>null</code>
     * so that a freshly created or de-serialized instance can always be used.
     */
    private int[] array = NO_SLOTS;

    private char lowest;

    private int defaultValue;

    /**
     * Constructor for CharIntMap. Required for Serialization. The instance
     * has an extent of zero until {@link #readExternal(ObjectInput)} is
     * called.
     */
    public CharIntMap() {
        super();
    }

    /**
     * Constructor for CharIntMap.
     *
     * @param extent the extent of the text
     * @param lowest the lowest occuring character
     * @param defaultValue a default value to initialize the underlying
     * <code>int</code> array with
     */
    public CharIntMap(int extent, char lowest, int defaultValue) {
        array = new int[extent];
        this.lowest = lowest;
        this.defaultValue = defaultValue;
        if (defaultValue != 0) {
            Arrays.fill(array, defaultValue);
        }
    }

    /**
     * Returns the stored value for the given char.
     *
     * @param c the char
     * @return the stored value or the default value if <code>c</code> is
     * outside of the range covered by this map
     */
    public final int get(char c) {
        /* chars below lowest wrap around to large values and are rejected. */
        char x = (char) (c - lowest);
        if (x >= array.length) {
            return defaultValue;
        }
        return array[x];
    }

    /**
     * Sets the stored value for the given char. Nothing happens if
     * <code>c</code> is outside of the range covered by this map.
     *
     * @param c the char
     * @param val the new value
     */
    public final void set(char c, int val) {
        /* chars below lowest wrap around to large values and are rejected. */
        char x = (char) (c - lowest);
        if (x >= array.length) {
            return;
        }
        array[x] = val;
    }

    /**
     * Returns the extent of the actual <code>char</code> array.
     *
     * @return the extent
     */
    public final int getExtent() {
        return array.length;
    }

    /**
     * Returns the lowest char that mappings can be saved for.
     *
     * @return a <code>char</code>
     */
    public final char getLowest() {
        return lowest;
    }

    /**
     * Returns <code>lowest + extent</code>, which is the char right after the
     * last char that mappings can be saved for.
     *
     * @return char
     */
    public final char getHighest() {
        return (char) (lowest + array.length);
    }

    /**
     * Returns if this Object is equal to another Object.
     *
     * @param obj the other Object
     * @return if this Object is equal
     * @see java.lang.Object#equals(Object)
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CharIntMap)) {
            return false;
        }
        CharIntMap m = (CharIntMap) obj;
        return lowest == m.lowest && defaultValue == m.defaultValue
                && Arrays.equals(array, m.array);
    }

    /**
     * Returns the hashCode of this Object.
     *
     * @return the hashCode
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        int out = getClass().getName().hashCode();
        out ^= lowest;
        out ^= defaultValue;
        for (int value : array) {
            out ^= value;
        }
        return out;
    }

    /**
     * Returns a String representation of this Object.
     *
     * @return a String, never <code>null</code>
     * @see java.lang.Object#toString()
     * @see #toStringBuffer(StringBuffer)
     */
    @Override
    public final String toString() {
        return toStringBuffer(null).toString();
    }

    /**
     * Appends a String representation of this Object to the given
     * {@link StringBuffer} or creates a new one if none is given. This method is
     * not <code>final</code> because subclasses might want a different String
     * format.
     *
     * @param in the StringBuffer to append to, may be <code>null</code>
     * @return a StringBuffer, never <code>null</code>
     */
    public StringBuffer toStringBuffer(StringBuffer in) {
        StringBuffer out = in;
        if (out == null) {
            out = new StringBuffer(128);
        }
        else {
            out.ensureCapacity(out.length() + 128);
        }
        out.append("{ CharIntMap: lowest = ");
        out.append(lowest);
        out.append(", defaultValue = ");
        out.append(defaultValue);
        out.append(", array = ");
        /* Only slots that are not zero are listed, as "index: value". */
        for (int i = 0; i < array.length; i++) {
            if (array[i] != 0) {
                out.append(i);
                out.append(": ");
                out.append(array[i]);
                out.append(' ');
            }
        }
        out.append('}');
        return out;
    }

    /**
     * Writes the extent, the values, the lowest char and the default value.
     *
     * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput)
     */
    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        out.writeInt(array.length);
        for (int i = 0; i < array.length; i++) {
            out.writeInt(array[i]);
        }
        out.writeChar(lowest);
        out.writeInt(defaultValue);
    }

    /**
     * Reads the state written by {@link #writeExternal(ObjectOutput)}. An
     * extent of zero (or less) results in an empty map instead of leaving the
     * backing array unset, so that the instance read is equal to the one
     * written and can be used right away.
     *
     * @see java.io.Externalizable#readExternal(java.io.ObjectInput)
     */
    @Override
    public void readExternal(ObjectInput in) throws IOException {
        int l = in.readInt();
        int[] values = l > 0 ? new int[l] : NO_SLOTS;
        for (int i = 0; i < values.length; i++) {
            values[i] = in.readInt();
        }
        array = values;
        lowest = in.readChar();
        defaultValue = in.readInt();
    }

}
package com.eaio.stringsearch;

import static org.junit.Assert.*;

import java.io.*;
import java.util.Hashtable;

import org.junit.Test;

/**
 * Base class of the tests for the pattern matching algorithms. Subclasses
 * supply the algorithm to test through {@link #createInstance()}.
 * <p>
 * Whether the case insensitive, wildcards and mismatch assertions are run is
 * derived from the class of the instance: its name containing "CI" or
 * "Wildcards" and it being a {@link MismatchSearch}.
 *
 * @author Johann Burkard
 * @version $Id: AbstractStringSearchTest.java 6675 2015-01-17 21:02:35Z johann $
 */
public abstract class AbstractStringSearchTest {

    public static final String LICENSE_STRING = "Permission is hereby granted, free of charge, to any person obtaining a "
            + "copy of this software and associated documentation files (the \"Software\"), "
            + "to deal in the Software without restriction, including without limitation "
            + "the rights to use, copy, modify, merge, publish, distribute, sublicense, "
            + "and/or sell copies of the Software, and to permit persons to whom the "
            + "Software is furnished to do so, subject to the following conditions: "
            + "\r\n\r\n"
            + "The above copyright notice and this permission notice shall be included "
            + "in all copies or substantial portions of the Software.";

    /* MismatchSearch methods */

    /**
     * Asserts that all mismatch search variants without an explicit end index
     * report the pattern at <code>pos</code> with <code>k</code> mismatches,
     * then delegates to
     * {@link #assertHit(int, int, String, int, int, String, int)} for the
     * whole text. Does nothing if the algorithm under test is not a
     * {@link MismatchSearch}.
     *
     * @param pos the expected position
     * @param k the expected number of mismatches
     * @param text the text to search in, may not be <code>null</code>
     * @param pattern the pattern to search for, may not be <code>null</code>
     * @param mismatches the number of mismatches to allow
     */
    public void assertHit(int pos, int k, String text, String pattern,
            int mismatches) {
        assertNotNull(text);
        assertNotNull(pattern);
        if (!(o instanceof MismatchSearch)) {
            return;
        }
        MismatchSearch m = (MismatchSearch) o;
        /* Strings */
        assertMismatchHit(pos, k, m.searchString(text, 0, pattern, mismatches));
        Object preprocessed = m.processString(pattern, mismatches);
        assertMismatchHit(pos, k, m.searchString(text, pattern, preprocessed,
                mismatches));
        assertMismatchHit(pos, k, m.searchString(text, 0, pattern,
                preprocessed, mismatches));
        /* Bytes */
        assertMismatchHit(pos, k, m.searchBytes(text.getBytes(), 0,
                pattern.getBytes(), mismatches));
        preprocessed = m.processBytes(pattern.getBytes(), mismatches);
        assertMismatchHit(pos, k, m.searchBytes(text.getBytes(), 0,
                pattern.getBytes(), preprocessed, mismatches));
        /* Test the other methods as well */
        assertHit(pos, k, text, 0, text.length(), pattern, mismatches);
    }

    /**
     * Asserts that the mismatch search variants report the pattern at
     * <code>pos</code> with <code>k</code> mismatches. The variants without
     * index arguments are called as well and are expected to return the same
     * result. Does nothing if the algorithm under test is not a
     * {@link MismatchSearch}.
     *
     * @param pos the expected position
     * @param k the expected number of mismatches
     * @param text the text to search in, may not be <code>null</code>
     * @param start the start index passed to the search methods
     * @param end the end index passed to the search methods
     * @param pattern the pattern to search for, may not be <code>null</code>
     * @param mismatches the number of mismatches to allow
     */
    public void assertHit(int pos, int k, String text, int start, int end,
            String pattern, int mismatches) {
        assertNotNull(text);
        assertNotNull(pattern);
        if (!(o instanceof MismatchSearch)) {
            return;
        }
        MismatchSearch m = (MismatchSearch) o;
        /* Strings */
        assertMismatchHit(pos, k, m.searchString(text, pattern, mismatches));
        assertMismatchHit(pos, k, m.searchString(text, start, end, pattern,
                mismatches));
        Object preprocessed = m.processString(pattern, mismatches);
        assertMismatchHit(pos, k, m.searchString(text, pattern, preprocessed,
                mismatches));
        assertMismatchHit(pos, k, m.searchString(text, start, end, pattern,
                preprocessed, mismatches));
        /* Bytes */
        assertMismatchHit(pos, k, m.searchBytes(text.getBytes(),
                pattern.getBytes(), mismatches));
        assertMismatchHit(pos, k, m.searchBytes(text.getBytes(), start, end,
                pattern.getBytes(), mismatches));
        preprocessed = m.processBytes(pattern.getBytes(), mismatches);
        assertMismatchHit(pos, k, m.searchBytes(text.getBytes(),
                pattern.getBytes(), preprocessed, mismatches));
        assertMismatchHit(pos, k, m.searchBytes(text.getBytes(), start, end,
                pattern.getBytes(), preprocessed, mismatches));
    }

    /**
     * Asserts that a mismatch search result is a two element array holding
     * the expected position and the expected number of mismatches.
     *
     * @param pos the expected position
     * @param k the expected number of mismatches
     * @param hit the result returned from the search method
     */
    private static void assertMismatchHit(int pos, int k, int[] hit) {
        assertNotNull(hit);
        assertEquals(2, hit.length);
        assertEquals(pos, hit[0]);
        assertEquals(k, hit[1]);
    }

    /* End MismatchSearch methods */

    /* StringSearch methods */

    /**
     * Asserts that all search variants without an explicit end index find the
     * pattern at <code>pos</code>. The variants with start and end index are
     * checked for the whole text, too.
     *
     * @param pos the expected position
     * @param text the text to search in, may not be <code>null</code>
     * @param pattern the pattern to search for, may not be <code>null</code>
     */
    public void assertHit(int pos, String text, String pattern) {
        assertNotNull(text);
        assertNotNull(pattern);
        /* Strings */
        assertEquals(pos, o.searchString(text, pattern));
        assertEquals(pos, o.searchString(text, 0, pattern));
        Object preprocessed = o.processString(pattern);
        assertEquals(pos, o.searchString(text, pattern, preprocessed));
        assertEquals(pos, o.searchString(text, 0, pattern, preprocessed));
        assertEquals(pos, o.searchChars(StringSearch.getChars(text),
                StringSearch.getChars(pattern)));
        assertEquals(pos, o.searchChars(StringSearch.getChars(text), 0,
                StringSearch.getChars(pattern)));
        preprocessed = o.processChars(StringSearch.getChars(pattern));
        assertEquals(pos, o.searchChars(StringSearch.getChars(text),
                StringSearch.getChars(pattern), preprocessed));
        assertEquals(pos, o.searchChars(StringSearch.getChars(text), 0,
                StringSearch.getChars(pattern), preprocessed));
        assertHit(pos, text, 0, text.length(), pattern);
        /* Bytes */
        assertEquals(pos, o.searchBytes(text.getBytes(), pattern.getBytes()));
        assertEquals(pos, o.searchBytes(text.getBytes(), 0, pattern.getBytes()));
        preprocessed = o.processBytes(pattern.getBytes());
        assertEquals(pos, o.searchBytes(text.getBytes(), pattern.getBytes(),
                preprocessed));
        assertEquals(pos, o.searchBytes(text.getBytes(), 0, pattern.getBytes(),
                preprocessed));
    }

    /**
     * Asserts that all search variants taking a start and an end index find
     * the pattern at <code>pos</code>.
     *
     * @param pos the expected position
     * @param text the text to search in, may not be <code>null</code>
     * @param start the start index passed to the search methods
     * @param end the end index passed to the search methods
     * @param pattern the pattern to search for, may not be <code>null</code>
     */
    public void assertHit(int pos, String text, int start, int end,
            String pattern) {
        assertNotNull(text);
        assertNotNull(pattern);
        /* Strings */
        assertEquals(pos, o.searchString(text, start, end, pattern));
        Object preprocessed = o.processString(pattern);
        assertEquals(pos, o.searchString(text, start, end, pattern,
                preprocessed));
        assertEquals(pos, o.searchChars(StringSearch.getChars(text), start,
                end, StringSearch.getChars(pattern)));
        preprocessed = o.processChars(StringSearch.getChars(pattern));
        assertEquals(pos, o.searchChars(StringSearch.getChars(text), start,
                end, StringSearch.getChars(pattern), preprocessed));
        /* Bytes */
        assertEquals(pos, o.searchBytes(text.getBytes(), start, end,
                pattern.getBytes()));
        preprocessed = o.processBytes(pattern.getBytes());
        assertEquals(pos, o.searchBytes(text.getBytes(), start, end,
                pattern.getBytes(), preprocessed));
    }

    /* End StringSearch methods */

    /**
     * Creates a new instance of the given algorithm.
     * <p>
     * This method is called while the fields of this class are initialized,
     * so implementations must not rely on state of the subclass.
     *
     * @return a new instance
     */
    protected abstract StringSearch createInstance();

    /* The flags below read o, so o has to be declared first. */
    protected final StringSearch o = createInstance();
    private final boolean caseInsensitive = o.getClass().getName().indexOf("CI") != -1;
    private final boolean wildcardsSearch = o.getClass().getName().indexOf(
            "Wildcards") != -1;
    private final boolean mismatchSearch = MismatchSearch.class.isAssignableFrom(o.getClass());

    @Test
    public void basics() {
        assertHit(2, "q-_-p", "_");
        assertHit(2, "q-_-p", "_-");
        assertHit(4, "keksbassbla", "bass");
        assertHit(4, "keksbassbla", 4, "keksbassbla".length(), "bass");
        assertHit(-1, "keksbassbla", 5, "keksbassbla".length(), "bass");
        assertHit(-1, "keksbassbla", 0, 7, "bass");

        assertHit(6, "Hallo Johann", "Johann");

        if (caseInsensitive) {
            assertHit(4, "keksbassbla", "BASS");
        }
        if (wildcardsSearch) {
            assertHit(4, "keksbassbla", "b.ss");
        }
        if (caseInsensitive && wildcardsSearch) {
            assertHit(4, "keksbassbla", "B.SS");
        }
        if (mismatchSearch) {
            assertHit(4, 1, "keksbassbla", "boss", 1);
        }

    }

    @Test
    public void shortPatterns() {
        assertHit(1, LICENSE_STRING, "e");
        assertHit(0, "a", "a");
        assertHit(-1, "a", "b");
        assertHit(0, "aa".substring(1, 2), "a");
        assertHit(0, "aa".substring(0, 1), "a");
    }

    @Test
    public void longTexts() {
        assertTrue(LICENSE_STRING.indexOf(LICENSE_STRING.substring(LICENSE_STRING.length() >> 1)) == LICENSE_STRING.length() >> 1);

        assertHit(0, LICENSE_STRING, LICENSE_STRING);
        assertHit(538, LICENSE_STRING, "portions");
        assertHit(LICENSE_STRING.length() >> 1, LICENSE_STRING,
                LICENSE_STRING.substring(LICENSE_STRING.length() >> 1));

        if (caseInsensitive) {
            assertHit(0, LICENSE_STRING, LICENSE_STRING.toLowerCase());
            assertHit(538, LICENSE_STRING, "PoRtIoNs");
            assertHit(
                    LICENSE_STRING.length() >> 1,
                    LICENSE_STRING,
                    LICENSE_STRING.substring(LICENSE_STRING.length() >> 1).toLowerCase());
        }
        if (wildcardsSearch) {
            assertHit(0, LICENSE_STRING, LICENSE_STRING.replace('e', '.'));
            assertHit(538, LICENSE_STRING, "p...io..");
            assertHit(
                    LICENSE_STRING.length() >> 1,
                    LICENSE_STRING,
                    LICENSE_STRING.substring(LICENSE_STRING.length() >> 1).replace(
                            'e', '.'));
        }
        if (caseInsensitive && wildcardsSearch) {
            assertHit(0, LICENSE_STRING, LICENSE_STRING.toUpperCase().replace('E', '.'));
            assertHit(538, LICENSE_STRING, "P...IO..");
            assertHit(
                    LICENSE_STRING.length() >> 1,
                    LICENSE_STRING,
                    LICENSE_STRING.substring(LICENSE_STRING.length() >> 1).toLowerCase().replace(
                            'e', '.'));
        }
        if (mismatchSearch) {
            assertHit(538, 1, LICENSE_STRING, "partions", 1);
        }
    }

    @Test
    public void hiByte() throws UnsupportedEncodingException {
        assertEquals(1, o.searchBytes("äöüß".getBytes("ISO-8859-1"),
                "öü".getBytes("ISO-8859-1")));

        if (caseInsensitive) {
            assertEquals(1, o.searchBytes("äöüß".getBytes("ISO-8859-1"),
                    "ÖÜ".getBytes("ISO-8859-1")));
        }
        if (wildcardsSearch) {
            assertEquals(1, o.searchBytes("äöüß".getBytes("ISO-8859-1"),
                    "ö.".getBytes("ISO-8859-1")));
        }
        if (caseInsensitive && wildcardsSearch) {
            assertEquals(1, o.searchBytes("äöüß".getBytes("ISO-8859-1"),
                    "Ö.".getBytes("ISO-8859-1")));
        }
    }

    @Test
    public void mismatchSearch() {
        assertHit(1, 1, "abc", "bd", 1);
        assertHit(1, 1, "abc", 1, 3, "bd", 1);
        assertHit(-1, 0, "abc", 2, 3, "bd", 0);

        assertHit(0, 1, "kakao", "kako", 1);
        assertHit(0, 2, "kakao", "cacao", 2);
        assertHit(-1, 0, "kakao", "cacao", 1);

        assertHit(1, 1, " fuzzy octave-up", "fuzzi octave-up", 1);
        assertHit(7, 1, " fuzzy octave-up", "octave-ap", 1);
        assertHit(7, 2, " fuzzy octave-up", "octava-ap", 2);
        assertHit(7, 3, " fuzzy octave-up", "oktava-ap", 3);
        assertHit(7, 4, " fuzzy octave-up", "oktuwa", 4);
        assertHit(7, 4, " fuzzy octave-up", "aktuwe", 4);
    }

    @Test
    public void caseInsensitiveAlgorithms()
            throws UnsupportedEncodingException {
        if (!caseInsensitive) {
            return;
        }

        Object pattern;

        /* 214 and 246 are the upper and the lower case O umlaut. */
        pattern = o.processBytes("Ö".getBytes("ISO-8859-1"));
        if (pattern instanceof int[]) {
            int[] pat = (int[]) pattern;
            assertEquals(pat[214], pat[246]);
        }

        pattern = o.processString("Ö");
        if (pattern instanceof CharIntMap) {
            CharIntMap m = (CharIntMap) pattern;
            assertEquals(m.get((char) 214), m.get((char) 246));
        }
    }

    @Test
    public void twoInstancesHaveIdenticalHashCodes() {
        StringSearch o1 = o;
        StringSearch o2 = createInstance();
        assertTrue(o1 != o2);
        assertEquals(o1.hashCode(), o2.hashCode());

        /* Equal keys have to end up in the same entry. */
        Hashtable<StringSearch, StringSearch> t = new Hashtable<StringSearch, StringSearch>();
        t.put(o1, o1);
        t.put(o2, o2);
        assertEquals(1, t.size());
    }

    @Test
    public void twoInstancesAreEqual() {
        StringSearch s1 = o;
        StringSearch s2 = createInstance();
        assertTrue(s1.equals(s2));
        assertFalse(s1.equals(null));
        assertTrue(s1.equals(s1));
    }

    @Test
    public void preprocessedObjectsAreSerializable() throws Exception {
        Object o1 = o.processBytes("hallo".getBytes());
        Object o2 = o.processString("hallo");
        assertSerializable(o1);
        assertSerializable(o2);
    }

    /**
     * Asserts that the given object is serializable: it is written to and
     * read back from a byte array. Unless the object is an array, the copy
     * must also be equal to the original and have the same hash code and
     * String representation.
     *
     * @param o the object, may not be <code>null</code>
     * @throws Exception if writing or reading the object fails
     */
    public static void assertSerializable(Object o) throws Exception {
        assertNotNull(o);
        ByteArrayOutputStream bOut = new ByteArrayOutputStream();
        ObjectOutputStream oOut = new ObjectOutputStream(bOut);
        try {
            oOut.writeObject(o);
        }
        finally {
            /* Closing also flushes everything into bOut. */
            oOut.close();
        }
        ObjectInputStream oIn = new ObjectInputStream(
                new ByteArrayInputStream(bOut.toByteArray()));
        Object o2;
        try {
            o2 = oIn.readObject();
        }
        finally {
            oIn.close();
        }
        if (!o.getClass().isArray()) {
            assertEquals(o, o);
            assertEquals(o2, o2);
            assertEquals(o, o2);
            assertEquals(o2, o);
            assertFalse(o.equals(null));
            assertEquals(o.hashCode(), o2.hashCode());
            assertEquals(o.toString(), o2.toString());
        }

    }

}
5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | /** 31 | * Subclasses of MismatchSearch allow for searching with a fixed number of 32 | * possible errors. Subclasses of this class return an int array 33 | * of two elements with the first int being the position at which 34 | * the hit occurred and the second int being the number of 35 | * mismatches at the position. 36 | *

37 | * Example: 38 | *

 39 |  * int[] positions = new ShiftOrMismatches().searchString("this is null",
 40 |  * "nu1l", 1);
 41 |  * 
42 | * positions[0] would be 8, positions[1] (the number of mismatches) would be 1. 43 | * 44 | * @see StringSearch 45 | * – high-performance pattern matching algorithms in Java 46 | * @author Johann Burkard 47 | * @version $Id: MismatchSearch.java 6675 2015-01-17 21:02:35Z johann $ 48 | */ 49 | public abstract class MismatchSearch extends StringSearch { 50 | 51 | /* 52 | * Pre-processing methods 53 | */ 54 | 55 | /** 56 | * Pre-processes the pattern, allowing zero errors. 57 | *

58 | * Identical to process(pattern, 0) 59 | * 60 | * @param pattern the byte array containing the pattern, may 61 | * not be null 62 | * @see com.eaio.stringsearch.StringSearch#processBytes(byte[]) 63 | * @see #processBytes(byte[], int) 64 | */ 65 | @Override 66 | public final Object processBytes(byte[] pattern) { 67 | return processBytes(pattern, 0); 68 | } 69 | 70 | /** 71 | * Pre-processes the pattern, allowing k errors. 72 | * 73 | * @param pattern the byte array containing the pattern, may 74 | * not be null 75 | * @param k the editing distance 76 | * @return an Object 77 | */ 78 | public abstract Object processBytes(byte[] pattern, int k); 79 | 80 | /** 81 | * Pre-processes the pattern, allowing zero errors. 82 | *

83 | * Identical to process(pattern, 0). 84 | * 85 | * @param pattern a char array containing the pattern, may not 86 | * be null 87 | * @return an Object 88 | * @see #processChars(char[], int) 89 | * @see com.eaio.stringsearch.StringSearch#processChars(char[]) 90 | */ 91 | @Override 92 | public final Object processChars(char[] pattern) { 93 | return processChars(pattern, 0); 94 | } 95 | 96 | /** 97 | * Pre-processes a char array, allowing k errors. 98 | * 99 | * @param pattern a char array containing the pattern, may not 100 | * be null 101 | * @param k the editing distance 102 | * @return an Object 103 | */ 104 | public abstract Object processChars(char[] pattern, int k); 105 | 106 | /** 107 | * Pre-processes a String, allowing k errors. This method should not be used 108 | * directly because it is implicitly called in the 109 | * {@link #searchString(String, String)} methods. 110 | * 111 | * @param pattern the String containing the pattern, may not be 112 | * null 113 | * @param k the editing distance 114 | * @return an Object 115 | */ 116 | public Object processString(String pattern, int k) { 117 | return processChars(StringSearch.getChars(pattern), k); 118 | } 119 | 120 | /* 121 | * Byte searching methods 122 | */ 123 | 124 | /** 125 | * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int, 126 | * byte[], Object) 127 | * @see #searchBytes(byte[], int, int, byte[], Object, int) 128 | */ 129 | @Override 130 | public final int searchBytes(byte[] text, int textStart, int textEnd, 131 | byte[] pattern, Object processed) { 132 | 133 | return searchBytes(text, textStart, textEnd, pattern, processed, 0)[0]; 134 | 135 | } 136 | 137 | /** 138 | * Returns the position in the text at which the pattern was found. Returns 139 | * -1 if the pattern was not found. 
140 | * 141 | * @param text the byte array containing the text, may not be 142 | * null 143 | * @param pattern the byte array containing the pattern, may not 144 | * be null 145 | * @param k the editing distance 146 | * @return the position in the text or -1 if the pattern was not found 147 | * @see #searchBytes(byte[], int, int, byte[], Object, int) 148 | */ 149 | public final int[] searchBytes(byte[] text, byte[] pattern, int k) { 150 | return searchBytes(text, 0, text.length, pattern, processBytes( 151 | pattern, k), k); 152 | } 153 | 154 | /** 155 | * Returns the position in the text at which the pattern was found. Returns 156 | * -1 if the pattern was not found. 157 | * 158 | * @param text the byte array containing the text, may not be 159 | * null 160 | * @param pattern the byte array containing the pattern, may 161 | * not be null 162 | * @param processed an Object as returned from 163 | * {@link #processBytes(byte[], int)}, may not be null 164 | * @param k the editing distance 165 | * @return the position in the text or -1 if the pattern was not found 166 | * @see #searchBytes(byte[], int, int, byte[], Object, int) 167 | */ 168 | public final int[] searchBytes(byte[] text, byte[] pattern, 169 | Object processed, int k) { 170 | 171 | return searchBytes(text, 0, text.length, pattern, processed, k); 172 | } 173 | 174 | /** 175 | * Returns the position in the text at which the pattern was found. Returns 176 | * -1 if the pattern was not found. 
177 | * 178 | * @param text the byte array containing the text, may not be 179 | * null 180 | * @param textStart at which position in the text the comparing should 181 | * start 182 | * @param pattern the byte array containing the pattern, may 183 | * not be null 184 | * @param k the editing distance 185 | * @return int the position in the text or -1 if the pattern was not found 186 | * @see #searchBytes(byte[], int, int, byte[], Object, int) 187 | */ 188 | public final int[] searchBytes(byte[] text, int textStart, byte[] pattern, 189 | int k) { 190 | 191 | return searchBytes(text, textStart, text.length, pattern, 192 | processBytes(pattern, k), k); 193 | } 194 | 195 | /** 196 | * Returns the position in the text at which the pattern was found. Returns 197 | * -1 if the pattern was not found. 198 | * 199 | * @param text the byte array containing the text, may not be 200 | * null 201 | * @param textStart at which position in the text the comparing should 202 | * start 203 | * @param pattern the pattern to search for, may not be null 204 | * @param processed 205 | * @param k the editing distance 206 | * @return the position in the text or -1 if the pattern was not found 207 | * @see #searchBytes(byte[], int, int, byte[], Object, int) 208 | */ 209 | public final int[] searchBytes(byte[] text, int textStart, byte[] pattern, 210 | Object processed, int k) { 211 | 212 | return searchBytes(text, textStart, text.length, pattern, processed, k); 213 | } 214 | 215 | /** 216 | * Returns the position in the text at which the pattern was found. Returns 217 | * -1 if the pattern was not found. 
218 | * 219 | * @param text text the byte array containing the text, may 220 | * not be null 221 | * @param textStart at which position in the text the comparing should 222 | * start 223 | * @param textEnd at which position in the text comparing should stop 224 | * @param pattern the byte array containing the pattern, may 225 | * not be null 226 | * @param k the editing distance 227 | * @return the position in the text or -1 if the pattern was not found 228 | * @see #searchBytes(byte[], int, int, byte[], Object, int) 229 | */ 230 | public final int[] searchBytes(byte[] text, int textStart, int textEnd, 231 | byte[] pattern, int k) { 232 | 233 | return searchBytes(text, textStart, textEnd, pattern, processBytes( 234 | pattern, k), k); 235 | } 236 | 237 | /** 238 | * Returns the position in the text at which the pattern was found. Returns 239 | * -1 if the pattern was not found. 240 | * 241 | * @param text text the byte array containing the text, may 242 | * not be null 243 | * @param textStart at which position in the text the comparing should 244 | * start 245 | * @param textEnd at which position in the text comparing should stop 246 | * @param pattern the pattern to search for, may not be null 247 | * @param processed an Object as returned from 248 | * {@link #processBytes(byte[], int)}, may not be null 249 | * @param k the editing distance 250 | * @return the position in the text or -1 if the pattern was not found 251 | * @see #processBytes(byte[], int) 252 | */ 253 | public abstract int[] searchBytes(byte[] text, int textStart, 254 | int textEnd, byte[] pattern, Object processed, int k); 255 | 256 | /* 257 | * Char searching methods 258 | */ 259 | 260 | /** 261 | * Finder for the given pattern in the text, starting at textStart and 262 | * comparing to at most textEnd, allowing zero errors. 
263 | * 264 | * @see StringSearch#searchChars(char[], int, int, char[], Object) 265 | * @see #processChars(char[], int) 266 | */ 267 | @Override 268 | public final int searchChars(char[] text, int textStart, int textEnd, 269 | char[] pattern, Object processed) { 270 | 271 | return searchChars(text, textStart, textEnd, pattern, processed, 0)[0]; 272 | } 273 | 274 | /** 275 | * Finder for the given pattern in the text, allowing k errors. 276 | * 277 | * @param text the String containing the text, may not be null 278 | * @param pattern the pattern to search for, may not be null 279 | * @param k the maximum number of mismatches (the editing distance) 280 | * @return the position in the text or -1 if the pattern was not found 281 | * @see #searchChars(char[], int, int, char[], Object, int) 282 | */ 283 | public final int[] searchChars(char[] text, char[] pattern, int k) { 284 | return searchChars(text, 0, text.length, pattern, processChars( 285 | pattern, k), k); 286 | } 287 | 288 | /** 289 | * Finder for the given pattern in the text, allowing k errors. 290 | * 291 | * @param text the String containing the text, may not be null 292 | * @param pattern the pattern to search for, may not be null 293 | * @param processed an Object as returned from 294 | * {@link #processChars(char[], int)} or {@link #processString(String, int)}, 295 | * may not be null 296 | * @param k the maximum number of mismatches (the editing distance) 297 | * @return the position in the text or -1 if the pattern was not found 298 | * @see #searchChars(char[], int, int, char[], Object, int) 299 | */ 300 | public final int[] searchChars(char[] text, char[] pattern, 301 | Object processed, int k) { 302 | 303 | return searchChars(text, 0, text.length, pattern, processed, k); 304 | } 305 | 306 | /** 307 | * Finder for the given pattern in the text, starting at textStart, 308 | * allowing k errors. 
309 | * 310 | * @param text the String containing the text, may not be null 311 | * @param textStart at which position in the text the comparing should 312 | * start 313 | * @param pattern the pattern to search for, may not be null 314 | * @param k the maximum number of mismatches (the editing distance) 315 | * @return the position in the text or -1 if the pattern was not found 316 | * @see #searchChars(char[], int, int, char[], Object) 317 | */ 318 | public final int[] searchChars(char[] text, int textStart, 319 | char[] pattern, int k) { 320 | 321 | return searchChars(text, textStart, text.length, pattern, 322 | processChars(pattern, k), k); 323 | } 324 | 325 | /** 326 | * Finder for the given pattern in the text, starting at textStart, 327 | * allowing k errors. 328 | * 329 | * @param text the String containing the text, may not be null 330 | * @param textStart at which position in the text the comparing should 331 | * start 332 | * @param pattern the pattern to search for, may not be null 333 | * @param processed an Object as returned from 334 | * {@link #processChars(char[], int)} or 335 | * {@link #processString(String, int)}, may not be null 336 | * @param k the maximum number of mismatches (the editing distance) 337 | * @return the position in the text or -1 if the pattern was not found 338 | * @see #searchChars(char[], int, int, char[], Object, int) 339 | */ 340 | public final int[] searchChars(char[] text, int textStart, 341 | char[] pattern, Object processed, int k) { 342 | 343 | return searchChars(text, textStart, text.length, pattern, processed, 344 | k); 345 | } 346 | 347 | /** 348 | * Finder for the given pattern in the text, starting at textStart and 349 | * comparing to at most textEnd, allowing k errors. 
350 | * 351 | * @param text the String containing the text, may not be null 352 | * @param textStart at which position in the text the comparing should 353 | * start 354 | * @param textEnd at which position in the text comparing should stop 355 | * @param pattern the pattern to search for, may not be null 356 | * @param k the maximum number of mismatches (the editing distance) 357 | * @return the position in the text or -1 if the pattern was not found 358 | */ 359 | public final int[] searchChars(char[] text, int textStart, int textEnd, 360 | char[] pattern, int k) { 361 | 362 | return searchChars(text, textStart, textEnd, pattern, processChars( 363 | pattern, k), k); 364 | } 365 | 366 | /** 367 | * Finder for the given pattern in the text, starting at textStart and 368 | * comparing to at most textEnd, allowing k errors. 369 | * 370 | * @param text the String containing the text, may not be null 371 | * @param textStart at which position in the text the comparing should 372 | * start 373 | * @param textEnd at which position in the text comparing should stop 374 | * @param pattern the pattern to search for, may not be null 375 | * @param processed an Object as returned from 376 | * {@link #processChars(char[], int)} or 377 | * {@link #processString(String, int)}, may not be null 378 | * @param k the maximum number of mismatches (the editing distance) 379 | * @return the position in the text or -1 if the pattern was not found 380 | */ 381 | public abstract int[] searchChars(char[] text, int textStart, 382 | int textEnd, char[] pattern, Object processed, int k); 383 | 384 | /* String searching methods */ 385 | 386 | /** 387 | * Convenience method to search for patterns in Strings. Returns the position 388 | * in the text at which the pattern was found. Returns -1 if the pattern was 389 | * not found. 
390 | * 391 | * @param text the String containing the text, may not be null 392 | * @param pattern the String containing the pattern, may not be 393 | * null 394 | * @param k the maximum number of mismatches (the editing distance) 395 | * @return the position in the text or -1 if the pattern was not found 396 | * @see #searchChars(char[], int, int, char[], int) 397 | */ 398 | public final int[] searchString(String text, String pattern, int k) { 399 | return searchString(text, 0, text.length(), pattern, k); 400 | } 401 | 402 | /** 403 | * Convenience method to search for patterns in Strings. Returns the position 404 | * in the text at which the pattern was found. Returns -1 if the pattern was 405 | * not found. 406 | * 407 | * @param text the String containing the text, may not be null 408 | * @param pattern the String containing the pattern, may not be 409 | * null 410 | * @param processed an Object as returned from 411 | * {@link #processChars(char[], int)} or {@link #processString(String, int)}, 412 | * may not be null 413 | * @param k the maximum number of mismatches (the editing distance) 414 | * @return the position in the text or -1 if the pattern was not found 415 | * @see #searchChars(char[], int, int, char[], Object, int) 416 | */ 417 | public final int[] searchString(String text, String pattern, 418 | Object processed, int k) { 419 | 420 | return searchString(text, 0, text.length(), pattern, processed, k); 421 | } 422 | 423 | /** 424 | * Convenience method to search for patterns in Strings. Returns the position 425 | * in the text at which the pattern was found. Returns -1 if the pattern was 426 | * not found. 
427 | * 428 | * @param text the String containing the text, may not be null 429 | * @param textStart at which position in the text the comparing should start 430 | * @param pattern the String containing the pattern, may not be 431 | * null 432 | * @param k the maximum number of mismatches (the editing distance) 433 | * @return the position in the text or -1 if the pattern was not found 434 | * @see #searchChars(char[], int, int, char[], int) 435 | */ 436 | public final int[] searchString(String text, int textStart, 437 | String pattern, int k) { 438 | 439 | return searchString(text, textStart, text.length(), pattern, k); 440 | } 441 | 442 | /** 443 | * Convenience method to search for patterns in Strings. Returns the position 444 | * in the text at which the pattern was found. Returns -1 if the pattern was 445 | * not found. 446 | * 447 | * @param text the String containing the text, may not be null 448 | * @param textStart at which position in the text the comparing should start 449 | * @param pattern the String containing the pattern, may not be 450 | * null 451 | * @param processed an Object as returned from 452 | * {@link #processChars(char[], int)} or {@link #processString(String, int)}, 453 | * may not be null 454 | * @param k the maximum number of mismatches (the editing distance) 455 | * @return the position in the text or -1 if the pattern was not found 456 | * @see #searchChars(char[], int, int, char[], Object, int) 457 | */ 458 | public final int[] searchString(String text, int textStart, 459 | String pattern, Object processed, int k) { 460 | 461 | return searchString(text, textStart, text.length(), pattern, 462 | processed, k); 463 | } 464 | 465 | /** 466 | * Convenience method to search for patterns in Strings. Returns the position 467 | * in the text at which the pattern was found. Returns -1 if the pattern was 468 | * not found. 
469 | * 470 | * @param text the String containing the text, may not be null 471 | * @param textStart at which position in the text the comparing should start 472 | * @param textEnd at which position in the text comparing should stop 473 | * @param pattern the String containing the pattern, may not be 474 | * null 475 | * @param k the maximum number of mismatches (the editing distance) 476 | * @return the position in the text or -1 if the pattern was not found 477 | * @see #searchChars(char[], int, int, char[], int) 478 | */ 479 | public final int[] searchString(String text, int textStart, int textEnd, 480 | String pattern, int k) { 481 | 482 | return StringSearch.activeStringAccess.searchString(text, textStart, 483 | textEnd, pattern, k, this); 484 | } 485 | 486 | /** 487 | * Convenience method to search for patterns in Strings. Returns the position 488 | * in the text at which the pattern was found. Returns -1 if the pattern was 489 | * not found. 490 | * 491 | * @param text the String containing the text, may not be null 492 | * @param textStart at which position in the text the comparing should start 493 | * @param textEnd at which position in the text comparing should stop 494 | * @param pattern the String containing the pattern, may not be 495 | * null 496 | * @param processed an Object as returned from 497 | * {@link #processChars(char[], int)} or {@link #processString(String, int)}, 498 | * may not be null 499 | * @param k the maximum number of mismatches (the editing distance) 500 | * @return the position in the text or -1 if the pattern was not found 501 | * @see #searchChars(char[], int, int, char[], Object, int) 502 | */ 503 | public final int[] searchString(String text, int textStart, int textEnd, 504 | String pattern, Object processed, int k) { 505 | 506 | return StringSearch.activeStringAccess.searchString(text, textStart, 507 | textEnd, pattern, processed, k, this); 508 | } 509 | 510 | } 511 | 
-------------------------------------------------------------------------------- /src/main/java/com/eaio/stringsearch/StringSearch.java: -------------------------------------------------------------------------------- 1 | /* 2 | * StringSearch.java 3 | * 4 | * Created on 14.06.2003. 5 | * 6 | * StringSearch - high-performance pattern matching algorithms in Java 7 | * Copyright (c) 2003-2015 Johann Burkard () 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the "Software"), 11 | * to deal in the Software without restriction, including without limitation 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | * and/or sell copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included 17 | * in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 22 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | * 27 | */ 28 | package com.eaio.stringsearch; 29 | 30 | import java.lang.reflect.Field; 31 | import java.lang.reflect.Modifier; 32 | import java.security.AccessControlException; 33 | import java.security.AccessController; 34 | import java.security.PrivilegedActionException; 35 | import java.security.PrivilegedExceptionAction; 36 | 37 | /** 38 | * The base class for pattern matching algorithm implementations. 
39 | * Most implementations do not maintain state and are thread safe -- one instance 40 | * can be used by as many threads as required. 41 | *

42 | * Most pattern matching algorithms pre-process the pattern in 43 | * some way. Subclasses of StringSearch allow retrieving the pre-processed 44 | * pattern to save time if the pattern is used several times. 45 | *

46 | * Some of the Objects returned from the pre-processing methods 47 | * {@link #processBytes(byte[])}, {@link #processChars(char[])} and 48 | * {@link #processString(String)} might implement 49 | * the {@link java.io.Serializable} interface and make it possible to 50 | * serialize pre-processed Objects to disk. 51 | *

52 | * When this Class is loaded, an attempt is made to obtain {@link java.lang.reflect.Field} instances of 53 | * the "offset" and the "value" fields of the {@link java.lang.String} class. 54 | * These fields are set as accessible. If this succeeds, StringSearch will use 55 | * Reflection to access the underlying char array in Strings. If 56 | * not, the char array will be cloned by calling 57 | * {@link java.lang.String#toCharArray()}.

58 | * 59 | * @see 60 | * StringSearch – high-performance pattern matching algorithms in Java 61 | * @author Johann Burkard 62 | * @version $Id: StringSearch.java 6675 2015-01-17 21:02:35Z johann $ 63 | */ 64 | public abstract class StringSearch { 65 | 66 | private static final int CROSSOVER_MACOSX = 50; 67 | 68 | /** 69 | * The crossover point at which the Reflection based char accessor should 70 | * be used. The crossover point is set in the static initializer. If a 71 | * String is longer than this value and Reflection is allowed, it's 72 | * char array will be accessed through Reflection. 73 | */ 74 | private static int crossover = 0; 75 | 76 | /** 77 | * The StringAccess instance. 78 | */ 79 | static StringAccess activeStringAccess; 80 | 81 | /** 82 | * The StringAccess class implements the strategy to convert Strings to 83 | * char arrays and calls the appropriate 84 | * searchChars method in the given StringSearch instance. 85 | */ 86 | static class StringAccess { 87 | 88 | /** 89 | * Instances are created in StringSearch only. 90 | */ 91 | private StringAccess() { 92 | super(); 93 | } 94 | 95 | /** 96 | * Searches a pattern inside a text, using the pre-processed Object and 97 | * using the given StringSearch instance. 98 | */ 99 | int searchString(String text, int textStart, int textEnd, 100 | String pattern, Object processed, StringSearch instance) { 101 | 102 | return instance.searchChars(text.toCharArray(), textStart, textEnd, 103 | pattern.toCharArray(), processed); 104 | 105 | } 106 | 107 | /** 108 | * Searches a pattern inside a text, using the given StringSearch 109 | * instance. 110 | */ 111 | int searchString(String text, int textStart, int textEnd, 112 | String pattern, StringSearch instance) { 113 | 114 | return instance.searchChars(text.toCharArray(), textStart, textEnd, 115 | pattern.toCharArray()); 116 | } 117 | 118 | /** 119 | * Searches a pattern inside a text with at most k mismatches, using 120 | * the given MismatchSearch instance. 
121 | */ 122 | int[] searchString(String text, int textStart, int textEnd, 123 | String pattern, int k, MismatchSearch instance) { 124 | 125 | return instance.searchChars(text.toCharArray(), textStart, textEnd, 126 | pattern.toCharArray(), k); 127 | } 128 | 129 | /** 130 | * Searches a pattern inside a text, using the pre-processed Object and 131 | * at most k mismatches, using the given MismatchSearch instance. 132 | */ 133 | int[] searchString(String text, int textStart, int textEnd, 134 | String pattern, Object processed, int k, MismatchSearch instance) { 135 | 136 | return instance.searchChars(text.toCharArray(), textStart, textEnd, 137 | pattern.toCharArray(), processed, k); 138 | } 139 | 140 | /** 141 | * Returns the underlying char array. 142 | * 143 | * @param s the String, may not be null 144 | * @return char[] 145 | */ 146 | char[] getChars(String s) { 147 | return s.toCharArray(); 148 | } 149 | 150 | } 151 | 152 | /** 153 | * The ReflectionStringAccess class is used if Reflection can be used to access the 154 | * underlying char array in Strings to avoid the cloning 155 | * overhead. 156 | */ 157 | static class ReflectionStringAccess extends StringAccess { 158 | 159 | private Field value, offset; 160 | 161 | /** 162 | * Instances are created in StringSearch only. 
163 | * 164 | * @param value the "value" field in String 165 | * @param offset the "offset" field in String 166 | */ 167 | private ReflectionStringAccess(Field value, Field offset) { 168 | this.value = value; 169 | this.offset = offset; 170 | } 171 | 172 | /** 173 | * @see com.eaio.stringsearch.StringSearch.StringAccess#searchString( 174 | * String, int, int, String, Object, StringSearch) 175 | */ 176 | @Override 177 | int searchString(String text, int textStart, int textEnd, 178 | String pattern, Object processed, StringSearch instance) { 179 | 180 | int l = text.length(); 181 | if (l > crossover) { 182 | try { 183 | int o = offset.getInt(text); 184 | char[] t = (char[]) value.get(text); 185 | return instance.searchChars(t, textStart + o, textEnd + o, 186 | getChars(pattern), processed) 187 | - o; 188 | 189 | } 190 | catch (IllegalAccessException ex) { 191 | activeStringAccess = new StringAccess(); 192 | } 193 | } 194 | return super.searchString(text, textStart, textEnd, pattern, 195 | processed, instance); 196 | } 197 | 198 | /** 199 | * @see com.eaio.stringsearch.StringSearch.StringAccess#searchString( 200 | * String, int, int, String, StringSearch) 201 | */ 202 | @Override 203 | int searchString(String text, int textStart, int textEnd, 204 | String pattern, StringSearch instance) { 205 | 206 | int l = text.length(); 207 | if (l > crossover) { 208 | try { 209 | int o = offset.getInt(text); 210 | char[] t = (char[]) value.get(text); 211 | return instance.searchChars(t, textStart + o, textEnd + o, 212 | getChars(pattern)) 213 | - o; 214 | } 215 | catch (IllegalAccessException ex) { 216 | activeStringAccess = new StringAccess(); 217 | } 218 | } 219 | return super.searchString(text, textStart, textEnd, pattern, 220 | instance); 221 | } 222 | 223 | /** 224 | * @see com.eaio.stringsearch.StringSearch.StringAccess#searchString( 225 | * String, int, int, String, int, MismatchSearch) 226 | */ 227 | @Override 228 | int[] searchString(String text, int textStart, int 
textEnd, 229 | String pattern, int k, MismatchSearch instance) { 230 | 231 | int l = text.length(); 232 | if (l > crossover) { 233 | try { 234 | int o = offset.getInt(text); 235 | char[] t = (char[]) value.get(text); 236 | int[] r = instance.searchChars(t, textStart + o, textEnd 237 | + o, getChars(pattern), k); 238 | if (r[0] != -1) { 239 | r[0] -= o; 240 | } 241 | return r; 242 | } 243 | catch (IllegalAccessException ex) { 244 | activeStringAccess = new StringAccess(); 245 | } 246 | } 247 | return super.searchString(text, textStart, textEnd, pattern, k, 248 | instance); 249 | } 250 | 251 | /** 252 | * @see com.eaio.stringsearch.StringSearch.StringAccess#searchString( 253 | * String, int, int, String, Object, int, MismatchSearch) 254 | */ 255 | @Override 256 | int[] searchString(String text, int textStart, int textEnd, 257 | String pattern, Object processed, int k, MismatchSearch instance) { 258 | 259 | int l = text.length(); 260 | if (l > crossover) { 261 | try { 262 | int o = offset.getInt(text); 263 | char[] t = (char[]) value.get(text); 264 | int[] r = instance.searchChars(t, textStart + o, textEnd 265 | + o, getChars(pattern), processed, k); 266 | if (r[0] != -1) { 267 | r[0] -= o; 268 | } 269 | return r; 270 | } 271 | catch (IllegalAccessException ex) { 272 | activeStringAccess = new StringAccess(); 273 | } 274 | } 275 | return super.searchString(text, textStart, textEnd, pattern, 276 | processed, k, instance); 277 | } 278 | 279 | /** 280 | * Tries to return the underlying char array directly. 281 | * Only works if the "offset" field is 0 and the "count" field is equal 282 | * to the String's length. 
283 | * 284 | * @see com.eaio.stringsearch.StringSearch.StringAccess#getChars( 285 | * java.lang.String) 286 | */ 287 | @Override 288 | char[] getChars(String s) { 289 | int l = s.length(); 290 | if (l > crossover) { 291 | try { 292 | if (offset.getInt(s) != 0) { 293 | return super.getChars(s); 294 | } 295 | char[] c = (char[]) value.get(s); 296 | if (c.length != l) { 297 | return super.getChars(s); 298 | } 299 | return c; 300 | } 301 | catch (IllegalAccessException ex) { 302 | activeStringAccess = new StringAccess(); 303 | } 304 | } 305 | return super.getChars(s); 306 | } 307 | 308 | } 309 | 310 | static { 311 | 312 | final String shortString = "."; 313 | shortString.hashCode(); // make sure the cached hashCode is not 0 314 | 315 | Field value = null; 316 | Field offset = null; 317 | 318 | try { 319 | Field[] valueOffset = AccessController.doPrivileged(new PrivilegedExceptionAction() { 320 | 321 | @Override 322 | public Field[] run() throws Exception { 323 | Field[] stringFields = shortString.getClass().getDeclaredFields(); 324 | 325 | Class charArray = new char[0].getClass(); 326 | 327 | Field val = null, off = null; 328 | 329 | for (int i = 0; i < stringFields.length; ++i) { 330 | final Field field = stringFields[i]; 331 | if (field.getType() == charArray && !Modifier.isStatic(field.getModifiers())) { 332 | val = stringFields[i]; 333 | val.setAccessible(true); 334 | } 335 | else if (field.getType() == Integer.TYPE) { 336 | field.setAccessible(true); 337 | 338 | if (field.getInt(shortString) == 0) { 339 | off = stringFields[i]; 340 | } 341 | } 342 | } 343 | return new Field[] { val, off }; 344 | } 345 | 346 | }); 347 | 348 | value = valueOffset[0]; 349 | offset = valueOffset[1]; 350 | } 351 | catch (AccessControlException ex) { 352 | // Ignored. 353 | } 354 | catch (PrivilegedActionException ex) { 355 | // Ignored. 356 | } 357 | catch (SecurityException ex) { 358 | // Ignored. 
359 | } 360 | 361 | if (value != null && offset != null) { 362 | StringSearch.activeStringAccess = new ReflectionStringAccess(value, 363 | offset); 364 | try { 365 | if (System.getProperty("mrj.version") != null) { 366 | crossover = CROSSOVER_MACOSX; 367 | } 368 | } 369 | catch (SecurityException ex) { 370 | // Ignored. 371 | } 372 | } 373 | else { 374 | StringSearch.activeStringAccess = new StringAccess(); 375 | } 376 | } 377 | 378 | /** 379 | * Returns if Reflection is used to access the underlying char 380 | * array in Strings. 381 | * 382 | * @return boolean 383 | */ 384 | public static boolean usesReflection() { 385 | return activeStringAccess instanceof ReflectionStringAccess; 386 | } 387 | 388 | /** 389 | * Attempts to return the underlying char array of a String 390 | * directly. If Reflection cannot be used, the array is cloned by a call to 391 | * {@link String#toCharArray()}. 392 | * 393 | * @param s the String 394 | * @return a char array 395 | */ 396 | public static char[] getChars(String s) { 397 | return activeStringAccess.getChars(s); 398 | } 399 | 400 | /** 401 | * Constructor for StringSearch. Note that it is not required to create 402 | * multiple instances of an algorithm. This constructor does nothing. 403 | */ 404 | protected StringSearch() { 405 | super(); 406 | } 407 | 408 | /* 409 | * Pre-processing methods 410 | */ 411 | 412 | /** 413 | * Pre-processes a byte array. This method should be used if a 414 | * pattern is searched for more than one time. 415 | * 416 | * @param pattern the byte array containing the pattern, may not 417 | * be null 418 | * @return an Object 419 | */ 420 | public abstract Object processBytes(byte[] pattern); 421 | 422 | /** 423 | * Pre-processes a char array. This method should be used if a 424 | * pattern is searched for more than one time. 
425 | * 426 | * @param pattern a char array containing the pattern, may not be 427 | * null 428 | * @return an Object 429 | */ 430 | public abstract Object processChars(char[] pattern); 431 | 432 | /** 433 | * Pre-processes a String. This method should be used if a pattern is searched 434 | * for more than one time. 435 | * 436 | * @param pattern the String containing the pattern, may not be 437 | * null 438 | * @return an Object 439 | * @see #processChars(char[]) 440 | */ 441 | public Object processString(String pattern) { 442 | return processChars(getChars(pattern)); 443 | } 444 | 445 | /* Byte searching methods */ 446 | 447 | /** 448 | * Returns the position in the text at which the pattern was found. Returns -1 449 | * if the pattern was not found. 450 | * 451 | * @param text the byte array containing the text, may not be 452 | * null 453 | * @param pattern the byte array containing the pattern, may not 454 | * be null 455 | * @return the position in the text or -1 if the pattern was not found 456 | * @see #searchBytes(byte[], int, int, byte[], Object) 457 | */ 458 | public final int searchBytes(byte[] text, byte[] pattern) { 459 | return searchBytes(text, 0, text.length, pattern, processBytes(pattern)); 460 | } 461 | 462 | /** 463 | * Returns the position in the text at which the pattern was found. Returns -1 464 | * if the pattern was not found. 
465 | * 466 | * @param text the byte array containing the text, may not be 467 | * null 468 | * @param pattern the pattern to search for, may not be null 469 | * @param processed an Object as returned from {@link #processBytes(byte[])}, 470 | * may not be null 471 | * @return the position in the text or -1 if the pattern was not found 472 | * @see #searchBytes(byte[], int, int, byte[], Object) 473 | */ 474 | public final int searchBytes(byte[] text, byte[] pattern, Object processed) { 475 | return searchBytes(text, 0, text.length, pattern, processed); 476 | } 477 | 478 | /** 479 | * Returns the position in the text at which the pattern was found. Returns -1 480 | * if the pattern was not found. 481 | * 482 | * @param text the byte array containing the text, may not be 483 | * null 484 | * @param textStart at which position in the text the comparing should start 485 | * @param pattern the byte array containing the pattern, may not 486 | * be null 487 | * @return int the position in the text or -1 if the pattern was not found 488 | * @see #searchBytes(byte[], int, int, byte[], Object) 489 | */ 490 | public final int searchBytes(byte[] text, int textStart, byte[] pattern) { 491 | return searchBytes(text, textStart, text.length, pattern, 492 | processBytes(pattern)); 493 | } 494 | 495 | /** 496 | * Returns the position in the text at which the pattern was found. Returns -1 497 | * if the pattern was not found. 
498 | * 499 | * @param text the byte array containing the text, may not be 500 | * null 501 | * @param textStart at which position in the text the comparing should start 502 | * @param pattern the pattern to search for, may not be null 503 | * @param processed 504 | * @return the position in the text or -1 if the pattern was not found 505 | * @see #searchBytes(byte[], int, int, byte[], Object) 506 | */ 507 | public final int searchBytes(byte[] text, int textStart, byte[] pattern, 508 | Object processed) { 509 | 510 | return searchBytes(text, textStart, text.length, pattern, processed); 511 | } 512 | 513 | /** 514 | * Returns the position in the text at which the pattern was found. Returns -1 515 | * if the pattern was not found. 516 | * 517 | * @param text text the byte array containing the text, may not be 518 | * null 519 | * @param textStart at which position in the text the comparing should start 520 | * @param textEnd at which position in the text comparing should stop 521 | * @param pattern the byte array containing the pattern, may not 522 | * be null 523 | * @return the position in the text or -1 if the pattern was not found 524 | * @see #searchBytes(byte[], int, int, byte[], Object) 525 | */ 526 | public final int searchBytes(byte[] text, int textStart, int textEnd, 527 | byte[] pattern) { 528 | 529 | return searchBytes(text, textStart, textEnd, pattern, 530 | processBytes(pattern)); 531 | } 532 | 533 | /** 534 | * Returns the position in the text at which the pattern was found. Returns -1 535 | * if the pattern was not found. 
536 | * 537 | * @param text text the byte array containing the text, may not be 538 | * null 539 | * @param textStart at which position in the text the comparing should start 540 | * @param textEnd at which position in the text comparing should stop 541 | * @param pattern the pattern to search for, may not be null 542 | * @param processed an Object as returned from {@link #processBytes(byte[])}, 543 | * may not be null 544 | * @return the position in the text or -1 if the pattern was not found 545 | * @see #processBytes(byte[]) 546 | */ 547 | public abstract int searchBytes(byte[] text, int textStart, int textEnd, 548 | byte[] pattern, Object processed); 549 | 550 | /* Char searching methods */ 551 | 552 | /** 553 | * Returns the position in the text at which the pattern was found. Returns -1 554 | * if the pattern was not found. 555 | * 556 | * @param text the character array containing the text, may not be 557 | * null 558 | * @param pattern the char array containing the pattern, may not 559 | * be null 560 | * @return the position in the text or -1 if the pattern was not found 561 | * @see #searchChars(char[], int, int, char[], Object) 562 | */ 563 | public final int searchChars(char[] text, char[] pattern) { 564 | return searchChars(text, 0, text.length, pattern, processChars(pattern)); 565 | } 566 | 567 | /** 568 | * Returns the index of the pattern in the text using the pre-processed Object. 569 | * Returns -1 if the pattern was not found. 
570 | * 571 | * @param text the character array containing the text, may not be 572 | * null 573 | * @param pattern the char array containing the pattern, may not 574 | * be null 575 | * @param processed an Object as returned from {@link #processChars(char[])} or 576 | * {@link #processString(String)}, may not be null 577 | * @return the position in the text or -1 if the pattern was not found 578 | * @see #searchChars(char[], int, int, char[], Object) 579 | */ 580 | public final int searchChars(char[] text, char[] pattern, Object processed) { 581 | return searchChars(text, 0, text.length, pattern, processed); 582 | } 583 | 584 | /** 585 | * Returns the position in the text at which the pattern was found. Returns -1 586 | * if the pattern was not found. 587 | * 588 | * @param text the character array containing the text, may not be 589 | * null 590 | * @param textStart at which position in the text the comparing should start 591 | * @param pattern the char array containing the pattern, may not 592 | * be null 593 | * @return the position in the text or -1 if the pattern was not found 594 | * @see #searchChars(char[], int, int, char[], Object) 595 | */ 596 | public final int searchChars(char[] text, int textStart, char[] pattern) { 597 | return searchChars(text, textStart, text.length, pattern, 598 | processChars(pattern)); 599 | } 600 | 601 | /** 602 | * Returns the index of the pattern in the text using the pre-processed Object. 603 | * Returns -1 if the pattern was not found. 
604 | * 605 | * @param text the String containing the text, may not be null 606 | * @param textStart at which position in the text the comparing should start 607 | * @param pattern the char array containing the pattern, may not 608 | * be null 609 | * @param processed an Object as returned from {@link #processChars(char[])} or 610 | * {@link #processString(String)}, may not be null 611 | * @return the position in the text or -1 if the pattern was not found 612 | * @see #searchChars(char[], int, int, char[], Object) 613 | */ 614 | public final int searchChars(char[] text, int textStart, char[] pattern, 615 | Object processed) { 616 | 617 | return searchChars(text, textStart, text.length, pattern, processed); 618 | } 619 | 620 | /** 621 | * Returns the position in the text at which the pattern was found. Returns -1 622 | * if the pattern was not found. 623 | * 624 | * @param text the character array containing the text, may not be 625 | * null 626 | * @param textStart at which position in the text the comparing should start 627 | * @param textEnd at which position in the text comparing should stop 628 | * @param pattern the char array containing the pattern, may not 629 | * be null 630 | * @return the position in the text or -1 if the pattern was not found 631 | * @see #searchChars(char[], int, int, char[], Object) 632 | */ 633 | public final int searchChars(char[] text, int textStart, int textEnd, 634 | char[] pattern) { 635 | 636 | return searchChars(text, textStart, textEnd, pattern, 637 | processChars(pattern)); 638 | } 639 | 640 | /** 641 | * Returns the index of the pattern in the text using the pre-processed Object. 642 | * Returns -1 if the pattern was not found. 
643 | * 644 | * @param text the String containing the text, may not be null 645 | * @param textStart at which position in the text the comparing should start 646 | * @param textEnd at which position in the text comparing should stop 647 | * @param pattern the pattern to search for, may not be null 648 | * @param processed an Object as returned from {@link #processChars(char[])} or 649 | * {@link #processString(String)}, may not be null 650 | * @return the position in the text or -1 if the pattern was not found 651 | */ 652 | public abstract int searchChars(char[] text, int textStart, int textEnd, 653 | char[] pattern, Object processed); 654 | 655 | /* String searching methods */ 656 | 657 | /** 658 | * Convenience method to search for patterns in Strings. Returns the position 659 | * in the text at which the pattern was found. Returns -1 if the pattern was 660 | * not found. 661 | * 662 | * @param text the String containing the text, may not be null 663 | * @param pattern the String containing the pattern, may not be 664 | * null 665 | * @return the position in the text or -1 if the pattern was not found 666 | * @see #searchChars(char[], int, int, char[], Object) 667 | */ 668 | public final int searchString(String text, String pattern) { 669 | return searchString(text, 0, text.length(), pattern); 670 | } 671 | 672 | /** 673 | * Convenience method to search for patterns in Strings. Returns the position 674 | * in the text at which the pattern was found. Returns -1 if the pattern was 675 | * not found. 
676 | * 677 | * @param text the String containing the text, may not be null 678 | * @param pattern the String containing the pattern, may not be 679 | * null 680 | * @param processed an Object as returned from {@link #processChars(char[])} or 681 | * {@link #processString(String)}, may not be null 682 | * @return the position in the text or -1 if the pattern was not found 683 | * @see #searchChars(char[], int, int, char[], Object) 684 | */ 685 | public final int searchString(String text, String pattern, Object processed) { 686 | return searchString(text, 0, text.length(), pattern, processed); 687 | } 688 | 689 | /** 690 | * Convenience method to search for patterns in Strings. Returns the position 691 | * in the text at which the pattern was found. Returns -1 if the pattern was 692 | * not found. 693 | * 694 | * @param text the String containing the text, may not be null 695 | * @param textStart at which position in the text the comparing should start 696 | * @param pattern the String containing the pattern, may not be 697 | * null 698 | * @return the position in the text or -1 if the pattern was not found 699 | * @see #searchChars(char[], int, int, char[], Object) 700 | */ 701 | public final int searchString(String text, int textStart, String pattern) { 702 | return searchString(text, textStart, text.length(), pattern); 703 | } 704 | 705 | /** 706 | * Convenience method to search for patterns in Strings. Returns the position 707 | * in the text at which the pattern was found. Returns -1 if the pattern was 708 | * not found. 
709 | * 710 | * @param text the String containing the text, may not be null 711 | * @param textStart at which position in the text the comparing should start 712 | * @param pattern the String containing the pattern, may not be 713 | * null 714 | * @param processed an Object as returned from {@link #processChars(char[])} or 715 | * {@link #processString(String)}, may not be null 716 | * @return the position in the text or -1 if the pattern was not found 717 | * @see #searchChars(char[], int, int, char[], Object) 718 | */ 719 | public final int searchString(String text, int textStart, String pattern, 720 | Object processed) { 721 | 722 | return searchString(text, textStart, text.length(), pattern, processed); 723 | } 724 | 725 | /** 726 | * Convenience method to search for patterns in Strings. Returns the position 727 | * in the text at which the pattern was found. Returns -1 if the pattern was 728 | * not found. 729 | * 730 | * @param text the String containing the text, may not be null 731 | * @param textStart at which position in the text the comparing should start 732 | * @param textEnd at which position in the text comparing should stop 733 | * @param pattern the String containing the pattern, may not be 734 | * null 735 | * @return the position in the text or -1 if the pattern was not found 736 | * @see #searchChars(char[], int, int, char[]) 737 | */ 738 | public final int searchString(String text, int textStart, int textEnd, 739 | String pattern) { 740 | 741 | return StringSearch.activeStringAccess.searchString(text, textStart, 742 | textEnd, pattern, this); 743 | } 744 | 745 | /** 746 | * Convenience method to search for patterns in Strings. Returns the position 747 | * in the text at which the pattern was found. Returns -1 if the pattern was 748 | * not found. 
749 | * 750 | * @param text the String containing the text, may not be null 751 | * @param textStart at which position in the text the comparing should start 752 | * @param textEnd at which position in the text comparing should stop 753 | * @param pattern the String containing the pattern, may not be 754 | * null 755 | * @param processed an Object as returned from {@link #processChars(char[])} or 756 | * {@link #processString(String)}, may not be null 757 | * @return the position in the text or -1 if the pattern was not found 758 | * @see #searchChars(char[], int, int, char[]) 759 | */ 760 | public final int searchString(String text, int textStart, int textEnd, 761 | String pattern, Object processed) { 762 | 763 | return StringSearch.activeStringAccess.searchString(text, textStart, 764 | textEnd, pattern, processed, this); 765 | } 766 | 767 | /** 768 | * Returns if the Object's class matches this Object's class. 769 | * 770 | * @param obj the other Object, may be null 771 | * @return if the Object is equal to this Object 772 | * @see java.lang.Object#equals(Object) 773 | */ 774 | @Override 775 | public boolean equals(Object obj) { 776 | if (this == obj) { 777 | return true; 778 | } 779 | if (obj == null) { 780 | return false; 781 | } 782 | return getClass().getName().equals(obj.getClass().getName()); 783 | } 784 | 785 | /** 786 | * Returns the hashCode of the current class' name because all instances of 787 | * this class are equal. 788 | * 789 | * @return int 790 | * @see java.lang.Object#hashCode() 791 | */ 792 | @Override 793 | public int hashCode() { 794 | return getClass().getName().hashCode(); 795 | } 796 | 797 | /** 798 | * Returns a String representation of this Object. Simply returns the name of 799 | * the Class. 
800 | * 801 | * @return a String, never null 802 | * @see java.lang.Object#toString() 803 | */ 804 | @Override 805 | public String toString() { 806 | return getClass().getName(); 807 | } 808 | 809 | /** 810 | * Returns a {@link CharIntMap} of the extent of the given pattern, using 811 | * the specified default value. 812 | * 813 | * @param pattern the pattern, may not be null 814 | * @param defaultValue the default value 815 | * @return a CharIntMap, never null 816 | * @see CharIntMap#CharIntMap(int, char, int) 817 | */ 818 | protected final CharIntMap createCharIntMap(char[] pattern, int defaultValue) { 819 | return createCharIntMap(pattern, pattern.length, defaultValue); 820 | } 821 | 822 | /** 823 | * Returns a {@link CharIntMap} of the extent of the given pattern, using 824 | * the specified default value. 825 | * 826 | * @param pattern the pattern, may not be null 827 | * @param patternEnd where to stop searching for extent values in the 828 | * pattern 829 | * @param defaultValue the default value 830 | * @return a CharIntMap, never null 831 | * @see CharIntMap#CharIntMap(int, char, int) 832 | */ 833 | protected final CharIntMap createCharIntMap(char[] pattern, int patternEnd, 834 | int defaultValue) { 835 | char min = Character.MAX_VALUE; 836 | char max = Character.MIN_VALUE; 837 | for (int i = 0; i < patternEnd; i++) { 838 | max = max > pattern[i] ? max : pattern[i]; 839 | min = min < pattern[i] ? min : pattern[i]; 840 | } 841 | return new CharIntMap(max - min + 1, min, defaultValue); 842 | } 843 | 844 | /** 845 | * Interprets the given byte as an unsigned byte. 846 | * 847 | * @param idx the byte 848 | * @return int 849 | */ 850 | protected final int index(byte idx) { 851 | /* Much faster in IBM, see com.eaio.stringsearch.performanceTest.Index. */ 852 | /* And MUCH faster in Sun, too. */ 853 | return idx & 0x000000ff; 854 | } 855 | 856 | } 857 | --------------------------------------------------------------------------------