├── .github └── workflows │ └── maven.yml ├── .gitignore ├── .mvn └── wrapper │ └── maven-wrapper.properties ├── .travis.yml ├── Jenkinsfile ├── LICENSE ├── README.md ├── mvnw ├── mvnw.cmd ├── pom.xml ├── src ├── module-info.java └── org │ └── joni │ ├── Analyser.java │ ├── ApplyCaseFold.java │ ├── ApplyCaseFoldArg.java │ ├── ArrayCompiler.java │ ├── BitSet.java │ ├── BitStatus.java │ ├── ByteCodeMachine.java │ ├── ByteCodePrinter.java │ ├── CaptureTreeNode.java │ ├── CodeRangeBuffer.java │ ├── Compiler.java │ ├── Config.java │ ├── ConfigSupport.java │ ├── Lexer.java │ ├── Matcher.java │ ├── MatcherFactory.java │ ├── MinMaxLen.java │ ├── MultiRegion.java │ ├── NameEntry.java │ ├── NativeMachine.java │ ├── NodeOptInfo.java │ ├── OptAnchorInfo.java │ ├── OptEnvironment.java │ ├── OptExactInfo.java │ ├── OptMapInfo.java │ ├── Option.java │ ├── Parser.java │ ├── Regex.java │ ├── Region.java │ ├── ScanEnvironment.java │ ├── ScannerSupport.java │ ├── Search.java │ ├── SingleRegion.java │ ├── StackEntry.java │ ├── StackMachine.java │ ├── Syntax.java │ ├── Token.java │ ├── UnsetAddrList.java │ ├── WarnCallback.java │ ├── ast │ ├── AnchorNode.java │ ├── AnyCharNode.java │ ├── BackRefNode.java │ ├── CClassNode.java │ ├── CTypeNode.java │ ├── CallNode.java │ ├── EncloseNode.java │ ├── ListNode.java │ ├── Node.java │ ├── QuantifierNode.java │ ├── StateNode.java │ └── StringNode.java │ ├── bench │ ├── AbstractBench.java │ ├── BenchGreedyBacktrack.java │ ├── BenchRailsRegs.java │ └── BenchSeveralRegexps.java │ ├── constants │ ├── MetaChar.java │ ├── SyntaxProperties.java │ └── internal │ │ ├── AnchorType.java │ │ ├── Arguments.java │ │ ├── EncloseType.java │ │ ├── NodeStatus.java │ │ ├── NodeType.java │ │ ├── OPCode.java │ │ ├── OPSize.java │ │ ├── StackPopLevel.java │ │ ├── StackType.java │ │ ├── StringType.java │ │ ├── TargetInfo.java │ │ ├── TokenType.java │ │ └── Traverse.java │ └── exception │ ├── ErrorMessages.java │ ├── InternalException.java │ ├── JOniException.java │ 
├── SyntaxException.java │ ├── TimeoutException.java │ └── ValueException.java └── test └── org └── joni └── test ├── Test.java ├── TestA.java ├── TestC.java ├── TestCaseInsensitive.java ├── TestCornerCases.java ├── TestCrnl.java ├── TestError.java ├── TestInterrupt.java ├── TestNSU8.java ├── TestPerl.java ├── TestU.java ├── TestU8.java └── TestUtf8CaseFoldingLatin1Supplement.java /.github/workflows/maven.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven, and cache/restore any dependencies to improve the workflow execution time 2 | # For more information see: 3 | # - https://docs.github.com/en/actions/use-cases-and-examples/building-and-testing/building-and-testing-java-with-maven 4 | # - https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions 5 | name: Java CI with Maven 6 | 7 | on: 8 | push: 9 | branches-ignore: # build all branches except: 10 | - 'dependabot/**' # prevent GHA triggered twice (once for commit to the branch and once for opening/syncing the PR) 11 | tags-ignore: # don't build tags 12 | - '**' 13 | paths-ignore: 14 | - 'Jenkinsfile' 15 | - 'LICENSE' 16 | - '**/*.md' 17 | - '.git*' 18 | - '.github/*.yml' 19 | pull_request: 20 | paths-ignore: 21 | - 'Jenkinsfile' 22 | - 'LICENSE' 23 | - '**/*.md' 24 | - '.git*' 25 | - '.github/*.yml' 26 | workflow_dispatch: 27 | # https://github.blog/changelog/2020-07-06-github-actions-manual-triggers-with-workflow_dispatch/ 28 | 29 | 30 | defaults: 31 | run: 32 | shell: bash 33 | 34 | 35 | jobs: 36 | build: 37 | runs-on: ubuntu-latest 38 | 39 | env: 40 | JAVA_VERSION: 11 41 | 42 | steps: 43 | - name: Git Checkout 44 | uses: actions/checkout@v4 # https://github.com/actions/checkout 45 | 46 | - name: Set up JDK ${{ env.JAVA_VERSION }} ☕ 47 | uses: actions/setup-java@v4 # https://github.com/actions/setup-java 48 | with: 49 | java-version: ${{ env.JAVA_VERSION }} 50 | distribution: temurin 51 | 
cache: maven 52 | 53 | - name: Build with Maven 🔨 54 | run: mvn -ntp -B verify 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .tool-versions 3 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | wrapperVersion=3.3.2 18 | distributionType=only-script 19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | jdk: 4 | - openjdk9 5 | 6 | arch: 7 | - amd64 8 | - ppc64le 9 | 10 | 11 | cache: 12 | directories: 13 | - $HOME/.m2 14 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env groovy 2 | 3 | pipeline { 4 | agent none 5 | options { 6 | buildDiscarder(logRotator(numToKeepStr: '10')) 7 | timeout(time: 1, unit: 'HOURS') 8 | } 9 | stages { 10 | stage('OpenJDK 8') { 11 | agent { docker 'openjdk:8-jdk' } 12 | steps { 13 | checkout scm 14 | sh './mvnw test -B' 15 | } 16 | post { 17 | always { 18 | junit testResults: '**/surefire-reports/**/*.xml', allowEmptyResults: true 19 | } 20 | } 21 | } 22 | 23 | stage('Alternative Platforms') { 24 | parallel { 25 | stage('OpenJDK 9') { 26 | agent { docker 'openjdk:9-jdk' } 27 | steps { 28 | checkout scm 29 | sh './mvnw test -B' 30 | } 31 | post { 32 | always { 33 | junit testResults: '**/surefire-reports/**/*.xml', allowEmptyResults: true 34 | } 35 | } 36 | } 37 | stage('Alpine Linux') { 38 | agent { docker 'openjdk:8-jdk-alpine' } 39 | steps { 40 | checkout scm 41 | sh './mvnw test -B' 42 | } 43 | post { 44 | always { 45 | junit testResults: '**/surefire-reports/**/*.xml', allowEmptyResults: true 46 | } 47 | } 48 | } 49 | stage('FreeBSD 11') { 50 | agent { label 'freebsd' } 51 | steps { 52 | checkout scm 53 | sh './mvnw test -B' 54 | } 55 | post { 56 | always { 57 | junit testResults: '**/surefire-reports/**/*.xml', allowEmptyResults: true 58 | } 59 | } 60 | } 61 | /* awaiting platform support in Code Valet */ 62 
| stage('Windows 2016') { 63 | when { branch 'windows-support' } 64 | steps { 65 | echo 'Not yet available' 66 | } 67 | } 68 | } 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 JRuby Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | joni 2 | ==== 3 | 4 | [![Maven Central](https://img.shields.io/maven-central/v/org.jruby.joni/joni)](https://central.sonatype.com/artifact/org.jruby.joni/joni) 5 | [![Build Status](https://github.com/jruby/joni/actions/workflows/maven.yml/badge.svg)](https://github.com/jruby/joni/actions/workflows/maven.yml) 6 | 7 | Java port of [Oniguruma](https://github.com/kkos/oniguruma) regexp library 8 | 9 | ## Usage 10 | 11 | ### Imports 12 | 13 | ```java 14 | import org.jcodings.specific.UTF8Encoding; 15 | import org.joni.Matcher; 16 | import org.joni.Option; 17 | import org.joni.Regex; 18 | ``` 19 | 20 | ### Matching 21 | 22 | ```java 23 | byte[] pattern = "a*".getBytes(); 24 | byte[] str = "aaa".getBytes(); 25 | 26 | Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE); 27 | Matcher matcher = regex.matcher(str); 28 | int result = matcher.search(0, str.length, Option.DEFAULT); 29 | ``` 30 | 31 | ### Using captures 32 | 33 | ```java 34 | byte[] pattern = "(a*)".getBytes(); 35 | byte[] str = "aaa".getBytes(); 36 | 37 | Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE); 38 | Matcher matcher = regex.matcher(str); 39 | int result = matcher.search(0, str.length, Option.DEFAULT); 40 | if (result != -1) { 41 | Region region = matcher.getEagerRegion(); 42 | } 43 | ``` 44 | 45 | ### Using named captures 46 | 47 | ```java 48 | byte[] pattern = "(?<name>a*)".getBytes(); 49 | byte[] str = "aaa".getBytes(); 50 | 51 | Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE); 52 | Matcher matcher = regex.matcher(str); 53 | int result = matcher.search(0, str.length, Option.DEFAULT); 54 | if (result != -1) { 55 | Region region = matcher.getEagerRegion(); 56 | for (Iterator<NameEntry> entry = regex.namedBackrefIterator(); 
entry.hasNext();) { 57 | NameEntry e = entry.next(); 58 | int number = e.getBackRefs()[0]; // can have many refs per name 59 | // int begin = region.beg[number]; 60 | // int end = region.end[number]; 61 | } 62 | } 63 | ``` 64 | 65 | ## License 66 | 67 | Joni is released under the [MIT License](http://www.opensource.org/licenses/MIT). 68 | -------------------------------------------------------------------------------- /src/module-info.java: -------------------------------------------------------------------------------- 1 | open module org.jruby.joni { 2 | exports org.joni; 3 | exports org.joni.constants; 4 | exports org.joni.exception; 5 | 6 | requires transitive org.jruby.jcodings; 7 | } -------------------------------------------------------------------------------- /src/org/joni/ApplyCaseFold.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni; 21 | 22 | import org.jcodings.ApplyAllCaseFoldFunction; 23 | import org.jcodings.Encoding; 24 | import org.joni.ast.CClassNode; 25 | import org.joni.ast.ListNode; 26 | import org.joni.ast.StringNode; 27 | 28 | final class ApplyCaseFold implements ApplyAllCaseFoldFunction { 29 | 30 | // i_apply_case_fold 31 | @Override 32 | public void apply(int from, int[]to, int length, Object o) { 33 | ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o; 34 | 35 | ScanEnvironment env = arg.env; 36 | Encoding enc = env.enc; 37 | CClassNode cc = arg.cc; 38 | CClassNode ascCc = arg.ascCc; 39 | BitSet bs = cc.bs; 40 | boolean addFlag; 41 | 42 | if (ascCc == null) { 43 | addFlag = false; 44 | } else if (Encoding.isAscii(from) == Encoding.isAscii(to[0])) { 45 | addFlag = true; 46 | } else { 47 | addFlag = ascCc.isCodeInCC(enc, from); 48 | if (ascCc.isNot()) addFlag = !addFlag; 49 | } 50 | 51 | if (length == 1) { 52 | boolean inCC = cc.isCodeInCC(enc, from); 53 | if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) { 54 | if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) { 55 | if (addFlag) { 56 | if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE || enc.codeToMbcLength(to[0]) > 1) { 57 | cc.addCodeRange(env, to[0], to[0], false); 58 | } else { 59 | /* /(?i:[^A-C])/.match("a") ==> fail. 
*/ 60 | bs.set(to[0]); 61 | } 62 | } 63 | } 64 | } else { 65 | if (inCC) { 66 | if (addFlag) { 67 | if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) { 68 | if (cc.isNot()) cc.clearNotFlag(env); 69 | cc.addCodeRange(env, to[0], to[0], false); 70 | } else { 71 | if (cc.isNot()) { 72 | bs.clear(to[0]); 73 | } else { 74 | bs.set(to[0]); 75 | } 76 | } 77 | } 78 | } 79 | } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS 80 | 81 | } else { 82 | if (cc.isCodeInCC(enc, from) && (!Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS || !cc.isNot())) { 83 | StringNode node = null; 84 | for (int i=0; i>> ROOM_SHIFT] & bit(pos)) != 0; 33 | } 34 | 35 | public void set(ScanEnvironment env, int pos) { 36 | if (at(pos)) env.ccDuplicateWarn(); 37 | set(pos); 38 | } 39 | 40 | public void set(int pos) { 41 | bits[pos >>> ROOM_SHIFT] |= bit(pos); 42 | } 43 | 44 | public void clear(int pos) { 45 | bits[pos >>> ROOM_SHIFT] &= ~bit(pos); 46 | } 47 | 48 | public void invert(int pos) { 49 | bits[pos >>> ROOM_SHIFT] ^= bit(pos); 50 | } 51 | 52 | public void clear() { 53 | for (int i=0; i>>= 1) != 0) log++; 109 | return log; 110 | } 111 | 112 | private static final int BITS_TO_STRING_WRAP = 4; 113 | @Override 114 | public String toString() { 115 | StringBuilder buffer = new StringBuilder(); 116 | buffer.append("BitSet"); 117 | for (int i=0; i= children.length) { 42 | CaptureTreeNode[] tmp = new CaptureTreeNode[children.length << 1]; 43 | System.arraycopy(children, 0, tmp, 0, children.length); 44 | children = tmp; 45 | } 46 | 47 | children[numChildren] = child; 48 | numChildren++; 49 | } 50 | 51 | void clear() { 52 | for (int i = 0; i < numChildren; i++) { 53 | children[i] = null; // ??? 
54 | } 55 | numChildren = 0; 56 | beg = end = Region.REGION_NOTPOS; 57 | group = -1; 58 | } 59 | 60 | /** 61 | * Builds a deep copy of this node and of every child below it. 62 | * The copy carries the same group, beg and end values as the original. 63 | */ 64 | CaptureTreeNode cloneTree() { 65 | CaptureTreeNode clone = new CaptureTreeNode(); 66 | clone.group = group; // group was previously not carried over to the copy 67 | clone.beg = beg; 68 | clone.end = end; 69 | 70 | for (int i = 0; i < numChildren; i++) { 71 | CaptureTreeNode child = children[i].cloneTree(); 72 | clone.addChild(child); 73 | } 74 | return clone; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/org/joni/Compiler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni; 21 | 22 | import org.jcodings.Encoding; 23 | import org.joni.ast.AnchorNode; 24 | import org.joni.ast.BackRefNode; 25 | import org.joni.ast.CClassNode; 26 | import org.joni.ast.CTypeNode; 27 | import org.joni.ast.CallNode; 28 | import org.joni.ast.ListNode; 29 | import org.joni.ast.EncloseNode; 30 | import org.joni.ast.Node; 31 | import org.joni.ast.QuantifierNode; 32 | import org.joni.ast.StringNode; 33 | import org.joni.constants.internal.NodeType; 34 | import org.joni.exception.ErrorMessages; 35 | import org.joni.exception.InternalException; 36 | import org.joni.exception.SyntaxException; 37 | 38 | abstract class Compiler implements ErrorMessages { 39 | protected final Analyser analyser; 40 | protected final Encoding enc; 41 | protected final Regex regex; 42 | 43 | protected Compiler(Analyser analyser) { 44 | this.analyser = analyser; 45 | this.regex = analyser.regex; 46 | this.enc = regex.enc; 47 | } 48 | 49 | final void compile(Node root) { 50 | prepare(); 51 | compileTree(root); 52 | finish(); 53 | } 54 | 55 | protected abstract void prepare(); 56 | protected abstract void finish(); 57 | 58 | protected abstract void compileAltNode(ListNode node); 59 | 60 | private void compileStringRawNode(StringNode sn) { 61 | if (sn.length() <= 0) return; 62 | addCompileString(sn.bytes, sn.p, 1 /*sb*/, sn.length(), false); 63 | } 64 | 65 | private void compileStringNode(StringNode node) { 66 | StringNode sn = node; 67 | if (sn.length() <= 0) return; 68 | 69 | boolean ambig = sn.isAmbig(); 70 | 71 | int p, prev; 72 | p = prev = sn.p; 73 | int end = sn.end; 74 | byte[]bytes = sn.bytes; 75 | int prevLen = enc.length(bytes, p, end); 76 | p += prevLen; 77 | int blen = prevLen; 78 | 79 | while (p < end) { 80 | int len = enc.length(bytes, p, end); 81 | if (len == prevLen || ambig) { 82 | blen += len; 83 | } else { 84 | addCompileString(bytes, prev, prevLen, blen, ambig); 85 | prev = p; 86 | blen = len; 87 | prevLen = len; 88 | } 89 | p += len; 90 | } 
91 | addCompileString(bytes, prev, prevLen, blen, ambig); 92 | } 93 | 94 | protected abstract void addCompileString(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase); 95 | 96 | protected abstract void compileCClassNode(CClassNode node); 97 | protected abstract void compileCTypeNode(CTypeNode node); 98 | protected abstract void compileAnyCharNode(); 99 | protected abstract void compileCallNode(CallNode node); 100 | protected abstract void compileBackrefNode(BackRefNode node); 101 | protected abstract void compileCECQuantifierNode(QuantifierNode node); 102 | protected abstract void compileNonCECQuantifierNode(QuantifierNode node); 103 | protected abstract void compileOptionNode(EncloseNode node); 104 | protected abstract void compileEncloseNode(EncloseNode node); 105 | protected abstract void compileAnchorNode(AnchorNode node); 106 | 107 | protected final void compileTree(Node node) { 108 | switch (node.getType()) { 109 | case NodeType.LIST: 110 | ListNode lin = (ListNode)node; 111 | do { 112 | compileTree(lin.value); 113 | } while ((lin = lin.tail) != null); 114 | break; 115 | 116 | case NodeType.ALT: 117 | compileAltNode((ListNode)node); 118 | break; 119 | 120 | case NodeType.STR: 121 | StringNode sn = (StringNode)node; 122 | if (sn.isRaw()) { 123 | compileStringRawNode(sn); 124 | } else { 125 | compileStringNode(sn); 126 | } 127 | break; 128 | 129 | case NodeType.CCLASS: 130 | compileCClassNode((CClassNode)node); 131 | break; 132 | 133 | case NodeType.CTYPE: 134 | compileCTypeNode((CTypeNode)node); 135 | break; 136 | 137 | case NodeType.CANY: 138 | compileAnyCharNode(); 139 | break; 140 | 141 | case NodeType.BREF: 142 | compileBackrefNode((BackRefNode)node); 143 | break; 144 | 145 | case NodeType.CALL: 146 | if (Config.USE_SUBEXP_CALL) { 147 | compileCallNode((CallNode)node); 148 | break; 149 | } // USE_SUBEXP_CALL 150 | break; 151 | 152 | case NodeType.QTFR: 153 | if (Config.USE_CEC) { 154 | compileCECQuantifierNode((QuantifierNode)node); 155 | 
} else { 156 | compileNonCECQuantifierNode((QuantifierNode)node); 157 | } 158 | break; 159 | 160 | case NodeType.ENCLOSE: 161 | EncloseNode enode = (EncloseNode)node; 162 | if (enode.isOption()) { 163 | compileOptionNode(enode); 164 | } else { 165 | compileEncloseNode(enode); 166 | } 167 | break; 168 | 169 | case NodeType.ANCHOR: 170 | compileAnchorNode((AnchorNode)node); 171 | break; 172 | 173 | default: 174 | // undefined node type 175 | newInternalException(PARSER_BUG); 176 | } // switch 177 | } 178 | 179 | protected final void compileTreeNTimes(Node node, int n) { 180 | for (int i=0; i, \k */ 34 | 35 | boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = ConfigSupport.getBoolean("joni.use_monomaniac_check_captures_in_endless_repeat", true); /* /(?:()|())*\2/ */ 36 | boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = ConfigSupport.getBoolean("joni.use_newline_at_end_of_string_has_empty_line", true); /* /\n$/ =~ "\n" */ 37 | boolean USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR = ConfigSupport.getBoolean("joni.use_warning_redundant_nested_repeat_operator", true); 38 | 39 | boolean CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS = ConfigSupport.getBoolean("joni.case_fold_is_applied_inside_negative_cclass", true); 40 | 41 | boolean USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE = ConfigSupport.getBoolean("joni.use_match_range_must_be_inside_of_specified_range", false); 42 | boolean USE_CAPTURE_HISTORY = ConfigSupport.getBoolean("joni.use_capture_history", false); 43 | boolean USE_VARIABLE_META_CHARS = ConfigSupport.getBoolean("joni.use_variable_meta_chars", true); 44 | boolean USE_WORD_BEGIN_END = ConfigSupport.getBoolean("joni.use_word_begin_end", true); /* "\<": word-begin, "\>": word-end */ 45 | boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = ConfigSupport.getBoolean("joni.use_find_longest_search_all_of_range", true); 46 | boolean USE_SUNDAY_QUICK_SEARCH = ConfigSupport.getBoolean("joni.use_sunday_quick_search", true); 47 | boolean USE_CEC = 
ConfigSupport.getBoolean("joni.use_cec", false); 48 | boolean USE_DYNAMIC_OPTION = ConfigSupport.getBoolean("joni.use_dynamic_option", false); 49 | boolean USE_BYTE_MAP = ConfigSupport.getBoolean("joni.use_byte_map", OptExactInfo.OPT_EXACT_MAXLEN <= CHAR_TABLE_SIZE); 50 | boolean USE_INT_MAP_BACKWARD = ConfigSupport.getBoolean("joni.use_int_map_backward", false); 51 | 52 | int NREGION = ConfigSupport.getInt("joni.nregion", 10); 53 | int MAX_BACKREF_NUM = ConfigSupport.getInt("joni.max_backref_num", 1000); 54 | int MAX_CAPTURE_GROUP_NUM = ConfigSupport.getInt("joni.max_capture_group_num", 32767); 55 | int MAX_REPEAT_NUM = ConfigSupport.getInt("joni.max_repeat_num", 100000); /* own key; it used to read "joni.max_multi_byte_ranges_num", the key of the constant below */ 56 | int MAX_MULTI_BYTE_RANGES_NUM = ConfigSupport.getInt("joni.max_multi_byte_ranges_num", 10000); 57 | 58 | // internal config 59 | boolean USE_OP_PUSH_OR_JUMP_EXACT = ConfigSupport.getBoolean("joni.use_op_push_or_jump_exact", true); 60 | boolean USE_QTFR_PEEK_NEXT = ConfigSupport.getBoolean("joni.use_qtfr_peek_next", true); 61 | 62 | int INIT_MATCH_STACK_SIZE = ConfigSupport.getInt("joni.init_match_stack_size", 64); 63 | 64 | boolean OPTIMIZE = ConfigSupport.getBoolean("joni.optimize", true); 65 | @Deprecated boolean DONT_OPTIMIZE = !OPTIMIZE; 66 | 67 | // use embedded string templates in Regex object as byte arrays instead of compiling them into int bytecode array 68 | boolean USE_STRING_TEMPLATES = ConfigSupport.getBoolean("joni.use_string_templates", true); 69 | 70 | 71 | int MAX_CAPTURE_HISTORY_GROUP = ConfigSupport.getInt("joni.max_capture_history_group", 31); 72 | 73 | 74 | int CHECK_STRING_THRESHOLD_LEN = ConfigSupport.getInt("joni.check_string_threshold_len", 7); 75 | int CHECK_BUFF_MAX_SIZE = ConfigSupport.getInt("joni.check_buff_max_size", 0x4000); 76 | 77 | PrintStream log = System.out; 78 | PrintStream err = System.err; 79 | 80 | boolean DEBUG_ALL = ConfigSupport.getBoolean("joni.debug.all", false); 81 | 82 | boolean DEBUG = ConfigSupport.getBoolean("joni.debug", false) 
|| DEBUG_ALL; 83 | boolean DEBUG_PARSE_TREE = ConfigSupport.getBoolean("joni.debug.parse.tree", false) || DEBUG_ALL; 84 | boolean DEBUG_PARSE_TREE_RAW = ConfigSupport.getBoolean("joni.debug.parse.tree.raw", true) || DEBUG_ALL; 85 | boolean DEBUG_COMPILE = ConfigSupport.getBoolean("joni.debug.compile", false) || DEBUG_ALL; 86 | boolean DEBUG_COMPILE_BYTE_CODE_INFO = ConfigSupport.getBoolean("joni.debug.compile.bytecode.info", false) || DEBUG_ALL; 87 | boolean DEBUG_SEARCH = ConfigSupport.getBoolean("joni.debug.search", false) || DEBUG_ALL; 88 | boolean DEBUG_MATCH = ConfigSupport.getBoolean("joni.debug.match", false) || DEBUG_ALL; 89 | } 90 | -------------------------------------------------------------------------------- /src/org/joni/ConfigSupport.java: -------------------------------------------------------------------------------- 1 | package org.joni; 2 | 3 | public class ConfigSupport { 4 | static boolean getBoolean(String property, boolean def) { 5 | String value = System.getProperty(property, def ? "true" : "false"); 6 | return !value.equals("false"); 7 | } 8 | 9 | static int getInt(String property, int def) { 10 | String value = System.getProperty(property); 11 | return value != null ? 
Integer.parseInt(value) : def; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/org/joni/MatcherFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni; 21 | 22 | /** 23 | * Creates Matcher instances from a regex, a region and a byte range. 24 | */ 25 | abstract class MatcherFactory { 26 | /** Factory whose matchers are ByteCodeMachine instances. */ 27 | static final MatcherFactory DEFAULT = new MatcherFactory() { 28 | @Override 29 | Matcher create(Regex regex, Region region, byte[]bytes, int p, int end) { 30 | return new ByteCodeMachine(regex, region, bytes, p, end); 31 | } 32 | }; 33 | 34 | /** Creates a matcher from the given regex, region and byte range arguments. */ 35 | abstract Matcher create(Regex regex, Region region, byte[]bytes, int p, int end); 36 | 37 | /** Same as the five-argument create, then passes timeout to Matcher.setTimeout. */ 38 | public Matcher create(Regex regex, Region region, byte[]bytes, int p, int end, long timeout) { 39 | Matcher created = create(regex, region, bytes, p, end); 40 | created.setTimeout(timeout); 41 | return created; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/org/joni/MinMaxLen.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni; 21 | 22 | /** 23 | * Mutable (min, max) pair of byte lengths; INFINITE_DISTANCE marks an unbounded length and the add/multiply helpers saturate at it. 24 | */ 25 | final class MinMaxLen { 26 | static final int INFINITE_DISTANCE = 0x7FFFFFFF; 27 | 28 | /* 1000 / (min-max-dist + 1) */ 29 | private static final short[] distValues = { 30 | 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 31 | 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 32 | 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, 33 | 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 34 | 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 35 | 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 36 | 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 37 | 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 38 | 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 39 | 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 40 | }; 41 | 42 | int min; /* min byte length */ 43 | int max; /* max byte length */ 44 | 45 | /** Score for the max - min spread: 0 when max is infinite, the table entry while the spread is inside the table, otherwise 1. */ 46 | int distanceValue() { 47 | if (max == INFINITE_DISTANCE) return 0; 48 | final int spread = max - min; 49 | /* return dist_vals[d] * 16 / (mm->min + 12); */ 50 | if (spread < distValues.length) return distValues[spread]; 51 | return 1; 52 | } 53 | 54 | /** Compares v1 * distanceValue() with v2 * other.distanceValue(); a non-positive v2 gives -1, then a non-positive v1 gives 1, and ties fall back to comparing min. */ 55 | int compareDistanceValue(MinMaxLen other, int v1, int v2) { 56 | if (v2 <= 0) return -1; 57 | if (v1 <= 0) return 1; 58 | 59 | final int mine = v1 * distanceValue(); 60 | final int theirs = v2 * other.distanceValue(); 61 | if (theirs != mine) return theirs > mine ? 1 : -1; 62 | 63 | return Integer.compare(min, other.min); 64 | } 65 | 66 | boolean equal(MinMaxLen other) { 67 | return !(min != other.min || max != other.max); 68 | } 69 | 70 | void set(int min, int max) { 71 | this.min = min; 72 | this.max = max; 73 | } 74 | 75 | void clear() { 76 | set(0, 0); 77 | } 78 | 79 | void copy(MinMaxLen other) { 80 | set(other.min, other.max); 81 | } 82 | 83 | /** Adds other's bounds to this one's, saturating at INFINITE_DISTANCE. */ 84 | void add(MinMaxLen other) { 85 | min = distanceAdd(min, other.min); 86 | max = distanceAdd(max, other.max); 87 | } 88 | 89 | /** Adds len to both bounds, saturating at INFINITE_DISTANCE. */ 90 | void addLength(int len) { 91 | min = distanceAdd(min, len); 92 | max = distanceAdd(max, len); 93 | } 94 | 95 | /** Lowers min and raises max as needed so that other's bounds are covered too. */ 96 | void altMerge(MinMaxLen other) { 97 | min = Math.min(min, other.min); 98 | max = Math.max(max, other.max); 99 | } 100 | 
101 | static int distanceAdd(int d1, int d2) { 102 | if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE) return INFINITE_DISTANCE; 103 | return d1 <= INFINITE_DISTANCE - d2 ? d1 + d2 : INFINITE_DISTANCE; 104 | } 105 | 106 | static int distanceMultiply(int d, int m) { 107 | if (m == 0) return 0; 108 | return d < INFINITE_DISTANCE / m ? d * m : INFINITE_DISTANCE; 109 | } 110 | 111 | /** Renders "a-b", printing each side as "inf" when it equals INFINITE_DISTANCE and as "(n)" otherwise. */ 112 | static String distanceRangeToString(int a, int b) { 113 | String lo = a == INFINITE_DISTANCE ? "inf" : "(" + a + ")"; 114 | String hi = b == INFINITE_DISTANCE ? "inf" : "(" + b + ")"; 115 | return lo + "-" + hi; 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/org/joni/MultiRegion.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni; 21 | 22 | import java.util.Arrays; 23 | 24 | public final class MultiRegion extends Region { 25 | private final int[] begEnd; 26 | 27 | public MultiRegion(int num) { 28 | this.begEnd = new int[num * 2]; 29 | } 30 | 31 | public MultiRegion(int begin, int end) { 32 | this.begEnd = new int[]{begin, end}; 33 | } 34 | 35 | @Override 36 | public final int getNumRegs() { 37 | return begEnd.length / 2; 38 | } 39 | 40 | @Override 41 | public MultiRegion clone() { 42 | MultiRegion region = new MultiRegion(getNumRegs()); 43 | System.arraycopy(begEnd, 0, region.begEnd, 0, begEnd.length); 44 | if (getCaptureTree() != null) region.setCaptureTree(getCaptureTree().cloneTree()); 45 | return region; 46 | } 47 | 48 | @Override 49 | public int getBeg(int index) { 50 | return begEnd[index * 2]; 51 | } 52 | 53 | @Override 54 | public int setBeg(int index, int value) { 55 | return begEnd[index * 2] = value; 56 | } 57 | 58 | @Override 59 | public int getEnd(int index) { 60 | return begEnd[index * 2 + 1]; 61 | } 62 | 63 | @Override 64 | public int setEnd(int index, int value) { 65 | return begEnd[index * 2 + 1] = value; 66 | } 67 | 68 | @Override 69 | void clear() { 70 | Arrays.fill(begEnd, REGION_NOTPOS); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/org/joni/NameEntry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * 
/**
 * A group name (a slice of a byte array) together with the back-reference
 * numbers registered for it.
 *
 * <p>The first number is kept in {@code backRef1}; from the second number on
 * all numbers live in the {@code backRefs} array, which doubles when full.
 */
public final class NameEntry {
    static final int INIT_NAME_BACKREFS_ALLOC_NUM = 8;

    public final byte[] name;
    public final int nameP;
    public final int nameEnd;

    int backNum;
    int backRef1;
    int[] backRefs;

    /**
     * @param bytes buffer holding the name
     * @param p     offset of the first name byte
     * @param end   offset just past the last name byte
     */
    public NameEntry(byte[] bytes, int p, int end) {
        this.name = bytes;
        this.nameP = p;
        this.nameEnd = end;
    }

    /** @return a freshly allocated array with the {@code backNum} registered numbers */
    public int[] getBackRefs() {
        if (backNum == 0) {
            return new int[] {};
        }
        if (backNum == 1) {
            return new int[] {backRef1};
        }
        final int[] snapshot = new int[backNum];
        System.arraycopy(backRefs, 0, snapshot, 0, backNum);
        return snapshot;
    }

    /** Registers one more back-reference number for this name. */
    public void addBackref(int backRef) {
        final int count = ++backNum;

        if (count == 1) {
            backRef1 = backRef;
            return;
        }
        if (count == 2) {
            // second number: switch from the single field to array storage
            backRefs = new int[INIT_NAME_BACKREFS_ALLOC_NUM];
            backRefs[0] = backRef1;
            backRefs[1] = backRef;
            return;
        }
        if (count > backRefs.length) {
            doubleCapacity();
        }
        backRefs[count - 1] = backRef;
    }

    /* replaces backRefs with an array of twice the length, keeping its contents */
    private void doubleCapacity() {
        final int[] bigger = new int[backRefs.length << 1];
        System.arraycopy(backRefs, 0, bigger, 0, backRefs.length);
        backRefs = bigger;
    }

    /** Name, a space, then {@code -}, the single number, or the comma-separated numbers. */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append(new String(name, nameP, nameEnd - nameP)).append(" ");

        if (backNum == 0) {
            return text.append("-").toString();
        }
        if (backNum == 1) {
            return text.append(backRef1).toString();
        }
        for (int i = 0; i < backNum; i++) {
            if (i > 0) {
                text.append(", ");
            }
            text.append(backRefs[i]);
        }
        return text.toString();
    }
}
19 | */ 20 | package org.joni; 21 | 22 | abstract class NativeMachine extends Matcher { 23 | 24 | protected NativeMachine(Regex regex, Region region, byte[]bytes, int p, int end) { 25 | super(regex, region, bytes, p, end); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/org/joni/NodeOptInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni; 21 | 22 | import org.jcodings.Encoding; 23 | 24 | final class NodeOptInfo { 25 | final MinMaxLen length = new MinMaxLen(); 26 | final OptAnchorInfo anchor = new OptAnchorInfo(); 27 | final OptExactInfo exb = new OptExactInfo(); /* boundary */ 28 | final OptExactInfo exm = new OptExactInfo(); /* middle */ 29 | final OptExactInfo expr = new OptExactInfo(); /* prec read (?=...) 
*/ 30 | final OptMapInfo map = new OptMapInfo(); /* boundary */ 31 | 32 | public void setBoundNode(MinMaxLen mmd) { 33 | exb.mmd.copy(mmd); 34 | expr.mmd.copy(mmd); 35 | map.mmd.copy(mmd); 36 | } 37 | 38 | public void clear() { 39 | length.clear(); 40 | anchor.clear(); 41 | exb.clear(); 42 | exm.clear(); 43 | expr.clear(); 44 | map.clear(); 45 | } 46 | 47 | public void copy(NodeOptInfo other) { 48 | length.copy(other.length); 49 | anchor.copy(other.anchor); 50 | exb.copy(other.exb); 51 | exm.copy(other.exm); 52 | expr.copy(other.expr); 53 | map.copy(other.map); 54 | } 55 | 56 | public void concatLeftNode(NodeOptInfo other, Encoding enc) { 57 | OptAnchorInfo tanchor = new OptAnchorInfo(); // remove it somehow ? 58 | tanchor.concat(anchor, other.anchor, length.max, other.length.max); 59 | anchor.copy(tanchor); 60 | 61 | if (other.exb.length > 0 && length.max == 0) { 62 | tanchor.concat(anchor, other.exb.anchor, length.max, other.length.max); 63 | other.exb.anchor.copy(tanchor); 64 | } 65 | 66 | if (other.map.value > 0 && length.max == 0) { 67 | if (other.map.mmd.max == 0) { 68 | other.map.anchor.leftAnchor |= anchor.leftAnchor; 69 | } 70 | } 71 | 72 | boolean exbReach = exb.reachEnd; 73 | boolean exmReach = exm.reachEnd; 74 | 75 | if (other.length.max != 0) { 76 | exb.reachEnd = exm.reachEnd = false; 77 | } 78 | 79 | if (other.exb.length > 0) { 80 | if (exbReach) { 81 | exb.concat(other.exb, enc); 82 | other.exb.clear(); 83 | } else if (exmReach) { 84 | exm.concat(other.exb, enc); 85 | other.exb.clear(); 86 | } 87 | } 88 | 89 | exm.select(other.exb, enc); 90 | exm.select(other.exm, enc); 91 | 92 | if (expr.length > 0) { 93 | if (other.length.max > 0) { 94 | // TODO: make sure it is not an Oniguruma bug (casting unsigned int to int for arithmetic comparison) 95 | int otherLengthMax = other.length.max; 96 | if (otherLengthMax == MinMaxLen.INFINITE_DISTANCE) otherLengthMax = -1; 97 | if (expr.length > otherLengthMax) expr.length = otherLengthMax; 98 | if (expr.mmd.max 
== 0) { 99 | exb.select(expr, enc); 100 | } else { 101 | exm.select(expr, enc); 102 | } 103 | } 104 | } else if (other.expr.length > 0) { 105 | expr.copy(other.expr); 106 | } 107 | 108 | map.select(other.map); 109 | length.add(other.length); 110 | } 111 | 112 | public void altMerge(NodeOptInfo other, OptEnvironment env) { 113 | anchor.altMerge(other.anchor); 114 | exb.altMerge(other.exb, env); 115 | exm.altMerge(other.exm, env); 116 | expr.altMerge(other.expr, env); 117 | map.altMerge(other.map, env.enc); 118 | length.altMerge(other.length); 119 | } 120 | 121 | public void setBound(MinMaxLen mmd) { 122 | exb.mmd.copy(mmd); 123 | expr.mmd.copy(mmd); 124 | map.mmd.copy(mmd); 125 | } 126 | 127 | } 128 | -------------------------------------------------------------------------------- /src/org/joni/OptAnchorInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni; 21 | 22 | import org.joni.constants.internal.AnchorType; 23 | 24 | final class OptAnchorInfo implements AnchorType { 25 | int leftAnchor; 26 | int rightAnchor; 27 | 28 | void clear() { 29 | leftAnchor = rightAnchor = 0; 30 | } 31 | 32 | void copy(OptAnchorInfo other) { 33 | leftAnchor = other.leftAnchor; 34 | rightAnchor = other.rightAnchor; 35 | } 36 | 37 | void concat(OptAnchorInfo left, OptAnchorInfo right, int leftLength, int rightLength) { 38 | leftAnchor = left.leftAnchor; 39 | if (leftLength == 0) leftAnchor |= right.leftAnchor; 40 | 41 | rightAnchor = right.rightAnchor; 42 | if (rightLength == 0) { 43 | rightAnchor |= left.rightAnchor; 44 | } else { 45 | rightAnchor |= left.rightAnchor & AnchorType.PREC_READ_NOT; 46 | } 47 | } 48 | 49 | boolean isSet(int anchor) { 50 | if ((leftAnchor & anchor) != 0) return true; 51 | return (rightAnchor & anchor) != 0; 52 | } 53 | 54 | void add(int anchor) { 55 | if (isLeftAnchor(anchor)) { 56 | leftAnchor |= anchor; 57 | } else { 58 | rightAnchor |= anchor; 59 | } 60 | } 61 | 62 | void remove(int anchor) { 63 | if (isLeftAnchor(anchor)) { 64 | leftAnchor &= ~anchor; 65 | } else { 66 | rightAnchor &= ~anchor; 67 | } 68 | } 69 | 70 | void altMerge(OptAnchorInfo other) { 71 | leftAnchor &= other.leftAnchor; 72 | rightAnchor &= other.rightAnchor; 73 | } 74 | 75 | static boolean isLeftAnchor(int anchor) { // make a mask for it ? 
76 | return !(anchor == END_BUF || anchor == SEMI_END_BUF || 77 | anchor == END_LINE || anchor == PREC_READ || 78 | anchor == PREC_READ_NOT); 79 | } 80 | 81 | static String anchorToString(int anchor) { 82 | StringBuilder s = new StringBuilder("["); 83 | 84 | if ((anchor & AnchorType.BEGIN_BUF) !=0 ) s.append("begin-buf "); 85 | if ((anchor & AnchorType.BEGIN_LINE) !=0 ) s.append("begin-line "); 86 | if ((anchor & AnchorType.BEGIN_POSITION) !=0 ) s.append("begin-pos "); 87 | if ((anchor & AnchorType.END_BUF) !=0 ) s.append("end-buf "); 88 | if ((anchor & AnchorType.SEMI_END_BUF) !=0 ) s.append("semi-end-buf "); 89 | if ((anchor & AnchorType.END_LINE) !=0 ) s.append("end-line "); 90 | if ((anchor & AnchorType.ANYCHAR_STAR) !=0 ) s.append("anychar-star "); 91 | if ((anchor & AnchorType.ANYCHAR_STAR_ML) !=0 ) s.append("anychar-star-pl "); 92 | s.append("]"); 93 | 94 | return s.toString(); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/org/joni/OptEnvironment.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni; 21 | 22 | import org.jcodings.Encoding; 23 | 24 | // remove this one in future and pass mmd directly 25 | final class OptEnvironment { 26 | final MinMaxLen mmd = new MinMaxLen(); 27 | Encoding enc; 28 | int options; 29 | int caseFoldFlag; 30 | ScanEnvironment scanEnv; 31 | 32 | void copy(OptEnvironment other) { 33 | mmd.copy(other.mmd); 34 | enc = other.enc; 35 | options = other.options; 36 | caseFoldFlag = other.caseFoldFlag; 37 | scanEnv = other.scanEnv; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/org/joni/OptExactInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni; 21 | 22 | import org.jcodings.Encoding; 23 | 24 | final class OptExactInfo { 25 | static final int OPT_EXACT_MAXLEN = 24; 26 | 27 | final MinMaxLen mmd = new MinMaxLen(); 28 | final OptAnchorInfo anchor = new OptAnchorInfo(); 29 | boolean reachEnd; 30 | int ignoreCase; /* -1: unset, 0: case sensitive, 1: ignore case */ 31 | final byte[] bytes = new byte[OPT_EXACT_MAXLEN]; 32 | int length; 33 | 34 | boolean isFull() { 35 | return length >= OPT_EXACT_MAXLEN; 36 | } 37 | 38 | void clear() { 39 | mmd.clear(); 40 | anchor.clear(); 41 | reachEnd = false; 42 | ignoreCase = -1; 43 | length = 0; 44 | } 45 | 46 | void copy(OptExactInfo other) { 47 | mmd.copy(other.mmd); 48 | anchor.copy(other.anchor); 49 | reachEnd = other.reachEnd; 50 | ignoreCase = other.ignoreCase; 51 | length = other.length; 52 | 53 | System.arraycopy(other.bytes, 0, bytes, 0, OPT_EXACT_MAXLEN); 54 | } 55 | 56 | void concat(OptExactInfo other, Encoding enc) { 57 | if (ignoreCase < 0) { 58 | ignoreCase = other.ignoreCase; 59 | } else if (ignoreCase != other.ignoreCase) { 60 | return; 61 | } 62 | 63 | int p = 0; // add->s; 64 | int end = p + other.length; 65 | 66 | int i; 67 | for (i = length; p < end;) { 68 | int len = enc.length(other.bytes, p, end); 69 | if (i + len > OPT_EXACT_MAXLEN) break; 70 | for (int j = 0; j < len && p < end; j++) { 71 | bytes[i++] = other.bytes[p++]; // arraycopy or even don't copy anything ?? 
72 | } 73 | } 74 | 75 | length = i; 76 | reachEnd = (p == end && other.reachEnd); 77 | 78 | OptAnchorInfo tmp = new OptAnchorInfo(); 79 | tmp.concat(anchor, other.anchor, 1, 1); 80 | if (!reachEnd) tmp.rightAnchor = 0; 81 | anchor.copy(tmp); 82 | } 83 | 84 | void concatStr(byte[]lbytes, int p, int end, boolean raw, Encoding enc) { 85 | int i; 86 | for (i = length; p < end && i < OPT_EXACT_MAXLEN;) { 87 | int len = enc.length(lbytes, p, end); 88 | if (i + len > OPT_EXACT_MAXLEN) break; 89 | for (int j = 0; j < len && p < end; j++) { 90 | bytes[i++] = lbytes[p++]; 91 | } 92 | } 93 | length = i; 94 | } 95 | 96 | void altMerge(OptExactInfo other, OptEnvironment env) { 97 | if (other.length == 0 || length == 0) { 98 | clear(); 99 | return; 100 | } 101 | 102 | if (!mmd.equal(other.mmd)) { 103 | clear(); 104 | return; 105 | } 106 | 107 | int i; 108 | for (i=0; i= 0) { 127 | ignoreCase |= other.ignoreCase; 128 | } 129 | 130 | anchor.altMerge(other.anchor); 131 | 132 | if (!reachEnd) anchor.rightAnchor = 0; 133 | } 134 | 135 | 136 | void select(OptExactInfo alt, Encoding enc) { 137 | int v1 = length; 138 | int v2 = alt.length; 139 | 140 | if (v2 == 0) { 141 | return; 142 | } else if (v1 == 0) { 143 | copy(alt); 144 | return; 145 | } else if (v1 <= 2 && v2 <= 2) { 146 | /* ByteValTable[x] is big value --> low price */ 147 | v2 = OptMapInfo.positionValue(enc, bytes[0] & 0xff); 148 | v1 = OptMapInfo.positionValue(enc, alt.bytes[0] & 0xff); 149 | 150 | if (length > 1) v1 += 5; 151 | if (alt.length > 1) v2 += 5; 152 | } 153 | 154 | if (ignoreCase <= 0) v1 *= 2; 155 | if (alt.ignoreCase <= 0) v2 *= 2; 156 | 157 | if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt); 158 | } 159 | 160 | // comp_opt_exact_or_map_info 161 | private static final int COMP_EM_BASE = 20; 162 | int compare(OptMapInfo m) { 163 | if (m.value <= 0) return -1; 164 | 165 | int ve = COMP_EM_BASE * length * (ignoreCase > 0 ? 
1 : 2); 166 | int vm = COMP_EM_BASE * 5 * 2 / m.value; 167 | 168 | return mmd.compareDistanceValue(m.mmd, ve, vm); 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/org/joni/OptMapInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni; 21 | 22 | import org.jcodings.CaseFoldCodeItem; 23 | import org.jcodings.Encoding; 24 | 25 | final class OptMapInfo { 26 | final MinMaxLen mmd = new MinMaxLen(); /* info position */ 27 | final OptAnchorInfo anchor = new OptAnchorInfo(); 28 | int value; /* weighted value */ 29 | final byte[] map = new byte[Config.CHAR_TABLE_SIZE]; 30 | 31 | void clear() { 32 | mmd.clear(); 33 | anchor.clear(); 34 | value = 0; 35 | for (int i=0; i 0) copy(alt); 79 | } 80 | 81 | // alt_merge_opt_map_info 82 | void altMerge(OptMapInfo other, Encoding enc) { 83 | /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ 84 | if (value == 0) return; 85 | if (other.value == 0 || mmd.max < other.mmd.max) { 86 | clear(); 87 | return; 88 | } 89 | 90 | mmd.altMerge(other.mmd); 91 | 92 | int val = 0; 93 | for (int i=0; i 1) { 117 | return 20; 118 | } else { 119 | return ByteValTable[i]; 120 | } 121 | } else { 122 | return 4; /* Take it easy. */ 123 | } 124 | } 125 | 126 | } 127 | -------------------------------------------------------------------------------- /src/org/joni/Option.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 
/**
 * Option bit flags and the predicates that test them.
 */
public final class Option {
    /* options */
    public static final int NONE                = 0;
    public static final int IGNORECASE          = (1 << 0);
    public static final int EXTEND              = (1 << 1);
    public static final int MULTILINE           = (1 << 2);
    public static final int SINGLELINE          = (1 << 3);
    public static final int FIND_LONGEST        = (1 << 4);
    public static final int FIND_NOT_EMPTY      = (1 << 5);
    public static final int NEGATE_SINGLELINE   = (1 << 6);
    public static final int DONT_CAPTURE_GROUP  = (1 << 7);
    public static final int CAPTURE_GROUP       = (1 << 8);

    /* options (search time) */
    public static final int NOTBOL              = (1 << 9);
    public static final int NOTEOL              = (1 << 10);
    public static final int POSIX_REGION        = (1 << 11);

    /* options (ctype range) */
    public static final int ASCII_RANGE             = (1 << 12);
    public static final int POSIX_BRACKET_ALL_RANGE = (1 << 13);
    public static final int WORD_BOUND_ALL_RANGE    = (1 << 14);
    /* options (newline) */
    public static final int NEWLINE_CRLF        = (1 << 15);
    public static final int NOTBOS              = (1 << 16);
    public static final int NOTEOS              = (1 << 17);
    public static final int CR_7_BIT            = (1 << 18);

    public static final int MAXBIT              = (1 << 19); /* limit */

    public static final int DEFAULT             = NONE;

    /**
     * Renders the names of the flags set in {@code option}, in declaration
     * order and separated by a single space.
     *
     * <p>Only the flags from {@code IGNORECASE} to {@code POSIX_REGION} plus
     * {@code CR_7_BIT} are reported; the other bits are ignored here.
     *
     * @param option bit set to describe
     * @return the space-separated names, or the empty string when none of the
     *         reported flags is set
     */
    public static String toString(int option) {
        StringBuilder names = new StringBuilder();
        appendName(names, isIgnoreCase(option), "IGNORECASE");
        appendName(names, isExtend(option), "EXTEND");
        appendName(names, isMultiline(option), "MULTILINE");
        appendName(names, isSingleline(option), "SINGLELINE");
        appendName(names, isFindLongest(option), "FIND_LONGEST");
        appendName(names, isFindNotEmpty(option), "FIND_NOT_EMPTY");
        appendName(names, isNegateSingleline(option), "NEGATE_SINGLELINE");
        appendName(names, isDontCaptureGroup(option), "DONT_CAPTURE_GROUP");
        appendName(names, isCaptureGroup(option), "CAPTURE_GROUP");
        appendName(names, isNotBol(option), "NOTBOL");
        appendName(names, isNotEol(option), "NOTEOL");
        appendName(names, isPosixRegion(option), "POSIX_REGION");
        appendName(names, isCR7Bit(option), "CR_7_BIT");
        return names.toString();
    }

    /* Appends name when present is true, putting a space before every name but the first. */
    private static void appendName(StringBuilder out, boolean present, String name) {
        if (!present) {
            return;
        }
        if (out.length() > 0) {
            // without a separator adjacent names run together ("IGNORECASEEXTEND")
            out.append(' ');
        }
        out.append(name);
    }

    /** @return true if {@link #IGNORECASE} is set in {@code option} */
    public static boolean isIgnoreCase(int option) {
        return (option & IGNORECASE) != 0;
    }

    /** @return true if {@link #EXTEND} is set in {@code option} */
    public static boolean isExtend(int option) {
        return (option & EXTEND) != 0;
    }

    /** @return true if {@link #SINGLELINE} is set in {@code option} */
    public static boolean isSingleline(int option) {
        return (option & SINGLELINE) != 0;
    }

    /** @return true if {@link #MULTILINE} is set in {@code option} */
    public static boolean isMultiline(int option) {
        return (option & MULTILINE) != 0;
    }

    /** @return true if {@link #FIND_LONGEST} is set in {@code option} */
    public static boolean isFindLongest(int option) {
        return (option & FIND_LONGEST) != 0;
    }

    /** @return true if {@link #FIND_NOT_EMPTY} is set in {@code option} */
    public static boolean isFindNotEmpty(int option) {
        return (option & FIND_NOT_EMPTY) != 0;
    }

    /** @return true if {@link #FIND_LONGEST} or {@link #FIND_NOT_EMPTY} is set in {@code option} */
    public static boolean isFindCondition(int option) {
        return (option & (FIND_LONGEST | FIND_NOT_EMPTY)) != 0;
    }

    /** @return true if {@link #NEGATE_SINGLELINE} is set in {@code option} */
    public static boolean isNegateSingleline(int option) {
        return (option & NEGATE_SINGLELINE) != 0;
    }

    /** @return true if {@link #DONT_CAPTURE_GROUP} is set in {@code option} */
    public static boolean isDontCaptureGroup(int option) {
        return (option & DONT_CAPTURE_GROUP) != 0;
    }

    /** @return true if {@link #CAPTURE_GROUP} is set in {@code option} */
    public static boolean isCaptureGroup(int option) {
        return (option & CAPTURE_GROUP) != 0;
    }

    /** @return true if {@link #NOTBOL} is set in {@code option} */
    public static boolean isNotBol(int option) {
        return (option & NOTBOL) != 0;
    }

    /** @return true if {@link #NOTEOL} is set in {@code option} */
    public static boolean isNotEol(int option) {
        return (option & NOTEOL) != 0;
    }

    /** @return true if {@link #POSIX_REGION} is set in {@code option} */
    public static boolean isPosixRegion(int option) {
        return (option & POSIX_REGION) != 0;
    }

    /** @return true if {@link #ASCII_RANGE} is set in {@code option} */
    public static boolean isAsciiRange(int option) {
        return (option & ASCII_RANGE) != 0;
    }

    /** @return true if {@link #POSIX_BRACKET_ALL_RANGE} is set in {@code option} */
    public static boolean isPosixBracketAllRange(int option) {
        return (option & POSIX_BRACKET_ALL_RANGE) != 0;
    }

    /** @return true if {@link #WORD_BOUND_ALL_RANGE} is set in {@code option} */
    public static boolean isWordBoundAllRange(int option) {
        return (option & WORD_BOUND_ALL_RANGE) != 0;
    }

    /** @return true if {@link #NEWLINE_CRLF} is set in {@code option} */
    public static boolean isNewlineCRLF(int option) {
        return (option & NEWLINE_CRLF) != 0;
    }

    /** @return true if {@link #CR_7_BIT} is set in {@code option} */
    public static boolean isCR7Bit(int option) {
        return (option & CR_7_BIT) != 0;
    }

    /**
     * Always returns false, whatever {@code option} contains.
     */
    public static boolean isDynamic(int option) {
        // ignore-case and multibyte status are included in compiled code
        // return (option & (MULTILINE | IGNORECASE)) != 0;
        return false;
    }
}
11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni; 21 | 22 | public abstract class Region { 23 | static final int REGION_NOTPOS = -1; 24 | 25 | protected CaptureTreeNode historyRoot; 26 | 27 | public static Region newRegion(int num) { 28 | if (num == 1) return new SingleRegion(num); 29 | return new MultiRegion(num); 30 | } 31 | 32 | public static Region newRegion(int begin, int end) { 33 | return new SingleRegion(begin, end); 34 | } 35 | 36 | @Override 37 | public abstract Region clone(); 38 | 39 | public abstract int getNumRegs(); 40 | 41 | public abstract int getBeg(int index); 42 | 43 | public abstract int setBeg(int index, int value); 44 | 45 | public abstract int getEnd(int index); 46 | 47 | public abstract int setEnd(int index, int value); 48 | 49 | @Override 50 | public String toString() { 51 | StringBuilder sb = new StringBuilder(); 52 | sb.append("Region: \n"); 53 | for (int i=0; i= Config.MAX_CAPTURE_GROUP_NUM) throw new InternalException(ErrorMessages.TOO_MANY_CAPTURE_GROUPS); 69 | if (numMem++ == 0) { 70 | memNodes = new EncloseNode[Config.SCANENV_MEMNODES_SIZE]; 71 | } else if (numMem >= memNodes.length) { 72 | EncloseNode[]tmp = new EncloseNode[memNodes.length << 1]; 73 | System.arraycopy(memNodes, 0, tmp, 0, memNodes.length); 74 | memNodes = tmp; 75 | } 76 | 77 | return numMem; 78 | } 79 | 80 | void setMemNode(int num, EncloseNode node) { 81 | if (numMem >= num) { 82 | memNodes[num] = node; 83 | } else { 84 | throw new InternalException(ErrorMessages.PARSER_BUG); 
85 | } 86 | } 87 | 88 | 89 | void pushPrecReadNotNode(Node node) { 90 | numPrecReadNotNodes++; 91 | if (precReadNotNodes == null) { 92 | precReadNotNodes = new Node[Config.SCANENV_MEMNODES_SIZE]; 93 | } else if (numPrecReadNotNodes >= precReadNotNodes.length) { 94 | Node[]tmp = new Node[precReadNotNodes.length << 1]; 95 | System.arraycopy(precReadNotNodes, 0, tmp, 0, precReadNotNodes.length); 96 | precReadNotNodes = tmp; 97 | } 98 | precReadNotNodes[numPrecReadNotNodes - 1] = node; 99 | } 100 | 101 | void popPrecReadNotNode(Node node) { 102 | if (precReadNotNodes != null && precReadNotNodes[numPrecReadNotNodes - 1] == node) { 103 | precReadNotNodes[numPrecReadNotNodes - 1] = null; 104 | numPrecReadNotNodes--; 105 | } 106 | } 107 | 108 | Node currentPrecReadNotNode() { 109 | if (numPrecReadNotNodes > 0) { 110 | return precReadNotNodes[numPrecReadNotNodes - 1]; 111 | } 112 | return null; 113 | } 114 | 115 | int convertBackslashValue(int c) { 116 | if (syntax.opEscControlChars()) { 117 | switch (c) { 118 | case 'n': return '\n'; 119 | case 't': return '\t'; 120 | case 'r': return '\r'; 121 | case 'f': return '\f'; 122 | case 'a': return '\007'; 123 | case 'b': return '\010'; 124 | case 'e': return '\033'; 125 | case 'v': 126 | if (syntax.op2EscVVtab()) return 11; // '\v' 127 | break; 128 | default: 129 | if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) unknownEscWarn(String.valueOf((char)c)); 130 | } 131 | } 132 | return c; 133 | } 134 | 135 | void ccEscWarn(String s) { 136 | if (warnings != WarnCallback.NONE) { 137 | if (syntax.warnCCOpNotEscaped() && syntax.backSlashEscapeInCC()) { 138 | warnings.warn("character class has '" + s + "' without escape"); 139 | } 140 | } 141 | } 142 | 143 | void unknownEscWarn(String s) { 144 | if (warnings != WarnCallback.NONE) { 145 | warnings.warn("Unknown escape \\" + s + " is ignored"); 146 | } 147 | } 148 | 149 | void closeBracketWithoutEscapeWarn(String s) { 150 | if (warnings != WarnCallback.NONE) { 151 | if 
(syntax.warnCCOpNotEscaped()) { 152 | warnings.warn("regular expression has '" + s + "' without escape"); 153 | } 154 | } 155 | } 156 | 157 | void ccDuplicateWarn() { 158 | if (syntax.warnCCDup() && (warningsFlag & SyntaxProperties.WARN_CC_DUP) == 0) { 159 | warningsFlag |= SyntaxProperties.WARN_CC_DUP; 160 | // FIXME: #34 points out problem and what it will take to uncomment this (we were getting erroneous versions of this) 161 | // warnings.warn("character class has duplicated range"); 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/org/joni/ScannerSupport.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni; 21 | 22 | import org.jcodings.Encoding; 23 | import org.jcodings.IntHolder; 24 | import org.joni.exception.ErrorMessages; 25 | import org.joni.exception.InternalException; 26 | import org.joni.exception.SyntaxException; 27 | import org.joni.exception.ValueException; 28 | 29 | abstract class ScannerSupport extends IntHolder implements ErrorMessages { 30 | protected final Encoding enc; // fast access to encoding 31 | protected final byte[]bytes; // pattern 32 | protected int p; // current scanner position 33 | protected int stop; // pattern end (mutable) 34 | private int lastFetched; // last fetched value for unfetch support 35 | protected int c; // current code point 36 | 37 | private final int begin; // pattern begin position for reset() support 38 | private final int end; // pattern end position for reset() support 39 | protected int _p; // used by mark()/restore() to mark positions 40 | 41 | protected ScannerSupport(Encoding enc, byte[]bytes, int p, int end) { 42 | this.enc = enc; 43 | this.bytes = bytes; 44 | this.begin = p; 45 | this.end = end; 46 | } 47 | 48 | protected final int getBegin() { 49 | return begin; 50 | } 51 | 52 | protected final int getEnd() { 53 | return end; 54 | } 55 | 56 | private static final int INT_SIGN_BIT = 1 << 31; 57 | protected final int scanUnsignedNumber() { 58 | int last = c; 59 | int num = 0; // long ??? 
60 | while(left()) { 61 | fetch(); 62 | if (enc.isDigit(c)) { 63 | int onum = num; 64 | num = num * 10 + Encoding.digitVal(c); 65 | if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; 66 | } else { 67 | unfetch(); 68 | break; 69 | } 70 | } 71 | c = last; 72 | return num; 73 | } 74 | 75 | protected final int scanUnsignedHexadecimalNumber(int minLength, int maxLength) { 76 | int last = c; 77 | int num = 0; 78 | int restLen = maxLength - minLength; 79 | while(left() && maxLength-- != 0) { 80 | fetch(); 81 | if (enc.isXDigit(c)) { 82 | int val = enc.xdigitVal(c); 83 | if ((Integer.MAX_VALUE - val) / 16 < num) return -1; 84 | num = (num << 4) + val; 85 | } else { 86 | unfetch(); 87 | maxLength++; 88 | break; 89 | } 90 | } 91 | if (maxLength > restLen) return -2; 92 | c = last; 93 | return num; 94 | } 95 | 96 | protected final int scanUnsignedOctalNumber(int maxLength) { 97 | int last = c; 98 | int num = 0; 99 | while(left() && maxLength-- != 0) { 100 | fetch(); 101 | if (enc.isDigit(c) && c < '8') { 102 | int onum = num; 103 | int val = Encoding.odigitVal(c); 104 | num = (num << 3) + val; 105 | if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; 106 | } else { 107 | unfetch(); 108 | break; 109 | } 110 | } 111 | c = last; 112 | return num; 113 | } 114 | 115 | protected final void reset() { 116 | p = begin; 117 | stop = end; 118 | } 119 | 120 | protected final void mark() { 121 | _p = p; 122 | } 123 | 124 | protected final void restore() { 125 | p = _p; 126 | } 127 | 128 | protected final void inc() { 129 | lastFetched = p; 130 | p += enc.length(bytes, p, stop); 131 | } 132 | 133 | protected final void fetch() { 134 | c = enc.mbcToCode(bytes, p, stop); 135 | lastFetched = p; 136 | p += enc.length(bytes, p, stop); 137 | } 138 | 139 | protected int fetchTo() { 140 | int to = enc.mbcToCode(bytes, p, stop); 141 | lastFetched = p; 142 | p += enc.length(bytes, p, stop); 143 | return to; 144 | } 145 | 146 | protected final void unfetch() { 147 | p = lastFetched; 148 | } 149 | 150 
| protected final int peek() { 151 | return p < stop ? enc.mbcToCode(bytes, p, stop) : 0; 152 | } 153 | 154 | protected final boolean peekIs(int c) { 155 | return peek() == c; 156 | } 157 | 158 | protected final boolean left() { 159 | return p < stop; 160 | } 161 | 162 | protected void newSyntaxException(String message) { 163 | throw new SyntaxException(message); 164 | } 165 | 166 | protected void newValueException(String message) { 167 | throw new ValueException(message); 168 | } 169 | 170 | protected void newValueException(String message, String str) { 171 | throw new ValueException(message, str); 172 | } 173 | 174 | protected void newValueException(String message, int p, int end) { 175 | throw new ValueException(message, new String(bytes, p, end - p)); 176 | } 177 | 178 | protected void newInternalException(String message) { 179 | throw new InternalException(message); 180 | } 181 | 182 | } 183 | -------------------------------------------------------------------------------- /src/org/joni/SingleRegion.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni; 21 | 22 | public class SingleRegion extends Region { 23 | int beg; 24 | int end; 25 | 26 | public SingleRegion(int num) { 27 | if (num != 1) throw new IndexOutOfBoundsException(""+num); 28 | } 29 | 30 | public SingleRegion(int begin, int end) { 31 | this.beg = begin; 32 | this.end = end; 33 | } 34 | 35 | @Override 36 | public int getNumRegs() { 37 | return 1; 38 | } 39 | 40 | @Override 41 | public SingleRegion clone() { 42 | SingleRegion region = new SingleRegion(beg, end); 43 | if (getCaptureTree() != null) region.setCaptureTree(getCaptureTree().cloneTree()); 44 | return region; 45 | } 46 | 47 | @Override 48 | public int getBeg(int index) { 49 | if (index != 0) throw new IndexOutOfBoundsException(""+index); 50 | return beg; 51 | } 52 | 53 | @Override 54 | public int setBeg(int index, int value) { 55 | if (index != 0) throw new IndexOutOfBoundsException(""+index); 56 | return beg = value; 57 | } 58 | 59 | @Override 60 | public int getEnd(int index) { 61 | if (index != 0) throw new IndexOutOfBoundsException(""+index); 62 | return end; 63 | } 64 | 65 | @Override 66 | public int setEnd(int index, int value) { 67 | if (index != 0) throw new IndexOutOfBoundsException(""+index); 68 | return end = value; 69 | } 70 | 71 | @Override 72 | void clear() { 73 | beg = end = REGION_NOTPOS; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/org/joni/StackEntry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal 
/**
 * One slot of the matcher's backtrack stack.
 *
 * <p>The four private integers emulate a C union: each group of accessors
 * below is a different view onto the same storage, so writing through one
 * view changes what the other views read. Which view is meaningful is
 * determined by {@link #type}.
 */
class StackEntry {
    int type;
    private int slot1, slot2, slot3, slot4;

    // ---- view 1: state ----

    /* byte code position */
    void setStatePCode(int pcode) { this.slot1 = pcode; }
    int getStatePCode() { return this.slot1; }

    /* string position */
    void setStatePStr(int pstr) { this.slot2 = pstr; }
    int getStatePStr() { return this.slot2; }

    /* previous char position of pstr */
    void setStatePStrPrev(int pstrPrev) { this.slot3 = pstrPrev; }
    int getStatePStrPrev() { return this.slot3; }

    void setPKeep(int pkeep) { this.slot4 = pkeep; }
    int getPKeep() { return this.slot4; }

    // ---- view 2: repeat (for OP_REPEAT_INC, OP_REPEAT_INC_NG) ----

    void setRepeatCount(int count) { this.slot1 = count; }
    int getRepeatCount() { return this.slot1; }
    void decreaseRepeatCount() { this.slot1 -= 1; }
    void increaseRepeatCount() { this.slot1 += 1; }

    /* byte code position (head of repeated target) */
    void setRepeatPCode(int pcode) { this.slot2 = pcode; }
    int getRepeatPCode() { return this.slot2; }

    /* repeat id */
    void setRepeatNum(int num) { this.slot3 = num; }
    int getRepeatNum() { return this.slot3; }

    // ---- view 3: repeat_inc ----

    /* index of stack */
    void setSi(int si) { this.slot1 = si; }
    int getSi() { return this.slot1; }

    // ---- view 4: memory (capture) ----

    /* memory num */
    void setMemNum(int num) { this.slot1 = num; }
    int getMemNum() { return this.slot1; }

    /* start/end position */
    void setMemPstr(int pstr) { this.slot2 = pstr; }
    int getMemPStr() { return this.slot2; }

    /* The two values below are set when this stack type is MEM-START. */

    /* prev. info (for backtrack "(...)*" ) */
    void setMemStart(int start) { this.slot3 = start; }
    int getMemStart() { return this.slot3; }

    /* prev. info (for backtrack "(...)*" ) */
    void setMemEnd(int end) { this.slot4 = end; }
    int getMemEnd() { return this.slot4; }

    // ---- view 5: null check ----

    /* null check id */
    void setNullCheckNum(int num) { this.slot1 = num; }
    int getNullCheckNum() { return this.slot1; }

    /* start position */
    void setNullCheckPStr(int pstr) { this.slot2 = pstr; }
    int getNullCheckPStr() { return this.slot2; }

    // ---- view 6: call frame ----

    /* byte code position */
    void setCallFrameRetAddr(int addr) { this.slot1 = addr; }
    int getCallFrameRetAddr() { return this.slot1; }

    /* null check id */
    void setCallFrameNum(int num) { this.slot2 = num; }
    int getCallFrameNum() { return this.slot2; }

    /* string position */
    void setCallFramePStr(int pstr) { this.slot3 = pstr; }
    int getCallFramePStr() { return this.slot3; }

    // ---- view 7: absent ----

    /* absent position */
    void setAbsentStr(int pos) { this.slot1 = pos; }
    int getAbsentStr() { return this.slot1; }

    void setAbsentEndStr(int pos) { this.slot2 = pos; }
    int getAbsentEndStr() { return this.slot2; }
}

/** A {@link StackEntry} with one extra integer for the state-check value. */
final class SCStackEntry extends StackEntry {
    private int stateCheck;

    void setStateCheck(int check) {
        this.stateCheck = check;
    }

    int getStateCheck() {
        return this.stateCheck;
    }
}
19 | */ 20 | package org.joni; 21 | 22 | import org.joni.constants.internal.TokenType; 23 | 24 | final class Token { 25 | TokenType type; 26 | boolean escaped; 27 | int base; /* is number: 8, 16 (used in [....]) */ 28 | int backP; 29 | 30 | // union fields 31 | private int INT1, INT2, INT3, INT4, INT5; 32 | private int []INTA1; 33 | 34 | // union accessors 35 | int getC() { 36 | return INT1; 37 | } 38 | void setC(int c) { 39 | INT1 = c; 40 | } 41 | 42 | int getCode() { 43 | return INT1; 44 | } 45 | void setCode(int code) { 46 | INT1 = code; 47 | } 48 | 49 | int getAnchorSubtype() { 50 | return INT1; 51 | } 52 | void setAnchorSubtype(int anchor) { 53 | INT1 = anchor; 54 | } 55 | 56 | boolean getAnchorASCIIRange() { 57 | return INT2 == 1; 58 | } 59 | 60 | void setAnchorASCIIRange(boolean ascii) { 61 | INT2 = ascii ? 1 : 0; 62 | } 63 | 64 | // repeat union member 65 | int getRepeatLower() { 66 | return INT1; 67 | } 68 | void setRepeatLower(int lower) { 69 | INT1 = lower; 70 | } 71 | 72 | int getRepeatUpper() { 73 | return INT2; 74 | } 75 | void setRepeatUpper(int upper) { 76 | INT2 = upper; 77 | } 78 | 79 | boolean getRepeatGreedy() { 80 | return INT3 != 0; 81 | } 82 | void setRepeatGreedy(boolean greedy) { 83 | INT3 = greedy ? 1 : 0; 84 | } 85 | 86 | boolean getRepeatPossessive() { 87 | return INT4 != 0; 88 | } 89 | void setRepeatPossessive(boolean possessive) { 90 | INT4 = possessive ? 1 : 0; 91 | } 92 | 93 | // backref union member 94 | int getBackrefNum() { 95 | return INT1; 96 | } 97 | void setBackrefNum(int num) { 98 | INT1 = num; 99 | } 100 | 101 | int getBackrefRef1() { 102 | return INT2; 103 | } 104 | void setBackrefRef1(int ref1) { 105 | INT2 = ref1; 106 | } 107 | 108 | int[]getBackrefRefs() { 109 | return INTA1; 110 | } 111 | void setBackrefRefs(int[]refs) { 112 | INTA1 = refs; 113 | } 114 | 115 | boolean getBackrefByName() { 116 | return INT3 != 0; 117 | } 118 | void setBackrefByName(boolean byName) { 119 | INT3 = byName ? 
1 : 0; 120 | } 121 | 122 | // USE_BACKREF_AT_LEVEL 123 | boolean getBackrefExistLevel() { 124 | return INT4 != 0; 125 | } 126 | void setBackrefExistLevel(boolean existLevel) { 127 | INT4 = existLevel ? 1 : 0; 128 | } 129 | 130 | int getBackrefLevel() { 131 | return INT5; 132 | } 133 | void setBackrefLevel(int level) { 134 | INT5 = level; 135 | } 136 | 137 | // call union member 138 | int getCallNameP() { 139 | return INT1; 140 | } 141 | void setCallNameP(int nameP) { 142 | INT1 = nameP; 143 | } 144 | 145 | int getCallNameEnd() { 146 | return INT2; 147 | } 148 | void setCallNameEnd(int nameEnd) { 149 | INT2 = nameEnd; 150 | } 151 | 152 | int getCallGNum() { 153 | return INT3; 154 | } 155 | void setCallGNum(int gnum) { 156 | INT3 = gnum; 157 | } 158 | 159 | boolean getCallRel() { 160 | return INT4 != 0; 161 | } 162 | void setCallRel(boolean rel) { 163 | INT4 = rel ? 1 : 0; 164 | } 165 | 166 | // prop union member 167 | int getPropCType() { 168 | return INT1; 169 | } 170 | void setPropCType(int ctype) { 171 | INT1 = ctype; 172 | } 173 | 174 | boolean getPropNot() { 175 | return INT2 != 0; 176 | } 177 | void setPropNot(boolean not) { 178 | INT2 = not ? 
1 : 0; 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/org/joni/UnsetAddrList.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni; 21 | 22 | import org.joni.ast.EncloseNode; 23 | import org.joni.exception.ErrorMessages; 24 | import org.joni.exception.InternalException; 25 | 26 | public final class UnsetAddrList { 27 | EncloseNode[]targets; 28 | int[]offsets; 29 | int num; 30 | 31 | public UnsetAddrList(int size) { 32 | targets = new EncloseNode[size]; 33 | offsets = new int[size]; 34 | } 35 | 36 | public void add(int offset, EncloseNode node) { 37 | if (num >= offsets.length) { 38 | EncloseNode []ttmp = new EncloseNode[targets.length << 1]; 39 | System.arraycopy(targets, 0, ttmp, 0, num); 40 | targets = ttmp; 41 | int[]otmp = new int[offsets.length << 1]; 42 | System.arraycopy(offsets, 0, otmp, 0, num); 43 | offsets = otmp; 44 | } 45 | targets[num] = node; 46 | offsets[num] = offset; 47 | num++; 48 | } 49 | 50 | public void fix(Regex regex) { 51 | for (int i=0; i 0) { 62 | for (int i = 0; i < num; i++) { 63 | value.append("offset + ").append(offsets[i]).append(" target: ").append(targets[i].getAddressName()); 64 | } 65 | } 66 | return value.toString(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/org/joni/WarnCallback.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 
/**
 * Sink for warning messages.
 *
 * @author Ola Bini
 */
public interface WarnCallback {
    /** Writes each warning on its own line to whatever {@code System.err} is at call time. */
    WarnCallback DEFAULT = message -> System.err.println(message);

    /** Discards every warning. */
    WarnCallback NONE = message -> { };

    /**
     * Receives one warning.
     *
     * @param message text of the warning
     */
    void warn(String message);
}
11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni.ast; 21 | 22 | import org.joni.constants.internal.AnchorType; 23 | 24 | public final class AnchorNode extends Node { 25 | public final int type; 26 | public final boolean asciiRange; 27 | public Node target; 28 | public int charLength; 29 | 30 | public AnchorNode(int type, boolean asciiRange) { 31 | super(ANCHOR); 32 | this.type = type; 33 | charLength = -1; 34 | this.asciiRange = asciiRange; 35 | } 36 | 37 | public AnchorNode(int type) { 38 | this(type, false); 39 | } 40 | 41 | @Override 42 | protected void setChild(Node child) { 43 | target = child; 44 | } 45 | 46 | @Override 47 | protected Node getChild() { 48 | return target; 49 | } 50 | 51 | public void setTarget(Node tgt) { 52 | target = tgt; 53 | tgt.parent = this; 54 | } 55 | 56 | @Override 57 | public String getName() { 58 | return "Anchor"; 59 | } 60 | 61 | @Override 62 | public String toString(int level) { 63 | StringBuilder value = new StringBuilder(); 64 | value.append("\n type: " + typeToString()); 65 | value.append(", ascii: " + asciiRange); 66 | value.append("\n target: " + pad(target, level + 1)); 67 | return value.toString(); 68 | } 69 | 70 | public String typeToString() { 71 | StringBuilder type = new StringBuilder(); 72 | if (isType(AnchorType.BEGIN_BUF)) type.append("BEGIN_BUF "); 73 | if (isType(AnchorType.BEGIN_LINE)) type.append("BEGIN_LINE "); 74 | if (isType(AnchorType.BEGIN_POSITION)) type.append("BEGIN_POSITION "); 75 | if (isType(AnchorType.END_BUF)) 
type.append("END_BUF "); 76 | if (isType(AnchorType.SEMI_END_BUF)) type.append("SEMI_END_BUF "); 77 | if (isType(AnchorType.END_LINE)) type.append("END_LINE "); 78 | if (isType(AnchorType.WORD_BOUND)) type.append("WORD_BOUND "); 79 | if (isType(AnchorType.NOT_WORD_BOUND)) type.append("NOT_WORD_BOUND "); 80 | if (isType(AnchorType.WORD_BEGIN)) type.append("WORD_BEGIN "); 81 | if (isType(AnchorType.WORD_END)) type.append("WORD_END "); 82 | if (isType(AnchorType.PREC_READ)) type.append("PREC_READ "); 83 | if (isType(AnchorType.PREC_READ_NOT)) type.append("PREC_READ_NOT "); 84 | if (isType(AnchorType.LOOK_BEHIND)) type.append("LOOK_BEHIND "); 85 | if (isType(AnchorType.LOOK_BEHIND_NOT)) type.append("LOOK_BEHIND_NOT "); 86 | if (isType(AnchorType.ANYCHAR_STAR)) type.append("ANYCHAR_STAR "); 87 | if (isType(AnchorType.ANYCHAR_STAR_ML)) type.append("ANYCHAR_STAR_ML "); 88 | return type.toString(); 89 | } 90 | 91 | private boolean isType(int type) { 92 | return (this.type & type) != 0; 93 | } 94 | 95 | } 96 | -------------------------------------------------------------------------------- /src/org/joni/ast/AnyCharNode.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 
11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni.ast; 21 | 22 | public final class AnyCharNode extends Node { 23 | public AnyCharNode(){ 24 | super(CANY); 25 | } 26 | 27 | @Override 28 | public String getName() { 29 | return "Any Char"; 30 | } 31 | 32 | @Override 33 | public String toString(int level) { 34 | String value = ""; 35 | return value; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/org/joni/ast/BackRefNode.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni.ast; 21 | 22 | import org.joni.Config; 23 | import org.joni.ScanEnvironment; 24 | import org.joni.exception.ErrorMessages; 25 | import org.joni.exception.ValueException; 26 | 27 | public final class BackRefNode extends StateNode { 28 | public final int[] back; 29 | public int backNum; 30 | public int nestLevel; 31 | 32 | private BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) { 33 | super(BREF); 34 | this.backNum = backNum; 35 | if (byName) setNameRef(); 36 | 37 | for (int i=0; i 0) { 64 | back[pos] = n; 65 | pos++; 66 | } 67 | } 68 | backNum = pos; 69 | } 70 | 71 | @Override 72 | public String getName() { 73 | return "Back Ref"; 74 | } 75 | 76 | @Override 77 | public String toString(int level) { 78 | StringBuilder sb = new StringBuilder(super.toString(level)); 79 | sb.append("\n backNum: " + backNum); 80 | String backs = ""; 81 | for (int i=0; i"); 66 | return s + toString(0); 67 | } 68 | 69 | protected static String pad(Object value, int level) { 70 | if (value == null) return "NULL"; 71 | 72 | StringBuilder pad = new StringBuilder(" "); 73 | for (int i=0; i= bytes.length) { 65 | byte[]tmp = new byte[len + NODE_STR_MARGIN]; 66 | System.arraycopy(bytes, p, tmp, 0, end - p); 67 | bytes = tmp; 68 | } 69 | } 70 | 71 | /* COW and/or ensure there is ahead bytes available in node's buffer 72 | */ 73 | private void modifyEnsure(int ahead) { 74 | if (isShared()) { 75 | int len = (end - p) + ahead; 76 | byte[]tmp = new byte[len + NODE_STR_MARGIN]; 77 | System.arraycopy(bytes, p, tmp, 0, end - p); 78 | bytes = tmp; 79 | end = end - p; 80 | p = 0; 81 | clearShared(); 82 | } else { 83 | ensure(ahead); 84 | } 85 | } 86 | 87 | @Override 
88 | public String getName() { 89 | return "String"; 90 | } 91 | 92 | public int length() { 93 | return end - p; 94 | } 95 | 96 | public int length(Encoding enc) { 97 | return enc.strLength(bytes, p, end); 98 | } 99 | 100 | public StringNode splitLastChar(Encoding enc) { 101 | StringNode n = null; 102 | if (end > p) { 103 | int prev = enc.prevCharHead(bytes, p, end, end); 104 | if (prev != -1 && prev > p) { /* can be split */ 105 | n = new StringNode(bytes, prev, end); 106 | if (isRaw()) n.setRaw(); 107 | end = prev; 108 | } 109 | } 110 | return n; 111 | } 112 | 113 | public boolean canBeSplit(Encoding enc) { 114 | if (end > p) { 115 | return enc.length(bytes, p, end) < (end - p); 116 | } 117 | return false; 118 | } 119 | 120 | public void set(byte[]bytes, int p, int end) { 121 | this.bytes = bytes; 122 | this.p = p; 123 | this.end = end; 124 | setShared(); 125 | } 126 | 127 | public void catBytes(byte[]cat, int catP, int catEnd) { 128 | int len = catEnd - catP; 129 | modifyEnsure(len); 130 | System.arraycopy(cat, catP, bytes, end, len); 131 | end += len; 132 | } 133 | 134 | public void catByte(byte c) { 135 | modifyEnsure(1); 136 | bytes[end++] = c; 137 | } 138 | 139 | public void catCode(int code, Encoding enc) { 140 | modifyEnsure(Config.ENC_CODE_TO_MBC_MAXLEN); 141 | end += enc.codeToMbc(code, bytes, end); 142 | } 143 | 144 | public void setRaw() { 145 | flag |= NSTR_RAW; 146 | } 147 | 148 | public void clearRaw() { 149 | flag &= ~NSTR_RAW; 150 | } 151 | 152 | public boolean isRaw() { 153 | return (flag & NSTR_RAW) != 0; 154 | } 155 | 156 | public void setAmbig() { 157 | flag |= NSTR_AMBIG; 158 | } 159 | 160 | public void clearAmbig() { 161 | flag &= ~NSTR_AMBIG; 162 | } 163 | 164 | public boolean isAmbig() { 165 | return (flag & NSTR_AMBIG) != 0; 166 | } 167 | 168 | public void setDontGetOptInfo() { 169 | flag |= NSTR_DONT_GET_OPT_INFO; 170 | } 171 | 172 | public void clearDontGetOptInfo() { 173 | flag &= ~NSTR_DONT_GET_OPT_INFO; 174 | } 175 | 176 | public 
boolean isDontGetOptInfo() { 177 | return (flag & NSTR_DONT_GET_OPT_INFO) != 0; 178 | } 179 | 180 | public void setShared() { 181 | flag |= NSTR_SHARED; 182 | } 183 | 184 | public void clearShared() { 185 | flag &= ~NSTR_SHARED; 186 | } 187 | 188 | public boolean isShared() { 189 | return (flag & NSTR_SHARED) != 0; 190 | } 191 | 192 | public String flagsToString() { 193 | StringBuilder flags = new StringBuilder(); 194 | if (isRaw()) flags.append("RAW "); 195 | if (isAmbig()) flags.append("AMBIG "); 196 | if (isDontGetOptInfo()) flags.append("DONT_GET_OPT_INFO "); 197 | if (isShared()) flags.append("SHARED "); 198 | return flags.toString(); 199 | } 200 | 201 | @Override 202 | public String toString(int level) { 203 | StringBuilder sb = new StringBuilder(); 204 | sb.append("\n flags: " + flagsToString()); 205 | sb.append("\n bytes: '"); 206 | for (int i=p; i= 0x20 && (bytes[i] & 0xff) < 0x7f) { 208 | sb.append((char)bytes[i]); 209 | } else { 210 | sb.append(String.format("[0x%02x]", bytes[i])); 211 | } 212 | } 213 | sb.append("'"); 214 | return sb.toString(); 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /src/org/joni/bench/AbstractBench.java: -------------------------------------------------------------------------------- 1 | package org.joni.bench; 2 | 3 | import org.jcodings.specific.ASCIIEncoding; 4 | import org.joni.Option; 5 | import org.joni.Regex; 6 | import org.joni.Syntax; 7 | 8 | public abstract class AbstractBench { 9 | protected void bench(String _reg, String _str, int warmup, int times) throws Exception { 10 | byte[] reg = _reg.getBytes(); 11 | byte[] str = _str.getBytes(); 12 | 13 | Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT); 14 | 15 | System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times"); 16 | 17 | for(int j=0;j...)" */ 42 | int REPEAT = 0x0700; 43 | int CALL_FRAME = 0x0800; 44 | int RETURN = 0x0900; 45 
| int VOID = 0x0a00; /* for fill a blank */ 46 | int ABSENT_POS = 0x0b00; /* for absent */ 47 | int ABSENT = 0x0c00; /* absent inner loop marker */ 48 | 49 | /* stack type check mask */ 50 | int MASK_POP_USED = 0x00ff; 51 | int MASK_TO_VOID_TARGET = 0x10ff; 52 | int MASK_MEM_END_OR_MARK = 0x8000; /* MEM_END or MEM_END_MARK */ 53 | } 54 | -------------------------------------------------------------------------------- /src/org/joni/constants/internal/StringType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
/**
 * Bit flags for string nodes. Every constant occupies its own bit, so the
 * values can be combined with {@code |} and tested with {@code &}.
 */
public interface StringType {
    int NSTR_RAW = 0x01;
    int NSTR_AMBIG = 0x02;
    int NSTR_DONT_GET_OPT_INFO = 0x04;
    int NSTR_SHARED = 0x08;
}
/**
 * Consecutive integer codes (0 to 3) describing the emptiness state of a target.
 * These are plain enumerated values, not bit flags.
 */
public interface TargetInfo {
    int ISNOT_EMPTY = 0;
    int IS_EMPTY = 1;
    int IS_EMPTY_MEM = 2;
    int IS_EMPTY_REC = 3;
}
/**
 * Kinds of token. The declaration order is kept exactly as it was, so
 * ordinals stay stable for any code that relies on them.
 */
public enum TokenType {
    // end of token
    EOT,
    RAW_BYTE,
    CHAR,
    STRING,
    CODE_POINT,
    ANYCHAR,
    CHAR_TYPE,
    BACKREF,
    CALL,
    ANCHOR,
    OP_REPEAT,
    INTERVAL,
    // SQL '%' == .*
    ANYCHAR_ANYTIME,
    ALT,
    SUBEXP_OPEN,
    SUBEXP_CLOSE,
    CC_OPEN,
    QUOTE_OPEN,
    // \p{...}, \P{...}
    CHAR_PROPERTY,
    LINEBREAK,
    EXTENDED_GRAPHEME_CLUSTER,
    KEEP,

    // ---- the remaining kinds are the "in cc" (character class) group ----
    CC_CLOSE,
    CC_RANGE,
    POSIX_BRACKET_OPEN,
    // &&
    CC_AND,
    // [
    CC_CC_OPEN
}
/**
 * Bit flags selecting when a traversal callback is invoked. The two single
 * flags use separate bits, and {@code AT_BOTH} is their union.
 */
public interface Traverse {
    int TRAVERSE_CALLBACK_AT_FIRST = 1 << 0;
    int TRAVERSE_CALLBACK_AT_LAST = 1 << 1;
    int TRAVERSE_CALLBACK_AT_BOTH = TRAVERSE_CALLBACK_AT_LAST | TRAVERSE_CALLBACK_AT_FIRST;
}
19 | */ 20 | package org.joni.exception; 21 | 22 | import org.joni.Config; 23 | 24 | public interface ErrorMessages extends org.jcodings.exception.ErrorMessages { 25 | /* internal error */ 26 | String PARSER_BUG = "internal parser error (bug)"; 27 | String UNDEFINED_BYTECODE = "undefined bytecode (bug)"; 28 | String UNEXPECTED_BYTECODE = "unexpected bytecode (bug)"; 29 | String TOO_MANY_CAPTURE_GROUPS = "too many capture groups are specified"; 30 | 31 | /* general error */ 32 | String INVALID_ARGUMENT = "invalid argument"; 33 | 34 | /* syntax error */ 35 | String REGEX_TOO_LONG = "regex length too long"; 36 | String END_PATTERN_AT_LEFT_BRACE = "end pattern at left brace"; 37 | String END_PATTERN_AT_LEFT_BRACKET = "end pattern at left bracket"; 38 | String EMPTY_CHAR_CLASS = "empty char-class"; 39 | String PREMATURE_END_OF_CHAR_CLASS = "premature end of char-class"; 40 | String END_PATTERN_AT_ESCAPE = "end pattern at escape"; 41 | String END_PATTERN_AT_META = "end pattern at meta"; 42 | String END_PATTERN_AT_CONTROL = "end pattern at control"; 43 | String META_CODE_SYNTAX = "invalid meta-code syntax"; 44 | String CONTROL_CODE_SYNTAX = "invalid control-code syntax"; 45 | String CHAR_CLASS_VALUE_AT_END_OF_RANGE = "char-class value at end of range"; 46 | String CHAR_CLASS_VALUE_AT_START_OF_RANGE = "char-class value at start of range"; 47 | String UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS = "unmatched range specifier in char-class"; 48 | String TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED = "target of repeat operator is not specified"; 49 | String TARGET_OF_REPEAT_OPERATOR_INVALID = "target of repeat operator is invalid"; 50 | String NESTED_REPEAT_NOT_ALLOWED = "nested repeat is not allowed"; 51 | String NESTED_REPEAT_OPERATOR = "nested repeat operator"; 52 | String UNMATCHED_CLOSE_PARENTHESIS = "unmatched close parenthesis"; 53 | String END_PATTERN_WITH_UNMATCHED_PARENTHESIS = "end pattern with unmatched parenthesis"; 54 | String END_PATTERN_IN_GROUP = "end pattern in group"; 
55 | String UNDEFINED_GROUP_OPTION = "undefined group option"; 56 | String INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type"; 57 | String INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind"; 58 | String INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}"; 59 | String INVALID_CONDITION_PATTERN = "invalid conditional pattern"; 60 | 61 | /* values error (syntax error) */ 62 | String TOO_BIG_NUMBER = "too big number"; 63 | String TOO_BIG_NUMBER_FOR_REPEAT_RANGE = "too big number for repeat range"; 64 | String UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE = "upper is smaller than lower in repeat range"; 65 | String EMPTY_RANGE_IN_CHAR_CLASS = "empty range in char class"; 66 | String MISMATCH_CODE_LENGTH_IN_CLASS_RANGE = "mismatch multibyte code length in char-class range"; 67 | String TOO_MANY_MULTI_BYTE_RANGES = "too many multibyte code ranges are specified"; 68 | String TOO_SHORT_MULTI_BYTE_STRING = "too short multibyte code string"; 69 | String TOO_BIG_BACKREF_NUMBER = "too big backref number"; 70 | String INVALID_BACKREF = Config.USE_NAMED_GROUP ? "invalid backref number/name" : "invalid backref number"; 71 | String NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED = "numbered backref/call is not allowed. (use name)"; 72 | String TOO_SHORT_DIGITS = "too short digits"; 73 | String INVALID_WIDE_CHAR_VALUE = "invalid wide-char value"; 74 | String EMPTY_GROUP_NAME = "group name is empty"; 75 | String INVALID_GROUP_NAME = "invalid group name <%n>"; 76 | String INVALID_CHAR_IN_GROUP_NAME = Config.USE_NAMED_GROUP ? 
"invalid char in group name <%n>" : "invalid char in group number <%n>"; 77 | String UNDEFINED_NAME_REFERENCE = "undefined name <%n> reference"; 78 | String UNDEFINED_GROUP_REFERENCE = "undefined group <%n> reference"; 79 | String MULTIPLEX_DEFINED_NAME = "multiplex defined name <%n>"; 80 | String MULTIPLEX_DEFINITION_NAME_CALL = "multiplex definition name <%n> call"; 81 | String PROPERTY_NAME_NEVER_TERMINATED = "property name never terminated \\p{%n"; 82 | String NEVER_ENDING_RECURSION = "never ending recursion"; 83 | String GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY = "group number is too big for capture history"; 84 | String NOT_SUPPORTED_ENCODING_COMBINATION = "not supported encoding combination"; 85 | String INVALID_COMBINATION_OF_OPTIONS = "invalid combination of options"; 86 | String OVER_THREAD_PASS_LIMIT_COUNT = "over thread pass limit count"; 87 | String TOO_BIG_SB_CHAR_VALUE = "too big singlebyte char value"; 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/org/joni/exception/InternalException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 
11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni.exception; 21 | 22 | public class InternalException extends JOniException{ 23 | private static final long serialVersionUID = -3871816465397927992L; 24 | 25 | public InternalException(String message) { 26 | super(message); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/org/joni/exception/JOniException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Unchecked exception carrying only a message. {@code InternalException} and
 * {@code SyntaxException} derive from it.
 */
public class JOniException extends RuntimeException {
    private static final long serialVersionUID = -6027192180014164667L;

    /**
     * @param message text later returned by {@link #getMessage()}
     */
    public JOniException(String message) {
        super(message);
    }
}
19 | */ 20 | package org.joni.exception; 21 | 22 | public class SyntaxException extends JOniException{ 23 | private static final long serialVersionUID = 7862720128961874288L; 24 | 25 | public SyntaxException(String message) { 26 | super(message); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/org/joni/exception/TimeoutException.java: -------------------------------------------------------------------------------- 1 | package org.joni.exception; 2 | 3 | public class TimeoutException extends InterruptedException { 4 | private static final long serialVersionUID = 1L; 5 | 6 | public TimeoutException() { 7 | super(); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/org/joni/exception/ValueException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni.exception; 21 | 22 | public class ValueException extends SyntaxException{ 23 | private static final long serialVersionUID = -196013852479929134L; 24 | 25 | public ValueException(String message) { 26 | super(message); 27 | } 28 | 29 | public ValueException(String message, String str) { 30 | super(message.replaceAll("%n", str)); 31 | } 32 | 33 | public ValueException(String message, byte[]bytes, int p, int end) { 34 | this(message, new String(bytes, p, end - p)); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /test/org/joni/test/TestCaseInsensitive.java: -------------------------------------------------------------------------------- 1 | package org.joni.test; 2 | 3 | import org.jcodings.Encoding; 4 | import org.jcodings.specific.UTF8Encoding; 5 | import org.joni.Option; 6 | import org.joni.Syntax; 7 | 8 | public class TestCaseInsensitive extends Test { 9 | 10 | @Override 11 | public int option() { 12 | return Option.IGNORECASE; 13 | } 14 | @Override 15 | public Encoding encoding() { 16 | return UTF8Encoding.INSTANCE; 17 | } 18 | @Override 19 | public String testEncoding() { 20 | return "utf-8"; 21 | } 22 | @Override 23 | public Syntax syntax() { 24 | return Syntax.TEST; 25 | } 26 | 27 | @Override 28 | public void test() throws Exception { 29 | xx("^\\d\\d\\d-".getBytes(), new byte[]{-30, -126, -84, 48, 45}, 0, 0, 0, true); 30 | x2s("ab", "\uD835\uDC4D ab", 5, 7); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /test/org/joni/test/TestCornerCases.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni.test; 21 | 22 | import org.jcodings.Encoding; 23 | import org.jcodings.specific.ASCIIEncoding; 24 | import org.joni.Config; 25 | import org.joni.Option; 26 | import org.joni.Regex; 27 | import org.joni.Syntax; 28 | 29 | public class TestCornerCases extends Test { 30 | @Override 31 | public int option() { 32 | return Option.DEFAULT; 33 | } 34 | @Override 35 | public Encoding encoding() { 36 | return ASCIIEncoding.INSTANCE; 37 | } 38 | @Override 39 | public String testEncoding() { 40 | return "cp1250"; 41 | } 42 | @Override 43 | public Syntax syntax() { 44 | return Syntax.TEST; 45 | } 46 | 47 | @Override 48 | public void test() throws Exception { 49 | byte[] reg = "l.".getBytes(); 50 | byte[] str = "hello,lo".getBytes(); 51 | 52 | Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT); 53 | int result = p.matcher(str, 0, str.length).search(3, 0, Option.NONE); 54 | if(result != 3) { 55 | Config.log.println("FAIL: /l./ 'hello,lo' - with reverse, 3,0"); 56 | nfail++; 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /test/org/joni/test/TestCrnl.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 
11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 19 | */ 20 | package org.joni.test; 21 | 22 | import org.joni.Config; 23 | import org.joni.Option; 24 | import org.joni.Syntax; 25 | import org.junit.Ignore; 26 | import org.jcodings.Encoding; 27 | import org.jcodings.specific.ASCIIEncoding; 28 | 29 | @Ignore 30 | public class TestCrnl extends Test { 31 | @Override 32 | public int option() { 33 | return Option.DEFAULT; 34 | } 35 | @Override 36 | public Encoding encoding() { 37 | return ASCIIEncoding.INSTANCE; 38 | } 39 | @Override 40 | public String testEncoding() { 41 | return "ascii"; 42 | } 43 | @Override 44 | public Syntax syntax() { 45 | return Syntax.TEST; 46 | } 47 | @Override 48 | public void test() throws Exception { 49 | x2s("", "\r\n", 0, 0); 50 | x2s(".", "\r\n", 0, 1); 51 | ns("..", "\r\n"); 52 | x2s("^", "\r\n", 0, 0); 53 | x2s("\\n^", "\r\nf", 1, 2); 54 | x2s("\\n^a", "\r\na", 1, 3); 55 | x2s("$", "\r\n", 0, 0); 56 | x2s("T$", "T\r\n", 0, 1); 57 | x2s("T$", "T\raT\r\n", 3, 4); 58 | x2s("\\z", "\r\n", 2, 2); 59 | ns("a\\z", "a\r\n"); 60 | x2s("\\Z", "\r\n", 0, 0); 61 | x2s("\\Z", "\r\na", 3, 3); 62 | x2s("\\Z", "\r\n\r\n\n", 4, 4); 63 | x2s("\\Z", "\r\n\r\nX", 5, 5); 64 | x2s("a\\Z", "a\r\n", 0, 1); 65 | x2s("aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15); 66 | x2s("a|$", "b\r\n", 1, 1); 67 | x2s("$|b", "\rb", 1, 2); 68 | x2s("a$|ab$", "\r\nab\r\n", 2, 4); 69 | 70 | x2s("a|\\Z", "b\r\n", 1, 1); 71 | x2s("\\Z|b", "\rb", 1, 2); 72 | x2s("a\\Z|ab\\Z", "\r\nab\r\n", 2, 4); 73 | x2s("(?=a$).", 
"a\r\n", 0, 1); 74 | ns("(?=a$).", "a\r"); 75 | x2s("(?!a$)..", "a\r", 0, 2); 76 | x2s("(?<=a$).\\n", "a\r\n", 1, 3); 77 | ns("(? 0 || nerror > 0) Config.err.println("make sure to enable USE_CRNL_AS_LINE_TERMINATOR"); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /test/org/joni/test/TestError.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni.test; 21 | 22 | import org.jcodings.Encoding; 23 | import org.jcodings.specific.UTF8Encoding; 24 | import org.joni.Option; 25 | import org.joni.Syntax; 26 | import org.joni.WarnCallback; 27 | import org.joni.exception.ErrorMessages; 28 | 29 | public class TestError extends Test { 30 | @Override 31 | public int option() { 32 | return Option.DEFAULT; 33 | } 34 | 35 | @Override 36 | public Encoding encoding() { 37 | return UTF8Encoding.INSTANCE; 38 | } 39 | 40 | @Override 41 | public String testEncoding() { 42 | return "iso-8859-2"; 43 | } 44 | 45 | @Override 46 | public Syntax syntax() { 47 | return Syntax.TEST; 48 | } 49 | 50 | @Override 51 | public void test() throws Exception { 52 | xerrs("(", ErrorMessages.END_PATTERN_WITH_UNMATCHED_PARENTHESIS); 53 | xerrs("[[:WoRd:]]", ErrorMessages.INVALID_POSIX_BRACKET_TYPE); 54 | xerrs("(0?0|(?(1)||)|(?(1)||))?", ErrorMessages.INVALID_CONDITION_PATTERN); 55 | xerrs("[\\40000000000", ErrorMessages.TOO_BIG_NUMBER); 56 | xerrs("[\\40000000000\n", ErrorMessages.TOO_BIG_NUMBER); 57 | xerrs("[]", ErrorMessages.EMPTY_CHAR_CLASS); 58 | xerrs("[c-a]", ErrorMessages.EMPTY_RANGE_IN_CHAR_CLASS); 59 | xerrs("\\x{FFFFFFFF}", ErrorMessages.ERR_TOO_BIG_WIDE_CHAR_VALUE); 60 | xerrs("\\x{100000000}", ErrorMessages.ERR_TOO_LONG_WIDE_CHAR_VALUE); 61 | xerrs("\\u026x", ErrorMessages.TOO_SHORT_DIGITS); 62 | xerrs("()(?\\!(?'a')\\1)", ErrorMessages.UNDEFINED_GROUP_OPTION); 63 | xerrs("\\((", ErrorMessages.END_PATTERN_WITH_UNMATCHED_PARENTHESIS); 64 | xerrs("(|", ErrorMessages.END_PATTERN_WITH_UNMATCHED_PARENTHESIS); 65 | xerrs("'/g\\\u00ff\u00ff\u00ff\u00ff&))", ErrorMessages.UNMATCHED_CLOSE_PARENTHESIS); 66 | xerrs("0'/g\\\u00ff\u00ff\u00ff\u00ff&))", 1, 12, ErrorMessages.UNMATCHED_CLOSE_PARENTHESIS, WarnCallback.DEFAULT); 67 | xerrs("0\\1**?", 1, 6, ErrorMessages.INVALID_BACKREF, WarnCallback.DEFAULT); 68 | xerrs("[0-0-\u00ff ", ErrorMessages.PREMATURE_END_OF_CHAR_CLASS); // \xe2 69 | xerrs("\\p{foobarbaz}", 
ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME.replace("%n", "foobarbaz")); 70 | //xerrs("\\p{あ}", ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME.replace("%n", "あ")); 71 | 72 | xerrs("a{100001}", ErrorMessages.TOO_BIG_NUMBER_FOR_REPEAT_RANGE); 73 | xerrs("a{0,100001}", ErrorMessages.TOO_BIG_NUMBER_FOR_REPEAT_RANGE); 74 | xerrs("a{5,1}", ErrorMessages.UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE); 75 | xerrs("[\\6000", ErrorMessages.TOO_BIG_NUMBER); // CVE-2017-9226 76 | xerrs("[\\H- ]", ErrorMessages.UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS); // CVE-2017-9228 77 | xerrs("[a-\\d]", ErrorMessages.CHAR_CLASS_VALUE_AT_END_OF_RANGE); 78 | 79 | xerrs("(?:ab|cd)*\\1", ErrorMessages.INVALID_BACKREF); 80 | xerrs("(ab|cd)*\\1", ErrorMessages.INVALID_BACKREF, Option.DONT_CAPTURE_GROUP); 81 | 82 | xerrs("(.(?=\\g<1>))", ErrorMessages.NEVER_ENDING_RECURSION); 83 | xerrs("(a)(?b)\\g<1>\\g", ErrorMessages.NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); 84 | 85 | // xerrs("(?<", ErrorMessages.ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); 86 | xerrs("(?<>)", ErrorMessages.EMPTY_GROUP_NAME); 87 | //xerrs("(?<.>)", ErrorMessages.ERR_INVALID_CHAR_IN_GROUP_NAME); 88 | xerrs("\\g<1->", ErrorMessages.INVALID_CHAR_IN_GROUP_NAME.replace("%n", "1->")); 89 | xerrs("\\k<1/>", ErrorMessages.INVALID_GROUP_NAME.replace("%n", "1/")); 90 | // xerrs("\\k<1-1/>", ErrorMessages.ERR_INVALID_GROUP_NAME.replace("%n", "1-1/>")); 91 | // xerrs("\\k", ErrorMessages.ERR_INVALID_CHAR_IN_GROUP_NAME.replace("%n", "a/")); 92 | // xerrs("\\g<1>", ErrorMessages.UNDEFINED_GROUP_REFERENCE); 93 | 94 | xerrs("*", ErrorMessages.TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); 95 | xerrs("{1}", ErrorMessages.TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); 96 | 97 | xerrs("(?a)(?b)\\g", ErrorMessages.MULTIPLEX_DEFINITION_NAME_CALL.replace("%n", "a")); 98 | 99 | xerrs("(a)?(?b)?(?(1)a)(?()b)", ErrorMessages.NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); 100 | 101 | xerrs("()(?(2))", ErrorMessages.INVALID_BACKREF); 102 | xerrs("(?(700000))", 
ErrorMessages.INVALID_BACKREF); 103 | 104 | xerrs("(? " + acceptableMaximumTime); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /test/org/joni/test/TestNSU8.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni.test; 21 | 22 | import org.joni.Option; 23 | import org.joni.Syntax; 24 | import org.jcodings.Encoding; 25 | import org.jcodings.specific.NonStrictUTF8Encoding; 26 | 27 | public class TestNSU8 extends Test { 28 | @Override 29 | public int option() { 30 | return Option.DEFAULT; 31 | } 32 | @Override 33 | public Encoding encoding() { 34 | return NonStrictUTF8Encoding.INSTANCE; 35 | } 36 | @Override 37 | public String testEncoding() { 38 | return "utf-8"; 39 | } 40 | @Override 41 | public Syntax syntax() { 42 | return Syntax.TEST; 43 | } 44 | @Override 45 | public void test() throws Exception { 46 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32, (byte)32, (byte)32}, 0, 5, 1, false); 47 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32, (byte)32}, 0, 4, 1, false); 48 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32}, 0, 3, 1, false); 49 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32}, 0, 2, 1, false); 50 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240}, 0, 1, 1, false); 51 | 52 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32, (byte)32, (byte)32}, 0, 4, 1, false); 53 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32, (byte)32}, 0, 3, 1, false); 54 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32}, 0, 2, 1, false); 55 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224}, 0, 1, 1, false); 56 | 57 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192, (byte)32, (byte)32}, 0, 3, 1, false); 58 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192, (byte)32}, 0, 2, 1, false); 59 | xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192}, 0, 1, 1, false); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /test/org/joni/test/TestPerl.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of 
charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | * SOFTWARE. 
19 | */ 20 | package org.joni.test; 21 | 22 | import org.joni.Option; 23 | import org.joni.Syntax; 24 | import org.jcodings.Encoding; 25 | import org.jcodings.specific.ASCIIEncoding; 26 | 27 | public class TestPerl extends Test { 28 | @Override 29 | public int option() { 30 | return Option.DEFAULT; 31 | } 32 | @Override 33 | public Encoding encoding() { 34 | return ASCIIEncoding.INSTANCE; 35 | } 36 | @Override 37 | public String testEncoding() { 38 | return "iso-8859-2"; 39 | } 40 | @Override 41 | public Syntax syntax() { 42 | return Syntax.PerlNG; 43 | } 44 | @Override 45 | public void test() throws Exception { 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /test/org/joni/test/TestUtf8CaseFoldingLatin1Supplement.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 | * this software and associated documentation files (the "Software"), to deal in 4 | * the Software without restriction, including without limitation the rights to 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 | * of the Software, and to permit persons to whom the Software is furnished to do 7 | * so, subject to the following conditions: 8 | * 9 | * The above copyright notice and this permission notice shall be included in all 10 | * copies or substantial portions of the Software. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.joni.test;

import org.jcodings.Encoding;
import org.jcodings.specific.UTF8Encoding;
import org.joni.Option;
import org.joni.Syntax;

/**
 * Case-insensitive matching checks for characters of the Latin-1 Supplement
 * block, run with the UTF-8 encoding and the Java syntax.
 */
public class TestUtf8CaseFoldingLatin1Supplement extends Test {

    /** @return {@link Option#DEFAULT} */
    @Override
    public int option() {
        return Option.DEFAULT;
    }

    /** @return the UTF-8 encoding singleton */
    @Override
    public Encoding encoding() {
        return UTF8Encoding.INSTANCE;
    }

    /** @return the charset name {@code "utf-8"} */
    @Override
    public String testEncoding() {
        return "utf-8";
    }

    /** @return {@link Syntax#Java} */
    @Override
    public Syntax syntax() {
        return Syntax.Java;
    }

    /**
     * Matches the upper-case subject U+00C2 against three lower-case pattern
     * forms (a range, a single-member class and a bare escape) with
     * {@link Option#IGNORECASE}; each call passes 0 and 2 as expected bounds.
     *
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void test() throws InterruptedException {
        // Same subject and expectation for every pattern form, in the original order.
        final String[] lowerCasePatterns = {
            "[\\u00e0-\\u00e5]",
            "[\\u00e2]",
            "\\u00e2",
        };
        for (String lowerCasePattern : lowerCasePatterns) {
            x2s(lowerCasePattern, "\u00c2", 0, 2, Option.IGNORECASE);
        }
    }
}

--------------------------------------------------------------------------------