├── .github └── workflows │ └── haskell-ci.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── cabal.haskell-ci ├── cabal.project ├── lib ├── Data │ ├── IntMap │ │ ├── CharMap2.hs │ │ └── EnumMap2.hs │ └── IntSet │ │ └── EnumSet2.hs └── Text │ └── Regex │ ├── TDFA.hs │ └── TDFA │ ├── ByteString.hs │ ├── ByteString │ └── Lazy.hs │ ├── Common.hs │ ├── CorePattern.hs │ ├── IntArrTrieSet.hs │ ├── NewDFA │ ├── Engine.hs │ ├── Engine_FA.hs │ ├── Engine_NC.hs │ ├── Engine_NC_FA.hs │ ├── MakeTest.hs │ ├── Tester.hs │ └── Uncons.hs │ ├── Pattern.hs │ ├── ReadRegex.hs │ ├── Sequence.hs │ ├── String.hs │ ├── TDFA.hs │ ├── TNFA.hs │ ├── Text.hs │ ├── Text │ └── Lazy.hs │ └── Wrap.hs ├── regex-tdfa.cabal └── test ├── DocTestMain.hs ├── LICENSE ├── Main.hs └── cases ├── basic3.txt ├── class.txt ├── forced-assoc.txt ├── left-assoc.txt ├── nullsub3.txt ├── osx-bsd-critical.txt ├── repetition2.txt ├── right-assoc.txt └── totest.txt /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'regex-tdfa.cabal' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.19.20250506 12 | # 13 | # REGENDATA ("0.19.20250506",["github","regex-tdfa.cabal"]) 14 | # 15 | name: Haskell-CI 16 | on: 17 | push: 18 | branches: 19 | - master 20 | pull_request: 21 | branches: 22 | - master 23 | jobs: 24 | linux: 25 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 26 | runs-on: ubuntu-24.04 27 | timeout-minutes: 28 | 60 29 | container: 30 | image: buildpack-deps:jammy 31 | continue-on-error: ${{ matrix.allow-failure }} 32 | strategy: 33 | matrix: 34 | include: 35 | - compiler: ghc-9.12.2 36 | compilerKind: ghc 37 | compilerVersion: 9.12.2 38 | setup-method: ghcup 39 | allow-failure: false 40 | - compiler: ghc-9.10.2 41 | compilerKind: ghc 42 | compilerVersion: 9.10.2 43 | setup-method: ghcup 44 | allow-failure: false 45 | - compiler: ghc-9.8.4 46 | compilerKind: ghc 47 | compilerVersion: 9.8.4 48 | setup-method: ghcup 49 | allow-failure: false 50 | - compiler: ghc-9.6.7 51 | compilerKind: ghc 52 | compilerVersion: 9.6.7 53 | setup-method: ghcup 54 | allow-failure: false 55 | - compiler: ghc-9.4.8 56 | compilerKind: ghc 57 | compilerVersion: 9.4.8 58 | setup-method: ghcup 59 | allow-failure: false 60 | - compiler: ghc-9.2.8 61 | compilerKind: ghc 62 | compilerVersion: 9.2.8 63 | setup-method: ghcup 64 | allow-failure: false 65 | - compiler: ghc-9.0.2 66 | compilerKind: ghc 67 | compilerVersion: 9.0.2 68 | setup-method: ghcup 69 | allow-failure: false 70 | - compiler: ghc-8.10.7 71 | compilerKind: ghc 72 | compilerVersion: 8.10.7 73 | setup-method: ghcup 74 | allow-failure: false 75 | - compiler: ghc-8.8.4 76 | compilerKind: ghc 77 | compilerVersion: 8.8.4 78 | setup-method: ghcup 79 | allow-failure: false 80 | - compiler: ghc-8.6.5 81 | compilerKind: ghc 82 | compilerVersion: 8.6.5 83 | setup-method: ghcup 84 | allow-failure: false 85 | - compiler: ghc-8.4.4 86 | compilerKind: ghc 87 | compilerVersion: 8.4.4 88 | setup-method: ghcup 89 | allow-failure: false 90 | - compiler: ghc-8.2.2 91 | compilerKind: ghc 92 | compilerVersion: 8.2.2 93 | setup-method: ghcup 94 | allow-failure: false 95 | - compiler: ghc-8.0.2 96 | compilerKind: ghc 97 | compilerVersion: 8.0.2 98 | setup-method: ghcup 99 | 
allow-failure: false 100 | fail-fast: false 101 | steps: 102 | - name: apt-get install 103 | run: | 104 | apt-get update 105 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 libnuma-dev 106 | - name: Install GHCup 107 | run: | 108 | mkdir -p "$HOME/.ghcup/bin" 109 | curl -sL https://downloads.haskell.org/ghcup/0.1.50.1/x86_64-linux-ghcup-0.1.50.1 > "$HOME/.ghcup/bin/ghcup" 110 | chmod a+x "$HOME/.ghcup/bin/ghcup" 111 | - name: Install cabal-install 112 | run: | 113 | "$HOME/.ghcup/bin/ghcup" install cabal 3.14.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 114 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.14.2.0 -vnormal+nowrap" >> "$GITHUB_ENV" 115 | - name: Install GHC (GHCup) 116 | if: matrix.setup-method == 'ghcup' 117 | run: | 118 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 119 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 120 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 121 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 122 | echo "HC=$HC" >> "$GITHUB_ENV" 123 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 124 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 125 | env: 126 | HCKIND: ${{ matrix.compilerKind }} 127 | HCNAME: ${{ matrix.compiler }} 128 | HCVER: ${{ matrix.compilerVersion }} 129 | - name: Set PATH and environment variables 130 | run: | 131 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 132 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 133 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 134 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 135 | HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))') 136 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 137 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 138 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 139 | echo "HEADHACKAGE=false" >> "$GITHUB_ENV" 140 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 141 | env: 142 | HCKIND: ${{ matrix.compilerKind }} 143 | HCNAME: ${{ matrix.compiler }} 144 | HCVER: ${{ matrix.compilerVersion }} 145 | - name: env 146 | run: | 147 | env 148 | - name: write cabal config 149 | run: | 150 | mkdir -p $CABAL_DIR 151 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 184 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 185 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 186 | rm -f cabal-plan.xz 187 | chmod a+x $HOME/.cabal/bin/cabal-plan 188 | cabal-plan --version 189 | - name: checkout 190 | uses: actions/checkout@v4 191 | with: 192 | path: source 193 | - name: initial cabal.project for sdist 194 | run: | 195 | touch cabal.project 196 | echo "packages: $GITHUB_WORKSPACE/source/." 
>> cabal.project 197 | cat cabal.project 198 | - name: sdist 199 | run: | 200 | mkdir -p sdist 201 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 202 | - name: unpack 203 | run: | 204 | mkdir -p unpacked 205 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 206 | - name: generate cabal.project 207 | run: | 208 | PKGDIR_regex_tdfa="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/regex-tdfa-[0-9.]*')" 209 | echo "PKGDIR_regex_tdfa=${PKGDIR_regex_tdfa}" >> "$GITHUB_ENV" 210 | rm -f cabal.project cabal.project.local 211 | touch cabal.project 212 | touch cabal.project.local 213 | echo "packages: ${PKGDIR_regex_tdfa}" >> cabal.project 214 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package regex-tdfa" >> cabal.project ; fi 215 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods" >> cabal.project ; fi 216 | cat >> cabal.project <> cabal.project.local 219 | cat cabal.project 220 | cat cabal.project.local 221 | - name: dump install plan 222 | run: | 223 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 224 | cabal-plan 225 | - name: restore cache 226 | uses: actions/cache/restore@v4 227 | with: 228 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 229 | path: ~/.cabal/store 230 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 231 | - name: install dependencies 232 | run: | 233 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 234 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 235 | - name: build w/o tests 236 | run: | 237 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 238 | - name: build 239 | run: | 240 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 241 | - name: tests 242 | run: | 243 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 244 | - name: cabal check 245 | run: | 246 | cd ${PKGDIR_regex_tdfa} || false 247 | ${CABAL} -vnormal check 248 | - name: haddock 249 | run: | 250 | $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all 251 | - name: unconstrained build 252 | run: | 253 | rm -f cabal.project.local 254 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 255 | - name: save cache 256 | if: always() 257 | uses: actions/cache/save@v4 258 | with: 259 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 260 | path: ~/.cabal/store 261 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist/ 2 | /.cabal-sandbox/ 3 | /dist-newstyle/ 4 | /cabal.sandbox.config 5 | /cabal.config 6 | /cabal.project.local 7 | /cabal.project.freeze 8 | /.ghc.environment.* 9 | 10 | TAGS 11 | .DS_Store 12 | *~ 13 | *# 14 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ### 1.3.2.4 2 | 3 | _2025-05-09 Andreas Abel_ 4 | 5 | - Fix definition of `[[:graph:]]` character class ([#66](https://github.com/haskell-hvr/regex-tdfa/issues/66)) 6 | - Tested with GHC 8.0 - 9.12.2 7 | 8 | ### 1.3.2.3 9 | 10 | _2025-03-02 Andreas Abel_ 11 | 12 | - Drop support for GHC 7 13 | - Allow `containers < 1` 14 | - Tested with GHC 8.0 - 
9.12.1 15 | 16 | ### 1.3.2.2 17 | 18 | _2023-08-02, Andreas Abel_ 19 | 20 | - Fix return type in `memcpy` FFI signature ([#52](https://github.com/haskell-hvr/regex-tdfa/pull/52)) 21 | - Refactor `regexec` to avoid partial functions `tail` and `(!0)` 22 | - Tested with GHC 7.4 - 9.8.1-alpha1 23 | 24 | ### 1.3.2.1 25 | 26 | _2023-05-19, Andreas Abel_ 27 | 28 | - Fix haddock rendering of code examples in top-level documentation 29 | ([#50](https://github.com/haskell-hvr/regex-tdfa/issues/50)) 30 | - Tested with GHC 7.4 - 9.6 31 | 32 | ## 1.3.2 33 | 34 | _2022-07-18, Andreas Abel_ 35 | 36 | - Export `decodePatternSet` and `decodeCharacterClass` from `Text.Regex.TDFA.Pattern` 37 | ([#16](https://github.com/haskell-hvr/regex-tdfa/issues/16)) 38 | - Extend and correct docs for `Pattern` module 39 | - Tested with GHC 7.4 - 9.4 40 | 41 | ### 1.3.1.5 42 | 43 | _2022-07-18, Andreas Abel_ 44 | 45 | - Allow dash (`-`) as start of a range, e.g. `[--z]` 46 | ([#1](https://github.com/haskell-hvr/regex-tdfa/issues/1), 47 | [#45](https://github.com/haskell-hvr/regex-tdfa/pull/45)) 48 | - Tested with GHC 7.4 - 9.4 49 | 50 | ### 1.3.1.4 51 | 52 | _2022-07-17, Andreas Abel_ 53 | 54 | - Fix parsing of dashes in bracket expressions, e.g. `[-a-z]` ([#1](https://github.com/haskell-hvr/regex-tdfa/issues/1)) 55 | - Fix a deprecation warning except for on GHC 8.2 ([#21](https://github.com/haskell-hvr/regex-tdfa/issues/21)) 56 | - Documentation: link `defaultComptOpt` to its definition ([#13](https://github.com/haskell-hvr/regex-tdfa/issues/13)) 57 | - Verify documentation examples with new `doc-test` testsuite 58 | - Tested with GHC 7.4 - 9.4 59 | 60 | ### 1.3.1.3 61 | 62 | _2022-07-14, Andreas Abel_ 63 | 64 | - Fix an `undefined` in `Show PatternSet` ([#37](https://github.com/haskell-hvr/regex-tdfa/issues/37)) 65 | - Document POSIX character classes (e.g. `[[:digit:]]`) in README 66 | - Tested with GHC 7.4 - 9.4 67 | 68 | ### 1.3.1.2 Revision 1 69 | 70 | _2022-05-25, Andreas Abel_ 71 | 72 | - Allow `base >= 4.17` (GHC 9.4) 73 | 74 | ### 1.3.1.2 75 | 76 | _2022-02-19, Andreas Abel_ 77 | - No longer rely on the `MonadFail` instance for `ST` 78 | (future `base` library change, see [#29](https://github.com/haskell-hvr/regex-tdfa/pull/29)). 79 | - Silence warning `incomplete-uni-patterns` (GHC >= 9.2). 80 | - Import from `Data.List` explicitly or qualified (warning `compat-unqualified-imports`). 81 | - Import from `Control.Monad` to allow `mtl-2.3` in its `rc3` incarnation. 82 | 83 | ### 1.3.1.1 Revision 3 84 | 85 | _2022-01-31, Andreas Abel_ 86 | - Speculatively allow unreleased `mtl-2.3` (works with release candidate `mtl-2.3-rc4`). 87 | 88 | ### 1.3.1.1 Revision 2 89 | 90 | _2021-12-26, Andreas Abel_ 91 | - Allow `text-2.0`. 92 | 93 | ### 1.3.1.1 Revision 1 94 | 95 | _2021-08-12, Andreas Abel_ 96 | - Compatibility with `base-4.16` (GHC 9.2). 97 | 98 | ### 1.3.1.1 99 | 100 | _2021-06-03, Andreas Abel_ 101 | - Removed extension `NoMonoPatBinds` from `.cabal`-file for GHC 9.2 compatibility. 102 | - Removed some outdated documentation. 103 | 104 | ### 1.3.1.0 Revision 2 105 | 106 | _2021-02-20, Andreas Abel_ 107 | - Compatibility with `base-4.15` (GHC 9.0) and `bytestring-0.11`. 108 | 109 | ### 1.3.1.0 Revision 1 110 | 111 | _2020-03-26, phadej_ 112 | - Compatibility with `base-4.14` (GHC 8.10). 113 | 114 | ## 1.3.1.0 115 | 116 | _2019-11-26, hvr_ 117 | - Merge into `regex-tdfa`; see . 
118 | - Don't inject `ghc-options: -O2` by default anymore (see #7 for rationale) and introduce `force-O2` cabal flag to control the injection of `ghc-options: -O2`. 119 | Note that you can conveniently control optimization levels on a per-package granularity via `cabal.project` files; see [cabal's user-guide](https://cabal.readthedocs.io/en/latest/nix-local-build.html#configuring-builds-with-cabal-project) for more details. 120 | 121 | ## 1.3.0 Revision 1 122 | 123 | _2019-11-26, hvr_ 124 | - Tighten bounds on `base`, `mtl`, `parsec` and fail. 125 | 126 | # 1.3.0 127 | 128 | _2019-09-29, Artyom_ 129 | - Same as 1.2.3.3 release, but in accordance to PVP; 130 | see . 131 | - Compatibility with GHC 8.8 and regex-base-0.9.4 (h/t @asr). 132 | - Turned `regex-tdfa-unittest` into a `regex-tdfa` testsuite. 133 | 134 | ### 1.2.3.3 (deprecated, not following PVP) 135 | 136 | * Compatibility with GHC 8.8 and regex-base-0.9.4 (h/t @asr). 137 | * Turned `regex-tdfa-unittest` into a `regex-tdfa` testsuite. 138 | 139 | ### 1.2.3.2 140 | 141 | _2019-05-09, Artyom_ 142 | * Significantly improved documentation (h/t William Yao). 143 | 144 | ### 1.2.3.1 145 | 146 | _2018-06-22, Artyom_ 147 | * Compatibility with `containers-0.6`. 148 | 149 | ## 1.2.3 150 | 151 | _2018-03-10, Artyom_ 152 | * Added `Semigroup` instances for some types (h/t Herbert Valerio Riedel). 153 | 154 | ## 1.2.2 155 | 156 | _2016-04-28, Artyom_ 157 | * New maintainer. 158 | * Now we don't reexport the problematic `Show` instance for functions. 159 | 160 | ## 1.2.1 161 | 162 | _2015-08-29, Chris Kuklewicz_ 163 | * Updated dependency versions. 164 | 165 | # 1.2.0 166 | 167 | _2014-02-02, Chris Kuklewicz_ 168 | * "Almost ghc-7.8" with the array 0.4 changes for `Data.Array.Unsafe` 169 | 170 | 171 | ## 1.1.8 172 | 173 | Make ghc-7.0.2 on platform 2011.2.0.0.0 happy 174 | 175 | ## 1.1.7 176 | 177 | fix url below 178 | 179 | ## 1.1.6 180 | 181 | Fix bug preventing `[]] [-] [^]] [^-]` (thanks to Maxime Henrion) 182 | 183 | ## 1.1.5 184 | 185 | try `needUniqTags` in `POr` in CorePattern.hs, try `toAdvice b` for `PStar child` 186 | 187 | ## 1.1.4 188 | 189 | fixed 190 | 191 | ## 1.1.3 192 | 193 | BROKEN after 100 characters the `compressOrbit` dies! 194 | 195 | ## 1.1.2 196 | 197 | worked 198 | 199 | ## 1.1.1 200 | 201 | add gnu escapes 202 | 203 | # 1.1.0 204 | 205 | NewDFA code working 206 | 207 | ## 1.0.7 208 | 209 | make NewDFA directory and String_NC 210 | 211 | ## 1.0.6 212 | 213 | try NewDFATest_SBS with `uncons` 214 | 215 | ## 1.0.5 216 | 217 | use `uncons` on SBS 218 | 219 | ## 1.0.4 220 | 221 | try repaired NewDFATest_SBS 222 | 223 | * np13: try to improve readability with the `mm` combinator? Yes! 224 | * np12: expand `o` in the case where `t` lookup get `Nothing`? Yes – this is the fix!? 225 | * np11: break multi to not look at `o` and just return `True`? Yes !!!! 226 | * np10: Peel off `CharMap`/`IntMap` and DFA/DT with pattern matching? No 227 | * np9: `INLINE` `endOf`? No 228 | * np8: np6 and `NOINLINE` `endOff`? No 229 | * np7: just return `True`? Fast 230 | * np6: comment out ans check? No 231 | * np5: comment out all `Multi0` code? No 232 | * np4: comment out all `Single0` and `Single` code? No 233 | * np3: `!off` the multi? No 234 | * np2: comment out all Testing code? No 235 | 236 | ## 1.0.3 237 | 238 | try to alter `matchTest` to not have the `Bool` args? No 239 | 240 | ## 1.0.2 241 | 242 | arg, the prof is fast and the normal slow! 
243 | 244 | # 1.0.1 245 | 246 | add NewDFATest.hs 247 | 248 | ## 0.99.20 249 | 250 | go to many vs single? 251 | 252 | ## 0.99.19 253 | 254 | try for pre-comparison of orbit-logs! 255 | 256 | ## 0.99.18 257 | 258 | try alternate lazy/strict strategy in NewDFA. Fix offset laziness. 259 | 260 | ## 0.99.17 261 | 262 | radical removal of flag array and adding of `SetVal` to handle groups 263 | 264 | ## 0.99.16 265 | 266 | performance? up to v15 267 | 268 | ## 0.99.15 269 | 270 | get string with NewDFA testing, unit tests and 1000 random regex pass 271 | 272 | ## 0.99.14 273 | 274 | start changing to the new real DFA 275 | 276 | ## 0.99.13 277 | 278 | more cleanup 279 | 280 | ## 0.99.12 281 | 282 | try to debug 0.99.11: fixed `updateWinner` 283 | 284 | ## 0.99.11 285 | 286 | improve above fix and make stuff work better – HAS BUG, along with old TDFA! 287 | 288 | ## 0.99.10 289 | 290 | fixed `((.?)*)*` patterns by changing `PStar nullView` when `mayFirstBeNull` 291 | 292 | ## 0.99.9 293 | 294 | testing changing `bestTrans`/`chooseWith`/`choose` to include `enterOrbit`/`newFlags`/`(_,True)` info 295 | 296 | ## 0.99.8 297 | 298 | testing changing `Maximize` to `Minimize` for `Tag`s, decide `(a*)*` is canonical problem 299 | 300 | ## 0.99.7 301 | 302 | Use `(PGroup Nothing)` in `Pattern` to decompose `PBound` 303 | 304 | ## 0.99.6 305 | 306 | change to nested `nonEmpty` calls for `PBound` 307 | 308 | ## 0.99.5 309 | 310 | remove `PNonEmpty` constructor 311 | 312 | ## 0.99.4 313 | 314 | tests `pnonempty' = \ p -> POr [ PEmpty, p ]` instead of `PNonEmpty` 315 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This module is under this "3 clause" BSD license: 2 | 3 | Copyright (c) 2007-2009, Christopher Kuklewicz 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10 | * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Hackage version](https://img.shields.io/hackage/v/regex-tdfa.svg?label=Hackage&color=informational)](http://hackage.haskell.org/package/regex-tdfa) 2 | [![Stackage Nightly](http://stackage.org/package/regex-tdfa/badge/nightly)](http://stackage.org/nightly/package/regex-tdfa) 3 | [![Stackage LTS](http://stackage.org/package/regex-tdfa/badge/lts)](http://stackage.org/lts/package/regex-tdfa) 4 | [![Haskell-CI](https://github.com/haskell-hvr/regex-tdfa/actions/workflows/haskell-ci.yml/badge.svg?branch=master&event=push)](https://github.com/haskell-hvr/regex-tdfa/actions/workflows/haskell-ci.yml) 5 | [![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 6 | # regex-tdfa 7 | 8 | This is [`regex-tdfa`](http://hackage.haskell.org/package/regex-tdfa) which is a pure Haskell regular expression library (for POSIX extended regular expressions) originally written by Christopher Kuklewicz. 9 | 10 | The name "tdfa" stands for Tagged-DFA. 11 | 12 | ## Getting started 13 | 14 | ### Importing and using 15 | 16 | [Declare a dependency](https://www.haskell.org/cabal/users-guide/developing-packages.html#pkg-field-build-depends) on the `regex-tdfa` library in your `.cabal` file: 17 | 18 | ``` 19 | build-depends: regex-tdfa ^>= 1.3.2 20 | ``` 21 | 22 | In Haskell modules where you need to use regexes `import` the respective `regex-tdfa` module: 23 | 24 | ```haskell 25 | import Text.Regex.TDFA 26 | ``` 27 | 28 | ### Basics 29 | 30 | ```haskell 31 | λ> emailRegex = "[a-zA-Z0-9+._-]+@[a-zA-Z-]+\\.[a-z]+" 32 | λ> "my email is email@email.com" =~ emailRegex :: Bool 33 | >>> True 34 | 35 | -- non-monadic 36 | =~ 37 | 38 | -- monadic, uses 'fail' on lack of match 39 | =~~ 40 | ``` 41 | 42 | `(=~)` and `(=~~)` are polymorphic in their return type. This is so that 43 | regex-tdfa can pick the most efficient way to give you your result based on 44 | what you need. For instance, if all you want is to check whether the regex 45 | matched or not, there's no need to allocate a result string. If you only want 46 | the first match, rather than all the matches, then the matching engine can stop 47 | after finding a single hit. 48 | 49 | This does mean, though, that you may sometimes have to explicitly specify the 50 | type you want, especially if you're trying things out at the REPL. 51 | 52 | ### Common use cases 53 | 54 | #### Get the first match 55 | 56 | ```haskell 57 | -- returns empty string if no match 58 | a =~ b :: String -- or ByteString, or Text... 
59 | 60 | λ> "alexis-de-tocqueville" =~ "[a-z]+" :: String 61 | >>> "alexis" 62 | 63 | λ> "alexis-de-tocqueville" =~ "[[:digit:]]+" :: String 64 | >>> "" 65 | ``` 66 | 67 | #### Check if it matched at all 68 | 69 | ```haskell 70 | a =~ b :: Bool 71 | 72 | λ> "alexis-de-tocqueville" =~ "[a-z]+" :: Bool 73 | >>> True 74 | ``` 75 | 76 | #### Get first match + text before/after 77 | 78 | ```haskell 79 | -- if no match, will just return whole 80 | -- string in the first element of the tuple 81 | a =~ b :: (String, String, String) 82 | 83 | λ> "alexis-de-tocqueville" =~ "de" :: (String, String, String) 84 | >>> ("alexis-", "de", "-tocqueville") 85 | 86 | λ> "alexis-de-tocqueville" =~ "kant" :: (String, String, String) 87 | >>> ("alexis-de-tocqueville", "", "") 88 | ``` 89 | 90 | #### Get first match + submatches 91 | 92 | ```haskell 93 | -- same as above, but also returns a list of /just/ submatches 94 | -- submatch list is empty if regex doesn't match at all 95 | a =~ b :: (String, String, String, [String]) 96 | 97 | λ> "div[attr=1234]" =~ "div\\[([a-z]+)=([^]]+)\\]" 98 | :: (String, String, String, [String]) 99 | >>> ("", "div[attr=1234]", "", ["attr","1234"]) 100 | ``` 101 | 102 | #### Get all non-overlapping matches 103 | 104 | ```haskell 105 | -- can also return Data.Array instead of List 106 | getAllTextMatches (a =~ b) :: [String] 107 | 108 | λ> getAllTextMatches ("john anne yifan" =~ "[a-z]+") :: [String] 109 | >>> ["john","anne","yifan"] 110 | 111 | λ> getAllTextMatches ("0a0b0" =~ "0[[:lower:]]0") :: [String] 112 | >>> ["0a0"] 113 | ``` 114 | Note that `"0b0"` is not included in the result since it overlaps with `"0a0"`. 115 | 116 | #### Special characters 117 | 118 | `regex-tdfa` only supports a small set of special characters and is much less 119 | featureful than some other regex engines you might be used to, such as PCRE. 120 | 121 | * ``\` `` — Match start of entire text (similar to `^` in other regex engines) 122 | * `\'` — Match end of entire text (similar to `$` in other regex engines) 123 | * `\<` — Match beginning of word 124 | * `\>` — Match end of word 125 | * `\b` — Match beginning or end of word 126 | * `\B` — Match neither beginning nor end of word 127 | 128 | While shorthands like `\d` (for digit) are not recognized, one can use the respective 129 | POSIX character class inside `[...]`. E.g., `[[:digit:][:lower:]_]` is short for 130 | `[0-9a-z_]`. The supported character classes are listed on 131 | [Wikipedia](https://en.wikipedia.org/w/index.php?title=Regular_expression&oldid=1095256273#Character_classes) 132 | and defined in module 133 | [`TNFA`](https://github.com/haskell-hvr/regex-tdfa/blob/95d47cb982d2cf636b2cb6260a866f9907341c45/lib/Text/Regex/TDFA/TNFA.hs#L804-L816). 134 | 135 | Please also consult a variant of this documentation which is part of the 136 | [Text.Regex.TDFA haddock](http://hackage.haskell.org/package/regex-tdfa/docs/Text-Regex-TDFA.html), 137 | and the original documentation at the [Haskell wiki](https://wiki.haskell.org/Regular_expressions#regex-tdfa). 
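For example, the word-boundary escapes and POSIX character classes described above can be combined as follows (a small illustration added here, not part of the original README; the outputs are what current `regex-tdfa` behaviour should produce — note that backslashes must be doubled inside ordinary Haskell string literals):

```haskell
-- whole-word match using the GNU word-boundary anchors
λ> "catfish" =~ "\\<cat\\>" :: Bool
>>> False

λ> "cat fish" =~ "\\<cat\\>" :: Bool
>>> True

-- POSIX character class combined with a literal underscore
λ> "for_2_weeks" =~ "[[:digit:]_]+" :: String
>>> "_2_"
```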
138 |
139 | ### Less common stuff
140 |
141 | #### Get match indices
142 |
143 | ```haskell
144 | -- can also return Data.Array instead of List
145 | getAllMatches (a =~ b) :: [(Int, Int)] -- (index, length)
146 |
147 | λ> getAllMatches ("john anne yifan" =~ "[a-z]+") :: [(Int, Int)]
148 | >>> [(0,4), (5,4), (10,5)]
149 | ```
150 |
151 | #### Get submatch indices
152 |
153 | ```haskell
154 | -- match of __entire__ regex is first element, not first capture
155 | -- can also return Data.Array instead of List
156 | getAllSubmatches (a =~ b) :: [(Int, Int)] -- (index, length)
157 |
158 | λ> getAllSubmatches ("div[attr=1234]" =~ "div\\[([a-z]+)=([^]]+)\\]")
159 |     :: [(Int, Int)]
160 | >>> [(0,14), (4,4), (9,4)]
161 | ```
162 |
163 | ### Replacement
164 |
165 | `regex-tdfa` does not provide find-and-replace.
166 |
167 | ## Avoiding backslashes
168 |
169 | If you find yourself writing a lot of regexes, take a look at
170 | [raw-strings-qq](http://hackage.haskell.org/package/raw-strings-qq). It'll
171 | let you write regexes without needing to escape all your backslashes.
172 |
173 | ```haskell
174 | {-# LANGUAGE QuasiQuotes #-}
175 |
176 | import Text.RawString.QQ
177 | import Text.Regex.TDFA
178 |
179 | λ> "2 * (3 + 1) / 4" =~ [r|\([^)]+\)|] :: String
180 | >>> "(3 + 1)"
181 | ```
182 |
183 | ## Known bugs and infelicities
184 |
185 | * Regexes with large character classes combined with `{m,n}` are very slow and memory-hungry ([#3][]).
186 |
187 | > An example of such a regex is `^[\x0020-\xD7FF]{1,255}$`.
188 |
189 | * POSIX submatch semantics are broken in some rare cases ([#2][]).
190 |
191 | [#2]: https://github.com/haskell-hvr/regex-tdfa/issues/2
192 | [#3]: https://github.com/haskell-hvr/regex-tdfa/issues/3
193 |
194 | ## About this package
195 |
196 | This was inspired by the algorithm (and Master's thesis) behind the regular expression library known as [TRE or libtre](https://github.com/laurikari/tre/). TRE was created by Ville Laurikari and tackled the difficult issue of efficient sub-match capture for POSIX regular expressions.
197 |
198 | By building on this thesis and adding a few more optimizations, regex-tdfa matching input text of length N should have O(N) runtime, and should have maximum memory usage bounded by the pattern size, independent of N. It should do this while returning well-defined (and correct) values for the parenthesized sub-matches.
199 |
200 | Regardless of performance, nearly every single OS and library for POSIX regular expressions has bugs in sub-matches. This was detailed on the [Regex POSIX Haskell wiki page](https://wiki.haskell.org/Regex_Posix), and can be demonstrated with the [regex-posix-unittest](http://hackage.haskell.org/package/regex-posix-unittest) suite of checks. The [regex-tdfa-unittest](http://hackage.haskell.org/package/regex-tdfa-unittest) suite should show regex-tdfa passing these same checks. I owe my understanding of the correct behavior and many of these unit tests to Glenn Fowler at AT&T ("An Interpretation of the POSIX regex Standard").
201 |
202 | ### Maintenance history
203 |
204 | The original Darcs repository was at [code.haskell.org](http://code.haskell.org/regex-tdfa/).
205 | For a while a fork was maintained by Roman Cheplyaka as
206 | [regex-tdfa-rc](http://hackage.haskell.org/package/regex-tdfa-rc).
207 |
208 | Then the repository moved to ,
209 | which was primarily maintained by [Artyom (neongreen)](https://github.com/neongreen).
210 | 211 | Finally, maintainership was passed on again and the repository moved to its current location 212 | at . 213 | 214 | ## Other related packages 215 | 216 | Searching for "tdfa" on [hackage](http://hackage.haskell.org/packages/search?terms=tdfa) 217 | finds some related packages (unmaintained as of 2022-07-14). 218 | 219 | ## Document notes 220 | 221 | This README was originally written 2016-04-30. 222 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | branches: master 2 | 3 | installed: +all 4 | 5 | -- constraint-set bytestring-0.12 6 | -- -- bytestring-0.12 requires base >=4.9 (GHC 8.0) 7 | -- ghc: >= 8.0 8 | -- constraints: bytestring ^>= 0.12 9 | -- tests: True 10 | -- run-tests: True 11 | -- -- 12 | -- -- The following is silently ignored here: 13 | -- -- 14 | -- -- raw-project 15 | -- -- allow-newer: bytestring 16 | -- -- 17 | -- 18 | -- constraint-set containers-0.7 19 | -- -- containers-0.7 requires base >=4.9 (GHC 8.0) 20 | -- -- fails with GHCs 8.0 and 9.8.0 21 | -- ghc: >= 8.2 && < 9.7 22 | -- constraints: containers ^>= 0.7 23 | -- tests: True 24 | -- run-tests: True 25 | -- 26 | -- constraint-set text-2.1 27 | -- -- text-2.1 requires base >=4.10 (GHC 8.2) 28 | -- ghc: >= 8.2 29 | -- constraints: text ^>= 2.1 30 | -- tests: True 31 | -- run-tests: True 32 | -- 33 | -- -- The following is meant to be for the constraint-set bytestring-0.12 only 34 | -- -- (and for the other constraint-sets) 35 | -- -- but there is currently no way to enable `allow-newer: bytestring` 36 | -- -- just for the constraint set. 37 | -- -- 38 | -- -- Since core library `bytestring` is constrained to `installed`, 39 | -- -- it is not harmful to allow newer `bytestring` in the default runs 40 | -- -- as well---it will have no effect there. 41 | -- -- 42 | -- raw-project 43 | -- allow-newer: bytestring 44 | -- allow-newer: containers 45 | -- allow-newer: text 46 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: . 2 | 3 | write-ghc-environment-files: always 4 | -------------------------------------------------------------------------------- /lib/Data/IntMap/CharMap2.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Data.IntMap.CharMap2 where 3 | 4 | #ifdef __GLASGOW_HASKELL__ 5 | import GHC.Base(unsafeChr) 6 | #else 7 | import Data.Char (chr) 8 | #endif 9 | import Data.Char as C(ord) 10 | import Data.List as L (map) 11 | import qualified Data.IntMap as M 12 | #if MIN_VERSION_containers(0,5,11) 13 | import qualified Data.IntMap.Internal.Debug as MD 14 | #else 15 | import qualified Data.IntMap as MD 16 | #endif 17 | import qualified Data.IntSet as S(IntSet) 18 | import Data.Semigroup as Sem 19 | 20 | #ifndef __GLASGOW_HASKELL__ 21 | unsafeChr = chr 22 | #endif 23 | 24 | newtype CharMap a = CharMap {unCharMap :: M.IntMap a} deriving (Eq,Ord,Read,Show) 25 | 26 | instance Sem.Semigroup (CharMap a) where 27 | CharMap x <> CharMap y = CharMap (x `mappend` y) 28 | 29 | instance Monoid (CharMap a) where 30 | mempty = CharMap mempty 31 | mappend = (<>) 32 | 33 | instance Functor CharMap where 34 | fmap f (CharMap m) = CharMap (fmap f m) 35 | 36 | type Key = Char 37 | 38 | (!) :: CharMap a -> Key -> a 39 | (!) (CharMap m) k = (M.!) 
m (C.ord k) 40 | 41 | (\\) :: CharMap a -> CharMap b -> CharMap a 42 | (\\) (CharMap m1) (CharMap m2) = CharMap ((M.\\) m1 m2) 43 | 44 | null :: CharMap a -> Bool 45 | null (CharMap m) = M.null m 46 | 47 | size :: CharMap a -> Int 48 | size (CharMap m) = M.size m 49 | 50 | member :: Key -> CharMap a -> Bool 51 | member k (CharMap m) = M.member (C.ord k) m 52 | 53 | notMember :: Key -> CharMap a -> Bool 54 | notMember k (CharMap m) = M.notMember (C.ord k) m 55 | 56 | lookup :: Key -> CharMap a -> Maybe a 57 | lookup k (CharMap m) = M.lookup (C.ord k) m 58 | 59 | findWithDefault :: a -> Key -> CharMap a -> a 60 | findWithDefault a k (CharMap m) = M.findWithDefault a (C.ord k) m 61 | 62 | empty :: CharMap a 63 | empty = CharMap M.empty 64 | 65 | singleton :: Key -> a -> CharMap a 66 | singleton k a = CharMap (M.singleton (C.ord k) a) 67 | 68 | insert :: Key -> a -> CharMap a -> CharMap a 69 | insert k a (CharMap m) = CharMap (M.insert (C.ord k) a m) 70 | 71 | insertWith :: (a -> a -> a) -> Key -> a -> CharMap a -> CharMap a 72 | insertWith f k a (CharMap m) = CharMap (M.insertWith f (C.ord k) a m) 73 | 74 | insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> CharMap a 75 | insertWithKey f k a (CharMap m) = CharMap (M.insertWithKey f' (C.ord k) a m) 76 | where f' b a1 a2 = f (unsafeChr b) a1 a2 77 | 78 | insertLookupWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> (Maybe a, CharMap a) 79 | insertLookupWithKey f k a (CharMap m) = (ma,CharMap m') 80 | where (ma,m') = M.insertLookupWithKey f' (C.ord k) a m 81 | f' b a1 a2 = f (unsafeChr b) a1 a2 82 | 83 | delete :: Key -> CharMap a -> CharMap a 84 | delete k (CharMap m) = CharMap (M.delete (C.ord k) m) 85 | 86 | adjust :: (a -> a) -> Key -> CharMap a -> CharMap a 87 | adjust f k (CharMap m) = CharMap (M.adjust f (C.ord k) m) 88 | 89 | adjustWithKey :: (Key -> a -> a) -> Key -> CharMap a -> CharMap a 90 | adjustWithKey f k (CharMap m) = CharMap (M.adjustWithKey f' (C.ord k) m) 91 | where f' b a = f (unsafeChr b) a 92 | 93 | update :: (a -> Maybe a) -> Key -> CharMap a -> CharMap a 94 | update f k (CharMap m) = CharMap (M.update f (C.ord k) m) 95 | 96 | updateWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> CharMap a 97 | updateWithKey f k (CharMap m) = CharMap (M.updateWithKey f' (C.ord k) m) 98 | where f' b a = f (unsafeChr b) a 99 | 100 | updateLookupWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> (Maybe a, CharMap a) 101 | updateLookupWithKey f k (CharMap m) = (a,CharMap m') 102 | where (a,m') = M.updateLookupWithKey f' (C.ord k) m 103 | f' b a1 = f (unsafeChr b) a1 104 | 105 | union :: CharMap a -> CharMap a -> CharMap a 106 | union (CharMap m1) (CharMap m2) = CharMap (M.union m1 m2) 107 | 108 | unionWith :: (a -> a -> a) -> CharMap a -> CharMap a -> CharMap a 109 | unionWith f (CharMap m1) (CharMap m2) = CharMap (M.unionWith f m1 m2) 110 | 111 | unionWithKey :: (Key -> a -> a -> a) -> CharMap a -> CharMap a -> CharMap a 112 | unionWithKey f (CharMap m1) (CharMap m2) = CharMap (M.unionWithKey f' m1 m2) 113 | where f' b a1 a2 = f (unsafeChr b) a1 a2 114 | 115 | unions :: [CharMap a] -> CharMap a 116 | unions cs = CharMap (M.unions (L.map unCharMap cs)) 117 | 118 | unionsWith :: (a -> a -> a) -> [CharMap a] -> CharMap a 119 | unionsWith f cs = CharMap (M.unionsWith f (L.map unCharMap cs)) 120 | 121 | difference :: CharMap a -> CharMap b -> CharMap a 122 | difference (CharMap m1) (CharMap m2) = CharMap (M.difference m1 m2) 123 | 124 | differenceWith :: (a -> b -> Maybe a) -> CharMap a -> CharMap b -> 
CharMap a 125 | differenceWith f (CharMap m1) (CharMap m2) = CharMap (M.differenceWith f m1 m2) 126 | 127 | differenceWithKey :: (Key -> a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a 128 | differenceWithKey f (CharMap m1) (CharMap m2) = CharMap (M.differenceWithKey f' m1 m2) 129 | where f' b a1 a2 = f (unsafeChr b) a1 a2 130 | 131 | intersection :: CharMap a -> CharMap b -> CharMap a 132 | intersection (CharMap m1) (CharMap m2) = CharMap (M.intersection m1 m2) 133 | 134 | intersectionWith :: (a -> b -> a) -> CharMap a -> CharMap b -> CharMap a 135 | intersectionWith f (CharMap m1) (CharMap m2) = CharMap (M.intersectionWith f m1 m2) 136 | 137 | intersectionWithKey :: (Key -> a -> b -> a) -> CharMap a -> CharMap b -> CharMap a 138 | intersectionWithKey f (CharMap m1) (CharMap m2) = CharMap (M.intersectionWithKey f' m1 m2) 139 | where f' b a1 a2 = f (unsafeChr b) a1 a2 140 | 141 | map :: (a -> b) -> CharMap a -> CharMap b 142 | map f (CharMap m) = CharMap (M.map f m) 143 | 144 | mapWithKey :: (Key -> a -> b) -> CharMap a -> CharMap b 145 | mapWithKey f (CharMap m) = CharMap (M.mapWithKey f' m) 146 | where f' b a = f (unsafeChr b) a 147 | 148 | mapAccum :: (a -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) 149 | mapAccum f a (CharMap m) = (a',CharMap m') 150 | where (a',m') = M.mapAccum f a m 151 | 152 | mapAccumWithKey :: (a -> Key -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) 153 | mapAccumWithKey f a (CharMap m) = (a',CharMap m') 154 | where (a',m') = M.mapAccumWithKey f' a m 155 | f' a1 b a2 = f a1 (unsafeChr b) a2 156 | 157 | fold :: (a -> b -> b) -> b -> CharMap a -> b 158 | fold f a (CharMap m) = M.foldr f a m 159 | 160 | foldWithKey :: (Key -> a -> b -> b) -> b -> CharMap a -> b 161 | foldWithKey f a (CharMap m) = M.foldrWithKey f' a m 162 | where f' b a1 a2 = f (unsafeChr b) a1 a2 163 | 164 | elems :: CharMap a -> [a] 165 | elems (CharMap m) = M.elems m 166 | 167 | keys :: CharMap a -> [Key] 168 | keys (CharMap m) = L.map unsafeChr (M.keys m) 169 | 170 | keysSet :: CharMap a -> S.IntSet 171 | keysSet (CharMap m) = M.keysSet m 172 | 173 | assocs :: CharMap a -> [(Key, a)] 174 | assocs (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.assocs m) 175 | 176 | toList :: CharMap a -> [(Key, a)] 177 | toList (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.toList m) 178 | 179 | fromList :: [(Key, a)] -> CharMap a 180 | fromList ka = CharMap (M.fromList (L.map (\(k,a) -> (C.ord k,a)) ka)) 181 | 182 | fromListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a 183 | fromListWith f ka = CharMap (M.fromListWith f (L.map (\(k,a) -> (C.ord k,a)) ka)) 184 | 185 | fromListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a 186 | fromListWithKey f ka = CharMap (M.fromListWithKey f' (L.map (\(k,a) -> (C.ord k,a)) ka)) 187 | where f' b a1 a2 = f (unsafeChr b) a1 a2 188 | 189 | toAscList :: CharMap a -> [(Key, a)] 190 | toAscList (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.toAscList m) 191 | 192 | fromAscList :: [(Key, a)] -> CharMap a 193 | fromAscList ka = CharMap (M.fromAscList (L.map (\(k,a) -> (C.ord k,a)) ka)) 194 | 195 | fromAscListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a 196 | fromAscListWith f ka = CharMap (M.fromAscListWith f (L.map (\(k,a) -> (C.ord k,a)) ka)) 197 | 198 | fromAscListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a 199 | fromAscListWithKey f ka = CharMap (M.fromAscListWithKey f' (L.map (\(k,a) -> (C.ord k,a)) ka)) 200 | where f' b a1 a2 = f (unsafeChr b) a1 a2 201 | 202 | fromDistinctAscList :: [(Key, a)] -> CharMap a 203 | 
fromDistinctAscList ka = CharMap (M.fromDistinctAscList (L.map (\(k,a) -> (C.ord k,a)) ka)) 204 | 205 | filter :: (a -> Bool) -> CharMap a -> CharMap a 206 | filter f (CharMap m) = CharMap (M.filter f m) 207 | 208 | filterWithKey :: (Key -> a -> Bool) -> CharMap a -> CharMap a 209 | filterWithKey f (CharMap m) = CharMap (M.filterWithKey f' m) 210 | where f' b a = f (unsafeChr b) a 211 | 212 | partition :: (a -> Bool) -> CharMap a -> (CharMap a, CharMap a) 213 | partition f (CharMap m) = (CharMap m1', CharMap m2') 214 | where (m1',m2') = M.partition f m 215 | 216 | partitionWithKey :: (Key -> a -> Bool) -> CharMap a -> (CharMap a, CharMap a) 217 | partitionWithKey f (CharMap m) = (CharMap m1', CharMap m2') 218 | where (m1',m2') = M.partitionWithKey f' m 219 | f' b a = f (unsafeChr b) a 220 | 221 | mapMaybe :: (a -> Maybe b) -> CharMap a -> CharMap b 222 | mapMaybe f (CharMap m) = CharMap (M.mapMaybe f m) 223 | 224 | mapMaybeWithKey :: (Key -> a -> Maybe b) -> CharMap a -> CharMap b 225 | mapMaybeWithKey f (CharMap m) = CharMap (M.mapMaybeWithKey f' m) 226 | where f' b a = f (unsafeChr b) a 227 | 228 | mapEither :: (a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) 229 | mapEither f (CharMap m) = (CharMap m1', CharMap m2') 230 | where (m1',m2') = M.mapEither f m 231 | 232 | mapEitherWithKey :: (Key -> a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) 233 | mapEitherWithKey f (CharMap m) = (CharMap m1', CharMap m2') 234 | where (m1',m2') = M.mapEitherWithKey f' m 235 | f' b a = f (unsafeChr b) a 236 | 237 | split :: Key -> CharMap a -> (CharMap a, CharMap a) 238 | split k (CharMap m) = (CharMap m1', CharMap m2') 239 | where (m1',m2') = M.split (C.ord k) m 240 | 241 | splitLookup :: Key -> CharMap a -> (CharMap a, Maybe a, CharMap a) 242 | splitLookup k (CharMap m) = (CharMap m1', a, CharMap m2') 243 | where (m1',a,m2') = M.splitLookup (C.ord k) m 244 | 245 | isSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool 246 | isSubmapOf (CharMap m1) (CharMap m2) = M.isSubmapOf m1 m2 247 | 248 | isSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool 249 | isSubmapOfBy f (CharMap m1) (CharMap m2) = M.isSubmapOfBy f m1 m2 250 | 251 | isProperSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool 252 | isProperSubmapOf (CharMap m1) (CharMap m2) = M.isProperSubmapOf m1 m2 253 | 254 | isProperSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool 255 | isProperSubmapOfBy f (CharMap m1) (CharMap m2) = M.isProperSubmapOfBy f m1 m2 256 | 257 | showTree :: Show a => CharMap a -> String 258 | showTree (CharMap m) = MD.showTree m 259 | 260 | showTreeWith :: Show a => Bool -> Bool -> CharMap a -> String 261 | showTreeWith b1 b2 (CharMap m) = MD.showTreeWith b1 b2 m 262 | {-# INLINE (!) 
#-} 263 | {-# INLINE (\\) #-} 264 | {-# INLINE null #-} 265 | {-# INLINE size #-} 266 | {-# INLINE member #-} 267 | {-# INLINE notMember #-} 268 | {-# INLINE lookup #-} 269 | {-# INLINE findWithDefault #-} 270 | {-# INLINE empty #-} 271 | {-# INLINE singleton #-} 272 | {-# INLINE insert #-} 273 | {-# INLINE insertWith #-} 274 | {-# INLINE insertWithKey #-} 275 | {-# INLINE insertLookupWithKey #-} 276 | {-# INLINE delete #-} 277 | {-# INLINE adjust #-} 278 | {-# INLINE adjustWithKey #-} 279 | {-# INLINE update #-} 280 | {-# INLINE updateWithKey #-} 281 | {-# INLINE updateLookupWithKey #-} 282 | {-# INLINE union #-} 283 | {-# INLINE unionWith #-} 284 | {-# INLINE unionWithKey #-} 285 | {-# INLINE unions #-} 286 | {-# INLINE unionsWith #-} 287 | {-# INLINE difference #-} 288 | {-# INLINE differenceWith #-} 289 | {-# INLINE differenceWithKey #-} 290 | {-# INLINE intersection #-} 291 | {-# INLINE intersectionWith #-} 292 | {-# INLINE intersectionWithKey #-} 293 | {-# INLINE map #-} 294 | {-# INLINE mapWithKey #-} 295 | {-# INLINE mapAccum #-} 296 | {-# INLINE mapAccumWithKey #-} 297 | {-# INLINE fold #-} 298 | {-# INLINE foldWithKey #-} 299 | {-# INLINE elems #-} 300 | {-# INLINE keys #-} 301 | {-# INLINE keysSet #-} 302 | {-# INLINE assocs #-} 303 | {-# INLINE toList #-} 304 | {-# INLINE fromList #-} 305 | {-# INLINE fromListWith #-} 306 | {-# INLINE fromListWithKey #-} 307 | {-# INLINE toAscList #-} 308 | {-# INLINE fromAscList #-} 309 | {-# INLINE fromAscListWith #-} 310 | {-# INLINE fromAscListWithKey #-} 311 | {-# INLINE fromDistinctAscList #-} 312 | {-# INLINE filter #-} 313 | {-# INLINE filterWithKey #-} 314 | {-# INLINE partition #-} 315 | {-# INLINE partitionWithKey #-} 316 | {-# INLINE mapMaybe #-} 317 | {-# INLINE mapMaybeWithKey #-} 318 | {-# INLINE mapEither #-} 319 | {-# INLINE mapEitherWithKey #-} 320 | {-# INLINE split #-} 321 | {-# INLINE splitLookup #-} 322 | {-# INLINE isSubmapOf #-} 323 | {-# INLINE isSubmapOfBy #-} 324 | {-# INLINE isProperSubmapOf #-} 325 | {-# INLINE isProperSubmapOfBy #-} 326 | {-# INLINE showTree #-} 327 | {-# INLINE showTreeWith #-} 328 | -------------------------------------------------------------------------------- /lib/Data/IntMap/EnumMap2.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | module Data.IntMap.EnumMap2 where 4 | 5 | import Data.Foldable as F (Foldable(foldMap)) 6 | import qualified Data.IntMap as M 7 | #if MIN_VERSION_containers(0,5,11) 8 | import qualified Data.IntMap.Internal.Debug as MD 9 | #else 10 | import qualified Data.IntMap as MD 11 | #endif 12 | import qualified Data.IntSet.EnumSet2 as S (EnumSet(..)) 13 | import Data.Semigroup as Sem 14 | import Prelude 15 | import qualified Prelude as L (map) 16 | 17 | newtype EnumMap k a = EnumMap {unEnumMap :: M.IntMap a} 18 | deriving (Eq,Ord,Read,Show) 19 | 20 | instance Ord k => Sem.Semigroup (EnumMap k a) where 21 | EnumMap x <> EnumMap y = EnumMap (x `mappend` y) 22 | 23 | instance Ord k => Monoid (EnumMap k a) where 24 | mempty = EnumMap mempty 25 | mappend = (<>) 26 | 27 | instance Ord k => Functor (EnumMap k) where 28 | fmap f (EnumMap m) = EnumMap (fmap f m) 29 | 30 | instance Ord k => F.Foldable (EnumMap k) where 31 | foldMap f (EnumMap m) = F.foldMap f m 32 | 33 | (!) :: (Enum key) => EnumMap key a -> key -> a 34 | (!) (EnumMap m) k = (M.!) 
m (fromEnum k) 35 | 36 | (\\) :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a 37 | (\\) (EnumMap m1) (EnumMap m2) = EnumMap ((M.\\) m1 m2) 38 | 39 | null :: (Enum key) => EnumMap key a -> Bool 40 | null (EnumMap m) = M.null m 41 | 42 | size :: (Enum key) => EnumMap key a -> Int 43 | size (EnumMap m) = M.size m 44 | 45 | member :: (Enum key) => key -> EnumMap key a -> Bool 46 | member k (EnumMap m) = M.member (fromEnum k) m 47 | 48 | notMember :: (Enum key) => key -> EnumMap key a -> Bool 49 | notMember k (EnumMap m) = M.notMember (fromEnum k) m 50 | 51 | {-# INLINE lookup #-} 52 | lookup :: (Enum key) => key -> EnumMap key a -> Maybe a 53 | lookup k (EnumMap m) = M.lookup (fromEnum k) m 54 | 55 | findWithDefault :: (Enum key) => a -> key -> EnumMap key a -> a 56 | findWithDefault a k (EnumMap m) = M.findWithDefault a (fromEnum k) m 57 | 58 | empty :: (Enum key) => EnumMap key a 59 | empty = EnumMap M.empty 60 | 61 | singleton :: (Enum key) => key -> a -> EnumMap key a 62 | singleton k a = EnumMap (M.singleton (fromEnum k) a) 63 | 64 | insert :: (Enum key) => key -> a -> EnumMap key a -> EnumMap key a 65 | insert k a (EnumMap m) = EnumMap (M.insert (fromEnum k) a m) 66 | 67 | insertWith :: (Enum key) => (a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a 68 | insertWith f k a (EnumMap m) = EnumMap (M.insertWith f (fromEnum k) a m) 69 | 70 | insertWithKey :: (Enum key) => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a 71 | insertWithKey f k a (EnumMap m) = EnumMap (M.insertWithKey f' (fromEnum k) a m) 72 | where f' b a1 a2 = f (toEnum b) a1 a2 73 | 74 | insertLookupWithKey :: (Enum key) => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> (Maybe a, EnumMap key a) 75 | insertLookupWithKey f k a (EnumMap m) = (ma,EnumMap m') 76 | where (ma,m') = M.insertLookupWithKey f' (fromEnum k) a m 77 | f' b a1 a2 = f (toEnum b) a1 a2 78 | 79 | delete :: (Enum key) => key -> EnumMap key a -> EnumMap key a 80 | delete k (EnumMap m) = EnumMap (M.delete (fromEnum k) m) 81 | 82 | adjust :: (Enum key) => (a -> a) -> key -> EnumMap key a -> EnumMap key a 83 | adjust f k (EnumMap m) = EnumMap (M.adjust f (fromEnum k) m) 84 | 85 | adjustWithKey :: (Enum key) => (key -> a -> a) -> key -> EnumMap key a -> EnumMap key a 86 | adjustWithKey f k (EnumMap m) = EnumMap (M.adjustWithKey f' (fromEnum k) m) 87 | where f' b a = f (toEnum b) a 88 | 89 | update :: (Enum key) => (a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a 90 | update f k (EnumMap m) = EnumMap (M.update f (fromEnum k) m) 91 | 92 | updateWithKey :: (Enum key) => (key -> a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a 93 | updateWithKey f k (EnumMap m) = EnumMap (M.updateWithKey f' (fromEnum k) m) 94 | where f' b a = f (toEnum b) a 95 | 96 | updateLookupWithKey :: (Enum key) => (key -> a -> Maybe a) -> key -> EnumMap key a -> (Maybe a, EnumMap key a) 97 | updateLookupWithKey f k (EnumMap m) = (a,EnumMap m') 98 | where (a,m') = M.updateLookupWithKey f' (fromEnum k) m 99 | f' b a1 = f (toEnum b) a1 100 | 101 | union :: (Enum key) => EnumMap key a -> EnumMap key a -> EnumMap key a 102 | union (EnumMap m1) (EnumMap m2) = EnumMap (M.union m1 m2) 103 | 104 | unionWith :: (Enum key) => (a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a 105 | unionWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.unionWith f m1 m2) 106 | 107 | unionWithKey :: (Enum key) => (key -> a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a 108 | unionWithKey f (EnumMap m1) (EnumMap m2) = EnumMap 
(M.unionWithKey f' m1 m2) 109 | where f' b a1 a2 = f (toEnum b) a1 a2 110 | 111 | unions :: (Enum key) => [EnumMap key a] -> EnumMap key a 112 | unions cs = EnumMap (M.unions (L.map unEnumMap cs)) 113 | 114 | unionsWith :: (Enum key) => (a -> a -> a) -> [EnumMap key a] -> EnumMap key a 115 | unionsWith f cs = EnumMap (M.unionsWith f (L.map unEnumMap cs)) 116 | 117 | difference :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a 118 | difference (EnumMap m1) (EnumMap m2) = EnumMap (M.difference m1 m2) 119 | 120 | differenceWith :: (Enum key) => (a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a 121 | differenceWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.differenceWith f m1 m2) 122 | 123 | differenceWithKey :: (Enum key) => (key -> a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a 124 | differenceWithKey f (EnumMap m1) (EnumMap m2) = EnumMap (M.differenceWithKey f' m1 m2) 125 | where f' b a1 a2 = f (toEnum b) a1 a2 126 | 127 | intersection :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a 128 | intersection (EnumMap m1) (EnumMap m2) = EnumMap (M.intersection m1 m2) 129 | 130 | intersectionWith :: (Enum key) => (a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a 131 | intersectionWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.intersectionWith f m1 m2) 132 | 133 | intersectionWithKey :: (Enum key) => (key -> a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a 134 | intersectionWithKey f (EnumMap m1) (EnumMap m2) = EnumMap (M.intersectionWithKey f' m1 m2) 135 | where f' b a1 a2 = f (toEnum b) a1 a2 136 | 137 | map :: (Enum key) => (a -> b) -> EnumMap key a -> EnumMap key b 138 | map f (EnumMap m) = EnumMap (M.map f m) 139 | 140 | mapWithKey :: (Enum key) => (key -> a -> b) -> EnumMap key a -> EnumMap key b 141 | mapWithKey f (EnumMap m) = EnumMap (M.mapWithKey f' m) 142 | where f' b a = f (toEnum b) a 143 | 144 | mapAccum :: (Enum key) => (a -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) 145 | mapAccum f a (EnumMap m) = (a',EnumMap m') 146 | where (a',m') = M.mapAccum f a m 147 | 148 | mapAccumWithKey :: (Enum key) => (a -> key -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) 149 | mapAccumWithKey f a (EnumMap m) = (a',EnumMap m') 150 | where (a',m') = M.mapAccumWithKey f' a m 151 | f' a1 b a2 = f a1 (toEnum b) a2 152 | 153 | fold :: (Enum key) => (a -> b -> b) -> b -> EnumMap key a -> b 154 | fold f a (EnumMap m) = M.foldr f a m 155 | 156 | foldWithKey :: (Enum key) => (key -> a -> b -> b) -> b -> EnumMap key a -> b 157 | foldWithKey f a (EnumMap m) = M.foldrWithKey f' a m 158 | where f' b a1 a2 = f (toEnum b) a1 a2 159 | 160 | elems :: (Enum key) => EnumMap key a -> [a] 161 | elems (EnumMap m) = M.elems m 162 | 163 | keys :: (Enum key) => EnumMap key a -> [key] 164 | keys (EnumMap m) = L.map toEnum (M.keys m) 165 | 166 | -- Have to break cover until I have CharSet 167 | keysSet :: (Enum key) => EnumMap key a -> S.EnumSet key 168 | keysSet (EnumMap m) = S.EnumSet (M.keysSet m) 169 | 170 | assocs :: (Enum key) => EnumMap key a -> [(key, a)] 171 | assocs (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.assocs m) 172 | 173 | toList :: (Enum key) => EnumMap key a -> [(key, a)] 174 | toList (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.toList m) 175 | 176 | fromList :: (Enum key) => [(key, a)] -> EnumMap key a 177 | fromList ka = EnumMap (M.fromList (L.map (\(k,a) -> (fromEnum k,a)) ka)) 178 | 179 | fromListWith :: (Enum key) => (a -> a -> a) -> [(key, a)] -> EnumMap key a 180 | 
fromListWith f ka = EnumMap (M.fromListWith f (L.map (\(k,a) -> (fromEnum k,a)) ka)) 181 | 182 | fromListWithKey :: (Enum key) => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a 183 | fromListWithKey f ka = EnumMap (M.fromListWithKey f' (L.map (\(k,a) -> (fromEnum k,a)) ka)) 184 | where f' b a1 a2 = f (toEnum b) a1 a2 185 | 186 | toAscList :: (Enum key) => EnumMap key a -> [(key, a)] 187 | toAscList (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.toAscList m) 188 | 189 | fromAscList :: (Enum key) => [(key, a)] -> EnumMap key a 190 | fromAscList ka = EnumMap (M.fromAscList (L.map (\(k,a) -> (fromEnum k,a)) ka)) 191 | 192 | fromAscListWith :: (Enum key) => (a -> a -> a) -> [(key, a)] -> EnumMap key a 193 | fromAscListWith f ka = EnumMap (M.fromAscListWith f (L.map (\(k,a) -> (fromEnum k,a)) ka)) 194 | 195 | fromAscListWithKey :: (Enum key) => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a 196 | fromAscListWithKey f ka = EnumMap (M.fromAscListWithKey f' (L.map (\(k,a) -> (fromEnum k,a)) ka)) 197 | where f' b a1 a2 = f (toEnum b) a1 a2 198 | 199 | fromDistinctAscList :: (Enum key) => [(key, a)] -> EnumMap key a 200 | fromDistinctAscList ka = EnumMap (M.fromDistinctAscList (L.map (\(k,a) -> (fromEnum k,a)) ka)) 201 | 202 | filter :: (Enum key) => (a -> Bool) -> EnumMap key a -> EnumMap key a 203 | filter f (EnumMap m) = EnumMap (M.filter f m) 204 | 205 | filterWithKey :: (Enum key) => (key -> a -> Bool) -> EnumMap key a -> EnumMap key a 206 | filterWithKey f (EnumMap m) = EnumMap (M.filterWithKey f' m) 207 | where f' b a = f (toEnum b) a 208 | 209 | partition :: (Enum key) => (a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) 210 | partition f (EnumMap m) = (EnumMap m1', EnumMap m2') 211 | where (m1',m2') = M.partition f m 212 | 213 | partitionWithKey :: (Enum key) => (key -> a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) 214 | partitionWithKey f (EnumMap m) = (EnumMap m1', EnumMap m2') 215 | where (m1',m2') = M.partitionWithKey f' m 216 | f' b a = f (toEnum b) a 217 | 218 | mapMaybe :: (Enum key) => (a -> Maybe b) -> EnumMap key a -> EnumMap key b 219 | mapMaybe f (EnumMap m) = EnumMap (M.mapMaybe f m) 220 | 221 | mapMaybeWithKey :: (Enum key) => (key -> a -> Maybe b) -> EnumMap key a -> EnumMap key b 222 | mapMaybeWithKey f (EnumMap m) = EnumMap (M.mapMaybeWithKey f' m) 223 | where f' b a = f (toEnum b) a 224 | 225 | mapEither :: (Enum key) => (a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) 226 | mapEither f (EnumMap m) = (EnumMap m1', EnumMap m2') 227 | where (m1',m2') = M.mapEither f m 228 | 229 | mapEitherWithKey :: (Enum key) => (key -> a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) 230 | mapEitherWithKey f (EnumMap m) = (EnumMap m1', EnumMap m2') 231 | where (m1',m2') = M.mapEitherWithKey f' m 232 | f' b a = f (toEnum b) a 233 | 234 | split :: (Enum key) => key -> EnumMap key a -> (EnumMap key a, EnumMap key a) 235 | split k (EnumMap m) = (EnumMap m1', EnumMap m2') 236 | where (m1',m2') = M.split (fromEnum k) m 237 | 238 | splitLookup :: (Enum key) => key -> EnumMap key a -> (EnumMap key a, Maybe a, EnumMap key a) 239 | splitLookup k (EnumMap m) = (EnumMap m1', a, EnumMap m2') 240 | where (m1',a,m2') = M.splitLookup (fromEnum k) m 241 | 242 | isSubmapOf :: (Enum key,Eq a) => EnumMap key a -> EnumMap key a -> Bool 243 | isSubmapOf (EnumMap m1) (EnumMap m2) = M.isSubmapOf m1 m2 244 | 245 | isSubmapOfBy :: (Enum key) => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool 246 | isSubmapOfBy f (EnumMap m1) 
(EnumMap m2) = M.isSubmapOfBy f m1 m2 247 | 248 | isProperSubmapOf :: (Enum key,Eq a) => EnumMap key a -> EnumMap key a -> Bool 249 | isProperSubmapOf (EnumMap m1) (EnumMap m2) = M.isProperSubmapOf m1 m2 250 | 251 | isProperSubmapOfBy :: (Enum key) => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool 252 | isProperSubmapOfBy f (EnumMap m1) (EnumMap m2) = M.isProperSubmapOfBy f m1 m2 253 | 254 | showTree :: (Enum key,Show a) => EnumMap key a -> String 255 | showTree (EnumMap m) = MD.showTree m 256 | 257 | showTreeWith :: (Enum key,Show a) => Bool -> Bool -> EnumMap key a -> String 258 | showTreeWith b1 b2 (EnumMap m) = MD.showTreeWith b1 b2 m 259 | -------------------------------------------------------------------------------- /lib/Data/IntSet/EnumSet2.hs: -------------------------------------------------------------------------------- 1 | module Data.IntSet.EnumSet2 where 2 | 3 | import qualified Data.IntSet as S 4 | import qualified Data.List as L (map) 5 | import Data.Semigroup as Sem 6 | 7 | newtype EnumSet e = EnumSet {unEnumSet :: S.IntSet} 8 | deriving (Eq,Ord,Read,Show) 9 | 10 | instance Sem.Semigroup (EnumSet e) where 11 | EnumSet x <> EnumSet y = EnumSet (x `mappend` y) 12 | 13 | instance Monoid (EnumSet e) where 14 | mempty = EnumSet mempty 15 | mappend = (<>) 16 | 17 | (\\) :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e 18 | (\\) (EnumSet s1) (EnumSet s2) = EnumSet ((S.\\) s1 s2) 19 | 20 | null :: (Enum e) => EnumSet e -> Bool 21 | null (EnumSet s) = S.null s 22 | 23 | size :: (Enum e) => EnumSet e -> Int 24 | size (EnumSet s) = S.size s 25 | 26 | member :: (Enum e) => e -> EnumSet e -> Bool 27 | member e (EnumSet s) = S.member (fromEnum e) s 28 | 29 | notMember :: (Enum e) => Int -> EnumSet e -> Bool 30 | notMember e (EnumSet s) = S.notMember (fromEnum e) s 31 | 32 | isSubsetOf :: (Enum e) => EnumSet e -> EnumSet e -> Bool 33 | isSubsetOf (EnumSet e1) (EnumSet e2) = S.isSubsetOf e1 e2 34 | 35 | isProperSubsetOf :: (Enum e) => EnumSet e -> EnumSet e -> Bool 36 | isProperSubsetOf (EnumSet e1) (EnumSet e2) = S.isProperSubsetOf e1 e2 37 | 38 | empty :: (Enum e) => EnumSet e 39 | empty = EnumSet (S.empty) 40 | 41 | singleton :: (Enum e) => e -> EnumSet e 42 | singleton e = EnumSet (S.singleton (fromEnum e)) 43 | 44 | insert :: (Enum e) => e -> EnumSet e -> EnumSet e 45 | insert e (EnumSet s) = EnumSet (S.insert (fromEnum e) s) 46 | 47 | delete :: (Enum e) => e -> EnumSet e -> EnumSet e 48 | delete e (EnumSet s) = EnumSet (S.delete (fromEnum e) s) 49 | 50 | union :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e 51 | union (EnumSet s1) (EnumSet s2) = EnumSet (S.union s1 s2) 52 | 53 | unions :: (Enum e) => [EnumSet e] -> EnumSet e 54 | unions es = EnumSet (S.unions (L.map unEnumSet es)) 55 | 56 | difference :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e 57 | difference (EnumSet e1) (EnumSet e2) = EnumSet (S.difference e1 e2) 58 | 59 | intersection :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e 60 | intersection (EnumSet e1) (EnumSet e2) = EnumSet (S.intersection e1 e2) 61 | 62 | filter :: (Enum e) => (e -> Bool) -> EnumSet e -> EnumSet e 63 | filter f (EnumSet s) = EnumSet (S.filter f' s) 64 | where f' b = f (toEnum b) 65 | 66 | partition :: (Enum e) => (e -> Bool) -> EnumSet e -> (EnumSet e, EnumSet e) 67 | partition f (EnumSet s) = (EnumSet s1', EnumSet s2') 68 | where (s1',s2') = S.partition f' s 69 | f' b = f (toEnum b) 70 | 71 | split :: (Enum e) => e -> EnumSet e -> (EnumSet e, EnumSet e) 72 | split e (EnumSet s) = (EnumSet s1', EnumSet s2') 73 | where 
(s1',s2') = S.split (fromEnum e) s 74 | 75 | splitMember :: (Enum e) => e -> EnumSet e -> (EnumSet e, Bool, EnumSet e) 76 | splitMember e (EnumSet s) = (EnumSet s1',a,EnumSet s2') 77 | where (s1',a,s2') = S.splitMember (fromEnum e) s 78 | 79 | map :: (Enum e) => (e -> e) -> EnumSet e -> EnumSet e 80 | map f (EnumSet s) = EnumSet (S.map f' s) 81 | where f' b = fromEnum (f (toEnum b)) 82 | 83 | fold :: (Enum e) => (e -> b -> b) -> b -> EnumSet e -> b 84 | fold f a (EnumSet s) = S.fold f' a s 85 | where f' b a1 = f (toEnum b) a1 86 | 87 | elems :: (Enum e) => EnumSet e -> [e] 88 | elems (EnumSet s) = L.map toEnum (S.elems s) 89 | 90 | toList :: (Enum e) => EnumSet e -> [e] 91 | toList (EnumSet s) = L.map toEnum (S.toList s) 92 | 93 | fromList :: (Enum e) => [e] -> EnumSet e 94 | fromList es = EnumSet (S.fromList (L.map fromEnum es)) 95 | 96 | toAscList :: (Enum e) => EnumSet e -> [e] 97 | toAscList (EnumSet s) = L.map toEnum (S.toAscList s) 98 | 99 | fromAscList :: (Enum e) => [e] -> EnumSet e 100 | fromAscList es = EnumSet (S.fromAscList (L.map fromEnum es)) 101 | 102 | fromDistinctAscList :: (Enum e) => [e] -> EnumSet e 103 | fromDistinctAscList es = EnumSet (S.fromDistinctAscList (L.map fromEnum es)) 104 | 105 | showTree :: (Enum e) => EnumSet e -> String 106 | showTree (EnumSet s) = S.showTree s 107 | 108 | showTreeWith :: (Enum e) => Bool -> Bool -> EnumSet e -> String 109 | showTreeWith a1 a2 (EnumSet s) = S.showTreeWith a1 a2 s 110 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module: Text.Regex.TDFA 3 | Copyright: (c) Chris Kuklewicz 2007-2009 4 | SPDX-License-Identifier: BSD-3-Clause 5 | Maintainer: Andreas Abel 6 | Stability: stable 7 | 8 | The "Text.Regex.TDFA" module provides a backend for regular 9 | expressions. It provides instances for the classes defined and 10 | documented in "Text.Regex.Base" and re-exported by this module. If 11 | you import this along with other backends then you should do so with 12 | qualified imports (with renaming for convenience). 13 | 14 | This regex-tdfa package implements, correctly, POSIX extended regular 15 | expressions. It is highly unlikely that the @regex-posix@ package on 16 | your operating system is correct, see 17 | for examples of your 18 | OS's bugs. 19 | 20 | = Importing and using 21 | 22 | Declare a dependency on the @regex-tdfa@ library in your @.cabal@ file: 23 | 24 | > build-depends: regex-tdfa ^>= 1.3.2 25 | 26 | In Haskell modules where you want to use regexes simply @import@ /this/ module: 27 | 28 | @ 29 | import "Text.Regex.TDFA" 30 | @ 31 | 32 | = Basics 33 | 34 | >>> let emailRegex = "[a-zA-Z0-9+._-]+\\@[-a-zA-Z]+\\.[a-z]+" 35 | >>> "my email is first-name.lastname_1974@e-mail.com" =~ emailRegex :: Bool 36 | True 37 | 38 | >>> "invalid@mail@com" =~ emailRegex :: Bool 39 | False 40 | 41 | >>> "invalid@mail.COM" =~ emailRegex :: Bool 42 | False 43 | 44 | >>> "#@invalid.com" =~ emailRegex :: Bool 45 | False 46 | 47 | @ 48 | /-- non-monadic/ 49 | λ> \ '=~' \ 50 | 51 | /-- monadic, uses 'fail' on lack of match/ 52 | λ> \ '=~~' \ 53 | @ 54 | 55 | ('=~') and ('=~~') are polymorphic in their return type. This is so that 56 | regex-tdfa can pick the most efficient way to give you your result based on 57 | what you need. For instance, if all you want is to check whether the regex 58 | matched or not, there's no need to allocate a result string. 
If you only want 59 | the first match, rather than all the matches, then the matching engine can stop 60 | after finding a single hit. 61 | 62 | This does mean, though, that you may sometimes have to explicitly specify the 63 | type you want, especially if you're trying things out at the REPL. 64 | 65 | = Common use cases 66 | 67 | == Get the first match 68 | 69 | @ 70 | /-- returns empty string if no match/ 71 | a '=~' b :: String /-- or ByteString, or Text.../ 72 | @ 73 | 74 | >>> "alexis-de-tocqueville" =~ "[a-z]+" :: String 75 | "alexis" 76 | 77 | >>> "alexis-de-tocqueville" =~ "[0-9]+" :: String 78 | "" 79 | 80 | == Check if it matched at all 81 | 82 | @ 83 | a '=~' b :: Bool 84 | @ 85 | 86 | >>> "alexis-de-tocqueville" =~ "[a-z]+" :: Bool 87 | True 88 | 89 | == Get first match + text before/after 90 | 91 | @ 92 | /-- if no match, will just return whole/ 93 | /-- string in the first element of the tuple/ 94 | a =~ b :: (String, String, String) 95 | @ 96 | 97 | >>> "alexis-de-tocqueville" =~ "de" :: (String, String, String) 98 | ("alexis-","de","-tocqueville") 99 | 100 | >>> "alexis-de-tocqueville" =~ "kant" :: (String, String, String) 101 | ("alexis-de-tocqueville","","") 102 | 103 | == Get first match + submatches 104 | 105 | @ 106 | /-- same as above, but also returns a list of just submatches./ 107 | /-- submatch list is empty if regex doesn't match at all/ 108 | a '=~' b :: (String, String, String, [String]) 109 | @ 110 | 111 | >>> "div[attr=1234]" =~ "div\\[([a-z]+)=([^]]+)\\]" :: (String, String, String, [String]) 112 | ("","div[attr=1234]","",["attr","1234"]) 113 | 114 | == Get /all/ matches 115 | 116 | @ 117 | /-- can also return Data.Array instead of List/ 118 | 'getAllTextMatches' (a '=~' b) :: [String] 119 | @ 120 | 121 | >>> getAllTextMatches ("john anne yifan" =~ "[a-z]+") :: [String] 122 | ["john","anne","yifan"] 123 | 124 | >>> getAllTextMatches ("* - . a + z" =~ "[--z]+") :: [String] 125 | ["-",".","a","z"] 126 | 127 | = Feature support 128 | 129 | This package does provide captured parenthesized subexpressions. 130 | 131 | Depending on the text being searched this package supports Unicode. 132 | The @[Char]@, @Text@, @Text.Lazy@, and @(Seq Char)@ text types support Unicode. The @ByteString@ 133 | and @ByteString.Lazy@ text types only support ASCII. 134 | 135 | As of version 1.1.1 the following GNU extensions are recognized, all 136 | anchors: 137 | 138 | * \\\` at beginning of entire text 139 | * \\\' at end of entire text 140 | * \\\< at beginning of word 141 | * \\\> at end of word 142 | * \\b at either beginning or end of word 143 | * \\B at neither beginning nor end of word 144 | 145 | The above are controlled by the 'newSyntax' Bool in 'CompOption'. 146 | 147 | Where the "word" boundaries means between characters that are and are 148 | not in the [:word:] character class which contains [a-zA-Z0-9_]. Note 149 | that \\\< and \\b may match before the entire text and \\\> and \\b may 150 | match at the end of the entire text. 151 | 152 | There is no locale support, so collating elements like [.ch.] are 153 | simply ignored and equivalence classes like [=a=] are converted to 154 | just [a]. The character classes like [:alnum:] are supported over 155 | ASCII only, valid classes are alnum, digit, punct, alpha, graph, 156 | space, blank, lower, upper, cntrl, print, xdigit, word. 157 | 158 | >>> getAllTextMatches ("john anne yifan" =~ "[[:lower:]]+") :: [String] 159 | ["john","anne","yifan"] 160 | 161 | 162 | This package does not provide "basic" regular expressions. 
This 163 | package does not provide back references inside regular expressions. 164 | 165 | The package does not provide Perl style regular expressions. Please 166 | look at the 167 | and packages instead. 168 | 169 | This package does not provide find-and-replace. 170 | 171 | = Avoiding backslashes 172 | 173 | If you find yourself writing a lot of regexes, take a look at 174 | . It'll 175 | let you write regexes without needing to escape all your backslashes. 176 | 177 | @ 178 | \{\-\# LANGUAGE QuasiQuotes \#\-\} 179 | 180 | import Text.RawString.QQ 181 | import Text.Regex.TDFA 182 | 183 | λ> "2 * (3 + 1) / 4" '=~' [r|\\([^)]+\\)|] :: String 184 | "(3 + 1)" 185 | @ 186 | 187 | -} 188 | 189 | module Text.Regex.TDFA(getVersion_Text_Regex_TDFA 190 | ,(=~),(=~~) 191 | ,module Text.Regex.TDFA.Common 192 | ,module Text.Regex.Base) where 193 | 194 | import qualified Control.Monad.Fail as Fail 195 | import Data.Version(Version) 196 | import Text.Regex.Base 197 | import Text.Regex.TDFA.String() 198 | import Text.Regex.TDFA.ByteString() 199 | import Text.Regex.TDFA.ByteString.Lazy() 200 | import Text.Regex.TDFA.Text() 201 | import Text.Regex.TDFA.Text.Lazy() 202 | import Text.Regex.TDFA.Sequence() 203 | import Text.Regex.TDFA.Common(Regex,CompOption(..),ExecOption(..)) 204 | --import Text.Regex.TDFA.Wrap(Regex,CompOption(..),ExecOption(..),(=~),(=~~)) 205 | 206 | import Paths_regex_tdfa(version) 207 | 208 | getVersion_Text_Regex_TDFA :: Version 209 | getVersion_Text_Regex_TDFA = version 210 | 211 | 212 | -- | This is the pure functional matching operator. If the target 213 | -- cannot be produced then some empty result will be returned. If 214 | -- there is an error in processing, then 'error' will be called. 215 | (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) 216 | => source1 -> source -> target 217 | (=~) x r = let make :: RegexMaker Regex CompOption ExecOption a => a -> Regex 218 | make = makeRegex 219 | in match (make r) x 220 | 221 | -- | This is the monadic matching operator. If a single match fails, 222 | -- then 'fail' will be called. 223 | (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target, Fail.MonadFail m) 224 | => source1 -> source -> m target 225 | (=~~) x r = do let make :: (RegexMaker Regex CompOption ExecOption a, Fail.MonadFail m) => a -> m Regex 226 | make = makeRegexM 227 | q <- make r 228 | matchM q x 229 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/ByteString.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 3 | @ByteString@ with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and 4 | "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via 5 | import "Text.Regex.TDFA". 6 | 7 | This exports instances of the high level API and the medium level 8 | API of 'compile','execute', and 'regexec'. 9 | -} 10 | {- By Chris Kuklewicz, 2009. BSD License, see the LICENSE file. 
-} 11 | module Text.Regex.TDFA.ByteString( 12 | Regex 13 | ,CompOption 14 | ,ExecOption 15 | ,compile 16 | ,execute 17 | ,regexec 18 | ) where 19 | 20 | import Data.Array((!),elems) 21 | import qualified Data.ByteString.Char8 as B(ByteString,take,drop,unpack) 22 | 23 | import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..)) 24 | import Text.Regex.Base.Impl(polymatch,polymatchM) 25 | import Text.Regex.TDFA.ReadRegex(parseRegex) 26 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 27 | import Text.Regex.TDFA.TDFA(patternToRegex) 28 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) 29 | 30 | import Data.Maybe(listToMaybe) 31 | import Text.Regex.TDFA.NewDFA.Engine(execMatch) 32 | import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 33 | 34 | instance RegexContext Regex B.ByteString B.ByteString where 35 | match = polymatch 36 | matchM = polymatchM 37 | 38 | instance RegexMaker Regex CompOption ExecOption B.ByteString where 39 | makeRegexOptsM c e source = makeRegexOptsM c e (B.unpack source) 40 | 41 | instance RegexLike Regex B.ByteString where 42 | matchOnce r s = listToMaybe (matchAll r s) 43 | matchAll r s = execMatch r 0 '\n' s 44 | matchCount r s = length (matchAll r' s) 45 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 46 | matchTest = Tester.matchTest 47 | matchOnceText regex source = 48 | fmap (\ma -> let (o,l) = ma!0 49 | in (B.take o source 50 | ,fmap (\ol@(off,len) -> (B.take len (B.drop off source),ol)) ma 51 | ,B.drop (o+l) source)) 52 | (matchOnce regex source) 53 | matchAllText regex source = 54 | map (fmap (\ol@(off,len) -> (B.take len (B.drop off source),ol))) 55 | (matchAll regex source) 56 | 57 | compile :: CompOption -- ^ Flags (summed together) 58 | -> ExecOption -- ^ Flags (summed together) 59 | -> B.ByteString -- ^ The regular expression to compile 60 | -> Either String Regex -- ^ Returns: the compiled regular expression 61 | compile compOpt execOpt bs = 62 | case parseRegex (B.unpack bs) of 63 | Left err -> Left ("parseRegex for Text.Regex.TDFA.ByteString failed:"++show err) 64 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 65 | 66 | execute :: Regex -- ^ Compiled regular expression 67 | -> B.ByteString -- ^ ByteString to match against 68 | -> Either String (Maybe MatchArray) 69 | execute r bs = Right (matchOnce r bs) 70 | 71 | regexec :: Regex -- ^ Compiled regular expression 72 | -> B.ByteString -- ^ ByteString to match against 73 | -> Either String (Maybe (B.ByteString, B.ByteString, B.ByteString, [B.ByteString])) 74 | regexec r txt = Right $ 75 | case matchOnceText r txt of 76 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 77 | -> Just (pre, main, post, rest) 78 | _ -> Nothing 79 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/ByteString/Lazy.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 3 | @ByteString@ with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and 4 | "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via 5 | import "Text.Regex.TDFA". 6 | 7 | This exports instances of the high level API and the medium level 8 | API of 'compile','execute', and 'regexec'. 
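
A brief sketch of the medium level API (this example assumes the 'defaultCompOpt'
and 'defaultExecOpt' values from "Text.Regex.Base" are in scope; the expected
result is shown as a comment):

@
import qualified Data.ByteString.Lazy.Char8 as L

-- expected: Right (Just ("xx", "aaab", "yy", ["aaa","b"]))
example :: Either String (Maybe (L.ByteString, L.ByteString, L.ByteString, [L.ByteString]))
example = do
  re <- compile defaultCompOpt defaultExecOpt (L.pack "(a+)(b)")
  regexec re (L.pack "xxaaabyy")
@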
9 | -} 10 | module Text.Regex.TDFA.ByteString.Lazy( 11 | Regex 12 | ,CompOption 13 | ,ExecOption 14 | ,compile 15 | ,execute 16 | ,regexec 17 | ) where 18 | 19 | import Data.Array.IArray((!),elems,amap) 20 | import qualified Data.ByteString.Lazy.Char8 as L(ByteString,take,drop,unpack) 21 | 22 | import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..)) 23 | import Text.Regex.Base.Impl(polymatch,polymatchM) 24 | import Text.Regex.TDFA.ReadRegex(parseRegex) 25 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 26 | import Text.Regex.TDFA.TDFA(patternToRegex) 27 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) 28 | 29 | import Data.Maybe(listToMaybe) 30 | import Text.Regex.TDFA.NewDFA.Engine(execMatch) 31 | import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 32 | 33 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 34 | 35 | instance RegexContext Regex L.ByteString L.ByteString where 36 | match = polymatch 37 | matchM = polymatchM 38 | 39 | instance RegexMaker Regex CompOption ExecOption L.ByteString where 40 | makeRegexOptsM c e source = makeRegexOptsM c e (L.unpack source) 41 | 42 | instance RegexLike Regex L.ByteString where 43 | matchOnce r s = listToMaybe (matchAll r s) 44 | matchAll r s = execMatch r 0 '\n' s 45 | matchCount r s = length (matchAll r' s) 46 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 47 | matchTest = Tester.matchTest 48 | matchOnceText regex source = 49 | fmap (\ma -> 50 | let (o32,l32) = ma!0 51 | o = fi o32 52 | l = fi l32 53 | in (L.take o source 54 | ,fmap (\ol@(off32,len32) -> 55 | let off = fi off32 56 | len = fi len32 57 | in (L.take len (L.drop off source),ol)) ma 58 | ,L.drop (o+l) source)) 59 | (matchOnce regex source) 60 | matchAllText regex source = 61 | let go i _ _ | i `seq` False = undefined 62 | go _i _t [] = [] 63 | go i t (x:xs) = 64 | let (off0,len0) = x!0 65 | trans pair@(off32,len32) = (L.take (fi len32) (L.drop (fi (off32-i)) t),pair) 66 | t' = L.drop (fi (off0+len0-i)) t 67 | in amap trans x : seq t' (go (off0+len0) t' xs) 68 | in go 0 source (matchAll regex source) 69 | 70 | fi :: (Integral a, Num b) => a -> b 71 | fi = fromIntegral 72 | 73 | compile :: CompOption -- ^ Flags (summed together) 74 | -> ExecOption -- ^ Flags (summed together) 75 | -> L.ByteString -- ^ The regular expression to compile 76 | -> Either String Regex -- ^ Returns: the compiled regular expression 77 | compile compOpt execOpt bs = 78 | case parseRegex (L.unpack bs) of 79 | Left err -> Left ("parseRegex for Text.Regex.TDFA.ByteString failed:"++show err) 80 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 81 | 82 | execute :: Regex -- ^ Compiled regular expression 83 | -> L.ByteString -- ^ ByteString to match against 84 | -> Either String (Maybe MatchArray) 85 | execute r bs = Right (matchOnce r bs) 86 | 87 | regexec :: Regex -- ^ Compiled regular expression 88 | -> L.ByteString -- ^ ByteString to match against 89 | -> Either String (Maybe (L.ByteString, L.ByteString, L.ByteString, [L.ByteString])) 90 | regexec r txt = Right $ 91 | case matchOnceText r txt of 92 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 93 | -> Just (pre, main, post, rest) 94 | _ -> Nothing 95 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Common.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS -funbox-strict-fields #-} 2 | 3 | -- | 
Common provides simple functions to the backend. 4 | -- It defines most of the data types. 5 | -- All modules should call 'error' via the 'common_error' function below. 6 | 7 | module Text.Regex.TDFA.Common where 8 | 9 | import Text.Regex.Base(RegexOptions(..)) 10 | 11 | {- By Chris Kuklewicz, 2007-2009. BSD License, see the LICENSE file. -} 12 | import Data.Array.IArray(Array) 13 | import Data.IntSet.EnumSet2(EnumSet) 14 | import qualified Data.IntSet.EnumSet2 as Set(toList) 15 | import Data.IntMap.CharMap2(CharMap(..)) 16 | import Data.IntMap (IntMap) 17 | import qualified Data.IntMap as IMap (findWithDefault,assocs,toList,null,size,toAscList) 18 | import Data.IntSet(IntSet) 19 | import qualified Data.IntMap.CharMap2 as Map (assocs,toAscList,null) 20 | import Data.Sequence as S(Seq) 21 | --import Debug.Trace 22 | 23 | import Text.Regex.TDFA.IntArrTrieSet(TrieSet) 24 | 25 | {-# INLINE look #-} 26 | look :: Int -> IntMap a -> a 27 | look key imap = IMap.findWithDefault (common_error "Text.Regex.DFA.Common" ("key "++show key++" not found in look")) key imap 28 | 29 | common_error :: String -> String -> a 30 | common_error moduleName message = 31 | error ("Explicit error in module "++moduleName++" : "++message) 32 | 33 | on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2 34 | f `on` g = (\x y -> (g x) `f` (g y)) 35 | 36 | -- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'. 37 | norep :: (Eq a) => [a]->[a] 38 | norep [] = [] 39 | norep x@[_] = x 40 | norep (a:bs@(c:cs)) | a==c = norep (a:cs) 41 | | otherwise = a:norep bs 42 | 43 | -- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'. 44 | norepBy :: (a -> a -> Bool) -> [a] -> [a] 45 | norepBy _ [] = [] 46 | norepBy _ x@[_] = x 47 | norepBy eqF (a:bs@(c:cs)) | a `eqF` c = norepBy eqF (a:cs) 48 | | otherwise = a:norepBy eqF bs 49 | 50 | mapFst :: (Functor f) => (t -> t2) -> f (t, t1) -> f (t2, t1) 51 | mapFst f = fmap (\ (a,b) -> (f a,b)) 52 | 53 | mapSnd :: (Functor f) => (t1 -> t2) -> f (t, t1) -> f (t, t2) 54 | mapSnd f = fmap (\ (a,b) -> (a,f b)) 55 | 56 | fst3 :: (a,b,c) -> a 57 | fst3 (x,_,_) = x 58 | 59 | snd3 :: (a,b,c) -> b 60 | snd3 (_,x,_) = x 61 | 62 | thd3 :: (a,b,c) -> c 63 | thd3 (_,_,x) = x 64 | 65 | flipOrder :: Ordering -> Ordering 66 | flipOrder GT = LT 67 | flipOrder LT = GT 68 | flipOrder EQ = EQ 69 | 70 | noWin :: WinTags -> Bool 71 | noWin = null 72 | 73 | -- | Used to track elements of the pattern that accept characters or are anchors. 74 | newtype DoPa = DoPa {dopaIndex :: Int} deriving (Eq,Ord) 75 | 76 | instance Enum DoPa where 77 | toEnum = DoPa 78 | fromEnum = dopaIndex 79 | 80 | instance Show DoPa where 81 | showsPrec p (DoPa {dopaIndex=i}) = ('#':) . showsPrec p i 82 | 83 | -- | Control whether the pattern is multiline or case-sensitive like Text.Regex and whether to 84 | -- capture the subgroups (\\1, \\2, etc). Controls enabling extra anchor syntax. 85 | data CompOption = CompOption { 86 | caseSensitive :: Bool 87 | -- ^ True in 'blankCompOpt' and 'defaultCompOpt'. 88 | , multiline :: Bool 89 | -- ^ False in 'blankCompOpt', True in 'defaultCompOpt'. 90 | -- Compile for newline-sensitive matching. 91 | -- 92 | -- From [regexp man page](https://www.tcl.tk/man/tcl8.4/TclCmd/regexp.html#M8): 93 | -- "By default, newline is a completely ordinary character with no special meaning in either REs or strings. 
94 | -- With this flag, inverted bracket expressions @[^@ and @.@ never match newline, 95 | -- a @^@ anchor matches the null string after any newline in the string in addition to its normal function, 96 | -- and the @$@ anchor matches the null string before any newline in the string in addition to its normal function." 97 | , rightAssoc :: Bool 98 | -- ^ True (and therefore right associative) in 'blankCompOpt' and 'defaultCompOpt'. 99 | , newSyntax :: Bool 100 | -- ^ False in 'blankCompOpt', True in 'defaultCompOpt'. 101 | -- Enables the extended non-POSIX syntax described in "Text.Regex.TDFA" haddock documentation. 102 | , lastStarGreedy :: Bool 103 | -- ^ False by default. This is POSIX correct but it takes space and is slower. 104 | -- Setting this to True will improve performance, and should be done 105 | -- if you plan to set the 'captureGroups' 'ExecOption' to False. 106 | } deriving (Read,Show) 107 | 108 | data ExecOption = ExecOption { 109 | captureGroups :: Bool -- ^ True by default. Set to False to improve speed (and space). 110 | } deriving (Read,Show) 111 | 112 | -- | Used by implementation to name certain 'Position's during 113 | -- matching. Identity of 'Position' tag to set during a transition. 114 | type Tag = Int 115 | 116 | -- | Internal use to indicate type of tag and preference for larger or smaller 'Position's. 117 | data OP = Maximize | Minimize | Orbit | Ignore deriving (Eq,Show) 118 | 119 | -- | Internal NFA node identity number. 120 | type Index = Int 121 | 122 | -- | Internal DFA identity is this 'Set' of NFA 'Index'. 123 | type SetIndex = IntSet {- Index -} 124 | 125 | -- | Index into the text being searched. 126 | type Position = Int 127 | 128 | -- | 'GroupIndex' is for indexing submatches from capturing parenthesized groups ('PGroup' or 'Group'). 129 | type GroupIndex = Int 130 | 131 | -- | 'GroupInfo' collects the parent and tag information for an instance of a group. 132 | data GroupInfo = GroupInfo { 133 | thisIndex, parentIndex :: GroupIndex 134 | , startTag, stopTag, flagTag :: Tag 135 | } deriving Show 136 | 137 | -- | The TDFA backend specific 'Regex' type, used by this module's 'RegexOptions' and 'RegexMaker'. 138 | data Regex = Regex { 139 | regex_dfa :: DFA -- ^ starting DFA state 140 | , regex_init :: Index -- ^ index of starting state 141 | , regex_b_index :: (Index,Index) -- ^ indexes of smallest and largest states 142 | , regex_b_tags :: (Tag,Tag) -- ^ indexes of smallest and largest tags 143 | , regex_trie :: TrieSet DFA -- ^ All DFA states 144 | , regex_tags :: Array Tag OP -- ^ information about each tag 145 | , regex_groups :: Array GroupIndex [GroupInfo] -- ^ information about each group 146 | , regex_isFrontAnchored :: Bool -- ^ used for optimizing execution 147 | , regex_compOptions :: CompOption 148 | , regex_execOptions :: ExecOption 149 | } -- no deriving at all, the DFA may be too big to ever traverse! 
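
-- A minimal sketch of how 'CompOption' and 'ExecOption' are typically supplied
-- (assuming 'makeRegexOpts', 'match', 'defaultCompOpt' and 'defaultExecOpt' from
-- regex-base, plus the 'String' instances from "Text.Regex.TDFA.String"):
--
-- > re :: Regex
-- > re = makeRegexOpts defaultCompOpt{ caseSensitive = False } defaultExecOpt
-- >        ("ab+c" :: String)
-- >
-- > matched :: Bool
-- > matched = match re "xABBBcy"   -- True, since case is ignored here
--
-- The 'blankCompOpt'\/'defaultCompOpt' and 'blankExecOpt'\/'defaultExecOpt'
-- baselines used above are given by the 'RegexOptions' instance just below.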
150 | 151 | 152 | instance RegexOptions Regex CompOption ExecOption where 153 | blankCompOpt = CompOption { caseSensitive = True 154 | , multiline = False 155 | , rightAssoc = True 156 | , newSyntax = False 157 | , lastStarGreedy = False 158 | } 159 | blankExecOpt = ExecOption { captureGroups = True } 160 | defaultCompOpt = CompOption { caseSensitive = True 161 | , multiline = True 162 | , rightAssoc = True 163 | , newSyntax = True 164 | , lastStarGreedy = False 165 | } 166 | defaultExecOpt = ExecOption { captureGroups = True } 167 | setExecOpts e r = r {regex_execOptions=e} 168 | getExecOpts r = regex_execOptions r 169 | 170 | 171 | data WinEmpty = WinEmpty Instructions 172 | | WinTest WhichTest (Maybe WinEmpty) (Maybe WinEmpty) 173 | deriving Show 174 | 175 | -- | Internal NFA node type. 176 | data QNFA = QNFA {q_id :: Index, q_qt :: QT} 177 | 178 | -- | Internal to 'QNFA' type. 179 | data QT = Simple { qt_win :: WinTags -- ^ empty transitions to the virtual winning state 180 | , qt_trans :: CharMap QTrans -- ^ all ways to leave this QNFA to other or the same QNFA 181 | , qt_other :: QTrans -- ^ default ways to leave this QNFA to other or the same QNFA 182 | } 183 | | Testing { qt_test :: WhichTest -- ^ The test to perform 184 | , qt_dopas :: EnumSet DoPa -- ^ location(s) of the anchor(s) in the original regexp 185 | , qt_a, qt_b :: QT -- ^ use qt_a if test is True, else use qt_b 186 | } 187 | 188 | -- | Internal type to represent the tagged transition from one QNFA to 189 | -- another (or itself). The key is the Index of the destination QNFA. 190 | type QTrans = IntMap {- Destination Index -} [TagCommand] 191 | 192 | -- | Known predicates, just Beginning of Line (^) and End of Line ($). 193 | -- Also support for GNU extensions is being added: \\\` beginning of 194 | -- buffer, \\\' end of buffer, \\\< and \\\> for begin and end of words, \\b 195 | -- and \\B for word boundary and not word boundary. 196 | data WhichTest 197 | = Test_BOL -- ^ @^@ (affected by multiline option) 198 | | Test_EOL -- ^ @$@ (affected by multiline option) 199 | | Test_BOB -- ^ @\\`@ beginning of buffer 200 | | Test_EOB -- ^ @\\'@ end ofbuffer 201 | | Test_BOW -- ^ @\\<@ beginning of word 202 | | Test_EOW -- ^ @\\>@ end of word 203 | | Test_EdgeWord -- ^ @\\b@ word boundary 204 | | Test_NotEdgeWord -- ^ @\\B@ not word boundary 205 | deriving (Show,Eq,Ord,Enum) 206 | 207 | -- | The things that can be done with a Tag. 'TagTask' and 208 | -- 'ResetGroupStopTask' are for tags with Maximize or Minimize OP 209 | -- values. 'ResetOrbitTask' and 'EnterOrbitTask' and 'LeaveOrbitTask' are 210 | -- for tags with Orbit OP value. 211 | data TagTask = TagTask | ResetGroupStopTask | SetGroupStopTask 212 | | ResetOrbitTask | EnterOrbitTask | LeaveOrbitTask deriving (Show,Eq) 213 | 214 | -- | Ordered list of tags and their associated Task. 215 | type TagTasks = [(Tag,TagTask)] 216 | 217 | -- | When attached to a QTrans the TagTask can be done before or after 218 | -- accepting the character. 219 | data TagUpdate = PreUpdate TagTask | PostUpdate TagTask deriving (Show,Eq) 220 | 221 | -- | Ordered list of tags and their associated update operation. 222 | type TagList = [(Tag,TagUpdate)] 223 | 224 | -- | A TagList and the location of the item in the original pattern 225 | -- that is being accepted. 226 | type TagCommand = (DoPa,TagList) 227 | 228 | -- | Ordered list of tags and their associated update operation to 229 | -- perform on an empty transition to the virtual winning state. 
230 | type WinTags = TagList 231 | 232 | -- | Internal DFA node, identified by the Set of indices of the QNFA 233 | -- nodes it represents. 234 | data DFA = DFA { d_id :: SetIndex, d_dt :: DT } deriving(Show) 235 | data Transition = Transition { trans_many :: DFA -- ^ where to go (maximal), including respawning 236 | , trans_single :: DFA -- ^ where to go, not including respawning 237 | , trans_how :: DTrans -- ^ how to go, including respawning 238 | } 239 | -- | Internal to the DFA node 240 | data DT = Simple' { dt_win :: IntMap {- Source Index -} Instructions -- ^ Actions to perform to win 241 | , dt_trans :: CharMap Transition -- ^ Transition to accept Char 242 | , dt_other :: Transition -- ^ default accepting transition 243 | } 244 | | Testing' { dt_test :: WhichTest -- ^ The test to perform 245 | , dt_dopas :: EnumSet DoPa -- ^ location(s) of the anchor(s) in the original regexp 246 | , dt_a,dt_b :: DT -- ^ use dt_a if test is True else use dt_b 247 | } 248 | 249 | -- | Internal type to represent the commands for the tagged transition. 250 | -- The outer 'IntMap' is for the destination Index and the inner 'IntMap' 251 | -- is for the Source Index. This is convenient since all runtime data 252 | -- going to the same destination must be compared to find the best. 253 | -- 254 | -- A Destination 'IntMap' entry may have an empty Source 'IntMap' if and 255 | -- only if the destination is the starting index and the NFA or DFA. 256 | -- This instructs the matching engine to spawn a new entry starting at 257 | -- the post-update position. 258 | type DTrans = IntMap {- Index of Destination -} (IntMap {- Index of Source -} (DoPa,Instructions)) 259 | -- type DTrans = IntMap {- Index of Destination -} (IntMap {- Index of Source -} (DoPa,RunState ())) 260 | 261 | -- | Internal convenience type for the text display code. 262 | type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position,Bool))],[String])))])] 263 | 264 | -- | Positions for which a @*@ was re-started while looping. Need to 265 | -- append locations at back but compare starting with front, so use 266 | -- 'Seq' as a queue. The initial position is saved in 'basePos' (and a 267 | -- Maximize Tag), the middle positions in the 'Seq', and the final 268 | -- position is NOT saved in the Orbits (only in a Maximize Tag). 269 | data Orbits = Orbits 270 | { inOrbit :: !Bool -- True if enterOrbit, False if LeaveOrbit 271 | , basePos :: Position 272 | , ordinal :: (Maybe Int) 273 | , getOrbits :: !(Seq Position) 274 | } deriving (Show) 275 | 276 | -- | The 'newPos' and 'newFlags' lists in Instructions are sorted by, and unique in, the Tag values 277 | data Instructions = Instructions 278 | { newPos :: ![(Tag,Action)] -- False is preUpdate, True is postUpdate (there are no Orbit tags here) -- 2009 : Change to enum from bool? 279 | , newOrbits :: !(Maybe (Position -> OrbitTransformer)) 280 | } 281 | 282 | instance Show Instructions where 283 | showsPrec p (Instructions pos _) 284 | = showParen (p >= 11) $ 285 | showString "Instructions {" . 286 | showString "newPos = " . 287 | showsPrec 0 pos . 288 | showString ", " . 289 | showString "newOrbits = " . 290 | showString "" . 
291 | showString "}" 292 | 293 | data Action = SetPre | SetPost | SetVal Int deriving (Show,Eq) 294 | type OrbitTransformer = OrbitLog -> OrbitLog 295 | type OrbitLog = IntMap Orbits 296 | 297 | instance Show QNFA where 298 | show (QNFA {q_id = i, q_qt = qt}) = "QNFA {q_id = "++show i 299 | ++"\n ,q_qt = "++ show qt 300 | ++"\n}" 301 | 302 | instance Show QT where 303 | show = showQT 304 | 305 | showQT :: QT -> String 306 | showQT (Simple win trans other) = "{qt_win=" ++ show win 307 | ++ "\n, qt_trans=" ++ show (foo trans) 308 | ++ "\n, qt_other=" ++ show (foo' other) ++ "}" 309 | where foo :: CharMap QTrans -> [(Char,[(Index,[TagCommand])])] 310 | foo = mapSnd foo' . Map.toAscList 311 | foo' :: QTrans -> [(Index,[TagCommand])] 312 | foo' = IMap.toList 313 | showQT (Testing test dopas a b) = "{Testing "++show test++" "++show (Set.toList dopas) 314 | ++"\n"++indent' a 315 | ++"\n"++indent' b++"}" 316 | where indent' = init . unlines . map (spaces++) . lines . showQT 317 | spaces = replicate 9 ' ' 318 | 319 | instance Show DT where show = showDT 320 | 321 | indent :: [String] -> String 322 | indent = unlines . map (\x -> ' ':' ':x) 323 | 324 | showDT :: DT -> String 325 | showDT (Simple' w t o) = 326 | "Simple' { dt_win = " ++ seeWin1 327 | ++ "\n , dt_trans = " ++ seeTrans1 328 | ++ "\n , dt_other = " ++ seeOther1 o 329 | ++ "\n }" 330 | where 331 | seeWin1 | IMap.null w = "No win" 332 | | otherwise = indent . map show . IMap.assocs $ w 333 | 334 | seeTrans1 :: String 335 | seeTrans1 | Map.null t = "No (Char,Transition)" 336 | | otherwise = ('\n':) . indent $ 337 | map (\(char,Transition {trans_many=dfa,trans_single=dfa2,trans_how=dtrans}) -> 338 | concat ["(" 339 | ,show char 340 | ,", MANY " 341 | ,show (d_id dfa) 342 | ,", SINGLE " 343 | ,show (d_id dfa2) 344 | ,", \n" 345 | ,seeDTrans dtrans 346 | ,")"]) (Map.assocs t) 347 | 348 | seeOther1 (Transition {trans_many=dfa,trans_single=dfa2,trans_how=dtrans}) = 349 | concat ["(MANY " 350 | ,show (d_id dfa) 351 | ,", SINGLE " 352 | ,show (d_id dfa2) 353 | ,", \n" 354 | ,seeDTrans dtrans 355 | ,")"] 356 | 357 | showDT (Testing' wt d a b) = "Testing' { dt_test = " ++ show wt 358 | ++ "\n , dt_dopas = " ++ show d 359 | ++ "\n , dt_a = " ++ indent' a 360 | ++ "\n , dt_b = " ++ indent' b 361 | ++ "\n }" 362 | where indent' = init . unlines . (\s -> case s of 363 | [] -> [] 364 | (h:t) -> h : (map (spaces ++) t)) . lines . showDT 365 | spaces = replicate 10 ' ' 366 | 367 | 368 | seeDTrans :: DTrans -> String 369 | --seeDTrans x = concatMap (\(dest,y) -> unlines . map (\(source,ins) -> show (dest,source,ins) ) . IMap.assocs $ y) (IMap.assocs x) 370 | seeDTrans x | IMap.null x = "No DTrans" 371 | seeDTrans x = concatMap seeSource (IMap.assocs x) 372 | where seeSource (dest,srcMap) | IMap.null srcMap = indent [show (dest,"SPAWN")] 373 | | otherwise = indent . map (\(source,ins) -> show (dest,source,ins) ) . 
IMap.assocs $ srcMap 374 | -- spawnIns = Instructions { newPos = [(0,SetPost)], newOrbits = Nothing } 375 | 376 | 377 | instance Eq QT where 378 | t1@(Testing {}) == t2@(Testing {}) = 379 | (qt_test t1) == (qt_test t2) && (qt_a t1) == (qt_a t2) && (qt_b t1) == (qt_b t2) 380 | (Simple w1 (CharMap t1) o1) == (Simple w2 (CharMap t2) o2) = 381 | w1 == w2 && eqTrans && eqQTrans o1 o2 382 | where eqTrans :: Bool 383 | eqTrans = (IMap.size t1 == IMap.size t2) 384 | && and (zipWith together (IMap.toAscList t1) (IMap.toAscList t2)) 385 | where together (c1,qtrans1) (c2,qtrans2) = (c1 == c2) && eqQTrans qtrans1 qtrans2 386 | eqQTrans :: QTrans -> QTrans -> Bool 387 | eqQTrans = (==) 388 | _ == _ = False 389 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/IntArrTrieSet.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | This creates a lazy Trie based on a finite range of Ints and is used to 3 | memorize a function over the subsets of this range. 4 | 5 | To create a Trie you need two supply 2 things 6 | * Range of keys to bound 7 | * A function or functions used to construct the value for a subset of keys 8 | 9 | The Trie uses the Array type internally. 10 | -} 11 | module Text.Regex.TDFA.IntArrTrieSet where 12 | 13 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 14 | 15 | import Data.Array.IArray(Array,(!),listArray) 16 | 17 | data TrieSet v = TrieSet { value :: v 18 | , next :: Array Int (TrieSet v) } 19 | 20 | -- | This is the accessor for the Trie. The list of keys should be 21 | -- sorted. 22 | lookupAsc :: TrieSet v -> [Int] -> v 23 | lookupAsc (TrieSet {value=v,next=n}) = 24 | (\keys -> case keys of [] -> v 25 | (key:keys') -> lookupAsc (n!key) keys') 26 | 27 | -- | This is a Trie constructor for a complete range of keys. 28 | fromBounds :: (Int,Int) -- ^ (lower,upper) range of keys, lower<=upper 29 | -> ([Int] -> v) -- ^ Function from list of keys to its value. 30 | -- It must work for distinct ascending lists. 31 | -> TrieSet v -- ^ The constructed Trie 32 | fromBounds (start,stop) keysToValue = build id start where 33 | build keys low = TrieSet { value = keysToValue (keys []) 34 | , next = listArray (low,stop) 35 | [build (keys.(x:)) (succ x) | x <- [low..stop] ] } 36 | 37 | -- | This is a Trie constructor for a complete range of keys that uses 38 | -- a function from single values and a merge operation on values to 39 | -- fill the Trie. 40 | fromSinglesMerge :: v -- ^ value for (lookupAsc trie []) 41 | -> (v->v->v) -- ^ merge operation on values 42 | -> (Int,Int) -- ^ (lower,upper) range of keys, lower<=upper 43 | -> (Int->v) -- ^ Function from a single key to its value 44 | -> TrieSet v -- ^ The constructed Trie 45 | fromSinglesMerge emptyValue mergeValues bound keyToValue = trieSet where 46 | trieSet = fromBounds bound keysToValue' 47 | keysToValue' keys = 48 | case keys of 49 | [] -> emptyValue 50 | [key] -> keyToValue key 51 | _ -> mergeValues (keysToValue (init keys)) (keysToValue [last keys]) 52 | keysToValue = lookupAsc trieSet 53 | 54 | -- | This is a Trie constructor for a complete range of keys that uses 55 | -- a function from single values and a sum operation of values to fill 56 | -- the Trie. 
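--
-- For example, working through the definitions by hand,
-- @lookupAsc (fromSinglesSum sum (1,4) (*10)) [1,3]@ evaluates to @40@
-- and @lookupAsc (fromSinglesSum sum (1,4) (*10)) []@ to @0@.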
57 | fromSinglesSum :: ([v]->v) -- ^ summation operation for values 58 | -> (Int,Int) -- ^ (lower,upper) range of keys, lower <= upper 59 | -> (Int->v) -- ^ Function from a single key to its value 60 | -> TrieSet v -- ^ The constructed Trie 61 | fromSinglesSum mergeValues bound keyToValue = trieSet where 62 | trieSet = fromBounds bound keysToValue' 63 | keysToValue' = mergeValues . map keyToValue 64 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/Engine_NC.hs: -------------------------------------------------------------------------------- 1 | -- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String 2 | module Text.Regex.TDFA.NewDFA.Engine_NC(execMatch) where 3 | 4 | import Control.Monad(when,join,filterM) 5 | import Data.Array.Base(unsafeRead,unsafeWrite) 6 | import Prelude hiding ((!!)) 7 | 8 | import Data.Array.MArray(MArray(..)) 9 | import Data.Array.Unsafe(unsafeFreeze) 10 | import Data.Array.IArray(Ix) 11 | import Data.Array.ST(STArray,STUArray) 12 | import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) 13 | import qualified Data.IntMap as IMap(null,toList,keys,member) 14 | import qualified Data.IntSet as ISet(toAscList) 15 | import Data.STRef(STRef,newSTRef,readSTRef,writeSTRef) 16 | import qualified Control.Monad.ST.Lazy as L(runST,strictToLazyST) 17 | import qualified Control.Monad.ST.Strict as S(ST) 18 | import Data.Sequence(Seq) 19 | import qualified Data.ByteString.Char8 as SBS(ByteString) 20 | import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) 21 | 22 | import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) 23 | import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc) 24 | import Text.Regex.TDFA.Common hiding (indent) 25 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) 26 | import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) 27 | 28 | -- import Debug.Trace 29 | 30 | -- trace :: String -> a -> a 31 | -- trace _ a = a 32 | 33 | err :: String -> a 34 | err s = common_error "Text.Regex.TDFA.NewDFA.Engine_NC" s 35 | 36 | {-# INLINE (!!) #-} 37 | (!!) :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> S.ST s e 38 | (!!) 
= unsafeRead 39 | {-# INLINE set #-} 40 | set :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> e -> S.ST s () 41 | set = unsafeWrite 42 | 43 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} 44 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} 45 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} 46 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} 47 | execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 48 | execMatch (Regex { regex_dfa = (DFA {d_id=didIn,d_dt=dtIn}) 49 | , regex_init = startState 50 | , regex_b_index = b_index 51 | , regex_trie = trie 52 | , regex_compOptions = CompOption { multiline = newline } } ) 53 | offsetIn prevIn inputIn = L.runST runCaptureGroup where 54 | 55 | !test = mkTest newline 56 | 57 | runCaptureGroup = {-# SCC "runCaptureGroup" #-} do 58 | obtainNext <- L.strictToLazyST constructNewEngine 59 | let loop = do vals <- L.strictToLazyST obtainNext 60 | if null vals -- force vals before defining valsRest 61 | then return [] 62 | else do valsRest <- loop 63 | return (vals ++ valsRest) 64 | loop 65 | 66 | constructNewEngine :: S.ST s (S.ST s [MatchArray]) 67 | constructNewEngine = {-# SCC "constructNewEngine" #-} do 68 | storeNext <- newSTRef undefined 69 | writeSTRef storeNext (goNext storeNext) 70 | let obtainNext = join (readSTRef storeNext) 71 | return obtainNext 72 | 73 | goNext storeNext = {-# SCC "goNext" #-} do 74 | (SScratch s1In s2In winQ) <- newScratch b_index 75 | set s1In startState offsetIn 76 | writeSTRef storeNext (err "obtainNext called while goNext is running!") 77 | eliminatedStateFlag <- newSTRef False 78 | let next s1 s2 did dt offset prev input = {-# SCC "goNext.next" #-} 79 | case dt of 80 | Testing' {dt_test=wt,dt_a=a,dt_b=b} -> 81 | if test wt offset prev input 82 | then next s1 s2 did a offset prev input 83 | else next s1 s2 did b offset prev input 84 | Simple' {dt_win=w,dt_trans=t, dt_other=o} 85 | | IMap.null w -> 86 | case uncons input of 87 | Nothing -> finalizeWinners 88 | Just (c,input') -> do 89 | case CMap.findWithDefault o c t of 90 | Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> 91 | findTrans s1 s2 did' dt' dtrans offset c input' 92 | | otherwise -> do 93 | (did',dt') <- processWinner s1 did dt w offset 94 | next' s1 s2 did' dt' offset prev input 95 | 96 | next' s1 s2 did dt offset prev input = {-# SCC "goNext'.next" #-} 97 | case dt of 98 | Testing' {dt_test=wt,dt_a=a,dt_b=b} -> 99 | if test wt offset prev input 100 | then next' s1 s2 did a offset prev input 101 | else next' s1 s2 did b offset prev input 102 | Simple' {dt_trans=t, dt_other=o} -> 103 | case uncons input of 104 | Nothing -> finalizeWinners 105 | Just (c,input') -> do 106 | case CMap.findWithDefault o c t of 107 | Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> 108 | findTrans s1 s2 did' dt' dtrans offset c input' 109 | 110 | findTrans s1 s2 did' dt' dtrans offset prev' input' = {-# SCC "goNext.findTrans" #-} do 111 | -- 112 | let findTransTo (destIndex,sources) = do 113 | val <- if IMap.null sources then return (succ offset) 114 | else return . minimum =<< mapM (s1 !!) 
(IMap.keys sources) 115 | set s2 destIndex val 116 | return val 117 | earlyStart <- fmap minimum $ mapM findTransTo (IMap.toList dtrans) 118 | -- 119 | earlyWin <- readSTRef (mq_earliest winQ) 120 | if earlyWin < earlyStart 121 | then do 122 | winnersR <- getMQ earlyStart winQ 123 | writeSTRef storeNext (next s2 s1 did' dt' (succ offset) prev' input') 124 | mapM wsToGroup (reverse winnersR) 125 | else do 126 | let offset' = succ offset in seq offset' $ next s2 s1 did' dt' offset' prev' input' 127 | 128 | processWinner s1 did dt w offset = {-# SCC "goNext.newWinnerThenProceed" #-} do 129 | let getStart (sourceIndex,_) = s1 !! sourceIndex 130 | vals <- mapM getStart (IMap.toList w) 131 | let low = minimum vals -- perhaps a non-empty winner 132 | high = maximum vals -- perhaps an empty winner 133 | if low < offset 134 | then do 135 | putMQ (WScratch low offset) winQ 136 | when (high==offset || IMap.member startState w) $ 137 | putMQ (WScratch offset offset) winQ 138 | let keepState i1 = do 139 | startsAt <- s1 !! i1 140 | let keep = (startsAt <= low) || (offset <= startsAt) 141 | if keep 142 | then return True 143 | else if i1 == startState 144 | then {- check for additional empty winner -} 145 | set s1 i1 (succ offset) >> return True 146 | else writeSTRef eliminatedStateFlag True >> return False 147 | states' <- filterM keepState (ISet.toAscList did) 148 | flag <- readSTRef eliminatedStateFlag 149 | if flag 150 | then do 151 | writeSTRef eliminatedStateFlag False 152 | let DFA {d_id=did',d_dt=dt'} = Trie.lookupAsc trie states' 153 | return (did',dt') 154 | else do 155 | return (did,dt) 156 | else do 157 | -- offset == low == minimum vals == maximum vals == high; vals == [offset] 158 | putMQ (WScratch offset offset) winQ 159 | return (did,dt) 160 | 161 | finalizeWinners = do 162 | winnersR <- readSTRef (mq_list winQ) 163 | resetMQ winQ 164 | writeSTRef storeNext (return []) 165 | mapM wsToGroup (reverse winnersR) 166 | 167 | -- goNext then ends with the next statement 168 | next s1In s2In didIn dtIn offsetIn prevIn inputIn 169 | 170 | ---- 171 | 172 | {-# INLINE mkTest #-} 173 | mkTest :: Uncons text => Bool -> WhichTest -> Index -> Char -> text -> Bool 174 | mkTest isMultiline = if isMultiline then test_multiline else test_singleline 175 | 176 | ---- 177 | 178 | {- MUTABLE WINNER QUEUE -} 179 | 180 | data MQ s = MQ { mq_earliest :: !(STRef s Position) 181 | , mq_list :: !(STRef s [WScratch]) 182 | } 183 | 184 | newMQ :: S.ST s (MQ s) 185 | newMQ = do 186 | earliest <- newSTRef maxBound 187 | list <- newSTRef [] 188 | return (MQ earliest list) 189 | 190 | resetMQ :: MQ s -> S.ST s () 191 | resetMQ (MQ {mq_earliest=earliest,mq_list=list}) = do 192 | writeSTRef earliest maxBound 193 | writeSTRef list [] 194 | 195 | putMQ :: WScratch -> MQ s -> S.ST s () 196 | putMQ ws@(WScratch {ws_start=start}) (MQ {mq_earliest=earliest,mq_list=list}) = do 197 | startE <- readSTRef earliest 198 | if start <= startE 199 | then writeSTRef earliest start >> writeSTRef list [ws] 200 | else do 201 | old <- readSTRef list 202 | let !rest = dropWhile (\ w -> start <= ws_start w) old 203 | !new = ws : rest 204 | writeSTRef list new 205 | 206 | getMQ :: Position -> MQ s -> S.ST s [WScratch] 207 | getMQ pos (MQ {mq_earliest=earliest,mq_list=list}) = do 208 | old <- readSTRef list 209 | case span (\ w -> pos <= ws_start w) old of 210 | ([],ans) -> do 211 | writeSTRef earliest maxBound 212 | writeSTRef list [] 213 | return ans 214 | (new,ans) -> do 215 | writeSTRef earliest (ws_start (last new)) 216 | writeSTRef list new 
217 | return ans 218 | 219 | {- MUTABLE SCRATCH DATA STRUCTURES -} 220 | 221 | data SScratch s = SScratch { _s_1 :: !(MScratch s) 222 | , _s_2 :: !(MScratch s) 223 | , _s_mq :: !(MQ s) 224 | } 225 | type MScratch s = STUArray s Index Position 226 | data WScratch = WScratch {ws_start,_ws_stop :: !Position} 227 | deriving Show 228 | 229 | {- DEBUGGING HELPERS -} 230 | {- CREATING INITIAL MUTABLE SCRATCH DATA STRUCTURES -} 231 | 232 | {-# INLINE newA #-} 233 | newA :: (MArray (STUArray s) e (S.ST s)) => (Tag,Tag) -> e -> S.ST s (STUArray s Tag e) 234 | newA b_tags initial = newArray b_tags initial 235 | 236 | newScratch :: (Index,Index) -> S.ST s (SScratch s) 237 | newScratch b_index = do 238 | s1 <- newMScratch b_index 239 | s2 <- newMScratch b_index 240 | winQ <- newMQ 241 | return (SScratch s1 s2 winQ) 242 | 243 | newMScratch :: (Index,Index) -> S.ST s (MScratch s) 244 | newMScratch b_index = newA b_index (-1) 245 | 246 | {- CONVERT WINNERS TO MATCHARRAY -} 247 | 248 | wsToGroup :: WScratch -> S.ST s MatchArray 249 | wsToGroup (WScratch start stop) = do 250 | ma <- newArray (0,0) (start,stop-start) :: S.ST s (STArray s Int (MatchOffset,MatchLength)) 251 | unsafeFreeze ma 252 | 253 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/Engine_NC_FA.hs: -------------------------------------------------------------------------------- 1 | -- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String 2 | module Text.Regex.TDFA.NewDFA.Engine_NC_FA(execMatch) where 3 | 4 | import Control.Monad(unless) 5 | import Prelude hiding ((!!)) 6 | 7 | import Data.Array.MArray(MArray(..)) 8 | import Data.Array.Unsafe(unsafeFreeze) 9 | import Data.Array.ST(STArray) 10 | import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) 11 | import qualified Data.IntMap as IMap(null) 12 | import qualified Data.IntSet as ISet(null) 13 | import qualified Data.Array.MArray() 14 | import Data.STRef(newSTRef,readSTRef,writeSTRef) 15 | import qualified Control.Monad.ST.Strict as S(ST,runST) 16 | import Data.Sequence(Seq) 17 | import qualified Data.ByteString.Char8 as SBS(ByteString) 18 | import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) 19 | 20 | import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) 21 | import Text.Regex.TDFA.Common hiding (indent) 22 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) 23 | import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline) 24 | 25 | --import Debug.Trace 26 | 27 | -- trace :: String -> a -> a 28 | -- trace _ a = a 29 | 30 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} 31 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} 32 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} 33 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} 34 | execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 35 | execMatch (Regex { regex_dfa = DFA {d_dt=dtIn} }) 36 | offsetIn _prevIn inputIn = S.runST goNext where 37 | 38 | test wt off input = test_singleline wt off '\n' input 39 | 40 | goNext = {-# SCC "goNext" #-} do 41 | winQ <- newSTRef Nothing 42 | let next dt offset input = {-# SCC "goNext.next" #-} 43 | case dt of 44 | Testing' {dt_test=wt,dt_a=a,dt_b=b} -> 45 | if test wt offset input 46 | then next a offset input 47 | else next b offset input 48 | Simple' {dt_win=w,dt_trans=t, dt_other=o} -> do 49 | unless (IMap.null w) $ 50 
| writeSTRef winQ (Just offset) 51 | case uncons input of 52 | Nothing -> finalizeWinner 53 | Just (c,input') -> do 54 | case CMap.findWithDefault o c t of 55 | Transition {trans_single=DFA {d_id=did',d_dt=dt'}} 56 | | ISet.null did' -> finalizeWinner 57 | | otherwise -> 58 | let offset' = succ offset 59 | in seq offset' $ next dt' offset' input' 60 | 61 | finalizeWinner = do 62 | mWinner <- readSTRef winQ 63 | case mWinner of 64 | Nothing -> return [] 65 | Just winner -> mapM (makeGroup offsetIn) [winner] 66 | 67 | next dtIn offsetIn inputIn 68 | 69 | ---- 70 | 71 | {- CONVERT WINNERS TO MATCHARRAY -} 72 | 73 | makeGroup :: Position -> Position -> S.ST s MatchArray 74 | makeGroup start stop = do 75 | ma <- newArray (0,0) (start,stop-start) :: S.ST s (STArray s Int (MatchOffset,MatchLength)) 76 | unsafeFreeze ma 77 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/MakeTest.hs: -------------------------------------------------------------------------------- 1 | module Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) where 2 | 3 | import qualified Data.IntSet as ISet(IntSet,member,fromAscList) 4 | import Text.Regex.TDFA.Common(WhichTest(..),Index) 5 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) 6 | 7 | {-# INLINE test_singleline #-} 8 | {-# INLINE test_multiline #-} 9 | {-# INLINE test_common #-} 10 | test_singleline,test_multiline,test_common :: Uncons text => WhichTest -> Index -> Char -> text -> Bool 11 | test_multiline Test_BOL _off prev _input = prev == '\n' 12 | test_multiline Test_EOL _off _prev input = case uncons input of 13 | Nothing -> True 14 | Just (next,_) -> next == '\n' 15 | test_multiline test off prev input = test_common test off prev input 16 | 17 | test_singleline Test_BOL off _prev _input = off == 0 18 | test_singleline Test_EOL _off _prev input = case uncons input of 19 | Nothing -> True 20 | _ -> False 21 | test_singleline test off prev input = test_common test off prev input 22 | 23 | test_common Test_BOB off _prev _input = off==0 24 | test_common Test_EOB _off _prev input = case uncons input of 25 | Nothing -> True 26 | _ -> False 27 | test_common Test_BOW _off prev input = not (isWord prev) && case uncons input of 28 | Nothing -> False 29 | Just (c,_) -> isWord c 30 | test_common Test_EOW _off prev input = isWord prev && case uncons input of 31 | Nothing -> True 32 | Just (c,_) -> not (isWord c) 33 | test_common Test_EdgeWord _off prev input = 34 | if isWord prev 35 | then case uncons input of Nothing -> True 36 | Just (c,_) -> not (isWord c) 37 | else case uncons input of Nothing -> False 38 | Just (c,_) -> isWord c 39 | test_common Test_NotEdgeWord _off prev input = not (test_common Test_EdgeWord _off prev input) 40 | 41 | test_common Test_BOL _ _ _ = undefined 42 | test_common Test_EOL _ _ _ = undefined 43 | 44 | isWord :: Char -> Bool 45 | isWord c = ISet.member (fromEnum c) wordSet 46 | where wordSet :: ISet.IntSet 47 | wordSet = ISet.fromAscList . map fromEnum $ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" 48 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/Tester.hs: -------------------------------------------------------------------------------- 1 | -- | Like Engine, but merely checks to see whether any match at all is found. 
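-- This backs the 'matchTest' method of the 'RegexLike' instances (e.g. in
-- "Text.Regex.TDFA.ByteString"), so no capture bookkeeping is done.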
2 | -- 3 | module Text.Regex.TDFA.NewDFA.Tester(matchTest) where 4 | 5 | import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) 6 | import qualified Data.IntMap as IMap(null) 7 | import qualified Data.IntSet as ISet(null) 8 | 9 | import Data.Sequence(Seq) 10 | import qualified Data.ByteString.Char8 as SBS(ByteString) 11 | import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) 12 | 13 | import Text.Regex.Base() 14 | import Text.Regex.TDFA.Common hiding (indent) 15 | import Text.Regex.TDFA.NewDFA.Uncons (Uncons(uncons)) 16 | import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) 17 | 18 | {-# SPECIALIZE matchTest :: Regex -> ([] Char) -> Bool #-} 19 | {-# SPECIALIZE matchTest :: Regex -> (Seq Char) -> Bool #-} 20 | {-# SPECIALIZE matchTest :: Regex -> SBS.ByteString -> Bool #-} 21 | {-# SPECIALIZE matchTest :: Regex -> LBS.ByteString -> Bool #-} 22 | matchTest :: Uncons text => Regex -> text -> Bool 23 | matchTest (Regex { regex_dfa = dfaIn 24 | , regex_isFrontAnchored = ifa } ) 25 | inputIn = ans where 26 | 27 | ans = case ifa of 28 | True -> single0 (d_dt dfaIn) inputIn 29 | False -> multi0 (d_dt dfaIn) inputIn 30 | 31 | multi0 (Testing' {dt_test=wt,dt_a=a,dt_b=b}) input = 32 | if test0 wt input 33 | then multi0 a input 34 | else multi0 b input 35 | multi0 (Simple' {dt_win=w,dt_trans=t, dt_other=o}) input 36 | | IMap.null w = 37 | case uncons input of 38 | Nothing -> False 39 | Just (c,input') -> 40 | case CMap.findWithDefault o c t of 41 | Transition {trans_many=DFA {d_dt=dt'}} -> multi dt' c input' 42 | | otherwise = True 43 | 44 | multi (Testing' {dt_test=wt,dt_a=a,dt_b=b}) prev input = 45 | if test wt prev input 46 | then multi a prev input 47 | else multi b prev input 48 | multi (Simple' {dt_win=w,dt_trans=t, dt_other=o}) _prev input 49 | | IMap.null w = 50 | case uncons input of 51 | Nothing -> False 52 | Just (c,input') -> 53 | case CMap.findWithDefault o c t of 54 | Transition {trans_many=DFA {d_dt=dt'}} -> multi dt' c input' 55 | | otherwise = True 56 | 57 | single0 (Testing' {dt_test=wt,dt_a=a,dt_b=b}) input = 58 | if testFA0 wt input 59 | then single0 a input 60 | else single0 b input 61 | single0 (Simple' {dt_win=w,dt_trans=t, dt_other=o}) input 62 | | IMap.null w = 63 | case uncons input of 64 | Nothing -> False 65 | Just (c,input') -> 66 | case CMap.findWithDefault o c t of 67 | Transition {trans_single=DFA {d_id=did',d_dt=dt'}} 68 | | ISet.null did' -> False 69 | | otherwise -> single dt' c input' 70 | | otherwise = True 71 | 72 | single (Testing' {dt_test=wt,dt_a=a,dt_b=b}) prev input = 73 | if testFA wt prev input 74 | then single a prev input 75 | else single b prev input 76 | single (Simple' {dt_win=w,dt_trans=t, dt_other=o}) _prev input 77 | | IMap.null w = 78 | case uncons input of 79 | Nothing -> False 80 | Just (c,input') -> 81 | case CMap.findWithDefault o c t of 82 | Transition {trans_single=DFA {d_id=did',d_dt=dt'}} 83 | | ISet.null did' -> False 84 | | otherwise -> single dt' c input' 85 | | otherwise = True 86 | 87 | {-# INLINE testFA0 #-} 88 | testFA0 :: Uncons text => WhichTest -> text -> Bool 89 | testFA0 wt text = test_singleline wt 0 '\n' text 90 | 91 | {-# INLINE testFA #-} 92 | testFA :: Uncons text => WhichTest -> Char -> text -> Bool 93 | testFA wt prev text = test_singleline wt 1 prev text 94 | 95 | {-# INLINE test0 #-} 96 | test0 :: Uncons text => WhichTest -> text -> Bool 97 | test0 wt input = test_multiline wt 0 '\n' input 98 | 99 | {-# INLINE test #-} 100 | test :: Uncons text => WhichTest -> Char -> text -> Bool 101 
| test wt prev input = test_multiline wt 1 prev input 102 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/Uncons.hs: -------------------------------------------------------------------------------- 1 | module Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) where 2 | 3 | import qualified Data.ByteString.Char8 as SBS(ByteString,uncons) 4 | import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString,uncons) 5 | import Data.Sequence(Seq,viewl,ViewL(EmptyL,(:<))) 6 | import qualified Data.Text as T 7 | import qualified Data.Text.Lazy as TL 8 | 9 | class Uncons a where 10 | {- INLINE uncons #-} 11 | uncons :: a -> Maybe (Char,a) 12 | 13 | instance Uncons ([] Char) where 14 | {- INLINE uncons #-} 15 | uncons [] = Nothing 16 | uncons (x:xs) = Just (x,xs) 17 | 18 | instance Uncons (Seq Char) where 19 | {- INLINE uncons #-} 20 | uncons s = case viewl s of 21 | EmptyL -> Nothing 22 | x :< xs -> Just (x,xs) 23 | 24 | instance Uncons SBS.ByteString where 25 | {- INLINE uncons #-} 26 | uncons = SBS.uncons 27 | 28 | instance Uncons LBS.ByteString where 29 | {- INLINE uncons #-} 30 | uncons = LBS.uncons 31 | 32 | -- | @since 1.3.1 33 | instance Uncons T.Text where 34 | {- INLINE uncons #-} 35 | uncons = T.uncons 36 | 37 | -- | @since 1.3.1 38 | instance Uncons TL.Text where 39 | {- INLINE uncons #-} 40 | uncons = TL.uncons 41 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Pattern.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-} 2 | 3 | -- | This "Text.Regex.TDFA.Pattern" module provides the 'Pattern' data 4 | -- type and its subtypes. This 'Pattern' type is used to represent 5 | -- the parsed form of a regular expression. 6 | 7 | module Text.Regex.TDFA.Pattern 8 | (Pattern(..) 9 | ,PatternSet(..) 10 | ,PatternSetCharacterClass(..) 11 | ,PatternSetCollatingElement(..) 12 | ,PatternSetEquivalenceClass(..) 13 | ,GroupIndex 14 | ,DoPa(..) 15 | ,decodeCharacterClass, decodePatternSet 16 | ,showPattern 17 | -- ** Internal use 18 | ,starTrans 19 | -- ** Internal use, operations to support debugging under @ghci@ 20 | ,starTrans',simplify',dfsPattern 21 | ) where 22 | 23 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 24 | 25 | import Data.List(intersperse,partition) 26 | import qualified Data.Set as Set 27 | import Data.Set (Set) 28 | import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error) 29 | 30 | err :: String -> a 31 | err = common_error "Text.Regex.TDFA.Pattern" 32 | 33 | -- | 'Pattern' is the type returned by the regular expression parser 'parseRegex'. 34 | -- This is consumed by the "Text.Regex.TDFA.CorePattern" module and the tender leaves 35 | -- are nibbled by the "Text.Regex.TDFA.TNFA" module. 36 | -- 37 | -- The 'DoPa' field is the index of the component in the regex string @r@. 38 | data Pattern 39 | = PEmpty 40 | -- ^ @()@, matches the empty string. 41 | | PGroup (Maybe GroupIndex) Pattern 42 | -- ^ Group @(r)@. @Nothing@ indicates non-matching 'PGroup' 43 | -- (never produced by parser 'parseRegex'). 44 | | POr [Pattern] 45 | -- ^ Alternative @r|s@ (flattened by 'starTrans'). 46 | | PConcat [Pattern] 47 | -- ^ Sequence @rs@ (flattened by 'starTrans'). 48 | | PQuest Pattern 49 | -- ^ Zero or one repetitions @r?@ (eliminated by 'starTrans'). 50 | | PPlus Pattern 51 | -- ^ One or more repetitions @r+@ (eliminated by 'starTrans'). 
52 | | PStar Bool Pattern 53 | -- ^ Zero or more repetitions @r*@. 54 | -- @True@ (default) means may accept the empty string on its first iteration. 55 | | PBound Int (Maybe Int) Pattern 56 | -- ^ Given number or repetitions @r{n}@ or @r{n,m}@ 57 | -- (eliminated by 'starTrans'). 58 | 59 | -- The rest of these need an index of where in the regex string it is from 60 | | PCarat { getDoPa :: DoPa } 61 | -- ^ @^@ matches beginning of input. 62 | | PDollar { getDoPa :: DoPa } 63 | -- ^ @$@ matches end of input. 64 | 65 | -- The following test and accept a single character 66 | | PDot { getDoPa :: DoPa } 67 | -- ^ @.@ matches any character. 68 | | PAny { getDoPa :: DoPa, getPatternSet :: PatternSet } 69 | -- ^ Bracket expression @[...]@. 70 | | PAnyNot { getDoPa :: DoPa, getPatternSet :: PatternSet } 71 | -- ^ Inverted bracket expression @[^...]@. 72 | | PEscape { getDoPa :: DoPa, getPatternChar :: Char } 73 | -- ^ Backslashed character @\c@, may have special meaning. 74 | | PChar { getDoPa :: DoPa, getPatternChar :: Char } 75 | -- ^ Single character, matches given character. 76 | 77 | -- The following are semantic tags created in starTrans, not the parser 78 | | PNonCapture Pattern 79 | -- ^ Tag for internal use, introduced by 'starTrans'. 80 | | PNonEmpty Pattern 81 | -- ^ Tag for internal use, introduced by 'starTrans'. 82 | deriving (Eq, Show) 83 | 84 | -- Andreas Abel, 2022-07-18, issue #47: 85 | -- The following claim is FALSE: 86 | -- 87 | -- I have not been checking, but this should have the property that 88 | -- parsing the resulting string should result in an identical 'Pattern'. 89 | -- This is not true if 'starTrans' has created 'PNonCapture' and 'PNonEmpty' 90 | -- values or a @'PStar' False@. The contents of a @[...]@ grouping are 91 | -- always shown in a sorted canonical order. 92 | showPattern :: Pattern -> String 93 | showPattern pIn = 94 | case pIn of 95 | PEmpty -> "()" 96 | PGroup _ p -> paren (showPattern p) 97 | POr ps -> concat $ intersperse "|" (map showPattern ps) 98 | PConcat ps -> concatMap showPattern ps 99 | PQuest p -> (showPattern p)++"?" 100 | PPlus p -> (showPattern p)++"+" 101 | -- If PStar has mayFirstBeNull False then reparsing will forget this flag 102 | PStar _ p -> (showPattern p)++"*" 103 | PBound i (Just j) p | i==j -> showPattern p ++ ('{':show i)++"}" 104 | PBound i mj p -> showPattern p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj 105 | -- 106 | PCarat _ -> "^" 107 | PDollar _ -> "$" 108 | PDot _ -> "." 109 | PAny _ ps -> ('[':show ps)++"]" 110 | PAnyNot _ ps -> ('[':'^':show ps)++"]" 111 | PEscape _ c -> '\\':c:[] 112 | PChar _ c -> [c] 113 | -- The following were not directly from the parser, and will not be parsed in properly 114 | PNonCapture p -> showPattern p 115 | PNonEmpty p -> showPattern p 116 | where {- 117 | groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys 118 | else (if n <=3 then take n [x..] 119 | else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys 120 | groupRange x n [] = if n <=3 then take n [x..] 121 | else x:'-':(toEnum (pred n+fromEnum x)):[] 122 | -} 123 | paren s = ('(':s)++")" 124 | 125 | -- | Content of a bracket expression @[...]@ organized into 126 | -- characters, 127 | -- POSIX character classes (e.g. @[[:alnum:]]@), 128 | -- collating elements (e.g. @[.ch.]@, unused), and 129 | -- equivalence classes (e.g. @[=a=]@, treated as characters). 
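--
-- For example (an illustrative value, not taken from the library): parsing the
-- bracket expression @[a-c[:digit:]]@ produces roughly
--
-- > PatternSet (Just (Set.fromList "abc"))
-- >            (Just (Set.fromList [PatternSetCharacterClass "digit"]))
-- >            Nothing
-- >            Nothing
--
-- with the character range already expanded and the unused categories 'Nothing'.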
130 | -- 131 | data PatternSet = PatternSet (Maybe (Set Char)) 132 | (Maybe (Set PatternSetCharacterClass)) 133 | (Maybe (Set PatternSetCollatingElement)) 134 | (Maybe (Set PatternSetEquivalenceClass)) 135 | deriving (Eq) 136 | 137 | -- | Hand-rolled implementation, giving textual rather than Haskell representation. 138 | instance Show PatternSet where 139 | showsPrec i (PatternSet s scc sce sec) = 140 | let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s 141 | charSpec = (if ']' `elem` special then (']':) else id) (byRange normal) 142 | scc' = maybe "" ((concatMap show) . Set.toList) scc 143 | sce' = maybe "" ((concatMap show) . Set.toList) sce 144 | sec' = maybe "" ((concatMap show) . Set.toList) sec 145 | in shows charSpec 146 | . showsPrec i scc' . showsPrec i sce' . showsPrec i sec' 147 | . if '-' `elem` special then showChar '-' else id 148 | where byRange xAll@(~(x:xs)) 149 | | length xAll <=3 = xAll 150 | | otherwise = groupRange x 1 xs 151 | groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys 152 | else (if n <=3 then take n [x..] 153 | else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys 154 | groupRange x n [] = if n <=3 then take n [x..] 155 | else x:'-':(toEnum (pred n+fromEnum x)):[] 156 | 157 | -- | Content of @[: :]@, e.g. @"alnum"@ for @[:alnum:]@. 158 | newtype PatternSetCharacterClass = PatternSetCharacterClass {unSCC::String} 159 | deriving (Eq,Ord) 160 | 161 | -- | Content of @[. .]@, e.g. @"ch"@ for @[.ch.]@. 162 | newtype PatternSetCollatingElement = PatternSetCollatingElement {unSCE::String} 163 | deriving (Eq,Ord) 164 | 165 | -- | Content of @[= =]@, e.g. @"a"@ for @[=a=]@. 166 | newtype PatternSetEquivalenceClass = PatternSetEquivalenceClass {unSEC::String} 167 | deriving (Eq,Ord) 168 | 169 | -- | Hand-rolled implementation, giving textual rather than Haskell representation. 170 | instance Show PatternSetCharacterClass where 171 | showsPrec _ p = showChar '[' . showChar ':' . shows (unSCC p) . showChar ':' . showChar ']' 172 | 173 | -- | Hand-rolled implementation, giving textual rather than Haskell representation. 174 | instance Show PatternSetCollatingElement where 175 | showsPrec _ p = showChar '[' . showChar '.' . shows (unSCE p) . showChar '.' . showChar ']' 176 | 177 | -- | Hand-rolled implementation, giving textual rather than Haskell representation. 178 | instance Show PatternSetEquivalenceClass where 179 | showsPrec _ p = showChar '[' . showChar '=' . shows (unSEC p) . showChar '=' . showChar ']' 180 | 181 | -- | @decodePatternSet@ cannot handle collating element and treats 182 | -- equivalence classes as just their definition and nothing more. 183 | -- 184 | -- @since 1.3.2 185 | decodePatternSet :: PatternSet -> Set Char 186 | decodePatternSet (PatternSet msc mscc _ msec) = 187 | let baseMSC = maybe Set.empty id msc 188 | withMSCC = foldl (flip Set.insert) baseMSC (maybe [] (concatMap decodeCharacterClass . Set.toAscList) mscc) 189 | withMSEC = foldl (flip Set.insert) withMSCC (maybe [] (concatMap unSEC . Set.toAscList) msec) 190 | in withMSEC 191 | 192 | -- | This returns the strictly ascending list of characters 193 | -- represented by @[: :]@ POSIX character classes. 194 | -- Unrecognized class names return an empty string. 
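--
-- For example, @decodeCharacterClass (PatternSetCharacterClass "blank")@
-- evaluates to @"\t "@ (tab then space), per the table below.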
195 | -- 196 | -- @since 1.3.2 197 | decodeCharacterClass :: PatternSetCharacterClass -> String 198 | decodeCharacterClass (PatternSetCharacterClass s) = 199 | case s of 200 | "alnum" -> ['0'..'9']++['A'..'Z']++['a'..'z'] 201 | "digit" -> ['0'..'9'] 202 | "punct" -> ['\33'..'\47']++['\58'..'\64']++['\91'..'\96']++['\123'..'\126'] 203 | "alpha" -> ['A'..'Z']++['a'..'z'] 204 | "graph" -> ['\33'..'\126'] 205 | "space" -> "\t\n\v\f\r " 206 | "blank" -> "\t " 207 | "lower" -> ['a'..'z'] 208 | "upper" -> ['A'..'Z'] 209 | "cntrl" -> ['\0'..'\31']++"\127" -- with NUL 210 | "print" -> ['\32'..'\126'] 211 | "xdigit" -> ['0'..'9']++['A'..'F']++['a'..'f'] 212 | "word" -> ['0'..'9']++['A'..'Z']++"_"++['a'..'z'] 213 | _ -> [] 214 | 215 | -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == 216 | 217 | -- | Do the transformation and simplification in a single traversal. 218 | -- This removes the 'PPlus', 'PQuest', and 'PBound' values, changing to 'POr' 219 | -- and 'PEmpty' and 'PStar'. For some 'PBound' values it adds 220 | -- 'PNonEmpty' and 'PNonCapture' semantic marker. It also simplifies to 221 | -- flatten out nested 'POr' and 'PConcat' instances and eliminate some 222 | -- unneeded 'PEmpty' values. 223 | starTrans :: Pattern -> Pattern 224 | starTrans = dfsPattern (simplify' . starTrans') 225 | 226 | -- | Apply a 'Pattern' transformation function depth first. 227 | dfsPattern :: (Pattern -> Pattern) -- ^ The transformation function. 228 | -> Pattern -- ^ The 'Pattern' to transform. 229 | -> Pattern -- ^ The transformed 'Pattern'. 230 | dfsPattern f = dfs 231 | where unary c = f . c . dfs 232 | dfs pattern = case pattern of 233 | POr ps -> f (POr (map dfs ps)) 234 | PConcat ps -> f (PConcat (map dfs ps)) 235 | PGroup i p -> unary (PGroup i) p 236 | PQuest p -> unary PQuest p 237 | PPlus p -> unary PPlus p 238 | PStar i p -> unary (PStar i) p 239 | PBound i mi p -> unary (PBound i mi) p 240 | _ -> f pattern 241 | 242 | {- Replace by PNonCapture 243 | unCapture = dfsPattern unCapture' where 244 | unCapture' (PGroup (Just _) p) = PGroup Nothing p 245 | unCapture' x = x 246 | -} 247 | reGroup :: Pattern -> Pattern 248 | reGroup p@(PConcat xs) | 2 <= length xs = PGroup Nothing p 249 | reGroup p@(POr xs) | 2 <= length xs = PGroup Nothing p 250 | reGroup p = p 251 | 252 | starTrans' :: Pattern -> Pattern 253 | starTrans' pIn = 254 | case pIn of -- We know that "p" has been simplified in each of these cases: 255 | PQuest p -> POr [p,PEmpty] 256 | 257 | {- The PStar should not capture 0 characters on its first iteration, 258 | so set its mayFirstBeNull flag to False 259 | -} 260 | PPlus p | canOnlyMatchNull p -> p 261 | | otherwise -> asGroup $ PConcat [reGroup p,PStar False p] 262 | 263 | {- "An ERE matching a single character repeated by an '*' , '?' , or 264 | an interval expression shall not match a null expression unless 265 | this is the only match for the repetition or it is necessary to 266 | satisfy the exact or minimum number of occurrences for the interval 267 | expression." 268 | -} 269 | {- p? is p|PEmpty which prefers even a 0-character match for p 270 | p{0,1} is p? is POr [p,PEmpty] 271 | p{0,2} is (pp?)? NOT p?p? 272 | p{0,3} is (p(pp?)?)? 273 | p{1,2} is like pp{0,1} is like pp? but see below 274 | p{2,5} is ppp{0,3} is pp(p(pp?)?)? 275 | 276 | But this is not always right. Because if the second use of p in 277 | p?p? matches 0 characters then the perhaps non 0 character match of 278 | the first p is overwritten. 
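   (One concrete illustration of the problem, under the expansion rules sketched
    in this comment: if p{0,2} were expanded naively as

        PConcat [ POr [p,PEmpty] , POr [p,PEmpty] ]        i.e.  p?p?

    then, for a p that can match either "x" or "" and the input "x", the first
    copy may match "x" while the second copy matches "", and the later empty
    match of the duplicated subpattern overwrites the earlier non-empty capture,
    exactly as described above.)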
279 | 280 | We need a new operation "p!" that means "p?" unless "p" match 0 281 | characters, in which case skip p as if it failed in "p?". Thus 282 | when p cannot accept 0 characters p! and p? are equivalent. And 283 | when p can only match 0 characters p! is PEmpty. So for 284 | simplicity, only use ! when p can match 0 characters but not only 0 285 | characters. 286 | 287 | Call this (PNonEmpty p) in the Pattern type. 288 | p! is PNonEmpty p is POr [PEmpty,p] 289 | IS THIS TRUE? Use QuickCheck? 290 | 291 | Note that if p cannot match 0 characters then p! is p? and vice versa 292 | 293 | The p{0,1} is still always p? and POr [p,PEmpty] 294 | Now p{0,2} means p?p! or (pp!)? and p{0,3} means (p(pp!)!)? or p?p!p! 295 | Equivalently p?p! and p?p!p! 296 | And p{2,2} is p'p and p{3,3} is p'p'p and p{4} is p'p'p'p 297 | The p{1,2} is pp! and p{1,3} is pp!p! or p(pp!)! 298 | And p{2,4} means p'pp!p! and p{3,6} is p'p'pp!p!p! or p'p'p(p(pp!)!)! 299 | 300 | But this second form still has a problem: the (pp!)! can have the first 301 | p match 0 and the second p match non-zero. This showed up for (.|$){1,3} 302 | since ($.!)! should not be a valid path but altered the qt_win commands. 303 | 304 | Thus only p'p'pp!p!p! has the right semantics. For completeness: 305 | 306 | if p can only match only 0 characters then the cases are 307 | p{0,0} is (), p{0,_} = p?, p{_,_} is p 308 | 309 | if p can match 0 or non-zero characters then cases are 310 | p{0,0} is (), p{0,1} is (p)?, p{0,2} is (pp!)?, p{0,3} is (pp!p!)? 311 | p{1,1} is p, p{1,2} is pp!, p{1,3} is pp!p!, p{1,4} is pp!p!p! 312 | p{2,2} is p'p, 313 | p{2,3} is p'pp!, 314 | p{2,4} is p'pp!p! or p'p(pp!)! 315 | p{2,5} is p'pp!p!p! or p'p(p(pp!)!)! 316 | p{3,3} is p'p'p, p{3,4} is p'p'pp!, p{3,5} is p'p'pp!p!, p{3,6} is p'p'pp!p!p! 317 | 318 | if p can only match 1 or more characters then cases are 319 | p{0,0} is () 320 | p{0,1} is p?, p{0,2} is (pp?)?, p{0,3} is (p(pp?)?)?, p{0,4} is (pp{0,3})? 321 | p{1,1} is p, p{1,j} is pp{0,pred j} 322 | p{2,2} is p'p, p{2,3} is p'pp?, p{2,4} is p'p(pp?)?, p{2,5} = p'p{1,4} = p'(pp{0,3}) 323 | p{3,3} is p'p'p, p{3,4} is p'p'pp?, p{3,5} is p'p'p(pp?)?, p{3,6} is 324 | 325 | And by this logic, the PStar False is really p*! So p{0,} is p* 326 | and p{1,} is pp*! and p{2,} is p'pp*! and p{3,} is p'p'pp*! 327 | 328 | The (nonEmpty' p) below is the only way PNonEmpty is introduced 329 | into the Pattern. It is always preceded by p inside a PConcat 330 | list. The p involved never simplifies to PEmpty. Thus it is 331 | impossible to have PNonEmpty directly nested, i.e. (PNonEmpty 332 | (PNonEmpty _)) never occurs even after simplifications. 333 | 334 | The (nonCapture' p) below is the only way PNonCapture is 335 | introduced into the Pattern. It is always followed by p inside a 336 | PConcat list. 337 | 338 | -} 339 | -- Easy cases 340 | PBound i _ _ | i<0 -> PEmpty -- impossibly malformed 341 | PBound i (Just j) _ | i>j -> PEmpty -- impossibly malformed 342 | PBound _ (Just 0) _ -> PEmpty 343 | -- Medium cases 344 | PBound 0 Nothing p | canOnlyMatchNull p -> quest p 345 | | otherwise -> PStar True p 346 | PBound 0 (Just 1) p -> quest p 347 | -- Hard cases 348 | PBound i Nothing p | canOnlyMatchNull p -> p 349 | | otherwise -> asGroup . PConcat $ apply (nc'p:) (pred i) [reGroup p,PStar False p] 350 | where nc'p = nonCapture' p 351 | PBound 0 (Just j) p | canOnlyMatchNull p -> quest p 352 | -- The first operation is quest NOT nonEmpty. 
This can be tested with 353 | -- "a\nb" "((^)?|b){0,3}" and "a\nb" "((^)|b){0,3}" 354 | | otherwise -> quest . (concat' p) $ 355 | apply (nonEmpty' . (concat' p)) (j-2) (nonEmpty' p) 356 | {- 0.99.6 remove 357 | | cannotMatchNull p -> apply (quest' . (concat' p)) (pred j) (quest' p) 358 | | otherwise -> POr [ simplify' (PConcat (p : replicate (pred j) (nonEmpty' p))) , PEmpty ] 359 | -} 360 | {- 0.99.6 add, 0.99.7 remove 361 | PBound i (Just j) p | canOnlyMatchNull p -> p 362 | | i == j -> PConcat $ apply (p':) (pred i) [p] 363 | | otherwise -> PConcat $ apply (p':) (pred i) 364 | [p,apply (nonEmpty' . (concat' p)) (j-i-1) (nonEmpty' p) ] 365 | where p' = nonCapture' p 366 | -} 367 | {- 0.99.7 add -} 368 | PBound i (Just j) p | canOnlyMatchNull p -> p 369 | | i == j -> asGroup . PConcat $ apply (nc'p:) (pred i) [reGroup p] 370 | | otherwise -> asGroup . PConcat $ apply (nc'p:) (pred i) 371 | [reGroup p,apply (nonEmpty' . (concat' p)) (j-i-1) (ne'p) ] 372 | where nc'p = nonCapture' p 373 | ne'p = nonEmpty' p 374 | {- 0.99.6 375 | | cannotMatchNull p -> PConcat $ apply (p':) (pred i) $ (p:) $ 376 | [apply (quest' . (concat' p)) (pred (j-i)) (quest' p)] 377 | | otherwise -> PConcat $ (replicate (pred i) p') ++ p : (replicate (j-i) (nonEmpty' p)) 378 | -} 379 | PStar mayFirstBeNull p | canOnlyMatchNull p -> if mayFirstBeNull then quest p 380 | else PEmpty 381 | | otherwise -> pass 382 | -- Left intact 383 | PEmpty -> pass 384 | PGroup {} -> pass 385 | POr {} -> pass 386 | PConcat {} -> pass 387 | PCarat {} -> pass 388 | PDollar {} -> pass 389 | PDot {} -> pass 390 | PAny {} -> pass 391 | PAnyNot {} -> pass 392 | PEscape {} -> pass 393 | PChar {} -> pass 394 | PNonCapture {} -> pass 395 | PNonEmpty {} -> pass -- TODO : remove PNonEmpty from program 396 | where 397 | quest = (\ p -> POr [p,PEmpty]) -- require p to have been simplified 398 | -- quest' = (\ p -> simplify' $ POr [p,PEmpty]) -- require p to have been simplified 399 | concat' a b = simplify' $ PConcat [reGroup a,reGroup b] -- require a and b to have been simplified 400 | nonEmpty' = (\ p -> simplify' $ POr [PEmpty,p]) -- 2009-01-19 : this was PNonEmpty 401 | nonCapture' = PNonCapture 402 | apply f n x = foldr ($) x (replicate n f) -- function f applied n times to x : f^n(x) 403 | asGroup p = PGroup Nothing (simplify' p) 404 | pass = pIn 405 | 406 | -- | Function to transform a pattern into an equivalent, but less 407 | -- redundant form. Nested 'POr' and 'PConcat' are flattened. 'PEmpty' 408 | -- is propagated. 409 | simplify' :: Pattern -> Pattern 410 | simplify' x@(POr _) = 411 | let ps' = case span notPEmpty (flatten x) of 412 | (notEmpty,[]) -> notEmpty 413 | (notEmpty,_:rest) -> notEmpty ++ (PEmpty:filter notPEmpty rest) -- keep 1st PEmpty only 414 | in case ps' of 415 | [] -> PEmpty 416 | [p] -> p 417 | _ -> POr ps' 418 | simplify' x@(PConcat _) = 419 | let ps' = filter notPEmpty (flatten x) 420 | in case ps' of 421 | [] -> PEmpty 422 | [p] -> p 423 | _ -> PConcat ps' -- PConcat ps' 424 | simplify' (PStar _ PEmpty) = PEmpty 425 | simplify' (PNonCapture PEmpty) = PEmpty -- 2009, perhaps useful 426 | --simplify' (PNonEmpty PEmpty) = err "simplify' (PNonEmpty PEmpty) = should be Impossible!" -- 2009 427 | simplify' other = other 428 | 429 | -- | Function to flatten nested 'POr' or nested 'PConcat' applicataions. 
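-- For instance, @flatten (POr [POr [a,b], c])@ is @[a,b,c]@: exactly one level
-- of nesting is removed.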
430 | flatten :: Pattern -> [Pattern] 431 | flatten (POr ps) = (concatMap (\x -> case x of 432 | POr ps' -> ps' 433 | p -> [p]) ps) 434 | flatten (PConcat ps) = (concatMap (\x -> case x of 435 | PConcat ps' -> ps' 436 | p -> [p]) ps) 437 | flatten _ = err "flatten can only be applied to POr or PConcat" 438 | 439 | notPEmpty :: Pattern -> Bool 440 | notPEmpty PEmpty = False 441 | notPEmpty _ = True 442 | 443 | -- | Determines if 'Pattern' will fail or accept @[]@ and never accept any 444 | -- characters. Treat 'PCarat' and 'PDollar' as @True@. 445 | canOnlyMatchNull :: Pattern -> Bool 446 | canOnlyMatchNull pIn = 447 | case pIn of 448 | PEmpty -> True 449 | PGroup _ p -> canOnlyMatchNull p 450 | POr ps -> all canOnlyMatchNull ps 451 | PConcat ps -> all canOnlyMatchNull ps 452 | PQuest p -> canOnlyMatchNull p 453 | PPlus p -> canOnlyMatchNull p 454 | PStar _ p -> canOnlyMatchNull p 455 | PBound _ (Just 0) _ -> True 456 | PBound _ _ p -> canOnlyMatchNull p 457 | PCarat _ -> True 458 | PDollar _ -> True 459 | PNonCapture p -> canOnlyMatchNull p 460 | -- PNonEmpty p -> canOnlyMatchNull p -- like PQuest 461 | _ ->False 462 | 463 | {- 464 | 465 | -- | If 'cannotMatchNull' returns 'True' then it is known that the 466 | -- 'Pattern' will never accept an empty string. If 'cannotMatchNull' 467 | -- returns 'False' then it is possible but not definite that the 468 | -- 'Pattern' could accept an empty string. 469 | cannotMatchNull :: Pattern -> Bool 470 | cannotMatchNull pIn = 471 | case pIn of 472 | PEmpty -> False 473 | PGroup _ p -> cannotMatchNull p 474 | POr [] -> False 475 | POr ps -> all cannotMatchNull ps 476 | PConcat [] -> False 477 | PConcat ps -> any cannotMatchNull ps 478 | PQuest _ -> False 479 | PPlus p -> cannotMatchNull p 480 | PStar {} -> False 481 | PBound 0 _ _ -> False 482 | PBound _ _ p -> cannotMatchNull p 483 | PCarat _ -> False 484 | PDollar _ -> False 485 | PNonCapture p -> cannotMatchNull p 486 | -- PNonEmpty _ -> False -- like PQuest 487 | _ -> True 488 | -} 489 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/ReadRegex.hs: -------------------------------------------------------------------------------- 1 | -- | This is a POSIX version of parseRegex that allows NUL characters. 2 | -- Lazy\/Possessive\/Backrefs are not recognized. Anchors \^ and \$ are 3 | -- recognized. 4 | -- 5 | -- A 'PGroup' returned always has @(Maybe 'GroupIndex')@ set to @(Just _)@ 6 | -- and never to @Nothing@. 7 | 8 | module Text.Regex.TDFA.ReadRegex (parseRegex) where 9 | 10 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 11 | 12 | import Text.Regex.TDFA.Pattern {- all -} 13 | import Text.ParserCombinators.Parsec((<|>), (), 14 | try, runParser, many, getState, setState, CharParser, ParseError, 15 | sepBy1, option, notFollowedBy, many1, lookAhead, eof, between, 16 | string, noneOf, digit, char, anyChar) 17 | 18 | import Control.Monad (liftM, guard) 19 | 20 | import Data.Foldable (asum) 21 | import qualified Data.Set as Set(fromList) 22 | 23 | -- | An element inside @[...]@, denoting a character class. 24 | data BracketElement 25 | = BEChar Char -- ^ A single character. 26 | | BERange Char Char -- ^ A character range (e.g. @a-z@). 27 | | BEColl String -- ^ @foo@ in @[.foo.]@. 28 | | BEEquiv String -- ^ @bar@ in @[=bar=]@. 29 | | BEClass String -- ^ A POSIX character class (candidate), e.g. @alpha@ parsed from @[:alpha:]@. 
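-- An illustrative value (not used by the parser below): the elements collected
-- for the bracket expression @[a-z[:digit:][.ch.][=e=]x]@ would be
_exampleBracketElements :: [BracketElement]
_exampleBracketElements =
  [ BERange 'a' 'z'    -- from @a-z@
  , BEClass "digit"    -- from @[:digit:]@
  , BEColl "ch"        -- from @[.ch.]@
  , BEEquiv "e"        -- from @[=e=]@
  , BEChar 'x'         -- from @x@
  ]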
30 | 31 | -- | Return either an error message or a tuple of the Pattern and the 32 | -- largest group index and the largest DoPa index (both have smallest 33 | -- index of 1). Since the regular expression is supplied as [Char] it 34 | -- automatically supports unicode and @\\NUL@ characters. 35 | parseRegex :: String -> Either ParseError (Pattern,(GroupIndex,DoPa)) 36 | parseRegex x = runParser (do pat <- p_regex 37 | eof 38 | (lastGroupIndex,lastDopa) <- getState 39 | return (pat,(lastGroupIndex,DoPa lastDopa))) (0,0) x x 40 | 41 | type P = CharParser (GroupIndex, Int) 42 | 43 | p_regex :: P Pattern 44 | p_regex = liftM POr $ sepBy1 p_branch (char '|') 45 | 46 | -- man re_format helps a lot, it says one-or-more pieces so this is 47 | -- many1 not many. Use "()" to indicate an empty piece. 48 | p_branch :: P Pattern 49 | p_branch = liftM PConcat $ many1 p_piece 50 | 51 | p_piece :: P Pattern 52 | p_piece = (p_anchor <|> p_atom) >>= p_post_atom -- correct specification 53 | 54 | p_atom :: P Pattern 55 | p_atom = p_group <|> p_bracket <|> p_char "an atom" 56 | 57 | group_index :: P (Maybe GroupIndex) 58 | group_index = do 59 | (gi,ci) <- getState 60 | let index = succ gi 61 | setState (index,ci) 62 | return (Just index) 63 | 64 | p_group :: P Pattern 65 | p_group = lookAhead (char '(') >> do 66 | index <- group_index 67 | liftM (PGroup index) $ between (char '(') (char ')') p_regex 68 | 69 | -- p_post_atom takes the previous atom as a parameter 70 | p_post_atom :: Pattern -> P Pattern 71 | p_post_atom atom = (char '?' >> return (PQuest atom)) 72 | <|> (char '+' >> return (PPlus atom)) 73 | <|> (char '*' >> return (PStar True atom)) 74 | <|> p_bound atom 75 | <|> return atom 76 | 77 | p_bound :: Pattern -> P Pattern 78 | p_bound atom = try $ between (char '{') (char '}') (p_bound_spec atom) 79 | 80 | p_bound_spec :: Pattern -> P Pattern 81 | p_bound_spec atom = do lowS <- many1 digit 82 | let lowI = read lowS 83 | highMI <- option (Just lowI) $ try $ do 84 | _ <- char ',' 85 | -- parsec note: if 'many digits' fails below then the 'try' ensures 86 | -- that the ',' will not match the closing '}' in p_bound, same goes 87 | -- for any non '}' garbage after the 'many digits'. 88 | highS <- many digit 89 | if null highS then return Nothing -- no upper bound 90 | else do let highI = read highS 91 | guard (lowI <= highI) 92 | return (Just (read highS)) 93 | return (PBound lowI highMI atom) 94 | 95 | -- An anchor cannot be modified by a repetition specifier 96 | p_anchor :: P Pattern 97 | p_anchor = (char '^' >> liftM PCarat char_index) 98 | <|> (char '$' >> liftM PDollar char_index) 99 | <|> try (do _ <- string "()" 100 | index <- group_index 101 | return $ PGroup index PEmpty) 102 | "empty () or anchor ^ or $" 103 | 104 | char_index :: P DoPa 105 | char_index = do (gi,ci) <- getState 106 | let ci' = succ ci 107 | setState (gi,ci') 108 | return (DoPa ci') 109 | 110 | p_char :: P Pattern 111 | p_char = p_dot <|> p_left_brace <|> p_escaped <|> p_other_char where 112 | p_dot = char '.' >> char_index >>= return . PDot 113 | p_left_brace = try $ (char '{' >> notFollowedBy digit >> char_index >>= return . (`PChar` '{')) 114 | p_escaped = char '\\' >> anyChar >>= \c -> char_index >>= return . (`PEscape` c) 115 | p_other_char = noneOf specials >>= \c -> char_index >>= return . 
(`PChar` c) 116 | where specials = "^.[$()|*+?{\\" 117 | 118 | -- parse [bar] and [^bar] sets of characters 119 | p_bracket :: P Pattern 120 | p_bracket = (char '[') >> ( (char '^' >> p_set True) <|> (p_set False) ) 121 | 122 | p_set :: Bool -> P Pattern 123 | p_set invert = do initial <- option "" (char ']' >> return "]") 124 | values <- if null initial then many1 p_set_elem else many p_set_elem 125 | _ <- char ']' 126 | ci <- char_index 127 | let chars = maybe'set $ concat $ 128 | initial : 129 | [ c | BEChar c <- values ] : 130 | [ [start..end] | BERange start end <- values ] 131 | colls = maybe'set [PatternSetCollatingElement coll | BEColl coll <- values ] 132 | equivs = maybe'set [PatternSetEquivalenceClass equiv | BEEquiv equiv <- values] 133 | class's = maybe'set [PatternSetCharacterClass a'class | BEClass a'class <- values] 134 | maybe'set x = if null x then Nothing else Just (Set.fromList x) 135 | sets = PatternSet chars class's colls equivs 136 | sets `seq` return $ if invert then PAnyNot ci sets else PAny ci sets 137 | 138 | -- From here down the code is the parser and functions for pattern [ ] set things 139 | 140 | p_set_elem :: P BracketElement 141 | p_set_elem = checkBracketElement =<< asum 142 | [ p_set_elem_class 143 | , p_set_elem_equiv 144 | , p_set_elem_coll 145 | , p_set_elem_range 146 | , p_set_elem_char 147 | , fail "Failed to parse bracketed string" 148 | ] 149 | 150 | p_set_elem_class :: P BracketElement 151 | p_set_elem_class = liftM BEClass $ 152 | try (between (string "[:") (string ":]") (many1 $ noneOf ":]")) 153 | 154 | p_set_elem_equiv :: P BracketElement 155 | p_set_elem_equiv = liftM BEEquiv $ 156 | try (between (string "[=") (string "=]") (many1 $ noneOf "=]")) 157 | 158 | p_set_elem_coll :: P BracketElement 159 | p_set_elem_coll = liftM BEColl $ 160 | try (between (string "[.") (string ".]") (many1 $ noneOf ".]")) 161 | 162 | p_set_elem_range :: P BracketElement 163 | p_set_elem_range = try $ do 164 | start <- noneOf "]" 165 | _ <- char '-' 166 | end <- noneOf "]" 167 | return $ BERange start end 168 | 169 | p_set_elem_char :: P BracketElement 170 | p_set_elem_char = do 171 | c <- noneOf "]" 172 | return (BEChar c) 173 | 174 | -- | Fail when 'BracketElement' is invalid, e.g. empty range @1-0@. 175 | -- This failure should not be caught. 176 | -- 177 | checkBracketElement :: BracketElement -> P BracketElement 178 | checkBracketElement e = 179 | case e of 180 | BERange start end 181 | | start > end -> fail $ unwords 182 | [ "End point" 183 | , show end 184 | , "of dashed character range is less than starting point" 185 | , show start 186 | ] 187 | | otherwise -> ok 188 | BEChar _ -> ok 189 | BEClass _ -> ok 190 | BEColl _ -> ok 191 | BEEquiv _ -> ok 192 | where 193 | ok = return e 194 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Sequence.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 3 | @ByteString@ with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and 4 | "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via 5 | import "Text.Regex.TDFA". 6 | 7 | This exports instances of the high level API and the medium level 8 | API of 'compile','execute', and 'regexec'. 
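
An illustrative use, assuming @S@ abbreviates "Data.Sequence" and the default
options come from the 'RegexOptions' class of "Text.Regex.Base":

> case compile defaultCompOpt defaultExecOpt (S.fromList "a(b+)c") of
>   Left err -> Left err
>   Right re -> execute re (S.fromList "xabbcx")

On success this is @Right (Just _)@ carrying the usual 'MatchArray' of offsets.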
9 | -} 10 | module Text.Regex.TDFA.Sequence( 11 | Regex 12 | ,CompOption 13 | ,ExecOption 14 | ,compile 15 | ,execute 16 | ,regexec 17 | ) where 18 | 19 | import Data.Sequence(Seq) 20 | import Data.Foldable as F(toList) 21 | 22 | import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..),Extract(..)) 23 | import Text.Regex.Base.Impl(polymatch,polymatchM) 24 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) 25 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 26 | import Text.Regex.TDFA.TDFA(patternToRegex) 27 | import Text.Regex.TDFA.ReadRegex(parseRegex) 28 | 29 | import Data.Array.IArray((!),elems) 30 | import Data.Maybe(listToMaybe) 31 | import Text.Regex.TDFA.NewDFA.Engine(execMatch) 32 | import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 33 | 34 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 35 | 36 | instance RegexContext Regex (Seq Char) (Seq Char) where 37 | match = polymatch 38 | matchM = polymatchM 39 | 40 | instance RegexMaker Regex CompOption ExecOption (Seq Char) where 41 | makeRegexOptsM c e source = 42 | case parseRegex (F.toList source) of 43 | Left err -> fail $ "parseRegex for Text.Regex.TDFA.Sequence failed:"++show err 44 | Right pattern -> return $ patternToRegex pattern c e 45 | 46 | instance RegexLike Regex (Seq Char) where 47 | matchOnce r s = listToMaybe (matchAll r s) 48 | matchAll r s = execMatch r 0 '\n' s 49 | matchCount r s = length (matchAll r' s) 50 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 51 | matchTest = Tester.matchTest 52 | matchOnceText regex source = 53 | fmap (\ma -> let (o,l) = ma!0 54 | in (before o source 55 | ,fmap (\ol -> (extract ol source,ol)) ma 56 | ,after (o+l) source)) 57 | (matchOnce regex source) 58 | matchAllText regex source = 59 | map (fmap (\ol -> (extract ol source,ol))) 60 | (matchAll regex source) 61 | 62 | compile :: CompOption -- ^ Flags (summed together) 63 | -> ExecOption -- ^ Flags (summed together) 64 | -> Seq Char -- ^ The regular expression to compile 65 | -> Either String Regex -- ^ Returns: the compiled regular expression 66 | compile compOpt execOpt bs = 67 | case parseRegex (F.toList bs) of 68 | Left err -> Left ("parseRegex for Text.Regex.TDFA.Sequence failed:"++show err) 69 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 70 | 71 | execute :: Regex -- ^ Compiled regular expression 72 | -> Seq Char -- ^ String to match against 73 | -> Either String (Maybe MatchArray) 74 | execute r bs = Right (matchOnce r bs) 75 | 76 | regexec :: Regex -- ^ Compiled regular expression 77 | -> Seq Char -- ^ String to match against 78 | -> Either String (Maybe (Seq Char, Seq Char, Seq Char, [Seq Char])) 79 | regexec r txt = Right $ 80 | case matchOnceText r txt of 81 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 82 | -> Just (pre, main, post, rest) 83 | _ -> Nothing 84 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/String.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 3 | 'String' with the TDFA backend. 4 | 5 | This exports instances of the high level API and the medium level 6 | API of 'compile','execute', and 'regexec'. 7 | -} 8 | {- By Chris Kuklewicz, 2009. BSD License, see the LICENSE file. 
-} 9 | module Text.Regex.TDFA.String( 10 | -- ** Types 11 | Regex 12 | ,MatchOffset 13 | ,MatchLength 14 | ,CompOption 15 | ,ExecOption 16 | -- ** Medium level API functions 17 | ,compile 18 | ,execute 19 | ,regexec 20 | ) where 21 | 22 | import Text.Regex.Base.Impl(polymatch,polymatchM) 23 | import Text.Regex.Base.RegexLike(RegexMaker(..),RegexLike(..),RegexContext(..),MatchOffset,MatchLength,MatchArray) 24 | import Text.Regex.TDFA.Common(common_error,Regex(..),CompOption,ExecOption(captureGroups)) 25 | import Text.Regex.TDFA.ReadRegex(parseRegex) 26 | import Text.Regex.TDFA.TDFA(patternToRegex) 27 | 28 | import Data.Array.IArray((!),elems,amap) 29 | import Data.Maybe(listToMaybe) 30 | import Text.Regex.TDFA.NewDFA.Engine(execMatch) 31 | import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 32 | 33 | err :: String -> a 34 | err = common_error "Text.Regex.TDFA.String" 35 | 36 | unwrap :: Either String v -> v 37 | unwrap x = case x of Left msg -> err ("Text.Regex.TDFA.String died: "++msg) 38 | Right v -> v 39 | 40 | compile :: CompOption -- ^ Flags (summed together) 41 | -> ExecOption -- ^ Flags (summed together) 42 | -> String -- ^ The regular expression to compile (ASCII only, no null bytes) 43 | -> Either String Regex -- ^ Returns: the compiled regular expression 44 | compile compOpt execOpt source = 45 | case parseRegex source of 46 | Left msg -> Left ("parseRegex for Text.Regex.TDFA.String failed:"++show msg) 47 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 48 | 49 | instance RegexMaker Regex CompOption ExecOption String where 50 | makeRegexOpts c e source = unwrap (compile c e source) 51 | makeRegexOptsM c e source = either fail return $ compile c e source 52 | 53 | execute :: Regex -- ^ Compiled regular expression 54 | -> String -- ^ String to match against 55 | -> Either String (Maybe MatchArray) 56 | execute r s = Right (matchOnce r s) 57 | 58 | regexec :: Regex -- ^ Compiled regular expression 59 | -> String -- ^ String to match against 60 | -> Either String (Maybe (String, String, String, [String])) 61 | regexec r txt = Right $ 62 | case matchOnceText r txt of 63 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 64 | -> Just (pre, main, post, rest) 65 | _ -> Nothing 66 | 67 | -- Minimal definition for now 68 | instance RegexLike Regex String where 69 | matchOnce r s = listToMaybe (matchAll r s) 70 | matchAll r s = execMatch r 0 '\n' s 71 | matchCount r s = length (matchAll r' s) 72 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 73 | matchTest = Tester.matchTest 74 | -- matchOnceText 75 | matchAllText r s = 76 | let go i _ _ | i `seq` False = undefined 77 | go _i _t [] = [] 78 | go i t (x:xs) = let (off0,len0) = x!0 79 | trans pair@(off,len) = (take len (drop (off-i) t),pair) 80 | t' = drop (off0+len0-i) t 81 | in amap trans x : seq t' (go (off0+len0) t' xs) 82 | in go 0 s (matchAll r s) 83 | 84 | instance RegexContext Regex String String where 85 | match = polymatch 86 | matchM = polymatchM 87 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/TDFA.hs: -------------------------------------------------------------------------------- 1 | -- | "Text.Regex.TDFA.TDFA" converts the QNFA from TNFA into the DFA. 2 | -- A DFA state corresponds to a Set of QNFA states, represented as list 3 | -- of Index which are used to lookup the DFA state in a lazy Trie 4 | -- which holds all possible subsets of QNFA states. 
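-- For instance (indices purely illustrative), the DFA state for the QNFA
-- subset @[1,3]@ is looked up below as @indexesToDFA [1,3]@, i.e.
-- @Trie.lookupAsc trie [1,3]@, so each subset of states is built at most once.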
5 | module Text.Regex.TDFA.TDFA(patternToRegex,DFA(..),DT(..) 6 | ,examineDFA,nfaToDFA,dfaMap) where 7 | 8 | --import Control.Arrow((***)) 9 | import Data.Monoid as Mon(Monoid(..)) 10 | import Control.Monad.State(State,MonadState(..),execState) 11 | import Data.Array.IArray(Array,(!),bounds,{-assocs-}) 12 | import Data.IntMap(IntMap) 13 | import qualified Data.IntMap as IMap(empty,keys,delete,null,lookup,fromDistinctAscList 14 | ,member,unionWith,singleton,union 15 | ,toAscList,Key,elems,toList,insert 16 | ,insertWith,insertWithKey) 17 | import Data.IntMap.CharMap2(CharMap(..)) 18 | import qualified Data.IntMap.CharMap2 as Map(empty) 19 | --import Data.IntSet(IntSet) 20 | import qualified Data.IntSet as ISet(empty,singleton,null) 21 | import Data.List(foldl') 22 | import qualified Data.Map (Map,empty,member,insert,elems) 23 | import Data.Sequence as S((|>),{-viewl,ViewL(..)-}) 24 | 25 | import Text.Regex.TDFA.Common {- all -} 26 | import Text.Regex.TDFA.IntArrTrieSet(TrieSet) 27 | import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc,fromSinglesMerge) 28 | import Text.Regex.TDFA.Pattern(Pattern) 29 | --import Text.Regex.TDFA.RunMutState(toInstructions) 30 | import Text.Regex.TDFA.TNFA(patternToNFA) 31 | --import Debug.Trace 32 | 33 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 34 | 35 | err :: String -> a 36 | err s = common_error "Text.Regex.TDFA.TDFA" s 37 | 38 | dlose :: DFA 39 | dlose = DFA { d_id = ISet.empty 40 | , d_dt = Simple' { dt_win = IMap.empty 41 | , dt_trans = Map.empty 42 | , dt_other = Transition dlose dlose mempty } } 43 | 44 | -- dumb smart constructor for tracing construction (I wanted to monitor laziness) 45 | {-# INLINE makeDFA #-} 46 | makeDFA :: SetIndex -> DT -> DFA 47 | makeDFA i dt = DFA i dt 48 | 49 | -- Note that no CompOption or ExecOption parameter is needed. 
50 | nfaToDFA :: ((Index,Array Index QNFA),Array Tag OP,Array GroupIndex [GroupInfo]) 51 | -> CompOption -> ExecOption 52 | -> Regex 53 | nfaToDFA ((startIndex,aQNFA),aTagOp,aGroupInfo) co eo = Regex dfa startIndex indexBounds tagBounds trie aTagOp aGroupInfo ifa co eo where 54 | dfa = indexesToDFA [startIndex] 55 | indexBounds = bounds aQNFA 56 | tagBounds = bounds aTagOp 57 | ifa = (not (multiline co)) && isDFAFrontAnchored dfa 58 | 59 | indexesToDFA = {-# SCC "nfaToDFA.indexesToDFA" #-} Trie.lookupAsc trie -- Lookup in cache 60 | 61 | trie :: TrieSet DFA 62 | trie = Trie.fromSinglesMerge dlose mergeDFA (bounds aQNFA) indexToDFA 63 | 64 | newTransition :: DTrans -> Transition 65 | newTransition dtrans = Transition { trans_many = indexesToDFA (IMap.keys dtransWithSpawn) 66 | , trans_single = indexesToDFA (IMap.keys dtrans) 67 | , trans_how = dtransWithSpawn } 68 | where dtransWithSpawn = addSpawn dtrans 69 | 70 | makeTransition :: DTrans -> Transition 71 | makeTransition dtrans | hasSpawn = Transition { trans_many = indexesToDFA (IMap.keys dtrans) 72 | , trans_single = indexesToDFA (IMap.keys (IMap.delete startIndex dtrans)) 73 | , trans_how = dtrans } 74 | | otherwise = Transition { trans_many = indexesToDFA (IMap.keys dtrans) 75 | , trans_single = indexesToDFA (IMap.keys dtrans) 76 | , trans_how = dtrans } 77 | where hasSpawn = maybe False IMap.null (IMap.lookup startIndex dtrans) 78 | 79 | -- coming from (-1) means spawn a new starting item 80 | addSpawn :: DTrans -> DTrans 81 | addSpawn dtrans | IMap.member startIndex dtrans = dtrans 82 | | otherwise = IMap.insert startIndex mempty dtrans 83 | 84 | indexToDFA :: Index -> DFA -- used to seed the Trie from the NFA 85 | indexToDFA i = {-# SCC "nfaToDFA.indexToDFA" #-} makeDFA (ISet.singleton source) (qtToDT qtIn) 86 | where 87 | (QNFA {q_id = source,q_qt = qtIn}) = aQNFA!i 88 | qtToDT :: QT -> DT 89 | qtToDT (Testing {qt_test=wt, qt_dopas=dopas, qt_a=a, qt_b=b}) = 90 | Testing' { dt_test = wt 91 | , dt_dopas = dopas 92 | , dt_a = qtToDT a 93 | , dt_b = qtToDT b } 94 | qtToDT (Simple {qt_win=w, qt_trans=t, qt_other=o}) = 95 | Simple' { dt_win = makeWinner 96 | , dt_trans = fmap qtransToDFA t 97 | -- , dt_other = if IMap.null o then Just (newTransition $ IMap.singleton startIndex mempty) else Just (qtransToDFA o)} 98 | , dt_other = qtransToDFA o} 99 | where 100 | makeWinner :: IntMap {- Index -} Instructions -- (RunState ()) 101 | makeWinner | noWin w = IMap.empty 102 | | otherwise = IMap.singleton source (cleanWin w) 103 | 104 | qtransToDFA :: QTrans -> Transition 105 | qtransToDFA qtrans = {-# SCC "nfaToDFA.indexToDFA.qtransToDFA" #-} 106 | newTransition dtrans 107 | where 108 | dtrans :: DTrans 109 | dtrans =IMap.fromDistinctAscList . mapSnd (IMap.singleton source) $ best 110 | best :: [(Index {- Destination -} ,(DoPa,Instructions))] 111 | best = pickQTrans aTagOp $ qtrans 112 | 113 | -- The DFA states are built up by merging the singleton ones converted from the NFA. 114 | -- Thus the "source" indices in the DTrans should not collide. 
115 | mergeDFA :: DFA -> DFA -> DFA 116 | mergeDFA d1 d2 = {-# SCC "nfaToDFA.mergeDFA" #-} makeDFA i dt 117 | where 118 | i = d_id d1 `mappend` d_id d2 119 | dt = d_dt d1 `mergeDT` d_dt d2 120 | mergeDT,nestDT :: DT -> DT -> DT 121 | mergeDT (Simple' w1 t1 o1) (Simple' w2 t2 o2) = Simple' w t o 122 | where 123 | w = w1 `mappend` w2 124 | t = fuseDTrans -- t1 o1 t2 o2 125 | o = mergeDTrans o1 o2 126 | -- This is very much like mergeQTrans 127 | mergeDTrans :: Transition -> Transition -> Transition 128 | mergeDTrans (Transition {trans_how=dt1}) (Transition {trans_how=dt2}) = makeTransition dtrans 129 | where dtrans = IMap.unionWith IMap.union dt1 dt2 130 | -- This is very much like fuseQTrans 131 | fuseDTrans :: CharMap Transition 132 | fuseDTrans = CharMap (IMap.fromDistinctAscList (fuse l1 l2)) 133 | where 134 | l1 = IMap.toAscList (unCharMap t1) 135 | l2 = IMap.toAscList (unCharMap t2) 136 | fuse :: [(IMap.Key, Transition)] 137 | -> [(IMap.Key, Transition)] 138 | -> [(IMap.Key, Transition)] 139 | fuse [] y = fmap (fmap (mergeDTrans o1)) y 140 | fuse x [] = fmap (fmap (mergeDTrans o2)) x 141 | fuse x@((xc,xa):xs) y@((yc,ya):ys) = 142 | case compare xc yc of 143 | LT -> (xc,mergeDTrans o2 xa) : fuse xs y 144 | EQ -> (xc,mergeDTrans xa ya) : fuse xs ys 145 | GT -> (yc,mergeDTrans o1 ya) : fuse x ys 146 | mergeDT dt1@(Testing' wt1 dopas1 a1 b1) dt2@(Testing' wt2 dopas2 a2 b2) = 147 | case compare wt1 wt2 of 148 | LT -> nestDT dt1 dt2 149 | EQ -> Testing' { dt_test = wt1 150 | , dt_dopas = dopas1 `mappend` dopas2 151 | , dt_a = mergeDT a1 a2 152 | , dt_b = mergeDT b1 b2 } 153 | GT -> nestDT dt2 dt1 154 | mergeDT dt1@(Testing' {}) dt2 = nestDT dt1 dt2 155 | mergeDT dt1 dt2@(Testing' {}) = nestDT dt2 dt1 156 | nestDT dt1@(Testing' {dt_a=a,dt_b=b}) dt2 = dt1 { dt_a = mergeDT a dt2, dt_b = mergeDT b dt2 } 157 | nestDT _ _ = err "nestDT called on Simple -- cannot happen" 158 | 159 | patternToRegex :: (Pattern,(GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex 160 | patternToRegex pattern compOpt execOpt = nfaToDFA (patternToNFA compOpt pattern) compOpt execOpt 161 | 162 | dfaMap :: DFA -> Data.Map.Map SetIndex DFA 163 | dfaMap = seen (Data.Map.empty) where 164 | seen old d@(DFA {d_id=i,d_dt=dt}) = 165 | if i `Data.Map.member` old 166 | then old 167 | else let new = Data.Map.insert i d old 168 | in foldl' seen new (flattenDT dt) 169 | 170 | -- Get all trans_many states 171 | flattenDT :: DT -> [DFA] 172 | flattenDT (Simple' {dt_trans=(CharMap mt),dt_other=o}) = concatMap (\d -> [trans_many d {-,trans_single d-}]) . (:) o . IMap.elems $ mt 173 | flattenDT (Testing' {dt_a=a,dt_b=b}) = flattenDT a ++ flattenDT b 174 | 175 | examineDFA :: Regex -> String 176 | examineDFA (Regex {regex_dfa=dfa}) = unlines . (:) ("Number of reachable DFA states: "++show (length dfas)) . map show $ dfas 177 | where dfas = Data.Map.elems $ dfaMap dfa 178 | 179 | {- 180 | 181 | fillMap :: Tag -> IntMap (Position,Bool) 182 | fillMap tag = IMap.fromDistinctAscList [(t,(-1,True)) | t <- [0..tag] ] 183 | 184 | diffMap :: IntMap (Position,Bool) -> IntMap (Position,Bool) -> [(Index,(Position,Bool))] 185 | diffMap old new = IMap.toList (IMap.differenceWith (\a b -> if a==b then Nothing else Just b) old new) 186 | 187 | examineDFA :: (DFA,Index,Array Tag OP,Array GroupIndex [GroupInfo]) -> String 188 | examineDFA (dfa,_,aTags,_) = unlines $ map (examineDFA' (snd . 
bounds $ aTags)) (Map.elems $ dfaMap dfa) 189 | 190 | examineDFA' :: Tag -> DFA -> String 191 | examineDFA' maxTag = showDFA (fillMap maxTag) 192 | 193 | {- 194 | instance Show DFA where 195 | show (DFA {d_id=i,d_dt=dt}) = "DFA {d_id = "++show (ISet.toList i) 196 | ++"\n ,d_dt = "++ show dt 197 | ++"\n}" 198 | -} 199 | -- instance Show DT where show = showDT 200 | 201 | showDFA :: IntMap (Position,Bool) -> DFA -> String 202 | showDFA m (DFA {d_id=i,d_dt=dt}) = "DFA {d_id = "++show (ISet.toList i) 203 | ++"\n ,d_dt = "++ showDT m dt 204 | ++"\n}" 205 | -} 206 | 207 | 208 | 209 | -- pick QTrans can be told the unique source and knows all the 210 | -- destinations (hmm...along with qt_win)! So if in ascending destination order the last source 211 | -- is free to mutatate the old state. If the QTrans has only one 212 | -- entry then all we need to do is mutate that entry when making a 213 | -- transition. 214 | -- 215 | pickQTrans :: Array Tag OP -> QTrans -> [({-Destination-}Index,(DoPa,Instructions))] 216 | pickQTrans op tr = mapSnd (bestTrans op) . IMap.toList $ tr 217 | 218 | cleanWin :: WinTags -> Instructions 219 | cleanWin = toInstructions 220 | 221 | bestTrans :: Array Tag OP -> [TagCommand] -> (DoPa,Instructions) 222 | bestTrans _ [] = err "bestTrans : There were no transition choose from!" 223 | bestTrans aTagOP (f:fs) | null fs = canonical f 224 | | otherwise = answer -- if null toDisplay then answer else trace toDisplay answer 225 | where 226 | answer = foldl' pick (canonical f) fs 227 | {- toDisplay | null fs = "" 228 | | otherwise = unlines $ "bestTrans" : show (answer) : "from among" : concatMap (\x -> [show x, show (toInstructions (snd x))]) (f:fs) -} 229 | canonical :: TagCommand -> (DoPa,Instructions) 230 | canonical (dopa,spec) = (dopa, toInstructions spec) 231 | pick :: (DoPa,Instructions) -> TagCommand -> (DoPa,Instructions) 232 | pick win@(dopa1,winI) (dopa2,spec) = 233 | let nextI = toInstructions spec 234 | -- in case compareWith choose winPos nextPos of -- XXX 2009: add in enterOrbit information 235 | in case compareWith choose (toListing winI) (toListing nextI) of 236 | GT -> win 237 | LT -> (dopa2,nextI) 238 | EQ -> if dopa1 >= dopa2 then win else (dopa2,nextI) -- no deep reason not to just pick win 239 | 240 | toListing :: Instructions -> [(Tag,Action)] 241 | toListing (Instructions {newPos = nextPos}) = filter notReset nextPos 242 | where notReset (_,SetVal (-1)) = False 243 | notReset _ = True 244 | {- 245 | toListing (Instructions {newPos = nextPos}) = mergeTagOrbit nextPos (filter snd nextFlags) 246 | 247 | mergeTagOrbit xx [] = xx 248 | mergeTagOrbit [] yy = yy 249 | mergeTagOrbit xx@(x:xs) yy@(y:ys) = 250 | case compare (fst x) (fst y) of 251 | GT -> y : mergeTagOrbit xx ys 252 | LT -> x : mergeTagOrbit xs yy 253 | EQ -> x : mergeTagOrbit xs ys -- keep tag setting over orbit setting. 254 | -} 255 | 256 | {-# INLINE choose #-} 257 | choose :: Maybe (Tag,Action) -> Maybe (Tag,Action) -> Ordering 258 | choose Nothing Nothing = EQ 259 | choose Nothing x = flipOrder (choose x Nothing) 260 | choose (Just (tag,_post)) Nothing = 261 | case aTagOP!tag of 262 | Maximize -> GT 263 | Minimize -> LT -- needed to choose best path inside nested * operators, 264 | -- this needs a leading Minimize tag inside at least the parent * operator 265 | Ignore -> GT -- XXX this is a guess in analogy with Maximize for the end bit of a group 266 | Orbit -> LT -- trace ("choose LT! 
Just "++show tag++" < Nothing") LT -- 2009 XXX : comment out next line and use the Orbit instead 267 | -- Orbit -> err $ "bestTrans.choose : Very Unexpected Orbit in Just Nothing: "++show (tag,post,aTagOP,f:fs) 268 | choose (Just (tag,post1)) (Just (_,post2)) = 269 | case aTagOP!tag of 270 | Maximize -> order 271 | Minimize -> flipOrder order 272 | Ignore -> EQ 273 | Orbit -> EQ 274 | -- Orbit -> err $ "bestTrans.choose : Very Unexpected Orbit in Just Just: "++show (tag,(post1,post2),aTagOP,f:fs) 275 | where order = case (post1,post2) of 276 | (SetPre,SetPre) -> EQ 277 | (SetPost,SetPost) -> EQ 278 | (SetPre,SetPost) -> LT 279 | (SetPost,SetPre) -> GT 280 | (SetVal v1,SetVal v2) -> compare v1 v2 281 | _ -> err $ "bestTrans.compareWith.choose sees incomparable "++show (tag,post1,post2) 282 | 283 | 284 | {-# INLINE compareWith #-} 285 | compareWith :: (Ord x,Monoid a) => (Maybe (x,b) -> Maybe (x,c) -> a) -> [(x,b)] -> [(x,c)] -> a 286 | compareWith comp = cw where 287 | cw [] [] = comp Nothing Nothing 288 | cw xx@(x:xs) yy@(y:ys) = 289 | case compare (fst x) (fst y) of 290 | GT -> comp Nothing (Just y) `mappend` cw xx ys 291 | EQ -> comp (Just x) (Just y) `mappend` cw xs ys 292 | LT -> comp (Just x) Nothing `mappend` cw xs yy 293 | cw xx [] = foldr (\x rest -> comp (Just x) Nothing `mappend` rest) mempty xx 294 | cw [] yy = foldr (\y rest -> comp Nothing (Just y) `mappend` rest) mempty yy 295 | 296 | 297 | isDFAFrontAnchored :: DFA -> Bool 298 | isDFAFrontAnchored = isDTFrontAnchored . d_dt 299 | where 300 | isDTFrontAnchored :: DT -> Bool 301 | isDTFrontAnchored (Simple' {}) = False 302 | isDTFrontAnchored (Testing' {dt_test=wt,dt_a=a,dt_b=b}) | wt == Test_BOL = isDTLosing b 303 | | otherwise = isDTFrontAnchored a && isDTFrontAnchored b 304 | where 305 | -- can DT never win or accept a character (when following trans_single)? 
306 | isDTLosing :: DT -> Bool 307 | isDTLosing (Testing' {dt_a=a',dt_b=b'}) = isDTLosing a' && isDTLosing b' 308 | isDTLosing (Simple' {dt_win=w}) | not (IMap.null w) = False -- can win with 0 characters 309 | isDTLosing (Simple' {dt_trans=CharMap mt,dt_other=o}) = 310 | let ts = o : IMap.elems mt 311 | in all transLoses ts 312 | where 313 | transLoses :: Transition -> Bool 314 | transLoses (Transition {trans_single=dfa,trans_how=dtrans}) = isDTLose dfa || onlySpawns dtrans 315 | where 316 | isDTLose :: DFA -> Bool 317 | isDTLose dfa' = ISet.null (d_id dfa') 318 | onlySpawns :: DTrans -> Bool 319 | onlySpawns t = case IMap.elems t of 320 | [m] -> IMap.null m 321 | _ -> False 322 | 323 | {- toInstructions -} 324 | 325 | toInstructions :: TagList -> Instructions 326 | toInstructions spec = 327 | let (p,o) = execState (assemble spec) (mempty,mempty) 328 | in Instructions { newPos = IMap.toList p 329 | , newOrbits = if IMap.null o then Nothing 330 | else Just $ alterOrbits (IMap.toList o) 331 | } 332 | 333 | type CompileInstructions a = State 334 | ( IntMap Action -- 2009: change to SetPre | SetPost enum 335 | , IntMap AlterOrbit 336 | ) a 337 | 338 | data AlterOrbit = AlterReset -- removing the Orbits record from the OrbitLog 339 | | AlterLeave -- set inOrbit to False 340 | | AlterModify { newInOrbit :: Bool -- set inOrbit to the newInOrbit value 341 | , freshOrbit :: Bool} -- freshOrbit of True means to set getOrbits to mempty 342 | deriving (Show) -- freshOrbit of False means try appending position or else Seq.empty 343 | 344 | assemble :: TagList -> CompileInstructions () 345 | assemble = mapM_ oneInstruction where 346 | oneInstruction (tag,command) = 347 | case command of 348 | PreUpdate TagTask -> setPreTag tag 349 | PreUpdate ResetGroupStopTask -> resetGroupTag tag 350 | PreUpdate SetGroupStopTask -> setGroupTag tag 351 | PreUpdate ResetOrbitTask -> resetOrbit tag 352 | PreUpdate EnterOrbitTask -> enterOrbit tag 353 | PreUpdate LeaveOrbitTask -> leaveOrbit tag 354 | PostUpdate TagTask -> setPostTag tag 355 | PostUpdate ResetGroupStopTask -> resetGroupTag tag 356 | PostUpdate SetGroupStopTask -> setGroupTag tag 357 | _ -> err ("assemble : Weird orbit command: "++show (tag,command)) 358 | 359 | setPreTag :: Tag -> CompileInstructions () 360 | setPreTag = modifyPos SetPre 361 | 362 | setPostTag :: Tag -> CompileInstructions () 363 | setPostTag = modifyPos SetPost 364 | 365 | resetGroupTag :: Tag -> CompileInstructions () 366 | resetGroupTag = modifyPos (SetVal (-1)) 367 | 368 | setGroupTag :: Tag -> CompileInstructions () 369 | setGroupTag = modifyPos (SetVal 0) 370 | 371 | -- The following is ten times more complicated than it ought to be. Sorry, I was too new, and now 372 | -- too busy to clean this up. 
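-- A minimal illustration (not part of the library, derived from 'assemble'
-- above): a tag list that records tag 1 before the current position and tag 2
-- after it assembles to two 'newPos' entries and no orbit changes.
_exampleInstructions :: Instructions
_exampleInstructions = toInstructions [ (1, PreUpdate TagTask)
                                      , (2, PostUpdate TagTask) ]
-- expected shape: newPos == [(1,SetPre),(2,SetPost)], newOrbits == Nothing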
373 | 374 | resetOrbit :: Tag -> CompileInstructions () 375 | resetOrbit tag = modifyPos (SetVal (-1)) tag >> modifyOrbit (IMap.insert tag AlterReset) 376 | 377 | enterOrbit :: Tag -> CompileInstructions () 378 | enterOrbit tag = modifyPos (SetVal 0) tag >> modifyOrbit changeOrbit where 379 | changeOrbit = IMap.insertWith overwriteOrbit tag appendNewOrbit 380 | 381 | appendNewOrbit = AlterModify {newInOrbit = True, freshOrbit = False} -- try to append 382 | startNewOrbit = AlterModify {newInOrbit = True, freshOrbit = True} -- will start a new series 383 | 384 | overwriteOrbit _ AlterReset = startNewOrbit 385 | overwriteOrbit _ AlterLeave = startNewOrbit 386 | overwriteOrbit _ (AlterModify {newInOrbit = False}) = startNewOrbit 387 | overwriteOrbit _ (AlterModify {newInOrbit = True}) = 388 | err $ "enterOrbit: Cannot enterOrbit twice in a row: " ++ show tag 389 | 390 | leaveOrbit :: Tag -> CompileInstructions () 391 | leaveOrbit tag = modifyOrbit escapeOrbit where 392 | escapeOrbit = IMap.insertWith setInOrbitFalse tag AlterLeave where 393 | setInOrbitFalse _ x@(AlterModify {}) = x {newInOrbit = False} 394 | setInOrbitFalse _ x = x 395 | 396 | modifyPos :: Action -> Tag -> CompileInstructions () 397 | modifyPos todo tag = do 398 | (a,c) <- get 399 | let a' = IMap.insert tag todo a 400 | seq a' $ put (a',c) 401 | 402 | modifyOrbit :: (IntMap AlterOrbit -> IntMap AlterOrbit) -> CompileInstructions () 403 | modifyOrbit f = do 404 | (a,c) <- get 405 | let c' = f c 406 | seq c' $ put (a,c') 407 | 408 | ---- 409 | 410 | alterOrbits :: [(Tag,AlterOrbit)] -> (Position -> OrbitTransformer) 411 | alterOrbits x = let items = map alterOrbit x 412 | in (\ pos m -> foldl (flip ($)) m (map ($ pos) items)) 413 | 414 | alterOrbit :: (Tag,AlterOrbit) -> (Position -> OrbitTransformer) 415 | 416 | alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = True}) = 417 | (\ pos m -> IMap.insert tag (Orbits { inOrbit = inOrbit' 418 | , basePos = pos 419 | , ordinal = Nothing 420 | , getOrbits = mempty}) m) 421 | 422 | alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = False}) = 423 | (\ pos m -> IMap.insertWithKey (updateOrbit pos) tag (newOrbit pos) m) where 424 | newOrbit pos = Orbits { inOrbit = inOrbit' 425 | , basePos = pos 426 | , ordinal = Nothing 427 | , getOrbits = Mon.mempty} 428 | updateOrbit pos _tag new old | inOrbit old = old { inOrbit = inOrbit' 429 | , getOrbits = getOrbits old |> pos } 430 | | otherwise = new 431 | 432 | alterOrbit (tag,AlterReset) = (\ _ m -> IMap.delete tag m) 433 | 434 | alterOrbit (tag,AlterLeave) = (\ _ m -> case IMap.lookup tag m of 435 | Nothing -> m 436 | Just x -> IMap.insert tag (x {inOrbit=False}) m) 437 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Text.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Text.Regex.TDFA.Text 3 | Copyright : Chris Kuklewicz 2007-2009, shelarcy 2012 4 | License : BSD-style (see the file LICENSE) 5 | 6 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 7 | 'Text' with the TDFA backend ("Text.Regex.TDFA.NewDFA.Engine" and 8 | "Text.Regex.TDFA.NewDFA.Tester"). 9 | 10 | This exports instances of the high level API and the medium level 11 | API of 'compile','execute', and 'regexec'. 
12 | 13 | @since 1.3.1 14 | -} 15 | module Text.Regex.TDFA.Text( 16 | Regex 17 | ,CompOption 18 | ,ExecOption 19 | ,compile 20 | ,execute 21 | ,regexec 22 | ) where 23 | 24 | import Data.Array((!),elems) 25 | import qualified Data.Text as T(Text,unpack) 26 | 27 | import Text.Regex.Base(RegexLike(..),RegexMaker(..),Extract(..),MatchArray,RegexContext(..)) 28 | import Text.Regex.Base.Impl(polymatch,polymatchM) 29 | import Text.Regex.TDFA.ReadRegex(parseRegex) 30 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 31 | import Text.Regex.TDFA.TDFA(patternToRegex) 32 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups),Position) 33 | 34 | import Data.Maybe(listToMaybe) 35 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons) 36 | import qualified Text.Regex.TDFA.NewDFA.Engine as Engine(execMatch) 37 | import qualified Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 38 | 39 | -- | @since 1.3.1 40 | instance RegexContext Regex T.Text T.Text where 41 | match = polymatch 42 | matchM = polymatchM 43 | 44 | -- | @since 1.3.1 45 | instance RegexMaker Regex CompOption ExecOption T.Text where 46 | makeRegexOptsM c e source = makeRegexOptsM c e (T.unpack source) 47 | 48 | -- | @since 1.3.1 49 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> T.Text -> [MatchArray] #-} 50 | execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 51 | execMatch = Engine.execMatch 52 | 53 | -- | @since 1.3.1 54 | {-# SPECIALIZE myMatchTest :: Regex -> T.Text -> Bool #-} 55 | myMatchTest :: Uncons text => Regex -> text -> Bool 56 | myMatchTest = Tester.matchTest 57 | 58 | -- | @since 1.3.1 59 | instance RegexLike Regex T.Text where 60 | matchOnce r s = listToMaybe (matchAll r s) 61 | matchAll r s = execMatch r 0 '\n' s 62 | matchCount r s = length (matchAll r' s) 63 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 64 | matchTest = myMatchTest 65 | matchOnceText regex source = 66 | fmap (\ma -> let (o,l) = ma!0 67 | in (before o source 68 | ,fmap (\ol -> (extract ol source,ol)) ma 69 | ,after (o+l) source)) 70 | (matchOnce regex source) 71 | matchAllText regex source = 72 | map (fmap (\ol -> (extract ol source,ol))) 73 | (matchAll regex source) 74 | 75 | -- | @since 1.3.1 76 | compile :: CompOption -- ^ Flags (summed together) 77 | -> ExecOption -- ^ Flags (summed together) 78 | -> T.Text -- ^ The regular expression to compile 79 | -> Either String Regex -- ^ Returns: the compiled regular expression 80 | compile compOpt execOpt txt = 81 | case parseRegex (T.unpack txt) of 82 | Left err -> Left ("parseRegex for Text.Regex.TDFA.Text failed:"++show err) 83 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 84 | 85 | -- | @since 1.3.1 86 | execute :: Regex -- ^ Compiled regular expression 87 | -> T.Text -- ^ Text to match against 88 | -> Either String (Maybe MatchArray) 89 | execute r txt = Right (matchOnce r txt) 90 | 91 | -- | @since 1.3.1 92 | regexec :: Regex -- ^ Compiled regular expression 93 | -> T.Text -- ^ Text to match against 94 | -> Either String (Maybe (T.Text, T.Text, T.Text, [T.Text])) 95 | regexec r txt = Right $ 96 | case matchOnceText r txt of 97 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 98 | -> Just (pre, main, post, rest) 99 | _ -> Nothing 100 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Text/Lazy.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : 
Text.Regex.TDFA.Text.Lazy 3 | Copyright : Chris Kuklewicz 2007-2009, shelarcy 2012 4 | License : BSD-style (see the file LICENSE) 5 | 6 | This module provides 'RegexMaker' and 'RegexLike' instances for using 7 | lazy 'Text' with the TDFA backend ("Text.Regex.TDFA.NewDFA.Engine" and 8 | "Text.Regex.TDFA.NewDFA.Tester"). 9 | 10 | This exports instances of the high level API and the medium level 11 | API of 'compile','execute', and 'regexec'. 12 | 13 | @since 1.3.1 14 | -} 15 | module Text.Regex.TDFA.Text.Lazy( 16 | Regex 17 | ,CompOption 18 | ,ExecOption 19 | ,compile 20 | ,execute 21 | ,regexec 22 | ) where 23 | 24 | import Data.Array.IArray(Array,(!),elems) 25 | import qualified Data.Text.Lazy as L(Text,unpack) 26 | 27 | import Text.Regex.Base(MatchArray,RegexContext(..),Extract(..),RegexMaker(..),RegexLike(..)) 28 | import Text.Regex.Base.Impl(polymatch,polymatchM) 29 | import Text.Regex.TDFA.ReadRegex(parseRegex) 30 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 31 | import Text.Regex.TDFA.TDFA(patternToRegex) 32 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups),Position) 33 | 34 | import Data.Maybe(listToMaybe) 35 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons) 36 | import qualified Text.Regex.TDFA.NewDFA.Engine as Engine(execMatch) 37 | import qualified Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 38 | 39 | -- | @since 1.3.1 40 | instance RegexContext Regex L.Text L.Text where 41 | match = polymatch 42 | matchM = polymatchM 43 | 44 | -- | @since 1.3.1 45 | instance RegexMaker Regex CompOption ExecOption L.Text where 46 | makeRegexOptsM c e source = makeRegexOptsM c e (L.unpack source) 47 | 48 | -- | @since 1.3.1 49 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> L.Text -> [MatchArray] #-} 50 | execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 51 | execMatch = Engine.execMatch 52 | 53 | -- | @since 1.3.1 54 | {-# SPECIALIZE myMatchTest :: Regex -> L.Text -> Bool #-} 55 | myMatchTest :: Uncons text => Regex -> text -> Bool 56 | myMatchTest = Tester.matchTest 57 | 58 | -- | @since 1.3.1 59 | instance RegexLike Regex L.Text where 60 | matchOnce r s = listToMaybe (matchAll r s) 61 | matchAll r s = execMatch r 0 '\n' s 62 | matchCount r s = length (matchAll r' s) 63 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 64 | matchTest = myMatchTest 65 | matchOnceText regex source = 66 | fmap (\ ma -> 67 | let (o,l) = ma!0 68 | in (before o source 69 | ,fmap (\ ol -> (extract ol source,ol)) ma 70 | ,after (o+l) source)) 71 | (matchOnce regex source) 72 | matchAllText regex source = 73 | let go :: Int -> L.Text -> [Array Int (Int, Int)] -> [Array Int (L.Text, (Int, Int))] 74 | go i _ _ | i `seq` False = undefined 75 | go _i _t [] = [] 76 | go i t (x:xs) = 77 | let (off0,len0) = x!0 78 | trans pair@(off,len) = (extract (off-i,len) t,pair) 79 | t' = after (off0+(len0-i)) t 80 | in fmap trans x : seq t' (go (off0+len0) t' xs) 81 | in go 0 source (matchAll regex source) 82 | 83 | -- | @since 1.3.1 84 | compile :: CompOption -- ^ Flags (summed together) 85 | -> ExecOption -- ^ Flags (summed together) 86 | -> L.Text -- ^ The regular expression to compile 87 | -> Either String Regex -- ^ Returns: the compiled regular expression 88 | compile compOpt execOpt txt = 89 | case parseRegex (L.unpack txt) of 90 | Left err -> Left ("parseRegex for Text.Regex.TDFA.Text.Lazy failed:"++show err) 91 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 92 | 93 | -- | @since 1.3.1
94 | execute :: Regex -- ^ Compiled regular expression 95 | -> L.Text -- ^ Text to match against 96 | -> Either String (Maybe MatchArray) 97 | execute r txt = Right (matchOnce r txt) 98 | 99 | -- | @since 1.3.1 100 | regexec :: Regex -- ^ Compiled regular expression 101 | -> L.Text -- ^ Text to match against 102 | -> Either String (Maybe (L.Text, L.Text, L.Text, [L.Text])) 103 | regexec r txt = Right $ 104 | case matchOnceText r txt of 105 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 106 | -> Just (pre, main, post, rest) 107 | _ -> Nothing 108 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Wrap.hs: -------------------------------------------------------------------------------- 1 | -- | "Text.Regex.TDFA.Wrap" provides the instance of RegexOptions and 2 | -- the definition of (=~) and (=~~). This is all re-exported by 3 | -- "Text.Regex.TDFA". 4 | 5 | module Text.Regex.TDFA.Wrap(Regex(..),CompOption(..),ExecOption(..),(=~),(=~~)) where 6 | 7 | {- By Chris Kuklewicz, 2007-2009. BSD License, see the LICENSE file. -} 8 | 9 | import Text.Regex.Base.RegexLike(RegexMaker(..),RegexContext(..)) 10 | import Text.Regex.TDFA.Common(CompOption(..),ExecOption(..),Regex(..)) 11 | 12 | -- | This is the pure functional matching operator. If the target 13 | -- cannot be produced then some empty result will be returned. If 14 | -- there is an error in processing, then 'error' will be called. 15 | (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) 16 | => source1 -> source -> target 17 | (=~) x r = let make :: RegexMaker Regex CompOption ExecOption a => a -> Regex 18 | make = makeRegex 19 | in match (make r) x 20 | 21 | -- | This is the monadic matching operator. If a single match fails, 22 | -- then 'fail' will be called. 23 | (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) 24 | => source1 -> source -> m target 25 | (=~~) x r = do let make :: (RegexMaker Regex CompOption ExecOption a, Monad m) => a -> m Regex 26 | make = makeRegexM 27 | q <- make r 28 | matchM q x 29 | -------------------------------------------------------------------------------- /regex-tdfa.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 1.24 2 | name: regex-tdfa 3 | version: 1.3.2.4 4 | 5 | build-Type: Simple 6 | license: BSD3 7 | license-file: LICENSE 8 | copyright: Copyright (c) 2007-2009, Christopher Kuklewicz 9 | author: Christopher Kuklewicz 10 | maintainer: Andreas Abel 11 | homepage: https://wiki.haskell.org/Regular_expressions 12 | bug-reports: https://github.com/haskell-hvr/regex-tdfa/issues 13 | 14 | category: Text 15 | synopsis: Pure Haskell Tagged DFA Backend for "Text.Regex" (regex-base) 16 | description: 17 | This package provides a pure Haskell \"Tagged\" DFA regex engine for \"Text.Regex\" (@regex-base@). This implementation was inspired by the algorithm (and Master's thesis) behind the regular expression library known as TRE. 18 | . 19 | Please consult the "Text.Regex.TDFA" module for API documentation including a tutorial with usage examples; 20 | see also <https://wiki.haskell.org/Regular_expressions> for general information about regular expression support in Haskell.
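-- An illustrative sketch only (the authoritative operator documentation lives in
-- Text.Regex.TDFA.Wrap above): the high-level API is driven by the requested
-- result type, e.g.
--
--   ("abcd" =~ "b|c") :: Bool                                -- True
--   ("abcd" =~ "b|c") :: (String, String, String, [String])  -- (before, match, after, groups)
--
-- (=~~) behaves the same but reports a failed match via 'fail' in a chosen monad.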
21 | 22 | extra-doc-files: 23 | CHANGELOG.md 24 | README.md 25 | 26 | extra-source-files: 27 | test/cases/*.txt 28 | 29 | tested-with: 30 | GHC == 9.12.2 31 | GHC == 9.10.2 32 | GHC == 9.8.4 33 | GHC == 9.6.7 34 | GHC == 9.4.8 35 | GHC == 9.2.8 36 | GHC == 9.0.2 37 | GHC == 8.10.7 38 | GHC == 8.8.4 39 | GHC == 8.6.5 40 | GHC == 8.4.4 41 | GHC == 8.2.2 42 | GHC == 8.0.2 43 | 44 | source-repository head 45 | type: git 46 | location: https://github.com/haskell-hvr/regex-tdfa.git 47 | 48 | source-repository this 49 | type: git 50 | location: https://github.com/haskell-hvr/regex-tdfa.git 51 | tag: v1.3.2.4 52 | 53 | flag force-O2 54 | default: False 55 | manual: True 56 | description: 57 | Force building @regex-tdfa@ with \"@ghc-options: -O2@\". 58 | . 59 | __NOTE__: This flag is mostly provided for legacy use-cases. Nowadays you can conveniently control optimization levels on a per-package granularity via @cabal.project@ files; see the Cabal user's guide for more details. 60 | 61 | flag doctest 62 | default: True 63 | manual: False 64 | description: 65 | Include test-suite doctest. 66 | 67 | library 68 | hs-source-dirs: lib 69 | 70 | exposed-modules: Data.IntMap.CharMap2 71 | Data.IntMap.EnumMap2 72 | Data.IntSet.EnumSet2 73 | Text.Regex.TDFA 74 | Text.Regex.TDFA.ByteString 75 | Text.Regex.TDFA.ByteString.Lazy 76 | Text.Regex.TDFA.Common 77 | Text.Regex.TDFA.CorePattern 78 | Text.Regex.TDFA.IntArrTrieSet 79 | Text.Regex.TDFA.NewDFA.Engine 80 | Text.Regex.TDFA.NewDFA.Engine_FA 81 | Text.Regex.TDFA.NewDFA.Engine_NC 82 | Text.Regex.TDFA.NewDFA.Engine_NC_FA 83 | Text.Regex.TDFA.NewDFA.Tester 84 | Text.Regex.TDFA.NewDFA.Uncons 85 | Text.Regex.TDFA.NewDFA.MakeTest 86 | Text.Regex.TDFA.Pattern 87 | Text.Regex.TDFA.ReadRegex 88 | Text.Regex.TDFA.Sequence 89 | Text.Regex.TDFA.String 90 | Text.Regex.TDFA.TDFA 91 | Text.Regex.TDFA.TNFA 92 | Text.Regex.TDFA.Text 93 | Text.Regex.TDFA.Text.Lazy 94 | 95 | other-modules: Paths_regex_tdfa 96 | 97 | build-depends: array >= 0.5 && < 0.6 98 | , base >= 4.9 && < 5 99 | , bytestring >= 0.10 && < 0.13 100 | , containers >= 0.5 && < 1 101 | , mtl >= 2.1.3 && < 2.4 102 | , parsec == 3.1.* 103 | , regex-base == 0.94.* 104 | , text >= 1.2.3 && < 2.2 105 | 106 | default-language: Haskell2010 107 | default-extensions: BangPatterns 108 | ExistentialQuantification 109 | FlexibleContexts 110 | FlexibleInstances 111 | ForeignFunctionInterface 112 | FunctionalDependencies 113 | MagicHash 114 | MultiParamTypeClasses 115 | NondecreasingIndentation 116 | RecursiveDo 117 | ScopedTypeVariables 118 | TypeOperators 119 | TypeSynonymInstances 120 | UnboxedTuples 121 | UnliftedFFITypes 122 | other-extensions: CPP 123 | 124 | ghc-options: 125 | -funbox-strict-fields 126 | -fspec-constr-count=10 127 | -Wall 128 | -Wno-orphans 129 | -Wcompat 130 | 131 | if flag(force-O2) 132 | ghc-options: 133 | -O2 134 | 135 | 136 | test-suite regex-tdfa-unittest 137 | type: exitcode-stdio-1.0 138 | 139 | hs-source-dirs: test 140 | main-is: Main.hs 141 | 142 | -- intra-package dependency 143 | build-depends: regex-tdfa 144 | 145 | -- dependencies whose version constraints are inherited via intra-package 'regex-tdfa' dependency 146 | build-depends: array 147 | , base 148 | , bytestring 149 | , containers 150 | , filepath 151 | , mtl 152 | , regex-base 153 | , text 154 | 155 | -- component-specific dependencies not inherited via 'regex-tdfa' 156 | , directory >= 1.1.0 && < 1.4 157 | , filepath >= 1.3.0 && < 1.6 158 | , utf8-string >= 1.0.1 && < 1.1 159 | 160 | default-language: Haskell2010 161 | default-extensions:
FlexibleInstances 162 | FlexibleContexts 163 | Rank2Types 164 | other-extensions: GeneralizedNewtypeDeriving 165 | 166 | ghc-options: -Wall -funbox-strict-fields 167 | 168 | if impl(ghc >= 8.0) 169 | ghc-options: -Wcompat 170 | 171 | if flag(force-O2) 172 | ghc-options: -O2 173 | 174 | test-suite doctest 175 | type: exitcode-stdio-1.0 176 | hs-source-dirs: test 177 | main-is: DocTestMain.hs 178 | 179 | build-depends: 180 | base 181 | , regex-tdfa 182 | , doctest-parallel >= 0.2.2 183 | -- doctest-parallel-0.2.2 is the first to filter out autogen-modules 184 | 185 | default-language: Haskell2010 186 | 187 | if !flag(doctest) 188 | buildable: False 189 | -------------------------------------------------------------------------------- /test/DocTestMain.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.Environment 4 | ( getArgs ) 5 | import Test.DocTest 6 | ( mainFromLibrary ) 7 | import Test.DocTest.Helpers 8 | ( extractSpecificCabalLibrary, findCabalPackage ) 9 | 10 | main :: IO () 11 | main = do 12 | args <- getArgs 13 | pkg <- findCabalPackage "regex-tdfa" 14 | -- Need to give the library name, otherwise the parser does not find it. 15 | lib <- extractSpecificCabalLibrary Nothing pkg 16 | mainFromLibrary lib args 17 | -------------------------------------------------------------------------------- /test/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, Christopher Edward Kuklewicz 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | * Neither the name of the copyright holder nor the names of the contributors may be used to endorse or promote products derived from this software without specific prior written permission. 9 | 10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
11 | -------------------------------------------------------------------------------- /test/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 3 | {-# LANGUAGE MonoLocalBinds #-} 4 | 5 | #if __GLASGOW_HASKELL__ >= 902 6 | {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} 7 | #endif 8 | 9 | module Main where 10 | 11 | import Control.Applicative as App 12 | import Control.Monad 13 | import qualified Control.Monad.Fail as Fail 14 | import Data.Array 15 | import qualified Data.ByteString as BS 16 | import qualified Data.ByteString.UTF8 as UTF8 17 | import Data.List (isInfixOf, mapAccumL, sort) 18 | import Data.String 19 | import Data.Typeable 20 | import Data.Version () 21 | import System.Directory (getDirectoryContents) 22 | import System.Environment 23 | import System.Exit 24 | import System.FilePath ((</>)) 25 | import Text.Regex.Base 26 | 27 | import qualified Text.Regex.TDFA as TDFA 28 | 29 | default(Int) 30 | 31 | type RSource = String 32 | type RType = String -- can be changed to any Extract instance 33 | newtype RegexSource = RegexSource {unSource :: RSource} deriving Show 34 | newtype RegexStringOf a = RegexString {unString :: a} deriving Show 35 | type RegexString = RegexStringOf RType 36 | 37 | dictionary :: [Char] 38 | dictionary = ['a'..'c']++['A'..'C']++"_" 39 | 40 | 41 | type A = Array Int (Int,Int) 42 | 43 | maxItems :: Int 44 | maxItems=100 45 | 46 | testOne :: t -> (t -> t1 -> Array Int (Int, Int)) -> t1 -> String 47 | testOne s op r = 48 | let foo :: String 49 | foo = concatMap (\(o,l) -> show (o,(o+l))) (take maxItems $ elems (op s r :: Array Int (Int,Int))) 50 | in if null foo then "NOMATCH" else foo 51 | 52 | testOne' :: A -> String 53 | testOne' input = 54 | let foo :: String 55 | foo = concatMap (\(o,l) -> show (o,(o+l))) (take maxItems $ elems input) 56 | in if null foo then "NOMATCH" else foo 57 | 58 | toTest :: String -> (Int,String,String,String) 59 | toTest line = let [n,regex,input,output] = words line 60 | noQ [] = [] 61 | noQ ('?':xs) = '-':'1':noQ xs 62 | noQ (x:xs) = x:noQ xs 63 | input' = if input == "NULL" then "" else unN input 64 | in (read n,regex,input',noQ output) 65 | 66 | toTest' :: String -> String -> (String,(Int,String,String,String)) 67 | toTest' oldRegex line = 68 | let [n,regex,input,output] = words line 69 | noQ [] = [] 70 | noQ ('?':xs) = '-':'1':noQ xs 71 | noQ (x:xs) = x:noQ xs 72 | input' = if input == "NULL" then "" else input 73 | regex' = if regex == "SAME" then oldRegex else regex 74 | in (regex',(read n,regex',input',noQ output)) 75 | 76 | load,load' :: String -> [(Int, String, String, String)] 77 | load = map toTest . lines 78 | load' = snd . mapAccumL toTest' "X_X_X_" .
lines 79 | 80 | checkTest :: PFT A -> (Int,String,String,String) -> IO [Int] 81 | checkTest opM (n,regex,input,output) = do 82 | let Result output'e = opM input regex 83 | p = putStrLn 84 | p "" 85 | case output'e of 86 | Left msg -> do 87 | p ("############################# Unexpected Error # "++show n ++ " #############################" ) 88 | p ("Searched text: "++show input) 89 | p ("Regex pattern: "++show regex) 90 | p ("Expected output: "++show output) 91 | p ("Error message: "++msg) 92 | return [n] 93 | Right output'a -> do 94 | let output' = testOne' output'a 95 | case (n<0 , output==output') of 96 | (False,True) -> p ("Expected Pass #"++show n) 97 | (False,False) -> p ("############################# Unexpected Fail # "++show n ++ " #############################" ) 98 | (True,True) -> p ("############################# Unexpected Pass # "++show n ++ " #############################" ) 99 | (True,False) -> p ("Expected Fail #"++show n) 100 | if (output == output') 101 | then do p ("text and pattern: "++show input) 102 | p ("Regex pattern: "++show regex) 103 | p ("Outputs agree: "++show output) 104 | return (if n<0 then [n] else []) 105 | else do p "" 106 | p ("Searched text: "++show input) 107 | p ("Regex pattern: "++show regex) 108 | p ("Expected output: "++show output) 109 | p ("Actual result : "++show output') 110 | return (if n<0 then [] else [n]) 111 | 112 | checkFile :: (RType -> RSource -> Result A) -> (FilePath, String) -> IO (FilePath,[Int]) 113 | checkFile opM (filepath, contents) = do 114 | putStrLn $ "\nUsing Tests from: "++filepath 115 | vals <- liftM concat (mapM (checkTest opM) (load' contents)) 116 | return (filepath,vals) 117 | 118 | checkTests :: (RType -> RSource -> Result A) -> [(FilePath,String)] -> IO [(String, [Int])] 119 | checkTests opM testCases = mapM (checkFile opM) testCases 120 | 121 | readTestCases :: FilePath -> IO [(String, String)] 122 | readTestCases folder = do 123 | fns <- filter (".txt" `isInfixOf`) <$> getDirectoryContents folder 124 | when (null fns) $ 125 | fail ("readTestCases: No test-cases found in " ++ show folder) 126 | forM (sort fns) $ \fn -> do 127 | bs <- BS.readFile (folder </> fn) 128 | return (fn, UTF8.toString bs) 129 | 130 | newtype Result a = Result (Either String a) 131 | deriving (Eq, Show, Functor, App.Applicative, Monad) 132 | 133 | instance Fail.MonadFail Result where 134 | fail = Result . Left 135 | 136 | type PFT a = RegexContext TDFA.Regex RType a => RType -> RSource -> Result a 137 | 138 | posix :: PFT a 139 | posix x reg = 140 | let q :: Result TDFA.Regex 141 | q = makeRegexOptsM (defaultCompOpt { TDFA.caseSensitive = False}) defaultExecOpt reg 142 | in q >>= \ s -> return (match s x) 143 | 144 | unN :: String -> String 145 | unN ('\\':'n':xs) = '\n':unN xs 146 | unN (x:xs) = x:unN xs 147 | unN [] = [] 148 | 149 | manual :: [String] -> IO () 150 | manual [sIn,rIn] = do 151 | let s :: RType 152 | r :: String 153 | s = fromString (unN sIn) 154 | r = (unN rIn) 155 | -- first match 156 | let r1 :: TDFA.Regex 157 | r1 = makeRegex r 158 | let b1u@(_,_b1s,_,_)=(match r1 s :: (RType,RType,RType,[RType])) 159 | putStrLn ("Searched text: "++show s) 160 | putStrLn ("Regex pattern: "++show r) 161 | print b1u 162 | -- multiple matches and counting 163 | let b1 = (match r1 s :: [MatchArray]) 164 | c1 = (match r1 s :: Int) 165 | putStrLn $ "Count of matches = "++show c1 166 | putStrLn $ "Matches found = "++show (length b1) 167 | mapM_ (putStrLn .
testOne') b1 168 | manual _ = error "wrong arguments to regex-tdfa-unittest's manual function" 169 | 170 | main :: IO () 171 | main = do 172 | putStr "Testing Text.Regex.TDFA version: " 173 | print TDFA.getVersion_Text_Regex_TDFA 174 | a <- getArgs 175 | if length a == 2 176 | then manual a 177 | else do 178 | putStrLn $ "Explanation and discussion of these tests on the wiki at http://www.haskell.org/haskellwiki/Regex_Posix, including comparing results from different operating systems" 179 | putStrLn $ "Questions about this package to the author at email " 180 | putStrLn $ "The type of both the pattern and test is " ++ show (typeOf (undefined :: RType)) 181 | putStrLn $ "Without exactly two arguments:" 182 | putStrLn $ " This program runs all *.txt test files found in the test/cases directory." 183 | putStrLn $ " Lines with negative number are expected to fail, others are expected to pass." 184 | putStrLn $ "With exactly two arguments:" 185 | putStrLn $ " The first argument is the text to be searched." 186 | putStrLn $ " The second argument is the regular expression pattern to search with." 187 | vals <- checkTests posix =<< readTestCases ("test" </> "cases") 188 | if null (concatMap snd vals) 189 | then putStrLn "\nWow, all the tests passed!" 190 | else do 191 | putStrLn $ "\nBoo, tests failed!\n"++unlines (map show vals) 192 | exitFailure 193 | 194 | {- 195 | -- for TRE 196 | posix x r = let q :: Posix.Regex 197 | q = makeRegexOpts (defaultCompOpt .|. Posix.compRightAssoc .|. Posix.compIgnoreCase) defaultExecOpt r 198 | in match q x 199 | 200 | tdfa x r = let q :: TDFA.Wrap.Regex 201 | q = makeRegexOpts (defaultCompOpt { TDFA.Wrap.caseSensitive = False 202 | , TDFA.Wrap.rightAssoc = True }) defaultExecOpt r 203 | in match q x 204 | 205 | tdfa2 x r = let q :: TDFA2.Wrap.Regex 206 | q = makeRegexOpts (defaultCompOpt { TDFA2.Wrap.caseSensitive = False 207 | , TDFA2.Wrap.rightAssoc = True }) defaultExecOpt r 208 | in match q x 209 | -} 210 | -------------------------------------------------------------------------------- /test/cases/basic3.txt: -------------------------------------------------------------------------------- 1 | 1 \) () (1,2) 2 | 2 \} } (0,1) 3 | 3 ] ] (0,1) 4 | 4 $^ NULL (0,0) 5 | 5 a($) aa (1,2)(2,2) 6 | 6 a*(^a) aa (0,1)(0,1) 7 | 7 (..)*(...)* a (0,0)(?,?)(?,?) 8 | 8 (..)*(...)* abcd (0,4)(2,4)(?,?) 9 | 9 (ab|a)(bc|c) abc (0,3)(0,2)(2,3) 10 | 10 (ab)c|abc abc (0,3)(0,2) 11 | 11 a{0}b ab (1,2) 12 | 12 (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) 13 | 13 (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) 14 | 15 ((a|a)|a) a (0,1)(0,1)(0,1) 15 | 16 (a*)(a|aa) aaaa (0,4)(0,3)(3,4) 16 | 17 a*(a.|aa) aaaa (0,4)(2,4) 17 | 18 a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) 18 | 19 (a|b)?.* b (0,1)(0,1) 19 | 20 (a|b)c|a(b|c) ac (0,2)(0,1)(?,?) 20 | 21 (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) 21 | 22 (a|b)*c|(a|ab)*c abc (0,3)(1,2)(?,?) 22 | 23 (a|b)*c|(a|ab)*c xc (1,2)(?,?)(?,?) 23 | 24 (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)(?,?) 24 | 25 a?(ab|ba)ab abab (0,4)(0,2) 25 | 26 a?(ac{0}b|ba)ab abab (0,4)(0,2) 26 | 27 ab|abab abbabab (0,2) 27 | 28 aba|bab|bba baaabbbaba (5,8) 28 | 29 aba|bab baaabbbaba (6,9) 29 | 30 (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)(?,?) 30 | 31 (a.|.a.)*|(a|.a...) aa (0,2)(0,2)(?,?)
31 | 32 ab|a xabc (1,3) 32 | 33 ab|a xxabc (2,4) 33 | 34 (Ab|cD)* aBcD (0,4)(2,4) 34 | 35 :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) 35 | 36 :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) 36 | 37 [[:lower:]]+ `az{ (1,3) 37 | 38 [[:upper:]]+ @AZ[ (1,3) 38 | 39 (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) 39 | 43 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) 40 | 44 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2) 41 | 45 a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) 42 | 46 abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) 43 | 47 abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) 44 | 48 aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) 45 | 49 aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) 46 | 50 a*a*a*a*a*b aaaaaaaaab (0,10) 47 | 51 ab+bc abbc (0,4) 48 | 52 ab+bc abbbbc (0,6) 49 | 53 ab?bc abbc (0,4) 50 | 54 ab?bc abc (0,3) 51 | 55 ab?c abc (0,3) 52 | 56 ab|cd abc (0,2) 53 | 57 ab|cd abcd (0,2) 54 | 58 a\(b a(b (0,3) 55 | 59 a\(*b ab (0,2) 56 | 60 a\(*b a((b (0,4) 57 | 61 ((a)) abc (0,1)(0,1)(0,1) 58 | 62 (a)b(c) abc (0,3)(0,1)(2,3) 59 | 63 a+b+c aabbabc (4,7) 60 | 64 a* aaa (0,3) 61 | 65 (a*)* - (0,0)(0,0) 62 | 66 (a*)+ - (0,0)(0,0) 63 | 67 (a*|b)* - (0,0)(0,0) 64 | 68 (a+|b)* ab (0,2)(1,2) 65 | 69 (a+|b)+ ab (0,2)(1,2) 66 | 70 (a+|b)? ab (0,1)(0,1) 67 | 71 (^)* - (0,0)(0,0) 68 | 72 ([abc])*d abbbcd (0,6)(4,5) 69 | 73 ([abc])*bcd abcd (0,4)(0,1) 70 | 74 a|b|c|d|e e (0,1) 71 | 75 (a|b|c|d|e)f ef (0,2)(0,1) 72 | 76 ((a*|b))* - (0,0)(0,0)(0,0) 73 | 77 (ab|cd)e abcde (2,5)(2,4) 74 | 78 (a|b)c*d abcd (1,4)(1,2) 75 | 79 (ab|ab*)bc abc (0,3)(0,1) 76 | 80 a([bc]*)c* abc (0,3)(1,3) 77 | 81 a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) 78 | 82 a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) 79 | 83 a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) 80 | 84 a[bcd]*dcdcde adcdcde (0,7) 81 | 85 (ab|a)b*c abc (0,3)(0,2) 82 | 86 ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) 83 | 87 ^a(bc+|b[eh])g|.h$ abh (1,3)(?,?) 84 | 88 (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)(?,?) 85 | 89 (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) 86 | 90 (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)(?,?) 
87 | 91 (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) 88 | 92 (.*)c(.*) abcde (0,5)(0,2)(3,5) 89 | 93 a(bc)d abcd (0,4)(1,3) 90 | 94 a[-]?c ac (0,3) 91 | 95 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Qaddafi (0,15)(?,?)(10,12) 92 | 96 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar_Gadhafi (0,16)(?,?)(11,13) 93 | 97 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Kaddafi (0,15)(?,?)(10,12) 94 | 98 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Qadhafi (0,15)(?,?)(10,12) 95 | 99 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Gadafi (0,14)(?,?)(10,11) 96 | 100 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar_Qadafi (0,15)(?,?)(11,12) 97 | 101 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar_Gaddafi (0,14)(?,?)(9,11) 98 | 102 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar_Qadhdhafi (0,18)(?,?)(13,15) 99 | 103 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Khaddafi (0,16)(?,?)(11,13) 100 | 104 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Ghaddafy (0,16)(?,?)(11,13) 101 | 105 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Ghadafi (0,15)(?,?)(11,12) 102 | 106 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Ghaddafi (0,16)(?,?)(11,13) 103 | 107 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar_Kaddafi (0,14)(?,?)(9,11) 104 | 108 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Quathafi (0,16)(?,?)(11,13) 105 | 109 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Gheddafi (0,16)(?,?)(11,13) 106 | 110 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar_Khadafy (0,15)(?,?)(11,12) 107 | 111 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar_Qudhafi (0,15)(?,?)(10,12) 108 | 112 a+(b|c)*d+ aabcdd (0,6)(3,4) 109 | 113 ^.+$ vivi (0,4) 110 | 114 ^(.+)$ vivi (0,4)(0,4) 111 | 115 ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) 112 | 116 ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) 113 | 117 ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) 114 | 118 ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) 115 | 119 ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) 116 | 120 ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) 117 | 121 ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) 118 | 122 ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)(?,?) 119 | 123 ((foo)|bar)!bas bar!bas (0,7)(0,3)(?,?) 120 | 124 ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)(?,?) 121 | 125 ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) 122 | 126 (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) 123 | 127 (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) 124 | 128 (foo|(bar))!bas foo!bas (0,7)(0,3)(?,?) 125 | 129 (foo|bar)!bas bar!bas (0,7)(0,3) 126 | 130 (foo|bar)!bas foo!bar!bas (4,11)(4,7) 127 | 131 (foo|bar)!bas foo!bas (0,7)(0,3) 128 | 132 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) 129 | 133 ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)(?,?)(?,?) 130 | 134 ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)(?,?)(?,?) 131 | 135 ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11) 132 | 136 ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)(?,?)(?,?) 
133 | 137 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)(?,?)(?,?) 134 | 138 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)(?,?)(?,?) 135 | 139 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) 136 | 140 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)(?,?)(?,?) 137 | 141 .*(/XXX).* /XXX (0,4)(0,4) 138 | 142 .*(\\XXX).* \XXX (0,4)(0,4) 139 | 143 \\XXX \XXX (0,4) 140 | 144 .*(/000).* /000 (0,4)(0,4) 141 | 145 .*(\\000).* \000 (0,4)(0,4) 142 | 146 \\000 \000 (0,4) 143 | 147 [[:graph:]]+ !#}~ (0,4) 144 | 148 [[:xdigit:]]+ dE4db3eF (0,8) 145 | 149 [[:print:]]+ !#}~ (0,4) 146 | -------------------------------------------------------------------------------- /test/cases/class.txt: -------------------------------------------------------------------------------- 1 | 1 aa* xaxaax (1,2) 2 | 2 (a*)(ab)*(b*) abc (0,2)(0,1)(?,?)(1,2) 3 | -2 (a*)(ab)*(b*) abc (0,2)(0,0)(0,2)(2,2) 4 | 3 ((a*)(ab)*)((b*)(a*)) aba (0,3)(0,2)(0,0)(0,2)(2,3)(2,2)(2,3) 5 | 4 (...?.?)* xxxxxx (0,6)(4,6) 6 | 5 (a|ab)(bc|c) abcabc (0,3)(0,2)(2,3) 7 | 6 (aba|a*b)(aba|a*b) ababa (0,5)(0,2)(2,5) 8 | 7 (a*){2} xxxxx (0,0)(0,0) 9 | 8 (a*)* a (0,1)(0,1) 10 | 9 (aba|a*b)* ababa (0,5)(2,5) 11 | 10 (a(b)?)+ aba (0,3)(2,3)(?,?) 12 | 11 .*(.*) ab (0,2)(2,2) 13 | 12 (a?)((ab)?)(b?)a?(ab)?b? abab (0,4)(0,1)(1,1)(?,?)(1,2)(?,?) 14 | -12 (a?)((ab)?)(b?)a?(ab)?b? abab (0,4)(0,1)(1,1)(?,?)(1,2)(2,4) 15 | -------------------------------------------------------------------------------- /test/cases/forced-assoc.txt: -------------------------------------------------------------------------------- 1 | 1 (a|ab)(c|bcd) abcd (0,4)(0,1)(1,4) 2 | 2 (a|ab)(bcd|c) abcd (0,4)(0,1)(1,4) 3 | 3 (ab|a)(c|bcd) abcd (0,4)(0,1)(1,4) 4 | 4 (ab|a)(bcd|c) abcd (0,4)(0,1)(1,4) 5 | 5 ((a|ab)(c|bcd))(d*) abcd (0,4)(0,4)(0,1)(1,4)(4,4) 6 | 6 ((a|ab)(bcd|c))(d*) abcd (0,4)(0,4)(0,1)(1,4)(4,4) 7 | 7 ((ab|a)(c|bcd))(d*) abcd (0,4)(0,4)(0,1)(1,4)(4,4) 8 | 8 ((ab|a)(bcd|c))(d*) abcd (0,4)(0,4)(0,1)(1,4)(4,4) 9 | 9 (a|ab)((c|bcd)(d*)) abcd (0,4)(0,2)(2,4)(2,3)(3,4) 10 | 10 (a|ab)((bcd|c)(d*)) abcd (0,4)(0,2)(2,4)(2,3)(3,4) 11 | 11 (ab|a)((c|bcd)(d*)) abcd (0,4)(0,2)(2,4)(2,3)(3,4) 12 | 12 (ab|a)((bcd|c)(d*)) abcd (0,4)(0,2)(2,4)(2,3)(3,4) 13 | 13 (a*)(b|abc) abc (0,3)(0,0)(0,3) 14 | 14 (a*)(abc|b) abc (0,3)(0,0)(0,3) 15 | 15 ((a*)(b|abc))(c*) abc (0,3)(0,3)(0,0)(0,3)(3,3) 16 | 16 ((a*)(abc|b))(c*) abc (0,3)(0,3)(0,0)(0,3)(3,3) 17 | 17 (a*)((b|abc)(c*)) abc (0,3)(0,1)(1,3)(1,2)(2,3) 18 | 18 (a*)((abc|b)(c*)) abc (0,3)(0,1)(1,3)(1,2)(2,3) 19 | 19 (a*)(b|abc) abc (0,3)(0,0)(0,3) 20 | 20 (a*)(abc|b) abc (0,3)(0,0)(0,3) 21 | 21 ((a*)(b|abc))(c*) abc (0,3)(0,3)(0,0)(0,3)(3,3) 22 | 22 ((a*)(abc|b))(c*) abc (0,3)(0,3)(0,0)(0,3)(3,3) 23 | 23 (a*)((b|abc)(c*)) abc (0,3)(0,1)(1,3)(1,2)(2,3) 24 | 24 (a*)((abc|b)(c*)) abc (0,3)(0,1)(1,3)(1,2)(2,3) 25 | 25 (a|ab) ab (0,2)(0,2) 26 | 26 (ab|a) ab (0,2)(0,2) 27 | 27 (a|ab)(b*) ab (0,2)(0,2)(2,2) 28 | 28 (ab|a)(b*) ab (0,2)(0,2)(2,2) 29 | -------------------------------------------------------------------------------- /test/cases/left-assoc.txt: -------------------------------------------------------------------------------- 1 | -1 (a|ab)(c|bcd)(d*) abcd (0,4)(0,1)(1,4)(4,4) 2 | -2 (a|ab)(bcd|c)(d*) abcd (0,4)(0,1)(1,4)(4,4) 3 | -3 (ab|a)(c|bcd)(d*) abcd (0,4)(0,1)(1,4)(4,4) 4 | -4 (ab|a)(bcd|c)(d*) abcd (0,4)(0,1)(1,4)(4,4) 5 | -5 (a*)(b|abc)(c*) abc (0,3)(0,0)(0,3)(3,3) 6 | -6 (a*)(abc|b)(c*) abc (0,3)(0,0)(0,3)(3,3) 7 | -7 (a*)(b|abc)(c*) abc 
(0,3)(0,0)(0,3)(3,3) 8 | -8 (a*)(abc|b)(c*) abc (0,3)(0,0)(0,3)(3,3) 9 | -9 (a|ab)(c|bcd)(d|.*) abcd (0,4)(0,1)(1,4)(4,4) 10 | -10 (a|ab)(bcd|c)(d|.*) abcd (0,4)(0,1)(1,4)(4,4) 11 | -11 (ab|a)(c|bcd)(d|.*) abcd (0,4)(0,1)(1,4)(4,4) 12 | -12 (ab|a)(bcd|c)(d|.*) abcd (0,4)(0,1)(1,4)(4,4) 13 | -------------------------------------------------------------------------------- /test/cases/nullsub3.txt: -------------------------------------------------------------------------------- 1 | 1 (a*)* a (0,1)(0,1) 2 | 2 SAME x (0,0)(0,0) 3 | 3 SAME aaaaaa (0,6)(0,6) 4 | 4 SAME aaaaaax (0,6)(0,6) 5 | 5 (a*)+ a (0,1)(0,1) 6 | 6 SAME x (0,0)(0,0) 7 | 7 SAME aaaaaa (0,6)(0,6) 8 | 8 SAME aaaaaax (0,6)(0,6) 9 | 9 (a+)* a (0,1)(0,1) 10 | 10 SAME x (0,0)(?,?) 11 | 11 SAME aaaaaa (0,6)(0,6) 12 | 12 SAME aaaaaax (0,6)(0,6) 13 | 13 (a+)+ a (0,1)(0,1) 14 | 14 SAME x NOMATCH 15 | 15 SAME aaaaaa (0,6)(0,6) 16 | 16 SAME aaaaaax (0,6)(0,6) 17 | 17 ([a]*)* a (0,1)(0,1) 18 | 18 SAME x (0,0)(0,0) 19 | 19 SAME aaaaaa (0,6)(0,6) 20 | 20 SAME aaaaaax (0,6)(0,6) 21 | 21 ([a]*)+ a (0,1)(0,1) 22 | 22 SAME x (0,0)(0,0) 23 | 23 SAME aaaaaa (0,6)(0,6) 24 | 24 SAME aaaaaax (0,6)(0,6) 25 | 25 ([^b]*)* a (0,1)(0,1) 26 | 26 SAME b (0,0)(0,0) 27 | 27 SAME aaaaaa (0,6)(0,6) 28 | 28 SAME aaaaaab (0,6)(0,6) 29 | 29 ([ab]*)* a (0,1)(0,1) 30 | 30 SAME aaaaaa (0,6)(0,6) 31 | 31 SAME ababab (0,6)(0,6) 32 | 32 SAME bababa (0,6)(0,6) 33 | 33 SAME b (0,1)(0,1) 34 | 34 SAME bbbbbb (0,6)(0,6) 35 | 35 SAME aaaabcde (0,5)(0,5) 36 | 36 ([^a]*)* b (0,1)(0,1) 37 | 37 SAME bbbbbb (0,6)(0,6) 38 | 38 SAME aaaaaa (0,0)(0,0) 39 | 39 ([^ab]*)* ccccxx (0,6)(0,6) 40 | 40 SAME ababab (0,0)(0,0) 41 | 41 ((z)+|a)* zabcde (0,2)(1,2)(?,?) 42 | 42 (a) aaa (0,1)(0,1) 43 | 46 (a*)*(x) x (0,1)(0,0)(0,1) 44 | 47 (a*)*(x) ax (0,2)(0,1)(1,2) 45 | 48 (a*)*(x) axa (0,2)(0,1)(1,2) 46 | 49 (a*)+(x) x (0,1)(0,0)(0,1) 47 | 50 (a*)+(x) ax (0,2)(0,1)(1,2) 48 | 51 (a*)+(x) axa (0,2)(0,1)(1,2) 49 | 52 (a*){2}(x) x (0,1)(0,0)(0,1) 50 | 53 (a*){2}(x) ax (0,2)(1,1)(1,2) 51 | 54 (a*){2}(x) axa (0,2)(1,1)(1,2) 52 | -------------------------------------------------------------------------------- /test/cases/osx-bsd-critical.txt: -------------------------------------------------------------------------------- 1 | 1 (()|.)(b) ab (0,2)(0,1)(?,?)(1,2) 2 | -1 (()|.)(b) ab (1,2)(1,1)(1,1)(1,2) 3 | 2 (()|[ab])(b) ab (0,2)(0,1)(?,?)(1,2) 4 | -2 (()|[ab])(b) ab (1,2)(1,1)(1,1)(1,2) 5 | 3 (()|[ab])+b aaab (0,4)(2,3)(?,?) 6 | -3 (()|[ab])+b aaab (3,4)(3,3)(3,3) 7 | 11 (.|())(b) ab (0,2)(0,1)(?,?)(1,2) 8 | 12 ([ab]|())(b) ab (0,2)(0,1)(?,?)(1,2) 9 | 14 ([ab]|())+b aaab (0,4)(2,3)(?,?) 10 | -14 ([ab]|())+b aaab (0,4)(3,3)(3,3) 11 | 20 (.?)(b) ab (0,2)(0,1)(1,2) 12 | -------------------------------------------------------------------------------- /test/cases/repetition2.txt: -------------------------------------------------------------------------------- 1 | 1 ((..)|(.)) NULL NOMATCH 2 | 2 ((..)|(.))((..)|(.)) NULL NOMATCH 3 | 3 ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH 4 | 4 ((..)|(.)){1} NULL NOMATCH 5 | 5 ((..)|(.)){2} NULL NOMATCH 6 | 6 ((..)|(.)){3} NULL NOMATCH 7 | 7 ((..)|(.))* NULL (0,0)(?,?)(?,?)(?,?) 8 | 8 ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) 9 | 9 ((..)|(.))((..)|(.)) a NOMATCH 10 | 10 ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH 11 | 11 ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) 12 | 12 ((..)|(.)){2} a NOMATCH 13 | 13 ((..)|(.)){3} a NOMATCH 14 | 14 ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) 15 | 15 ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) 
16 | 16 ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) 17 | 17 ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH 18 | 18 ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) 19 | 19 ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) 20 | 20 ((..)|(.)){3} aa NOMATCH 21 | 21 ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) 22 | 22 ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) 23 | 23 ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) 24 | 24 ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) 25 | 25 ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) 26 | 26 ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) 27 | 27 ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) 28 | 28 ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) 29 | 29 ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) 30 | 30 ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) 31 | 31 ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) 32 | 32 ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) 33 | 33 ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) 34 | 34 ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) 35 | 35 ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) 36 | 36 ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) 37 | 37 ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) 38 | 38 ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) 39 | 39 ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) 40 | 40 ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) 41 | 41 ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) 42 | 42 ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) 43 | 43 ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) 44 | 44 ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) 45 | 45 ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) 46 | 46 ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) 47 | 47 ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) 48 | 48 ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) 49 | 49 ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) 
50 | 100 X(.?){0,}Y X1234567Y (0,9)(7,8) 51 | 101 X(.?){1,}Y X1234567Y (0,9)(7,8) 52 | 102 X(.?){2,}Y X1234567Y (0,9)(7,8) 53 | 103 X(.?){3,}Y X1234567Y (0,9)(7,8) 54 | 104 X(.?){4,}Y X1234567Y (0,9)(7,8) 55 | 105 X(.?){5,}Y X1234567Y (0,9)(7,8) 56 | 106 X(.?){6,}Y X1234567Y (0,9)(7,8) 57 | 107 X(.?){7,}Y X1234567Y (0,9)(7,8) 58 | 108 X(.?){8,}Y X1234567Y (0,9)(8,8) 59 | 110 X(.?){0,8}Y X1234567Y (0,9)(7,8) 60 | 111 X(.?){1,8}Y X1234567Y (0,9)(7,8) 61 | 112 X(.?){2,8}Y X1234567Y (0,9)(7,8) 62 | 113 X(.?){3,8}Y X1234567Y (0,9)(7,8) 63 | 114 X(.?){4,8}Y X1234567Y (0,9)(7,8) 64 | 115 X(.?){5,8}Y X1234567Y (0,9)(7,8) 65 | 116 X(.?){6,8}Y X1234567Y (0,9)(7,8) 66 | 117 X(.?){7,8}Y X1234567Y (0,9)(7,8) 67 | 118 X(.?){8,8}Y X1234567Y (0,9)(8,8) 68 | 260 (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) 69 | 261 (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) 70 | 262 (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) 71 | 263 (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) 72 | 264 (a|ab|c|bcd){4,}(d*) ababcd NOMATCH 73 | 265 (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) 74 | 266 (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) 75 | 267 (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) 76 | 268 (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) 77 | 269 (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH 78 | 270 (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) 79 | 271 (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) 80 | -------------------------------------------------------------------------------- /test/cases/right-assoc.txt: -------------------------------------------------------------------------------- 1 | 1 (a|ab)(c|bcd)(d*) abcd (0,4)(0,2)(2,3)(3,4) 2 | 2 (a|ab)(bcd|c)(d*) abcd (0,4)(0,2)(2,3)(3,4) 3 | 3 (ab|a)(c|bcd)(d*) abcd (0,4)(0,2)(2,3)(3,4) 4 | 4 (ab|a)(bcd|c)(d*) abcd (0,4)(0,2)(2,3)(3,4) 5 | 5 (a*)(b|abc)(c*) abc (0,3)(0,1)(1,2)(2,3) 6 | 6 (a*)(abc|b)(c*) abc (0,3)(0,1)(1,2)(2,3) 7 | 7 (a*)(b|abc)(c*) abc (0,3)(0,1)(1,2)(2,3) 8 | 8 (a*)(abc|b)(c*) abc (0,3)(0,1)(1,2)(2,3) 9 | 9 (a|ab)(c|bcd)(d|.*) abcd (0,4)(0,2)(2,3)(3,4) 10 | 10 (a|ab)(bcd|c)(d|.*) abcd (0,4)(0,2)(2,3)(3,4) 11 | 11 (ab|a)(c|bcd)(d|.*) abcd (0,4)(0,2)(2,3)(3,4) 12 | 12 (ab|a)(bcd|c)(d|.*) abcd (0,4)(0,2)(2,3)(3,4) 13 | -------------------------------------------------------------------------------- /test/cases/totest.txt: -------------------------------------------------------------------------------- 1 | 01 a+ xaax (1,3) 2 | 03 (a?)((ab)?) ab (0,2)(0,0)(0,2)(0,2) 3 | 04 (a?)((ab)?)(b?) ab (0,2)(0,1)(1,1)(?,?)(1,2) 4 | 05 ((a?)((ab)?))(b?) ab (0,2)(0,2)(0,0)(0,2)(0,2)(2,2) 5 | 06 (a?)(((ab)?)(b?)) ab (0,2)(0,1)(1,2)(1,1)(?,?)(1,2) 6 | 07 (.?) x (0,1)(0,1) 7 | 08 (.?){1} x (0,1)(0,1) 8 | 09 (.?)(.?) x (0,1)(0,1)(1,1) 9 | 10 (.?){2} x (0,1)(1,1) 10 | 11 (.?)* x (0,1)(0,1) 11 | 12 (.?.?) xxx (0,2)(0,2) 12 | 13 (.?.?){1} xxx (0,2)(0,2) 13 | 14 (.?.?)(.?.?) xxx (0,3)(0,2)(2,3) 14 | 15 (.?.?){2} xxx (0,3)(2,3) 15 | 16 (.?.?)(.?.?)(.?.?) xxx (0,3)(0,2)(2,3)(3,3) 16 | 17 (.?.?){3} xxx (0,3)(3,3) 17 | 18 (.?.?)* xxx (0,3)(2,3) 18 | 19 a?((ab)?)(b?) ab (0,2)(1,1)(?,?)(1,2) 19 | 20 (a?)((ab)?)b? ab (0,2)(0,1)(1,1)(?,?) 20 | 21 a?((ab)?)b? ab (0,2)(1,1)(?,?) 
21 | 22 (a*){2} xxxxx (0,0)(0,0) 22 | 23 (ab?)(b?a) aba (0,3)(0,2)(2,3) 23 | 24 (a|ab)(ba|a) aba (0,3)(0,2)(2,3) 24 | 25 (a|ab|ba) aba (0,2)(0,2) 25 | 26 (a|ab|ba)(a|ab|ba) aba (0,3)(0,2)(2,3) 26 | 27 (a|ab|ba)* aba (0,3)(2,3) 27 | 28 (aba|a*b) ababa (0,3)(0,3) 28 | 29 (aba|a*b)(aba|a*b) ababa (0,5)(0,2)(2,5) 29 | 1029 (aba|a*b)(aba|a*b)(aba|a*b) ababa NOMATCH 30 | 30 (aba|a*b)* ababa (0,5)(2,5) 31 | 31 (aba|ab|a) ababa (0,3)(0,3) 32 | 32 (aba|ab|a)(aba|ab|a) ababa (0,5)(0,2)(2,5) 33 | 1032 (aba|ab|a)(aba|ab|a)(aba|ab|a) ababa (0,5)(0,2)(2,4)(4,5) 34 | 33 (aba|ab|a)* ababa (0,5)(2,5) 35 | 34 (a(b)?) aba (0,2)(0,2)(1,2) 36 | 35 (a(b)?)(a(b)?) aba (0,3)(0,2)(1,2)(2,3)(?,?) 37 | 36 (a(b)?)+ aba (0,3)(2,3)(?,?) 38 | 37 (.*)(.*) xx (0,2)(0,2)(2,2) 39 | 38 .*(.*) xx (0,2)(2,2) 40 | 39 (a.*z|b.*y) azbazby (0,5)(0,5) 41 | 40 (a.*z|b.*y)(a.*z|b.*y) azbazby (0,7)(0,5)(5,7) 42 | 41 (a.*z|b.*y)* azbazby (0,7)(5,7) 43 | 42 (.|..)(.*) ab (0,2)(0,2)(2,2) 44 | 43 ((..)*(...)*) xxx (0,3)(0,3)(?,?)(0,3) 45 | 44 ((..)*(...)*)((..)*(...)*) xxx (0,3)(0,3)(?,?)(0,3)(3,3)(?,?)(?,?) 46 | 45 ((..)*(...)*)* xxx (0,3)(0,3)(?,?)(0,3) 47 | 83 (aa(b(b))?)+ aabbaa (0,6)(4,6)(?,?)(?,?) 48 | 84 (a(b)?)+ aba (0,3)(2,3)(?,?) 49 | 85 ([ab]+)([bc]+)([cd]*) abcd (0,4)(0,2)(2,3)(3,4) 50 | 90 ^(A([^B]*))?(B(.*))? Aa (0,2)(0,2)(1,2)(?,?)(?,?) 51 | 91 ^(A([^B]*))?(B(.*))? Bb (0,2)(?,?)(?,?)(0,2)(1,2) 52 | 110 (^){0,3} a (0,0)(0,0) 53 | 111 ($){0,3} a (0,0)(?,?) 54 | 112 (^){1,3} a (0,0)(0,0) 55 | 113 ($){1,3} a (1,1)(1,1) 56 | 200 ((s^)|(s)|(^)|($)|(^.))* searchme (0,1)(0,1)(?,?)(0,1)(?,?)(?,?)(?,?) 57 | 201 s(()|^)e searchme (0,2)(1,1)(1,1) 58 | 202 s(^|())e searchme (0,2)(1,1)(1,1) 59 | 203 s(^|())e searchme (0,2)(1,1)(1,1) 60 | 204 s()?e searchme (0,2)(1,1) 61 | 205 s(^)?e searchme (0,2)(?,?) 62 | 206 ((s)|(e)|(a))* searchme (0,3)(2,3)(?,?)(?,?)(2,3) 63 | 207 ((s)|(e)|())* searchme (0,2)(1,2)(?,?)(1,2)(?,?) 64 | 208 ((b*)|c(c*))* cbb (0,3)(1,3)(1,3)(?,?) 65 | 209 (yyy|(x?)){2,4} yyyyyy (0,6)(3,6)(?,?) 66 | 210 ($)|() xxx (0,0)(?,?)(0,0) 67 | 211 $()|^() ac\n (0,0)(?,?)(0,0) 68 | 212 ^()|$() ac\n (0,0)(0,0)(?,?) 69 | 213 ($)?(.) __ (0,1)(?,?)(0,1) 70 | 214 (.|()|())* c (0,1)(0,1)(?,?)(?,?) 71 | 215 ((a)|(b)){2,} ab (0,2)(1,2)(?,?)(1,2) 72 | 216 .()|((.)?) NULL (0,0)(?,?)(0,0)(?,?) 73 | 217 (.|$){2,} xx (0,2)(1,2) 74 | 218 (.|$){2,2} xx (0,2)(1,2) 75 | 219 (.){2,} xx (0,2)(1,2) 76 | 220 (a|())(b|())(c|()) abc (0,3)(0,1)(?,?)(1,2)(?,?)(2,3)(?,?) 77 | 220 ab()c|ab()c() abc (0,3)(2,2)(-1,-1)(-1,-1) 78 | 250 (b(c)|d(e))* bcde (0,4)(2,4)(-1,-1)(3,4) 79 | 251 (a(b)*)* aba (0,3)(2,3)(-1,-1) 80 | 260 []] ] (0,1) 81 | 261 [^]] ] NOMATCH 82 | 262 [-] - (0,1) 83 | 263 [^-] - NOMATCH 84 | 260 []] a NOMATCH 85 | 261 [^]] a (0,1) 86 | 262 [-] a NOMATCH 87 | 263 [^-] a (0,1) 88 | --------------------------------------------------------------------------------
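For orientation, a brief hand-annotated sketch of the test-case line format shared by the files under test/cases, as parsed by toTest/toTest' in test/Main.hs (this note is illustrative and not part of the test driver; the sample line is taken from basic3.txt):

    5 a($) aa (1,2)(2,2)

reads as: test id 5, regex a($), input "aa", expected spans (1,2) for the whole match and (2,2) for group 1, where each span is a half-open (start,end) pair of offsets as printed by testOne'. A negative id marks a line that is expected to fail; "NULL" as the input stands for the empty string; "SAME" as the regex (in files read via toTest') reuses the previous line's regex; a "?" in the expected spans is rewritten to -1 (a group that did not participate in the match); and NOMATCH means the pattern is expected not to match at all.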