├── .github └── workflows │ └── haskell-ci.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── cabal.haskell-ci ├── cabal.project ├── lib ├── Data │ ├── IntMap │ │ ├── CharMap2.hs │ │ └── EnumMap2.hs │ └── IntSet │ │ └── EnumSet2.hs └── Text │ └── Regex │ ├── TDFA.hs │ └── TDFA │ ├── ByteString.hs │ ├── ByteString │ └── Lazy.hs │ ├── Common.hs │ ├── CorePattern.hs │ ├── IntArrTrieSet.hs │ ├── NewDFA │ ├── Engine.hs │ ├── Engine_FA.hs │ ├── Engine_NC.hs │ ├── Engine_NC_FA.hs │ ├── MakeTest.hs │ ├── Tester.hs │ └── Uncons.hs │ ├── Pattern.hs │ ├── ReadRegex.hs │ ├── Sequence.hs │ ├── String.hs │ ├── TDFA.hs │ ├── TNFA.hs │ ├── Text.hs │ ├── Text │ └── Lazy.hs │ └── Wrap.hs ├── regex-tdfa.cabal └── test ├── DocTestMain.hs ├── LICENSE ├── Main.hs └── cases ├── basic3.txt ├── class.txt ├── forced-assoc.txt ├── left-assoc.txt ├── nullsub3.txt ├── osx-bsd-critical.txt ├── repetition2.txt ├── right-assoc.txt └── totest.txt /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'regex-tdfa.cabal' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.19.20250506 12 | # 13 | # REGENDATA ("0.19.20250506",["github","regex-tdfa.cabal"]) 14 | # 15 | name: Haskell-CI 16 | on: 17 | push: 18 | branches: 19 | - master 20 | pull_request: 21 | branches: 22 | - master 23 | jobs: 24 | linux: 25 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 26 | runs-on: ubuntu-24.04 27 | timeout-minutes: 28 | 60 29 | container: 30 | image: buildpack-deps:jammy 31 | continue-on-error: ${{ matrix.allow-failure }} 32 | strategy: 33 | matrix: 34 | include: 35 | - compiler: ghc-9.12.2 36 | compilerKind: ghc 37 | compilerVersion: 9.12.2 38 | setup-method: ghcup 39 | allow-failure: false 40 | - compiler: ghc-9.10.2 41 | compilerKind: ghc 42 | compilerVersion: 9.10.2 43 | setup-method: ghcup 44 | allow-failure: false 45 | - compiler: ghc-9.8.4 46 | compilerKind: ghc 47 | compilerVersion: 9.8.4 48 | setup-method: ghcup 49 | allow-failure: false 50 | - compiler: ghc-9.6.7 51 | compilerKind: ghc 52 | compilerVersion: 9.6.7 53 | setup-method: ghcup 54 | allow-failure: false 55 | - compiler: ghc-9.4.8 56 | compilerKind: ghc 57 | compilerVersion: 9.4.8 58 | setup-method: ghcup 59 | allow-failure: false 60 | - compiler: ghc-9.2.8 61 | compilerKind: ghc 62 | compilerVersion: 9.2.8 63 | setup-method: ghcup 64 | allow-failure: false 65 | - compiler: ghc-9.0.2 66 | compilerKind: ghc 67 | compilerVersion: 9.0.2 68 | setup-method: ghcup 69 | allow-failure: false 70 | - compiler: ghc-8.10.7 71 | compilerKind: ghc 72 | compilerVersion: 8.10.7 73 | setup-method: ghcup 74 | allow-failure: false 75 | - compiler: ghc-8.8.4 76 | compilerKind: ghc 77 | compilerVersion: 8.8.4 78 | setup-method: ghcup 79 | allow-failure: false 80 | - compiler: ghc-8.6.5 81 | compilerKind: ghc 82 | compilerVersion: 8.6.5 83 | setup-method: ghcup 84 | allow-failure: false 85 | - compiler: ghc-8.4.4 86 | compilerKind: ghc 87 | compilerVersion: 8.4.4 88 | setup-method: ghcup 89 | allow-failure: false 90 | - compiler: ghc-8.2.2 91 | compilerKind: ghc 92 | compilerVersion: 8.2.2 93 | setup-method: ghcup 94 | allow-failure: false 95 | - compiler: ghc-8.0.2 96 | compilerKind: ghc 97 | compilerVersion: 8.0.2 98 | setup-method: ghcup 99 | 
allow-failure: false 100 | fail-fast: false 101 | steps: 102 | - name: apt-get install 103 | run: | 104 | apt-get update 105 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 libnuma-dev 106 | - name: Install GHCup 107 | run: | 108 | mkdir -p "$HOME/.ghcup/bin" 109 | curl -sL https://downloads.haskell.org/ghcup/0.1.50.1/x86_64-linux-ghcup-0.1.50.1 > "$HOME/.ghcup/bin/ghcup" 110 | chmod a+x "$HOME/.ghcup/bin/ghcup" 111 | - name: Install cabal-install 112 | run: | 113 | "$HOME/.ghcup/bin/ghcup" install cabal 3.14.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 114 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.14.2.0 -vnormal+nowrap" >> "$GITHUB_ENV" 115 | - name: Install GHC (GHCup) 116 | if: matrix.setup-method == 'ghcup' 117 | run: | 118 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 119 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 120 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 121 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 122 | echo "HC=$HC" >> "$GITHUB_ENV" 123 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 124 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 125 | env: 126 | HCKIND: ${{ matrix.compilerKind }} 127 | HCNAME: ${{ matrix.compiler }} 128 | HCVER: ${{ matrix.compilerVersion }} 129 | - name: Set PATH and environment variables 130 | run: | 131 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 132 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 133 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 134 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 135 | HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))') 136 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 137 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 138 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 139 | echo "HEADHACKAGE=false" >> "$GITHUB_ENV" 140 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 141 | env: 142 | HCKIND: ${{ matrix.compilerKind }} 143 | HCNAME: ${{ matrix.compiler }} 144 | HCVER: ${{ matrix.compilerVersion }} 145 | - name: env 146 | run: | 147 | env 148 | - name: write cabal config 149 | run: | 150 | mkdir -p $CABAL_DIR 151 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 184 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 185 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 186 | rm -f cabal-plan.xz 187 | chmod a+x $HOME/.cabal/bin/cabal-plan 188 | cabal-plan --version 189 | - name: checkout 190 | uses: actions/checkout@v4 191 | with: 192 | path: source 193 | - name: initial cabal.project for sdist 194 | run: | 195 | touch cabal.project 196 | echo "packages: $GITHUB_WORKSPACE/source/." 
>> cabal.project 197 | cat cabal.project 198 | - name: sdist 199 | run: | 200 | mkdir -p sdist 201 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 202 | - name: unpack 203 | run: | 204 | mkdir -p unpacked 205 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 206 | - name: generate cabal.project 207 | run: | 208 | PKGDIR_regex_tdfa="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/regex-tdfa-[0-9.]*')" 209 | echo "PKGDIR_regex_tdfa=${PKGDIR_regex_tdfa}" >> "$GITHUB_ENV" 210 | rm -f cabal.project cabal.project.local 211 | touch cabal.project 212 | touch cabal.project.local 213 | echo "packages: ${PKGDIR_regex_tdfa}" >> cabal.project 214 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package regex-tdfa" >> cabal.project ; fi 215 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods" >> cabal.project ; fi 216 | cat >> cabal.project <> cabal.project.local 219 | cat cabal.project 220 | cat cabal.project.local 221 | - name: dump install plan 222 | run: | 223 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 224 | cabal-plan 225 | - name: restore cache 226 | uses: actions/cache/restore@v4 227 | with: 228 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 229 | path: ~/.cabal/store 230 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 231 | - name: install dependencies 232 | run: | 233 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 234 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 235 | - name: build w/o tests 236 | run: | 237 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 238 | - name: build 239 | run: | 240 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 241 | - name: tests 242 | run: | 243 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 244 | - name: cabal check 245 | run: | 246 | cd ${PKGDIR_regex_tdfa} || false 247 | ${CABAL} -vnormal check 248 | - name: haddock 249 | run: | 250 | $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all 251 | - name: unconstrained build 252 | run: | 253 | rm -f cabal.project.local 254 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 255 | - name: save cache 256 | if: always() 257 | uses: actions/cache/save@v4 258 | with: 259 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 260 | path: ~/.cabal/store 261 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist/ 2 | /.cabal-sandbox/ 3 | /dist-newstyle/ 4 | /cabal.sandbox.config 5 | /cabal.config 6 | /cabal.project.local 7 | /cabal.project.freeze 8 | /.ghc.environment.* 9 | 10 | TAGS 11 | .DS_Store 12 | *~ 13 | *# 14 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ### 1.3.2.4 2 | 3 | _2025-05-09 Andreas Abel_ 4 | 5 | - Fix definition of `[[:graph:]]` character class ([#66](https://github.com/haskell-hvr/regex-tdfa/issues/66)) 6 | - Tested with GHC 8.0 - 9.12.2 7 | 8 | ### 1.3.2.3 9 | 10 | _2025-03-02 Andreas Abel_ 11 | 12 | - Drop support for GHC 7 13 | - Allow `containers < 1` 14 | - Tested with GHC 8.0 - 
9.12.1 15 | 16 | ### 1.3.2.2 17 | 18 | _2023-08-02, Andreas Abel_ 19 | 20 | - Fix return type in `memcpy` FFI signature ([#52](https://github.com/haskell-hvr/regex-tdfa/pull/52)) 21 | - Refactor `regexec` to avoid partial functions `tail` and `(!0)` 22 | - Tested with GHC 7.4 - 9.8.1-alpha1 23 | 24 | ### 1.3.2.1 25 | 26 | _2023-05-19, Andreas Abel_ 27 | 28 | - Fix haddock rendering of code examples in top-level documentation 29 | ([#50](https://github.com/haskell-hvr/regex-tdfa/issues/50)) 30 | - Tested with GHC 7.4 - 9.6 31 | 32 | ## 1.3.2 33 | 34 | _2022-07-18, Andreas Abel_ 35 | 36 | - Export `decodePatternSet` and `decodeCharacterClass` from `Text.Regex.TDFA.Pattern` 37 | ([#16](https://github.com/haskell-hvr/regex-tdfa/issues/16)) 38 | - Extend and correct docs for `Pattern` module 39 | - Tested with GHC 7.4 - 9.4 40 | 41 | ### 1.3.1.5 42 | 43 | _2022-07-18, Andreas Abel_ 44 | 45 | - Allow dash (`-`) as start of a range, e.g. `[--z]` 46 | ([#1](https://github.com/haskell-hvr/regex-tdfa/issues/1), 47 | [#45](https://github.com/haskell-hvr/regex-tdfa/pull/45)) 48 | - Tested with GHC 7.4 - 9.4 49 | 50 | ### 1.3.1.4 51 | 52 | _2022-07-17, Andreas Abel_ 53 | 54 | - Fix parsing of dashes in bracket expressions, e.g. `[-a-z]` ([#1](https://github.com/haskell-hvr/regex-tdfa/issues/1)) 55 | - Fix a deprecation warning except for on GHC 8.2 ([#21](https://github.com/haskell-hvr/regex-tdfa/issues/21)) 56 | - Documentation: link `defaultComptOpt` to its definition ([#13](https://github.com/haskell-hvr/regex-tdfa/issues/13)) 57 | - Verify documentation examples with new `doc-test` testsuite 58 | - Tested with GHC 7.4 - 9.4 59 | 60 | ### 1.3.1.3 61 | 62 | _2022-07-14, Andreas Abel_ 63 | 64 | - Fix an `undefined` in `Show PatternSet` ([#37](https://github.com/haskell-hvr/regex-tdfa/issues/37)) 65 | - Document POSIX character classes (e.g. `[[:digit:]]`) in README 66 | - Tested with GHC 7.4 - 9.4 67 | 68 | ### 1.3.1.2 Revision 1 69 | 70 | _2022-05-25, Andreas Abel_ 71 | 72 | - Allow `base >= 4.17` (GHC 9.4) 73 | 74 | ### 1.3.1.2 75 | 76 | _2022-02-19, Andreas Abel_ 77 | - No longer rely on the `MonadFail` instance for `ST` 78 | (future `base` library change, see [#29](https://github.com/haskell-hvr/regex-tdfa/pull/29)). 79 | - Silence warning `incomplete-uni-patterns` (GHC >= 9.2). 80 | - Import from `Data.List` explicitly or qualified (warning `compat-unqualified-imports`). 81 | - Import from `Control.Monad` to allow `mtl-2.3` in its `rc3` incarnation. 82 | 83 | ### 1.3.1.1 Revision 3 84 | 85 | _2022-01-31, Andreas Abel_ 86 | - Speculatively allow unreleased `mtl-2.3` (works with release candidate `mtl-2.3-rc4`). 87 | 88 | ### 1.3.1.1 Revision 2 89 | 90 | _2021-12-26, Andreas Abel_ 91 | - Allow `text-2.0`. 92 | 93 | ### 1.3.1.1 Revision 1 94 | 95 | _2021-08-12, Andreas Abel_ 96 | - Compatibility with `base-4.16` (GHC 9.2). 97 | 98 | ### 1.3.1.1 99 | 100 | _2021-06-03, Andreas Abel_ 101 | - Removed extension `NoMonoPatBinds` from `.cabal`-file for GHC 9.2 compatibility. 102 | - Removed some outdated documentation. 103 | 104 | ### 1.3.1.0 Revision 2 105 | 106 | _2021-02-20, Andreas Abel_ 107 | - Compatibility with `base-4.15` (GHC 9.0) and `bytestring-0.11`. 108 | 109 | ### 1.3.1.0 Revision 1 110 | 111 | _2020-03-26, phadej_ 112 | - Compatibility with `base-4.14` (GHC 8.10). 113 | 114 | ## 1.3.1.0 115 | 116 | _2019-11-26, hvr_ 117 | - Merge into `regex-tdfa`; see . 
118 | - Don't inject `ghc-options: -O2` by default anymore (see #7 for rationale) and introduce `force-O2` cabal flag to control the injection of `ghc-options: -O2`. 119 | Note that you can conveniently control optimization levels on a per-package granularity via `cabal.project` files; see [cabal's user-guide](https://cabal.readthedocs.io/en/latest/nix-local-build.html#configuring-builds-with-cabal-project) for more details. 120 | 121 | ## 1.3.0 Revision 1 122 | 123 | _2019-11-26, hvr_ 124 | - Tighten bounds on `base`, `mtl`, `parsec` and fail. 125 | 126 | # 1.3.0 127 | 128 | _2019-09-29, Artyom_ 129 | - Same as 1.2.3.3 release, but in accordance to PVP; 130 | see . 131 | - Compatibility with GHC 8.8 and regex-base-0.9.4 (h/t @asr). 132 | - Turned `regex-tdfa-unittest` into a `regex-tdfa` testsuite. 133 | 134 | ### 1.2.3.3 (deprecated, not following PVP) 135 | 136 | * Compatibility with GHC 8.8 and regex-base-0.9.4 (h/t @asr). 137 | * Turned `regex-tdfa-unittest` into a `regex-tdfa` testsuite. 138 | 139 | ### 1.2.3.2 140 | 141 | _2019-05-09, Artyom_ 142 | * Significantly improved documentation (h/t William Yao). 143 | 144 | ### 1.2.3.1 145 | 146 | _2018-06-22, Artyom_ 147 | * Compatibility with `containers-0.6`. 148 | 149 | ## 1.2.3 150 | 151 | _2018-03-10, Artyom_ 152 | * Added `Semigroup` instances for some types (h/t Herbert Valerio Riedel). 153 | 154 | ## 1.2.2 155 | 156 | _2016-04-28, Artyom_ 157 | * New maintainer. 158 | * Now we don't reexport the problematic `Show` instance for functions. 159 | 160 | ## 1.2.1 161 | 162 | _2015-08-29, Chris Kuklewicz_ 163 | * Updated dependency versions. 164 | 165 | # 1.2.0 166 | 167 | _2014-02-02, Chris Kuklewicz_ 168 | * "Almost ghc-7.8" with the array 0.4 changes for `Data.Array.Unsafe` 169 | 170 | 171 | ## 1.1.8 172 | 173 | Make ghc-7.0.2 on platform 2011.2.0.0.0 happy 174 | 175 | ## 1.1.7 176 | 177 | fix url below 178 | 179 | ## 1.1.6 180 | 181 | Fix bug preventing `[]] [-] [^]] [^-]` (thanks to Maxime Henrion) 182 | 183 | ## 1.1.5 184 | 185 | try `needUniqTags` in `POr` in CorePattern.hs, try `toAdvice b` for `PStar child` 186 | 187 | ## 1.1.4 188 | 189 | fixed 190 | 191 | ## 1.1.3 192 | 193 | BROKEN after 100 characters the `compressOrbit` dies! 194 | 195 | ## 1.1.2 196 | 197 | worked 198 | 199 | ## 1.1.1 200 | 201 | add gnu escapes 202 | 203 | # 1.1.0 204 | 205 | NewDFA code working 206 | 207 | ## 1.0.7 208 | 209 | make NewDFA directory and String_NC 210 | 211 | ## 1.0.6 212 | 213 | try NewDFATest_SBS with `uncons` 214 | 215 | ## 1.0.5 216 | 217 | use `uncons` on SBS 218 | 219 | ## 1.0.4 220 | 221 | try repaired NewDFATest_SBS 222 | 223 | * np13: try to improve readability with the `mm` combinator? Yes! 224 | * np12: expand `o` in the case where `t` lookup get `Nothing`? Yes – this is the fix!? 225 | * np11: break multi to not look at `o` and just return `True`? Yes !!!! 226 | * np10: Peel off `CharMap`/`IntMap` and DFA/DT with pattern matching? No 227 | * np9: `INLINE` `endOf`? No 228 | * np8: np6 and `NOINLINE` `endOff`? No 229 | * np7: just return `True`? Fast 230 | * np6: comment out ans check? No 231 | * np5: comment out all `Multi0` code? No 232 | * np4: comment out all `Single0` and `Single` code? No 233 | * np3: `!off` the multi? No 234 | * np2: comment out all Testing code? No 235 | 236 | ## 1.0.3 237 | 238 | try to alter `matchTest` to not have the `Bool` args? No 239 | 240 | ## 1.0.2 241 | 242 | arg, the prof is fast and the normal slow! 
243 | 244 | # 1.0.1 245 | 246 | add NewDFATest.hs 247 | 248 | ## 0.99.20 249 | 250 | go to many vs single? 251 | 252 | ## 0.99.19 253 | 254 | try for pre-comparison of orbit-logs! 255 | 256 | ## 0.99.18 257 | 258 | try alternate lazy/strict strategy in NewDFA. Fix offset laziness. 259 | 260 | ## 0.99.17 261 | 262 | radical removal of flag array and adding of `SetVal` to handle groups 263 | 264 | ## 0.99.16 265 | 266 | performance? up to v15 267 | 268 | ## 0.99.15 269 | 270 | get string with NewDFA testing, unit tests and 1000 random regex pass 271 | 272 | ## 0.99.14 273 | 274 | start changing to the new real DFA 275 | 276 | ## 0.99.13 277 | 278 | more cleanup 279 | 280 | ## 0.99.12 281 | 282 | try to debug 0.99.11: fixed `updateWinner` 283 | 284 | ## 0.99.11 285 | 286 | improve above fix and make stuff work better – HAS BUG, along with old TDFA! 287 | 288 | ## 0.99.10 289 | 290 | fixed `((.?)*)*` patterns by changing `PStar nullView` when `mayFirstBeNull` 291 | 292 | ## 0.99.9 293 | 294 | testing changing `bestTrans`/`chooseWith`/`choose` to include `enterOrbit`/`newFlags`/`(_,True)` info 295 | 296 | ## 0.99.8 297 | 298 | testing changing `Maximize` to `Minimize` for `Tag`s, decide `(a*)*` is canonical problem 299 | 300 | ## 0.99.7 301 | 302 | Use `(PGroup Nothing)` in `Pattern` to decompose `PBound` 303 | 304 | ## 0.99.6 305 | 306 | change to nested `nonEmpty` calls for `PBound` 307 | 308 | ## 0.99.5 309 | 310 | remove `PNonEmpty` constructor 311 | 312 | ## 0.99.4 313 | 314 | tests `pnonempty' = \ p -> POr [ PEmpty, p ]` instead of `PNonEmpty` 315 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This module is under this "3 clause" BSD license: 2 | 3 | Copyright (c) 2007-2009, Christopher Kuklewicz 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10 | * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Hackage version](https://img.shields.io/hackage/v/regex-tdfa.svg?label=Hackage&color=informational)](http://hackage.haskell.org/package/regex-tdfa) 2 | [![Stackage Nightly](http://stackage.org/package/regex-tdfa/badge/nightly)](http://stackage.org/nightly/package/regex-tdfa) 3 | [![Stackage LTS](http://stackage.org/package/regex-tdfa/badge/lts)](http://stackage.org/lts/package/regex-tdfa) 4 | [![Haskell-CI](https://github.com/haskell-hvr/regex-tdfa/actions/workflows/haskell-ci.yml/badge.svg?branch=master&event=push)](https://github.com/haskell-hvr/regex-tdfa/actions/workflows/haskell-ci.yml) 5 | [![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 6 | # regex-tdfa 7 | 8 | This is [`regex-tdfa`](http://hackage.haskell.org/package/regex-tdfa) which is a pure Haskell regular expression library (for POSIX extended regular expressions) originally written by Christopher Kuklewicz. 9 | 10 | The name "tdfa" stands for Tagged-DFA. 11 | 12 | ## Getting started 13 | 14 | ### Importing and using 15 | 16 | [Declare a dependency](https://www.haskell.org/cabal/users-guide/developing-packages.html#pkg-field-build-depends) on the `regex-tdfa` library in your `.cabal` file: 17 | 18 | ``` 19 | build-depends: regex-tdfa ^>= 1.3.2 20 | ``` 21 | 22 | In Haskell modules where you need to use regexes `import` the respective `regex-tdfa` module: 23 | 24 | ```haskell 25 | import Text.Regex.TDFA 26 | ``` 27 | 28 | ### Basics 29 | 30 | ```haskell 31 | λ> emailRegex = "[a-zA-Z0-9+._-]+@[a-zA-Z-]+\\.[a-z]+" 32 | λ> "my email is email@email.com" =~ emailRegex :: Bool 33 | >>> True 34 | 35 | -- non-monadic 36 | =~ 37 | 38 | -- monadic, uses 'fail' on lack of match 39 | =~~ 40 | ``` 41 | 42 | `(=~)` and `(=~~)` are polymorphic in their return type. This is so that 43 | regex-tdfa can pick the most efficient way to give you your result based on 44 | what you need. For instance, if all you want is to check whether the regex 45 | matched or not, there's no need to allocate a result string. If you only want 46 | the first match, rather than all the matches, then the matching engine can stop 47 | after finding a single hit. 48 | 49 | This does mean, though, that you may sometimes have to explicitly specify the 50 | type you want, especially if you're trying things out at the REPL. 51 | 52 | ### Common use cases 53 | 54 | #### Get the first match 55 | 56 | ```haskell 57 | -- returns empty string if no match 58 | a =~ b :: String -- or ByteString, or Text... 
59 | 60 | λ> "alexis-de-tocqueville" =~ "[a-z]+" :: String 61 | >>> "alexis" 62 | 63 | λ> "alexis-de-tocqueville" =~ "[[:digit:]]+" :: String 64 | >>> "" 65 | ``` 66 | 67 | #### Check if it matched at all 68 | 69 | ```haskell 70 | a =~ b :: Bool 71 | 72 | λ> "alexis-de-tocqueville" =~ "[a-z]+" :: Bool 73 | >>> True 74 | ``` 75 | 76 | #### Get first match + text before/after 77 | 78 | ```haskell 79 | -- if no match, will just return whole 80 | -- string in the first element of the tuple 81 | a =~ b :: (String, String, String) 82 | 83 | λ> "alexis-de-tocqueville" =~ "de" :: (String, String, String) 84 | >>> ("alexis-", "de", "-tocqueville") 85 | 86 | λ> "alexis-de-tocqueville" =~ "kant" :: (String, String, String) 87 | >>> ("alexis-de-tocqueville", "", "") 88 | ``` 89 | 90 | #### Get first match + submatches 91 | 92 | ```haskell 93 | -- same as above, but also returns a list of /just/ submatches 94 | -- submatch list is empty if regex doesn't match at all 95 | a =~ b :: (String, String, String, [String]) 96 | 97 | λ> "div[attr=1234]" =~ "div\\[([a-z]+)=([^]]+)\\]" 98 | :: (String, String, String, [String]) 99 | >>> ("", "div[attr=1234]", "", ["attr","1234"]) 100 | ``` 101 | 102 | #### Get all non-overlapping matches 103 | 104 | ```haskell 105 | -- can also return Data.Array instead of List 106 | getAllTextMatches (a =~ b) :: [String] 107 | 108 | λ> getAllTextMatches ("john anne yifan" =~ "[a-z]+") :: [String] 109 | >>> ["john","anne","yifan"] 110 | 111 | λ> getAllTextMatches ("0a0b0" =~ "0[[:lower:]]0") :: [String] 112 | >>> ["0a0"] 113 | ``` 114 | Note that `"0b0"` is not included in the result since it overlaps with `"0a0"`. 115 | 116 | #### Special characters 117 | 118 | `regex-tdfa` only supports a small set of special characters and is much less 119 | featureful than some other regex engines you might be used to, such as PCRE. 120 | 121 | * ``\` `` — Match start of entire text (similar to `^` in other regex engines) 122 | * `\'` — Match end of entire text (similar to `$` in other regex engines) 123 | * `\<` — Match beginning of word 124 | * `\>` — Match end of word 125 | * `\b` — Match beginning or end of word 126 | * `\B` — Match neither beginning nor end of word 127 | 128 | While shorthands like `\d` (for digit) are not recognized, one can use the respective 129 | POSIX character class inside `[...]`. E.g., `[[:digit:][:lower:]_]` is short for 130 | `[0-9a-z_]`. The supported character classes are listed on 131 | [Wikipedia](https://en.wikipedia.org/w/index.php?title=Regular_expression&oldid=1095256273#Character_classes) 132 | and defined in module 133 | [`TNFA`](https://github.com/haskell-hvr/regex-tdfa/blob/95d47cb982d2cf636b2cb6260a866f9907341c45/lib/Text/Regex/TDFA/TNFA.hs#L804-L816). 134 | 135 | Please also consult a variant of this documentation which is part of the 136 | [Text.Regex.TDFA haddock](http://hackage.haskell.org/package/regex-tdfa/docs/Text-Regex-TDFA.html), 137 | and the original documentation at the [Haskell wiki](https://wiki.haskell.org/Regular_expressions#regex-tdfa). 
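For example, the word-boundary escapes and POSIX character classes described above can be combined as follows (a small illustration added here, not part of the original README; the outputs are what current `regex-tdfa` behaviour should produce — note that backslashes must be doubled inside ordinary Haskell string literals):

```haskell
-- whole-word match using the GNU word-boundary anchors
λ> "catfish" =~ "\\<cat\\>" :: Bool
>>> False

λ> "cat fish" =~ "\\<cat\\>" :: Bool
>>> True

-- POSIX character class combined with a literal underscore
λ> "for_2_weeks" =~ "[[:digit:]_]+" :: String
>>> "_2_"
```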
138 |
139 | ### Less common stuff
140 |
141 | #### Get match indices
142 |
143 | ```haskell
144 | -- can also return Data.Array instead of List
145 | getAllMatches (a =~ b) :: [(Int, Int)] -- (index, length)
146 |
147 | λ> getAllMatches ("john anne yifan" =~ "[a-z]+") :: [(Int, Int)]
148 | >>> [(0,4), (5,4), (10,5)]
149 | ```
150 |
151 | #### Get submatch indices
152 |
153 | ```haskell
154 | -- match of __entire__ regex is first element, not first capture
155 | -- can also return Data.Array instead of List
156 | getAllSubmatches (a =~ b) :: [(Int, Int)] -- (index, length)
157 |
158 | λ> getAllSubmatches ("div[attr=1234]" =~ "div\\[([a-z]+)=([^]]+)\\]")
159 |     :: [(Int, Int)]
160 | >>> [(0,14), (4,4), (9,4)]
161 | ```
162 |
163 | ### Replacement
164 |
165 | `regex-tdfa` does not provide find-and-replace.
166 |
167 | ## Avoiding backslashes
168 |
169 | If you find yourself writing a lot of regexes, take a look at
170 | [raw-strings-qq](http://hackage.haskell.org/package/raw-strings-qq). It'll
171 | let you write regexes without needing to escape all your backslashes.
172 |
173 | ```haskell
174 | {-# LANGUAGE QuasiQuotes #-}
175 |
176 | import Text.RawString.QQ
177 | import Text.Regex.TDFA
178 |
179 | λ> "2 * (3 + 1) / 4" =~ [r|\([^)]+\)|] :: String
180 | >>> "(3 + 1)"
181 | ```
182 |
183 | ## Known bugs and infelicities
184 |
185 | * Regexes with large character classes combined with `{m,n}` are very slow and memory-hungry ([#3][]).
186 |
187 | > An example of such a regex is `^[\x0020-\xD7FF]{1,255}$`.
188 |
189 | * POSIX submatch semantics are broken in some rare cases ([#2][]).
190 |
191 | [#2]: https://github.com/haskell-hvr/regex-tdfa/issues/2
192 | [#3]: https://github.com/haskell-hvr/regex-tdfa/issues/3
193 |
194 | ## About this package
195 |
196 | This was inspired by the algorithm (and Master's thesis) behind the regular expression library known as [TRE or libtre](https://github.com/laurikari/tre/). TRE was created by Ville Laurikari and tackled the difficult issue of efficient sub-match capture for POSIX regular expressions.
197 |
198 | By building on this thesis and adding a few more optimizations, regex-tdfa matching input text of length N should have O(N) runtime, and should have maximum memory usage bounded by the pattern size, independent of N. It should do this while returning well-defined (and correct) values for the parenthesized sub-matches.
199 |
200 | Regardless of performance, nearly every single OS and library for POSIX regular expressions has bugs in sub-matches. This was detailed on the [Regex POSIX Haskell wiki page](https://wiki.haskell.org/Regex_Posix), and can be demonstrated with the [regex-posix-unittest](http://hackage.haskell.org/package/regex-posix-unittest) suite of checks. The [regex-tdfa-unittest](http://hackage.haskell.org/package/regex-tdfa-unittest) suite should show regex-tdfa passing these same checks. I owe my understanding of the correct behavior and many of these unit tests to Glenn Fowler at AT&T ("An Interpretation of the POSIX regex Standard").
201 |
202 | ### Maintenance history
203 |
204 | The original Darcs repository was at [code.haskell.org](http://code.haskell.org/regex-tdfa/).
205 | For a while a fork was maintained by Roman Cheplyaka as
206 | [regex-tdfa-rc](http://hackage.haskell.org/package/regex-tdfa-rc).
207 |
208 | Then the repository moved to ,
209 | which was primarily maintained by [Artyom (neongreen)](https://github.com/neongreen).
210 | 211 | Finally, maintainership was passed on again and the repository moved to its current location 212 | at . 213 | 214 | ## Other related packages 215 | 216 | Searching for "tdfa" on [hackage](http://hackage.haskell.org/packages/search?terms=tdfa) 217 | finds some related packages (unmaintained as of 2022-07-14). 218 | 219 | ## Document notes 220 | 221 | This README was originally written 2016-04-30. 222 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | branches: master 2 | 3 | installed: +all 4 | 5 | -- constraint-set bytestring-0.12 6 | -- -- bytestring-0.12 requires base >=4.9 (GHC 8.0) 7 | -- ghc: >= 8.0 8 | -- constraints: bytestring ^>= 0.12 9 | -- tests: True 10 | -- run-tests: True 11 | -- -- 12 | -- -- The following is silently ignored here: 13 | -- -- 14 | -- -- raw-project 15 | -- -- allow-newer: bytestring 16 | -- -- 17 | -- 18 | -- constraint-set containers-0.7 19 | -- -- containers-0.7 requires base >=4.9 (GHC 8.0) 20 | -- -- fails with GHCs 8.0 and 9.8.0 21 | -- ghc: >= 8.2 && < 9.7 22 | -- constraints: containers ^>= 0.7 23 | -- tests: True 24 | -- run-tests: True 25 | -- 26 | -- constraint-set text-2.1 27 | -- -- text-2.1 requires base >=4.10 (GHC 8.2) 28 | -- ghc: >= 8.2 29 | -- constraints: text ^>= 2.1 30 | -- tests: True 31 | -- run-tests: True 32 | -- 33 | -- -- The following is meant to be for the constraint-set bytestring-0.12 only 34 | -- -- (and for the other constraint-sets) 35 | -- -- but there is currently no way to enable `allow-newer: bytestring` 36 | -- -- just for the constraint set. 37 | -- -- 38 | -- -- Since core library `bytestring` is constrained to `installed`, 39 | -- -- it is not harmful to allow newer `bytestring` in the default runs 40 | -- -- as well---it will have no effect there. 41 | -- -- 42 | -- raw-project 43 | -- allow-newer: bytestring 44 | -- allow-newer: containers 45 | -- allow-newer: text 46 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: . 2 | 3 | write-ghc-environment-files: always 4 | -------------------------------------------------------------------------------- /lib/Data/IntMap/CharMap2.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Data.IntMap.CharMap2 where 3 | 4 | #ifdef __GLASGOW_HASKELL__ 5 | import GHC.Base(unsafeChr) 6 | #else 7 | import Data.Char (chr) 8 | #endif 9 | import Data.Char as C(ord) 10 | import Data.List as L (map) 11 | import qualified Data.IntMap as M 12 | #if MIN_VERSION_containers(0,5,11) 13 | import qualified Data.IntMap.Internal.Debug as MD 14 | #else 15 | import qualified Data.IntMap as MD 16 | #endif 17 | import qualified Data.IntSet as S(IntSet) 18 | import Data.Semigroup as Sem 19 | 20 | #ifndef __GLASGOW_HASKELL__ 21 | unsafeChr = chr 22 | #endif 23 | 24 | newtype CharMap a = CharMap {unCharMap :: M.IntMap a} deriving (Eq,Ord,Read,Show) 25 | 26 | instance Sem.Semigroup (CharMap a) where 27 | CharMap x <> CharMap y = CharMap (x `mappend` y) 28 | 29 | instance Monoid (CharMap a) where 30 | mempty = CharMap mempty 31 | mappend = (<>) 32 | 33 | instance Functor CharMap where 34 | fmap f (CharMap m) = CharMap (fmap f m) 35 | 36 | type Key = Char 37 | 38 | (!) :: CharMap a -> Key -> a 39 | (!) (CharMap m) k = (M.!) 
m (C.ord k) 40 | 41 | (\\) :: CharMap a -> CharMap b -> CharMap a 42 | (\\) (CharMap m1) (CharMap m2) = CharMap ((M.\\) m1 m2) 43 | 44 | null :: CharMap a -> Bool 45 | null (CharMap m) = M.null m 46 | 47 | size :: CharMap a -> Int 48 | size (CharMap m) = M.size m 49 | 50 | member :: Key -> CharMap a -> Bool 51 | member k (CharMap m) = M.member (C.ord k) m 52 | 53 | notMember :: Key -> CharMap a -> Bool 54 | notMember k (CharMap m) = M.notMember (C.ord k) m 55 | 56 | lookup :: Key -> CharMap a -> Maybe a 57 | lookup k (CharMap m) = M.lookup (C.ord k) m 58 | 59 | findWithDefault :: a -> Key -> CharMap a -> a 60 | findWithDefault a k (CharMap m) = M.findWithDefault a (C.ord k) m 61 | 62 | empty :: CharMap a 63 | empty = CharMap M.empty 64 | 65 | singleton :: Key -> a -> CharMap a 66 | singleton k a = CharMap (M.singleton (C.ord k) a) 67 | 68 | insert :: Key -> a -> CharMap a -> CharMap a 69 | insert k a (CharMap m) = CharMap (M.insert (C.ord k) a m) 70 | 71 | insertWith :: (a -> a -> a) -> Key -> a -> CharMap a -> CharMap a 72 | insertWith f k a (CharMap m) = CharMap (M.insertWith f (C.ord k) a m) 73 | 74 | insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> CharMap a 75 | insertWithKey f k a (CharMap m) = CharMap (M.insertWithKey f' (C.ord k) a m) 76 | where f' b a1 a2 = f (unsafeChr b) a1 a2 77 | 78 | insertLookupWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> (Maybe a, CharMap a) 79 | insertLookupWithKey f k a (CharMap m) = (ma,CharMap m') 80 | where (ma,m') = M.insertLookupWithKey f' (C.ord k) a m 81 | f' b a1 a2 = f (unsafeChr b) a1 a2 82 | 83 | delete :: Key -> CharMap a -> CharMap a 84 | delete k (CharMap m) = CharMap (M.delete (C.ord k) m) 85 | 86 | adjust :: (a -> a) -> Key -> CharMap a -> CharMap a 87 | adjust f k (CharMap m) = CharMap (M.adjust f (C.ord k) m) 88 | 89 | adjustWithKey :: (Key -> a -> a) -> Key -> CharMap a -> CharMap a 90 | adjustWithKey f k (CharMap m) = CharMap (M.adjustWithKey f' (C.ord k) m) 91 | where f' b a = f (unsafeChr b) a 92 | 93 | update :: (a -> Maybe a) -> Key -> CharMap a -> CharMap a 94 | update f k (CharMap m) = CharMap (M.update f (C.ord k) m) 95 | 96 | updateWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> CharMap a 97 | updateWithKey f k (CharMap m) = CharMap (M.updateWithKey f' (C.ord k) m) 98 | where f' b a = f (unsafeChr b) a 99 | 100 | updateLookupWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> (Maybe a, CharMap a) 101 | updateLookupWithKey f k (CharMap m) = (a,CharMap m') 102 | where (a,m') = M.updateLookupWithKey f' (C.ord k) m 103 | f' b a1 = f (unsafeChr b) a1 104 | 105 | union :: CharMap a -> CharMap a -> CharMap a 106 | union (CharMap m1) (CharMap m2) = CharMap (M.union m1 m2) 107 | 108 | unionWith :: (a -> a -> a) -> CharMap a -> CharMap a -> CharMap a 109 | unionWith f (CharMap m1) (CharMap m2) = CharMap (M.unionWith f m1 m2) 110 | 111 | unionWithKey :: (Key -> a -> a -> a) -> CharMap a -> CharMap a -> CharMap a 112 | unionWithKey f (CharMap m1) (CharMap m2) = CharMap (M.unionWithKey f' m1 m2) 113 | where f' b a1 a2 = f (unsafeChr b) a1 a2 114 | 115 | unions :: [CharMap a] -> CharMap a 116 | unions cs = CharMap (M.unions (L.map unCharMap cs)) 117 | 118 | unionsWith :: (a -> a -> a) -> [CharMap a] -> CharMap a 119 | unionsWith f cs = CharMap (M.unionsWith f (L.map unCharMap cs)) 120 | 121 | difference :: CharMap a -> CharMap b -> CharMap a 122 | difference (CharMap m1) (CharMap m2) = CharMap (M.difference m1 m2) 123 | 124 | differenceWith :: (a -> b -> Maybe a) -> CharMap a -> CharMap b -> 
CharMap a 125 | differenceWith f (CharMap m1) (CharMap m2) = CharMap (M.differenceWith f m1 m2) 126 | 127 | differenceWithKey :: (Key -> a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a 128 | differenceWithKey f (CharMap m1) (CharMap m2) = CharMap (M.differenceWithKey f' m1 m2) 129 | where f' b a1 a2 = f (unsafeChr b) a1 a2 130 | 131 | intersection :: CharMap a -> CharMap b -> CharMap a 132 | intersection (CharMap m1) (CharMap m2) = CharMap (M.intersection m1 m2) 133 | 134 | intersectionWith :: (a -> b -> a) -> CharMap a -> CharMap b -> CharMap a 135 | intersectionWith f (CharMap m1) (CharMap m2) = CharMap (M.intersectionWith f m1 m2) 136 | 137 | intersectionWithKey :: (Key -> a -> b -> a) -> CharMap a -> CharMap b -> CharMap a 138 | intersectionWithKey f (CharMap m1) (CharMap m2) = CharMap (M.intersectionWithKey f' m1 m2) 139 | where f' b a1 a2 = f (unsafeChr b) a1 a2 140 | 141 | map :: (a -> b) -> CharMap a -> CharMap b 142 | map f (CharMap m) = CharMap (M.map f m) 143 | 144 | mapWithKey :: (Key -> a -> b) -> CharMap a -> CharMap b 145 | mapWithKey f (CharMap m) = CharMap (M.mapWithKey f' m) 146 | where f' b a = f (unsafeChr b) a 147 | 148 | mapAccum :: (a -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) 149 | mapAccum f a (CharMap m) = (a',CharMap m') 150 | where (a',m') = M.mapAccum f a m 151 | 152 | mapAccumWithKey :: (a -> Key -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) 153 | mapAccumWithKey f a (CharMap m) = (a',CharMap m') 154 | where (a',m') = M.mapAccumWithKey f' a m 155 | f' a1 b a2 = f a1 (unsafeChr b) a2 156 | 157 | fold :: (a -> b -> b) -> b -> CharMap a -> b 158 | fold f a (CharMap m) = M.foldr f a m 159 | 160 | foldWithKey :: (Key -> a -> b -> b) -> b -> CharMap a -> b 161 | foldWithKey f a (CharMap m) = M.foldrWithKey f' a m 162 | where f' b a1 a2 = f (unsafeChr b) a1 a2 163 | 164 | elems :: CharMap a -> [a] 165 | elems (CharMap m) = M.elems m 166 | 167 | keys :: CharMap a -> [Key] 168 | keys (CharMap m) = L.map unsafeChr (M.keys m) 169 | 170 | keysSet :: CharMap a -> S.IntSet 171 | keysSet (CharMap m) = M.keysSet m 172 | 173 | assocs :: CharMap a -> [(Key, a)] 174 | assocs (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.assocs m) 175 | 176 | toList :: CharMap a -> [(Key, a)] 177 | toList (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.toList m) 178 | 179 | fromList :: [(Key, a)] -> CharMap a 180 | fromList ka = CharMap (M.fromList (L.map (\(k,a) -> (C.ord k,a)) ka)) 181 | 182 | fromListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a 183 | fromListWith f ka = CharMap (M.fromListWith f (L.map (\(k,a) -> (C.ord k,a)) ka)) 184 | 185 | fromListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a 186 | fromListWithKey f ka = CharMap (M.fromListWithKey f' (L.map (\(k,a) -> (C.ord k,a)) ka)) 187 | where f' b a1 a2 = f (unsafeChr b) a1 a2 188 | 189 | toAscList :: CharMap a -> [(Key, a)] 190 | toAscList (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.toAscList m) 191 | 192 | fromAscList :: [(Key, a)] -> CharMap a 193 | fromAscList ka = CharMap (M.fromAscList (L.map (\(k,a) -> (C.ord k,a)) ka)) 194 | 195 | fromAscListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a 196 | fromAscListWith f ka = CharMap (M.fromAscListWith f (L.map (\(k,a) -> (C.ord k,a)) ka)) 197 | 198 | fromAscListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a 199 | fromAscListWithKey f ka = CharMap (M.fromAscListWithKey f' (L.map (\(k,a) -> (C.ord k,a)) ka)) 200 | where f' b a1 a2 = f (unsafeChr b) a1 a2 201 | 202 | fromDistinctAscList :: [(Key, a)] -> CharMap a 203 | 
fromDistinctAscList ka = CharMap (M.fromDistinctAscList (L.map (\(k,a) -> (C.ord k,a)) ka)) 204 | 205 | filter :: (a -> Bool) -> CharMap a -> CharMap a 206 | filter f (CharMap m) = CharMap (M.filter f m) 207 | 208 | filterWithKey :: (Key -> a -> Bool) -> CharMap a -> CharMap a 209 | filterWithKey f (CharMap m) = CharMap (M.filterWithKey f' m) 210 | where f' b a = f (unsafeChr b) a 211 | 212 | partition :: (a -> Bool) -> CharMap a -> (CharMap a, CharMap a) 213 | partition f (CharMap m) = (CharMap m1', CharMap m2') 214 | where (m1',m2') = M.partition f m 215 | 216 | partitionWithKey :: (Key -> a -> Bool) -> CharMap a -> (CharMap a, CharMap a) 217 | partitionWithKey f (CharMap m) = (CharMap m1', CharMap m2') 218 | where (m1',m2') = M.partitionWithKey f' m 219 | f' b a = f (unsafeChr b) a 220 | 221 | mapMaybe :: (a -> Maybe b) -> CharMap a -> CharMap b 222 | mapMaybe f (CharMap m) = CharMap (M.mapMaybe f m) 223 | 224 | mapMaybeWithKey :: (Key -> a -> Maybe b) -> CharMap a -> CharMap b 225 | mapMaybeWithKey f (CharMap m) = CharMap (M.mapMaybeWithKey f' m) 226 | where f' b a = f (unsafeChr b) a 227 | 228 | mapEither :: (a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) 229 | mapEither f (CharMap m) = (CharMap m1', CharMap m2') 230 | where (m1',m2') = M.mapEither f m 231 | 232 | mapEitherWithKey :: (Key -> a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) 233 | mapEitherWithKey f (CharMap m) = (CharMap m1', CharMap m2') 234 | where (m1',m2') = M.mapEitherWithKey f' m 235 | f' b a = f (unsafeChr b) a 236 | 237 | split :: Key -> CharMap a -> (CharMap a, CharMap a) 238 | split k (CharMap m) = (CharMap m1', CharMap m2') 239 | where (m1',m2') = M.split (C.ord k) m 240 | 241 | splitLookup :: Key -> CharMap a -> (CharMap a, Maybe a, CharMap a) 242 | splitLookup k (CharMap m) = (CharMap m1', a, CharMap m2') 243 | where (m1',a,m2') = M.splitLookup (C.ord k) m 244 | 245 | isSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool 246 | isSubmapOf (CharMap m1) (CharMap m2) = M.isSubmapOf m1 m2 247 | 248 | isSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool 249 | isSubmapOfBy f (CharMap m1) (CharMap m2) = M.isSubmapOfBy f m1 m2 250 | 251 | isProperSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool 252 | isProperSubmapOf (CharMap m1) (CharMap m2) = M.isProperSubmapOf m1 m2 253 | 254 | isProperSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool 255 | isProperSubmapOfBy f (CharMap m1) (CharMap m2) = M.isProperSubmapOfBy f m1 m2 256 | 257 | showTree :: Show a => CharMap a -> String 258 | showTree (CharMap m) = MD.showTree m 259 | 260 | showTreeWith :: Show a => Bool -> Bool -> CharMap a -> String 261 | showTreeWith b1 b2 (CharMap m) = MD.showTreeWith b1 b2 m 262 | {-# INLINE (!) 
#-} 263 | {-# INLINE (\\) #-} 264 | {-# INLINE null #-} 265 | {-# INLINE size #-} 266 | {-# INLINE member #-} 267 | {-# INLINE notMember #-} 268 | {-# INLINE lookup #-} 269 | {-# INLINE findWithDefault #-} 270 | {-# INLINE empty #-} 271 | {-# INLINE singleton #-} 272 | {-# INLINE insert #-} 273 | {-# INLINE insertWith #-} 274 | {-# INLINE insertWithKey #-} 275 | {-# INLINE insertLookupWithKey #-} 276 | {-# INLINE delete #-} 277 | {-# INLINE adjust #-} 278 | {-# INLINE adjustWithKey #-} 279 | {-# INLINE update #-} 280 | {-# INLINE updateWithKey #-} 281 | {-# INLINE updateLookupWithKey #-} 282 | {-# INLINE union #-} 283 | {-# INLINE unionWith #-} 284 | {-# INLINE unionWithKey #-} 285 | {-# INLINE unions #-} 286 | {-# INLINE unionsWith #-} 287 | {-# INLINE difference #-} 288 | {-# INLINE differenceWith #-} 289 | {-# INLINE differenceWithKey #-} 290 | {-# INLINE intersection #-} 291 | {-# INLINE intersectionWith #-} 292 | {-# INLINE intersectionWithKey #-} 293 | {-# INLINE map #-} 294 | {-# INLINE mapWithKey #-} 295 | {-# INLINE mapAccum #-} 296 | {-# INLINE mapAccumWithKey #-} 297 | {-# INLINE fold #-} 298 | {-# INLINE foldWithKey #-} 299 | {-# INLINE elems #-} 300 | {-# INLINE keys #-} 301 | {-# INLINE keysSet #-} 302 | {-# INLINE assocs #-} 303 | {-# INLINE toList #-} 304 | {-# INLINE fromList #-} 305 | {-# INLINE fromListWith #-} 306 | {-# INLINE fromListWithKey #-} 307 | {-# INLINE toAscList #-} 308 | {-# INLINE fromAscList #-} 309 | {-# INLINE fromAscListWith #-} 310 | {-# INLINE fromAscListWithKey #-} 311 | {-# INLINE fromDistinctAscList #-} 312 | {-# INLINE filter #-} 313 | {-# INLINE filterWithKey #-} 314 | {-# INLINE partition #-} 315 | {-# INLINE partitionWithKey #-} 316 | {-# INLINE mapMaybe #-} 317 | {-# INLINE mapMaybeWithKey #-} 318 | {-# INLINE mapEither #-} 319 | {-# INLINE mapEitherWithKey #-} 320 | {-# INLINE split #-} 321 | {-# INLINE splitLookup #-} 322 | {-# INLINE isSubmapOf #-} 323 | {-# INLINE isSubmapOfBy #-} 324 | {-# INLINE isProperSubmapOf #-} 325 | {-# INLINE isProperSubmapOfBy #-} 326 | {-# INLINE showTree #-} 327 | {-# INLINE showTreeWith #-} 328 | -------------------------------------------------------------------------------- /lib/Data/IntMap/EnumMap2.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | module Data.IntMap.EnumMap2 where 4 | 5 | import Data.Foldable as F (Foldable(foldMap)) 6 | import qualified Data.IntMap as M 7 | #if MIN_VERSION_containers(0,5,11) 8 | import qualified Data.IntMap.Internal.Debug as MD 9 | #else 10 | import qualified Data.IntMap as MD 11 | #endif 12 | import qualified Data.IntSet.EnumSet2 as S (EnumSet(..)) 13 | import Data.Semigroup as Sem 14 | import Prelude 15 | import qualified Prelude as L (map) 16 | 17 | newtype EnumMap k a = EnumMap {unEnumMap :: M.IntMap a} 18 | deriving (Eq,Ord,Read,Show) 19 | 20 | instance Ord k => Sem.Semigroup (EnumMap k a) where 21 | EnumMap x <> EnumMap y = EnumMap (x `mappend` y) 22 | 23 | instance Ord k => Monoid (EnumMap k a) where 24 | mempty = EnumMap mempty 25 | mappend = (<>) 26 | 27 | instance Ord k => Functor (EnumMap k) where 28 | fmap f (EnumMap m) = EnumMap (fmap f m) 29 | 30 | instance Ord k => F.Foldable (EnumMap k) where 31 | foldMap f (EnumMap m) = F.foldMap f m 32 | 33 | (!) :: (Enum key) => EnumMap key a -> key -> a 34 | (!) (EnumMap m) k = (M.!) 
m (fromEnum k) 35 | 36 | (\\) :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a 37 | (\\) (EnumMap m1) (EnumMap m2) = EnumMap ((M.\\) m1 m2) 38 | 39 | null :: (Enum key) => EnumMap key a -> Bool 40 | null (EnumMap m) = M.null m 41 | 42 | size :: (Enum key) => EnumMap key a -> Int 43 | size (EnumMap m) = M.size m 44 | 45 | member :: (Enum key) => key -> EnumMap key a -> Bool 46 | member k (EnumMap m) = M.member (fromEnum k) m 47 | 48 | notMember :: (Enum key) => key -> EnumMap key a -> Bool 49 | notMember k (EnumMap m) = M.notMember (fromEnum k) m 50 | 51 | {-# INLINE lookup #-} 52 | lookup :: (Enum key) => key -> EnumMap key a -> Maybe a 53 | lookup k (EnumMap m) = M.lookup (fromEnum k) m 54 | 55 | findWithDefault :: (Enum key) => a -> key -> EnumMap key a -> a 56 | findWithDefault a k (EnumMap m) = M.findWithDefault a (fromEnum k) m 57 | 58 | empty :: (Enum key) => EnumMap key a 59 | empty = EnumMap M.empty 60 | 61 | singleton :: (Enum key) => key -> a -> EnumMap key a 62 | singleton k a = EnumMap (M.singleton (fromEnum k) a) 63 | 64 | insert :: (Enum key) => key -> a -> EnumMap key a -> EnumMap key a 65 | insert k a (EnumMap m) = EnumMap (M.insert (fromEnum k) a m) 66 | 67 | insertWith :: (Enum key) => (a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a 68 | insertWith f k a (EnumMap m) = EnumMap (M.insertWith f (fromEnum k) a m) 69 | 70 | insertWithKey :: (Enum key) => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a 71 | insertWithKey f k a (EnumMap m) = EnumMap (M.insertWithKey f' (fromEnum k) a m) 72 | where f' b a1 a2 = f (toEnum b) a1 a2 73 | 74 | insertLookupWithKey :: (Enum key) => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> (Maybe a, EnumMap key a) 75 | insertLookupWithKey f k a (EnumMap m) = (ma,EnumMap m') 76 | where (ma,m') = M.insertLookupWithKey f' (fromEnum k) a m 77 | f' b a1 a2 = f (toEnum b) a1 a2 78 | 79 | delete :: (Enum key) => key -> EnumMap key a -> EnumMap key a 80 | delete k (EnumMap m) = EnumMap (M.delete (fromEnum k) m) 81 | 82 | adjust :: (Enum key) => (a -> a) -> key -> EnumMap key a -> EnumMap key a 83 | adjust f k (EnumMap m) = EnumMap (M.adjust f (fromEnum k) m) 84 | 85 | adjustWithKey :: (Enum key) => (key -> a -> a) -> key -> EnumMap key a -> EnumMap key a 86 | adjustWithKey f k (EnumMap m) = EnumMap (M.adjustWithKey f' (fromEnum k) m) 87 | where f' b a = f (toEnum b) a 88 | 89 | update :: (Enum key) => (a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a 90 | update f k (EnumMap m) = EnumMap (M.update f (fromEnum k) m) 91 | 92 | updateWithKey :: (Enum key) => (key -> a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a 93 | updateWithKey f k (EnumMap m) = EnumMap (M.updateWithKey f' (fromEnum k) m) 94 | where f' b a = f (toEnum b) a 95 | 96 | updateLookupWithKey :: (Enum key) => (key -> a -> Maybe a) -> key -> EnumMap key a -> (Maybe a, EnumMap key a) 97 | updateLookupWithKey f k (EnumMap m) = (a,EnumMap m') 98 | where (a,m') = M.updateLookupWithKey f' (fromEnum k) m 99 | f' b a1 = f (toEnum b) a1 100 | 101 | union :: (Enum key) => EnumMap key a -> EnumMap key a -> EnumMap key a 102 | union (EnumMap m1) (EnumMap m2) = EnumMap (M.union m1 m2) 103 | 104 | unionWith :: (Enum key) => (a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a 105 | unionWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.unionWith f m1 m2) 106 | 107 | unionWithKey :: (Enum key) => (key -> a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a 108 | unionWithKey f (EnumMap m1) (EnumMap m2) = EnumMap 
(M.unionWithKey f' m1 m2) 109 | where f' b a1 a2 = f (toEnum b) a1 a2 110 | 111 | unions :: (Enum key) => [EnumMap key a] -> EnumMap key a 112 | unions cs = EnumMap (M.unions (L.map unEnumMap cs)) 113 | 114 | unionsWith :: (Enum key) => (a -> a -> a) -> [EnumMap key a] -> EnumMap key a 115 | unionsWith f cs = EnumMap (M.unionsWith f (L.map unEnumMap cs)) 116 | 117 | difference :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a 118 | difference (EnumMap m1) (EnumMap m2) = EnumMap (M.difference m1 m2) 119 | 120 | differenceWith :: (Enum key) => (a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a 121 | differenceWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.differenceWith f m1 m2) 122 | 123 | differenceWithKey :: (Enum key) => (key -> a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a 124 | differenceWithKey f (EnumMap m1) (EnumMap m2) = EnumMap (M.differenceWithKey f' m1 m2) 125 | where f' b a1 a2 = f (toEnum b) a1 a2 126 | 127 | intersection :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a 128 | intersection (EnumMap m1) (EnumMap m2) = EnumMap (M.intersection m1 m2) 129 | 130 | intersectionWith :: (Enum key) => (a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a 131 | intersectionWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.intersectionWith f m1 m2) 132 | 133 | intersectionWithKey :: (Enum key) => (key -> a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a 134 | intersectionWithKey f (EnumMap m1) (EnumMap m2) = EnumMap (M.intersectionWithKey f' m1 m2) 135 | where f' b a1 a2 = f (toEnum b) a1 a2 136 | 137 | map :: (Enum key) => (a -> b) -> EnumMap key a -> EnumMap key b 138 | map f (EnumMap m) = EnumMap (M.map f m) 139 | 140 | mapWithKey :: (Enum key) => (key -> a -> b) -> EnumMap key a -> EnumMap key b 141 | mapWithKey f (EnumMap m) = EnumMap (M.mapWithKey f' m) 142 | where f' b a = f (toEnum b) a 143 | 144 | mapAccum :: (Enum key) => (a -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) 145 | mapAccum f a (EnumMap m) = (a',EnumMap m') 146 | where (a',m') = M.mapAccum f a m 147 | 148 | mapAccumWithKey :: (Enum key) => (a -> key -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) 149 | mapAccumWithKey f a (EnumMap m) = (a',EnumMap m') 150 | where (a',m') = M.mapAccumWithKey f' a m 151 | f' a1 b a2 = f a1 (toEnum b) a2 152 | 153 | fold :: (Enum key) => (a -> b -> b) -> b -> EnumMap key a -> b 154 | fold f a (EnumMap m) = M.foldr f a m 155 | 156 | foldWithKey :: (Enum key) => (key -> a -> b -> b) -> b -> EnumMap key a -> b 157 | foldWithKey f a (EnumMap m) = M.foldrWithKey f' a m 158 | where f' b a1 a2 = f (toEnum b) a1 a2 159 | 160 | elems :: (Enum key) => EnumMap key a -> [a] 161 | elems (EnumMap m) = M.elems m 162 | 163 | keys :: (Enum key) => EnumMap key a -> [key] 164 | keys (EnumMap m) = L.map toEnum (M.keys m) 165 | 166 | -- Have to break cover until I have CharSet 167 | keysSet :: (Enum key) => EnumMap key a -> S.EnumSet key 168 | keysSet (EnumMap m) = S.EnumSet (M.keysSet m) 169 | 170 | assocs :: (Enum key) => EnumMap key a -> [(key, a)] 171 | assocs (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.assocs m) 172 | 173 | toList :: (Enum key) => EnumMap key a -> [(key, a)] 174 | toList (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.toList m) 175 | 176 | fromList :: (Enum key) => [(key, a)] -> EnumMap key a 177 | fromList ka = EnumMap (M.fromList (L.map (\(k,a) -> (fromEnum k,a)) ka)) 178 | 179 | fromListWith :: (Enum key) => (a -> a -> a) -> [(key, a)] -> EnumMap key a 180 | 
fromListWith f ka = EnumMap (M.fromListWith f (L.map (\(k,a) -> (fromEnum k,a)) ka)) 181 | 182 | fromListWithKey :: (Enum key) => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a 183 | fromListWithKey f ka = EnumMap (M.fromListWithKey f' (L.map (\(k,a) -> (fromEnum k,a)) ka)) 184 | where f' b a1 a2 = f (toEnum b) a1 a2 185 | 186 | toAscList :: (Enum key) => EnumMap key a -> [(key, a)] 187 | toAscList (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.toAscList m) 188 | 189 | fromAscList :: (Enum key) => [(key, a)] -> EnumMap key a 190 | fromAscList ka = EnumMap (M.fromAscList (L.map (\(k,a) -> (fromEnum k,a)) ka)) 191 | 192 | fromAscListWith :: (Enum key) => (a -> a -> a) -> [(key, a)] -> EnumMap key a 193 | fromAscListWith f ka = EnumMap (M.fromAscListWith f (L.map (\(k,a) -> (fromEnum k,a)) ka)) 194 | 195 | fromAscListWithKey :: (Enum key) => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a 196 | fromAscListWithKey f ka = EnumMap (M.fromAscListWithKey f' (L.map (\(k,a) -> (fromEnum k,a)) ka)) 197 | where f' b a1 a2 = f (toEnum b) a1 a2 198 | 199 | fromDistinctAscList :: (Enum key) => [(key, a)] -> EnumMap key a 200 | fromDistinctAscList ka = EnumMap (M.fromDistinctAscList (L.map (\(k,a) -> (fromEnum k,a)) ka)) 201 | 202 | filter :: (Enum key) => (a -> Bool) -> EnumMap key a -> EnumMap key a 203 | filter f (EnumMap m) = EnumMap (M.filter f m) 204 | 205 | filterWithKey :: (Enum key) => (key -> a -> Bool) -> EnumMap key a -> EnumMap key a 206 | filterWithKey f (EnumMap m) = EnumMap (M.filterWithKey f' m) 207 | where f' b a = f (toEnum b) a 208 | 209 | partition :: (Enum key) => (a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) 210 | partition f (EnumMap m) = (EnumMap m1', EnumMap m2') 211 | where (m1',m2') = M.partition f m 212 | 213 | partitionWithKey :: (Enum key) => (key -> a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) 214 | partitionWithKey f (EnumMap m) = (EnumMap m1', EnumMap m2') 215 | where (m1',m2') = M.partitionWithKey f' m 216 | f' b a = f (toEnum b) a 217 | 218 | mapMaybe :: (Enum key) => (a -> Maybe b) -> EnumMap key a -> EnumMap key b 219 | mapMaybe f (EnumMap m) = EnumMap (M.mapMaybe f m) 220 | 221 | mapMaybeWithKey :: (Enum key) => (key -> a -> Maybe b) -> EnumMap key a -> EnumMap key b 222 | mapMaybeWithKey f (EnumMap m) = EnumMap (M.mapMaybeWithKey f' m) 223 | where f' b a = f (toEnum b) a 224 | 225 | mapEither :: (Enum key) => (a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) 226 | mapEither f (EnumMap m) = (EnumMap m1', EnumMap m2') 227 | where (m1',m2') = M.mapEither f m 228 | 229 | mapEitherWithKey :: (Enum key) => (key -> a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) 230 | mapEitherWithKey f (EnumMap m) = (EnumMap m1', EnumMap m2') 231 | where (m1',m2') = M.mapEitherWithKey f' m 232 | f' b a = f (toEnum b) a 233 | 234 | split :: (Enum key) => key -> EnumMap key a -> (EnumMap key a, EnumMap key a) 235 | split k (EnumMap m) = (EnumMap m1', EnumMap m2') 236 | where (m1',m2') = M.split (fromEnum k) m 237 | 238 | splitLookup :: (Enum key) => key -> EnumMap key a -> (EnumMap key a, Maybe a, EnumMap key a) 239 | splitLookup k (EnumMap m) = (EnumMap m1', a, EnumMap m2') 240 | where (m1',a,m2') = M.splitLookup (fromEnum k) m 241 | 242 | isSubmapOf :: (Enum key,Eq a) => EnumMap key a -> EnumMap key a -> Bool 243 | isSubmapOf (EnumMap m1) (EnumMap m2) = M.isSubmapOf m1 m2 244 | 245 | isSubmapOfBy :: (Enum key) => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool 246 | isSubmapOfBy f (EnumMap m1) 
(EnumMap m2) = M.isSubmapOfBy f m1 m2 247 | 248 | isProperSubmapOf :: (Enum key,Eq a) => EnumMap key a -> EnumMap key a -> Bool 249 | isProperSubmapOf (EnumMap m1) (EnumMap m2) = M.isProperSubmapOf m1 m2 250 | 251 | isProperSubmapOfBy :: (Enum key) => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool 252 | isProperSubmapOfBy f (EnumMap m1) (EnumMap m2) = M.isProperSubmapOfBy f m1 m2 253 | 254 | showTree :: (Enum key,Show a) => EnumMap key a -> String 255 | showTree (EnumMap m) = MD.showTree m 256 | 257 | showTreeWith :: (Enum key,Show a) => Bool -> Bool -> EnumMap key a -> String 258 | showTreeWith b1 b2 (EnumMap m) = MD.showTreeWith b1 b2 m 259 | -------------------------------------------------------------------------------- /lib/Data/IntSet/EnumSet2.hs: -------------------------------------------------------------------------------- 1 | module Data.IntSet.EnumSet2 where 2 | 3 | import qualified Data.IntSet as S 4 | import qualified Data.List as L (map) 5 | import Data.Semigroup as Sem 6 | 7 | newtype EnumSet e = EnumSet {unEnumSet :: S.IntSet} 8 | deriving (Eq,Ord,Read,Show) 9 | 10 | instance Sem.Semigroup (EnumSet e) where 11 | EnumSet x <> EnumSet y = EnumSet (x `mappend` y) 12 | 13 | instance Monoid (EnumSet e) where 14 | mempty = EnumSet mempty 15 | mappend = (<>) 16 | 17 | (\\) :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e 18 | (\\) (EnumSet s1) (EnumSet s2) = EnumSet ((S.\\) s1 s2) 19 | 20 | null :: (Enum e) => EnumSet e -> Bool 21 | null (EnumSet s) = S.null s 22 | 23 | size :: (Enum e) => EnumSet e -> Int 24 | size (EnumSet s) = S.size s 25 | 26 | member :: (Enum e) => e -> EnumSet e -> Bool 27 | member e (EnumSet s) = S.member (fromEnum e) s 28 | 29 | notMember :: (Enum e) => Int -> EnumSet e -> Bool 30 | notMember e (EnumSet s) = S.notMember (fromEnum e) s 31 | 32 | isSubsetOf :: (Enum e) => EnumSet e -> EnumSet e -> Bool 33 | isSubsetOf (EnumSet e1) (EnumSet e2) = S.isSubsetOf e1 e2 34 | 35 | isProperSubsetOf :: (Enum e) => EnumSet e -> EnumSet e -> Bool 36 | isProperSubsetOf (EnumSet e1) (EnumSet e2) = S.isProperSubsetOf e1 e2 37 | 38 | empty :: (Enum e) => EnumSet e 39 | empty = EnumSet (S.empty) 40 | 41 | singleton :: (Enum e) => e -> EnumSet e 42 | singleton e = EnumSet (S.singleton (fromEnum e)) 43 | 44 | insert :: (Enum e) => e -> EnumSet e -> EnumSet e 45 | insert e (EnumSet s) = EnumSet (S.insert (fromEnum e) s) 46 | 47 | delete :: (Enum e) => e -> EnumSet e -> EnumSet e 48 | delete e (EnumSet s) = EnumSet (S.delete (fromEnum e) s) 49 | 50 | union :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e 51 | union (EnumSet s1) (EnumSet s2) = EnumSet (S.union s1 s2) 52 | 53 | unions :: (Enum e) => [EnumSet e] -> EnumSet e 54 | unions es = EnumSet (S.unions (L.map unEnumSet es)) 55 | 56 | difference :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e 57 | difference (EnumSet e1) (EnumSet e2) = EnumSet (S.difference e1 e2) 58 | 59 | intersection :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e 60 | intersection (EnumSet e1) (EnumSet e2) = EnumSet (S.intersection e1 e2) 61 | 62 | filter :: (Enum e) => (e -> Bool) -> EnumSet e -> EnumSet e 63 | filter f (EnumSet s) = EnumSet (S.filter f' s) 64 | where f' b = f (toEnum b) 65 | 66 | partition :: (Enum e) => (e -> Bool) -> EnumSet e -> (EnumSet e, EnumSet e) 67 | partition f (EnumSet s) = (EnumSet s1', EnumSet s2') 68 | where (s1',s2') = S.partition f' s 69 | f' b = f (toEnum b) 70 | 71 | split :: (Enum e) => e -> EnumSet e -> (EnumSet e, EnumSet e) 72 | split e (EnumSet s) = (EnumSet s1', EnumSet s2') 73 | where 
(s1',s2') = S.split (fromEnum e) s 74 | 75 | splitMember :: (Enum e) => e -> EnumSet e -> (EnumSet e, Bool, EnumSet e) 76 | splitMember e (EnumSet s) = (EnumSet s1',a,EnumSet s2') 77 | where (s1',a,s2') = S.splitMember (fromEnum e) s 78 | 79 | map :: (Enum e) => (e -> e) -> EnumSet e -> EnumSet e 80 | map f (EnumSet s) = EnumSet (S.map f' s) 81 | where f' b = fromEnum (f (toEnum b)) 82 | 83 | fold :: (Enum e) => (e -> b -> b) -> b -> EnumSet e -> b 84 | fold f a (EnumSet s) = S.fold f' a s 85 | where f' b a1 = f (toEnum b) a1 86 | 87 | elems :: (Enum e) => EnumSet e -> [e] 88 | elems (EnumSet s) = L.map toEnum (S.elems s) 89 | 90 | toList :: (Enum e) => EnumSet e -> [e] 91 | toList (EnumSet s) = L.map toEnum (S.toList s) 92 | 93 | fromList :: (Enum e) => [e] -> EnumSet e 94 | fromList es = EnumSet (S.fromList (L.map fromEnum es)) 95 | 96 | toAscList :: (Enum e) => EnumSet e -> [e] 97 | toAscList (EnumSet s) = L.map toEnum (S.toAscList s) 98 | 99 | fromAscList :: (Enum e) => [e] -> EnumSet e 100 | fromAscList es = EnumSet (S.fromAscList (L.map fromEnum es)) 101 | 102 | fromDistinctAscList :: (Enum e) => [e] -> EnumSet e 103 | fromDistinctAscList es = EnumSet (S.fromDistinctAscList (L.map fromEnum es)) 104 | 105 | showTree :: (Enum e) => EnumSet e -> String 106 | showTree (EnumSet s) = S.showTree s 107 | 108 | showTreeWith :: (Enum e) => Bool -> Bool -> EnumSet e -> String 109 | showTreeWith a1 a2 (EnumSet s) = S.showTreeWith a1 a2 s 110 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module: Text.Regex.TDFA 3 | Copyright: (c) Chris Kuklewicz 2007-2009 4 | SPDX-License-Identifier: BSD-3-Clause 5 | Maintainer: Andreas Abel 6 | Stability: stable 7 | 8 | The "Text.Regex.TDFA" module provides a backend for regular 9 | expressions. It provides instances for the classes defined and 10 | documented in "Text.Regex.Base" and re-exported by this module. If 11 | you import this along with other backends then you should do so with 12 | qualified imports (with renaming for convenience). 13 | 14 | This regex-tdfa package implements, correctly, POSIX extended regular 15 | expressions. It is highly unlikely that the @regex-posix@ package on 16 | your operating system is correct, see 17 | for examples of your 18 | OS's bugs. 19 | 20 | = Importing and using 21 | 22 | Declare a dependency on the @regex-tdfa@ library in your @.cabal@ file: 23 | 24 | > build-depends: regex-tdfa ^>= 1.3.2 25 | 26 | In Haskell modules where you want to use regexes simply @import@ /this/ module: 27 | 28 | @ 29 | import "Text.Regex.TDFA" 30 | @ 31 | 32 | = Basics 33 | 34 | >>> let emailRegex = "[a-zA-Z0-9+._-]+\\@[-a-zA-Z]+\\.[a-z]+" 35 | >>> "my email is first-name.lastname_1974@e-mail.com" =~ emailRegex :: Bool 36 | True 37 | 38 | >>> "invalid@mail@com" =~ emailRegex :: Bool 39 | False 40 | 41 | >>> "invalid@mail.COM" =~ emailRegex :: Bool 42 | False 43 | 44 | >>> "#@invalid.com" =~ emailRegex :: Bool 45 | False 46 | 47 | @ 48 | /-- non-monadic/ 49 | λ> \ '=~' \ 50 | 51 | /-- monadic, uses 'fail' on lack of match/ 52 | λ> \ '=~~' \ 53 | @ 54 | 55 | ('=~') and ('=~~') are polymorphic in their return type. This is so that 56 | regex-tdfa can pick the most efficient way to give you your result based on 57 | what you need. For instance, if all you want is to check whether the regex 58 | matched or not, there's no need to allocate a result string. 
If you only want 59 | the first match, rather than all the matches, then the matching engine can stop 60 | after finding a single hit. 61 | 62 | This does mean, though, that you may sometimes have to explicitly specify the 63 | type you want, especially if you're trying things out at the REPL. 64 | 65 | = Common use cases 66 | 67 | == Get the first match 68 | 69 | @ 70 | /-- returns empty string if no match/ 71 | a '=~' b :: String /-- or ByteString, or Text.../ 72 | @ 73 | 74 | >>> "alexis-de-tocqueville" =~ "[a-z]+" :: String 75 | "alexis" 76 | 77 | >>> "alexis-de-tocqueville" =~ "[0-9]+" :: String 78 | "" 79 | 80 | == Check if it matched at all 81 | 82 | @ 83 | a '=~' b :: Bool 84 | @ 85 | 86 | >>> "alexis-de-tocqueville" =~ "[a-z]+" :: Bool 87 | True 88 | 89 | == Get first match + text before/after 90 | 91 | @ 92 | /-- if no match, will just return whole/ 93 | /-- string in the first element of the tuple/ 94 | a =~ b :: (String, String, String) 95 | @ 96 | 97 | >>> "alexis-de-tocqueville" =~ "de" :: (String, String, String) 98 | ("alexis-","de","-tocqueville") 99 | 100 | >>> "alexis-de-tocqueville" =~ "kant" :: (String, String, String) 101 | ("alexis-de-tocqueville","","") 102 | 103 | == Get first match + submatches 104 | 105 | @ 106 | /-- same as above, but also returns a list of just submatches./ 107 | /-- submatch list is empty if regex doesn't match at all/ 108 | a '=~' b :: (String, String, String, [String]) 109 | @ 110 | 111 | >>> "div[attr=1234]" =~ "div\\[([a-z]+)=([^]]+)\\]" :: (String, String, String, [String]) 112 | ("","div[attr=1234]","",["attr","1234"]) 113 | 114 | == Get /all/ matches 115 | 116 | @ 117 | /-- can also return Data.Array instead of List/ 118 | 'getAllTextMatches' (a '=~' b) :: [String] 119 | @ 120 | 121 | >>> getAllTextMatches ("john anne yifan" =~ "[a-z]+") :: [String] 122 | ["john","anne","yifan"] 123 | 124 | >>> getAllTextMatches ("* - . a + z" =~ "[--z]+") :: [String] 125 | ["-",".","a","z"] 126 | 127 | = Feature support 128 | 129 | This package does provide captured parenthesized subexpressions. 130 | 131 | Depending on the text being searched this package supports Unicode. 132 | The @[Char]@, @Text@, @Text.Lazy@, and @(Seq Char)@ text types support Unicode. The @ByteString@ 133 | and @ByteString.Lazy@ text types only support ASCII. 134 | 135 | As of version 1.1.1 the following GNU extensions are recognized, all 136 | anchors: 137 | 138 | * \\\` at beginning of entire text 139 | * \\\' at end of entire text 140 | * \\\< at beginning of word 141 | * \\\> at end of word 142 | * \\b at either beginning or end of word 143 | * \\B at neither beginning nor end of word 144 | 145 | The above are controlled by the 'newSyntax' Bool in 'CompOption'. 146 | 147 | Where the "word" boundaries means between characters that are and are 148 | not in the [:word:] character class which contains [a-zA-Z0-9_]. Note 149 | that \\\< and \\b may match before the entire text and \\\> and \\b may 150 | match at the end of the entire text. 151 | 152 | There is no locale support, so collating elements like [.ch.] are 153 | simply ignored and equivalence classes like [=a=] are converted to 154 | just [a]. The character classes like [:alnum:] are supported over 155 | ASCII only, valid classes are alnum, digit, punct, alpha, graph, 156 | space, blank, lower, upper, cntrl, print, xdigit, word. 157 | 158 | >>> getAllTextMatches ("john anne yifan" =~ "[[:lower:]]+") :: [String] 159 | ["john","anne","yifan"] 160 | 161 | 162 | This package does not provide "basic" regular expressions. 
This 163 | package does not provide back references inside regular expressions. 164 | 165 | The package does not provide Perl style regular expressions. Please 166 | look at the 167 | and packages instead. 168 | 169 | This package does not provide find-and-replace. 170 | 171 | = Avoiding backslashes 172 | 173 | If you find yourself writing a lot of regexes, take a look at 174 | . It'll 175 | let you write regexes without needing to escape all your backslashes. 176 | 177 | @ 178 | \{\-\# LANGUAGE QuasiQuotes \#\-\} 179 | 180 | import Text.RawString.QQ 181 | import Text.Regex.TDFA 182 | 183 | λ> "2 * (3 + 1) / 4" '=~' [r|\\([^)]+\\)|] :: String 184 | "(3 + 1)" 185 | @ 186 | 187 | -} 188 | 189 | module Text.Regex.TDFA(getVersion_Text_Regex_TDFA 190 | ,(=~),(=~~) 191 | ,module Text.Regex.TDFA.Common 192 | ,module Text.Regex.Base) where 193 | 194 | import qualified Control.Monad.Fail as Fail 195 | import Data.Version(Version) 196 | import Text.Regex.Base 197 | import Text.Regex.TDFA.String() 198 | import Text.Regex.TDFA.ByteString() 199 | import Text.Regex.TDFA.ByteString.Lazy() 200 | import Text.Regex.TDFA.Text() 201 | import Text.Regex.TDFA.Text.Lazy() 202 | import Text.Regex.TDFA.Sequence() 203 | import Text.Regex.TDFA.Common(Regex,CompOption(..),ExecOption(..)) 204 | --import Text.Regex.TDFA.Wrap(Regex,CompOption(..),ExecOption(..),(=~),(=~~)) 205 | 206 | import Paths_regex_tdfa(version) 207 | 208 | getVersion_Text_Regex_TDFA :: Version 209 | getVersion_Text_Regex_TDFA = version 210 | 211 | 212 | -- | This is the pure functional matching operator. If the target 213 | -- cannot be produced then some empty result will be returned. If 214 | -- there is an error in processing, then 'error' will be called. 215 | (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) 216 | => source1 -> source -> target 217 | (=~) x r = let make :: RegexMaker Regex CompOption ExecOption a => a -> Regex 218 | make = makeRegex 219 | in match (make r) x 220 | 221 | -- | This is the monadic matching operator. If a single match fails, 222 | -- then 'fail' will be called. 223 | (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target, Fail.MonadFail m) 224 | => source1 -> source -> m target 225 | (=~~) x r = do let make :: (RegexMaker Regex CompOption ExecOption a, Fail.MonadFail m) => a -> m Regex 226 | make = makeRegexM 227 | q <- make r 228 | matchM q x 229 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/ByteString.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 3 | @ByteString@ with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and 4 | "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via 5 | import "Text.Regex.TDFA". 6 | 7 | This exports instances of the high level API and the medium level 8 | API of 'compile','execute', and 'regexec'. 9 | -} 10 | {- By Chris Kuklewicz, 2009. BSD License, see the LICENSE file. 
-} 11 | module Text.Regex.TDFA.ByteString( 12 | Regex 13 | ,CompOption 14 | ,ExecOption 15 | ,compile 16 | ,execute 17 | ,regexec 18 | ) where 19 | 20 | import Data.Array((!),elems) 21 | import qualified Data.ByteString.Char8 as B(ByteString,take,drop,unpack) 22 | 23 | import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..)) 24 | import Text.Regex.Base.Impl(polymatch,polymatchM) 25 | import Text.Regex.TDFA.ReadRegex(parseRegex) 26 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 27 | import Text.Regex.TDFA.TDFA(patternToRegex) 28 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) 29 | 30 | import Data.Maybe(listToMaybe) 31 | import Text.Regex.TDFA.NewDFA.Engine(execMatch) 32 | import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 33 | 34 | instance RegexContext Regex B.ByteString B.ByteString where 35 | match = polymatch 36 | matchM = polymatchM 37 | 38 | instance RegexMaker Regex CompOption ExecOption B.ByteString where 39 | makeRegexOptsM c e source = makeRegexOptsM c e (B.unpack source) 40 | 41 | instance RegexLike Regex B.ByteString where 42 | matchOnce r s = listToMaybe (matchAll r s) 43 | matchAll r s = execMatch r 0 '\n' s 44 | matchCount r s = length (matchAll r' s) 45 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 46 | matchTest = Tester.matchTest 47 | matchOnceText regex source = 48 | fmap (\ma -> let (o,l) = ma!0 49 | in (B.take o source 50 | ,fmap (\ol@(off,len) -> (B.take len (B.drop off source),ol)) ma 51 | ,B.drop (o+l) source)) 52 | (matchOnce regex source) 53 | matchAllText regex source = 54 | map (fmap (\ol@(off,len) -> (B.take len (B.drop off source),ol))) 55 | (matchAll regex source) 56 | 57 | compile :: CompOption -- ^ Flags (summed together) 58 | -> ExecOption -- ^ Flags (summed together) 59 | -> B.ByteString -- ^ The regular expression to compile 60 | -> Either String Regex -- ^ Returns: the compiled regular expression 61 | compile compOpt execOpt bs = 62 | case parseRegex (B.unpack bs) of 63 | Left err -> Left ("parseRegex for Text.Regex.TDFA.ByteString failed:"++show err) 64 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 65 | 66 | execute :: Regex -- ^ Compiled regular expression 67 | -> B.ByteString -- ^ ByteString to match against 68 | -> Either String (Maybe MatchArray) 69 | execute r bs = Right (matchOnce r bs) 70 | 71 | regexec :: Regex -- ^ Compiled regular expression 72 | -> B.ByteString -- ^ ByteString to match against 73 | -> Either String (Maybe (B.ByteString, B.ByteString, B.ByteString, [B.ByteString])) 74 | regexec r txt = Right $ 75 | case matchOnceText r txt of 76 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 77 | -> Just (pre, main, post, rest) 78 | _ -> Nothing 79 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/ByteString/Lazy.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 3 | @ByteString@ with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and 4 | "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via 5 | import "Text.Regex.TDFA". 6 | 7 | This exports instances of the high level API and the medium level 8 | API of 'compile','execute', and 'regexec'. 
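
A brief sketch of the medium level API (this example assumes the 'defaultCompOpt'
and 'defaultExecOpt' values from "Text.Regex.Base" are in scope; the expected
result is shown as a comment):

@
import qualified Data.ByteString.Lazy.Char8 as L

-- expected: Right (Just ("xx", "aaab", "yy", ["aaa","b"]))
example :: Either String (Maybe (L.ByteString, L.ByteString, L.ByteString, [L.ByteString]))
example = do
  re <- compile defaultCompOpt defaultExecOpt (L.pack "(a+)(b)")
  regexec re (L.pack "xxaaabyy")
@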
9 | -} 10 | module Text.Regex.TDFA.ByteString.Lazy( 11 | Regex 12 | ,CompOption 13 | ,ExecOption 14 | ,compile 15 | ,execute 16 | ,regexec 17 | ) where 18 | 19 | import Data.Array.IArray((!),elems,amap) 20 | import qualified Data.ByteString.Lazy.Char8 as L(ByteString,take,drop,unpack) 21 | 22 | import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..)) 23 | import Text.Regex.Base.Impl(polymatch,polymatchM) 24 | import Text.Regex.TDFA.ReadRegex(parseRegex) 25 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 26 | import Text.Regex.TDFA.TDFA(patternToRegex) 27 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) 28 | 29 | import Data.Maybe(listToMaybe) 30 | import Text.Regex.TDFA.NewDFA.Engine(execMatch) 31 | import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 32 | 33 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 34 | 35 | instance RegexContext Regex L.ByteString L.ByteString where 36 | match = polymatch 37 | matchM = polymatchM 38 | 39 | instance RegexMaker Regex CompOption ExecOption L.ByteString where 40 | makeRegexOptsM c e source = makeRegexOptsM c e (L.unpack source) 41 | 42 | instance RegexLike Regex L.ByteString where 43 | matchOnce r s = listToMaybe (matchAll r s) 44 | matchAll r s = execMatch r 0 '\n' s 45 | matchCount r s = length (matchAll r' s) 46 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 47 | matchTest = Tester.matchTest 48 | matchOnceText regex source = 49 | fmap (\ma -> 50 | let (o32,l32) = ma!0 51 | o = fi o32 52 | l = fi l32 53 | in (L.take o source 54 | ,fmap (\ol@(off32,len32) -> 55 | let off = fi off32 56 | len = fi len32 57 | in (L.take len (L.drop off source),ol)) ma 58 | ,L.drop (o+l) source)) 59 | (matchOnce regex source) 60 | matchAllText regex source = 61 | let go i _ _ | i `seq` False = undefined 62 | go _i _t [] = [] 63 | go i t (x:xs) = 64 | let (off0,len0) = x!0 65 | trans pair@(off32,len32) = (L.take (fi len32) (L.drop (fi (off32-i)) t),pair) 66 | t' = L.drop (fi (off0+len0-i)) t 67 | in amap trans x : seq t' (go (off0+len0) t' xs) 68 | in go 0 source (matchAll regex source) 69 | 70 | fi :: (Integral a, Num b) => a -> b 71 | fi = fromIntegral 72 | 73 | compile :: CompOption -- ^ Flags (summed together) 74 | -> ExecOption -- ^ Flags (summed together) 75 | -> L.ByteString -- ^ The regular expression to compile 76 | -> Either String Regex -- ^ Returns: the compiled regular expression 77 | compile compOpt execOpt bs = 78 | case parseRegex (L.unpack bs) of 79 | Left err -> Left ("parseRegex for Text.Regex.TDFA.ByteString failed:"++show err) 80 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 81 | 82 | execute :: Regex -- ^ Compiled regular expression 83 | -> L.ByteString -- ^ ByteString to match against 84 | -> Either String (Maybe MatchArray) 85 | execute r bs = Right (matchOnce r bs) 86 | 87 | regexec :: Regex -- ^ Compiled regular expression 88 | -> L.ByteString -- ^ ByteString to match against 89 | -> Either String (Maybe (L.ByteString, L.ByteString, L.ByteString, [L.ByteString])) 90 | regexec r txt = Right $ 91 | case matchOnceText r txt of 92 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 93 | -> Just (pre, main, post, rest) 94 | _ -> Nothing 95 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Common.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS -funbox-strict-fields #-} 2 | 3 | -- | 
Common provides simple functions to the backend. 4 | -- It defines most of the data types. 5 | -- All modules should call 'error' via the 'common_error' function below. 6 | 7 | module Text.Regex.TDFA.Common where 8 | 9 | import Text.Regex.Base(RegexOptions(..)) 10 | 11 | {- By Chris Kuklewicz, 2007-2009. BSD License, see the LICENSE file. -} 12 | import Data.Array.IArray(Array) 13 | import Data.IntSet.EnumSet2(EnumSet) 14 | import qualified Data.IntSet.EnumSet2 as Set(toList) 15 | import Data.IntMap.CharMap2(CharMap(..)) 16 | import Data.IntMap (IntMap) 17 | import qualified Data.IntMap as IMap (findWithDefault,assocs,toList,null,size,toAscList) 18 | import Data.IntSet(IntSet) 19 | import qualified Data.IntMap.CharMap2 as Map (assocs,toAscList,null) 20 | import Data.Sequence as S(Seq) 21 | --import Debug.Trace 22 | 23 | import Text.Regex.TDFA.IntArrTrieSet(TrieSet) 24 | 25 | {-# INLINE look #-} 26 | look :: Int -> IntMap a -> a 27 | look key imap = IMap.findWithDefault (common_error "Text.Regex.DFA.Common" ("key "++show key++" not found in look")) key imap 28 | 29 | common_error :: String -> String -> a 30 | common_error moduleName message = 31 | error ("Explicit error in module "++moduleName++" : "++message) 32 | 33 | on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2 34 | f `on` g = (\x y -> (g x) `f` (g y)) 35 | 36 | -- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'. 37 | norep :: (Eq a) => [a]->[a] 38 | norep [] = [] 39 | norep x@[_] = x 40 | norep (a:bs@(c:cs)) | a==c = norep (a:cs) 41 | | otherwise = a:norep bs 42 | 43 | -- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'. 44 | norepBy :: (a -> a -> Bool) -> [a] -> [a] 45 | norepBy _ [] = [] 46 | norepBy _ x@[_] = x 47 | norepBy eqF (a:bs@(c:cs)) | a `eqF` c = norepBy eqF (a:cs) 48 | | otherwise = a:norepBy eqF bs 49 | 50 | mapFst :: (Functor f) => (t -> t2) -> f (t, t1) -> f (t2, t1) 51 | mapFst f = fmap (\ (a,b) -> (f a,b)) 52 | 53 | mapSnd :: (Functor f) => (t1 -> t2) -> f (t, t1) -> f (t, t2) 54 | mapSnd f = fmap (\ (a,b) -> (a,f b)) 55 | 56 | fst3 :: (a,b,c) -> a 57 | fst3 (x,_,_) = x 58 | 59 | snd3 :: (a,b,c) -> b 60 | snd3 (_,x,_) = x 61 | 62 | thd3 :: (a,b,c) -> c 63 | thd3 (_,_,x) = x 64 | 65 | flipOrder :: Ordering -> Ordering 66 | flipOrder GT = LT 67 | flipOrder LT = GT 68 | flipOrder EQ = EQ 69 | 70 | noWin :: WinTags -> Bool 71 | noWin = null 72 | 73 | -- | Used to track elements of the pattern that accept characters or are anchors. 74 | newtype DoPa = DoPa {dopaIndex :: Int} deriving (Eq,Ord) 75 | 76 | instance Enum DoPa where 77 | toEnum = DoPa 78 | fromEnum = dopaIndex 79 | 80 | instance Show DoPa where 81 | showsPrec p (DoPa {dopaIndex=i}) = ('#':) . showsPrec p i 82 | 83 | -- | Control whether the pattern is multiline or case-sensitive like Text.Regex and whether to 84 | -- capture the subgroups (\\1, \\2, etc). Controls enabling extra anchor syntax. 85 | data CompOption = CompOption { 86 | caseSensitive :: Bool 87 | -- ^ True in 'blankCompOpt' and 'defaultCompOpt'. 88 | , multiline :: Bool 89 | -- ^ False in 'blankCompOpt', True in 'defaultCompOpt'. 90 | -- Compile for newline-sensitive matching. 91 | -- 92 | -- From [regexp man page](https://www.tcl.tk/man/tcl8.4/TclCmd/regexp.html#M8): 93 | -- "By default, newline is a completely ordinary character with no special meaning in either REs or strings. 
94 | -- With this flag, inverted bracket expressions @[^@ and @.@ never match newline, 95 | -- a @^@ anchor matches the null string after any newline in the string in addition to its normal function, 96 | -- and the @$@ anchor matches the null string before any newline in the string in addition to its normal function." 97 | , rightAssoc :: Bool 98 | -- ^ True (and therefore right associative) in 'blankCompOpt' and 'defaultCompOpt'. 99 | , newSyntax :: Bool 100 | -- ^ False in 'blankCompOpt', True in 'defaultCompOpt'. 101 | -- Enables the extended non-POSIX syntax described in "Text.Regex.TDFA" haddock documentation. 102 | , lastStarGreedy :: Bool 103 | -- ^ False by default. This is POSIX correct but it takes space and is slower. 104 | -- Setting this to True will improve performance, and should be done 105 | -- if you plan to set the 'captureGroups' 'ExecOption' to False. 106 | } deriving (Read,Show) 107 | 108 | data ExecOption = ExecOption { 109 | captureGroups :: Bool -- ^ True by default. Set to False to improve speed (and space). 110 | } deriving (Read,Show) 111 | 112 | -- | Used by implementation to name certain 'Position's during 113 | -- matching. Identity of 'Position' tag to set during a transition. 114 | type Tag = Int 115 | 116 | -- | Internal use to indicate type of tag and preference for larger or smaller 'Position's. 117 | data OP = Maximize | Minimize | Orbit | Ignore deriving (Eq,Show) 118 | 119 | -- | Internal NFA node identity number. 120 | type Index = Int 121 | 122 | -- | Internal DFA identity is this 'Set' of NFA 'Index'. 123 | type SetIndex = IntSet {- Index -} 124 | 125 | -- | Index into the text being searched. 126 | type Position = Int 127 | 128 | -- | 'GroupIndex' is for indexing submatches from capturing parenthesized groups ('PGroup' or 'Group'). 129 | type GroupIndex = Int 130 | 131 | -- | 'GroupInfo' collects the parent and tag information for an instance of a group. 132 | data GroupInfo = GroupInfo { 133 | thisIndex, parentIndex :: GroupIndex 134 | , startTag, stopTag, flagTag :: Tag 135 | } deriving Show 136 | 137 | -- | The TDFA backend specific 'Regex' type, used by this module's 'RegexOptions' and 'RegexMaker'. 138 | data Regex = Regex { 139 | regex_dfa :: DFA -- ^ starting DFA state 140 | , regex_init :: Index -- ^ index of starting state 141 | , regex_b_index :: (Index,Index) -- ^ indexes of smallest and largest states 142 | , regex_b_tags :: (Tag,Tag) -- ^ indexes of smallest and largest tags 143 | , regex_trie :: TrieSet DFA -- ^ All DFA states 144 | , regex_tags :: Array Tag OP -- ^ information about each tag 145 | , regex_groups :: Array GroupIndex [GroupInfo] -- ^ information about each group 146 | , regex_isFrontAnchored :: Bool -- ^ used for optimizing execution 147 | , regex_compOptions :: CompOption 148 | , regex_execOptions :: ExecOption 149 | } -- no deriving at all, the DFA may be too big to ever traverse! 
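
-- A minimal sketch of how 'CompOption' and 'ExecOption' are typically supplied
-- (assuming 'makeRegexOpts', 'match', 'defaultCompOpt' and 'defaultExecOpt' from
-- regex-base, plus the 'String' instances from "Text.Regex.TDFA.String"):
--
-- > re :: Regex
-- > re = makeRegexOpts defaultCompOpt{ caseSensitive = False } defaultExecOpt
-- >        ("ab+c" :: String)
-- >
-- > matched :: Bool
-- > matched = match re "xABBBcy"   -- True, since case is ignored here
--
-- The 'blankCompOpt'\/'defaultCompOpt' and 'blankExecOpt'\/'defaultExecOpt'
-- baselines used above are given by the 'RegexOptions' instance just below.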
150 | 151 | 152 | instance RegexOptions Regex CompOption ExecOption where 153 | blankCompOpt = CompOption { caseSensitive = True 154 | , multiline = False 155 | , rightAssoc = True 156 | , newSyntax = False 157 | , lastStarGreedy = False 158 | } 159 | blankExecOpt = ExecOption { captureGroups = True } 160 | defaultCompOpt = CompOption { caseSensitive = True 161 | , multiline = True 162 | , rightAssoc = True 163 | , newSyntax = True 164 | , lastStarGreedy = False 165 | } 166 | defaultExecOpt = ExecOption { captureGroups = True } 167 | setExecOpts e r = r {regex_execOptions=e} 168 | getExecOpts r = regex_execOptions r 169 | 170 | 171 | data WinEmpty = WinEmpty Instructions 172 | | WinTest WhichTest (Maybe WinEmpty) (Maybe WinEmpty) 173 | deriving Show 174 | 175 | -- | Internal NFA node type. 176 | data QNFA = QNFA {q_id :: Index, q_qt :: QT} 177 | 178 | -- | Internal to 'QNFA' type. 179 | data QT = Simple { qt_win :: WinTags -- ^ empty transitions to the virtual winning state 180 | , qt_trans :: CharMap QTrans -- ^ all ways to leave this QNFA to other or the same QNFA 181 | , qt_other :: QTrans -- ^ default ways to leave this QNFA to other or the same QNFA 182 | } 183 | | Testing { qt_test :: WhichTest -- ^ The test to perform 184 | , qt_dopas :: EnumSet DoPa -- ^ location(s) of the anchor(s) in the original regexp 185 | , qt_a, qt_b :: QT -- ^ use qt_a if test is True, else use qt_b 186 | } 187 | 188 | -- | Internal type to represent the tagged transition from one QNFA to 189 | -- another (or itself). The key is the Index of the destination QNFA. 190 | type QTrans = IntMap {- Destination Index -} [TagCommand] 191 | 192 | -- | Known predicates, just Beginning of Line (^) and End of Line ($). 193 | -- Also support for GNU extensions is being added: \\\` beginning of 194 | -- buffer, \\\' end of buffer, \\\< and \\\> for begin and end of words, \\b 195 | -- and \\B for word boundary and not word boundary. 196 | data WhichTest 197 | = Test_BOL -- ^ @^@ (affected by multiline option) 198 | | Test_EOL -- ^ @$@ (affected by multiline option) 199 | | Test_BOB -- ^ @\\`@ beginning of buffer 200 | | Test_EOB -- ^ @\\'@ end ofbuffer 201 | | Test_BOW -- ^ @\\<@ beginning of word 202 | | Test_EOW -- ^ @\\>@ end of word 203 | | Test_EdgeWord -- ^ @\\b@ word boundary 204 | | Test_NotEdgeWord -- ^ @\\B@ not word boundary 205 | deriving (Show,Eq,Ord,Enum) 206 | 207 | -- | The things that can be done with a Tag. 'TagTask' and 208 | -- 'ResetGroupStopTask' are for tags with Maximize or Minimize OP 209 | -- values. 'ResetOrbitTask' and 'EnterOrbitTask' and 'LeaveOrbitTask' are 210 | -- for tags with Orbit OP value. 211 | data TagTask = TagTask | ResetGroupStopTask | SetGroupStopTask 212 | | ResetOrbitTask | EnterOrbitTask | LeaveOrbitTask deriving (Show,Eq) 213 | 214 | -- | Ordered list of tags and their associated Task. 215 | type TagTasks = [(Tag,TagTask)] 216 | 217 | -- | When attached to a QTrans the TagTask can be done before or after 218 | -- accepting the character. 219 | data TagUpdate = PreUpdate TagTask | PostUpdate TagTask deriving (Show,Eq) 220 | 221 | -- | Ordered list of tags and their associated update operation. 222 | type TagList = [(Tag,TagUpdate)] 223 | 224 | -- | A TagList and the location of the item in the original pattern 225 | -- that is being accepted. 226 | type TagCommand = (DoPa,TagList) 227 | 228 | -- | Ordered list of tags and their associated update operation to 229 | -- perform on an empty transition to the virtual winning state. 
230 | type WinTags = TagList 231 | 232 | -- | Internal DFA node, identified by the Set of indices of the QNFA 233 | -- nodes it represents. 234 | data DFA = DFA { d_id :: SetIndex, d_dt :: DT } deriving(Show) 235 | data Transition = Transition { trans_many :: DFA -- ^ where to go (maximal), including respawning 236 | , trans_single :: DFA -- ^ where to go, not including respawning 237 | , trans_how :: DTrans -- ^ how to go, including respawning 238 | } 239 | -- | Internal to the DFA node 240 | data DT = Simple' { dt_win :: IntMap {- Source Index -} Instructions -- ^ Actions to perform to win 241 | , dt_trans :: CharMap Transition -- ^ Transition to accept Char 242 | , dt_other :: Transition -- ^ default accepting transition 243 | } 244 | | Testing' { dt_test :: WhichTest -- ^ The test to perform 245 | , dt_dopas :: EnumSet DoPa -- ^ location(s) of the anchor(s) in the original regexp 246 | , dt_a,dt_b :: DT -- ^ use dt_a if test is True else use dt_b 247 | } 248 | 249 | -- | Internal type to represent the commands for the tagged transition. 250 | -- The outer 'IntMap' is for the destination Index and the inner 'IntMap' 251 | -- is for the Source Index. This is convenient since all runtime data 252 | -- going to the same destination must be compared to find the best. 253 | -- 254 | -- A Destination 'IntMap' entry may have an empty Source 'IntMap' if and 255 | -- only if the destination is the starting index and the NFA or DFA. 256 | -- This instructs the matching engine to spawn a new entry starting at 257 | -- the post-update position. 258 | type DTrans = IntMap {- Index of Destination -} (IntMap {- Index of Source -} (DoPa,Instructions)) 259 | -- type DTrans = IntMap {- Index of Destination -} (IntMap {- Index of Source -} (DoPa,RunState ())) 260 | 261 | -- | Internal convenience type for the text display code. 262 | type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position,Bool))],[String])))])] 263 | 264 | -- | Positions for which a @*@ was re-started while looping. Need to 265 | -- append locations at back but compare starting with front, so use 266 | -- 'Seq' as a queue. The initial position is saved in 'basePos' (and a 267 | -- Maximize Tag), the middle positions in the 'Seq', and the final 268 | -- position is NOT saved in the Orbits (only in a Maximize Tag). 269 | data Orbits = Orbits 270 | { inOrbit :: !Bool -- True if enterOrbit, False if LeaveOrbit 271 | , basePos :: Position 272 | , ordinal :: (Maybe Int) 273 | , getOrbits :: !(Seq Position) 274 | } deriving (Show) 275 | 276 | -- | The 'newPos' and 'newFlags' lists in Instructions are sorted by, and unique in, the Tag values 277 | data Instructions = Instructions 278 | { newPos :: ![(Tag,Action)] -- False is preUpdate, True is postUpdate (there are no Orbit tags here) -- 2009 : Change to enum from bool? 279 | , newOrbits :: !(Maybe (Position -> OrbitTransformer)) 280 | } 281 | 282 | instance Show Instructions where 283 | showsPrec p (Instructions pos _) 284 | = showParen (p >= 11) $ 285 | showString "Instructions {" . 286 | showString "newPos = " . 287 | showsPrec 0 pos . 288 | showString ", " . 289 | showString "newOrbits = " . 290 | showString "" . 
291 | showString "}" 292 | 293 | data Action = SetPre | SetPost | SetVal Int deriving (Show,Eq) 294 | type OrbitTransformer = OrbitLog -> OrbitLog 295 | type OrbitLog = IntMap Orbits 296 | 297 | instance Show QNFA where 298 | show (QNFA {q_id = i, q_qt = qt}) = "QNFA {q_id = "++show i 299 | ++"\n ,q_qt = "++ show qt 300 | ++"\n}" 301 | 302 | instance Show QT where 303 | show = showQT 304 | 305 | showQT :: QT -> String 306 | showQT (Simple win trans other) = "{qt_win=" ++ show win 307 | ++ "\n, qt_trans=" ++ show (foo trans) 308 | ++ "\n, qt_other=" ++ show (foo' other) ++ "}" 309 | where foo :: CharMap QTrans -> [(Char,[(Index,[TagCommand])])] 310 | foo = mapSnd foo' . Map.toAscList 311 | foo' :: QTrans -> [(Index,[TagCommand])] 312 | foo' = IMap.toList 313 | showQT (Testing test dopas a b) = "{Testing "++show test++" "++show (Set.toList dopas) 314 | ++"\n"++indent' a 315 | ++"\n"++indent' b++"}" 316 | where indent' = init . unlines . map (spaces++) . lines . showQT 317 | spaces = replicate 9 ' ' 318 | 319 | instance Show DT where show = showDT 320 | 321 | indent :: [String] -> String 322 | indent = unlines . map (\x -> ' ':' ':x) 323 | 324 | showDT :: DT -> String 325 | showDT (Simple' w t o) = 326 | "Simple' { dt_win = " ++ seeWin1 327 | ++ "\n , dt_trans = " ++ seeTrans1 328 | ++ "\n , dt_other = " ++ seeOther1 o 329 | ++ "\n }" 330 | where 331 | seeWin1 | IMap.null w = "No win" 332 | | otherwise = indent . map show . IMap.assocs $ w 333 | 334 | seeTrans1 :: String 335 | seeTrans1 | Map.null t = "No (Char,Transition)" 336 | | otherwise = ('\n':) . indent $ 337 | map (\(char,Transition {trans_many=dfa,trans_single=dfa2,trans_how=dtrans}) -> 338 | concat ["(" 339 | ,show char 340 | ,", MANY " 341 | ,show (d_id dfa) 342 | ,", SINGLE " 343 | ,show (d_id dfa2) 344 | ,", \n" 345 | ,seeDTrans dtrans 346 | ,")"]) (Map.assocs t) 347 | 348 | seeOther1 (Transition {trans_many=dfa,trans_single=dfa2,trans_how=dtrans}) = 349 | concat ["(MANY " 350 | ,show (d_id dfa) 351 | ,", SINGLE " 352 | ,show (d_id dfa2) 353 | ,", \n" 354 | ,seeDTrans dtrans 355 | ,")"] 356 | 357 | showDT (Testing' wt d a b) = "Testing' { dt_test = " ++ show wt 358 | ++ "\n , dt_dopas = " ++ show d 359 | ++ "\n , dt_a = " ++ indent' a 360 | ++ "\n , dt_b = " ++ indent' b 361 | ++ "\n }" 362 | where indent' = init . unlines . (\s -> case s of 363 | [] -> [] 364 | (h:t) -> h : (map (spaces ++) t)) . lines . showDT 365 | spaces = replicate 10 ' ' 366 | 367 | 368 | seeDTrans :: DTrans -> String 369 | --seeDTrans x = concatMap (\(dest,y) -> unlines . map (\(source,ins) -> show (dest,source,ins) ) . IMap.assocs $ y) (IMap.assocs x) 370 | seeDTrans x | IMap.null x = "No DTrans" 371 | seeDTrans x = concatMap seeSource (IMap.assocs x) 372 | where seeSource (dest,srcMap) | IMap.null srcMap = indent [show (dest,"SPAWN")] 373 | | otherwise = indent . map (\(source,ins) -> show (dest,source,ins) ) . 
IMap.assocs $ srcMap 374 | -- spawnIns = Instructions { newPos = [(0,SetPost)], newOrbits = Nothing } 375 | 376 | 377 | instance Eq QT where 378 | t1@(Testing {}) == t2@(Testing {}) = 379 | (qt_test t1) == (qt_test t2) && (qt_a t1) == (qt_a t2) && (qt_b t1) == (qt_b t2) 380 | (Simple w1 (CharMap t1) o1) == (Simple w2 (CharMap t2) o2) = 381 | w1 == w2 && eqTrans && eqQTrans o1 o2 382 | where eqTrans :: Bool 383 | eqTrans = (IMap.size t1 == IMap.size t2) 384 | && and (zipWith together (IMap.toAscList t1) (IMap.toAscList t2)) 385 | where together (c1,qtrans1) (c2,qtrans2) = (c1 == c2) && eqQTrans qtrans1 qtrans2 386 | eqQTrans :: QTrans -> QTrans -> Bool 387 | eqQTrans = (==) 388 | _ == _ = False 389 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/IntArrTrieSet.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | This creates a lazy Trie based on a finite range of Ints and is used to 3 | memorize a function over the subsets of this range. 4 | 5 | To create a Trie you need two supply 2 things 6 | * Range of keys to bound 7 | * A function or functions used to construct the value for a subset of keys 8 | 9 | The Trie uses the Array type internally. 10 | -} 11 | module Text.Regex.TDFA.IntArrTrieSet where 12 | 13 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 14 | 15 | import Data.Array.IArray(Array,(!),listArray) 16 | 17 | data TrieSet v = TrieSet { value :: v 18 | , next :: Array Int (TrieSet v) } 19 | 20 | -- | This is the accessor for the Trie. The list of keys should be 21 | -- sorted. 22 | lookupAsc :: TrieSet v -> [Int] -> v 23 | lookupAsc (TrieSet {value=v,next=n}) = 24 | (\keys -> case keys of [] -> v 25 | (key:keys') -> lookupAsc (n!key) keys') 26 | 27 | -- | This is a Trie constructor for a complete range of keys. 28 | fromBounds :: (Int,Int) -- ^ (lower,upper) range of keys, lower<=upper 29 | -> ([Int] -> v) -- ^ Function from list of keys to its value. 30 | -- It must work for distinct ascending lists. 31 | -> TrieSet v -- ^ The constructed Trie 32 | fromBounds (start,stop) keysToValue = build id start where 33 | build keys low = TrieSet { value = keysToValue (keys []) 34 | , next = listArray (low,stop) 35 | [build (keys.(x:)) (succ x) | x <- [low..stop] ] } 36 | 37 | -- | This is a Trie constructor for a complete range of keys that uses 38 | -- a function from single values and a merge operation on values to 39 | -- fill the Trie. 40 | fromSinglesMerge :: v -- ^ value for (lookupAsc trie []) 41 | -> (v->v->v) -- ^ merge operation on values 42 | -> (Int,Int) -- ^ (lower,upper) range of keys, lower<=upper 43 | -> (Int->v) -- ^ Function from a single key to its value 44 | -> TrieSet v -- ^ The constructed Trie 45 | fromSinglesMerge emptyValue mergeValues bound keyToValue = trieSet where 46 | trieSet = fromBounds bound keysToValue' 47 | keysToValue' keys = 48 | case keys of 49 | [] -> emptyValue 50 | [key] -> keyToValue key 51 | _ -> mergeValues (keysToValue (init keys)) (keysToValue [last keys]) 52 | keysToValue = lookupAsc trieSet 53 | 54 | -- | This is a Trie constructor for a complete range of keys that uses 55 | -- a function from single values and a sum operation of values to fill 56 | -- the Trie. 
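--
-- For example, working through the definitions by hand,
-- @lookupAsc (fromSinglesSum sum (1,4) (*10)) [1,3]@ evaluates to @40@
-- and @lookupAsc (fromSinglesSum sum (1,4) (*10)) []@ to @0@.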
57 | fromSinglesSum :: ([v]->v) -- ^ summation operation for values 58 | -> (Int,Int) -- ^ (lower,upper) range of keys, lower <= upper 59 | -> (Int->v) -- ^ Function from a single key to its value 60 | -> TrieSet v -- ^ The constructed Trie 61 | fromSinglesSum mergeValues bound keyToValue = trieSet where 62 | trieSet = fromBounds bound keysToValue' 63 | keysToValue' = mergeValues . map keyToValue 64 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/Engine_NC.hs: -------------------------------------------------------------------------------- 1 | -- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String 2 | module Text.Regex.TDFA.NewDFA.Engine_NC(execMatch) where 3 | 4 | import Control.Monad(when,join,filterM) 5 | import Data.Array.Base(unsafeRead,unsafeWrite) 6 | import Prelude hiding ((!!)) 7 | 8 | import Data.Array.MArray(MArray(..)) 9 | import Data.Array.Unsafe(unsafeFreeze) 10 | import Data.Array.IArray(Ix) 11 | import Data.Array.ST(STArray,STUArray) 12 | import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) 13 | import qualified Data.IntMap as IMap(null,toList,keys,member) 14 | import qualified Data.IntSet as ISet(toAscList) 15 | import Data.STRef(STRef,newSTRef,readSTRef,writeSTRef) 16 | import qualified Control.Monad.ST.Lazy as L(runST,strictToLazyST) 17 | import qualified Control.Monad.ST.Strict as S(ST) 18 | import Data.Sequence(Seq) 19 | import qualified Data.ByteString.Char8 as SBS(ByteString) 20 | import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) 21 | 22 | import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) 23 | import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc) 24 | import Text.Regex.TDFA.Common hiding (indent) 25 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) 26 | import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) 27 | 28 | -- import Debug.Trace 29 | 30 | -- trace :: String -> a -> a 31 | -- trace _ a = a 32 | 33 | err :: String -> a 34 | err s = common_error "Text.Regex.TDFA.NewDFA.Engine_NC" s 35 | 36 | {-# INLINE (!!) #-} 37 | (!!) :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> S.ST s e 38 | (!!) 
= unsafeRead 39 | {-# INLINE set #-} 40 | set :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> e -> S.ST s () 41 | set = unsafeWrite 42 | 43 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} 44 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} 45 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} 46 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} 47 | execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 48 | execMatch (Regex { regex_dfa = (DFA {d_id=didIn,d_dt=dtIn}) 49 | , regex_init = startState 50 | , regex_b_index = b_index 51 | , regex_trie = trie 52 | , regex_compOptions = CompOption { multiline = newline } } ) 53 | offsetIn prevIn inputIn = L.runST runCaptureGroup where 54 | 55 | !test = mkTest newline 56 | 57 | runCaptureGroup = {-# SCC "runCaptureGroup" #-} do 58 | obtainNext <- L.strictToLazyST constructNewEngine 59 | let loop = do vals <- L.strictToLazyST obtainNext 60 | if null vals -- force vals before defining valsRest 61 | then return [] 62 | else do valsRest <- loop 63 | return (vals ++ valsRest) 64 | loop 65 | 66 | constructNewEngine :: S.ST s (S.ST s [MatchArray]) 67 | constructNewEngine = {-# SCC "constructNewEngine" #-} do 68 | storeNext <- newSTRef undefined 69 | writeSTRef storeNext (goNext storeNext) 70 | let obtainNext = join (readSTRef storeNext) 71 | return obtainNext 72 | 73 | goNext storeNext = {-# SCC "goNext" #-} do 74 | (SScratch s1In s2In winQ) <- newScratch b_index 75 | set s1In startState offsetIn 76 | writeSTRef storeNext (err "obtainNext called while goNext is running!") 77 | eliminatedStateFlag <- newSTRef False 78 | let next s1 s2 did dt offset prev input = {-# SCC "goNext.next" #-} 79 | case dt of 80 | Testing' {dt_test=wt,dt_a=a,dt_b=b} -> 81 | if test wt offset prev input 82 | then next s1 s2 did a offset prev input 83 | else next s1 s2 did b offset prev input 84 | Simple' {dt_win=w,dt_trans=t, dt_other=o} 85 | | IMap.null w -> 86 | case uncons input of 87 | Nothing -> finalizeWinners 88 | Just (c,input') -> do 89 | case CMap.findWithDefault o c t of 90 | Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> 91 | findTrans s1 s2 did' dt' dtrans offset c input' 92 | | otherwise -> do 93 | (did',dt') <- processWinner s1 did dt w offset 94 | next' s1 s2 did' dt' offset prev input 95 | 96 | next' s1 s2 did dt offset prev input = {-# SCC "goNext'.next" #-} 97 | case dt of 98 | Testing' {dt_test=wt,dt_a=a,dt_b=b} -> 99 | if test wt offset prev input 100 | then next' s1 s2 did a offset prev input 101 | else next' s1 s2 did b offset prev input 102 | Simple' {dt_trans=t, dt_other=o} -> 103 | case uncons input of 104 | Nothing -> finalizeWinners 105 | Just (c,input') -> do 106 | case CMap.findWithDefault o c t of 107 | Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> 108 | findTrans s1 s2 did' dt' dtrans offset c input' 109 | 110 | findTrans s1 s2 did' dt' dtrans offset prev' input' = {-# SCC "goNext.findTrans" #-} do 111 | -- 112 | let findTransTo (destIndex,sources) = do 113 | val <- if IMap.null sources then return (succ offset) 114 | else return . minimum =<< mapM (s1 !!) 
(IMap.keys sources) 115 | set s2 destIndex val 116 | return val 117 | earlyStart <- fmap minimum $ mapM findTransTo (IMap.toList dtrans) 118 | -- 119 | earlyWin <- readSTRef (mq_earliest winQ) 120 | if earlyWin < earlyStart 121 | then do 122 | winnersR <- getMQ earlyStart winQ 123 | writeSTRef storeNext (next s2 s1 did' dt' (succ offset) prev' input') 124 | mapM wsToGroup (reverse winnersR) 125 | else do 126 | let offset' = succ offset in seq offset' $ next s2 s1 did' dt' offset' prev' input' 127 | 128 | processWinner s1 did dt w offset = {-# SCC "goNext.newWinnerThenProceed" #-} do 129 | let getStart (sourceIndex,_) = s1 !! sourceIndex 130 | vals <- mapM getStart (IMap.toList w) 131 | let low = minimum vals -- perhaps a non-empty winner 132 | high = maximum vals -- perhaps an empty winner 133 | if low < offset 134 | then do 135 | putMQ (WScratch low offset) winQ 136 | when (high==offset || IMap.member startState w) $ 137 | putMQ (WScratch offset offset) winQ 138 | let keepState i1 = do 139 | startsAt <- s1 !! i1 140 | let keep = (startsAt <= low) || (offset <= startsAt) 141 | if keep 142 | then return True 143 | else if i1 == startState 144 | then {- check for additional empty winner -} 145 | set s1 i1 (succ offset) >> return True 146 | else writeSTRef eliminatedStateFlag True >> return False 147 | states' <- filterM keepState (ISet.toAscList did) 148 | flag <- readSTRef eliminatedStateFlag 149 | if flag 150 | then do 151 | writeSTRef eliminatedStateFlag False 152 | let DFA {d_id=did',d_dt=dt'} = Trie.lookupAsc trie states' 153 | return (did',dt') 154 | else do 155 | return (did,dt) 156 | else do 157 | -- offset == low == minimum vals == maximum vals == high; vals == [offset] 158 | putMQ (WScratch offset offset) winQ 159 | return (did,dt) 160 | 161 | finalizeWinners = do 162 | winnersR <- readSTRef (mq_list winQ) 163 | resetMQ winQ 164 | writeSTRef storeNext (return []) 165 | mapM wsToGroup (reverse winnersR) 166 | 167 | -- goNext then ends with the next statement 168 | next s1In s2In didIn dtIn offsetIn prevIn inputIn 169 | 170 | ---- 171 | 172 | {-# INLINE mkTest #-} 173 | mkTest :: Uncons text => Bool -> WhichTest -> Index -> Char -> text -> Bool 174 | mkTest isMultiline = if isMultiline then test_multiline else test_singleline 175 | 176 | ---- 177 | 178 | {- MUTABLE WINNER QUEUE -} 179 | 180 | data MQ s = MQ { mq_earliest :: !(STRef s Position) 181 | , mq_list :: !(STRef s [WScratch]) 182 | } 183 | 184 | newMQ :: S.ST s (MQ s) 185 | newMQ = do 186 | earliest <- newSTRef maxBound 187 | list <- newSTRef [] 188 | return (MQ earliest list) 189 | 190 | resetMQ :: MQ s -> S.ST s () 191 | resetMQ (MQ {mq_earliest=earliest,mq_list=list}) = do 192 | writeSTRef earliest maxBound 193 | writeSTRef list [] 194 | 195 | putMQ :: WScratch -> MQ s -> S.ST s () 196 | putMQ ws@(WScratch {ws_start=start}) (MQ {mq_earliest=earliest,mq_list=list}) = do 197 | startE <- readSTRef earliest 198 | if start <= startE 199 | then writeSTRef earliest start >> writeSTRef list [ws] 200 | else do 201 | old <- readSTRef list 202 | let !rest = dropWhile (\ w -> start <= ws_start w) old 203 | !new = ws : rest 204 | writeSTRef list new 205 | 206 | getMQ :: Position -> MQ s -> S.ST s [WScratch] 207 | getMQ pos (MQ {mq_earliest=earliest,mq_list=list}) = do 208 | old <- readSTRef list 209 | case span (\ w -> pos <= ws_start w) old of 210 | ([],ans) -> do 211 | writeSTRef earliest maxBound 212 | writeSTRef list [] 213 | return ans 214 | (new,ans) -> do 215 | writeSTRef earliest (ws_start (last new)) 216 | writeSTRef list new 
217 | return ans 218 | 219 | {- MUTABLE SCRATCH DATA STRUCTURES -} 220 | 221 | data SScratch s = SScratch { _s_1 :: !(MScratch s) 222 | , _s_2 :: !(MScratch s) 223 | , _s_mq :: !(MQ s) 224 | } 225 | type MScratch s = STUArray s Index Position 226 | data WScratch = WScratch {ws_start,_ws_stop :: !Position} 227 | deriving Show 228 | 229 | {- DEBUGGING HELPERS -} 230 | {- CREATING INITIAL MUTABLE SCRATCH DATA STRUCTURES -} 231 | 232 | {-# INLINE newA #-} 233 | newA :: (MArray (STUArray s) e (S.ST s)) => (Tag,Tag) -> e -> S.ST s (STUArray s Tag e) 234 | newA b_tags initial = newArray b_tags initial 235 | 236 | newScratch :: (Index,Index) -> S.ST s (SScratch s) 237 | newScratch b_index = do 238 | s1 <- newMScratch b_index 239 | s2 <- newMScratch b_index 240 | winQ <- newMQ 241 | return (SScratch s1 s2 winQ) 242 | 243 | newMScratch :: (Index,Index) -> S.ST s (MScratch s) 244 | newMScratch b_index = newA b_index (-1) 245 | 246 | {- CONVERT WINNERS TO MATCHARRAY -} 247 | 248 | wsToGroup :: WScratch -> S.ST s MatchArray 249 | wsToGroup (WScratch start stop) = do 250 | ma <- newArray (0,0) (start,stop-start) :: S.ST s (STArray s Int (MatchOffset,MatchLength)) 251 | unsafeFreeze ma 252 | 253 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/Engine_NC_FA.hs: -------------------------------------------------------------------------------- 1 | -- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String 2 | module Text.Regex.TDFA.NewDFA.Engine_NC_FA(execMatch) where 3 | 4 | import Control.Monad(unless) 5 | import Prelude hiding ((!!)) 6 | 7 | import Data.Array.MArray(MArray(..)) 8 | import Data.Array.Unsafe(unsafeFreeze) 9 | import Data.Array.ST(STArray) 10 | import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) 11 | import qualified Data.IntMap as IMap(null) 12 | import qualified Data.IntSet as ISet(null) 13 | import qualified Data.Array.MArray() 14 | import Data.STRef(newSTRef,readSTRef,writeSTRef) 15 | import qualified Control.Monad.ST.Strict as S(ST,runST) 16 | import Data.Sequence(Seq) 17 | import qualified Data.ByteString.Char8 as SBS(ByteString) 18 | import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) 19 | 20 | import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) 21 | import Text.Regex.TDFA.Common hiding (indent) 22 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) 23 | import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline) 24 | 25 | --import Debug.Trace 26 | 27 | -- trace :: String -> a -> a 28 | -- trace _ a = a 29 | 30 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} 31 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} 32 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} 33 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} 34 | execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 35 | execMatch (Regex { regex_dfa = DFA {d_dt=dtIn} }) 36 | offsetIn _prevIn inputIn = S.runST goNext where 37 | 38 | test wt off input = test_singleline wt off '\n' input 39 | 40 | goNext = {-# SCC "goNext" #-} do 41 | winQ <- newSTRef Nothing 42 | let next dt offset input = {-# SCC "goNext.next" #-} 43 | case dt of 44 | Testing' {dt_test=wt,dt_a=a,dt_b=b} -> 45 | if test wt offset input 46 | then next a offset input 47 | else next b offset input 48 | Simple' {dt_win=w,dt_trans=t, dt_other=o} -> do 49 | unless (IMap.null w) $ 50 
| writeSTRef winQ (Just offset) 51 | case uncons input of 52 | Nothing -> finalizeWinner 53 | Just (c,input') -> do 54 | case CMap.findWithDefault o c t of 55 | Transition {trans_single=DFA {d_id=did',d_dt=dt'}} 56 | | ISet.null did' -> finalizeWinner 57 | | otherwise -> 58 | let offset' = succ offset 59 | in seq offset' $ next dt' offset' input' 60 | 61 | finalizeWinner = do 62 | mWinner <- readSTRef winQ 63 | case mWinner of 64 | Nothing -> return [] 65 | Just winner -> mapM (makeGroup offsetIn) [winner] 66 | 67 | next dtIn offsetIn inputIn 68 | 69 | ---- 70 | 71 | {- CONVERT WINNERS TO MATCHARRAY -} 72 | 73 | makeGroup :: Position -> Position -> S.ST s MatchArray 74 | makeGroup start stop = do 75 | ma <- newArray (0,0) (start,stop-start) :: S.ST s (STArray s Int (MatchOffset,MatchLength)) 76 | unsafeFreeze ma 77 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/MakeTest.hs: -------------------------------------------------------------------------------- 1 | module Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) where 2 | 3 | import qualified Data.IntSet as ISet(IntSet,member,fromAscList) 4 | import Text.Regex.TDFA.Common(WhichTest(..),Index) 5 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) 6 | 7 | {-# INLINE test_singleline #-} 8 | {-# INLINE test_multiline #-} 9 | {-# INLINE test_common #-} 10 | test_singleline,test_multiline,test_common :: Uncons text => WhichTest -> Index -> Char -> text -> Bool 11 | test_multiline Test_BOL _off prev _input = prev == '\n' 12 | test_multiline Test_EOL _off _prev input = case uncons input of 13 | Nothing -> True 14 | Just (next,_) -> next == '\n' 15 | test_multiline test off prev input = test_common test off prev input 16 | 17 | test_singleline Test_BOL off _prev _input = off == 0 18 | test_singleline Test_EOL _off _prev input = case uncons input of 19 | Nothing -> True 20 | _ -> False 21 | test_singleline test off prev input = test_common test off prev input 22 | 23 | test_common Test_BOB off _prev _input = off==0 24 | test_common Test_EOB _off _prev input = case uncons input of 25 | Nothing -> True 26 | _ -> False 27 | test_common Test_BOW _off prev input = not (isWord prev) && case uncons input of 28 | Nothing -> False 29 | Just (c,_) -> isWord c 30 | test_common Test_EOW _off prev input = isWord prev && case uncons input of 31 | Nothing -> True 32 | Just (c,_) -> not (isWord c) 33 | test_common Test_EdgeWord _off prev input = 34 | if isWord prev 35 | then case uncons input of Nothing -> True 36 | Just (c,_) -> not (isWord c) 37 | else case uncons input of Nothing -> False 38 | Just (c,_) -> isWord c 39 | test_common Test_NotEdgeWord _off prev input = not (test_common Test_EdgeWord _off prev input) 40 | 41 | test_common Test_BOL _ _ _ = undefined 42 | test_common Test_EOL _ _ _ = undefined 43 | 44 | isWord :: Char -> Bool 45 | isWord c = ISet.member (fromEnum c) wordSet 46 | where wordSet :: ISet.IntSet 47 | wordSet = ISet.fromAscList . map fromEnum $ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" 48 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/Tester.hs: -------------------------------------------------------------------------------- 1 | -- | Like Engine, but merely checks to see whether any match at all is found. 
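-- This backs the 'matchTest' method of the 'RegexLike' instances (e.g. in
-- "Text.Regex.TDFA.ByteString"), so no capture bookkeeping is done.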
2 | -- 3 | module Text.Regex.TDFA.NewDFA.Tester(matchTest) where 4 | 5 | import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) 6 | import qualified Data.IntMap as IMap(null) 7 | import qualified Data.IntSet as ISet(null) 8 | 9 | import Data.Sequence(Seq) 10 | import qualified Data.ByteString.Char8 as SBS(ByteString) 11 | import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) 12 | 13 | import Text.Regex.Base() 14 | import Text.Regex.TDFA.Common hiding (indent) 15 | import Text.Regex.TDFA.NewDFA.Uncons (Uncons(uncons)) 16 | import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) 17 | 18 | {-# SPECIALIZE matchTest :: Regex -> ([] Char) -> Bool #-} 19 | {-# SPECIALIZE matchTest :: Regex -> (Seq Char) -> Bool #-} 20 | {-# SPECIALIZE matchTest :: Regex -> SBS.ByteString -> Bool #-} 21 | {-# SPECIALIZE matchTest :: Regex -> LBS.ByteString -> Bool #-} 22 | matchTest :: Uncons text => Regex -> text -> Bool 23 | matchTest (Regex { regex_dfa = dfaIn 24 | , regex_isFrontAnchored = ifa } ) 25 | inputIn = ans where 26 | 27 | ans = case ifa of 28 | True -> single0 (d_dt dfaIn) inputIn 29 | False -> multi0 (d_dt dfaIn) inputIn 30 | 31 | multi0 (Testing' {dt_test=wt,dt_a=a,dt_b=b}) input = 32 | if test0 wt input 33 | then multi0 a input 34 | else multi0 b input 35 | multi0 (Simple' {dt_win=w,dt_trans=t, dt_other=o}) input 36 | | IMap.null w = 37 | case uncons input of 38 | Nothing -> False 39 | Just (c,input') -> 40 | case CMap.findWithDefault o c t of 41 | Transition {trans_many=DFA {d_dt=dt'}} -> multi dt' c input' 42 | | otherwise = True 43 | 44 | multi (Testing' {dt_test=wt,dt_a=a,dt_b=b}) prev input = 45 | if test wt prev input 46 | then multi a prev input 47 | else multi b prev input 48 | multi (Simple' {dt_win=w,dt_trans=t, dt_other=o}) _prev input 49 | | IMap.null w = 50 | case uncons input of 51 | Nothing -> False 52 | Just (c,input') -> 53 | case CMap.findWithDefault o c t of 54 | Transition {trans_many=DFA {d_dt=dt'}} -> multi dt' c input' 55 | | otherwise = True 56 | 57 | single0 (Testing' {dt_test=wt,dt_a=a,dt_b=b}) input = 58 | if testFA0 wt input 59 | then single0 a input 60 | else single0 b input 61 | single0 (Simple' {dt_win=w,dt_trans=t, dt_other=o}) input 62 | | IMap.null w = 63 | case uncons input of 64 | Nothing -> False 65 | Just (c,input') -> 66 | case CMap.findWithDefault o c t of 67 | Transition {trans_single=DFA {d_id=did',d_dt=dt'}} 68 | | ISet.null did' -> False 69 | | otherwise -> single dt' c input' 70 | | otherwise = True 71 | 72 | single (Testing' {dt_test=wt,dt_a=a,dt_b=b}) prev input = 73 | if testFA wt prev input 74 | then single a prev input 75 | else single b prev input 76 | single (Simple' {dt_win=w,dt_trans=t, dt_other=o}) _prev input 77 | | IMap.null w = 78 | case uncons input of 79 | Nothing -> False 80 | Just (c,input') -> 81 | case CMap.findWithDefault o c t of 82 | Transition {trans_single=DFA {d_id=did',d_dt=dt'}} 83 | | ISet.null did' -> False 84 | | otherwise -> single dt' c input' 85 | | otherwise = True 86 | 87 | {-# INLINE testFA0 #-} 88 | testFA0 :: Uncons text => WhichTest -> text -> Bool 89 | testFA0 wt text = test_singleline wt 0 '\n' text 90 | 91 | {-# INLINE testFA #-} 92 | testFA :: Uncons text => WhichTest -> Char -> text -> Bool 93 | testFA wt prev text = test_singleline wt 1 prev text 94 | 95 | {-# INLINE test0 #-} 96 | test0 :: Uncons text => WhichTest -> text -> Bool 97 | test0 wt input = test_multiline wt 0 '\n' input 98 | 99 | {-# INLINE test #-} 100 | test :: Uncons text => WhichTest -> Char -> text -> Bool 101 
| test wt prev input = test_multiline wt 1 prev input 102 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/NewDFA/Uncons.hs: -------------------------------------------------------------------------------- 1 | module Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) where 2 | 3 | import qualified Data.ByteString.Char8 as SBS(ByteString,uncons) 4 | import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString,uncons) 5 | import Data.Sequence(Seq,viewl,ViewL(EmptyL,(:<))) 6 | import qualified Data.Text as T 7 | import qualified Data.Text.Lazy as TL 8 | 9 | class Uncons a where 10 | {- INLINE uncons #-} 11 | uncons :: a -> Maybe (Char,a) 12 | 13 | instance Uncons ([] Char) where 14 | {- INLINE uncons #-} 15 | uncons [] = Nothing 16 | uncons (x:xs) = Just (x,xs) 17 | 18 | instance Uncons (Seq Char) where 19 | {- INLINE uncons #-} 20 | uncons s = case viewl s of 21 | EmptyL -> Nothing 22 | x :< xs -> Just (x,xs) 23 | 24 | instance Uncons SBS.ByteString where 25 | {- INLINE uncons #-} 26 | uncons = SBS.uncons 27 | 28 | instance Uncons LBS.ByteString where 29 | {- INLINE uncons #-} 30 | uncons = LBS.uncons 31 | 32 | -- | @since 1.3.1 33 | instance Uncons T.Text where 34 | {- INLINE uncons #-} 35 | uncons = T.uncons 36 | 37 | -- | @since 1.3.1 38 | instance Uncons TL.Text where 39 | {- INLINE uncons #-} 40 | uncons = TL.uncons 41 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Pattern.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-} 2 | 3 | -- | This "Text.Regex.TDFA.Pattern" module provides the 'Pattern' data 4 | -- type and its subtypes. This 'Pattern' type is used to represent 5 | -- the parsed form of a regular expression. 6 | 7 | module Text.Regex.TDFA.Pattern 8 | (Pattern(..) 9 | ,PatternSet(..) 10 | ,PatternSetCharacterClass(..) 11 | ,PatternSetCollatingElement(..) 12 | ,PatternSetEquivalenceClass(..) 13 | ,GroupIndex 14 | ,DoPa(..) 15 | ,decodeCharacterClass, decodePatternSet 16 | ,showPattern 17 | -- ** Internal use 18 | ,starTrans 19 | -- ** Internal use, operations to support debugging under @ghci@ 20 | ,starTrans',simplify',dfsPattern 21 | ) where 22 | 23 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 24 | 25 | import Data.List(intersperse,partition) 26 | import qualified Data.Set as Set 27 | import Data.Set (Set) 28 | import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error) 29 | 30 | err :: String -> a 31 | err = common_error "Text.Regex.TDFA.Pattern" 32 | 33 | -- | 'Pattern' is the type returned by the regular expression parser 'parseRegex'. 34 | -- This is consumed by the "Text.Regex.TDFA.CorePattern" module and the tender leaves 35 | -- are nibbled by the "Text.Regex.TDFA.TNFA" module. 36 | -- 37 | -- The 'DoPa' field is the index of the component in the regex string @r@. 38 | data Pattern 39 | = PEmpty 40 | -- ^ @()@, matches the empty string. 41 | | PGroup (Maybe GroupIndex) Pattern 42 | -- ^ Group @(r)@. @Nothing@ indicates non-matching 'PGroup' 43 | -- (never produced by parser 'parseRegex'). 44 | | POr [Pattern] 45 | -- ^ Alternative @r|s@ (flattened by 'starTrans'). 46 | | PConcat [Pattern] 47 | -- ^ Sequence @rs@ (flattened by 'starTrans'). 48 | | PQuest Pattern 49 | -- ^ Zero or one repetitions @r?@ (eliminated by 'starTrans'). 50 | | PPlus Pattern 51 | -- ^ One or more repetitions @r+@ (eliminated by 'starTrans'). 
52 | | PStar Bool Pattern 53 | -- ^ Zero or more repetitions @r*@. 54 | -- @True@ (default) means may accept the empty string on its first iteration. 55 | | PBound Int (Maybe Int) Pattern 56 | -- ^ Given number or repetitions @r{n}@ or @r{n,m}@ 57 | -- (eliminated by 'starTrans'). 58 | 59 | -- The rest of these need an index of where in the regex string it is from 60 | | PCarat { getDoPa :: DoPa } 61 | -- ^ @^@ matches beginning of input. 62 | | PDollar { getDoPa :: DoPa } 63 | -- ^ @$@ matches end of input. 64 | 65 | -- The following test and accept a single character 66 | | PDot { getDoPa :: DoPa } 67 | -- ^ @.@ matches any character. 68 | | PAny { getDoPa :: DoPa, getPatternSet :: PatternSet } 69 | -- ^ Bracket expression @[...]@. 70 | | PAnyNot { getDoPa :: DoPa, getPatternSet :: PatternSet } 71 | -- ^ Inverted bracket expression @[^...]@. 72 | | PEscape { getDoPa :: DoPa, getPatternChar :: Char } 73 | -- ^ Backslashed character @\c@, may have special meaning. 74 | | PChar { getDoPa :: DoPa, getPatternChar :: Char } 75 | -- ^ Single character, matches given character. 76 | 77 | -- The following are semantic tags created in starTrans, not the parser 78 | | PNonCapture Pattern 79 | -- ^ Tag for internal use, introduced by 'starTrans'. 80 | | PNonEmpty Pattern 81 | -- ^ Tag for internal use, introduced by 'starTrans'. 82 | deriving (Eq, Show) 83 | 84 | -- Andreas Abel, 2022-07-18, issue #47: 85 | -- The following claim is FALSE: 86 | -- 87 | -- I have not been checking, but this should have the property that 88 | -- parsing the resulting string should result in an identical 'Pattern'. 89 | -- This is not true if 'starTrans' has created 'PNonCapture' and 'PNonEmpty' 90 | -- values or a @'PStar' False@. The contents of a @[...]@ grouping are 91 | -- always shown in a sorted canonical order. 92 | showPattern :: Pattern -> String 93 | showPattern pIn = 94 | case pIn of 95 | PEmpty -> "()" 96 | PGroup _ p -> paren (showPattern p) 97 | POr ps -> concat $ intersperse "|" (map showPattern ps) 98 | PConcat ps -> concatMap showPattern ps 99 | PQuest p -> (showPattern p)++"?" 100 | PPlus p -> (showPattern p)++"+" 101 | -- If PStar has mayFirstBeNull False then reparsing will forget this flag 102 | PStar _ p -> (showPattern p)++"*" 103 | PBound i (Just j) p | i==j -> showPattern p ++ ('{':show i)++"}" 104 | PBound i mj p -> showPattern p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj 105 | -- 106 | PCarat _ -> "^" 107 | PDollar _ -> "$" 108 | PDot _ -> "." 109 | PAny _ ps -> ('[':show ps)++"]" 110 | PAnyNot _ ps -> ('[':'^':show ps)++"]" 111 | PEscape _ c -> '\\':c:[] 112 | PChar _ c -> [c] 113 | -- The following were not directly from the parser, and will not be parsed in properly 114 | PNonCapture p -> showPattern p 115 | PNonEmpty p -> showPattern p 116 | where {- 117 | groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys 118 | else (if n <=3 then take n [x..] 119 | else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys 120 | groupRange x n [] = if n <=3 then take n [x..] 121 | else x:'-':(toEnum (pred n+fromEnum x)):[] 122 | -} 123 | paren s = ('(':s)++")" 124 | 125 | -- | Content of a bracket expression @[...]@ organized into 126 | -- characters, 127 | -- POSIX character classes (e.g. @[[:alnum:]]@), 128 | -- collating elements (e.g. @[.ch.]@, unused), and 129 | -- equivalence classes (e.g. @[=a=]@, treated as characters). 
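--
-- For example (an illustrative value, not taken from the library): parsing the
-- bracket expression @[a-c[:digit:]]@ produces roughly
--
-- > PatternSet (Just (Set.fromList "abc"))
-- >            (Just (Set.fromList [PatternSetCharacterClass "digit"]))
-- >            Nothing
-- >            Nothing
--
-- with the character range already expanded and the unused categories 'Nothing'.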
130 | -- 131 | data PatternSet = PatternSet (Maybe (Set Char)) 132 | (Maybe (Set PatternSetCharacterClass)) 133 | (Maybe (Set PatternSetCollatingElement)) 134 | (Maybe (Set PatternSetEquivalenceClass)) 135 | deriving (Eq) 136 | 137 | -- | Hand-rolled implementation, giving textual rather than Haskell representation. 138 | instance Show PatternSet where 139 | showsPrec i (PatternSet s scc sce sec) = 140 | let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s 141 | charSpec = (if ']' `elem` special then (']':) else id) (byRange normal) 142 | scc' = maybe "" ((concatMap show) . Set.toList) scc 143 | sce' = maybe "" ((concatMap show) . Set.toList) sce 144 | sec' = maybe "" ((concatMap show) . Set.toList) sec 145 | in shows charSpec 146 | . showsPrec i scc' . showsPrec i sce' . showsPrec i sec' 147 | . if '-' `elem` special then showChar '-' else id 148 | where byRange xAll@(~(x:xs)) 149 | | length xAll <=3 = xAll 150 | | otherwise = groupRange x 1 xs 151 | groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys 152 | else (if n <=3 then take n [x..] 153 | else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys 154 | groupRange x n [] = if n <=3 then take n [x..] 155 | else x:'-':(toEnum (pred n+fromEnum x)):[] 156 | 157 | -- | Content of @[: :]@, e.g. @"alnum"@ for @[:alnum:]@. 158 | newtype PatternSetCharacterClass = PatternSetCharacterClass {unSCC::String} 159 | deriving (Eq,Ord) 160 | 161 | -- | Content of @[. .]@, e.g. @"ch"@ for @[.ch.]@. 162 | newtype PatternSetCollatingElement = PatternSetCollatingElement {unSCE::String} 163 | deriving (Eq,Ord) 164 | 165 | -- | Content of @[= =]@, e.g. @"a"@ for @[=a=]@. 166 | newtype PatternSetEquivalenceClass = PatternSetEquivalenceClass {unSEC::String} 167 | deriving (Eq,Ord) 168 | 169 | -- | Hand-rolled implementation, giving textual rather than Haskell representation. 170 | instance Show PatternSetCharacterClass where 171 | showsPrec _ p = showChar '[' . showChar ':' . shows (unSCC p) . showChar ':' . showChar ']' 172 | 173 | -- | Hand-rolled implementation, giving textual rather than Haskell representation. 174 | instance Show PatternSetCollatingElement where 175 | showsPrec _ p = showChar '[' . showChar '.' . shows (unSCE p) . showChar '.' . showChar ']' 176 | 177 | -- | Hand-rolled implementation, giving textual rather than Haskell representation. 178 | instance Show PatternSetEquivalenceClass where 179 | showsPrec _ p = showChar '[' . showChar '=' . shows (unSEC p) . showChar '=' . showChar ']' 180 | 181 | -- | @decodePatternSet@ cannot handle collating element and treats 182 | -- equivalence classes as just their definition and nothing more. 183 | -- 184 | -- @since 1.3.2 185 | decodePatternSet :: PatternSet -> Set Char 186 | decodePatternSet (PatternSet msc mscc _ msec) = 187 | let baseMSC = maybe Set.empty id msc 188 | withMSCC = foldl (flip Set.insert) baseMSC (maybe [] (concatMap decodeCharacterClass . Set.toAscList) mscc) 189 | withMSEC = foldl (flip Set.insert) withMSCC (maybe [] (concatMap unSEC . Set.toAscList) msec) 190 | in withMSEC 191 | 192 | -- | This returns the strictly ascending list of characters 193 | -- represented by @[: :]@ POSIX character classes. 194 | -- Unrecognized class names return an empty string. 
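--
-- For example, @decodeCharacterClass (PatternSetCharacterClass "blank")@
-- evaluates to @"\t "@ (tab then space), per the table below.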
195 | -- 196 | -- @since 1.3.2 197 | decodeCharacterClass :: PatternSetCharacterClass -> String 198 | decodeCharacterClass (PatternSetCharacterClass s) = 199 | case s of 200 | "alnum" -> ['0'..'9']++['A'..'Z']++['a'..'z'] 201 | "digit" -> ['0'..'9'] 202 | "punct" -> ['\33'..'\47']++['\58'..'\64']++['\91'..'\96']++['\123'..'\126'] 203 | "alpha" -> ['A'..'Z']++['a'..'z'] 204 | "graph" -> ['\33'..'\126'] 205 | "space" -> "\t\n\v\f\r " 206 | "blank" -> "\t " 207 | "lower" -> ['a'..'z'] 208 | "upper" -> ['A'..'Z'] 209 | "cntrl" -> ['\0'..'\31']++"\127" -- with NUL 210 | "print" -> ['\32'..'\126'] 211 | "xdigit" -> ['0'..'9']++['A'..'F']++['a'..'f'] 212 | "word" -> ['0'..'9']++['A'..'Z']++"_"++['a'..'z'] 213 | _ -> [] 214 | 215 | -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == 216 | 217 | -- | Do the transformation and simplification in a single traversal. 218 | -- This removes the 'PPlus', 'PQuest', and 'PBound' values, changing to 'POr' 219 | -- and 'PEmpty' and 'PStar'. For some 'PBound' values it adds 220 | -- 'PNonEmpty' and 'PNonCapture' semantic marker. It also simplifies to 221 | -- flatten out nested 'POr' and 'PConcat' instances and eliminate some 222 | -- unneeded 'PEmpty' values. 223 | starTrans :: Pattern -> Pattern 224 | starTrans = dfsPattern (simplify' . starTrans') 225 | 226 | -- | Apply a 'Pattern' transformation function depth first. 227 | dfsPattern :: (Pattern -> Pattern) -- ^ The transformation function. 228 | -> Pattern -- ^ The 'Pattern' to transform. 229 | -> Pattern -- ^ The transformed 'Pattern'. 230 | dfsPattern f = dfs 231 | where unary c = f . c . dfs 232 | dfs pattern = case pattern of 233 | POr ps -> f (POr (map dfs ps)) 234 | PConcat ps -> f (PConcat (map dfs ps)) 235 | PGroup i p -> unary (PGroup i) p 236 | PQuest p -> unary PQuest p 237 | PPlus p -> unary PPlus p 238 | PStar i p -> unary (PStar i) p 239 | PBound i mi p -> unary (PBound i mi) p 240 | _ -> f pattern 241 | 242 | {- Replace by PNonCapture 243 | unCapture = dfsPattern unCapture' where 244 | unCapture' (PGroup (Just _) p) = PGroup Nothing p 245 | unCapture' x = x 246 | -} 247 | reGroup :: Pattern -> Pattern 248 | reGroup p@(PConcat xs) | 2 <= length xs = PGroup Nothing p 249 | reGroup p@(POr xs) | 2 <= length xs = PGroup Nothing p 250 | reGroup p = p 251 | 252 | starTrans' :: Pattern -> Pattern 253 | starTrans' pIn = 254 | case pIn of -- We know that "p" has been simplified in each of these cases: 255 | PQuest p -> POr [p,PEmpty] 256 | 257 | {- The PStar should not capture 0 characters on its first iteration, 258 | so set its mayFirstBeNull flag to False 259 | -} 260 | PPlus p | canOnlyMatchNull p -> p 261 | | otherwise -> asGroup $ PConcat [reGroup p,PStar False p] 262 | 263 | {- "An ERE matching a single character repeated by an '*' , '?' , or 264 | an interval expression shall not match a null expression unless 265 | this is the only match for the repetition or it is necessary to 266 | satisfy the exact or minimum number of occurrences for the interval 267 | expression." 268 | -} 269 | {- p? is p|PEmpty which prefers even a 0-character match for p 270 | p{0,1} is p? is POr [p,PEmpty] 271 | p{0,2} is (pp?)? NOT p?p? 272 | p{0,3} is (p(pp?)?)? 273 | p{1,2} is like pp{0,1} is like pp? but see below 274 | p{2,5} is ppp{0,3} is pp(p(pp?)?)? 275 | 276 | But this is not always right. Because if the second use of p in 277 | p?p? matches 0 characters then the perhaps non 0 character match of 278 | the first p is overwritten. 
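   (One concrete illustration of the problem, under the expansion rules sketched
    in this comment: if p{0,2} were expanded naively as

        PConcat [ POr [p,PEmpty] , POr [p,PEmpty] ]        i.e.  p?p?

    then, for a p that can match either "x" or "" and the input "x", the first
    copy may match "x" while the second copy matches "", and the later empty
    match of the duplicated subpattern overwrites the earlier non-empty capture,
    exactly as described above.)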
279 | 280 | We need a new operation "p!" that means "p?" unless "p" match 0 281 | characters, in which case skip p as if it failed in "p?". Thus 282 | when p cannot accept 0 characters p! and p? are equivalent. And 283 | when p can only match 0 characters p! is PEmpty. So for 284 | simplicity, only use ! when p can match 0 characters but not only 0 285 | characters. 286 | 287 | Call this (PNonEmpty p) in the Pattern type. 288 | p! is PNonEmpty p is POr [PEmpty,p] 289 | IS THIS TRUE? Use QuickCheck? 290 | 291 | Note that if p cannot match 0 characters then p! is p? and vice versa 292 | 293 | The p{0,1} is still always p? and POr [p,PEmpty] 294 | Now p{0,2} means p?p! or (pp!)? and p{0,3} means (p(pp!)!)? or p?p!p! 295 | Equivalently p?p! and p?p!p! 296 | And p{2,2} is p'p and p{3,3} is p'p'p and p{4} is p'p'p'p 297 | The p{1,2} is pp! and p{1,3} is pp!p! or p(pp!)! 298 | And p{2,4} means p'pp!p! and p{3,6} is p'p'pp!p!p! or p'p'p(p(pp!)!)! 299 | 300 | But this second form still has a problem: the (pp!)! can have the first 301 | p match 0 and the second p match non-zero. This showed up for (.|$){1,3} 302 | since ($.!)! should not be a valid path but altered the qt_win commands. 303 | 304 | Thus only p'p'pp!p!p! has the right semantics. For completeness: 305 | 306 | if p can only match only 0 characters then the cases are 307 | p{0,0} is (), p{0,_} = p?, p{_,_} is p 308 | 309 | if p can match 0 or non-zero characters then cases are 310 | p{0,0} is (), p{0,1} is (p)?, p{0,2} is (pp!)?, p{0,3} is (pp!p!)? 311 | p{1,1} is p, p{1,2} is pp!, p{1,3} is pp!p!, p{1,4} is pp!p!p! 312 | p{2,2} is p'p, 313 | p{2,3} is p'pp!, 314 | p{2,4} is p'pp!p! or p'p(pp!)! 315 | p{2,5} is p'pp!p!p! or p'p(p(pp!)!)! 316 | p{3,3} is p'p'p, p{3,4} is p'p'pp!, p{3,5} is p'p'pp!p!, p{3,6} is p'p'pp!p!p! 317 | 318 | if p can only match 1 or more characters then cases are 319 | p{0,0} is () 320 | p{0,1} is p?, p{0,2} is (pp?)?, p{0,3} is (p(pp?)?)?, p{0,4} is (pp{0,3})? 321 | p{1,1} is p, p{1,j} is pp{0,pred j} 322 | p{2,2} is p'p, p{2,3} is p'pp?, p{2,4} is p'p(pp?)?, p{2,5} = p'p{1,4} = p'(pp{0,3}) 323 | p{3,3} is p'p'p, p{3,4} is p'p'pp?, p{3,5} is p'p'p(pp?)?, p{3,6} is 324 | 325 | And by this logic, the PStar False is really p*! So p{0,} is p* 326 | and p{1,} is pp*! and p{2,} is p'pp*! and p{3,} is p'p'pp*! 327 | 328 | The (nonEmpty' p) below is the only way PNonEmpty is introduced 329 | into the Pattern. It is always preceded by p inside a PConcat 330 | list. The p involved never simplifies to PEmpty. Thus it is 331 | impossible to have PNonEmpty directly nested, i.e. (PNonEmpty 332 | (PNonEmpty _)) never occurs even after simplifications. 333 | 334 | The (nonCapture' p) below is the only way PNonCapture is 335 | introduced into the Pattern. It is always followed by p inside a 336 | PConcat list. 337 | 338 | -} 339 | -- Easy cases 340 | PBound i _ _ | i<0 -> PEmpty -- impossibly malformed 341 | PBound i (Just j) _ | i>j -> PEmpty -- impossibly malformed 342 | PBound _ (Just 0) _ -> PEmpty 343 | -- Medium cases 344 | PBound 0 Nothing p | canOnlyMatchNull p -> quest p 345 | | otherwise -> PStar True p 346 | PBound 0 (Just 1) p -> quest p 347 | -- Hard cases 348 | PBound i Nothing p | canOnlyMatchNull p -> p 349 | | otherwise -> asGroup . PConcat $ apply (nc'p:) (pred i) [reGroup p,PStar False p] 350 | where nc'p = nonCapture' p 351 | PBound 0 (Just j) p | canOnlyMatchNull p -> quest p 352 | -- The first operation is quest NOT nonEmpty. 
This can be tested with 353 | -- "a\nb" "((^)?|b){0,3}" and "a\nb" "((^)|b){0,3}" 354 | | otherwise -> quest . (concat' p) $ 355 | apply (nonEmpty' . (concat' p)) (j-2) (nonEmpty' p) 356 | {- 0.99.6 remove 357 | | cannotMatchNull p -> apply (quest' . (concat' p)) (pred j) (quest' p) 358 | | otherwise -> POr [ simplify' (PConcat (p : replicate (pred j) (nonEmpty' p))) , PEmpty ] 359 | -} 360 | {- 0.99.6 add, 0.99.7 remove 361 | PBound i (Just j) p | canOnlyMatchNull p -> p 362 | | i == j -> PConcat $ apply (p':) (pred i) [p] 363 | | otherwise -> PConcat $ apply (p':) (pred i) 364 | [p,apply (nonEmpty' . (concat' p)) (j-i-1) (nonEmpty' p) ] 365 | where p' = nonCapture' p 366 | -} 367 | {- 0.99.7 add -} 368 | PBound i (Just j) p | canOnlyMatchNull p -> p 369 | | i == j -> asGroup . PConcat $ apply (nc'p:) (pred i) [reGroup p] 370 | | otherwise -> asGroup . PConcat $ apply (nc'p:) (pred i) 371 | [reGroup p,apply (nonEmpty' . (concat' p)) (j-i-1) (ne'p) ] 372 | where nc'p = nonCapture' p 373 | ne'p = nonEmpty' p 374 | {- 0.99.6 375 | | cannotMatchNull p -> PConcat $ apply (p':) (pred i) $ (p:) $ 376 | [apply (quest' . (concat' p)) (pred (j-i)) (quest' p)] 377 | | otherwise -> PConcat $ (replicate (pred i) p') ++ p : (replicate (j-i) (nonEmpty' p)) 378 | -} 379 | PStar mayFirstBeNull p | canOnlyMatchNull p -> if mayFirstBeNull then quest p 380 | else PEmpty 381 | | otherwise -> pass 382 | -- Left intact 383 | PEmpty -> pass 384 | PGroup {} -> pass 385 | POr {} -> pass 386 | PConcat {} -> pass 387 | PCarat {} -> pass 388 | PDollar {} -> pass 389 | PDot {} -> pass 390 | PAny {} -> pass 391 | PAnyNot {} -> pass 392 | PEscape {} -> pass 393 | PChar {} -> pass 394 | PNonCapture {} -> pass 395 | PNonEmpty {} -> pass -- TODO : remove PNonEmpty from program 396 | where 397 | quest = (\ p -> POr [p,PEmpty]) -- require p to have been simplified 398 | -- quest' = (\ p -> simplify' $ POr [p,PEmpty]) -- require p to have been simplified 399 | concat' a b = simplify' $ PConcat [reGroup a,reGroup b] -- require a and b to have been simplified 400 | nonEmpty' = (\ p -> simplify' $ POr [PEmpty,p]) -- 2009-01-19 : this was PNonEmpty 401 | nonCapture' = PNonCapture 402 | apply f n x = foldr ($) x (replicate n f) -- function f applied n times to x : f^n(x) 403 | asGroup p = PGroup Nothing (simplify' p) 404 | pass = pIn 405 | 406 | -- | Function to transform a pattern into an equivalent, but less 407 | -- redundant form. Nested 'POr' and 'PConcat' are flattened. 'PEmpty' 408 | -- is propagated. 409 | simplify' :: Pattern -> Pattern 410 | simplify' x@(POr _) = 411 | let ps' = case span notPEmpty (flatten x) of 412 | (notEmpty,[]) -> notEmpty 413 | (notEmpty,_:rest) -> notEmpty ++ (PEmpty:filter notPEmpty rest) -- keep 1st PEmpty only 414 | in case ps' of 415 | [] -> PEmpty 416 | [p] -> p 417 | _ -> POr ps' 418 | simplify' x@(PConcat _) = 419 | let ps' = filter notPEmpty (flatten x) 420 | in case ps' of 421 | [] -> PEmpty 422 | [p] -> p 423 | _ -> PConcat ps' -- PConcat ps' 424 | simplify' (PStar _ PEmpty) = PEmpty 425 | simplify' (PNonCapture PEmpty) = PEmpty -- 2009, perhaps useful 426 | --simplify' (PNonEmpty PEmpty) = err "simplify' (PNonEmpty PEmpty) = should be Impossible!" -- 2009 427 | simplify' other = other 428 | 429 | -- | Function to flatten nested 'POr' or nested 'PConcat' applicataions. 
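-- For instance, @flatten (POr [POr [a,b], c])@ is @[a,b,c]@: exactly one level
-- of nesting is removed.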
430 | flatten :: Pattern -> [Pattern] 431 | flatten (POr ps) = (concatMap (\x -> case x of 432 | POr ps' -> ps' 433 | p -> [p]) ps) 434 | flatten (PConcat ps) = (concatMap (\x -> case x of 435 | PConcat ps' -> ps' 436 | p -> [p]) ps) 437 | flatten _ = err "flatten can only be applied to POr or PConcat" 438 | 439 | notPEmpty :: Pattern -> Bool 440 | notPEmpty PEmpty = False 441 | notPEmpty _ = True 442 | 443 | -- | Determines if 'Pattern' will fail or accept @[]@ and never accept any 444 | -- characters. Treat 'PCarat' and 'PDollar' as @True@. 445 | canOnlyMatchNull :: Pattern -> Bool 446 | canOnlyMatchNull pIn = 447 | case pIn of 448 | PEmpty -> True 449 | PGroup _ p -> canOnlyMatchNull p 450 | POr ps -> all canOnlyMatchNull ps 451 | PConcat ps -> all canOnlyMatchNull ps 452 | PQuest p -> canOnlyMatchNull p 453 | PPlus p -> canOnlyMatchNull p 454 | PStar _ p -> canOnlyMatchNull p 455 | PBound _ (Just 0) _ -> True 456 | PBound _ _ p -> canOnlyMatchNull p 457 | PCarat _ -> True 458 | PDollar _ -> True 459 | PNonCapture p -> canOnlyMatchNull p 460 | -- PNonEmpty p -> canOnlyMatchNull p -- like PQuest 461 | _ ->False 462 | 463 | {- 464 | 465 | -- | If 'cannotMatchNull' returns 'True' then it is known that the 466 | -- 'Pattern' will never accept an empty string. If 'cannotMatchNull' 467 | -- returns 'False' then it is possible but not definite that the 468 | -- 'Pattern' could accept an empty string. 469 | cannotMatchNull :: Pattern -> Bool 470 | cannotMatchNull pIn = 471 | case pIn of 472 | PEmpty -> False 473 | PGroup _ p -> cannotMatchNull p 474 | POr [] -> False 475 | POr ps -> all cannotMatchNull ps 476 | PConcat [] -> False 477 | PConcat ps -> any cannotMatchNull ps 478 | PQuest _ -> False 479 | PPlus p -> cannotMatchNull p 480 | PStar {} -> False 481 | PBound 0 _ _ -> False 482 | PBound _ _ p -> cannotMatchNull p 483 | PCarat _ -> False 484 | PDollar _ -> False 485 | PNonCapture p -> cannotMatchNull p 486 | -- PNonEmpty _ -> False -- like PQuest 487 | _ -> True 488 | -} 489 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/ReadRegex.hs: -------------------------------------------------------------------------------- 1 | -- | This is a POSIX version of parseRegex that allows NUL characters. 2 | -- Lazy\/Possessive\/Backrefs are not recognized. Anchors \^ and \$ are 3 | -- recognized. 4 | -- 5 | -- A 'PGroup' returned always has @(Maybe 'GroupIndex')@ set to @(Just _)@ 6 | -- and never to @Nothing@. 7 | 8 | module Text.Regex.TDFA.ReadRegex (parseRegex) where 9 | 10 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 11 | 12 | import Text.Regex.TDFA.Pattern {- all -} 13 | import Text.ParserCombinators.Parsec((<|>), (), 14 | try, runParser, many, getState, setState, CharParser, ParseError, 15 | sepBy1, option, notFollowedBy, many1, lookAhead, eof, between, 16 | string, noneOf, digit, char, anyChar) 17 | 18 | import Control.Monad (liftM, guard) 19 | 20 | import Data.Foldable (asum) 21 | import qualified Data.Set as Set(fromList) 22 | 23 | -- | An element inside @[...]@, denoting a character class. 24 | data BracketElement 25 | = BEChar Char -- ^ A single character. 26 | | BERange Char Char -- ^ A character range (e.g. @a-z@). 27 | | BEColl String -- ^ @foo@ in @[.foo.]@. 28 | | BEEquiv String -- ^ @bar@ in @[=bar=]@. 29 | | BEClass String -- ^ A POSIX character class (candidate), e.g. @alpha@ parsed from @[:alpha:]@. 
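-- An illustrative value (not used by the parser below): the elements collected
-- for the bracket expression @[a-z[:digit:][.ch.][=e=]x]@ would be
_exampleBracketElements :: [BracketElement]
_exampleBracketElements =
  [ BERange 'a' 'z'    -- from @a-z@
  , BEClass "digit"    -- from @[:digit:]@
  , BEColl "ch"        -- from @[.ch.]@
  , BEEquiv "e"        -- from @[=e=]@
  , BEChar 'x'         -- from @x@
  ]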
30 | 31 | -- | Return either an error message or a tuple of the Pattern and the 32 | -- largest group index and the largest DoPa index (both have smallest 33 | -- index of 1). Since the regular expression is supplied as [Char] it 34 | -- automatically supports unicode and @\\NUL@ characters. 35 | parseRegex :: String -> Either ParseError (Pattern,(GroupIndex,DoPa)) 36 | parseRegex x = runParser (do pat <- p_regex 37 | eof 38 | (lastGroupIndex,lastDopa) <- getState 39 | return (pat,(lastGroupIndex,DoPa lastDopa))) (0,0) x x 40 | 41 | type P = CharParser (GroupIndex, Int) 42 | 43 | p_regex :: P Pattern 44 | p_regex = liftM POr $ sepBy1 p_branch (char '|') 45 | 46 | -- man re_format helps a lot, it says one-or-more pieces so this is 47 | -- many1 not many. Use "()" to indicate an empty piece. 48 | p_branch :: P Pattern 49 | p_branch = liftM PConcat $ many1 p_piece 50 | 51 | p_piece :: P Pattern 52 | p_piece = (p_anchor <|> p_atom) >>= p_post_atom -- correct specification 53 | 54 | p_atom :: P Pattern 55 | p_atom = p_group <|> p_bracket <|> p_char "an atom" 56 | 57 | group_index :: P (Maybe GroupIndex) 58 | group_index = do 59 | (gi,ci) <- getState 60 | let index = succ gi 61 | setState (index,ci) 62 | return (Just index) 63 | 64 | p_group :: P Pattern 65 | p_group = lookAhead (char '(') >> do 66 | index <- group_index 67 | liftM (PGroup index) $ between (char '(') (char ')') p_regex 68 | 69 | -- p_post_atom takes the previous atom as a parameter 70 | p_post_atom :: Pattern -> P Pattern 71 | p_post_atom atom = (char '?' >> return (PQuest atom)) 72 | <|> (char '+' >> return (PPlus atom)) 73 | <|> (char '*' >> return (PStar True atom)) 74 | <|> p_bound atom 75 | <|> return atom 76 | 77 | p_bound :: Pattern -> P Pattern 78 | p_bound atom = try $ between (char '{') (char '}') (p_bound_spec atom) 79 | 80 | p_bound_spec :: Pattern -> P Pattern 81 | p_bound_spec atom = do lowS <- many1 digit 82 | let lowI = read lowS 83 | highMI <- option (Just lowI) $ try $ do 84 | _ <- char ',' 85 | -- parsec note: if 'many digits' fails below then the 'try' ensures 86 | -- that the ',' will not match the closing '}' in p_bound, same goes 87 | -- for any non '}' garbage after the 'many digits'. 88 | highS <- many digit 89 | if null highS then return Nothing -- no upper bound 90 | else do let highI = read highS 91 | guard (lowI <= highI) 92 | return (Just (read highS)) 93 | return (PBound lowI highMI atom) 94 | 95 | -- An anchor cannot be modified by a repetition specifier 96 | p_anchor :: P Pattern 97 | p_anchor = (char '^' >> liftM PCarat char_index) 98 | <|> (char '$' >> liftM PDollar char_index) 99 | <|> try (do _ <- string "()" 100 | index <- group_index 101 | return $ PGroup index PEmpty) 102 | "empty () or anchor ^ or $" 103 | 104 | char_index :: P DoPa 105 | char_index = do (gi,ci) <- getState 106 | let ci' = succ ci 107 | setState (gi,ci') 108 | return (DoPa ci') 109 | 110 | p_char :: P Pattern 111 | p_char = p_dot <|> p_left_brace <|> p_escaped <|> p_other_char where 112 | p_dot = char '.' >> char_index >>= return . PDot 113 | p_left_brace = try $ (char '{' >> notFollowedBy digit >> char_index >>= return . (`PChar` '{')) 114 | p_escaped = char '\\' >> anyChar >>= \c -> char_index >>= return . (`PEscape` c) 115 | p_other_char = noneOf specials >>= \c -> char_index >>= return . 
(`PChar` c) 116 | where specials = "^.[$()|*+?{\\" 117 | 118 | -- parse [bar] and [^bar] sets of characters 119 | p_bracket :: P Pattern 120 | p_bracket = (char '[') >> ( (char '^' >> p_set True) <|> (p_set False) ) 121 | 122 | p_set :: Bool -> P Pattern 123 | p_set invert = do initial <- option "" (char ']' >> return "]") 124 | values <- if null initial then many1 p_set_elem else many p_set_elem 125 | _ <- char ']' 126 | ci <- char_index 127 | let chars = maybe'set $ concat $ 128 | initial : 129 | [ c | BEChar c <- values ] : 130 | [ [start..end] | BERange start end <- values ] 131 | colls = maybe'set [PatternSetCollatingElement coll | BEColl coll <- values ] 132 | equivs = maybe'set [PatternSetEquivalenceClass equiv | BEEquiv equiv <- values] 133 | class's = maybe'set [PatternSetCharacterClass a'class | BEClass a'class <- values] 134 | maybe'set x = if null x then Nothing else Just (Set.fromList x) 135 | sets = PatternSet chars class's colls equivs 136 | sets `seq` return $ if invert then PAnyNot ci sets else PAny ci sets 137 | 138 | -- From here down the code is the parser and functions for pattern [ ] set things 139 | 140 | p_set_elem :: P BracketElement 141 | p_set_elem = checkBracketElement =<< asum 142 | [ p_set_elem_class 143 | , p_set_elem_equiv 144 | , p_set_elem_coll 145 | , p_set_elem_range 146 | , p_set_elem_char 147 | , fail "Failed to parse bracketed string" 148 | ] 149 | 150 | p_set_elem_class :: P BracketElement 151 | p_set_elem_class = liftM BEClass $ 152 | try (between (string "[:") (string ":]") (many1 $ noneOf ":]")) 153 | 154 | p_set_elem_equiv :: P BracketElement 155 | p_set_elem_equiv = liftM BEEquiv $ 156 | try (between (string "[=") (string "=]") (many1 $ noneOf "=]")) 157 | 158 | p_set_elem_coll :: P BracketElement 159 | p_set_elem_coll = liftM BEColl $ 160 | try (between (string "[.") (string ".]") (many1 $ noneOf ".]")) 161 | 162 | p_set_elem_range :: P BracketElement 163 | p_set_elem_range = try $ do 164 | start <- noneOf "]" 165 | _ <- char '-' 166 | end <- noneOf "]" 167 | return $ BERange start end 168 | 169 | p_set_elem_char :: P BracketElement 170 | p_set_elem_char = do 171 | c <- noneOf "]" 172 | return (BEChar c) 173 | 174 | -- | Fail when 'BracketElement' is invalid, e.g. empty range @1-0@. 175 | -- This failure should not be caught. 176 | -- 177 | checkBracketElement :: BracketElement -> P BracketElement 178 | checkBracketElement e = 179 | case e of 180 | BERange start end 181 | | start > end -> fail $ unwords 182 | [ "End point" 183 | , show end 184 | , "of dashed character range is less than starting point" 185 | , show start 186 | ] 187 | | otherwise -> ok 188 | BEChar _ -> ok 189 | BEClass _ -> ok 190 | BEColl _ -> ok 191 | BEEquiv _ -> ok 192 | where 193 | ok = return e 194 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Sequence.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 3 | @ByteString@ with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and 4 | "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via 5 | import "Text.Regex.TDFA". 6 | 7 | This exports instances of the high level API and the medium level 8 | API of 'compile','execute', and 'regexec'. 
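
An illustrative use, assuming @S@ abbreviates "Data.Sequence" and the default
options come from the 'RegexOptions' class of "Text.Regex.Base":

> case compile defaultCompOpt defaultExecOpt (S.fromList "a(b+)c") of
>   Left err -> Left err
>   Right re -> execute re (S.fromList "xabbcx")

On success this is @Right (Just _)@ carrying the usual 'MatchArray' of offsets.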
9 | -} 10 | module Text.Regex.TDFA.Sequence( 11 | Regex 12 | ,CompOption 13 | ,ExecOption 14 | ,compile 15 | ,execute 16 | ,regexec 17 | ) where 18 | 19 | import Data.Sequence(Seq) 20 | import Data.Foldable as F(toList) 21 | 22 | import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..),Extract(..)) 23 | import Text.Regex.Base.Impl(polymatch,polymatchM) 24 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) 25 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 26 | import Text.Regex.TDFA.TDFA(patternToRegex) 27 | import Text.Regex.TDFA.ReadRegex(parseRegex) 28 | 29 | import Data.Array.IArray((!),elems) 30 | import Data.Maybe(listToMaybe) 31 | import Text.Regex.TDFA.NewDFA.Engine(execMatch) 32 | import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 33 | 34 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 35 | 36 | instance RegexContext Regex (Seq Char) (Seq Char) where 37 | match = polymatch 38 | matchM = polymatchM 39 | 40 | instance RegexMaker Regex CompOption ExecOption (Seq Char) where 41 | makeRegexOptsM c e source = 42 | case parseRegex (F.toList source) of 43 | Left err -> fail $ "parseRegex for Text.Regex.TDFA.Sequence failed:"++show err 44 | Right pattern -> return $ patternToRegex pattern c e 45 | 46 | instance RegexLike Regex (Seq Char) where 47 | matchOnce r s = listToMaybe (matchAll r s) 48 | matchAll r s = execMatch r 0 '\n' s 49 | matchCount r s = length (matchAll r' s) 50 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 51 | matchTest = Tester.matchTest 52 | matchOnceText regex source = 53 | fmap (\ma -> let (o,l) = ma!0 54 | in (before o source 55 | ,fmap (\ol -> (extract ol source,ol)) ma 56 | ,after (o+l) source)) 57 | (matchOnce regex source) 58 | matchAllText regex source = 59 | map (fmap (\ol -> (extract ol source,ol))) 60 | (matchAll regex source) 61 | 62 | compile :: CompOption -- ^ Flags (summed together) 63 | -> ExecOption -- ^ Flags (summed together) 64 | -> Seq Char -- ^ The regular expression to compile 65 | -> Either String Regex -- ^ Returns: the compiled regular expression 66 | compile compOpt execOpt bs = 67 | case parseRegex (F.toList bs) of 68 | Left err -> Left ("parseRegex for Text.Regex.TDFA.Sequence failed:"++show err) 69 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 70 | 71 | execute :: Regex -- ^ Compiled regular expression 72 | -> Seq Char -- ^ String to match against 73 | -> Either String (Maybe MatchArray) 74 | execute r bs = Right (matchOnce r bs) 75 | 76 | regexec :: Regex -- ^ Compiled regular expression 77 | -> Seq Char -- ^ String to match against 78 | -> Either String (Maybe (Seq Char, Seq Char, Seq Char, [Seq Char])) 79 | regexec r txt = Right $ 80 | case matchOnceText r txt of 81 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 82 | -> Just (pre, main, post, rest) 83 | _ -> Nothing 84 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/String.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 3 | 'String' with the TDFA backend. 4 | 5 | This exports instances of the high level API and the medium level 6 | API of 'compile','execute', and 'regexec'. 7 | -} 8 | {- By Chris Kuklewicz, 2009. BSD License, see the LICENSE file. 
-} 9 | module Text.Regex.TDFA.String( 10 | -- ** Types 11 | Regex 12 | ,MatchOffset 13 | ,MatchLength 14 | ,CompOption 15 | ,ExecOption 16 | -- ** Medium level API functions 17 | ,compile 18 | ,execute 19 | ,regexec 20 | ) where 21 | 22 | import Text.Regex.Base.Impl(polymatch,polymatchM) 23 | import Text.Regex.Base.RegexLike(RegexMaker(..),RegexLike(..),RegexContext(..),MatchOffset,MatchLength,MatchArray) 24 | import Text.Regex.TDFA.Common(common_error,Regex(..),CompOption,ExecOption(captureGroups)) 25 | import Text.Regex.TDFA.ReadRegex(parseRegex) 26 | import Text.Regex.TDFA.TDFA(patternToRegex) 27 | 28 | import Data.Array.IArray((!),elems,amap) 29 | import Data.Maybe(listToMaybe) 30 | import Text.Regex.TDFA.NewDFA.Engine(execMatch) 31 | import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 32 | 33 | err :: String -> a 34 | err = common_error "Text.Regex.TDFA.String" 35 | 36 | unwrap :: Either String v -> v 37 | unwrap x = case x of Left msg -> err ("Text.Regex.TDFA.String died: "++msg) 38 | Right v -> v 39 | 40 | compile :: CompOption -- ^ Flags (summed together) 41 | -> ExecOption -- ^ Flags (summed together) 42 | -> String -- ^ The regular expression to compile (ASCII only, no null bytes) 43 | -> Either String Regex -- ^ Returns: the compiled regular expression 44 | compile compOpt execOpt source = 45 | case parseRegex source of 46 | Left msg -> Left ("parseRegex for Text.Regex.TDFA.String failed:"++show msg) 47 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 48 | 49 | instance RegexMaker Regex CompOption ExecOption String where 50 | makeRegexOpts c e source = unwrap (compile c e source) 51 | makeRegexOptsM c e source = either fail return $ compile c e source 52 | 53 | execute :: Regex -- ^ Compiled regular expression 54 | -> String -- ^ String to match against 55 | -> Either String (Maybe MatchArray) 56 | execute r s = Right (matchOnce r s) 57 | 58 | regexec :: Regex -- ^ Compiled regular expression 59 | -> String -- ^ String to match against 60 | -> Either String (Maybe (String, String, String, [String])) 61 | regexec r txt = Right $ 62 | case matchOnceText r txt of 63 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 64 | -> Just (pre, main, post, rest) 65 | _ -> Nothing 66 | 67 | -- Minimal definition for now 68 | instance RegexLike Regex String where 69 | matchOnce r s = listToMaybe (matchAll r s) 70 | matchAll r s = execMatch r 0 '\n' s 71 | matchCount r s = length (matchAll r' s) 72 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 73 | matchTest = Tester.matchTest 74 | -- matchOnceText 75 | matchAllText r s = 76 | let go i _ _ | i `seq` False = undefined 77 | go _i _t [] = [] 78 | go i t (x:xs) = let (off0,len0) = x!0 79 | trans pair@(off,len) = (take len (drop (off-i) t),pair) 80 | t' = drop (off0+len0-i) t 81 | in amap trans x : seq t' (go (off0+len0) t' xs) 82 | in go 0 s (matchAll r s) 83 | 84 | instance RegexContext Regex String String where 85 | match = polymatch 86 | matchM = polymatchM 87 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/TDFA.hs: -------------------------------------------------------------------------------- 1 | -- | "Text.Regex.TDFA.TDFA" converts the QNFA from TNFA into the DFA. 2 | -- A DFA state corresponds to a Set of QNFA states, represented as list 3 | -- of Index which are used to lookup the DFA state in a lazy Trie 4 | -- which holds all possible subsets of QNFA states. 
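-- For instance (indices purely illustrative), the DFA state for the QNFA
-- subset @[1,3]@ is looked up below as @indexesToDFA [1,3]@, i.e.
-- @Trie.lookupAsc trie [1,3]@, so each subset of states is built at most once.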
5 | module Text.Regex.TDFA.TDFA(patternToRegex,DFA(..),DT(..) 6 | ,examineDFA,nfaToDFA,dfaMap) where 7 | 8 | --import Control.Arrow((***)) 9 | import Data.Monoid as Mon(Monoid(..)) 10 | import Control.Monad.State(State,MonadState(..),execState) 11 | import Data.Array.IArray(Array,(!),bounds,{-assocs-}) 12 | import Data.IntMap(IntMap) 13 | import qualified Data.IntMap as IMap(empty,keys,delete,null,lookup,fromDistinctAscList 14 | ,member,unionWith,singleton,union 15 | ,toAscList,Key,elems,toList,insert 16 | ,insertWith,insertWithKey) 17 | import Data.IntMap.CharMap2(CharMap(..)) 18 | import qualified Data.IntMap.CharMap2 as Map(empty) 19 | --import Data.IntSet(IntSet) 20 | import qualified Data.IntSet as ISet(empty,singleton,null) 21 | import Data.List(foldl') 22 | import qualified Data.Map (Map,empty,member,insert,elems) 23 | import Data.Sequence as S((|>),{-viewl,ViewL(..)-}) 24 | 25 | import Text.Regex.TDFA.Common {- all -} 26 | import Text.Regex.TDFA.IntArrTrieSet(TrieSet) 27 | import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc,fromSinglesMerge) 28 | import Text.Regex.TDFA.Pattern(Pattern) 29 | --import Text.Regex.TDFA.RunMutState(toInstructions) 30 | import Text.Regex.TDFA.TNFA(patternToNFA) 31 | --import Debug.Trace 32 | 33 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 34 | 35 | err :: String -> a 36 | err s = common_error "Text.Regex.TDFA.TDFA" s 37 | 38 | dlose :: DFA 39 | dlose = DFA { d_id = ISet.empty 40 | , d_dt = Simple' { dt_win = IMap.empty 41 | , dt_trans = Map.empty 42 | , dt_other = Transition dlose dlose mempty } } 43 | 44 | -- dumb smart constructor for tracing construction (I wanted to monitor laziness) 45 | {-# INLINE makeDFA #-} 46 | makeDFA :: SetIndex -> DT -> DFA 47 | makeDFA i dt = DFA i dt 48 | 49 | -- Note that no CompOption or ExecOption parameter is needed. 
50 | nfaToDFA :: ((Index,Array Index QNFA),Array Tag OP,Array GroupIndex [GroupInfo]) 51 | -> CompOption -> ExecOption 52 | -> Regex 53 | nfaToDFA ((startIndex,aQNFA),aTagOp,aGroupInfo) co eo = Regex dfa startIndex indexBounds tagBounds trie aTagOp aGroupInfo ifa co eo where 54 | dfa = indexesToDFA [startIndex] 55 | indexBounds = bounds aQNFA 56 | tagBounds = bounds aTagOp 57 | ifa = (not (multiline co)) && isDFAFrontAnchored dfa 58 | 59 | indexesToDFA = {-# SCC "nfaToDFA.indexesToDFA" #-} Trie.lookupAsc trie -- Lookup in cache 60 | 61 | trie :: TrieSet DFA 62 | trie = Trie.fromSinglesMerge dlose mergeDFA (bounds aQNFA) indexToDFA 63 | 64 | newTransition :: DTrans -> Transition 65 | newTransition dtrans = Transition { trans_many = indexesToDFA (IMap.keys dtransWithSpawn) 66 | , trans_single = indexesToDFA (IMap.keys dtrans) 67 | , trans_how = dtransWithSpawn } 68 | where dtransWithSpawn = addSpawn dtrans 69 | 70 | makeTransition :: DTrans -> Transition 71 | makeTransition dtrans | hasSpawn = Transition { trans_many = indexesToDFA (IMap.keys dtrans) 72 | , trans_single = indexesToDFA (IMap.keys (IMap.delete startIndex dtrans)) 73 | , trans_how = dtrans } 74 | | otherwise = Transition { trans_many = indexesToDFA (IMap.keys dtrans) 75 | , trans_single = indexesToDFA (IMap.keys dtrans) 76 | , trans_how = dtrans } 77 | where hasSpawn = maybe False IMap.null (IMap.lookup startIndex dtrans) 78 | 79 | -- coming from (-1) means spawn a new starting item 80 | addSpawn :: DTrans -> DTrans 81 | addSpawn dtrans | IMap.member startIndex dtrans = dtrans 82 | | otherwise = IMap.insert startIndex mempty dtrans 83 | 84 | indexToDFA :: Index -> DFA -- used to seed the Trie from the NFA 85 | indexToDFA i = {-# SCC "nfaToDFA.indexToDFA" #-} makeDFA (ISet.singleton source) (qtToDT qtIn) 86 | where 87 | (QNFA {q_id = source,q_qt = qtIn}) = aQNFA!i 88 | qtToDT :: QT -> DT 89 | qtToDT (Testing {qt_test=wt, qt_dopas=dopas, qt_a=a, qt_b=b}) = 90 | Testing' { dt_test = wt 91 | , dt_dopas = dopas 92 | , dt_a = qtToDT a 93 | , dt_b = qtToDT b } 94 | qtToDT (Simple {qt_win=w, qt_trans=t, qt_other=o}) = 95 | Simple' { dt_win = makeWinner 96 | , dt_trans = fmap qtransToDFA t 97 | -- , dt_other = if IMap.null o then Just (newTransition $ IMap.singleton startIndex mempty) else Just (qtransToDFA o)} 98 | , dt_other = qtransToDFA o} 99 | where 100 | makeWinner :: IntMap {- Index -} Instructions -- (RunState ()) 101 | makeWinner | noWin w = IMap.empty 102 | | otherwise = IMap.singleton source (cleanWin w) 103 | 104 | qtransToDFA :: QTrans -> Transition 105 | qtransToDFA qtrans = {-# SCC "nfaToDFA.indexToDFA.qtransToDFA" #-} 106 | newTransition dtrans 107 | where 108 | dtrans :: DTrans 109 | dtrans =IMap.fromDistinctAscList . mapSnd (IMap.singleton source) $ best 110 | best :: [(Index {- Destination -} ,(DoPa,Instructions))] 111 | best = pickQTrans aTagOp $ qtrans 112 | 113 | -- The DFA states are built up by merging the singleton ones converted from the NFA. 114 | -- Thus the "source" indices in the DTrans should not collide. 
115 | mergeDFA :: DFA -> DFA -> DFA 116 | mergeDFA d1 d2 = {-# SCC "nfaToDFA.mergeDFA" #-} makeDFA i dt 117 | where 118 | i = d_id d1 `mappend` d_id d2 119 | dt = d_dt d1 `mergeDT` d_dt d2 120 | mergeDT,nestDT :: DT -> DT -> DT 121 | mergeDT (Simple' w1 t1 o1) (Simple' w2 t2 o2) = Simple' w t o 122 | where 123 | w = w1 `mappend` w2 124 | t = fuseDTrans -- t1 o1 t2 o2 125 | o = mergeDTrans o1 o2 126 | -- This is very much like mergeQTrans 127 | mergeDTrans :: Transition -> Transition -> Transition 128 | mergeDTrans (Transition {trans_how=dt1}) (Transition {trans_how=dt2}) = makeTransition dtrans 129 | where dtrans = IMap.unionWith IMap.union dt1 dt2 130 | -- This is very much like fuseQTrans 131 | fuseDTrans :: CharMap Transition 132 | fuseDTrans = CharMap (IMap.fromDistinctAscList (fuse l1 l2)) 133 | where 134 | l1 = IMap.toAscList (unCharMap t1) 135 | l2 = IMap.toAscList (unCharMap t2) 136 | fuse :: [(IMap.Key, Transition)] 137 | -> [(IMap.Key, Transition)] 138 | -> [(IMap.Key, Transition)] 139 | fuse [] y = fmap (fmap (mergeDTrans o1)) y 140 | fuse x [] = fmap (fmap (mergeDTrans o2)) x 141 | fuse x@((xc,xa):xs) y@((yc,ya):ys) = 142 | case compare xc yc of 143 | LT -> (xc,mergeDTrans o2 xa) : fuse xs y 144 | EQ -> (xc,mergeDTrans xa ya) : fuse xs ys 145 | GT -> (yc,mergeDTrans o1 ya) : fuse x ys 146 | mergeDT dt1@(Testing' wt1 dopas1 a1 b1) dt2@(Testing' wt2 dopas2 a2 b2) = 147 | case compare wt1 wt2 of 148 | LT -> nestDT dt1 dt2 149 | EQ -> Testing' { dt_test = wt1 150 | , dt_dopas = dopas1 `mappend` dopas2 151 | , dt_a = mergeDT a1 a2 152 | , dt_b = mergeDT b1 b2 } 153 | GT -> nestDT dt2 dt1 154 | mergeDT dt1@(Testing' {}) dt2 = nestDT dt1 dt2 155 | mergeDT dt1 dt2@(Testing' {}) = nestDT dt2 dt1 156 | nestDT dt1@(Testing' {dt_a=a,dt_b=b}) dt2 = dt1 { dt_a = mergeDT a dt2, dt_b = mergeDT b dt2 } 157 | nestDT _ _ = err "nestDT called on Simple -- cannot happen" 158 | 159 | patternToRegex :: (Pattern,(GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex 160 | patternToRegex pattern compOpt execOpt = nfaToDFA (patternToNFA compOpt pattern) compOpt execOpt 161 | 162 | dfaMap :: DFA -> Data.Map.Map SetIndex DFA 163 | dfaMap = seen (Data.Map.empty) where 164 | seen old d@(DFA {d_id=i,d_dt=dt}) = 165 | if i `Data.Map.member` old 166 | then old 167 | else let new = Data.Map.insert i d old 168 | in foldl' seen new (flattenDT dt) 169 | 170 | -- Get all trans_many states 171 | flattenDT :: DT -> [DFA] 172 | flattenDT (Simple' {dt_trans=(CharMap mt),dt_other=o}) = concatMap (\d -> [trans_many d {-,trans_single d-}]) . (:) o . IMap.elems $ mt 173 | flattenDT (Testing' {dt_a=a,dt_b=b}) = flattenDT a ++ flattenDT b 174 | 175 | examineDFA :: Regex -> String 176 | examineDFA (Regex {regex_dfa=dfa}) = unlines . (:) ("Number of reachable DFA states: "++show (length dfas)) . map show $ dfas 177 | where dfas = Data.Map.elems $ dfaMap dfa 178 | 179 | {- 180 | 181 | fillMap :: Tag -> IntMap (Position,Bool) 182 | fillMap tag = IMap.fromDistinctAscList [(t,(-1,True)) | t <- [0..tag] ] 183 | 184 | diffMap :: IntMap (Position,Bool) -> IntMap (Position,Bool) -> [(Index,(Position,Bool))] 185 | diffMap old new = IMap.toList (IMap.differenceWith (\a b -> if a==b then Nothing else Just b) old new) 186 | 187 | examineDFA :: (DFA,Index,Array Tag OP,Array GroupIndex [GroupInfo]) -> String 188 | examineDFA (dfa,_,aTags,_) = unlines $ map (examineDFA' (snd . 
bounds $ aTags)) (Map.elems $ dfaMap dfa) 189 | 190 | examineDFA' :: Tag -> DFA -> String 191 | examineDFA' maxTag = showDFA (fillMap maxTag) 192 | 193 | {- 194 | instance Show DFA where 195 | show (DFA {d_id=i,d_dt=dt}) = "DFA {d_id = "++show (ISet.toList i) 196 | ++"\n ,d_dt = "++ show dt 197 | ++"\n}" 198 | -} 199 | -- instance Show DT where show = showDT 200 | 201 | showDFA :: IntMap (Position,Bool) -> DFA -> String 202 | showDFA m (DFA {d_id=i,d_dt=dt}) = "DFA {d_id = "++show (ISet.toList i) 203 | ++"\n ,d_dt = "++ showDT m dt 204 | ++"\n}" 205 | -} 206 | 207 | 208 | 209 | -- pick QTrans can be told the unique source and knows all the 210 | -- destinations (hmm...along with qt_win)! So if in ascending destination order the last source 211 | -- is free to mutatate the old state. If the QTrans has only one 212 | -- entry then all we need to do is mutate that entry when making a 213 | -- transition. 214 | -- 215 | pickQTrans :: Array Tag OP -> QTrans -> [({-Destination-}Index,(DoPa,Instructions))] 216 | pickQTrans op tr = mapSnd (bestTrans op) . IMap.toList $ tr 217 | 218 | cleanWin :: WinTags -> Instructions 219 | cleanWin = toInstructions 220 | 221 | bestTrans :: Array Tag OP -> [TagCommand] -> (DoPa,Instructions) 222 | bestTrans _ [] = err "bestTrans : There were no transition choose from!" 223 | bestTrans aTagOP (f:fs) | null fs = canonical f 224 | | otherwise = answer -- if null toDisplay then answer else trace toDisplay answer 225 | where 226 | answer = foldl' pick (canonical f) fs 227 | {- toDisplay | null fs = "" 228 | | otherwise = unlines $ "bestTrans" : show (answer) : "from among" : concatMap (\x -> [show x, show (toInstructions (snd x))]) (f:fs) -} 229 | canonical :: TagCommand -> (DoPa,Instructions) 230 | canonical (dopa,spec) = (dopa, toInstructions spec) 231 | pick :: (DoPa,Instructions) -> TagCommand -> (DoPa,Instructions) 232 | pick win@(dopa1,winI) (dopa2,spec) = 233 | let nextI = toInstructions spec 234 | -- in case compareWith choose winPos nextPos of -- XXX 2009: add in enterOrbit information 235 | in case compareWith choose (toListing winI) (toListing nextI) of 236 | GT -> win 237 | LT -> (dopa2,nextI) 238 | EQ -> if dopa1 >= dopa2 then win else (dopa2,nextI) -- no deep reason not to just pick win 239 | 240 | toListing :: Instructions -> [(Tag,Action)] 241 | toListing (Instructions {newPos = nextPos}) = filter notReset nextPos 242 | where notReset (_,SetVal (-1)) = False 243 | notReset _ = True 244 | {- 245 | toListing (Instructions {newPos = nextPos}) = mergeTagOrbit nextPos (filter snd nextFlags) 246 | 247 | mergeTagOrbit xx [] = xx 248 | mergeTagOrbit [] yy = yy 249 | mergeTagOrbit xx@(x:xs) yy@(y:ys) = 250 | case compare (fst x) (fst y) of 251 | GT -> y : mergeTagOrbit xx ys 252 | LT -> x : mergeTagOrbit xs yy 253 | EQ -> x : mergeTagOrbit xs ys -- keep tag setting over orbit setting. 254 | -} 255 | 256 | {-# INLINE choose #-} 257 | choose :: Maybe (Tag,Action) -> Maybe (Tag,Action) -> Ordering 258 | choose Nothing Nothing = EQ 259 | choose Nothing x = flipOrder (choose x Nothing) 260 | choose (Just (tag,_post)) Nothing = 261 | case aTagOP!tag of 262 | Maximize -> GT 263 | Minimize -> LT -- needed to choose best path inside nested * operators, 264 | -- this needs a leading Minimize tag inside at least the parent * operator 265 | Ignore -> GT -- XXX this is a guess in analogy with Maximize for the end bit of a group 266 | Orbit -> LT -- trace ("choose LT! 
Just "++show tag++" < Nothing") LT -- 2009 XXX : comment out next line and use the Orbit instead 267 | -- Orbit -> err $ "bestTrans.choose : Very Unexpected Orbit in Just Nothing: "++show (tag,post,aTagOP,f:fs) 268 | choose (Just (tag,post1)) (Just (_,post2)) = 269 | case aTagOP!tag of 270 | Maximize -> order 271 | Minimize -> flipOrder order 272 | Ignore -> EQ 273 | Orbit -> EQ 274 | -- Orbit -> err $ "bestTrans.choose : Very Unexpected Orbit in Just Just: "++show (tag,(post1,post2),aTagOP,f:fs) 275 | where order = case (post1,post2) of 276 | (SetPre,SetPre) -> EQ 277 | (SetPost,SetPost) -> EQ 278 | (SetPre,SetPost) -> LT 279 | (SetPost,SetPre) -> GT 280 | (SetVal v1,SetVal v2) -> compare v1 v2 281 | _ -> err $ "bestTrans.compareWith.choose sees incomparable "++show (tag,post1,post2) 282 | 283 | 284 | {-# INLINE compareWith #-} 285 | compareWith :: (Ord x,Monoid a) => (Maybe (x,b) -> Maybe (x,c) -> a) -> [(x,b)] -> [(x,c)] -> a 286 | compareWith comp = cw where 287 | cw [] [] = comp Nothing Nothing 288 | cw xx@(x:xs) yy@(y:ys) = 289 | case compare (fst x) (fst y) of 290 | GT -> comp Nothing (Just y) `mappend` cw xx ys 291 | EQ -> comp (Just x) (Just y) `mappend` cw xs ys 292 | LT -> comp (Just x) Nothing `mappend` cw xs yy 293 | cw xx [] = foldr (\x rest -> comp (Just x) Nothing `mappend` rest) mempty xx 294 | cw [] yy = foldr (\y rest -> comp Nothing (Just y) `mappend` rest) mempty yy 295 | 296 | 297 | isDFAFrontAnchored :: DFA -> Bool 298 | isDFAFrontAnchored = isDTFrontAnchored . d_dt 299 | where 300 | isDTFrontAnchored :: DT -> Bool 301 | isDTFrontAnchored (Simple' {}) = False 302 | isDTFrontAnchored (Testing' {dt_test=wt,dt_a=a,dt_b=b}) | wt == Test_BOL = isDTLosing b 303 | | otherwise = isDTFrontAnchored a && isDTFrontAnchored b 304 | where 305 | -- can DT never win or accept a character (when following trans_single)? 
306 | isDTLosing :: DT -> Bool 307 | isDTLosing (Testing' {dt_a=a',dt_b=b'}) = isDTLosing a' && isDTLosing b' 308 | isDTLosing (Simple' {dt_win=w}) | not (IMap.null w) = False -- can win with 0 characters 309 | isDTLosing (Simple' {dt_trans=CharMap mt,dt_other=o}) = 310 | let ts = o : IMap.elems mt 311 | in all transLoses ts 312 | where 313 | transLoses :: Transition -> Bool 314 | transLoses (Transition {trans_single=dfa,trans_how=dtrans}) = isDTLose dfa || onlySpawns dtrans 315 | where 316 | isDTLose :: DFA -> Bool 317 | isDTLose dfa' = ISet.null (d_id dfa') 318 | onlySpawns :: DTrans -> Bool 319 | onlySpawns t = case IMap.elems t of 320 | [m] -> IMap.null m 321 | _ -> False 322 | 323 | {- toInstructions -} 324 | 325 | toInstructions :: TagList -> Instructions 326 | toInstructions spec = 327 | let (p,o) = execState (assemble spec) (mempty,mempty) 328 | in Instructions { newPos = IMap.toList p 329 | , newOrbits = if IMap.null o then Nothing 330 | else Just $ alterOrbits (IMap.toList o) 331 | } 332 | 333 | type CompileInstructions a = State 334 | ( IntMap Action -- 2009: change to SetPre | SetPost enum 335 | , IntMap AlterOrbit 336 | ) a 337 | 338 | data AlterOrbit = AlterReset -- removing the Orbits record from the OrbitLog 339 | | AlterLeave -- set inOrbit to False 340 | | AlterModify { newInOrbit :: Bool -- set inOrbit to the newInOrbit value 341 | , freshOrbit :: Bool} -- freshOrbit of True means to set getOrbits to mempty 342 | deriving (Show) -- freshOrbit of False means try appending position or else Seq.empty 343 | 344 | assemble :: TagList -> CompileInstructions () 345 | assemble = mapM_ oneInstruction where 346 | oneInstruction (tag,command) = 347 | case command of 348 | PreUpdate TagTask -> setPreTag tag 349 | PreUpdate ResetGroupStopTask -> resetGroupTag tag 350 | PreUpdate SetGroupStopTask -> setGroupTag tag 351 | PreUpdate ResetOrbitTask -> resetOrbit tag 352 | PreUpdate EnterOrbitTask -> enterOrbit tag 353 | PreUpdate LeaveOrbitTask -> leaveOrbit tag 354 | PostUpdate TagTask -> setPostTag tag 355 | PostUpdate ResetGroupStopTask -> resetGroupTag tag 356 | PostUpdate SetGroupStopTask -> setGroupTag tag 357 | _ -> err ("assemble : Weird orbit command: "++show (tag,command)) 358 | 359 | setPreTag :: Tag -> CompileInstructions () 360 | setPreTag = modifyPos SetPre 361 | 362 | setPostTag :: Tag -> CompileInstructions () 363 | setPostTag = modifyPos SetPost 364 | 365 | resetGroupTag :: Tag -> CompileInstructions () 366 | resetGroupTag = modifyPos (SetVal (-1)) 367 | 368 | setGroupTag :: Tag -> CompileInstructions () 369 | setGroupTag = modifyPos (SetVal 0) 370 | 371 | -- The following is ten times more complicated than it ought to be. Sorry, I was too new, and now 372 | -- too busy to clean this up. 
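-- A minimal illustration (not part of the library, derived from 'assemble'
-- above): a tag list that records tag 1 before the current position and tag 2
-- after it assembles to two 'newPos' entries and no orbit changes.
_exampleInstructions :: Instructions
_exampleInstructions = toInstructions [ (1, PreUpdate TagTask)
                                      , (2, PostUpdate TagTask) ]
-- expected shape: newPos == [(1,SetPre),(2,SetPost)], newOrbits == Nothing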
373 | 374 | resetOrbit :: Tag -> CompileInstructions () 375 | resetOrbit tag = modifyPos (SetVal (-1)) tag >> modifyOrbit (IMap.insert tag AlterReset) 376 | 377 | enterOrbit :: Tag -> CompileInstructions () 378 | enterOrbit tag = modifyPos (SetVal 0) tag >> modifyOrbit changeOrbit where 379 | changeOrbit = IMap.insertWith overwriteOrbit tag appendNewOrbit 380 | 381 | appendNewOrbit = AlterModify {newInOrbit = True, freshOrbit = False} -- try to append 382 | startNewOrbit = AlterModify {newInOrbit = True, freshOrbit = True} -- will start a new series 383 | 384 | overwriteOrbit _ AlterReset = startNewOrbit 385 | overwriteOrbit _ AlterLeave = startNewOrbit 386 | overwriteOrbit _ (AlterModify {newInOrbit = False}) = startNewOrbit 387 | overwriteOrbit _ (AlterModify {newInOrbit = True}) = 388 | err $ "enterOrbit: Cannot enterOrbit twice in a row: " ++ show tag 389 | 390 | leaveOrbit :: Tag -> CompileInstructions () 391 | leaveOrbit tag = modifyOrbit escapeOrbit where 392 | escapeOrbit = IMap.insertWith setInOrbitFalse tag AlterLeave where 393 | setInOrbitFalse _ x@(AlterModify {}) = x {newInOrbit = False} 394 | setInOrbitFalse _ x = x 395 | 396 | modifyPos :: Action -> Tag -> CompileInstructions () 397 | modifyPos todo tag = do 398 | (a,c) <- get 399 | let a' = IMap.insert tag todo a 400 | seq a' $ put (a',c) 401 | 402 | modifyOrbit :: (IntMap AlterOrbit -> IntMap AlterOrbit) -> CompileInstructions () 403 | modifyOrbit f = do 404 | (a,c) <- get 405 | let c' = f c 406 | seq c' $ put (a,c') 407 | 408 | ---- 409 | 410 | alterOrbits :: [(Tag,AlterOrbit)] -> (Position -> OrbitTransformer) 411 | alterOrbits x = let items = map alterOrbit x 412 | in (\ pos m -> foldl (flip ($)) m (map ($ pos) items)) 413 | 414 | alterOrbit :: (Tag,AlterOrbit) -> (Position -> OrbitTransformer) 415 | 416 | alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = True}) = 417 | (\ pos m -> IMap.insert tag (Orbits { inOrbit = inOrbit' 418 | , basePos = pos 419 | , ordinal = Nothing 420 | , getOrbits = mempty}) m) 421 | 422 | alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = False}) = 423 | (\ pos m -> IMap.insertWithKey (updateOrbit pos) tag (newOrbit pos) m) where 424 | newOrbit pos = Orbits { inOrbit = inOrbit' 425 | , basePos = pos 426 | , ordinal = Nothing 427 | , getOrbits = Mon.mempty} 428 | updateOrbit pos _tag new old | inOrbit old = old { inOrbit = inOrbit' 429 | , getOrbits = getOrbits old |> pos } 430 | | otherwise = new 431 | 432 | alterOrbit (tag,AlterReset) = (\ _ m -> IMap.delete tag m) 433 | 434 | alterOrbit (tag,AlterLeave) = (\ _ m -> case IMap.lookup tag m of 435 | Nothing -> m 436 | Just x -> IMap.insert tag (x {inOrbit=False}) m) 437 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Text.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Text.Regex.TDFA.Text 3 | Copyright : Chris Kuklewicz 2007-2009, shelarcy 2012 4 | License : BSD-style (see the file LICENSE) 5 | 6 | This modules provides 'RegexMaker' and 'RegexLike' instances for using 7 | 'Text' with the TDFA backend ("Text.Regex.TDFA.NewDFA.Engine" and 8 | "Text.Regex.TDFA.NewDFA.Tester"). 9 | 10 | This exports instances of the high level API and the medium level 11 | API of 'compile','execute', and 'regexec'. 
12 | 13 | @since 1.3.1 14 | -} 15 | module Text.Regex.TDFA.Text( 16 | Regex 17 | ,CompOption 18 | ,ExecOption 19 | ,compile 20 | ,execute 21 | ,regexec 22 | ) where 23 | 24 | import Data.Array((!),elems) 25 | import qualified Data.Text as T(Text,unpack) 26 | 27 | import Text.Regex.Base(RegexLike(..),RegexMaker(..),Extract(..),MatchArray,RegexContext(..)) 28 | import Text.Regex.Base.Impl(polymatch,polymatchM) 29 | import Text.Regex.TDFA.ReadRegex(parseRegex) 30 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 31 | import Text.Regex.TDFA.TDFA(patternToRegex) 32 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups),Position) 33 | 34 | import Data.Maybe(listToMaybe) 35 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons) 36 | import qualified Text.Regex.TDFA.NewDFA.Engine as Engine(execMatch) 37 | import qualified Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 38 | 39 | -- | @since 1.3.1 40 | instance RegexContext Regex T.Text T.Text where 41 | match = polymatch 42 | matchM = polymatchM 43 | 44 | -- | @since 1.3.1 45 | instance RegexMaker Regex CompOption ExecOption T.Text where 46 | makeRegexOptsM c e source = makeRegexOptsM c e (T.unpack source) 47 | 48 | -- | @since 1.3.1 49 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> T.Text -> [MatchArray] #-} 50 | execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 51 | execMatch = Engine.execMatch 52 | 53 | -- | @since 1.3.1 54 | {-# SPECIALIZE myMatchTest :: Regex -> T.Text -> Bool #-} 55 | myMatchTest :: Uncons text => Regex -> text -> Bool 56 | myMatchTest = Tester.matchTest 57 | 58 | -- | @since 1.3.1 59 | instance RegexLike Regex T.Text where 60 | matchOnce r s = listToMaybe (matchAll r s) 61 | matchAll r s = execMatch r 0 '\n' s 62 | matchCount r s = length (matchAll r' s) 63 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 64 | matchTest = myMatchTest 65 | matchOnceText regex source = 66 | fmap (\ma -> let (o,l) = ma!0 67 | in (before o source 68 | ,fmap (\ol -> (extract ol source,ol)) ma 69 | ,after (o+l) source)) 70 | (matchOnce regex source) 71 | matchAllText regex source = 72 | map (fmap (\ol -> (extract ol source,ol))) 73 | (matchAll regex source) 74 | 75 | -- | @since 1.3.1 76 | compile :: CompOption -- ^ Flags (summed together) 77 | -> ExecOption -- ^ Flags (summed together) 78 | -> T.Text -- ^ The regular expression to compile 79 | -> Either String Regex -- ^ Returns: the compiled regular expression 80 | compile compOpt execOpt txt = 81 | case parseRegex (T.unpack txt) of 82 | Left err -> Left ("parseRegex for Text.Regex.TDFA.Text failed:"++show err) 83 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 84 | 85 | -- | @since 1.3.1 86 | execute :: Regex -- ^ Compiled regular expression 87 | -> T.Text -- ^ Text to match against 88 | -> Either String (Maybe MatchArray) 89 | execute r txt = Right (matchOnce r txt) 90 | 91 | -- | @since 1.3.1 92 | regexec :: Regex -- ^ Compiled regular expression 93 | -> T.Text -- ^ Text to match against 94 | -> Either String (Maybe (T.Text, T.Text, T.Text, [T.Text])) 95 | regexec r txt = Right $ 96 | case matchOnceText r txt of 97 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 98 | -> Just (pre, main, post, rest) 99 | _ -> Nothing 100 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Text/Lazy.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : 
Text.Regex.TDFA.Text.Lazy 3 | Copyright : Chris Kuklewicz 2007-2009, shelarcy 2012 4 | License : BSD-style (see the file LICENSE) 5 | 6 | This module provides 'RegexMaker' and 'RegexLike' instances for using 7 | lazy 'Text' with the TDFA backend ("Text.Regex.TDFA.NewDFA.Engine" and 8 | "Text.Regex.TDFA.NewDFA.Tester"). 9 | 10 | This exports instances of the high level API and the medium level 11 | API of 'compile','execute', and 'regexec'. 12 | 13 | @since 1.3.1 14 | -} 15 | module Text.Regex.TDFA.Text.Lazy( 16 | Regex 17 | ,CompOption 18 | ,ExecOption 19 | ,compile 20 | ,execute 21 | ,regexec 22 | ) where 23 | 24 | import Data.Array.IArray(Array,(!),elems) 25 | import qualified Data.Text.Lazy as L(Text,unpack) 26 | 27 | import Text.Regex.Base(MatchArray,RegexContext(..),Extract(..),RegexMaker(..),RegexLike(..)) 28 | import Text.Regex.Base.Impl(polymatch,polymatchM) 29 | import Text.Regex.TDFA.ReadRegex(parseRegex) 30 | import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String 31 | import Text.Regex.TDFA.TDFA(patternToRegex) 32 | import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups),Position) 33 | 34 | import Data.Maybe(listToMaybe) 35 | import Text.Regex.TDFA.NewDFA.Uncons(Uncons) 36 | import qualified Text.Regex.TDFA.NewDFA.Engine as Engine(execMatch) 37 | import qualified Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) 38 | 39 | -- | @since 1.3.1 40 | instance RegexContext Regex L.Text L.Text where 41 | match = polymatch 42 | matchM = polymatchM 43 | 44 | -- | @since 1.3.1 45 | instance RegexMaker Regex CompOption ExecOption L.Text where 46 | makeRegexOptsM c e source = makeRegexOptsM c e (L.unpack source) 47 | 48 | -- | @since 1.3.1 49 | {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> L.Text -> [MatchArray] #-} 50 | execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 51 | execMatch = Engine.execMatch 52 | 53 | -- | @since 1.3.1 54 | {-# SPECIALIZE myMatchTest :: Regex -> L.Text -> Bool #-} 55 | myMatchTest :: Uncons text => Regex -> text -> Bool 56 | myMatchTest = Tester.matchTest 57 | 58 | -- | @since 1.3.1 59 | instance RegexLike Regex L.Text where 60 | matchOnce r s = listToMaybe (matchAll r s) 61 | matchAll r s = execMatch r 0 '\n' s 62 | matchCount r s = length (matchAll r' s) 63 | where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } 64 | matchTest = myMatchTest 65 | matchOnceText regex source = 66 | fmap (\ ma -> 67 | let (o,l) = ma!0 68 | in (before o source 69 | ,fmap (\ ol -> (extract ol source,ol)) ma 70 | ,after (o+l) source)) 71 | (matchOnce regex source) 72 | matchAllText regex source = 73 | let go :: Int -> L.Text -> [Array Int (Int, Int)] -> [Array Int (L.Text, (Int, Int))] 74 | go i _ _ | i `seq` False = undefined 75 | go _i _t [] = [] 76 | go i t (x:xs) = 77 | let (off0,len0) = x!0 78 | trans pair@(off,len) = (extract (off-i,len) t,pair) 79 | t' = after (off0+(len0-i)) t 80 | in fmap trans x : seq t' (go (off0+len0) t' xs) 81 | in go 0 source (matchAll regex source) 82 | 83 | -- | @since 1.3.1 84 | compile :: CompOption -- ^ Flags (summed together) 85 | -> ExecOption -- ^ Flags (summed together) 86 | -> L.Text -- ^ The regular expression to compile 87 | -> Either String Regex -- ^ Returns: the compiled regular expression 88 | compile compOpt execOpt txt = 89 | case parseRegex (L.unpack txt) of 90 | Left err -> Left ("parseRegex for Text.Regex.TDFA.Text.Lazy failed:"++show err) 91 | Right pattern -> Right (patternToRegex pattern compOpt execOpt) 92 | 93 | -- | @since 1.3.1
94 | execute :: Regex -- ^ Compiled regular expression 95 | -> L.Text -- ^ Text to match against 96 | -> Either String (Maybe MatchArray) 97 | execute r txt = Right (matchOnce r txt) 98 | 99 | -- | @since 1.3.1 100 | regexec :: Regex -- ^ Compiled regular expression 101 | -> L.Text -- ^ Text to match against 102 | -> Either String (Maybe (L.Text, L.Text, L.Text, [L.Text])) 103 | regexec r txt = Right $ 104 | case matchOnceText r txt of 105 | Just (pre, mt, post) | main:rest <- map fst (elems mt) 106 | -> Just (pre, main, post, rest) 107 | _ -> Nothing 108 | -------------------------------------------------------------------------------- /lib/Text/Regex/TDFA/Wrap.hs: -------------------------------------------------------------------------------- 1 | -- | "Text.Regex.TDFA.Wrap" provides the instance of RegexOptions and 2 | -- the definition of (=~) and (=~~). This is all re-exported by 3 | -- "Text.Regex.TDFA". 4 | 5 | module Text.Regex.TDFA.Wrap(Regex(..),CompOption(..),ExecOption(..),(=~),(=~~)) where 6 | 7 | {- By Chris Kuklewicz, 2007-2009. BSD License, see the LICENSE file. -} 8 | 9 | import Text.Regex.Base.RegexLike(RegexMaker(..),RegexContext(..)) 10 | import Text.Regex.TDFA.Common(CompOption(..),ExecOption(..),Regex(..)) 11 | 12 | -- | This is the pure functional matching operator. If the target 13 | -- cannot be produced then some empty result will be returned. If 14 | -- there is an error in processing, then 'error' will be called. 15 | (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) 16 | => source1 -> source -> target 17 | (=~) x r = let make :: RegexMaker Regex CompOption ExecOption a => a -> Regex 18 | make = makeRegex 19 | in match (make r) x 20 | 21 | -- | This is the monadic matching operator. If a single match fails, 22 | -- then 'fail' will be called. 23 | (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) 24 | => source1 -> source -> m target 25 | (=~~) x r = do let make :: (RegexMaker Regex CompOption ExecOption a, Monad m) => a -> m Regex 26 | make = makeRegexM 27 | q <- make r 28 | matchM q x 29 | -------------------------------------------------------------------------------- /regex-tdfa.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 1.24 2 | name: regex-tdfa 3 | version: 1.3.2.4 4 | 5 | build-Type: Simple 6 | license: BSD3 7 | license-file: LICENSE 8 | copyright: Copyright (c) 2007-2009, Christopher Kuklewicz 9 | author: Christopher Kuklewicz 10 | maintainer: Andreas Abel 11 | homepage: https://wiki.haskell.org/Regular_expressions 12 | bug-reports: https://github.com/haskell-hvr/regex-tdfa/issues 13 | 14 | category: Text 15 | synopsis: Pure Haskell Tagged DFA Backend for "Text.Regex" (regex-base) 16 | description: 17 | This package provides a pure Haskell \"Tagged\" DFA regex engine for \"Text.Regex\" (@regex-base@). This implementation was inspired by the algorithm (and Master's thesis) behind the regular expression library known as TRE. 18 | . 19 | Please consult the "Text.Regex.TDFA" module for API documentation including a tutorial with usage examples; 20 | see also <https://wiki.haskell.org/Regular_expressions> for general information about regular expression support in Haskell.
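-- An illustrative sketch only (the authoritative operator documentation lives in
-- Text.Regex.TDFA.Wrap above): the high-level API is driven by the requested
-- result type, e.g.
--
--   ("abcd" =~ "b|c") :: Bool                                -- True
--   ("abcd" =~ "b|c") :: (String, String, String, [String])  -- (before, match, after, groups)
--
-- (=~~) behaves the same but reports a failed match via 'fail' in a chosen monad.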
21 | 22 | extra-doc-files: 23 | CHANGELOG.md 24 | README.md 25 | 26 | extra-source-files: 27 | test/cases/*.txt 28 | 29 | tested-with: 30 | GHC == 9.12.2 31 | GHC == 9.10.2 32 | GHC == 9.8.4 33 | GHC == 9.6.7 34 | GHC == 9.4.8 35 | GHC == 9.2.8 36 | GHC == 9.0.2 37 | GHC == 8.10.7 38 | GHC == 8.8.4 39 | GHC == 8.6.5 40 | GHC == 8.4.4 41 | GHC == 8.2.2 42 | GHC == 8.0.2 43 | 44 | source-repository head 45 | type: git 46 | location: https://github.com/haskell-hvr/regex-tdfa.git 47 | 48 | source-repository this 49 | type: git 50 | location: https://github.com/haskell-hvr/regex-tdfa.git 51 | tag: v1.3.2.4 52 | 53 | flag force-O2 54 | default: False 55 | manual: True 56 | description: 57 | Force building @regex-tdfa@ with \"@ghc-options: -O2@\". 58 | . 59 | __NOTE__: This flag is mostly provided for legacy use-cases. Nowadays you can conveniently control optimization levels on a per-package granularity via @cabal.project@ files; see the Cabal user's guide for more details. 60 | 61 | flag doctest 62 | default: True 63 | manual: False 64 | description: 65 | Include test-suite doctest. 66 | 67 | library 68 | hs-source-dirs: lib 69 | 70 | exposed-modules: Data.IntMap.CharMap2 71 | Data.IntMap.EnumMap2 72 | Data.IntSet.EnumSet2 73 | Text.Regex.TDFA 74 | Text.Regex.TDFA.ByteString 75 | Text.Regex.TDFA.ByteString.Lazy 76 | Text.Regex.TDFA.Common 77 | Text.Regex.TDFA.CorePattern 78 | Text.Regex.TDFA.IntArrTrieSet 79 | Text.Regex.TDFA.NewDFA.Engine 80 | Text.Regex.TDFA.NewDFA.Engine_FA 81 | Text.Regex.TDFA.NewDFA.Engine_NC 82 | Text.Regex.TDFA.NewDFA.Engine_NC_FA 83 | Text.Regex.TDFA.NewDFA.Tester 84 | Text.Regex.TDFA.NewDFA.Uncons 85 | Text.Regex.TDFA.NewDFA.MakeTest 86 | Text.Regex.TDFA.Pattern 87 | Text.Regex.TDFA.ReadRegex 88 | Text.Regex.TDFA.Sequence 89 | Text.Regex.TDFA.String 90 | Text.Regex.TDFA.TDFA 91 | Text.Regex.TDFA.TNFA 92 | Text.Regex.TDFA.Text 93 | Text.Regex.TDFA.Text.Lazy 94 | 95 | other-modules: Paths_regex_tdfa 96 | 97 | build-depends: array >= 0.5 && < 0.6 98 | , base >= 4.9 && < 5 99 | , bytestring >= 0.10 && < 0.13 100 | , containers >= 0.5 && < 1 101 | , mtl >= 2.1.3 && < 2.4 102 | , parsec == 3.1.* 103 | , regex-base == 0.94.* 104 | , text >= 1.2.3 && < 2.2 105 | 106 | default-language: Haskell2010 107 | default-extensions: BangPatterns 108 | ExistentialQuantification 109 | FlexibleContexts 110 | FlexibleInstances 111 | ForeignFunctionInterface 112 | FunctionalDependencies 113 | MagicHash 114 | MultiParamTypeClasses 115 | NondecreasingIndentation 116 | RecursiveDo 117 | ScopedTypeVariables 118 | TypeOperators 119 | TypeSynonymInstances 120 | UnboxedTuples 121 | UnliftedFFITypes 122 | other-extensions: CPP 123 | 124 | ghc-options: 125 | -funbox-strict-fields 126 | -fspec-constr-count=10 127 | -Wall 128 | -Wno-orphans 129 | -Wcompat 130 | 131 | if flag(force-O2) 132 | ghc-options: 133 | -O2 134 | 135 | 136 | test-suite regex-tdfa-unittest 137 | type: exitcode-stdio-1.0 138 | 139 | hs-source-dirs: test 140 | main-is: Main.hs 141 | 142 | -- intra-package dependency 143 | build-depends: regex-tdfa 144 | 145 | -- dependencies whose version constraints are inherited via intra-package 'regex-tdfa' dependency 146 | build-depends: array 147 | , base 148 | , bytestring 149 | , containers 150 | , filepath 151 | , mtl 152 | , regex-base 153 | , text 154 | 155 | -- component-specific dependencies not inherited via 'regex-tdfa' 156 | , directory >= 1.1.0 && < 1.4 157 | , filepath >= 1.3.0 && < 1.6 158 | , utf8-string >= 1.0.1 && < 1.1 159 | 160 | default-language: Haskell2010 161 | default-extensions:
FlexibleInstances 162 | FlexibleContexts 163 | Rank2Types 164 | other-extensions: GeneralizedNewtypeDeriving 165 | 166 | ghc-options: -Wall -funbox-strict-fields 167 | 168 | if impl(ghc >= 8.0) 169 | ghc-options: -Wcompat 170 | 171 | if flag(force-O2) 172 | ghc-options: -O2 173 | 174 | test-suite doctest 175 | type: exitcode-stdio-1.0 176 | hs-source-dirs: test 177 | main-is: DocTestMain.hs 178 | 179 | build-depends: 180 | base 181 | , regex-tdfa 182 | , doctest-parallel >= 0.2.2 183 | -- doctest-parallel-0.2.2 is the first to filter out autogen-modules 184 | 185 | default-language: Haskell2010 186 | 187 | if !flag(doctest) 188 | buildable: False 189 | -------------------------------------------------------------------------------- /test/DocTestMain.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.Environment 4 | ( getArgs ) 5 | import Test.DocTest 6 | ( mainFromLibrary ) 7 | import Test.DocTest.Helpers 8 | ( extractSpecificCabalLibrary, findCabalPackage ) 9 | 10 | main :: IO () 11 | main = do 12 | args <- getArgs 13 | pkg <- findCabalPackage "regex-tdfa" 14 | -- Need to give the library name, otherwise the parser does not find it. 15 | lib <- extractSpecificCabalLibrary Nothing pkg 16 | mainFromLibrary lib args 17 | -------------------------------------------------------------------------------- /test/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, Christopher Edward Kuklewicz 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | * Neither the name of the copyright holder nor the names of the contributors may be used to endorse or promote products derived from this software without specific prior written permission. 9 | 10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
11 | -------------------------------------------------------------------------------- /test/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 3 | {-# LANGUAGE MonoLocalBinds #-} 4 | 5 | #if __GLASGOW_HASKELL__ >= 902 6 | {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} 7 | #endif 8 | 9 | module Main where 10 | 11 | import Control.Applicative as App 12 | import Control.Monad 13 | import qualified Control.Monad.Fail as Fail 14 | import Data.Array 15 | import qualified Data.ByteString as BS 16 | import qualified Data.ByteString.UTF8 as UTF8 17 | import Data.List (isInfixOf, mapAccumL, sort) 18 | import Data.String 19 | import Data.Typeable 20 | import Data.Version () 21 | import System.Directory (getDirectoryContents) 22 | import System.Environment 23 | import System.Exit 24 | import System.FilePath ((</>)) 25 | import Text.Regex.Base 26 | 27 | import qualified Text.Regex.TDFA as TDFA 28 | 29 | default(Int) 30 | 31 | type RSource = String 32 | type RType = String -- can be changed to any Extract instance 33 | newtype RegexSource = RegexSource {unSource :: RSource} deriving Show 34 | newtype RegexStringOf a = RegexString {unString :: a} deriving Show 35 | type RegexString = RegexStringOf RType 36 | 37 | dictionary :: [Char] 38 | dictionary = ['a'..'c']++['A'..'C']++"_" 39 | 40 | 41 | type A = Array Int (Int,Int) 42 | 43 | maxItems :: Int 44 | maxItems=100 45 | 46 | testOne :: t -> (t -> t1 -> Array Int (Int, Int)) -> t1 -> String 47 | testOne s op r = 48 | let foo :: String 49 | foo = concatMap (\(o,l) -> show (o,(o+l))) (take maxItems $ elems (op s r :: Array Int (Int,Int))) 50 | in if null foo then "NOMATCH" else foo 51 | 52 | testOne' :: A -> String 53 | testOne' input = 54 | let foo :: String 55 | foo = concatMap (\(o,l) -> show (o,(o+l))) (take maxItems $ elems input) 56 | in if null foo then "NOMATCH" else foo 57 | 58 | toTest :: String -> (Int,String,String,String) 59 | toTest line = let [n,regex,input,output] = words line 60 | noQ [] = [] 61 | noQ ('?':xs) = '-':'1':noQ xs 62 | noQ (x:xs) = x:noQ xs 63 | input' = if input == "NULL" then "" else unN input 64 | in (read n,regex,input',noQ output) 65 | 66 | toTest' :: String -> String -> (String,(Int,String,String,String)) 67 | toTest' oldRegex line = 68 | let [n,regex,input,output] = words line 69 | noQ [] = [] 70 | noQ ('?':xs) = '-':'1':noQ xs 71 | noQ (x:xs) = x:noQ xs 72 | input' = if input == "NULL" then "" else input 73 | regex' = if regex == "SAME" then oldRegex else regex 74 | in (regex',(read n,regex',input',noQ output)) 75 | 76 | load,load' :: String -> [(Int, String, String, String)] 77 | load = map toTest . lines 78 | load' = snd . mapAccumL toTest' "X_X_X_" .
lines 79 | 80 | checkTest :: PFT A -> (Int,String,String,String) -> IO [Int] 81 | checkTest opM (n,regex,input,output) = do 82 | let Result output'e = opM input regex 83 | p = putStrLn 84 | p "" 85 | case output'e of 86 | Left msg -> do 87 | p ("############################# Unexpected Error # "++show n ++ " #############################" ) 88 | p ("Searched text: "++show input) 89 | p ("Regex pattern: "++show regex) 90 | p ("Expected output: "++show output) 91 | p ("Error message: "++msg) 92 | return [n] 93 | Right output'a -> do 94 | let output' = testOne' output'a 95 | case (n<0 , output==output') of 96 | (False,True) -> p ("Expected Pass #"++show n) 97 | (False,False) -> p ("############################# Unexpected Fail # "++show n ++ " #############################" ) 98 | (True,True) -> p ("############################# Unexpected Pass # "++show n ++ " #############################" ) 99 | (True,False) -> p ("Expected Fail #"++show n) 100 | if (output == output') 101 | then do p ("text and pattern: "++show input) 102 | p ("Regex pattern: "++show regex) 103 | p ("Outputs agree: "++show output) 104 | return (if n<0 then [n] else []) 105 | else do p "" 106 | p ("Searched text: "++show input) 107 | p ("Regex pattern: "++show regex) 108 | p ("Expected output: "++show output) 109 | p ("Actual result : "++show output') 110 | return (if n<0 then [] else [n]) 111 | 112 | checkFile :: (RType -> RSource -> Result A) -> (FilePath, String) -> IO (FilePath,[Int]) 113 | checkFile opM (filepath, contents) = do 114 | putStrLn $ "\nUsing Tests from: "++filepath 115 | vals <- liftM concat (mapM (checkTest opM) (load' contents)) 116 | return (filepath,vals) 117 | 118 | checkTests :: (RType -> RSource -> Result A) -> [(FilePath,String)] -> IO [(String, [Int])] 119 | checkTests opM testCases = mapM (checkFile opM) testCases 120 | 121 | readTestCases :: FilePath -> IO [(String, String)] 122 | readTestCases folder = do 123 | fns <- filter (".txt" `isInfixOf`) <$> getDirectoryContents folder 124 | when (null fns) $ 125 | fail ("readTestCases: No test-cases found in " ++ show folder) 126 | forM (sort fns) $ \fn -> do 127 | bs <- BS.readFile (folder </> fn) 128 | return (fn, UTF8.toString bs) 129 | 130 | newtype Result a = Result (Either String a) 131 | deriving (Eq, Show, Functor, App.Applicative, Monad) 132 | 133 | instance Fail.MonadFail Result where 134 | fail = Result . Left 135 | 136 | type PFT a = RegexContext TDFA.Regex RType a => RType -> RSource -> Result a 137 | 138 | posix :: PFT a 139 | posix x reg = 140 | let q :: Result TDFA.Regex 141 | q = makeRegexOptsM (defaultCompOpt { TDFA.caseSensitive = False}) defaultExecOpt reg 142 | in q >>= \ s -> return (match s x) 143 | 144 | unN :: String -> String 145 | unN ('\\':'n':xs) = '\n':unN xs 146 | unN (x:xs) = x:unN xs 147 | unN [] = [] 148 | 149 | manual :: [String] -> IO () 150 | manual [sIn,rIn] = do 151 | let s :: RType 152 | r :: String 153 | s = fromString (unN sIn) 154 | r = (unN rIn) 155 | -- first match 156 | let r1 :: TDFA.Regex 157 | r1 = makeRegex r 158 | let b1u@(_,_b1s,_,_)=(match r1 s :: (RType,RType,RType,[RType])) 159 | putStrLn ("Searched text: "++show s) 160 | putStrLn ("Regex pattern: "++show r) 161 | print b1u 162 | -- multiple matches and counting 163 | let b1 = (match r1 s :: [MatchArray]) 164 | c1 = (match r1 s :: Int) 165 | putStrLn $ "Count of matches = "++show c1 166 | putStrLn $ "Matches found = "++show (length b1) 167 | mapM_ (putStrLn .
testOne') b1 168 | manual _ = error "wrong arguments to regex-tdfa-unittest's manual function" 169 | 170 | main :: IO () 171 | main = do 172 | putStr "Testing Text.Regex.TDFA version: " 173 | print TDFA.getVersion_Text_Regex_TDFA 174 | a <- getArgs 175 | if length a == 2 176 | then manual a 177 | else do 178 | putStrLn $ "Explanation and discussion of these tests on the wiki at http://www.haskell.org/haskellwiki/Regex_Posix, including comparing results from different operating systems" 179 | putStrLn $ "Questions about this package to the author at email " 180 | putStrLn $ "The type of both the pattern and test is " ++ show (typeOf (undefined :: RType)) 181 | putStrLn $ "Without exactly two arguments:" 182 | putStrLn $ " This program runs all *.txt test files found in the test/cases directory." 183 | putStrLn $ " Lines with negative number are expected to fail, others are expected to pass." 184 | putStrLn $ "With exactly two arguments:" 185 | putStrLn $ " The first argument is the text to be searched." 186 | putStrLn $ " The second argument is the regular expression pattern to search with." 187 | vals <- checkTests posix =<< readTestCases ("test" </> "cases") 188 | if null (concatMap snd vals) 189 | then putStrLn "\nWow, all the tests passed!" 190 | else do 191 | putStrLn $ "\nBoo, tests failed!\n"++unlines (map show vals) 192 | exitFailure 193 | 194 | {- 195 | -- for TRE 196 | posix x r = let q :: Posix.Regex 197 | q = makeRegexOpts (defaultCompOpt .|. Posix.compRightAssoc .|. Posix.compIgnoreCase) defaultExecOpt r 198 | in match q x 199 | 200 | tdfa x r = let q :: TDFA.Wrap.Regex 201 | q = makeRegexOpts (defaultCompOpt { TDFA.Wrap.caseSensitive = False 202 | , TDFA.Wrap.rightAssoc = True }) defaultExecOpt r 203 | in match q x 204 | 205 | tdfa2 x r = let q :: TDFA2.Wrap.Regex 206 | q = makeRegexOpts (defaultCompOpt { TDFA2.Wrap.caseSensitive = False 207 | , TDFA2.Wrap.rightAssoc = True }) defaultExecOpt r 208 | in match q x 209 | -} 210 | -------------------------------------------------------------------------------- /test/cases/basic3.txt: -------------------------------------------------------------------------------- 1 | 1 \) () (1,2) 2 | 2 \} } (0,1) 3 | 3 ] ] (0,1) 4 | 4 $^ NULL (0,0) 5 | 5 a($) aa (1,2)(2,2) 6 | 6 a*(^a) aa (0,1)(0,1) 7 | 7 (..)*(...)* a (0,0)(?,?)(?,?) 8 | 8 (..)*(...)* abcd (0,4)(2,4)(?,?) 9 | 9 (ab|a)(bc|c) abc (0,3)(0,2)(2,3) 10 | 10 (ab)c|abc abc (0,3)(0,2) 11 | 11 a{0}b ab (1,2) 12 | 12 (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) 13 | 13 (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) 14 | 15 ((a|a)|a) a (0,1)(0,1)(0,1) 15 | 16 (a*)(a|aa) aaaa (0,4)(0,3)(3,4) 16 | 17 a*(a.|aa) aaaa (0,4)(2,4) 17 | 18 a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) 18 | 19 (a|b)?.* b (0,1)(0,1) 19 | 20 (a|b)c|a(b|c) ac (0,2)(0,1)(?,?) 20 | 21 (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) 21 | 22 (a|b)*c|(a|ab)*c abc (0,3)(1,2)(?,?) 22 | 23 (a|b)*c|(a|ab)*c xc (1,2)(?,?)(?,?) 23 | 24 (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)(?,?) 24 | 25 a?(ab|ba)ab abab (0,4)(0,2) 25 | 26 a?(ac{0}b|ba)ab abab (0,4)(0,2) 26 | 27 ab|abab abbabab (0,2) 27 | 28 aba|bab|bba baaabbbaba (5,8) 28 | 29 aba|bab baaabbbaba (6,9) 29 | 30 (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)(?,?) 30 | 31 (a.|.a.)*|(a|.a...) aa (0,2)(0,2)(?,?)
31 | 32 ab|a xabc (1,3) 32 | 33 ab|a xxabc (2,4) 33 | 34 (Ab|cD)* aBcD (0,4)(2,4) 34 | 35 :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) 35 | 36 :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) 36 | 37 [[:lower:]]+ `az{ (1,3) 37 | 38 [[:upper:]]+ @AZ[ (1,3) 38 | 39 (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) 39 | 43 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) 40 | 44 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2)(1,2) 41 | 45 a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) 42 | 46 abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) 43 | 47 abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) 44 | 48 aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) 45 | 49 aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) 46 | 50 a*a*a*a*a*b aaaaaaaaab (0,10) 47 | 51 ab+bc abbc (0,4) 48 | 52 ab+bc abbbbc (0,6) 49 | 53 ab?bc abbc (0,4) 50 | 54 ab?bc abc (0,3) 51 | 55 ab?c abc (0,3) 52 | 56 ab|cd abc (0,2) 53 | 57 ab|cd abcd (0,2) 54 | 58 a\(b a(b (0,3) 55 | 59 a\(*b ab (0,2) 56 | 60 a\(*b a((b (0,4) 57 | 61 ((a)) abc (0,1)(0,1)(0,1) 58 | 62 (a)b(c) abc (0,3)(0,1)(2,3) 59 | 63 a+b+c aabbabc (4,7) 60 | 64 a* aaa (0,3) 61 | 65 (a*)* - (0,0)(0,0) 62 | 66 (a*)+ - (0,0)(0,0) 63 | 67 (a*|b)* - (0,0)(0,0) 64 | 68 (a+|b)* ab (0,2)(1,2) 65 | 69 (a+|b)+ ab (0,2)(1,2) 66 | 70 (a+|b)? ab (0,1)(0,1) 67 | 71 (^)* - (0,0)(0,0) 68 | 72 ([abc])*d abbbcd (0,6)(4,5) 69 | 73 ([abc])*bcd abcd (0,4)(0,1) 70 | 74 a|b|c|d|e e (0,1) 71 | 75 (a|b|c|d|e)f ef (0,2)(0,1) 72 | 76 ((a*|b))* - (0,0)(0,0)(0,0) 73 | 77 (ab|cd)e abcde (2,5)(2,4) 74 | 78 (a|b)c*d abcd (1,4)(1,2) 75 | 79 (ab|ab*)bc abc (0,3)(0,1) 76 | 80 a([bc]*)c* abc (0,3)(1,3) 77 | 81 a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) 78 | 82 a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) 79 | 83 a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) 80 | 84 a[bcd]*dcdcde adcdcde (0,7) 81 | 85 (ab|a)b*c abc (0,3)(0,2) 82 | 86 ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) 83 | 87 ^a(bc+|b[eh])g|.h$ abh (1,3)(?,?) 84 | 88 (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)(?,?) 85 | 89 (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) 86 | 90 (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)(?,?) 
87 | 91 (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) 88 | 92 (.*)c(.*) abcde (0,5)(0,2)(3,5) 89 | 93 a(bc)d abcd (0,4)(1,3) 90 | 94 a[-]?c ac (0,3) 91 | 95 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Qaddafi (0,15)(?,?)(10,12) 92 | 96 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar_Gadhafi (0,16)(?,?)(11,13) 93 | 97 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Kaddafi (0,15)(?,?)(10,12) 94 | 98 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Qadhafi (0,15)(?,?)(10,12) 95 | 99 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Gadafi (0,14)(?,?)(10,11) 96 | 100 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar_Qadafi (0,15)(?,?)(11,12) 97 | 101 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar_Gaddafi (0,14)(?,?)(9,11) 98 | 102 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar_Qadhdhafi (0,18)(?,?)(13,15) 99 | 103 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Khaddafi (0,16)(?,?)(11,13) 100 | 104 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Ghaddafy (0,16)(?,?)(11,13) 101 | 105 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Ghadafi (0,15)(?,?)(11,12) 102 | 106 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Ghaddafi (0,16)(?,?)(11,13) 103 | 107 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar_Kaddafi (0,14)(?,?)(9,11) 104 | 108 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Quathafi (0,16)(?,?)(11,13) 105 | 109 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar_Gheddafi (0,16)(?,?)(11,13) 106 | 110 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar_Khadafy (0,15)(?,?)(11,12) 107 | 111 M[ou]'?am+[ae]r_.*([AEae]l[-_])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar_Qudhafi (0,15)(?,?)(10,12) 108 | 112 a+(b|c)*d+ aabcdd (0,6)(3,4) 109 | 113 ^.+$ vivi (0,4) 110 | 114 ^(.+)$ vivi (0,4)(0,4) 111 | 115 ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) 112 | 116 ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) 113 | 117 ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) 114 | 118 ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) 115 | 119 ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) 116 | 120 ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) 117 | 121 ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) 118 | 122 ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)(?,?) 119 | 123 ((foo)|bar)!bas bar!bas (0,7)(0,3)(?,?) 120 | 124 ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)(?,?) 121 | 125 ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) 122 | 126 (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) 123 | 127 (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) 124 | 128 (foo|(bar))!bas foo!bas (0,7)(0,3)(?,?) 125 | 129 (foo|bar)!bas bar!bas (0,7)(0,3) 126 | 130 (foo|bar)!bas foo!bar!bas (4,11)(4,7) 127 | 131 (foo|bar)!bas foo!bas (0,7)(0,3) 128 | 132 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) 129 | 133 ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)(?,?)(?,?) 130 | 134 ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)(?,?)(?,?) 131 | 135 ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11) 132 | 136 ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)(?,?)(?,?) 
133 | 137 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)(?,?)(?,?) 134 | 138 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)(?,?)(?,?) 135 | 139 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) 136 | 140 ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)(?,?)(?,?) 137 | 141 .*(/XXX).* /XXX (0,4)(0,4) 138 | 142 .*(\\XXX).* \XXX (0,4)(0,4) 139 | 143 \\XXX \XXX (0,4) 140 | 144 .*(/000).* /000 (0,4)(0,4) 141 | 145 .*(\\000).* \000 (0,4)(0,4) 142 | 146 \\000 \000 (0,4) 143 | 147 [[:graph:]]+ !#}~ (0,4) 144 | 148 [[:xdigit:]]+ dE4db3eF (0,8) 145 | 149 [[:print:]]+ !#}~ (0,4) 146 | -------------------------------------------------------------------------------- /test/cases/class.txt: -------------------------------------------------------------------------------- 1 | 1 aa* xaxaax (1,2) 2 | 2 (a*)(ab)*(b*) abc (0,2)(0,1)(?,?)(1,2) 3 | -2 (a*)(ab)*(b*) abc (0,2)(0,0)(0,2)(2,2) 4 | 3 ((a*)(ab)*)((b*)(a*)) aba (0,3)(0,2)(0,0)(0,2)(2,3)(2,2)(2,3) 5 | 4 (...?.?)* xxxxxx (0,6)(4,6) 6 | 5 (a|ab)(bc|c) abcabc (0,3)(0,2)(2,3) 7 | 6 (aba|a*b)(aba|a*b) ababa (0,5)(0,2)(2,5) 8 | 7 (a*){2} xxxxx (0,0)(0,0) 9 | 8 (a*)* a (0,1)(0,1) 10 | 9 (aba|a*b)* ababa (0,5)(2,5) 11 | 10 (a(b)?)+ aba (0,3)(2,3)(?,?) 12 | 11 .*(.*) ab (0,2)(2,2) 13 | 12 (a?)((ab)?)(b?)a?(ab)?b? abab (0,4)(0,1)(1,1)(?,?)(1,2)(?,?) 14 | -12 (a?)((ab)?)(b?)a?(ab)?b? abab (0,4)(0,1)(1,1)(?,?)(1,2)(2,4) 15 | -------------------------------------------------------------------------------- /test/cases/forced-assoc.txt: -------------------------------------------------------------------------------- 1 | 1 (a|ab)(c|bcd) abcd (0,4)(0,1)(1,4) 2 | 2 (a|ab)(bcd|c) abcd (0,4)(0,1)(1,4) 3 | 3 (ab|a)(c|bcd) abcd (0,4)(0,1)(1,4) 4 | 4 (ab|a)(bcd|c) abcd (0,4)(0,1)(1,4) 5 | 5 ((a|ab)(c|bcd))(d*) abcd (0,4)(0,4)(0,1)(1,4)(4,4) 6 | 6 ((a|ab)(bcd|c))(d*) abcd (0,4)(0,4)(0,1)(1,4)(4,4) 7 | 7 ((ab|a)(c|bcd))(d*) abcd (0,4)(0,4)(0,1)(1,4)(4,4) 8 | 8 ((ab|a)(bcd|c))(d*) abcd (0,4)(0,4)(0,1)(1,4)(4,4) 9 | 9 (a|ab)((c|bcd)(d*)) abcd (0,4)(0,2)(2,4)(2,3)(3,4) 10 | 10 (a|ab)((bcd|c)(d*)) abcd (0,4)(0,2)(2,4)(2,3)(3,4) 11 | 11 (ab|a)((c|bcd)(d*)) abcd (0,4)(0,2)(2,4)(2,3)(3,4) 12 | 12 (ab|a)((bcd|c)(d*)) abcd (0,4)(0,2)(2,4)(2,3)(3,4) 13 | 13 (a*)(b|abc) abc (0,3)(0,0)(0,3) 14 | 14 (a*)(abc|b) abc (0,3)(0,0)(0,3) 15 | 15 ((a*)(b|abc))(c*) abc (0,3)(0,3)(0,0)(0,3)(3,3) 16 | 16 ((a*)(abc|b))(c*) abc (0,3)(0,3)(0,0)(0,3)(3,3) 17 | 17 (a*)((b|abc)(c*)) abc (0,3)(0,1)(1,3)(1,2)(2,3) 18 | 18 (a*)((abc|b)(c*)) abc (0,3)(0,1)(1,3)(1,2)(2,3) 19 | 19 (a*)(b|abc) abc (0,3)(0,0)(0,3) 20 | 20 (a*)(abc|b) abc (0,3)(0,0)(0,3) 21 | 21 ((a*)(b|abc))(c*) abc (0,3)(0,3)(0,0)(0,3)(3,3) 22 | 22 ((a*)(abc|b))(c*) abc (0,3)(0,3)(0,0)(0,3)(3,3) 23 | 23 (a*)((b|abc)(c*)) abc (0,3)(0,1)(1,3)(1,2)(2,3) 24 | 24 (a*)((abc|b)(c*)) abc (0,3)(0,1)(1,3)(1,2)(2,3) 25 | 25 (a|ab) ab (0,2)(0,2) 26 | 26 (ab|a) ab (0,2)(0,2) 27 | 27 (a|ab)(b*) ab (0,2)(0,2)(2,2) 28 | 28 (ab|a)(b*) ab (0,2)(0,2)(2,2) 29 | -------------------------------------------------------------------------------- /test/cases/left-assoc.txt: -------------------------------------------------------------------------------- 1 | -1 (a|ab)(c|bcd)(d*) abcd (0,4)(0,1)(1,4)(4,4) 2 | -2 (a|ab)(bcd|c)(d*) abcd (0,4)(0,1)(1,4)(4,4) 3 | -3 (ab|a)(c|bcd)(d*) abcd (0,4)(0,1)(1,4)(4,4) 4 | -4 (ab|a)(bcd|c)(d*) abcd (0,4)(0,1)(1,4)(4,4) 5 | -5 (a*)(b|abc)(c*) abc (0,3)(0,0)(0,3)(3,3) 6 | -6 (a*)(abc|b)(c*) abc (0,3)(0,0)(0,3)(3,3) 7 | -7 (a*)(b|abc)(c*) abc 
(0,3)(0,0)(0,3)(3,3) 8 | -8 (a*)(abc|b)(c*) abc (0,3)(0,0)(0,3)(3,3) 9 | -9 (a|ab)(c|bcd)(d|.*) abcd (0,4)(0,1)(1,4)(4,4) 10 | -10 (a|ab)(bcd|c)(d|.*) abcd (0,4)(0,1)(1,4)(4,4) 11 | -11 (ab|a)(c|bcd)(d|.*) abcd (0,4)(0,1)(1,4)(4,4) 12 | -12 (ab|a)(bcd|c)(d|.*) abcd (0,4)(0,1)(1,4)(4,4) 13 | -------------------------------------------------------------------------------- /test/cases/nullsub3.txt: -------------------------------------------------------------------------------- 1 | 1 (a*)* a (0,1)(0,1) 2 | 2 SAME x (0,0)(0,0) 3 | 3 SAME aaaaaa (0,6)(0,6) 4 | 4 SAME aaaaaax (0,6)(0,6) 5 | 5 (a*)+ a (0,1)(0,1) 6 | 6 SAME x (0,0)(0,0) 7 | 7 SAME aaaaaa (0,6)(0,6) 8 | 8 SAME aaaaaax (0,6)(0,6) 9 | 9 (a+)* a (0,1)(0,1) 10 | 10 SAME x (0,0)(?,?) 11 | 11 SAME aaaaaa (0,6)(0,6) 12 | 12 SAME aaaaaax (0,6)(0,6) 13 | 13 (a+)+ a (0,1)(0,1) 14 | 14 SAME x NOMATCH 15 | 15 SAME aaaaaa (0,6)(0,6) 16 | 16 SAME aaaaaax (0,6)(0,6) 17 | 17 ([a]*)* a (0,1)(0,1) 18 | 18 SAME x (0,0)(0,0) 19 | 19 SAME aaaaaa (0,6)(0,6) 20 | 20 SAME aaaaaax (0,6)(0,6) 21 | 21 ([a]*)+ a (0,1)(0,1) 22 | 22 SAME x (0,0)(0,0) 23 | 23 SAME aaaaaa (0,6)(0,6) 24 | 24 SAME aaaaaax (0,6)(0,6) 25 | 25 ([^b]*)* a (0,1)(0,1) 26 | 26 SAME b (0,0)(0,0) 27 | 27 SAME aaaaaa (0,6)(0,6) 28 | 28 SAME aaaaaab (0,6)(0,6) 29 | 29 ([ab]*)* a (0,1)(0,1) 30 | 30 SAME aaaaaa (0,6)(0,6) 31 | 31 SAME ababab (0,6)(0,6) 32 | 32 SAME bababa (0,6)(0,6) 33 | 33 SAME b (0,1)(0,1) 34 | 34 SAME bbbbbb (0,6)(0,6) 35 | 35 SAME aaaabcde (0,5)(0,5) 36 | 36 ([^a]*)* b (0,1)(0,1) 37 | 37 SAME bbbbbb (0,6)(0,6) 38 | 38 SAME aaaaaa (0,0)(0,0) 39 | 39 ([^ab]*)* ccccxx (0,6)(0,6) 40 | 40 SAME ababab (0,0)(0,0) 41 | 41 ((z)+|a)* zabcde (0,2)(1,2)(?,?) 42 | 42 (a) aaa (0,1)(0,1) 43 | 46 (a*)*(x) x (0,1)(0,0)(0,1) 44 | 47 (a*)*(x) ax (0,2)(0,1)(1,2) 45 | 48 (a*)*(x) axa (0,2)(0,1)(1,2) 46 | 49 (a*)+(x) x (0,1)(0,0)(0,1) 47 | 50 (a*)+(x) ax (0,2)(0,1)(1,2) 48 | 51 (a*)+(x) axa (0,2)(0,1)(1,2) 49 | 52 (a*){2}(x) x (0,1)(0,0)(0,1) 50 | 53 (a*){2}(x) ax (0,2)(1,1)(1,2) 51 | 54 (a*){2}(x) axa (0,2)(1,1)(1,2) 52 | -------------------------------------------------------------------------------- /test/cases/osx-bsd-critical.txt: -------------------------------------------------------------------------------- 1 | 1 (()|.)(b) ab (0,2)(0,1)(?,?)(1,2) 2 | -1 (()|.)(b) ab (1,2)(1,1)(1,1)(1,2) 3 | 2 (()|[ab])(b) ab (0,2)(0,1)(?,?)(1,2) 4 | -2 (()|[ab])(b) ab (1,2)(1,1)(1,1)(1,2) 5 | 3 (()|[ab])+b aaab (0,4)(2,3)(?,?) 6 | -3 (()|[ab])+b aaab (3,4)(3,3)(3,3) 7 | 11 (.|())(b) ab (0,2)(0,1)(?,?)(1,2) 8 | 12 ([ab]|())(b) ab (0,2)(0,1)(?,?)(1,2) 9 | 14 ([ab]|())+b aaab (0,4)(2,3)(?,?) 10 | -14 ([ab]|())+b aaab (0,4)(3,3)(3,3) 11 | 20 (.?)(b) ab (0,2)(0,1)(1,2) 12 | -------------------------------------------------------------------------------- /test/cases/repetition2.txt: -------------------------------------------------------------------------------- 1 | 1 ((..)|(.)) NULL NOMATCH 2 | 2 ((..)|(.))((..)|(.)) NULL NOMATCH 3 | 3 ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH 4 | 4 ((..)|(.)){1} NULL NOMATCH 5 | 5 ((..)|(.)){2} NULL NOMATCH 6 | 6 ((..)|(.)){3} NULL NOMATCH 7 | 7 ((..)|(.))* NULL (0,0)(?,?)(?,?)(?,?) 8 | 8 ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) 9 | 9 ((..)|(.))((..)|(.)) a NOMATCH 10 | 10 ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH 11 | 11 ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) 12 | 12 ((..)|(.)){2} a NOMATCH 13 | 13 ((..)|(.)){3} a NOMATCH 14 | 14 ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) 15 | 15 ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) 
16 | 16 ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) 17 | 17 ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH 18 | 18 ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) 19 | 19 ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) 20 | 20 ((..)|(.)){3} aa NOMATCH 21 | 21 ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) 22 | 22 ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) 23 | 23 ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) 24 | 24 ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) 25 | 25 ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) 26 | 26 ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) 27 | 27 ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) 28 | 28 ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) 29 | 29 ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) 30 | 30 ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) 31 | 31 ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) 32 | 32 ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) 33 | 33 ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) 34 | 34 ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) 35 | 35 ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) 36 | 36 ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) 37 | 37 ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) 38 | 38 ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) 39 | 39 ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) 40 | 40 ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) 41 | 41 ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) 42 | 42 ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) 43 | 43 ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) 44 | 44 ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) 45 | 45 ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) 46 | 46 ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) 47 | 47 ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) 48 | 48 ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) 49 | 49 ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) 
50 | 100 X(.?){0,}Y X1234567Y (0,9)(7,8) 51 | 101 X(.?){1,}Y X1234567Y (0,9)(7,8) 52 | 102 X(.?){2,}Y X1234567Y (0,9)(7,8) 53 | 103 X(.?){3,}Y X1234567Y (0,9)(7,8) 54 | 104 X(.?){4,}Y X1234567Y (0,9)(7,8) 55 | 105 X(.?){5,}Y X1234567Y (0,9)(7,8) 56 | 106 X(.?){6,}Y X1234567Y (0,9)(7,8) 57 | 107 X(.?){7,}Y X1234567Y (0,9)(7,8) 58 | 108 X(.?){8,}Y X1234567Y (0,9)(8,8) 59 | 110 X(.?){0,8}Y X1234567Y (0,9)(7,8) 60 | 111 X(.?){1,8}Y X1234567Y (0,9)(7,8) 61 | 112 X(.?){2,8}Y X1234567Y (0,9)(7,8) 62 | 113 X(.?){3,8}Y X1234567Y (0,9)(7,8) 63 | 114 X(.?){4,8}Y X1234567Y (0,9)(7,8) 64 | 115 X(.?){5,8}Y X1234567Y (0,9)(7,8) 65 | 116 X(.?){6,8}Y X1234567Y (0,9)(7,8) 66 | 117 X(.?){7,8}Y X1234567Y (0,9)(7,8) 67 | 118 X(.?){8,8}Y X1234567Y (0,9)(8,8) 68 | 260 (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) 69 | 261 (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) 70 | 262 (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) 71 | 263 (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) 72 | 264 (a|ab|c|bcd){4,}(d*) ababcd NOMATCH 73 | 265 (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) 74 | 266 (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) 75 | 267 (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) 76 | 268 (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) 77 | 269 (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH 78 | 270 (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) 79 | 271 (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) 80 | -------------------------------------------------------------------------------- /test/cases/right-assoc.txt: -------------------------------------------------------------------------------- 1 | 1 (a|ab)(c|bcd)(d*) abcd (0,4)(0,2)(2,3)(3,4) 2 | 2 (a|ab)(bcd|c)(d*) abcd (0,4)(0,2)(2,3)(3,4) 3 | 3 (ab|a)(c|bcd)(d*) abcd (0,4)(0,2)(2,3)(3,4) 4 | 4 (ab|a)(bcd|c)(d*) abcd (0,4)(0,2)(2,3)(3,4) 5 | 5 (a*)(b|abc)(c*) abc (0,3)(0,1)(1,2)(2,3) 6 | 6 (a*)(abc|b)(c*) abc (0,3)(0,1)(1,2)(2,3) 7 | 7 (a*)(b|abc)(c*) abc (0,3)(0,1)(1,2)(2,3) 8 | 8 (a*)(abc|b)(c*) abc (0,3)(0,1)(1,2)(2,3) 9 | 9 (a|ab)(c|bcd)(d|.*) abcd (0,4)(0,2)(2,3)(3,4) 10 | 10 (a|ab)(bcd|c)(d|.*) abcd (0,4)(0,2)(2,3)(3,4) 11 | 11 (ab|a)(c|bcd)(d|.*) abcd (0,4)(0,2)(2,3)(3,4) 12 | 12 (ab|a)(bcd|c)(d|.*) abcd (0,4)(0,2)(2,3)(3,4) 13 | -------------------------------------------------------------------------------- /test/cases/totest.txt: -------------------------------------------------------------------------------- 1 | 01 a+ xaax (1,3) 2 | 03 (a?)((ab)?) ab (0,2)(0,0)(0,2)(0,2) 3 | 04 (a?)((ab)?)(b?) ab (0,2)(0,1)(1,1)(?,?)(1,2) 4 | 05 ((a?)((ab)?))(b?) ab (0,2)(0,2)(0,0)(0,2)(0,2)(2,2) 5 | 06 (a?)(((ab)?)(b?)) ab (0,2)(0,1)(1,2)(1,1)(?,?)(1,2) 6 | 07 (.?) x (0,1)(0,1) 7 | 08 (.?){1} x (0,1)(0,1) 8 | 09 (.?)(.?) x (0,1)(0,1)(1,1) 9 | 10 (.?){2} x (0,1)(1,1) 10 | 11 (.?)* x (0,1)(0,1) 11 | 12 (.?.?) xxx (0,2)(0,2) 12 | 13 (.?.?){1} xxx (0,2)(0,2) 13 | 14 (.?.?)(.?.?) xxx (0,3)(0,2)(2,3) 14 | 15 (.?.?){2} xxx (0,3)(2,3) 15 | 16 (.?.?)(.?.?)(.?.?) xxx (0,3)(0,2)(2,3)(3,3) 16 | 17 (.?.?){3} xxx (0,3)(3,3) 17 | 18 (.?.?)* xxx (0,3)(2,3) 18 | 19 a?((ab)?)(b?) ab (0,2)(1,1)(?,?)(1,2) 19 | 20 (a?)((ab)?)b? ab (0,2)(0,1)(1,1)(?,?) 20 | 21 a?((ab)?)b? ab (0,2)(1,1)(?,?) 
21 | 22 (a*){2} xxxxx (0,0)(0,0) 22 | 23 (ab?)(b?a) aba (0,3)(0,2)(2,3) 23 | 24 (a|ab)(ba|a) aba (0,3)(0,2)(2,3) 24 | 25 (a|ab|ba) aba (0,2)(0,2) 25 | 26 (a|ab|ba)(a|ab|ba) aba (0,3)(0,2)(2,3) 26 | 27 (a|ab|ba)* aba (0,3)(2,3) 27 | 28 (aba|a*b) ababa (0,3)(0,3) 28 | 29 (aba|a*b)(aba|a*b) ababa (0,5)(0,2)(2,5) 29 | 1029 (aba|a*b)(aba|a*b)(aba|a*b) ababa NOMATCH 30 | 30 (aba|a*b)* ababa (0,5)(2,5) 31 | 31 (aba|ab|a) ababa (0,3)(0,3) 32 | 32 (aba|ab|a)(aba|ab|a) ababa (0,5)(0,2)(2,5) 33 | 1032 (aba|ab|a)(aba|ab|a)(aba|ab|a) ababa (0,5)(0,2)(2,4)(4,5) 34 | 33 (aba|ab|a)* ababa (0,5)(2,5) 35 | 34 (a(b)?) aba (0,2)(0,2)(1,2) 36 | 35 (a(b)?)(a(b)?) aba (0,3)(0,2)(1,2)(2,3)(?,?) 37 | 36 (a(b)?)+ aba (0,3)(2,3)(?,?) 38 | 37 (.*)(.*) xx (0,2)(0,2)(2,2) 39 | 38 .*(.*) xx (0,2)(2,2) 40 | 39 (a.*z|b.*y) azbazby (0,5)(0,5) 41 | 40 (a.*z|b.*y)(a.*z|b.*y) azbazby (0,7)(0,5)(5,7) 42 | 41 (a.*z|b.*y)* azbazby (0,7)(5,7) 43 | 42 (.|..)(.*) ab (0,2)(0,2)(2,2) 44 | 43 ((..)*(...)*) xxx (0,3)(0,3)(?,?)(0,3) 45 | 44 ((..)*(...)*)((..)*(...)*) xxx (0,3)(0,3)(?,?)(0,3)(3,3)(?,?)(?,?) 46 | 45 ((..)*(...)*)* xxx (0,3)(0,3)(?,?)(0,3) 47 | 83 (aa(b(b))?)+ aabbaa (0,6)(4,6)(?,?)(?,?) 48 | 84 (a(b)?)+ aba (0,3)(2,3)(?,?) 49 | 85 ([ab]+)([bc]+)([cd]*) abcd (0,4)(0,2)(2,3)(3,4) 50 | 90 ^(A([^B]*))?(B(.*))? Aa (0,2)(0,2)(1,2)(?,?)(?,?) 51 | 91 ^(A([^B]*))?(B(.*))? Bb (0,2)(?,?)(?,?)(0,2)(1,2) 52 | 110 (^){0,3} a (0,0)(0,0) 53 | 111 ($){0,3} a (0,0)(?,?) 54 | 112 (^){1,3} a (0,0)(0,0) 55 | 113 ($){1,3} a (1,1)(1,1) 56 | 200 ((s^)|(s)|(^)|($)|(^.))* searchme (0,1)(0,1)(?,?)(0,1)(?,?)(?,?)(?,?) 57 | 201 s(()|^)e searchme (0,2)(1,1)(1,1) 58 | 202 s(^|())e searchme (0,2)(1,1)(1,1) 59 | 203 s(^|())e searchme (0,2)(1,1)(1,1) 60 | 204 s()?e searchme (0,2)(1,1) 61 | 205 s(^)?e searchme (0,2)(?,?) 62 | 206 ((s)|(e)|(a))* searchme (0,3)(2,3)(?,?)(?,?)(2,3) 63 | 207 ((s)|(e)|())* searchme (0,2)(1,2)(?,?)(1,2)(?,?) 64 | 208 ((b*)|c(c*))* cbb (0,3)(1,3)(1,3)(?,?) 65 | 209 (yyy|(x?)){2,4} yyyyyy (0,6)(3,6)(?,?) 66 | 210 ($)|() xxx (0,0)(?,?)(0,0) 67 | 211 $()|^() ac\n (0,0)(?,?)(0,0) 68 | 212 ^()|$() ac\n (0,0)(0,0)(?,?) 69 | 213 ($)?(.) __ (0,1)(?,?)(0,1) 70 | 214 (.|()|())* c (0,1)(0,1)(?,?)(?,?) 71 | 215 ((a)|(b)){2,} ab (0,2)(1,2)(?,?)(1,2) 72 | 216 .()|((.)?) NULL (0,0)(?,?)(0,0)(?,?) 73 | 217 (.|$){2,} xx (0,2)(1,2) 74 | 218 (.|$){2,2} xx (0,2)(1,2) 75 | 219 (.){2,} xx (0,2)(1,2) 76 | 220 (a|())(b|())(c|()) abc (0,3)(0,1)(?,?)(1,2)(?,?)(2,3)(?,?) 77 | 220 ab()c|ab()c() abc (0,3)(2,2)(-1,-1)(-1,-1) 78 | 250 (b(c)|d(e))* bcde (0,4)(2,4)(-1,-1)(3,4) 79 | 251 (a(b)*)* aba (0,3)(2,3)(-1,-1) 80 | 260 []] ] (0,1) 81 | 261 [^]] ] NOMATCH 82 | 262 [-] - (0,1) 83 | 263 [^-] - NOMATCH 84 | 260 []] a NOMATCH 85 | 261 [^]] a (0,1) 86 | 262 [-] a NOMATCH 87 | 263 [^-] a (0,1) 88 | --------------------------------------------------------------------------------
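For orientation, a brief hand-annotated sketch of the test-case line format shared by the files under test/cases, as parsed by toTest/toTest' in test/Main.hs (this note is illustrative and not part of the test driver; the sample line is taken from basic3.txt):

    5 a($) aa (1,2)(2,2)

reads as: test id 5, regex a($), input "aa", expected spans (1,2) for the whole match and (2,2) for group 1, where each span is a half-open (start,end) pair of offsets as printed by testOne'. A negative id marks a line that is expected to fail; "NULL" as the input stands for the empty string; "SAME" as the regex (in files read via toTest') reuses the previous line's regex; a "?" in the expected spans is rewritten to -1 (a group that did not participate in the match); and NOMATCH means the pattern is expected not to match at all.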