├── .travis.yml
├── CHANGES.md
├── CONTRIBUTING.md
├── CREDITS.md
├── LICENSE
├── README.md
├── Setup.hs
├── Text
    └── Regex
    │   ├── Applicative.hs
    │   └── Applicative
    │       ├── Common.hs
    │       ├── Compile.hs
    │       ├── Interface.hs
    │       ├── Object.hs
    │       ├── Reference.hs
    │       ├── StateQueue.hs
    │       └── Types.hs
├── benchmark
    └── benchmark.hs
├── doctest
    └── doctest.hs
├── regex-applicative.cabal
├── stack.yaml
└── tests
    ├── StateQueue.hs
    └── test.hs


/.travis.yml:
--------------------------------------------------------------------------------
  1 | # This Travis job script has been generated by a script via
  2 | #
  3 | #   haskell-ci 'regex-applicative.cabal' '--output' '.travis.yml'
  4 | #
  5 | # For more information, see https://github.com/haskell-CI/haskell-ci
  6 | #
  7 | # version: 0.9.20200125
  8 | #
  9 | version: ~> 1.0
 10 | language: c
 11 | os: linux
 12 | dist: xenial
 13 | git:
 14 |   # whether to recursively clone submodules
 15 |   submodules: false
 16 | cache:
 17 |   directories:
 18 |     - $HOME/.cabal/packages
 19 |     - $HOME/.cabal/store
 20 |     - $HOME/.hlint
 21 | before_cache:
 22 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/build-reports.log
 23 |   # remove files that are regenerated by 'cabal update'
 24 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/00-index.*
 25 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/*.json
 26 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.cache
 27 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar
 28 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar.idx
 29 |   - rm -rfv $CABALHOME/packages/head.hackage
 30 | jobs:
 31 |   include:
 32 |     - compiler: ghc-8.8.2
 33 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.8.2","cabal-install-3.0"]}}
 34 |       os: linux
 35 |     - compiler: ghc-8.6.5
 36 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.6.5","cabal-install-3.0"]}}
 37 |       os: linux
 38 |     - compiler: ghc-8.4.4
 39 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.4.4","cabal-install-3.0"]}}
 40 |       os: linux
 41 |     - compiler: ghc-8.2.2
 42 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.2.2","cabal-install-3.0"]}}
 43 |       os: linux
 44 |     - compiler: ghc-8.0.2
 45 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.0.2","cabal-install-3.0"]}}
 46 |       os: linux
 47 | before_install:
 48 |   - HC=$(echo "/opt/$CC/bin/ghc" | sed 's/-/\//')
 49 |   - WITHCOMPILER="-w $HC"
 50 |   - HADDOCK=$(echo "/opt/$CC/bin/haddock" | sed 's/-/\//')
 51 |   - HCPKG="$HC-pkg"
 52 |   - unset CC
 53 |   - CABAL=/opt/ghc/bin/cabal
 54 |   - CABALHOME=$HOME/.cabal
 55 |   - export PATH="$CABALHOME/bin:$PATH"
 56 |   - TOP=$(pwd)
 57 |   - "HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\\d+)\\.(\\d+)\\.(\\d+)(\\.(\\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))')"
 58 |   - echo $HCNUMVER
 59 |   - CABAL="$CABAL -vnormal+nowrap"
 60 |   - set -o pipefail
 61 |   - TEST=--enable-tests
 62 |   - BENCH=--enable-benchmarks
 63 |   - HEADHACKAGE=false
 64 |   - rm -f $CABALHOME/config
 65 |   - |
 66 |     echo "verbose: normal +nowrap +markoutput"          >> $CABALHOME/config
 67 |     echo "remote-build-reporting: anonymous"            >> $CABALHOME/config
 68 |     echo "write-ghc-environment-files: always"          >> $CABALHOME/config
 69 |     echo "remote-repo-cache: $CABALHOME/packages"       >> $CABALHOME/config
 70 |     echo "logs-dir:          $CABALHOME/logs"           >> $CABALHOME/config
 71 |     echo "world-file:        $CABALHOME/world"          >> $CABALHOME/config
 72 |     echo "extra-prog-path:   $CABALHOME/bin"            >> $CABALHOME/config
 73 |     echo "symlink-bindir:    $CABALHOME/bin"            >> $CABALHOME/config
 74 |     echo "installdir:        $CABALHOME/bin"            >> $CABALHOME/config
 75 |     echo "build-summary:     $CABALHOME/logs/build.log" >> $CABALHOME/config
 76 |     echo "store-dir:         $CABALHOME/store"          >> $CABALHOME/config
 77 |     echo "install-dirs user"                            >> $CABALHOME/config
 78 |     echo "  prefix: $CABALHOME"                         >> $CABALHOME/config
 79 |     echo "repository hackage.haskell.org"               >> $CABALHOME/config
 80 |     echo "  url: http://hackage.haskell.org/"           >> $CABALHOME/config
 81 | install:
 82 |   - ${CABAL} --version
 83 |   - echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]"
 84 |   - |
 85 |     echo "program-default-options"                >> $CABALHOME/config
 86 |     echo "  ghc-options: $GHCJOBS +RTS -M6G -RTS" >> $CABALHOME/config
 87 |   - cat $CABALHOME/config
 88 |   - rm -fv cabal.project cabal.project.local cabal.project.freeze
 89 |   - travis_retry ${CABAL} v2-update -v
 90 |   # Generate cabal.project
 91 |   - rm -rf cabal.project cabal.project.local cabal.project.freeze
 92 |   - touch cabal.project
 93 |   - |
 94 |     echo "packages: ." >> cabal.project
 95 |   - |
 96 |   - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(regex-applicative)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done"
 97 |   - cat cabal.project || true
 98 |   - cat cabal.project.local || true
 99 |   - if [ -f "./configure.ac" ]; then (cd "." && autoreconf -i); fi
100 |   - ${CABAL} v2-freeze $WITHCOMPILER ${TEST} ${BENCH}
101 |   - "cat cabal.project.freeze | sed -E 's/^(constraints: *| *)//' | sed 's/any.//'"
102 |   - rm  cabal.project.freeze
103 |   - ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} --dep -j2 all
104 |   - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks --dep -j2 all
105 | script:
106 |   - DISTDIR=$(mktemp -d /tmp/dist-test.XXXX)
107 |   # Packaging...
108 |   - ${CABAL} v2-sdist all
109 |   # Unpacking...
110 |   - mv dist-newstyle/sdist/*.tar.gz ${DISTDIR}/
111 |   - cd ${DISTDIR} || false
112 |   - find . -maxdepth 1 -type f -name '*.tar.gz' -exec tar -xvf '{}' \;
113 |   - find . -maxdepth 1 -type f -name '*.tar.gz' -exec rm       '{}' \;
114 |   - PKGDIR_regex_applicative="$(find . -maxdepth 1 -type d -regex '.*/regex-applicative-[0-9.]*')"
115 |   # Generate cabal.project
116 |   - rm -rf cabal.project cabal.project.local cabal.project.freeze
117 |   - touch cabal.project
118 |   - |
119 |     echo "packages: ${PKGDIR_regex_applicative}" >> cabal.project
120 |   - |
121 |   - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(regex-applicative)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done"
122 |   - cat cabal.project || true
123 |   - cat cabal.project.local || true
124 |   # Building...
125 |   # this builds all libraries and executables (without tests/benchmarks)
126 |   - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all
127 |   # Building with tests and benchmarks...
128 |   # build & run tests, build benchmarks
129 |   - ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} all
130 |   # Testing...
131 |   - ${CABAL} v2-test $WITHCOMPILER ${TEST} ${BENCH} all
132 |   # cabal check...
133 |   - (cd ${PKGDIR_regex_applicative} && ${CABAL} -vnormal check)
134 |   # haddock...
135 |   - ${CABAL} v2-haddock $WITHCOMPILER --with-haddock $HADDOCK ${TEST} ${BENCH} all
136 |   # Building without installed constraints for packages in global-db...
137 |   - rm -f cabal.project.local
138 |   - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all
139 | 
140 | # REGENDATA ("0.9.20200125",["regex-applicative.cabal","--output",".travis.yml"])
141 | # EOF
142 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
  1 | Changes
  2 | =======
  3 | 
  4 | 0.3.4
  5 | -----
  6 | 
  7 | * Let the user provide a custom `uncons` function (add
  8 |     `find{First,Longest,Shortest}PrefixWithUncons`)
  9 | * Add `Filtrable` and `Monoid` instances for `RE`
 10 | 
 11 | 0.3.3.1
 12 | -------
 13 | 
 14 | Make a release to refresh the haddocks on hackage
 15 | (see <https://github.com/UnkindPartition/regex-applicative/issues/35>).
 16 | 
 17 | 0.3.3
 18 | -----
 19 | 
 20 | Add `replace`
 21 | 
 22 | 0.3.2.1
 23 | -------
 24 | 
 25 | * Use strict left fold in decimal/hexadecimal
 26 | * Include a missing test module in the sdist tarball
 27 | 
 28 | 0.3.2
 29 | -----
 30 | 
 31 | Add `msym`
 32 | 
 33 | 0.3.1
 34 | -----
 35 | 
 36 | Add `comap`
 37 | 
 38 | 0.3.0.3
 39 | -------
 40 | 
 41 | * Fix the test suite
 42 | * Fix build with GHC 7.9
 43 | 
 44 | 0.3.0.2
 45 | -------
 46 | 
 47 | Fix the test suite
 48 | 
 49 | 0.3.0.1
 50 | -------
 51 | 
 52 | Port the test suite to tasty
 53 | 
 54 | 0.3
 55 | ---
 56 | * Add a new module, `Text.Regex.Applicative.Common`, which contains some
 57 |   commonly used regexps (by Aleksey Khudyakov)
 58 | * Improve the test suite
 59 | 
 60 | 0.2.1
 61 | -----
 62 | * Add the `withMatched` function
 63 | * Make matching functions a bit more lax
 64 | * Fix a bug in the `empty` method
 65 | 
 66 | 0.2
 67 | ---
 68 | * Infix matching functions
 69 | * Improved documentation
 70 | * Improved performance
 71 | * Improved portability
 72 | 
 73 | 0.1.5
 74 | -----
 75 | * Expose Object interface
 76 | * Allow matching prefixes rather than the whole string
 77 | * Add non-greedy repetitions
 78 | 
 79 | 0.1.4
 80 | -----
 81 | * Completely rewrite the engine. Now it's faster and runs in constant space.
 82 | * Add 'string' function and 'IsString' instance.
 83 | 
 84 | 0.1.3
 85 | -----
 86 | * Fix a .cabal-file issue introduced in 0.1.2
 87 | * Change the fixity of =~
 88 | 
 89 | 0.1.2
 90 | -----
 91 | * Relax the constraint on the containers version
 92 | 
 93 | 0.1.1
 94 | ---
 95 | * Fix a bug in 'reFoldl' and 'many'
 96 | * "Lazy" infinite regexes are no longer supported
 97 | 
 98 | 0.1
 99 | ---
100 | * Initial release
101 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing guidelines
 2 | 
 3 | Bug reports are always welcome. Please provide full instructions to reproduce.
 4 | 
 5 | Pull requests fixing bugs are much appreciated.
 6 | 
 7 | Feature requests (whether in the form of an issue or a pull request) are
 8 | acceptable only if:
 9 | 
10 | 1. The feature would be useful for a large fraction of users, as opposed to
11 |    tackling an uncommon use case.
12 | 2. When filing a feature request as an issue, if it's accepted, you are willing
13 |    to implement it.
14 | 
15 | When creating a pull request, please read and follow [How to prepare a good pull request](https://ro-che.info/articles/2016-09-18-good-pull-requests).
16 | 
17 | Issues asking questions are fine, assuming you've put a reasonable effort to
18 | figure out the answer yourself first.
19 | 


--------------------------------------------------------------------------------
/CREDITS.md:
--------------------------------------------------------------------------------
1 | The current implementation is based on ideas [publicized][cox] by Russ Cox.
2 | 
3 | The original implementation was inspired and heavily based on the ideas from ["A
4 | Play on Regular Expressions"][play] by Sebastian Fischer, Frank Huch and Thomas
5 | Wilke.
6 | 
7 | [cox]: http://swtch.com/~rsc/regexp/
8 | [play]: http://sebfisch.github.com/haskell-regexp/regexp-play.pdf
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2011 Roman Cheplyaka
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | regex-applicative
 2 | =================
 3 | 
 4 | *regex-applicative* is a parsing combinator library for Haskell based on regular
 5 | expressions.
 6 | 
 7 | Example
 8 | -------
 9 | 
10 | ``` haskell
11 | import Text.Regex.Applicative
12 | 
13 | data Protocol = HTTP | FTP deriving Show
14 | 
15 | protocol :: RE Char Protocol
16 | protocol = HTTP <$ string "http" <|> FTP <$ string "ftp"
17 | 
18 | type Host = String
19 | type Location = String
20 | data URL = URL Protocol Host Location deriving Show
21 | 
22 | host :: RE Char Host
23 | host = many $ psym $ (/= '/')
24 | 
25 | url :: RE Char URL
26 | url = URL <$> protocol <* string "://" <*> host <* sym '/' <*> many anySym
27 | 
28 | main = print $ "http://stackoverflow.com/questions" =~ url
29 | ```
30 | 
31 | Documentation
32 | -------------
33 | 
34 | See the [API reference][haddock].
35 | 
36 | Performance
37 | -----------
38 | 
39 | For common tasks, this package is several times slower than monadic
40 | parser combinator libraries like parsec. However, this library has a roughly
41 | linear complexity, whereas monadic parser combinators have exponential
42 | worst-time complexity (see [here](https://swtch.com/~rsc/regexp/regexp1.html)).
43 | 
44 | Some tips to make your regex run faster:
45 | 
46 | 1. If you don't care about the result of the whole regex or its part, only
47 |    whether it matches or not, mark it with `void` or `<$`. Recognition is faster
48 |    than parsing.
49 | 1. If you apply the same regex to multiple strings, partially apply it like so:
50 | 
51 |    ```
52 |    let matcher = match my_regex
53 |    in  map matcher my_strings
54 |    ```
55 | 
56 |    This way the compiled regex is stored in the `matcher` value and shared among
57 |    the strings.
58 | 
59 | GHC support
60 | -----------
61 | 
62 | Only GHC versions >= 8.0 are supported, although older versions may work too.
63 | 
64 | [haddock]: http://hackage.haskell.org/packages/archive/regex-applicative/latest/doc/html/Text-Regex-Applicative.html
65 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/Text/Regex/Applicative.hs:
--------------------------------------------------------------------------------
 1 | --------------------------------------------------------------------
 2 | -- |
 3 | -- Module    : Text.Regex.Applicative
 4 | -- Copyright : (c) Roman Cheplyaka
 5 | -- License   : MIT
 6 | --
 7 | -- Maintainer: Roman Cheplyaka <roma@ro-che.info>
 8 | -- Stability : experimental
 9 | --
10 | -- To get started, see some examples on the wiki:
11 | -- <https://github.com/UnkindPartition/regex-applicative/wiki/Examples>
12 | --------------------------------------------------------------------
13 | 
14 | module Text.Regex.Applicative
15 |     ( RE
16 |     , sym
17 |     , psym
18 |     , msym
19 |     , anySym
20 |     , string
21 |     , reFoldl
22 |     , Greediness(..)
23 |     , few
24 |     , comap
25 |     , withMatched
26 |     , match
27 |     , (=~)
28 |     , replace
29 |     , findFirstPrefix
30 |     , findLongestPrefix
31 |     , findShortestPrefix
32 |     , findFirstInfix
33 |     , findLongestInfix
34 |     , findShortestInfix
35 |     -- * Custom uncons function
36 |     -- $uncons
37 |     , findFirstPrefixWithUncons
38 |     , findLongestPrefixWithUncons
39 |     , findShortestPrefixWithUncons
40 |     , module Control.Applicative
41 |     )
42 |     where
43 | import Text.Regex.Applicative.Types
44 | import Text.Regex.Applicative.Interface
45 | import Control.Applicative
46 | 
47 | {- $uncons
48 | The following functions take an argument that splits the input into the first symbol and
49 | the remaining input (if the input is non-empty).
50 | 
51 | It is useful, for example, for feeding a @Text@ to a regex matcher:
52 | 
53 | >>> import qualified Data.Text as T
54 | >>> findFirstPrefixWithUncons T.uncons (many (sym 'a')) "aaa"
55 | Just ("aaa","")
56 | 
57 | For another example, feeding input symbols annotated with source positions into a matcher,
58 | preserving the positions in the remaining input so the location of a lexical error can be
59 | recovered:
60 | 
61 | @
62 | data AList a b = AList { annotation :: a, stripAnnotation :: Maybe (b, AList a b) }
63 | 
64 | findLongestPrefixAnnotated :: RE s a -> AList b s -> Maybe (a, AList b s)
65 | fondLongestPrefixAnnotated = findLongestPrefixWithUncons stripAnnotation
66 | @
67 | 
68 | The use of the other functions taking an @uncons@ argument is exactly analogous.
69 | -}
70 | 


--------------------------------------------------------------------------------
/Text/Regex/Applicative/Common.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Collection of commonly used regular expressions.
 3 | module Text.Regex.Applicative.Common (
 4 |     -- * Digits
 5 |     digit
 6 |   , hexDigit
 7 |     -- * Numbers
 8 |   , signed
 9 |   , decimal
10 |   , hexadecimal
11 |   ) where
12 | 
13 | import Data.Char
14 | import Data.List (foldl')
15 | import Text.Regex.Applicative
16 | 
17 | 
18 | -- | Decimal digit, i.e. @\'0\'@..@\'9\'@
19 | digit :: Num a => RE Char a
20 | digit = fromIntegral . digitToInt <$> psym isDigit
21 | 
22 | -- | Hexadecimal digit
23 | -- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@.
24 | hexDigit :: Num a => RE Char a
25 | hexDigit = fromIntegral . digitToInt <$> psym isHexDigit
26 | 
27 | -- | Add optional sign
28 | signed :: Num a => RE Char a -> RE Char a
29 | signed p = sign <*> p
30 |   where
31 |     sign =  id     <$ sym '+'
32 |         <|> negate <$ sym '-'
33 |         <|> pure id
34 | 
35 | -- | Parse decimal number without sign.
36 | decimal :: Num a => RE Char a
37 | decimal = foldl' (\d i -> d*10 + i) 0 <$> some digit
38 | 
39 | -- | Parse decimal number without sign.
40 | hexadecimal :: Num a => RE Char a
41 | hexadecimal = foldl' (\d i -> d*16 + i) 0 <$> some hexDigit
42 | 


--------------------------------------------------------------------------------
/Text/Regex/Applicative/Compile.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE GADTs #-}
  2 | module Text.Regex.Applicative.Compile (compile) where
  3 | 
  4 | import Control.Monad ((<=<))
  5 | import Control.Monad.Trans.State
  6 | import Data.Foldable
  7 | import Data.Maybe
  8 | import Data.Monoid (Any (..))
  9 | import qualified Data.IntMap as IntMap
 10 | import Text.Regex.Applicative.Types
 11 | 
 12 | compile :: RE s a -> (a -> [Thread s r]) -> [Thread s r]
 13 | compile e k = compile2 e (SingleCont k)
 14 | 
 15 | data Cont a = SingleCont !a | EmptyNonEmpty !a !a
 16 | 
 17 | instance Functor Cont where
 18 |     fmap f k =
 19 |         case k of
 20 |             SingleCont a -> SingleCont (f a)
 21 |             EmptyNonEmpty a b -> EmptyNonEmpty (f a) (f b)
 22 | 
 23 | emptyCont :: Cont a -> a
 24 | emptyCont k =
 25 |     case k of
 26 |         SingleCont a -> a
 27 |         EmptyNonEmpty a _ -> a
 28 | nonEmptyCont :: Cont a -> a
 29 | nonEmptyCont k =
 30 |     case k of
 31 |         SingleCont a -> a
 32 |         EmptyNonEmpty _ a -> a
 33 | 
 34 | -- compile2 function takes two continuations: one when the match is empty and
 35 | -- one when the match is non-empty. See the "Rep" case for the reason.
 36 | compile2 :: RE s a -> Cont (a -> [Thread s r]) -> [Thread s r]
 37 | compile2 e =
 38 |     case e of
 39 |         Eps -> \k -> emptyCont k ()
 40 |         Symbol i p -> \k -> [t $ nonEmptyCont k] where
 41 |           -- t :: (a -> [Thread s r]) -> Thread s r
 42 |           t k = Thread i $ \s ->
 43 |             case p s of
 44 |               Just r -> k r
 45 |               Nothing -> []
 46 |         App n1 n2 ->
 47 |             let a1 = compile2 n1
 48 |                 a2 = compile2 n2
 49 |             in \k -> case k of
 50 |                 SingleCont k -> a1 $ SingleCont $ \a1_value -> a2 $ SingleCont $ k . a1_value
 51 |                 EmptyNonEmpty ke kn ->
 52 |                     a1 $ EmptyNonEmpty
 53 |                         -- empty
 54 |                         (\a1_value -> a2 $ EmptyNonEmpty (ke . a1_value) (kn . a1_value))
 55 |                         -- non-empty
 56 |                         (\a1_value -> a2 $ EmptyNonEmpty (kn . a1_value) (kn . a1_value))
 57 |         Alt n1 n2 ->
 58 |             let a1 = compile2 n1
 59 |                 a2 = compile2 n2
 60 |             in \k -> a1 k ++ a2 k
 61 |         Fail -> const []
 62 |         Fmap f n -> let a = compile2 n in \k -> a $ fmap (. f) k
 63 |         CatMaybes n -> let a = compile2 n in \k -> a $ (<=< toList) <$> k
 64 |         -- This is actually the point where we use the difference between
 65 |         -- continuations. For the inner RE the empty continuation is a
 66 |         -- "failing" one in order to avoid non-termination.
 67 |         Rep g f b n ->
 68 |             let a = compile2 n
 69 |                 threads b k =
 70 |                     combine g
 71 |                         (a $ EmptyNonEmpty (\_ -> []) (\v -> let b' = f b v in threads b' (SingleCont $ nonEmptyCont k)))
 72 |                         (emptyCont k b)
 73 |             in threads b
 74 |         Void n
 75 |           | hasCatMaybes n -> compile2 n . fmap (. \ _ -> ())
 76 |           | otherwise -> compile2_ n . fmap ($ ())
 77 | 
 78 | data FSMState
 79 |     = SAccept
 80 |     | STransition !ThreadId
 81 | 
 82 | type FSMMap s = IntMap.IntMap (s -> Bool, [FSMState])
 83 | 
 84 | mkNFA :: RE s a -> ([FSMState], (FSMMap s))
 85 | mkNFA e =
 86 |     flip runState IntMap.empty $
 87 |         go e [SAccept]
 88 |   where
 89 |   go :: RE s a -> [FSMState] -> State (FSMMap s) [FSMState]
 90 |   go e k =
 91 |     case e of
 92 |         Eps -> return k
 93 |         Symbol i@(ThreadId n) p -> do
 94 |             modify $ IntMap.insert n $
 95 |                 (isJust . p, k)
 96 |             return [STransition i]
 97 |         App n1 n2 -> go n1 =<< go n2 k
 98 |         Alt n1 n2 -> (++) <$> go n1 k <*> go n2 k
 99 |         Fail -> return []
100 |         Fmap _ n -> go n k
101 |         CatMaybes _ -> error "mkNFA CatMaybes"
102 |         Rep g _ _ n ->
103 |             let entries = findEntries n
104 |                 cont = combine g entries k
105 |             in
106 |             -- return value of 'go' is ignored -- it should be a subset of
107 |             -- 'cont'
108 |             go n cont >> return cont
109 |         Void n -> go n k
110 | 
111 |   findEntries :: RE s a -> [FSMState]
112 |   findEntries e =
113 |     -- A simple (although a bit inefficient) way to find all entry points is
114 |     -- just to use 'go'
115 |     evalState (go e []) IntMap.empty
116 | 
117 | hasCatMaybes :: RE s a -> Bool
118 | hasCatMaybes = getAny . foldMapPostorder (Any . \ case CatMaybes _ -> True; _ -> False)
119 | 
120 | compile2_ :: RE s a -> Cont [Thread s r] -> [Thread s r]
121 | compile2_ e =
122 |     let (entries, fsmap) = mkNFA e
123 |         mkThread _ k1 (STransition i@(ThreadId n)) =
124 |             let (p, cont) = fromMaybe (error "Unknown id") $ IntMap.lookup n fsmap
125 |             in [Thread i $ \s ->
126 |                 if p s
127 |                     then concatMap (mkThread k1 k1) cont
128 |                     else []]
129 |         mkThread k0 _ SAccept = k0
130 | 
131 |     in \k -> concatMap (mkThread (emptyCont k) (nonEmptyCont k)) entries
132 | 
133 | combine :: Greediness -> [a] -> [a] -> [a]
134 | combine g continue stop =
135 |     case g of
136 |         Greedy -> continue ++ stop
137 |         NonGreedy -> stop ++ continue
138 | 


--------------------------------------------------------------------------------
/Text/Regex/Applicative/Interface.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TypeFamilies, GADTs, TupleSections #-}
  2 | module Text.Regex.Applicative.Interface where
  3 | import Control.Applicative hiding (empty)
  4 | import Control.Arrow
  5 | import Control.Monad (guard)
  6 | import qualified Data.List as List
  7 | import Data.Maybe
  8 | import Text.Regex.Applicative.Types
  9 | import Text.Regex.Applicative.Object
 10 | 
 11 | -- | 'RE' is a profunctor. This is its contravariant map.
 12 | --
 13 | -- (A dependency on the @profunctors@ package doesn't seem justified.)
 14 | comap :: (s2 -> s1) -> RE s1 a -> RE s2 a
 15 | comap f re =
 16 |   case re of
 17 |     Eps -> Eps
 18 |     Symbol t p    -> Symbol t (p . f)
 19 |     Alt r1 r2     -> Alt (comap f r1) (comap f r2)
 20 |     App r1 r2     -> App (comap f r1) (comap f r2)
 21 |     Fmap g r      -> Fmap g (comap f r)
 22 |     CatMaybes r   -> CatMaybes (comap f r)
 23 |     Fail          -> Fail
 24 |     Rep gr fn a r -> Rep gr fn a (comap f r)
 25 |     Void r        -> Void (comap f r)
 26 | 
 27 | -- | Match and return any single symbol
 28 | anySym :: RE s s
 29 | anySym = msym Just
 30 | 
 31 | -- | Match zero or more instances of the given expression, which are combined using
 32 | -- the given folding function.
 33 | --
 34 | -- 'Greediness' argument controls whether this regular expression should match
 35 | -- as many as possible ('Greedy') or as few as possible ('NonGreedy') instances
 36 | -- of the underlying expression.
 37 | reFoldl :: Greediness -> (b -> a -> b) -> b -> RE s a -> RE s b
 38 | reFoldl g f b a = Rep g f b a
 39 | 
 40 | -- | Match zero or more instances of the given expression, but as
 41 | -- few of them as possible (i.e. /non-greedily/). A greedy equivalent of 'few'
 42 | -- is 'many'.
 43 | --
 44 | -- Examples:
 45 | --
 46 | -- >>> findFirstPrefix (few anySym  <* "b") "ababab"
 47 | -- Just ("a","abab")
 48 | -- >>> findFirstPrefix (many anySym  <* "b") "ababab"
 49 | -- Just ("ababa","")
 50 | few :: RE s a -> RE s [a]
 51 | few a = reverse <$> Rep NonGreedy (flip (:)) [] a
 52 | 
 53 | -- | Return matched symbols as part of the return value
 54 | withMatched :: RE s a -> RE s (a, [s])
 55 | withMatched Eps = flip (,) [] <$> Eps
 56 | withMatched (Symbol t p) = Symbol t (\s -> (,[s]) <$> p s)
 57 | withMatched (Alt a b) = withMatched a <|> withMatched b
 58 | withMatched (App a b) =
 59 |     (\(f, s) (x, t) -> (f x, s ++ t)) <$>
 60 |         withMatched a <*>
 61 |         withMatched b
 62 | withMatched Fail = Fail
 63 | withMatched (Fmap f x) = (f *** id) <$> withMatched x
 64 | withMatched (CatMaybes x) = CatMaybes $
 65 |     (\ (as, s) -> flip (,) s <$> as) <$> withMatched x
 66 | withMatched (Rep gr f a0 x) =
 67 |     Rep gr (\(a, s) (x, t) -> (f a x, s ++ t)) (a0, []) (withMatched x)
 68 | -- N.B.: this ruins the Void optimization
 69 | withMatched (Void x) = (const () *** id) <$> withMatched x
 70 | 
 71 | -- | @s =~ a = match a s@
 72 | (=~) :: [s] -> RE s a -> Maybe a
 73 | (=~) = flip match
 74 | infix 2 =~
 75 | 
 76 | -- | Attempt to match a string of symbols against the regular expression.
 77 | -- Note that the whole string (not just some part of it) should be matched.
 78 | --
 79 | -- Examples:
 80 | --
 81 | -- >>> match (sym 'a' <|> sym 'b') "a"
 82 | -- Just 'a'
 83 | -- >>> match (sym 'a' <|> sym 'b') "ab"
 84 | -- Nothing
 85 | --
 86 | match :: RE s a -> [s] -> Maybe a
 87 | match re = let obj = compile re in \str ->
 88 |     listToMaybe $
 89 |     results $
 90 |     foldl (flip step) obj str
 91 | 
 92 | -- | Find a string prefix which is matched by the regular expression.
 93 | --
 94 | -- Of all matching prefixes, pick one using left bias (prefer the left part of
 95 | -- '<|>' to the right part) and greediness.
 96 | --
 97 | -- This is the match which a backtracking engine (such as Perl's one) would find
 98 | -- first.
 99 | --
100 | -- If match is found, the rest of the input is also returned.
101 | --
102 | -- See also 'findFirstPrefixWithUncons', of which this is a special case.
103 | --
104 | -- Examples:
105 | --
106 | -- >>> findFirstPrefix ("a" <|> "ab") "abc"
107 | -- Just ("a","bc")
108 | -- >>> findFirstPrefix ("ab" <|> "a") "abc"
109 | -- Just ("ab","c")
110 | -- >>> findFirstPrefix "bc" "abc"
111 | -- Nothing
112 | findFirstPrefix :: RE s a -> [s] -> Maybe (a, [s])
113 | findFirstPrefix = findFirstPrefixWithUncons List.uncons
114 | 
115 | -- | Find the first prefix, with the given @uncons@ function.
116 | --
117 | -- @since 0.3.4
118 | findFirstPrefixWithUncons :: (ss -> Maybe (s, ss)) -> RE s a -> ss -> Maybe (a, ss)
119 | findFirstPrefixWithUncons = findPrefixWith' (walk emptyObject . threads)
120 |   where
121 |     walk obj [] = (obj, Nothing)
122 |     walk obj (t:ts) =
123 |         case getResult t of
124 |             Just r -> (obj, Just r)
125 |             Nothing -> walk (addThread t obj) ts
126 | 
127 | -- | Find the longest string prefix which is matched by the regular expression.
128 | --
129 | -- Submatches are still determined using left bias and greediness, so this is
130 | -- different from POSIX semantics.
131 | --
132 | -- If match is found, the rest of the input is also returned.
133 | --
134 | -- See also 'findLongestPrefixWithUncons', of which this is a special case.
135 | --
136 | -- Examples:
137 | --
138 | -- >>> import Data.Char
139 | -- >>> let keyword = "if"
140 | -- >>> let identifier = many $ psym isAlpha
141 | -- >>> let lexeme = (Left <$> keyword) <|> (Right <$> identifier)
142 | -- >>> findLongestPrefix lexeme "if foo"
143 | -- Just (Left "if"," foo")
144 | -- >>> findLongestPrefix lexeme "iffoo"
145 | -- Just (Right "iffoo","")
146 | findLongestPrefix :: RE s a -> [s] -> Maybe (a, [s])
147 | findLongestPrefix = findLongestPrefixWithUncons List.uncons
148 | 
149 | -- | Find the longest prefix, with the given @uncons@ function.
150 | --
151 | -- @since 0.3.4
152 | findLongestPrefixWithUncons :: (ss -> Maybe (s, ss)) -> RE s a -> ss -> Maybe (a, ss)
153 | findLongestPrefixWithUncons = findPrefixWith' ((,) <*> listToMaybe . results)
154 | 
155 | findPrefixWith'
156 |  :: (ReObject s a -> (ReObject s a, Maybe a))
157 |  -- ^ Given the regex object, compute the regex object to feed the next input value into, and
158 |  -- the result, if any.
159 |  -> (ss -> Maybe (s, ss)) -- ^ @uncons@
160 |  -> RE s a -> ss -> Maybe (a, ss)
161 | findPrefixWith' walk uncons = \ re -> go (compile re) Nothing
162 |   where
163 |     go obj resOld ss = case walk obj of
164 |         (obj', resThis) ->
165 |             let res = flip (,) ss <$> resThis <|> resOld
166 |             in
167 |                 case uncons ss of
168 |                     _ | failed obj' -> res
169 |                     Nothing -> res
170 |                     Just (s, ss) -> go (step s obj') res ss
171 | 
172 | -- | Find the shortest prefix (analogous to 'findLongestPrefix')
173 | --
174 | -- See also 'findShortestPrefixWithUncons', of which this is a special case.
175 | findShortestPrefix :: RE s a -> [s] -> Maybe (a, [s])
176 | findShortestPrefix = findShortestPrefixWithUncons List.uncons
177 | 
178 | -- | Find the shortest prefix (analogous to 'findLongestPrefix'), with the given @uncons@ function.
179 | --
180 | -- @since 0.3.4
181 | findShortestPrefixWithUncons :: (ss -> Maybe (s, ss)) -> RE s a -> ss -> Maybe (a, ss)
182 | findShortestPrefixWithUncons uncons = go . compile
183 |   where
184 |     go obj ss = case results obj of
185 |         r:_ -> Just (r, ss)
186 |         _ -> do
187 |             guard (not (failed obj))
188 |             (s, ss) <- uncons ss
189 |             go (step s obj) ss
190 | 
191 | -- | Find the leftmost substring that is matched by the regular expression.
192 | -- Otherwise behaves like 'findFirstPrefix'. Returns the result together with
193 | -- the prefix and suffix of the string surrounding the match.
194 | findFirstInfix :: RE s a -> [s] -> Maybe ([s], a, [s])
195 | findFirstInfix re str =
196 |     fmap (\((first, res), last) -> (first, res, last)) $
197 |     findFirstPrefix ((,) <$> few anySym <*> re) str
198 | 
199 | -- Auxiliary function for findExtremeInfix
200 | prefixCounter :: RE s (Int, [s])
201 | prefixCounter = second reverse <$> reFoldl NonGreedy f (0, []) anySym
202 |     where
203 |     f (i, prefix) s = ((,) $! (i+1)) $ s:prefix
204 | 
205 | data InfixMatchingState s a = GotResult
206 |     { prefixLen  :: !Int
207 |     , prefixStr  :: [s]
208 |     , result     :: a
209 |     , postfixStr :: [s]
210 |     }
211 |     | NoResult
212 | 
213 | -- a `preferOver` b chooses one of a and b, giving preference to a
214 | preferOver
215 |     :: InfixMatchingState s a
216 |     -> InfixMatchingState s a
217 |     -> InfixMatchingState s a
218 | preferOver NoResult b = b
219 | preferOver b NoResult = b
220 | preferOver a b =
221 |     case prefixLen a `compare` prefixLen b of
222 |         GT -> b -- prefer b when it has smaller prefix
223 |         _  -> a -- otherwise, prefer a
224 | 
225 | mkInfixMatchingState
226 |     :: [s] -- rest of input
227 |     -> Thread s ((Int, [s]), a)
228 |     -> InfixMatchingState s a
229 | mkInfixMatchingState rest thread =
230 |     case getResult thread of
231 |         Just ((pLen, pStr), res) ->
232 |             GotResult
233 |                 { prefixLen = pLen
234 |                 , prefixStr = pStr
235 |                 , result    = res
236 |                 , postfixStr = rest
237 |                 }
238 |         Nothing -> NoResult
239 | 
240 | gotResult :: InfixMatchingState s a -> Bool
241 | gotResult GotResult {} = True
242 | gotResult _ = False
243 | 
244 | -- Algorithm for finding leftmost longest infix match:
245 | --
246 | -- 1. Add a thread /.*?/ to the begginning of the regexp
247 | -- 2. As soon as we get first accept, we delete that thread
248 | -- 3. When we get more than one accept, we choose one by the following criteria:
249 | -- 3.1. Compare by the length of prefix (since we are looking for the leftmost
250 | -- match)
251 | -- 3.2. If they are produced on the same step, choose the first one (left-biased
252 | -- choice)
253 | -- 3.3. If they are produced on the different steps, choose the later one (since
254 | -- they have the same prefixes, later means longer)
255 | findExtremalInfix
256 |     :: -- function to combine a later result (first arg) to an earlier one (second
257 |        -- arg)
258 |        (InfixMatchingState s a -> InfixMatchingState s a -> InfixMatchingState s a)
259 |     -> RE s a
260 |     -> [s]
261 |     -> Maybe ([s], a, [s])
262 | findExtremalInfix newOrOld re str =
263 |     case go (compile $ (,) <$> prefixCounter <*> re) str NoResult of
264 |         NoResult -> Nothing
265 |         r@GotResult{} ->
266 |             Just (prefixStr r, result r, postfixStr r)
267 |     where
268 |     {-
269 |     go :: ReObject s ((Int, [s]), a)
270 |        -> [s]
271 |        -> InfixMatchingState s a
272 |        -> InfixMatchingState s a
273 |     -}
274 |     go obj str resOld =
275 |         let resThis =
276 |                 foldl
277 |                     (\acc t -> acc `preferOver` mkInfixMatchingState str t)
278 |                     NoResult $
279 |                     threads obj
280 |             res = resThis `newOrOld` resOld
281 |             obj' =
282 |                 -- If we just found the first result, kill the "prefixCounter" thread.
283 |                 -- We rely on the fact that it is the last thread of the object.
284 |                 if gotResult resThis && not (gotResult resOld)
285 |                     then fromThreads $ init $ threads obj
286 |                     else obj
287 |         in
288 |             case str of
289 |                 [] -> res
290 |                 _ | failed obj -> res
291 |                 (s:ss) -> go (step s obj') ss res
292 | 
293 | 
294 | -- | Find the leftmost substring that is matched by the regular expression.
295 | -- Otherwise behaves like 'findLongestPrefix'. Returns the result together with
296 | -- the prefix and suffix of the string surrounding the match.
297 | findLongestInfix :: RE s a -> [s] -> Maybe ([s], a, [s])
298 | findLongestInfix = findExtremalInfix preferOver
299 | 
300 | -- | Find the leftmost substring that is matched by the regular expression.
301 | -- Otherwise behaves like 'findShortestPrefix'. Returns the result together with
302 | -- the prefix and suffix of the string surrounding the match.
303 | findShortestInfix :: RE s a -> [s] -> Maybe ([s], a, [s])
304 | findShortestInfix = findExtremalInfix $ flip preferOver
305 | 
306 | -- | Replace matches of the regular expression with its value.
307 | --
308 | -- >>> replace ("!" <$ sym 'f' <* some (sym 'o')) "quuxfoofooooofoobarfobar"
309 | -- "quux!!!bar!bar"
310 | replace :: RE s [s] -> [s] -> [s]
311 | replace r = ($ []) . go
312 |   where go ys = case findLongestInfix r ys of
313 |                     Nothing                -> (ys ++)
314 |                     Just (before, m, rest) -> (before ++) . (m ++) . go rest
315 | 


--------------------------------------------------------------------------------
/Text/Regex/Applicative/Object.hs:
--------------------------------------------------------------------------------
  1 | --------------------------------------------------------------------
  2 | -- |
  3 | -- Module    : Text.Regex.Applicative.Object
  4 | -- Copyright : (c) Roman Cheplyaka
  5 | -- License   : MIT
  6 | --
  7 | -- Maintainer: Roman Cheplyaka <roma@ro-che.info>
  8 | -- Stability : experimental
  9 | --
 10 | -- This is a low-level interface to the regex engine.
 11 | --------------------------------------------------------------------
 12 | {-# LANGUAGE GADTs #-}
 13 | module Text.Regex.Applicative.Object
 14 |     ( ReObject
 15 |     , compile
 16 |     , emptyObject
 17 |     , Thread
 18 |     , threads
 19 |     , failed
 20 |     , isResult
 21 |     , getResult
 22 |     , results
 23 |     , ThreadId
 24 |     , threadId
 25 |     , step
 26 |     , stepThread
 27 |     , fromThreads
 28 |     , addThread
 29 |     ) where
 30 | 
 31 | import Text.Regex.Applicative.Types
 32 | import qualified Text.Regex.Applicative.StateQueue as SQ
 33 | import qualified Text.Regex.Applicative.Compile as Compile
 34 | import Data.Maybe
 35 | import Data.Foldable as F
 36 | import Control.Monad.Trans.State
 37 | 
 38 | -- | The state of the engine is represented as a \"regex object\" of type
 39 | -- @'ReObject' s r@, where @s@ is the type of symbols and @r@ is the
 40 | -- result type (as in the 'RE' type). Think of 'ReObject' as a collection of
 41 | -- 'Thread's ordered by priority. E.g. threads generated by the left part of
 42 | -- '<|>' come before the threads generated by the right part.
 43 | newtype ReObject s r = ReObject (SQ.StateQueue (Thread s r))
 44 | 
 45 | -- | List of all threads of an object. Each non-result thread has a unique id.
 46 | threads :: ReObject s r -> [Thread s r]
 47 | threads (ReObject sq) = F.toList sq
 48 | 
 49 | -- | Create an object from a list of threads. It is recommended that all
 50 | -- threads come from the same 'ReObject', unless you know what you're doing.
 51 | -- However, it should be safe to filter out or rearrange threads.
 52 | fromThreads :: [Thread s r] -> ReObject s r
 53 | fromThreads ts = F.foldl' (flip addThread) emptyObject ts
 54 | 
 55 | -- | Check whether a thread is a result thread
 56 | isResult :: Thread s r -> Bool
 57 | isResult Accept {} = True
 58 | isResult _ = False
 59 | 
 60 | -- | Return the result of a result thread, or 'Nothing' if it's not a result
 61 | -- thread
 62 | getResult :: Thread s r -> Maybe r
 63 | getResult (Accept r) = Just r
 64 | getResult _ = Nothing
 65 | 
 66 | -- | Check if the object has no threads. In that case it never will
 67 | -- produce any new threads as a result of 'step'.
 68 | failed :: ReObject s r -> Bool
 69 | failed obj = null $ threads obj
 70 | 
 71 | -- | Empty object (with no threads)
 72 | emptyObject :: ReObject s r
 73 | emptyObject = ReObject $ SQ.empty
 74 | 
 75 | -- | Extract the result values from all the result threads of an object
 76 | results :: ReObject s r -> [r]
 77 | results obj =
 78 |     mapMaybe getResult $ threads obj
 79 | 
 80 | -- | Feed a symbol into a regex object
 81 | step :: s -> ReObject s r -> ReObject s r
 82 | step s (ReObject sq) =
 83 |     let accum q t =
 84 |             case t of
 85 |                 Accept {} -> q
 86 |                 Thread _ c ->
 87 |                     F.foldl' (\q x -> addThread x q) q $ c s
 88 |         newQueue = F.foldl' accum emptyObject sq
 89 |     in newQueue
 90 | 
 91 | -- | Feed a symbol into a non-result thread. It is an error to call 'stepThread'
 92 | -- on a result thread.
 93 | stepThread :: s -> Thread s r -> [Thread s r]
 94 | stepThread s t =
 95 |     case t of
 96 |         Thread _ c -> c s
 97 |         Accept {} -> error "stepThread on a result"
 98 | 
 99 | -- | Add a thread to an object. The new thread will have lower priority than the
100 | -- threads which are already in the object.
101 | --
102 | -- If a (non-result) thread with the same id already exists in the object, the
103 | -- object is not changed.
104 | addThread :: Thread s r -> ReObject s r -> ReObject s r
105 | addThread t (ReObject q) =
106 |     case t of
107 |         Accept {} -> ReObject $ SQ.insert t q
108 |         Thread { threadId_ = ThreadId i } -> ReObject $ SQ.insertUnique i t q
109 | 
110 | -- | Compile a regular expression into a regular expression object
111 | compile :: RE s r -> ReObject s r
112 | compile =
113 |     fromThreads .
114 |     flip Compile.compile (\x -> [Accept x]) .
115 |     renumber
116 | 
117 | renumber :: RE s a -> RE s a
118 | renumber =
119 |     flip evalState (ThreadId 1) .
120 |     traversePostorder (\ case Symbol _ p -> flip Symbol p <$> fresh; a -> pure a)
121 | 
122 | fresh :: State ThreadId ThreadId
123 | fresh = do
124 |     t@(ThreadId i) <- get
125 |     put $! ThreadId (i+1)
126 |     return t
127 | 


--------------------------------------------------------------------------------
/Text/Regex/Applicative/Reference.hs:
--------------------------------------------------------------------------------
 1 | --------------------------------------------------------------------
 2 | -- |
 3 | -- Module    : Text.Regex.Applicative.Reference
 4 | -- Copyright : (c) Roman Cheplyaka
 5 | -- License   : MIT
 6 | --
 7 | -- Maintainer: Roman Cheplyaka <roma@ro-che.info>
 8 | -- Stability : experimental
 9 | --
10 | -- Reference implementation (using backtracking).
11 | --
12 | -- This is exposed for testing purposes only!
13 | --------------------------------------------------------------------
14 | 
15 | {-# LANGUAGE GADTs #-}
16 | module Text.Regex.Applicative.Reference (reference) where
17 | import Prelude hiding (getChar)
18 | import Text.Regex.Applicative.Types
19 | import Control.Applicative
20 | import Control.Monad
21 | 
22 | 
23 | -- A simple parsing monad
24 | newtype P s a = P { unP :: [s] -> [(a, [s])] }
25 | 
26 | instance Monad (P s) where
27 |     return x = P $ \s -> [(x, s)]
28 |     (P a) >>= k = P $ \s ->
29 |         a s >>= \(x,s) -> unP (k x) s
30 | 
31 | instance Functor (P s) where
32 |     fmap = liftM
33 | 
34 | instance Applicative (P s) where
35 |     (<*>) = ap
36 |     pure = return
37 | 
38 | instance Alternative (P s) where
39 |     empty = P $ const []
40 |     P a1 <|> P a2 = P $ \s ->
41 |         a1 s ++ a2 s
42 | 
43 | getChar :: P s s
44 | getChar = P $ \s ->
45 |     case s of
46 |         [] -> []
47 |         c:cs -> [(c,cs)]
48 | 
49 | re2monad :: RE s a -> P s a
50 | re2monad r =
51 |     case r of
52 |         Eps -> return $ error "eps"
53 |         Symbol _ p -> do
54 |             c <- getChar
55 |             case p c of
56 |               Just r -> return r
57 |               Nothing -> empty
58 |         Alt a1 a2 -> re2monad a1 <|> re2monad a2
59 |         App a1 a2 -> re2monad a1 <*> re2monad a2
60 |         Fmap f a -> fmap f $ re2monad a
61 |         CatMaybes a -> maybe empty pure =<< re2monad a
62 |         Rep g f b a -> rep b
63 |             where
64 |             am = re2monad a
65 |             rep b = combine (do a <- am; rep $ f b a) (return b)
66 |             combine a b = case g of Greedy -> a <|> b; NonGreedy -> b <|> a
67 |         Void a -> re2monad a >> return ()
68 |         Fail -> empty
69 | 
70 | runP :: P s a -> [s] -> Maybe a
71 | runP m s = case filter (null . snd) $ unP m s of
72 |     (r, _) : _ -> Just r
73 |     _ -> Nothing
74 | 
75 | -- | 'reference' @r@ @s@ should give the same results as @s@ '=~' @r@.
76 | --
77 | -- However, this is not very efficient implementation and is supposed to be
78 | -- used for testing only.
79 | reference :: RE s a -> [s] -> Maybe a
80 | reference r s = runP (re2monad r) s
81 | 


--------------------------------------------------------------------------------
/Text/Regex/Applicative/StateQueue.hs:
--------------------------------------------------------------------------------
 1 | -- | This internal module is exposed only for testing and benchmarking. You
 2 | -- don't need to import it.
 3 | {-# LANGUAGE RecordWildCards #-}
 4 | module Text.Regex.Applicative.StateQueue
 5 |     ( StateQueue
 6 |     , empty
 7 |     , insert
 8 |     , insertUnique
 9 |     , getElements
10 |     ) where
11 | 
12 | import Prelude hiding (read, lookup, replicate)
13 | import qualified Data.IntSet as IntSet
14 | import Data.Foldable as F
15 | 
16 | -- | 'StateQueue' is a data structure that can efficiently insert elements
17 | -- (preserving their order)
18 | -- and check whether an element with the given 'Int' key is already in the queue.
19 | data StateQueue a = StateQueue
20 |     { elements :: [a]
21 |     , ids :: !IntSet.IntSet
22 |     }
23 |     deriving (Eq,Show)
24 | 
25 | instance Foldable StateQueue where
26 |   foldr f a = F.foldr f a . getElements
27 | 
28 | -- | Get the list of all elements
29 | getElements :: StateQueue a -> [a]
30 | getElements = reverse . elements
31 | 
32 | {-# INLINE empty #-}
33 | -- | The empty state queue
34 | empty :: StateQueue a
35 | empty = StateQueue
36 |     { elements = []
37 |     , ids = IntSet.empty
38 |     }
39 | 
40 | {-# INLINE insert #-}
41 | -- | Insert an element in the state queue, unless there is already an element with the same key
42 | insertUnique
43 |     :: Int -- ^ key
44 |     -> a
45 |     -> StateQueue a
46 |     -> StateQueue a
47 | insertUnique i v sq@StateQueue { ids = ids, elements = elements } =
48 |     if i `IntSet.member` ids
49 |         then sq
50 |         else sq { elements = v : elements
51 |                 , ids = IntSet.insert i ids
52 |                 }
53 | 
54 | -- | Insert an element in the state queue without a key.
55 | --
56 | -- Since 'insert' doesn't take a key, it won't affect any 'insertUnique'.
57 | insert
58 |     :: a
59 |     -> StateQueue a
60 |     -> StateQueue a
61 | insert v sq =
62 |     sq { elements = v : elements sq }
63 | 


--------------------------------------------------------------------------------
/Text/Regex/Applicative/Types.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE GADTs #-}
  2 | {-# LANGUAGE RankNTypes #-}
  3 | {-# LANGUAGE ScopedTypeVariables #-}
  4 | {-# LANGUAGE CPP #-}
  5 | module Text.Regex.Applicative.Types where
  6 | 
  7 | import Control.Applicative
  8 | import Control.Monad ((<=<))
  9 | import Data.Filtrable (Filtrable (..))
 10 | import Data.Functor.Identity (Identity (..))
 11 | import Data.String
 12 | #if !MIN_VERSION_base(4,11,0)
 13 | import Data.Semigroup
 14 | #endif
 15 | 
 16 | newtype ThreadId = ThreadId Int
 17 | 
 18 | -- | A thread either is a result or corresponds to a symbol in the regular
 19 | -- expression, which is expected by that thread.
 20 | data Thread s r
 21 |     = Thread
 22 |         { threadId_ :: ThreadId
 23 |         , _threadCont :: s -> [Thread s r]
 24 |         }
 25 |     | Accept r
 26 | 
 27 | -- | Returns thread identifier. This will be 'Just' for ordinary threads and
 28 | -- 'Nothing' for results.
 29 | threadId :: Thread s r -> Maybe ThreadId
 30 | threadId Thread { threadId_ = i } = Just i
 31 | threadId _ = Nothing
 32 | 
 33 | data Greediness = Greedy | NonGreedy
 34 |     deriving (Show, Read, Eq, Ord, Enum)
 35 | 
 36 | -- | Type of regular expressions that recognize symbols of type @s@ and
 37 | -- produce a result of type @a@.
 38 | --
 39 | -- Regular expressions can be built using 'Functor', 'Applicative',
 40 | -- 'Alternative', and 'Filtrable' instances in the following natural way:
 41 | --
 42 | -- * @f@ '<$>' @ra@ matches iff @ra@ matches, and its return value is the result
 43 | -- of applying @f@ to the return value of @ra@.
 44 | --
 45 | -- * 'pure' @x@ matches the empty string (i.e. it does not consume any symbols),
 46 | -- and its return value is @x@
 47 | --
 48 | -- * @rf@ '<*>' @ra@ matches a string iff it is a concatenation of two
 49 | -- strings: one matched by @rf@ and the other matched by @ra@. The return value
 50 | -- is @f a@, where @f@ and @a@ are the return values of @rf@ and @ra@
 51 | -- respectively.
 52 | --
 53 | -- * @ra@ '<|>' @rb@ matches a string which is accepted by either @ra@ or @rb@.
 54 | -- It is left-biased, so if both can match, the result of @ra@ is used.
 55 | --
 56 | -- * 'empty' is a regular expression which does not match any string.
 57 | --
 58 | -- * 'many' @ra@ matches concatenation of zero or more strings matched by @ra@
 59 | -- and returns the list of @ra@'s return values on those strings.
 60 | --
 61 | -- * 'some' @ra@ matches concatenation of one or more strings matched by @ra@
 62 | -- and returns the list of @ra@'s return values on those strings.
 63 | --
 64 | -- * 'catMaybes' @ram@ matches iff @ram@ matches and produces 'Just _'.
 65 | --
 66 | -- * @ra@ '<>' @rb@ matches @ra@ followed by @rb@. The return value is @a <> b@,
 67 | -- where @a@ and @b@ are the return values of @ra@ and @rb@ respectively.
 68 | -- (See <https://github.com/UnkindPartition/regex-applicative/issues/37#issue-499781703>
 69 | -- for an example usage.)
 70 | --
 71 | -- * 'mempty' matches the empty string (i.e. it does not consume any symbols),
 72 | -- and its return value is the 'mempty' value of type @a@.
 73 | data RE s a where
 74 |     Eps :: RE s ()
 75 |     Symbol :: ThreadId -> (s -> Maybe a) -> RE s a
 76 |     Alt :: RE s a -> RE s a -> RE s a
 77 |     App :: RE s (a -> b) -> RE s a -> RE s b
 78 |     Fmap :: (a -> b) -> RE s a -> RE s b
 79 |     CatMaybes :: RE s (Maybe a) -> RE s a
 80 |     Fail :: RE s a
 81 |     Rep :: Greediness    -- repetition may be greedy or not
 82 |         -> (b -> a -> b) -- folding function (like in foldl)
 83 |         -> b             -- the value for zero matches, and also the initial value
 84 |                          -- for the folding function
 85 |         -> RE s a
 86 |         -> RE s b
 87 |     Void :: RE s a -> RE s ()
 88 | 
 89 | -- | Traverse each (reflexive, transitive) subexpression of a 'RE', depth-first and post-order.
 90 | traversePostorder :: forall s a m . Monad m => (forall a . RE s a -> m (RE s a)) -> RE s a -> m (RE s a)
 91 | traversePostorder f = go
 92 |   where
 93 |     go :: forall a . RE s a -> m (RE s a)
 94 |     go = f <=< \ case
 95 |         Eps -> pure Eps
 96 |         Symbol i p -> pure (Symbol i p)
 97 |         Alt a b -> Alt <$> go a <*> go b
 98 |         App a b -> App <$> go a <*> go b
 99 |         Fmap g a -> Fmap g <$> go a
100 |         CatMaybes a -> CatMaybes <$> go a
101 |         Fail -> pure Fail
102 |         Rep greed g b a -> Rep greed g b <$> go a
103 |         Void a -> Void <$> go a
104 | 
105 | -- | Fold each (reflexive, transitive) subexpression of a 'RE', depth-first and post-order.
106 | foldMapPostorder :: Monoid b => (forall a . RE s a -> b) -> RE s a -> b
107 | foldMapPostorder f = fst . traversePostorder ((,) <$> f <*> id)
108 | 
109 | -- | Map each (reflexive, transitive) subexpression of a 'RE'.
110 | mapRE :: (forall a . RE s a -> RE s a) -> RE s a -> RE s a
111 | mapRE f = runIdentity . traversePostorder (Identity . f)
112 | 
113 | instance Functor (RE s) where
114 |     fmap f x = Fmap f x
115 |     f <$ x = pure f <* x
116 | 
117 | instance Applicative (RE s) where
118 |     pure x = const x <$> Eps
119 |     a1 <*> a2 = App a1 a2
120 |     a *> b = pure (const id) <*> Void a <*> b
121 |     a <* b = pure const <*> a <*> Void b
122 | 
123 | instance Alternative (RE s) where
124 |     a1 <|> a2 = Alt a1 a2
125 |     empty = Fail
126 |     many a = reverse <$> Rep Greedy (flip (:)) [] a
127 |     some a = (:) <$> a <*> many a
128 | 
129 | -- | @since 0.3.4
130 | instance Filtrable (RE s) where
131 |     catMaybes = CatMaybes
132 | 
133 | instance (char ~ Char, string ~ String) => IsString (RE char string) where
134 |     fromString = string
135 | 
136 | -- | @since 0.3.4
137 | instance Semigroup a => Semigroup (RE s a) where
138 |     x <> y = (<>) <$> x <*> y
139 | 
140 | -- | @since 0.3.4
141 | instance Monoid a => Monoid (RE s a) where
142 |     mempty = pure mempty
143 | 
144 | -- | Match and return the given sequence of symbols.
145 | --
146 | -- Note that there is an 'IsString' instance for regular expression, so
147 | -- if you enable the @OverloadedStrings@ language extension, you can write
148 | -- @string \"foo\"@ simply as @\"foo\"@.
149 | --
150 | -- Example:
151 | --
152 | -- >{-# LANGUAGE OverloadedStrings #-}
153 | -- >import Text.Regex.Applicative
154 | -- >
155 | -- >number = "one" *> pure 1  <|>  "two" *> pure 2
156 | -- >
157 | -- >main = print $ "two" =~ number
158 | string :: Eq a => [a] -> RE a [a]
159 | string = traverse sym
160 | 
161 | -- | Match and return a single symbol which satisfies the predicate
162 | psym :: (s -> Bool) -> RE s s
163 | psym p = msym (\s -> if p s then Just s else Nothing)
164 | 
165 | -- | Like 'psym', but allows to return a computed value instead of the
166 | -- original symbol
167 | msym :: (s -> Maybe a) -> RE s a
168 | msym p = Symbol (error "Not numbered symbol") p
169 | 
170 | -- | Match and return the given symbol
171 | sym :: Eq s => s -> RE s s
172 | sym s = psym (s ==)
173 | 


--------------------------------------------------------------------------------
/benchmark/benchmark.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE FlexibleInstances, TypeApplications, RankNTypes, CPP #-}
 2 | import Data.List
 3 | import Data.Traversable
 4 | import Data.Maybe
 5 | import Data.Void
 6 | import Control.Monad
 7 | 
 8 | import Criterion.Main
 9 | 
10 | import Text.Regex.Applicative
11 | import qualified Text.Parser.Combinators as PC
12 | import qualified Text.Parser.Char as PC
13 | import Control.DeepSeq
14 | import qualified Text.Parsec as Parsec
15 | import qualified Text.Megaparsec as Megaparsec
16 | import qualified Text.Megaparsec.Parsers as MP
17 | import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec
18 | import qualified Data.ByteString.Char8 as BS8
19 | 
20 | parser1 :: PC.CharParsing f => f [String]
21 | parser1 = many $
22 |   PC.try (PC.string "foo") <|>
23 |   PC.try (PC.string "bar") <|>
24 |   PC.string "baz"
25 | 
26 | str :: String
27 | str = concat $ replicate 10 "foobarfoobarbaz"
28 | 
29 | benchmarkParser
30 |   :: NFData a
31 |   => (forall f . (PC.CharParsing f) => f a)
32 |   -> [Benchmark]
33 | benchmarkParser parser =
34 |     [ bench "regex-applicative" $ nf (match parser) str
35 |     , bench "parsec" $ nf (Parsec.parse parser "-") str
36 |     , bench "megaparsec" $ nf (Megaparsec.parseMaybe @Void (MP.unParsecT parser)) str
37 |     , bench "attoparsec" $ nf (Attoparsec.parseOnly parser) (BS8.pack str)
38 |     ]
39 | 
40 | main = defaultMain $
41 |   [ bgroup "parsing" (benchmarkParser parser1)
42 |   , bgroup "recognizing" (benchmarkParser (void parser1))
43 |   ]
44 | 
45 | -- instances
46 | instance PC.Parsing (RE c) where
47 |   try = id
48 |   (<?>) = const
49 |   unexpected _ = empty
50 |   notFollowedBy = error "RE: notFollowedBy"
51 |   eof = error "RE: eof"
52 | instance PC.CharParsing (RE Char) where
53 |   satisfy = psym
54 |   char = sym
55 |   anyChar = anySym
56 |   string = string
57 | instance NFData Parsec.ParseError where
58 |   rnf = flip seq ()
59 | 


--------------------------------------------------------------------------------
/doctest/doctest.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Test.DocTest
 4 | 
 5 | main :: IO ()
 6 | main = doctest
 7 |   [ "-XLambdaCase"
 8 |   , "-XOverloadedStrings"
 9 |   , "Text/Regex/Applicative.hs"
10 |   ]
11 | 


--------------------------------------------------------------------------------
/regex-applicative.cabal:
--------------------------------------------------------------------------------
 1 | Name:                regex-applicative
 2 | Version:             0.3.4
 3 | Synopsis:            Regex-based parsing with applicative interface
 4 | Description:
 5 |     regex-applicative is a Haskell library for parsing using regular expressions.
 6 |     Parsers can be built using Applicative interface.
 7 | Homepage:            https://github.com/UnkindPartition/regex-applicative
 8 | License:             MIT
 9 | License-file:        LICENSE
10 | Author:              Roman Cheplyaka
11 | Maintainer:          Roman Cheplyaka <roma@ro-che.info>
12 | Category:            Text
13 | Build-type:          Simple
14 | Extra-source-files:  README.md CREDITS.md CHANGES.md
15 | Cabal-version:       >=1.10
16 | Tested-With:
17 |   GHC ==8.0.2 ||
18 |       ==8.2.2 ||
19 |       ==8.4.4 ||
20 |       ==8.6.5 ||
21 |       ==8.8.2
22 | 
23 | Source-repository head
24 |   type:     git
25 |   location: git://github.com/UnkindPartition/regex-applicative.git
26 | 
27 | Library
28 |   Default-language:    Haskell2010
29 |   Default-extensions:  LambdaCase
30 |   Build-depends:       base < 5,
31 |                        filtrable >= 0.1.3,
32 |                        containers,
33 |                        transformers
34 |   Exposed-modules:     Text.Regex.Applicative
35 |                        Text.Regex.Applicative.Object
36 |                        Text.Regex.Applicative.Common
37 |                        Text.Regex.Applicative.Reference
38 |                        Text.Regex.Applicative.StateQueue
39 |   Other-modules:       Text.Regex.Applicative.Interface
40 |                        Text.Regex.Applicative.Types
41 |                        Text.Regex.Applicative.Compile
42 |   GHC-Options:     -Wall
43 |                    -Werror=incomplete-patterns
44 |                    -fno-warn-name-shadowing
45 | 
46 | Test-Suite test-regex-applicative
47 |   type:       exitcode-stdio-1.0
48 |   hs-source-dirs:
49 |     tests
50 |   main-is:
51 |     test.hs
52 |   other-modules:
53 |     StateQueue
54 |   GHC-Options: -threaded
55 |   Default-language:    Haskell2010
56 |   Build-depends:       base < 5,
57 |                        containers,
58 |                        filtrable >= 0.1.3,
59 |                        transformers,
60 |                        smallcheck >= 1.0,
61 |                        tasty,
62 |                        tasty-smallcheck,
63 |                        tasty-hunit,
64 |                        regex-applicative
65 | 
66 | test-suite doctest
67 |   type:
68 |     exitcode-stdio-1.0
69 |   hs-source-dirs:
70 |     doctest
71 |   main-is:
72 |     doctest.hs
73 |   default-language:    Haskell2010
74 |   ghc-options:      -threaded
75 |   build-depends:    base, text, doctest >= 0.8
76 | 
77 | Benchmark bench-regex-applicative
78 |   type:           exitcode-stdio-1.0
79 |   hs-source-dirs: benchmark
80 |   main-is:        benchmark.hs
81 |   build-depends:  base <5
82 |                 , criterion
83 |                 , regex-applicative
84 |                 , parsers
85 |                 , deepseq
86 |                 , parsec
87 |                 , attoparsec
88 |                 , megaparsec
89 |                 , parsers-megaparsec
90 |                 , bytestring
91 |   default-language: Haskell2010
92 | 


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
1 | flags: {}
2 | packages:
3 | - '.'
4 | extra-deps: []
5 | resolver: nightly-2021-12-18
6 | 


--------------------------------------------------------------------------------
/tests/StateQueue.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, ScopedTypeVariables #-}
 2 | module StateQueue where
 3 | 
 4 | import Test.Tasty
 5 | import Test.Tasty.SmallCheck
 6 | import Test.SmallCheck.Series
 7 | import Control.Applicative
 8 | import Text.Regex.Applicative.StateQueue as SQ
 9 | 
10 | fromElems :: [(a, Maybe Int)] -> StateQueue a
11 | fromElems = foldl f SQ.empty
12 |   where
13 |     f sq (x, mbKey) = maybe insert insertUnique mbKey x sq
14 | 
15 | size :: StateQueue a -> Int
16 | size = length . getElements
17 | 
18 | instance (Monad m, Serial m a) => Serial m (StateQueue a) where
19 |   series = fromElems <$> series
20 | 
21 | stateQueueTests = testGroup "StateQueue"
22 |   [ testProperty "Insertion increments the # of elements" $
23 |       \sq (i :: Int) -> size (insert i sq) == size sq + 1
24 |   , testProperty "insertUnique increments the # of elements by 0 or 1" $
25 |       \sq (i :: Int) ->
26 |         let d = size (insertUnique i i sq) - size sq
27 |         in d == 0 || d == 1
28 |   , testProperty "insertUnique is idempotent" $
29 |       \sq (i :: Int) ->
30 |         let f = insertUnique i i
31 |         in f sq == (f . f) sq
32 |   , testProperty "insert doesn't affect insertUnique" $
33 |       \(i :: Int) -> exists $ \sq ->
34 |         let sq' = insert i sq
35 |         in insertUnique i i sq' /= sq'
36 |   , testProperty "insertUnique only cares about keys, not values" $
37 |       \sq i j ->
38 |         let sq' = insertUnique i i sq
39 |         in insertUnique i j sq' == sq'
40 |   , testProperty "insert puts in the back" $
41 |       \sq (i :: Int) ->
42 |         let sq' = insert i sq
43 |         in last (getElements sq') == i
44 |   , testProperty "insertUnique puts in the back" $
45 |       \sq i ->
46 |         let sq' = insertUnique i i sq
47 |         in sq' /= sq ==> last (getElements sq') == i
48 |   ]
49 | 


--------------------------------------------------------------------------------
/tests/test.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings, FlexibleInstances, MultiParamTypeClasses, FlexibleContexts #-}
  2 | import Text.Regex.Applicative
  3 | import Text.Regex.Applicative.Reference
  4 | import Control.Applicative
  5 | import Control.Monad
  6 | import Data.Filtrable
  7 | import Data.Traversable
  8 | import Data.Maybe
  9 | import Text.Printf
 10 | 
 11 | import Test.SmallCheck
 12 | import Test.SmallCheck.Series
 13 | import Test.Tasty
 14 | import Test.Tasty.SmallCheck
 15 | import Test.Tasty.HUnit
 16 | 
 17 | import StateQueue
 18 | 
 19 | -- Small alphabets as SmallCheck's series
 20 | newtype A = A { a :: Char } deriving Show
 21 | instance Monad m => Serial m A where
 22 |     series = cons0 $ A 'a'
 23 | 
 24 | newtype AB = AB { ab :: Char } deriving Show
 25 | instance Monad m => Serial m AB where
 26 |     series = cons0 (AB 'a') \/ cons0 (AB 'b')
 27 | 
 28 | newtype ABC = ABC { abc :: Char } deriving Show
 29 | instance Monad m => Serial m ABC where
 30 |     series = cons0 (ABC 'a') \/ cons0 (ABC 'b') \/ cons0 (ABC 'c')
 31 | 
 32 | re1 =
 33 |     let one = pure 1 <* sym 'a'
 34 |         two = pure 2 <* sym 'a' <* sym 'a'
 35 |     in (,) <$> (one <|> two) <*> (two <|> one)
 36 | 
 37 | re2 = sequenceA $
 38 |     [ pure 1 <* sym 'a' <* sym 'a' <|>
 39 |       pure 2 <* sym 'a'
 40 |     , pure 3 <* sym 'b'
 41 |     , pure 4 <* sym 'b' <|>
 42 |       pure 5 <* sym 'a' ]
 43 | 
 44 | re3 = sequenceA $
 45 |     [ pure 0 <|> pure 1
 46 |     , pure 1 <* sym 'a' <* sym 'a' <|>
 47 |       pure 2 <* sym 'a'
 48 |     , pure 3 <* sym 'b' <|> pure 6
 49 |     , fmap (+1) $
 50 |       pure 4 <* sym 'b' <|>
 51 |       pure 7 <|>
 52 |       pure 5 <* sym 'a' ]
 53 | 
 54 | re4 = sym 'a' *> many (sym 'b') <* sym 'a'
 55 | 
 56 | re5 = (sym 'a' <|> sym 'a' *> sym 'a') *> many (sym 'a')
 57 | 
 58 | re6 = many (pure 3 <* sym 'a' <* sym 'a' <* sym 'a' <|> pure 1 <* sym 'a')
 59 | 
 60 | -- Regular expression from the weighted regexp paper.
 61 | re7 =
 62 |     let many_A_or_B = many (sym 'a' <|> sym 'b')
 63 |     in (,) <$>
 64 |         many ((,,,) <$> many_A_or_B <*> sym 'c' <*> many_A_or_B <*> sym 'c') <*>
 65 |         many_A_or_B
 66 | 
 67 | re8 = (,) <$> many (sym 'a' <|> sym 'b') <*> many (sym 'b' <|> sym 'c')
 68 | 
 69 | -- NB: we don't test these against the reference impl, 'cause it will loop!
 70 | re9 = many (sym 'a' <|> empty) <* sym 'b'
 71 | re10 = few (sym 'a' <|> empty) <* sym 'b'
 72 | 
 73 | re11 = (\ a b -> a <$ guard (a == b)) <$> anySym <*?> anySym
 74 | 
 75 | prop re f s =
 76 |     let fs = map f s in
 77 |     reference re fs == (fs =~ re)
 78 | 
 79 | prop_withMatched =
 80 |     let re = withMatched $ many (string "a" <|> string "ba")
 81 |     in \str ->
 82 |         case map ab str =~ re of
 83 |             Nothing -> True
 84 |             Just (x, y) -> concat x == y
 85 | 
 86 | -- Because we have 2 slightly different algorithms for recognition and parsing,
 87 | -- we test that they agree
 88 | testRecognitionAgainstParsing re f s =
 89 |     let fs = map f s in
 90 |     isJust (fs =~ re) == isJust (fs =~ (re *> pure ()))
 91 | 
 92 | tests = testGroup "Tests"
 93 |     [ testGroup "Engine tests"
 94 |        [ t "re1" 10 $ prop re1  a
 95 |        , t "re2" 10 $ prop re2  ab
 96 |        , t "re3" 10 $ prop re3  ab
 97 |        , t "re4" 10 $ prop re4  ab
 98 |        , t "re5" 10 $ prop re5  a
 99 |        , t "re6" 10 $ prop re6  a
100 |        , t "re7"  7 $ prop re7  abc
101 |        , t "re8"  7 $ prop re8  abc
102 |        , t "re11" 7 $ prop re11 abc
103 |        ]
104 |     , testGroup "Recognition vs parsing"
105 |        [ t "re1" 10 $ testRecognitionAgainstParsing re1 a
106 |        , t "re2" 10 $ testRecognitionAgainstParsing re2 ab
107 |        , t "re3" 10 $ testRecognitionAgainstParsing re3 ab
108 |        , t "re4" 10 $ testRecognitionAgainstParsing re4 ab
109 |        , t "re5" 10 $ testRecognitionAgainstParsing re5 a
110 |        , t "re6" 10 $ testRecognitionAgainstParsing re6 a
111 |        , t "re7"  7 $ testRecognitionAgainstParsing re7 abc
112 |        , t "re8"  7 $ testRecognitionAgainstParsing re8 abc
113 |        , t "re8" 10 $ testRecognitionAgainstParsing re9 ab
114 |        , t "re8" 10 $ testRecognitionAgainstParsing re10 ab
115 |        , t "re11" 7 $ testRecognitionAgainstParsing re11 abc
116 |        ]
117 |     , testProperty "withMatched" prop_withMatched
118 |     , testGroup "Tests for matching functions"
119 |         [ testGroup "findFirstPrefix"
120 |             [ u "t1"
121 |                 (findFirstPrefix ("a" <|> "ab") "abc")
122 |                 (Just ("a","bc"))
123 |             , u "t2"
124 |                 (findFirstPrefix ("ab" <|> "a") "abc")
125 |                 (Just ("ab","c"))
126 |             , u "t3"
127 |                 (findFirstPrefix "bc" "abc")
128 |                 Nothing
129 |             ]
130 |         , testGroup "findFirstInfix"
131 |             [ u "t1"
132 |                 (findFirstInfix ("a" <|> "ab") "tabc")
133 |                 (Just ("t", "a","bc"))
134 |             , u "t2"
135 |                 (findFirstInfix ("ab" <|> "a") "tabc")
136 |                 (Just ("t", "ab","c"))
137 |             ]
138 |         , testGroup "findLongestPrefix"
139 |             [ u "t1"
140 |                 (findLongestPrefix ("a" <|> "ab") "abc")
141 |                 (Just ("ab","c"))
142 |             , u "t2"
143 |                 (findLongestPrefix ("ab" <|> "a") "abc")
144 |                 (Just ("ab","c"))
145 |             , u "t3"
146 |                 (findLongestPrefix "bc" "abc")
147 |                 Nothing
148 |             ]
149 |         , testGroup "findLongestInfix"
150 |             [ u "t1"
151 |                 (findLongestInfix ("a" <|> "ab") "tabc")
152 |                 (Just ("t", "ab","c"))
153 |             , u "t2"
154 |                 (findLongestInfix ("ab" <|> "a") "tabc")
155 |                 (Just ("t", "ab","c"))
156 |             , u "t3"
157 |                 (findLongestInfix "bc" "tabc")
158 |                 (Just ("ta", "bc",""))
159 |             ]
160 |         , testGroup "findShortestPrefix"
161 |             [ u "t1"
162 |                 (findShortestPrefix ("a" <|> "ab") "abc")
163 |                 (Just ("a","bc"))
164 |             , u "t2"
165 |                 (findShortestPrefix ("ab" <|> "a") "abc")
166 |                 (Just ("a","bc"))
167 |             , u "t3"
168 |                 (findShortestPrefix "bc" "abc")
169 |                 Nothing
170 |             ]
171 |         , testGroup "findShortestInfix"
172 |             [ u "t1"
173 |                 (findShortestInfix ("a" <|> "ab") "tabc")
174 |                 (Just ("t", "a","bc"))
175 |             , u "t2"
176 |                 (findShortestInfix ("ab" <|> "a") "tabc")
177 |                 (Just ("t", "a","bc"))
178 |             , u "t3"
179 |                 (findShortestInfix "bc" "tabc")
180 |                 (Just ("ta", "bc",""))
181 |             ]
182 |         , testGroup "replace"
183 |             [ u "t1"
184 |                 (replace ("x" <$ "a" <|> "y" <$ "ab") "tabc")
185 |                 "tyc"
186 |             , u "t2"
187 |                 (replace ("y" <$ "ab" <|> "x" <$ "a") "tabc")
188 |                 "tyc"
189 |             , u "t3"
190 |                 (replace ("x" <$ "bc") "tabc")
191 |                 "tax"
192 |             , u "t4"
193 |                 (replace ("y" <$ "a" <|> "x" <$ "ab") "tacabc")
194 |                 "tycxc"
195 |             ]
196 |         ]
197 |     , stateQueueTests
198 |     ]
199 |     where
200 |     t name n = localOption (SmallCheckDepth n) . testProperty name
201 |     u name real ideal = testCase name (assertEqual "" real ideal)
202 | 
203 | main = defaultMain tests
204 | 


--------------------------------------------------------------------------------