├── .travis.yml ├── CHANGES.md ├── CONTRIBUTING.md ├── CREDITS.md ├── LICENSE ├── README.md ├── Setup.hs ├── Text └── Regex │ ├── Applicative.hs │ └── Applicative │ ├── Common.hs │ ├── Compile.hs │ ├── Interface.hs │ ├── Object.hs │ ├── Reference.hs │ ├── StateQueue.hs │ └── Types.hs ├── benchmark └── benchmark.hs ├── doctest └── doctest.hs ├── regex-applicative.cabal ├── stack.yaml └── tests ├── StateQueue.hs └── test.hs /.travis.yml: -------------------------------------------------------------------------------- 1 | # This Travis job script has been generated by a script via 2 | # 3 | # haskell-ci 'regex-applicative.cabal' '--output' '.travis.yml' 4 | # 5 | # For more information, see https://github.com/haskell-CI/haskell-ci 6 | # 7 | # version: 0.9.20200125 8 | # 9 | version: ~> 1.0 10 | language: c 11 | os: linux 12 | dist: xenial 13 | git: 14 | # whether to recursively clone submodules 15 | submodules: false 16 | cache: 17 | directories: 18 | - $HOME/.cabal/packages 19 | - $HOME/.cabal/store 20 | - $HOME/.hlint 21 | before_cache: 22 | - rm -fv $CABALHOME/packages/hackage.haskell.org/build-reports.log 23 | # remove files that are regenerated by 'cabal update' 24 | - rm -fv $CABALHOME/packages/hackage.haskell.org/00-index.* 25 | - rm -fv $CABALHOME/packages/hackage.haskell.org/*.json 26 | - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.cache 27 | - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar 28 | - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar.idx 29 | - rm -rfv $CABALHOME/packages/head.hackage 30 | jobs: 31 | include: 32 | - compiler: ghc-8.8.2 33 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.8.2","cabal-install-3.0"]}} 34 | os: linux 35 | - compiler: ghc-8.6.5 36 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.6.5","cabal-install-3.0"]}} 37 | os: linux 38 | - compiler: ghc-8.4.4 39 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.4.4","cabal-install-3.0"]}} 40 | os: linux 41 | - compiler: ghc-8.2.2 42 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.2.2","cabal-install-3.0"]}} 43 | os: linux 44 | - compiler: ghc-8.0.2 45 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.0.2","cabal-install-3.0"]}} 46 | os: linux 47 | before_install: 48 | - HC=$(echo "/opt/$CC/bin/ghc" | sed 's/-/\//') 49 | - WITHCOMPILER="-w $HC" 50 | - HADDOCK=$(echo "/opt/$CC/bin/haddock" | sed 's/-/\//') 51 | - HCPKG="$HC-pkg" 52 | - unset CC 53 | - CABAL=/opt/ghc/bin/cabal 54 | - CABALHOME=$HOME/.cabal 55 | - export PATH="$CABALHOME/bin:$PATH" 56 | - TOP=$(pwd) 57 | - "HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\\d+)\\.(\\d+)\\.(\\d+)(\\.(\\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))')" 58 | - echo $HCNUMVER 59 | - CABAL="$CABAL -vnormal+nowrap" 60 | - set -o pipefail 61 | - TEST=--enable-tests 62 | - BENCH=--enable-benchmarks 63 | - HEADHACKAGE=false 64 | - rm -f $CABALHOME/config 65 | - | 66 | echo "verbose: normal +nowrap +markoutput" >> $CABALHOME/config 67 | echo "remote-build-reporting: anonymous" >> $CABALHOME/config 68 | echo "write-ghc-environment-files: always" >> $CABALHOME/config 69 | echo "remote-repo-cache: $CABALHOME/packages" >> $CABALHOME/config 70 | echo "logs-dir: $CABALHOME/logs" >> $CABALHOME/config 71 | echo "world-file: $CABALHOME/world" >> $CABALHOME/config 72 | echo "extra-prog-path: $CABALHOME/bin" >> $CABALHOME/config 73 | echo "symlink-bindir: $CABALHOME/bin" >> $CABALHOME/config 74 | echo "installdir: $CABALHOME/bin" >> $CABALHOME/config 75 | echo "build-summary: $CABALHOME/logs/build.log" >> $CABALHOME/config 76 | echo "store-dir: $CABALHOME/store" >> $CABALHOME/config 77 | echo "install-dirs user" >> $CABALHOME/config 78 | echo " prefix: $CABALHOME" >> $CABALHOME/config 79 | echo "repository hackage.haskell.org" >> $CABALHOME/config 80 | echo " url: http://hackage.haskell.org/" >> $CABALHOME/config 81 | install: 82 | - ${CABAL} --version 83 | - echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]" 84 | - | 85 | echo "program-default-options" >> $CABALHOME/config 86 | echo " ghc-options: $GHCJOBS +RTS -M6G -RTS" >> $CABALHOME/config 87 | - cat $CABALHOME/config 88 | - rm -fv cabal.project cabal.project.local cabal.project.freeze 89 | - travis_retry ${CABAL} v2-update -v 90 | # Generate cabal.project 91 | - rm -rf cabal.project cabal.project.local cabal.project.freeze 92 | - touch cabal.project 93 | - | 94 | echo "packages: ." >> cabal.project 95 | - | 96 | - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(regex-applicative)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" 97 | - cat cabal.project || true 98 | - cat cabal.project.local || true 99 | - if [ -f "./configure.ac" ]; then (cd "." && autoreconf -i); fi 100 | - ${CABAL} v2-freeze $WITHCOMPILER ${TEST} ${BENCH} 101 | - "cat cabal.project.freeze | sed -E 's/^(constraints: *| *)//' | sed 's/any.//'" 102 | - rm cabal.project.freeze 103 | - ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} --dep -j2 all 104 | - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks --dep -j2 all 105 | script: 106 | - DISTDIR=$(mktemp -d /tmp/dist-test.XXXX) 107 | # Packaging... 108 | - ${CABAL} v2-sdist all 109 | # Unpacking... 110 | - mv dist-newstyle/sdist/*.tar.gz ${DISTDIR}/ 111 | - cd ${DISTDIR} || false 112 | - find . -maxdepth 1 -type f -name '*.tar.gz' -exec tar -xvf '{}' \; 113 | - find . -maxdepth 1 -type f -name '*.tar.gz' -exec rm '{}' \; 114 | - PKGDIR_regex_applicative="$(find . -maxdepth 1 -type d -regex '.*/regex-applicative-[0-9.]*')" 115 | # Generate cabal.project 116 | - rm -rf cabal.project cabal.project.local cabal.project.freeze 117 | - touch cabal.project 118 | - | 119 | echo "packages: ${PKGDIR_regex_applicative}" >> cabal.project 120 | - | 121 | - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(regex-applicative)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" 122 | - cat cabal.project || true 123 | - cat cabal.project.local || true 124 | # Building... 125 | # this builds all libraries and executables (without tests/benchmarks) 126 | - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all 127 | # Building with tests and benchmarks... 128 | # build & run tests, build benchmarks 129 | - ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} all 130 | # Testing... 131 | - ${CABAL} v2-test $WITHCOMPILER ${TEST} ${BENCH} all 132 | # cabal check... 133 | - (cd ${PKGDIR_regex_applicative} && ${CABAL} -vnormal check) 134 | # haddock... 135 | - ${CABAL} v2-haddock $WITHCOMPILER --with-haddock $HADDOCK ${TEST} ${BENCH} all 136 | # Building without installed constraints for packages in global-db... 137 | - rm -f cabal.project.local 138 | - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all 139 | 140 | # REGENDATA ("0.9.20200125",["regex-applicative.cabal","--output",".travis.yml"]) 141 | # EOF 142 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | Changes 2 | ======= 3 | 4 | 0.3.4 5 | ----- 6 | 7 | * Let the user provide a custom `uncons` function (add 8 | `find{First,Longest,Shortest}PrefixWithUncons`) 9 | * Add `Filtrable` and `Monoid` instances for `RE` 10 | 11 | 0.3.3.1 12 | ------- 13 | 14 | Make a release to refresh the haddocks on hackage 15 | (see ). 16 | 17 | 0.3.3 18 | ----- 19 | 20 | Add `replace` 21 | 22 | 0.3.2.1 23 | ------- 24 | 25 | * Use strict left fold in decimal/hexadecimal 26 | * Include a missing test module in the sdist tarball 27 | 28 | 0.3.2 29 | ----- 30 | 31 | Add `msym` 32 | 33 | 0.3.1 34 | ----- 35 | 36 | Add `comap` 37 | 38 | 0.3.0.3 39 | ------- 40 | 41 | * Fix the test suite 42 | * Fix build with GHC 7.9 43 | 44 | 0.3.0.2 45 | ------- 46 | 47 | Fix the test suite 48 | 49 | 0.3.0.1 50 | ------- 51 | 52 | Port the test suite to tasty 53 | 54 | 0.3 55 | --- 56 | * Add a new module, `Text.Regex.Applicative.Common`, which contains some 57 | commonly used regexps (by Aleksey Khudyakov) 58 | * Improve the test suite 59 | 60 | 0.2.1 61 | ----- 62 | * Add the `withMatched` function 63 | * Make matching functions a bit more lax 64 | * Fix a bug in the `empty` method 65 | 66 | 0.2 67 | --- 68 | * Infix matching functions 69 | * Improved documentation 70 | * Improved performance 71 | * Improved portability 72 | 73 | 0.1.5 74 | ----- 75 | * Expose Object interface 76 | * Allow matching prefixes rather than the whole string 77 | * Add non-greedy repetitions 78 | 79 | 0.1.4 80 | ----- 81 | * Completely rewrite the engine. Now it's faster and runs in constant space. 82 | * Add 'string' function and 'IsString' instance. 83 | 84 | 0.1.3 85 | ----- 86 | * Fix a .cabal-file issue introduced in 0.1.2 87 | * Change the fixity of =~ 88 | 89 | 0.1.2 90 | ----- 91 | * Relax the constraint on the containers version 92 | 93 | 0.1.1 94 | --- 95 | * Fix a bug in 'reFoldl' and 'many' 96 | * "Lazy" infinite regexes are no longer supported 97 | 98 | 0.1 99 | --- 100 | * Initial release 101 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing guidelines 2 | 3 | Bug reports are always welcome. Please provide full instructions to reproduce. 4 | 5 | Pull requests fixing bugs are much appreciated. 6 | 7 | Feature requests (whether in the form of an issue or a pull request) are 8 | acceptable only if: 9 | 10 | 1. The feature would be useful for a large fraction of users, as opposed to 11 | tackling an uncommon use case. 12 | 2. When filing a feature request as an issue, if it's accepted, you are willing 13 | to implement it. 14 | 15 | When creating a pull request, please read and follow [How to prepare a good pull request](https://ro-che.info/articles/2016-09-18-good-pull-requests). 16 | 17 | Issues asking questions are fine, assuming you've put a reasonable effort to 18 | figure out the answer yourself first. 19 | -------------------------------------------------------------------------------- /CREDITS.md: -------------------------------------------------------------------------------- 1 | The current implementation is based on ideas [publicized][cox] by Russ Cox. 2 | 3 | The original implementation was inspired and heavily based on the ideas from ["A 4 | Play on Regular Expressions"][play] by Sebastian Fischer, Frank Huch and Thomas 5 | Wilke. 6 | 7 | [cox]: http://swtch.com/~rsc/regexp/ 8 | [play]: http://sebfisch.github.com/haskell-regexp/regexp-play.pdf 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Roman Cheplyaka 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | regex-applicative 2 | ================= 3 | 4 | *regex-applicative* is a parsing combinator library for Haskell based on regular 5 | expressions. 6 | 7 | Example 8 | ------- 9 | 10 | ``` haskell 11 | import Text.Regex.Applicative 12 | 13 | data Protocol = HTTP | FTP deriving Show 14 | 15 | protocol :: RE Char Protocol 16 | protocol = HTTP <$ string "http" <|> FTP <$ string "ftp" 17 | 18 | type Host = String 19 | type Location = String 20 | data URL = URL Protocol Host Location deriving Show 21 | 22 | host :: RE Char Host 23 | host = many $ psym $ (/= '/') 24 | 25 | url :: RE Char URL 26 | url = URL <$> protocol <* string "://" <*> host <* sym '/' <*> many anySym 27 | 28 | main = print $ "http://stackoverflow.com/questions" =~ url 29 | ``` 30 | 31 | Documentation 32 | ------------- 33 | 34 | See the [API reference][haddock]. 35 | 36 | Performance 37 | ----------- 38 | 39 | For common tasks, this package is several times slower than monadic 40 | parser combinator libraries like parsec. However, this library has a roughly 41 | linear complexity, whereas monadic parser combinators have exponential 42 | worst-time complexity (see [here](https://swtch.com/~rsc/regexp/regexp1.html)). 43 | 44 | Some tips to make your regex run faster: 45 | 46 | 1. If you don't care about the result of the whole regex or its part, only 47 | whether it matches or not, mark it with `void` or `<$`. Recognition is faster 48 | than parsing. 49 | 1. If you apply the same regex to multiple strings, partially apply it like so: 50 | 51 | ``` 52 | let matcher = match my_regex 53 | in map matcher my_strings 54 | ``` 55 | 56 | This way the compiled regex is stored in the `matcher` value and shared among 57 | the strings. 58 | 59 | GHC support 60 | ----------- 61 | 62 | Only GHC versions >= 8.0 are supported, although older versions may work too. 63 | 64 | [haddock]: http://hackage.haskell.org/packages/archive/regex-applicative/latest/doc/html/Text-Regex-Applicative.html 65 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /Text/Regex/Applicative.hs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------- 2 | -- | 3 | -- Module : Text.Regex.Applicative 4 | -- Copyright : (c) Roman Cheplyaka 5 | -- License : MIT 6 | -- 7 | -- Maintainer: Roman Cheplyaka 8 | -- Stability : experimental 9 | -- 10 | -- To get started, see some examples on the wiki: 11 | -- 12 | -------------------------------------------------------------------- 13 | 14 | module Text.Regex.Applicative 15 | ( RE 16 | , sym 17 | , psym 18 | , msym 19 | , anySym 20 | , string 21 | , reFoldl 22 | , Greediness(..) 23 | , few 24 | , comap 25 | , withMatched 26 | , match 27 | , (=~) 28 | , replace 29 | , findFirstPrefix 30 | , findLongestPrefix 31 | , findShortestPrefix 32 | , findFirstInfix 33 | , findLongestInfix 34 | , findShortestInfix 35 | -- * Custom uncons function 36 | -- $uncons 37 | , findFirstPrefixWithUncons 38 | , findLongestPrefixWithUncons 39 | , findShortestPrefixWithUncons 40 | , module Control.Applicative 41 | ) 42 | where 43 | import Text.Regex.Applicative.Types 44 | import Text.Regex.Applicative.Interface 45 | import Control.Applicative 46 | 47 | {- $uncons 48 | The following functions take an argument that splits the input into the first symbol and 49 | the remaining input (if the input is non-empty). 50 | 51 | It is useful, for example, for feeding a @Text@ to a regex matcher: 52 | 53 | >>> import qualified Data.Text as T 54 | >>> findFirstPrefixWithUncons T.uncons (many (sym 'a')) "aaa" 55 | Just ("aaa","") 56 | 57 | For another example, feeding input symbols annotated with source positions into a matcher, 58 | preserving the positions in the remaining input so the location of a lexical error can be 59 | recovered: 60 | 61 | @ 62 | data AList a b = AList { annotation :: a, stripAnnotation :: Maybe (b, AList a b) } 63 | 64 | findLongestPrefixAnnotated :: RE s a -> AList b s -> Maybe (a, AList b s) 65 | fondLongestPrefixAnnotated = findLongestPrefixWithUncons stripAnnotation 66 | @ 67 | 68 | The use of the other functions taking an @uncons@ argument is exactly analogous. 69 | -} 70 | -------------------------------------------------------------------------------- /Text/Regex/Applicative/Common.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Collection of commonly used regular expressions. 3 | module Text.Regex.Applicative.Common ( 4 | -- * Digits 5 | digit 6 | , hexDigit 7 | -- * Numbers 8 | , signed 9 | , decimal 10 | , hexadecimal 11 | ) where 12 | 13 | import Data.Char 14 | import Data.List (foldl') 15 | import Text.Regex.Applicative 16 | 17 | 18 | -- | Decimal digit, i.e. @\'0\'@..@\'9\'@ 19 | digit :: Num a => RE Char a 20 | digit = fromIntegral . digitToInt <$> psym isDigit 21 | 22 | -- | Hexadecimal digit 23 | -- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@. 24 | hexDigit :: Num a => RE Char a 25 | hexDigit = fromIntegral . digitToInt <$> psym isHexDigit 26 | 27 | -- | Add optional sign 28 | signed :: Num a => RE Char a -> RE Char a 29 | signed p = sign <*> p 30 | where 31 | sign = id <$ sym '+' 32 | <|> negate <$ sym '-' 33 | <|> pure id 34 | 35 | -- | Parse decimal number without sign. 36 | decimal :: Num a => RE Char a 37 | decimal = foldl' (\d i -> d*10 + i) 0 <$> some digit 38 | 39 | -- | Parse decimal number without sign. 40 | hexadecimal :: Num a => RE Char a 41 | hexadecimal = foldl' (\d i -> d*16 + i) 0 <$> some hexDigit 42 | -------------------------------------------------------------------------------- /Text/Regex/Applicative/Compile.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs #-} 2 | module Text.Regex.Applicative.Compile (compile) where 3 | 4 | import Control.Monad ((<=<)) 5 | import Control.Monad.Trans.State 6 | import Data.Foldable 7 | import Data.Maybe 8 | import Data.Monoid (Any (..)) 9 | import qualified Data.IntMap as IntMap 10 | import Text.Regex.Applicative.Types 11 | 12 | compile :: RE s a -> (a -> [Thread s r]) -> [Thread s r] 13 | compile e k = compile2 e (SingleCont k) 14 | 15 | data Cont a = SingleCont !a | EmptyNonEmpty !a !a 16 | 17 | instance Functor Cont where 18 | fmap f k = 19 | case k of 20 | SingleCont a -> SingleCont (f a) 21 | EmptyNonEmpty a b -> EmptyNonEmpty (f a) (f b) 22 | 23 | emptyCont :: Cont a -> a 24 | emptyCont k = 25 | case k of 26 | SingleCont a -> a 27 | EmptyNonEmpty a _ -> a 28 | nonEmptyCont :: Cont a -> a 29 | nonEmptyCont k = 30 | case k of 31 | SingleCont a -> a 32 | EmptyNonEmpty _ a -> a 33 | 34 | -- compile2 function takes two continuations: one when the match is empty and 35 | -- one when the match is non-empty. See the "Rep" case for the reason. 36 | compile2 :: RE s a -> Cont (a -> [Thread s r]) -> [Thread s r] 37 | compile2 e = 38 | case e of 39 | Eps -> \k -> emptyCont k () 40 | Symbol i p -> \k -> [t $ nonEmptyCont k] where 41 | -- t :: (a -> [Thread s r]) -> Thread s r 42 | t k = Thread i $ \s -> 43 | case p s of 44 | Just r -> k r 45 | Nothing -> [] 46 | App n1 n2 -> 47 | let a1 = compile2 n1 48 | a2 = compile2 n2 49 | in \k -> case k of 50 | SingleCont k -> a1 $ SingleCont $ \a1_value -> a2 $ SingleCont $ k . a1_value 51 | EmptyNonEmpty ke kn -> 52 | a1 $ EmptyNonEmpty 53 | -- empty 54 | (\a1_value -> a2 $ EmptyNonEmpty (ke . a1_value) (kn . a1_value)) 55 | -- non-empty 56 | (\a1_value -> a2 $ EmptyNonEmpty (kn . a1_value) (kn . a1_value)) 57 | Alt n1 n2 -> 58 | let a1 = compile2 n1 59 | a2 = compile2 n2 60 | in \k -> a1 k ++ a2 k 61 | Fail -> const [] 62 | Fmap f n -> let a = compile2 n in \k -> a $ fmap (. f) k 63 | CatMaybes n -> let a = compile2 n in \k -> a $ (<=< toList) <$> k 64 | -- This is actually the point where we use the difference between 65 | -- continuations. For the inner RE the empty continuation is a 66 | -- "failing" one in order to avoid non-termination. 67 | Rep g f b n -> 68 | let a = compile2 n 69 | threads b k = 70 | combine g 71 | (a $ EmptyNonEmpty (\_ -> []) (\v -> let b' = f b v in threads b' (SingleCont $ nonEmptyCont k))) 72 | (emptyCont k b) 73 | in threads b 74 | Void n 75 | | hasCatMaybes n -> compile2 n . fmap (. \ _ -> ()) 76 | | otherwise -> compile2_ n . fmap ($ ()) 77 | 78 | data FSMState 79 | = SAccept 80 | | STransition !ThreadId 81 | 82 | type FSMMap s = IntMap.IntMap (s -> Bool, [FSMState]) 83 | 84 | mkNFA :: RE s a -> ([FSMState], (FSMMap s)) 85 | mkNFA e = 86 | flip runState IntMap.empty $ 87 | go e [SAccept] 88 | where 89 | go :: RE s a -> [FSMState] -> State (FSMMap s) [FSMState] 90 | go e k = 91 | case e of 92 | Eps -> return k 93 | Symbol i@(ThreadId n) p -> do 94 | modify $ IntMap.insert n $ 95 | (isJust . p, k) 96 | return [STransition i] 97 | App n1 n2 -> go n1 =<< go n2 k 98 | Alt n1 n2 -> (++) <$> go n1 k <*> go n2 k 99 | Fail -> return [] 100 | Fmap _ n -> go n k 101 | CatMaybes _ -> error "mkNFA CatMaybes" 102 | Rep g _ _ n -> 103 | let entries = findEntries n 104 | cont = combine g entries k 105 | in 106 | -- return value of 'go' is ignored -- it should be a subset of 107 | -- 'cont' 108 | go n cont >> return cont 109 | Void n -> go n k 110 | 111 | findEntries :: RE s a -> [FSMState] 112 | findEntries e = 113 | -- A simple (although a bit inefficient) way to find all entry points is 114 | -- just to use 'go' 115 | evalState (go e []) IntMap.empty 116 | 117 | hasCatMaybes :: RE s a -> Bool 118 | hasCatMaybes = getAny . foldMapPostorder (Any . \ case CatMaybes _ -> True; _ -> False) 119 | 120 | compile2_ :: RE s a -> Cont [Thread s r] -> [Thread s r] 121 | compile2_ e = 122 | let (entries, fsmap) = mkNFA e 123 | mkThread _ k1 (STransition i@(ThreadId n)) = 124 | let (p, cont) = fromMaybe (error "Unknown id") $ IntMap.lookup n fsmap 125 | in [Thread i $ \s -> 126 | if p s 127 | then concatMap (mkThread k1 k1) cont 128 | else []] 129 | mkThread k0 _ SAccept = k0 130 | 131 | in \k -> concatMap (mkThread (emptyCont k) (nonEmptyCont k)) entries 132 | 133 | combine :: Greediness -> [a] -> [a] -> [a] 134 | combine g continue stop = 135 | case g of 136 | Greedy -> continue ++ stop 137 | NonGreedy -> stop ++ continue 138 | -------------------------------------------------------------------------------- /Text/Regex/Applicative/Interface.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies, GADTs, TupleSections #-} 2 | module Text.Regex.Applicative.Interface where 3 | import Control.Applicative hiding (empty) 4 | import Control.Arrow 5 | import Control.Monad (guard) 6 | import qualified Data.List as List 7 | import Data.Maybe 8 | import Text.Regex.Applicative.Types 9 | import Text.Regex.Applicative.Object 10 | 11 | -- | 'RE' is a profunctor. This is its contravariant map. 12 | -- 13 | -- (A dependency on the @profunctors@ package doesn't seem justified.) 14 | comap :: (s2 -> s1) -> RE s1 a -> RE s2 a 15 | comap f re = 16 | case re of 17 | Eps -> Eps 18 | Symbol t p -> Symbol t (p . f) 19 | Alt r1 r2 -> Alt (comap f r1) (comap f r2) 20 | App r1 r2 -> App (comap f r1) (comap f r2) 21 | Fmap g r -> Fmap g (comap f r) 22 | CatMaybes r -> CatMaybes (comap f r) 23 | Fail -> Fail 24 | Rep gr fn a r -> Rep gr fn a (comap f r) 25 | Void r -> Void (comap f r) 26 | 27 | -- | Match and return any single symbol 28 | anySym :: RE s s 29 | anySym = msym Just 30 | 31 | -- | Match zero or more instances of the given expression, which are combined using 32 | -- the given folding function. 33 | -- 34 | -- 'Greediness' argument controls whether this regular expression should match 35 | -- as many as possible ('Greedy') or as few as possible ('NonGreedy') instances 36 | -- of the underlying expression. 37 | reFoldl :: Greediness -> (b -> a -> b) -> b -> RE s a -> RE s b 38 | reFoldl g f b a = Rep g f b a 39 | 40 | -- | Match zero or more instances of the given expression, but as 41 | -- few of them as possible (i.e. /non-greedily/). A greedy equivalent of 'few' 42 | -- is 'many'. 43 | -- 44 | -- Examples: 45 | -- 46 | -- >>> findFirstPrefix (few anySym <* "b") "ababab" 47 | -- Just ("a","abab") 48 | -- >>> findFirstPrefix (many anySym <* "b") "ababab" 49 | -- Just ("ababa","") 50 | few :: RE s a -> RE s [a] 51 | few a = reverse <$> Rep NonGreedy (flip (:)) [] a 52 | 53 | -- | Return matched symbols as part of the return value 54 | withMatched :: RE s a -> RE s (a, [s]) 55 | withMatched Eps = flip (,) [] <$> Eps 56 | withMatched (Symbol t p) = Symbol t (\s -> (,[s]) <$> p s) 57 | withMatched (Alt a b) = withMatched a <|> withMatched b 58 | withMatched (App a b) = 59 | (\(f, s) (x, t) -> (f x, s ++ t)) <$> 60 | withMatched a <*> 61 | withMatched b 62 | withMatched Fail = Fail 63 | withMatched (Fmap f x) = (f *** id) <$> withMatched x 64 | withMatched (CatMaybes x) = CatMaybes $ 65 | (\ (as, s) -> flip (,) s <$> as) <$> withMatched x 66 | withMatched (Rep gr f a0 x) = 67 | Rep gr (\(a, s) (x, t) -> (f a x, s ++ t)) (a0, []) (withMatched x) 68 | -- N.B.: this ruins the Void optimization 69 | withMatched (Void x) = (const () *** id) <$> withMatched x 70 | 71 | -- | @s =~ a = match a s@ 72 | (=~) :: [s] -> RE s a -> Maybe a 73 | (=~) = flip match 74 | infix 2 =~ 75 | 76 | -- | Attempt to match a string of symbols against the regular expression. 77 | -- Note that the whole string (not just some part of it) should be matched. 78 | -- 79 | -- Examples: 80 | -- 81 | -- >>> match (sym 'a' <|> sym 'b') "a" 82 | -- Just 'a' 83 | -- >>> match (sym 'a' <|> sym 'b') "ab" 84 | -- Nothing 85 | -- 86 | match :: RE s a -> [s] -> Maybe a 87 | match re = let obj = compile re in \str -> 88 | listToMaybe $ 89 | results $ 90 | foldl (flip step) obj str 91 | 92 | -- | Find a string prefix which is matched by the regular expression. 93 | -- 94 | -- Of all matching prefixes, pick one using left bias (prefer the left part of 95 | -- '<|>' to the right part) and greediness. 96 | -- 97 | -- This is the match which a backtracking engine (such as Perl's one) would find 98 | -- first. 99 | -- 100 | -- If match is found, the rest of the input is also returned. 101 | -- 102 | -- See also 'findFirstPrefixWithUncons', of which this is a special case. 103 | -- 104 | -- Examples: 105 | -- 106 | -- >>> findFirstPrefix ("a" <|> "ab") "abc" 107 | -- Just ("a","bc") 108 | -- >>> findFirstPrefix ("ab" <|> "a") "abc" 109 | -- Just ("ab","c") 110 | -- >>> findFirstPrefix "bc" "abc" 111 | -- Nothing 112 | findFirstPrefix :: RE s a -> [s] -> Maybe (a, [s]) 113 | findFirstPrefix = findFirstPrefixWithUncons List.uncons 114 | 115 | -- | Find the first prefix, with the given @uncons@ function. 116 | -- 117 | -- @since 0.3.4 118 | findFirstPrefixWithUncons :: (ss -> Maybe (s, ss)) -> RE s a -> ss -> Maybe (a, ss) 119 | findFirstPrefixWithUncons = findPrefixWith' (walk emptyObject . threads) 120 | where 121 | walk obj [] = (obj, Nothing) 122 | walk obj (t:ts) = 123 | case getResult t of 124 | Just r -> (obj, Just r) 125 | Nothing -> walk (addThread t obj) ts 126 | 127 | -- | Find the longest string prefix which is matched by the regular expression. 128 | -- 129 | -- Submatches are still determined using left bias and greediness, so this is 130 | -- different from POSIX semantics. 131 | -- 132 | -- If match is found, the rest of the input is also returned. 133 | -- 134 | -- See also 'findLongestPrefixWithUncons', of which this is a special case. 135 | -- 136 | -- Examples: 137 | -- 138 | -- >>> import Data.Char 139 | -- >>> let keyword = "if" 140 | -- >>> let identifier = many $ psym isAlpha 141 | -- >>> let lexeme = (Left <$> keyword) <|> (Right <$> identifier) 142 | -- >>> findLongestPrefix lexeme "if foo" 143 | -- Just (Left "if"," foo") 144 | -- >>> findLongestPrefix lexeme "iffoo" 145 | -- Just (Right "iffoo","") 146 | findLongestPrefix :: RE s a -> [s] -> Maybe (a, [s]) 147 | findLongestPrefix = findLongestPrefixWithUncons List.uncons 148 | 149 | -- | Find the longest prefix, with the given @uncons@ function. 150 | -- 151 | -- @since 0.3.4 152 | findLongestPrefixWithUncons :: (ss -> Maybe (s, ss)) -> RE s a -> ss -> Maybe (a, ss) 153 | findLongestPrefixWithUncons = findPrefixWith' ((,) <*> listToMaybe . results) 154 | 155 | findPrefixWith' 156 | :: (ReObject s a -> (ReObject s a, Maybe a)) 157 | -- ^ Given the regex object, compute the regex object to feed the next input value into, and 158 | -- the result, if any. 159 | -> (ss -> Maybe (s, ss)) -- ^ @uncons@ 160 | -> RE s a -> ss -> Maybe (a, ss) 161 | findPrefixWith' walk uncons = \ re -> go (compile re) Nothing 162 | where 163 | go obj resOld ss = case walk obj of 164 | (obj', resThis) -> 165 | let res = flip (,) ss <$> resThis <|> resOld 166 | in 167 | case uncons ss of 168 | _ | failed obj' -> res 169 | Nothing -> res 170 | Just (s, ss) -> go (step s obj') res ss 171 | 172 | -- | Find the shortest prefix (analogous to 'findLongestPrefix') 173 | -- 174 | -- See also 'findShortestPrefixWithUncons', of which this is a special case. 175 | findShortestPrefix :: RE s a -> [s] -> Maybe (a, [s]) 176 | findShortestPrefix = findShortestPrefixWithUncons List.uncons 177 | 178 | -- | Find the shortest prefix (analogous to 'findLongestPrefix'), with the given @uncons@ function. 179 | -- 180 | -- @since 0.3.4 181 | findShortestPrefixWithUncons :: (ss -> Maybe (s, ss)) -> RE s a -> ss -> Maybe (a, ss) 182 | findShortestPrefixWithUncons uncons = go . compile 183 | where 184 | go obj ss = case results obj of 185 | r:_ -> Just (r, ss) 186 | _ -> do 187 | guard (not (failed obj)) 188 | (s, ss) <- uncons ss 189 | go (step s obj) ss 190 | 191 | -- | Find the leftmost substring that is matched by the regular expression. 192 | -- Otherwise behaves like 'findFirstPrefix'. Returns the result together with 193 | -- the prefix and suffix of the string surrounding the match. 194 | findFirstInfix :: RE s a -> [s] -> Maybe ([s], a, [s]) 195 | findFirstInfix re str = 196 | fmap (\((first, res), last) -> (first, res, last)) $ 197 | findFirstPrefix ((,) <$> few anySym <*> re) str 198 | 199 | -- Auxiliary function for findExtremeInfix 200 | prefixCounter :: RE s (Int, [s]) 201 | prefixCounter = second reverse <$> reFoldl NonGreedy f (0, []) anySym 202 | where 203 | f (i, prefix) s = ((,) $! (i+1)) $ s:prefix 204 | 205 | data InfixMatchingState s a = GotResult 206 | { prefixLen :: !Int 207 | , prefixStr :: [s] 208 | , result :: a 209 | , postfixStr :: [s] 210 | } 211 | | NoResult 212 | 213 | -- a `preferOver` b chooses one of a and b, giving preference to a 214 | preferOver 215 | :: InfixMatchingState s a 216 | -> InfixMatchingState s a 217 | -> InfixMatchingState s a 218 | preferOver NoResult b = b 219 | preferOver b NoResult = b 220 | preferOver a b = 221 | case prefixLen a `compare` prefixLen b of 222 | GT -> b -- prefer b when it has smaller prefix 223 | _ -> a -- otherwise, prefer a 224 | 225 | mkInfixMatchingState 226 | :: [s] -- rest of input 227 | -> Thread s ((Int, [s]), a) 228 | -> InfixMatchingState s a 229 | mkInfixMatchingState rest thread = 230 | case getResult thread of 231 | Just ((pLen, pStr), res) -> 232 | GotResult 233 | { prefixLen = pLen 234 | , prefixStr = pStr 235 | , result = res 236 | , postfixStr = rest 237 | } 238 | Nothing -> NoResult 239 | 240 | gotResult :: InfixMatchingState s a -> Bool 241 | gotResult GotResult {} = True 242 | gotResult _ = False 243 | 244 | -- Algorithm for finding leftmost longest infix match: 245 | -- 246 | -- 1. Add a thread /.*?/ to the begginning of the regexp 247 | -- 2. As soon as we get first accept, we delete that thread 248 | -- 3. When we get more than one accept, we choose one by the following criteria: 249 | -- 3.1. Compare by the length of prefix (since we are looking for the leftmost 250 | -- match) 251 | -- 3.2. If they are produced on the same step, choose the first one (left-biased 252 | -- choice) 253 | -- 3.3. If they are produced on the different steps, choose the later one (since 254 | -- they have the same prefixes, later means longer) 255 | findExtremalInfix 256 | :: -- function to combine a later result (first arg) to an earlier one (second 257 | -- arg) 258 | (InfixMatchingState s a -> InfixMatchingState s a -> InfixMatchingState s a) 259 | -> RE s a 260 | -> [s] 261 | -> Maybe ([s], a, [s]) 262 | findExtremalInfix newOrOld re str = 263 | case go (compile $ (,) <$> prefixCounter <*> re) str NoResult of 264 | NoResult -> Nothing 265 | r@GotResult{} -> 266 | Just (prefixStr r, result r, postfixStr r) 267 | where 268 | {- 269 | go :: ReObject s ((Int, [s]), a) 270 | -> [s] 271 | -> InfixMatchingState s a 272 | -> InfixMatchingState s a 273 | -} 274 | go obj str resOld = 275 | let resThis = 276 | foldl 277 | (\acc t -> acc `preferOver` mkInfixMatchingState str t) 278 | NoResult $ 279 | threads obj 280 | res = resThis `newOrOld` resOld 281 | obj' = 282 | -- If we just found the first result, kill the "prefixCounter" thread. 283 | -- We rely on the fact that it is the last thread of the object. 284 | if gotResult resThis && not (gotResult resOld) 285 | then fromThreads $ init $ threads obj 286 | else obj 287 | in 288 | case str of 289 | [] -> res 290 | _ | failed obj -> res 291 | (s:ss) -> go (step s obj') ss res 292 | 293 | 294 | -- | Find the leftmost substring that is matched by the regular expression. 295 | -- Otherwise behaves like 'findLongestPrefix'. Returns the result together with 296 | -- the prefix and suffix of the string surrounding the match. 297 | findLongestInfix :: RE s a -> [s] -> Maybe ([s], a, [s]) 298 | findLongestInfix = findExtremalInfix preferOver 299 | 300 | -- | Find the leftmost substring that is matched by the regular expression. 301 | -- Otherwise behaves like 'findShortestPrefix'. Returns the result together with 302 | -- the prefix and suffix of the string surrounding the match. 303 | findShortestInfix :: RE s a -> [s] -> Maybe ([s], a, [s]) 304 | findShortestInfix = findExtremalInfix $ flip preferOver 305 | 306 | -- | Replace matches of the regular expression with its value. 307 | -- 308 | -- >>> replace ("!" <$ sym 'f' <* some (sym 'o')) "quuxfoofooooofoobarfobar" 309 | -- "quux!!!bar!bar" 310 | replace :: RE s [s] -> [s] -> [s] 311 | replace r = ($ []) . go 312 | where go ys = case findLongestInfix r ys of 313 | Nothing -> (ys ++) 314 | Just (before, m, rest) -> (before ++) . (m ++) . go rest 315 | -------------------------------------------------------------------------------- /Text/Regex/Applicative/Object.hs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------- 2 | -- | 3 | -- Module : Text.Regex.Applicative.Object 4 | -- Copyright : (c) Roman Cheplyaka 5 | -- License : MIT 6 | -- 7 | -- Maintainer: Roman Cheplyaka 8 | -- Stability : experimental 9 | -- 10 | -- This is a low-level interface to the regex engine. 11 | -------------------------------------------------------------------- 12 | {-# LANGUAGE GADTs #-} 13 | module Text.Regex.Applicative.Object 14 | ( ReObject 15 | , compile 16 | , emptyObject 17 | , Thread 18 | , threads 19 | , failed 20 | , isResult 21 | , getResult 22 | , results 23 | , ThreadId 24 | , threadId 25 | , step 26 | , stepThread 27 | , fromThreads 28 | , addThread 29 | ) where 30 | 31 | import Text.Regex.Applicative.Types 32 | import qualified Text.Regex.Applicative.StateQueue as SQ 33 | import qualified Text.Regex.Applicative.Compile as Compile 34 | import Data.Maybe 35 | import Data.Foldable as F 36 | import Control.Monad.Trans.State 37 | 38 | -- | The state of the engine is represented as a \"regex object\" of type 39 | -- @'ReObject' s r@, where @s@ is the type of symbols and @r@ is the 40 | -- result type (as in the 'RE' type). Think of 'ReObject' as a collection of 41 | -- 'Thread's ordered by priority. E.g. threads generated by the left part of 42 | -- '<|>' come before the threads generated by the right part. 43 | newtype ReObject s r = ReObject (SQ.StateQueue (Thread s r)) 44 | 45 | -- | List of all threads of an object. Each non-result thread has a unique id. 46 | threads :: ReObject s r -> [Thread s r] 47 | threads (ReObject sq) = F.toList sq 48 | 49 | -- | Create an object from a list of threads. It is recommended that all 50 | -- threads come from the same 'ReObject', unless you know what you're doing. 51 | -- However, it should be safe to filter out or rearrange threads. 52 | fromThreads :: [Thread s r] -> ReObject s r 53 | fromThreads ts = F.foldl' (flip addThread) emptyObject ts 54 | 55 | -- | Check whether a thread is a result thread 56 | isResult :: Thread s r -> Bool 57 | isResult Accept {} = True 58 | isResult _ = False 59 | 60 | -- | Return the result of a result thread, or 'Nothing' if it's not a result 61 | -- thread 62 | getResult :: Thread s r -> Maybe r 63 | getResult (Accept r) = Just r 64 | getResult _ = Nothing 65 | 66 | -- | Check if the object has no threads. In that case it never will 67 | -- produce any new threads as a result of 'step'. 68 | failed :: ReObject s r -> Bool 69 | failed obj = null $ threads obj 70 | 71 | -- | Empty object (with no threads) 72 | emptyObject :: ReObject s r 73 | emptyObject = ReObject $ SQ.empty 74 | 75 | -- | Extract the result values from all the result threads of an object 76 | results :: ReObject s r -> [r] 77 | results obj = 78 | mapMaybe getResult $ threads obj 79 | 80 | -- | Feed a symbol into a regex object 81 | step :: s -> ReObject s r -> ReObject s r 82 | step s (ReObject sq) = 83 | let accum q t = 84 | case t of 85 | Accept {} -> q 86 | Thread _ c -> 87 | F.foldl' (\q x -> addThread x q) q $ c s 88 | newQueue = F.foldl' accum emptyObject sq 89 | in newQueue 90 | 91 | -- | Feed a symbol into a non-result thread. It is an error to call 'stepThread' 92 | -- on a result thread. 93 | stepThread :: s -> Thread s r -> [Thread s r] 94 | stepThread s t = 95 | case t of 96 | Thread _ c -> c s 97 | Accept {} -> error "stepThread on a result" 98 | 99 | -- | Add a thread to an object. The new thread will have lower priority than the 100 | -- threads which are already in the object. 101 | -- 102 | -- If a (non-result) thread with the same id already exists in the object, the 103 | -- object is not changed. 104 | addThread :: Thread s r -> ReObject s r -> ReObject s r 105 | addThread t (ReObject q) = 106 | case t of 107 | Accept {} -> ReObject $ SQ.insert t q 108 | Thread { threadId_ = ThreadId i } -> ReObject $ SQ.insertUnique i t q 109 | 110 | -- | Compile a regular expression into a regular expression object 111 | compile :: RE s r -> ReObject s r 112 | compile = 113 | fromThreads . 114 | flip Compile.compile (\x -> [Accept x]) . 115 | renumber 116 | 117 | renumber :: RE s a -> RE s a 118 | renumber = 119 | flip evalState (ThreadId 1) . 120 | traversePostorder (\ case Symbol _ p -> flip Symbol p <$> fresh; a -> pure a) 121 | 122 | fresh :: State ThreadId ThreadId 123 | fresh = do 124 | t@(ThreadId i) <- get 125 | put $! ThreadId (i+1) 126 | return t 127 | -------------------------------------------------------------------------------- /Text/Regex/Applicative/Reference.hs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------- 2 | -- | 3 | -- Module : Text.Regex.Applicative.Reference 4 | -- Copyright : (c) Roman Cheplyaka 5 | -- License : MIT 6 | -- 7 | -- Maintainer: Roman Cheplyaka 8 | -- Stability : experimental 9 | -- 10 | -- Reference implementation (using backtracking). 11 | -- 12 | -- This is exposed for testing purposes only! 13 | -------------------------------------------------------------------- 14 | 15 | {-# LANGUAGE GADTs #-} 16 | module Text.Regex.Applicative.Reference (reference) where 17 | import Prelude hiding (getChar) 18 | import Text.Regex.Applicative.Types 19 | import Control.Applicative 20 | import Control.Monad 21 | 22 | 23 | -- A simple parsing monad 24 | newtype P s a = P { unP :: [s] -> [(a, [s])] } 25 | 26 | instance Monad (P s) where 27 | return x = P $ \s -> [(x, s)] 28 | (P a) >>= k = P $ \s -> 29 | a s >>= \(x,s) -> unP (k x) s 30 | 31 | instance Functor (P s) where 32 | fmap = liftM 33 | 34 | instance Applicative (P s) where 35 | (<*>) = ap 36 | pure = return 37 | 38 | instance Alternative (P s) where 39 | empty = P $ const [] 40 | P a1 <|> P a2 = P $ \s -> 41 | a1 s ++ a2 s 42 | 43 | getChar :: P s s 44 | getChar = P $ \s -> 45 | case s of 46 | [] -> [] 47 | c:cs -> [(c,cs)] 48 | 49 | re2monad :: RE s a -> P s a 50 | re2monad r = 51 | case r of 52 | Eps -> return $ error "eps" 53 | Symbol _ p -> do 54 | c <- getChar 55 | case p c of 56 | Just r -> return r 57 | Nothing -> empty 58 | Alt a1 a2 -> re2monad a1 <|> re2monad a2 59 | App a1 a2 -> re2monad a1 <*> re2monad a2 60 | Fmap f a -> fmap f $ re2monad a 61 | CatMaybes a -> maybe empty pure =<< re2monad a 62 | Rep g f b a -> rep b 63 | where 64 | am = re2monad a 65 | rep b = combine (do a <- am; rep $ f b a) (return b) 66 | combine a b = case g of Greedy -> a <|> b; NonGreedy -> b <|> a 67 | Void a -> re2monad a >> return () 68 | Fail -> empty 69 | 70 | runP :: P s a -> [s] -> Maybe a 71 | runP m s = case filter (null . snd) $ unP m s of 72 | (r, _) : _ -> Just r 73 | _ -> Nothing 74 | 75 | -- | 'reference' @r@ @s@ should give the same results as @s@ '=~' @r@. 76 | -- 77 | -- However, this is not very efficient implementation and is supposed to be 78 | -- used for testing only. 79 | reference :: RE s a -> [s] -> Maybe a 80 | reference r s = runP (re2monad r) s 81 | -------------------------------------------------------------------------------- /Text/Regex/Applicative/StateQueue.hs: -------------------------------------------------------------------------------- 1 | -- | This internal module is exposed only for testing and benchmarking. You 2 | -- don't need to import it. 3 | {-# LANGUAGE RecordWildCards #-} 4 | module Text.Regex.Applicative.StateQueue 5 | ( StateQueue 6 | , empty 7 | , insert 8 | , insertUnique 9 | , getElements 10 | ) where 11 | 12 | import Prelude hiding (read, lookup, replicate) 13 | import qualified Data.IntSet as IntSet 14 | import Data.Foldable as F 15 | 16 | -- | 'StateQueue' is a data structure that can efficiently insert elements 17 | -- (preserving their order) 18 | -- and check whether an element with the given 'Int' key is already in the queue. 19 | data StateQueue a = StateQueue 20 | { elements :: [a] 21 | , ids :: !IntSet.IntSet 22 | } 23 | deriving (Eq,Show) 24 | 25 | instance Foldable StateQueue where 26 | foldr f a = F.foldr f a . getElements 27 | 28 | -- | Get the list of all elements 29 | getElements :: StateQueue a -> [a] 30 | getElements = reverse . elements 31 | 32 | {-# INLINE empty #-} 33 | -- | The empty state queue 34 | empty :: StateQueue a 35 | empty = StateQueue 36 | { elements = [] 37 | , ids = IntSet.empty 38 | } 39 | 40 | {-# INLINE insert #-} 41 | -- | Insert an element in the state queue, unless there is already an element with the same key 42 | insertUnique 43 | :: Int -- ^ key 44 | -> a 45 | -> StateQueue a 46 | -> StateQueue a 47 | insertUnique i v sq@StateQueue { ids = ids, elements = elements } = 48 | if i `IntSet.member` ids 49 | then sq 50 | else sq { elements = v : elements 51 | , ids = IntSet.insert i ids 52 | } 53 | 54 | -- | Insert an element in the state queue without a key. 55 | -- 56 | -- Since 'insert' doesn't take a key, it won't affect any 'insertUnique'. 57 | insert 58 | :: a 59 | -> StateQueue a 60 | -> StateQueue a 61 | insert v sq = 62 | sq { elements = v : elements sq } 63 | -------------------------------------------------------------------------------- /Text/Regex/Applicative/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs #-} 2 | {-# LANGUAGE RankNTypes #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | {-# LANGUAGE CPP #-} 5 | module Text.Regex.Applicative.Types where 6 | 7 | import Control.Applicative 8 | import Control.Monad ((<=<)) 9 | import Data.Filtrable (Filtrable (..)) 10 | import Data.Functor.Identity (Identity (..)) 11 | import Data.String 12 | #if !MIN_VERSION_base(4,11,0) 13 | import Data.Semigroup 14 | #endif 15 | 16 | newtype ThreadId = ThreadId Int 17 | 18 | -- | A thread either is a result or corresponds to a symbol in the regular 19 | -- expression, which is expected by that thread. 20 | data Thread s r 21 | = Thread 22 | { threadId_ :: ThreadId 23 | , _threadCont :: s -> [Thread s r] 24 | } 25 | | Accept r 26 | 27 | -- | Returns thread identifier. This will be 'Just' for ordinary threads and 28 | -- 'Nothing' for results. 29 | threadId :: Thread s r -> Maybe ThreadId 30 | threadId Thread { threadId_ = i } = Just i 31 | threadId _ = Nothing 32 | 33 | data Greediness = Greedy | NonGreedy 34 | deriving (Show, Read, Eq, Ord, Enum) 35 | 36 | -- | Type of regular expressions that recognize symbols of type @s@ and 37 | -- produce a result of type @a@. 38 | -- 39 | -- Regular expressions can be built using 'Functor', 'Applicative', 40 | -- 'Alternative', and 'Filtrable' instances in the following natural way: 41 | -- 42 | -- * @f@ '<$>' @ra@ matches iff @ra@ matches, and its return value is the result 43 | -- of applying @f@ to the return value of @ra@. 44 | -- 45 | -- * 'pure' @x@ matches the empty string (i.e. it does not consume any symbols), 46 | -- and its return value is @x@ 47 | -- 48 | -- * @rf@ '<*>' @ra@ matches a string iff it is a concatenation of two 49 | -- strings: one matched by @rf@ and the other matched by @ra@. The return value 50 | -- is @f a@, where @f@ and @a@ are the return values of @rf@ and @ra@ 51 | -- respectively. 52 | -- 53 | -- * @ra@ '<|>' @rb@ matches a string which is accepted by either @ra@ or @rb@. 54 | -- It is left-biased, so if both can match, the result of @ra@ is used. 55 | -- 56 | -- * 'empty' is a regular expression which does not match any string. 57 | -- 58 | -- * 'many' @ra@ matches concatenation of zero or more strings matched by @ra@ 59 | -- and returns the list of @ra@'s return values on those strings. 60 | -- 61 | -- * 'some' @ra@ matches concatenation of one or more strings matched by @ra@ 62 | -- and returns the list of @ra@'s return values on those strings. 63 | -- 64 | -- * 'catMaybes' @ram@ matches iff @ram@ matches and produces 'Just _'. 65 | -- 66 | -- * @ra@ '<>' @rb@ matches @ra@ followed by @rb@. The return value is @a <> b@, 67 | -- where @a@ and @b@ are the return values of @ra@ and @rb@ respectively. 68 | -- (See 69 | -- for an example usage.) 70 | -- 71 | -- * 'mempty' matches the empty string (i.e. it does not consume any symbols), 72 | -- and its return value is the 'mempty' value of type @a@. 73 | data RE s a where 74 | Eps :: RE s () 75 | Symbol :: ThreadId -> (s -> Maybe a) -> RE s a 76 | Alt :: RE s a -> RE s a -> RE s a 77 | App :: RE s (a -> b) -> RE s a -> RE s b 78 | Fmap :: (a -> b) -> RE s a -> RE s b 79 | CatMaybes :: RE s (Maybe a) -> RE s a 80 | Fail :: RE s a 81 | Rep :: Greediness -- repetition may be greedy or not 82 | -> (b -> a -> b) -- folding function (like in foldl) 83 | -> b -- the value for zero matches, and also the initial value 84 | -- for the folding function 85 | -> RE s a 86 | -> RE s b 87 | Void :: RE s a -> RE s () 88 | 89 | -- | Traverse each (reflexive, transitive) subexpression of a 'RE', depth-first and post-order. 90 | traversePostorder :: forall s a m . Monad m => (forall a . RE s a -> m (RE s a)) -> RE s a -> m (RE s a) 91 | traversePostorder f = go 92 | where 93 | go :: forall a . RE s a -> m (RE s a) 94 | go = f <=< \ case 95 | Eps -> pure Eps 96 | Symbol i p -> pure (Symbol i p) 97 | Alt a b -> Alt <$> go a <*> go b 98 | App a b -> App <$> go a <*> go b 99 | Fmap g a -> Fmap g <$> go a 100 | CatMaybes a -> CatMaybes <$> go a 101 | Fail -> pure Fail 102 | Rep greed g b a -> Rep greed g b <$> go a 103 | Void a -> Void <$> go a 104 | 105 | -- | Fold each (reflexive, transitive) subexpression of a 'RE', depth-first and post-order. 106 | foldMapPostorder :: Monoid b => (forall a . RE s a -> b) -> RE s a -> b 107 | foldMapPostorder f = fst . traversePostorder ((,) <$> f <*> id) 108 | 109 | -- | Map each (reflexive, transitive) subexpression of a 'RE'. 110 | mapRE :: (forall a . RE s a -> RE s a) -> RE s a -> RE s a 111 | mapRE f = runIdentity . traversePostorder (Identity . f) 112 | 113 | instance Functor (RE s) where 114 | fmap f x = Fmap f x 115 | f <$ x = pure f <* x 116 | 117 | instance Applicative (RE s) where 118 | pure x = const x <$> Eps 119 | a1 <*> a2 = App a1 a2 120 | a *> b = pure (const id) <*> Void a <*> b 121 | a <* b = pure const <*> a <*> Void b 122 | 123 | instance Alternative (RE s) where 124 | a1 <|> a2 = Alt a1 a2 125 | empty = Fail 126 | many a = reverse <$> Rep Greedy (flip (:)) [] a 127 | some a = (:) <$> a <*> many a 128 | 129 | -- | @since 0.3.4 130 | instance Filtrable (RE s) where 131 | catMaybes = CatMaybes 132 | 133 | instance (char ~ Char, string ~ String) => IsString (RE char string) where 134 | fromString = string 135 | 136 | -- | @since 0.3.4 137 | instance Semigroup a => Semigroup (RE s a) where 138 | x <> y = (<>) <$> x <*> y 139 | 140 | -- | @since 0.3.4 141 | instance Monoid a => Monoid (RE s a) where 142 | mempty = pure mempty 143 | 144 | -- | Match and return the given sequence of symbols. 145 | -- 146 | -- Note that there is an 'IsString' instance for regular expression, so 147 | -- if you enable the @OverloadedStrings@ language extension, you can write 148 | -- @string \"foo\"@ simply as @\"foo\"@. 149 | -- 150 | -- Example: 151 | -- 152 | -- >{-# LANGUAGE OverloadedStrings #-} 153 | -- >import Text.Regex.Applicative 154 | -- > 155 | -- >number = "one" *> pure 1 <|> "two" *> pure 2 156 | -- > 157 | -- >main = print $ "two" =~ number 158 | string :: Eq a => [a] -> RE a [a] 159 | string = traverse sym 160 | 161 | -- | Match and return a single symbol which satisfies the predicate 162 | psym :: (s -> Bool) -> RE s s 163 | psym p = msym (\s -> if p s then Just s else Nothing) 164 | 165 | -- | Like 'psym', but allows to return a computed value instead of the 166 | -- original symbol 167 | msym :: (s -> Maybe a) -> RE s a 168 | msym p = Symbol (error "Not numbered symbol") p 169 | 170 | -- | Match and return the given symbol 171 | sym :: Eq s => s -> RE s s 172 | sym s = psym (s ==) 173 | -------------------------------------------------------------------------------- /benchmark/benchmark.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances, TypeApplications, RankNTypes, CPP #-} 2 | import Data.List 3 | import Data.Traversable 4 | import Data.Maybe 5 | import Data.Void 6 | import Control.Monad 7 | 8 | import Criterion.Main 9 | 10 | import Text.Regex.Applicative 11 | import qualified Text.Parser.Combinators as PC 12 | import qualified Text.Parser.Char as PC 13 | import Control.DeepSeq 14 | import qualified Text.Parsec as Parsec 15 | import qualified Text.Megaparsec as Megaparsec 16 | import qualified Text.Megaparsec.Parsers as MP 17 | import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec 18 | import qualified Data.ByteString.Char8 as BS8 19 | 20 | parser1 :: PC.CharParsing f => f [String] 21 | parser1 = many $ 22 | PC.try (PC.string "foo") <|> 23 | PC.try (PC.string "bar") <|> 24 | PC.string "baz" 25 | 26 | str :: String 27 | str = concat $ replicate 10 "foobarfoobarbaz" 28 | 29 | benchmarkParser 30 | :: NFData a 31 | => (forall f . (PC.CharParsing f) => f a) 32 | -> [Benchmark] 33 | benchmarkParser parser = 34 | [ bench "regex-applicative" $ nf (match parser) str 35 | , bench "parsec" $ nf (Parsec.parse parser "-") str 36 | , bench "megaparsec" $ nf (Megaparsec.parseMaybe @Void (MP.unParsecT parser)) str 37 | , bench "attoparsec" $ nf (Attoparsec.parseOnly parser) (BS8.pack str) 38 | ] 39 | 40 | main = defaultMain $ 41 | [ bgroup "parsing" (benchmarkParser parser1) 42 | , bgroup "recognizing" (benchmarkParser (void parser1)) 43 | ] 44 | 45 | -- instances 46 | instance PC.Parsing (RE c) where 47 | try = id 48 | () = const 49 | unexpected _ = empty 50 | notFollowedBy = error "RE: notFollowedBy" 51 | eof = error "RE: eof" 52 | instance PC.CharParsing (RE Char) where 53 | satisfy = psym 54 | char = sym 55 | anyChar = anySym 56 | string = string 57 | instance NFData Parsec.ParseError where 58 | rnf = flip seq () 59 | -------------------------------------------------------------------------------- /doctest/doctest.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Test.DocTest 4 | 5 | main :: IO () 6 | main = doctest 7 | [ "-XLambdaCase" 8 | , "-XOverloadedStrings" 9 | , "Text/Regex/Applicative.hs" 10 | ] 11 | -------------------------------------------------------------------------------- /regex-applicative.cabal: -------------------------------------------------------------------------------- 1 | Name: regex-applicative 2 | Version: 0.3.4 3 | Synopsis: Regex-based parsing with applicative interface 4 | Description: 5 | regex-applicative is a Haskell library for parsing using regular expressions. 6 | Parsers can be built using Applicative interface. 7 | Homepage: https://github.com/UnkindPartition/regex-applicative 8 | License: MIT 9 | License-file: LICENSE 10 | Author: Roman Cheplyaka 11 | Maintainer: Roman Cheplyaka 12 | Category: Text 13 | Build-type: Simple 14 | Extra-source-files: README.md CREDITS.md CHANGES.md 15 | Cabal-version: >=1.10 16 | Tested-With: 17 | GHC ==8.0.2 || 18 | ==8.2.2 || 19 | ==8.4.4 || 20 | ==8.6.5 || 21 | ==8.8.2 22 | 23 | Source-repository head 24 | type: git 25 | location: git://github.com/UnkindPartition/regex-applicative.git 26 | 27 | Library 28 | Default-language: Haskell2010 29 | Default-extensions: LambdaCase 30 | Build-depends: base < 5, 31 | filtrable >= 0.1.3, 32 | containers, 33 | transformers 34 | Exposed-modules: Text.Regex.Applicative 35 | Text.Regex.Applicative.Object 36 | Text.Regex.Applicative.Common 37 | Text.Regex.Applicative.Reference 38 | Text.Regex.Applicative.StateQueue 39 | Other-modules: Text.Regex.Applicative.Interface 40 | Text.Regex.Applicative.Types 41 | Text.Regex.Applicative.Compile 42 | GHC-Options: -Wall 43 | -Werror=incomplete-patterns 44 | -fno-warn-name-shadowing 45 | 46 | Test-Suite test-regex-applicative 47 | type: exitcode-stdio-1.0 48 | hs-source-dirs: 49 | tests 50 | main-is: 51 | test.hs 52 | other-modules: 53 | StateQueue 54 | GHC-Options: -threaded 55 | Default-language: Haskell2010 56 | Build-depends: base < 5, 57 | containers, 58 | filtrable >= 0.1.3, 59 | transformers, 60 | smallcheck >= 1.0, 61 | tasty, 62 | tasty-smallcheck, 63 | tasty-hunit, 64 | regex-applicative 65 | 66 | test-suite doctest 67 | type: 68 | exitcode-stdio-1.0 69 | hs-source-dirs: 70 | doctest 71 | main-is: 72 | doctest.hs 73 | default-language: Haskell2010 74 | ghc-options: -threaded 75 | build-depends: base, text, doctest >= 0.8 76 | 77 | Benchmark bench-regex-applicative 78 | type: exitcode-stdio-1.0 79 | hs-source-dirs: benchmark 80 | main-is: benchmark.hs 81 | build-depends: base <5 82 | , criterion 83 | , regex-applicative 84 | , parsers 85 | , deepseq 86 | , parsec 87 | , attoparsec 88 | , megaparsec 89 | , parsers-megaparsec 90 | , bytestring 91 | default-language: Haskell2010 92 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | flags: {} 2 | packages: 3 | - '.' 4 | extra-deps: [] 5 | resolver: nightly-2021-12-18 6 | -------------------------------------------------------------------------------- /tests/StateQueue.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, ScopedTypeVariables #-} 2 | module StateQueue where 3 | 4 | import Test.Tasty 5 | import Test.Tasty.SmallCheck 6 | import Test.SmallCheck.Series 7 | import Control.Applicative 8 | import Text.Regex.Applicative.StateQueue as SQ 9 | 10 | fromElems :: [(a, Maybe Int)] -> StateQueue a 11 | fromElems = foldl f SQ.empty 12 | where 13 | f sq (x, mbKey) = maybe insert insertUnique mbKey x sq 14 | 15 | size :: StateQueue a -> Int 16 | size = length . getElements 17 | 18 | instance (Monad m, Serial m a) => Serial m (StateQueue a) where 19 | series = fromElems <$> series 20 | 21 | stateQueueTests = testGroup "StateQueue" 22 | [ testProperty "Insertion increments the # of elements" $ 23 | \sq (i :: Int) -> size (insert i sq) == size sq + 1 24 | , testProperty "insertUnique increments the # of elements by 0 or 1" $ 25 | \sq (i :: Int) -> 26 | let d = size (insertUnique i i sq) - size sq 27 | in d == 0 || d == 1 28 | , testProperty "insertUnique is idempotent" $ 29 | \sq (i :: Int) -> 30 | let f = insertUnique i i 31 | in f sq == (f . f) sq 32 | , testProperty "insert doesn't affect insertUnique" $ 33 | \(i :: Int) -> exists $ \sq -> 34 | let sq' = insert i sq 35 | in insertUnique i i sq' /= sq' 36 | , testProperty "insertUnique only cares about keys, not values" $ 37 | \sq i j -> 38 | let sq' = insertUnique i i sq 39 | in insertUnique i j sq' == sq' 40 | , testProperty "insert puts in the back" $ 41 | \sq (i :: Int) -> 42 | let sq' = insert i sq 43 | in last (getElements sq') == i 44 | , testProperty "insertUnique puts in the back" $ 45 | \sq i -> 46 | let sq' = insertUnique i i sq 47 | in sq' /= sq ==> last (getElements sq') == i 48 | ] 49 | -------------------------------------------------------------------------------- /tests/test.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, FlexibleInstances, MultiParamTypeClasses, FlexibleContexts #-} 2 | import Text.Regex.Applicative 3 | import Text.Regex.Applicative.Reference 4 | import Control.Applicative 5 | import Control.Monad 6 | import Data.Filtrable 7 | import Data.Traversable 8 | import Data.Maybe 9 | import Text.Printf 10 | 11 | import Test.SmallCheck 12 | import Test.SmallCheck.Series 13 | import Test.Tasty 14 | import Test.Tasty.SmallCheck 15 | import Test.Tasty.HUnit 16 | 17 | import StateQueue 18 | 19 | -- Small alphabets as SmallCheck's series 20 | newtype A = A { a :: Char } deriving Show 21 | instance Monad m => Serial m A where 22 | series = cons0 $ A 'a' 23 | 24 | newtype AB = AB { ab :: Char } deriving Show 25 | instance Monad m => Serial m AB where 26 | series = cons0 (AB 'a') \/ cons0 (AB 'b') 27 | 28 | newtype ABC = ABC { abc :: Char } deriving Show 29 | instance Monad m => Serial m ABC where 30 | series = cons0 (ABC 'a') \/ cons0 (ABC 'b') \/ cons0 (ABC 'c') 31 | 32 | re1 = 33 | let one = pure 1 <* sym 'a' 34 | two = pure 2 <* sym 'a' <* sym 'a' 35 | in (,) <$> (one <|> two) <*> (two <|> one) 36 | 37 | re2 = sequenceA $ 38 | [ pure 1 <* sym 'a' <* sym 'a' <|> 39 | pure 2 <* sym 'a' 40 | , pure 3 <* sym 'b' 41 | , pure 4 <* sym 'b' <|> 42 | pure 5 <* sym 'a' ] 43 | 44 | re3 = sequenceA $ 45 | [ pure 0 <|> pure 1 46 | , pure 1 <* sym 'a' <* sym 'a' <|> 47 | pure 2 <* sym 'a' 48 | , pure 3 <* sym 'b' <|> pure 6 49 | , fmap (+1) $ 50 | pure 4 <* sym 'b' <|> 51 | pure 7 <|> 52 | pure 5 <* sym 'a' ] 53 | 54 | re4 = sym 'a' *> many (sym 'b') <* sym 'a' 55 | 56 | re5 = (sym 'a' <|> sym 'a' *> sym 'a') *> many (sym 'a') 57 | 58 | re6 = many (pure 3 <* sym 'a' <* sym 'a' <* sym 'a' <|> pure 1 <* sym 'a') 59 | 60 | -- Regular expression from the weighted regexp paper. 61 | re7 = 62 | let many_A_or_B = many (sym 'a' <|> sym 'b') 63 | in (,) <$> 64 | many ((,,,) <$> many_A_or_B <*> sym 'c' <*> many_A_or_B <*> sym 'c') <*> 65 | many_A_or_B 66 | 67 | re8 = (,) <$> many (sym 'a' <|> sym 'b') <*> many (sym 'b' <|> sym 'c') 68 | 69 | -- NB: we don't test these against the reference impl, 'cause it will loop! 70 | re9 = many (sym 'a' <|> empty) <* sym 'b' 71 | re10 = few (sym 'a' <|> empty) <* sym 'b' 72 | 73 | re11 = (\ a b -> a <$ guard (a == b)) <$> anySym <*?> anySym 74 | 75 | prop re f s = 76 | let fs = map f s in 77 | reference re fs == (fs =~ re) 78 | 79 | prop_withMatched = 80 | let re = withMatched $ many (string "a" <|> string "ba") 81 | in \str -> 82 | case map ab str =~ re of 83 | Nothing -> True 84 | Just (x, y) -> concat x == y 85 | 86 | -- Because we have 2 slightly different algorithms for recognition and parsing, 87 | -- we test that they agree 88 | testRecognitionAgainstParsing re f s = 89 | let fs = map f s in 90 | isJust (fs =~ re) == isJust (fs =~ (re *> pure ())) 91 | 92 | tests = testGroup "Tests" 93 | [ testGroup "Engine tests" 94 | [ t "re1" 10 $ prop re1 a 95 | , t "re2" 10 $ prop re2 ab 96 | , t "re3" 10 $ prop re3 ab 97 | , t "re4" 10 $ prop re4 ab 98 | , t "re5" 10 $ prop re5 a 99 | , t "re6" 10 $ prop re6 a 100 | , t "re7" 7 $ prop re7 abc 101 | , t "re8" 7 $ prop re8 abc 102 | , t "re11" 7 $ prop re11 abc 103 | ] 104 | , testGroup "Recognition vs parsing" 105 | [ t "re1" 10 $ testRecognitionAgainstParsing re1 a 106 | , t "re2" 10 $ testRecognitionAgainstParsing re2 ab 107 | , t "re3" 10 $ testRecognitionAgainstParsing re3 ab 108 | , t "re4" 10 $ testRecognitionAgainstParsing re4 ab 109 | , t "re5" 10 $ testRecognitionAgainstParsing re5 a 110 | , t "re6" 10 $ testRecognitionAgainstParsing re6 a 111 | , t "re7" 7 $ testRecognitionAgainstParsing re7 abc 112 | , t "re8" 7 $ testRecognitionAgainstParsing re8 abc 113 | , t "re8" 10 $ testRecognitionAgainstParsing re9 ab 114 | , t "re8" 10 $ testRecognitionAgainstParsing re10 ab 115 | , t "re11" 7 $ testRecognitionAgainstParsing re11 abc 116 | ] 117 | , testProperty "withMatched" prop_withMatched 118 | , testGroup "Tests for matching functions" 119 | [ testGroup "findFirstPrefix" 120 | [ u "t1" 121 | (findFirstPrefix ("a" <|> "ab") "abc") 122 | (Just ("a","bc")) 123 | , u "t2" 124 | (findFirstPrefix ("ab" <|> "a") "abc") 125 | (Just ("ab","c")) 126 | , u "t3" 127 | (findFirstPrefix "bc" "abc") 128 | Nothing 129 | ] 130 | , testGroup "findFirstInfix" 131 | [ u "t1" 132 | (findFirstInfix ("a" <|> "ab") "tabc") 133 | (Just ("t", "a","bc")) 134 | , u "t2" 135 | (findFirstInfix ("ab" <|> "a") "tabc") 136 | (Just ("t", "ab","c")) 137 | ] 138 | , testGroup "findLongestPrefix" 139 | [ u "t1" 140 | (findLongestPrefix ("a" <|> "ab") "abc") 141 | (Just ("ab","c")) 142 | , u "t2" 143 | (findLongestPrefix ("ab" <|> "a") "abc") 144 | (Just ("ab","c")) 145 | , u "t3" 146 | (findLongestPrefix "bc" "abc") 147 | Nothing 148 | ] 149 | , testGroup "findLongestInfix" 150 | [ u "t1" 151 | (findLongestInfix ("a" <|> "ab") "tabc") 152 | (Just ("t", "ab","c")) 153 | , u "t2" 154 | (findLongestInfix ("ab" <|> "a") "tabc") 155 | (Just ("t", "ab","c")) 156 | , u "t3" 157 | (findLongestInfix "bc" "tabc") 158 | (Just ("ta", "bc","")) 159 | ] 160 | , testGroup "findShortestPrefix" 161 | [ u "t1" 162 | (findShortestPrefix ("a" <|> "ab") "abc") 163 | (Just ("a","bc")) 164 | , u "t2" 165 | (findShortestPrefix ("ab" <|> "a") "abc") 166 | (Just ("a","bc")) 167 | , u "t3" 168 | (findShortestPrefix "bc" "abc") 169 | Nothing 170 | ] 171 | , testGroup "findShortestInfix" 172 | [ u "t1" 173 | (findShortestInfix ("a" <|> "ab") "tabc") 174 | (Just ("t", "a","bc")) 175 | , u "t2" 176 | (findShortestInfix ("ab" <|> "a") "tabc") 177 | (Just ("t", "a","bc")) 178 | , u "t3" 179 | (findShortestInfix "bc" "tabc") 180 | (Just ("ta", "bc","")) 181 | ] 182 | , testGroup "replace" 183 | [ u "t1" 184 | (replace ("x" <$ "a" <|> "y" <$ "ab") "tabc") 185 | "tyc" 186 | , u "t2" 187 | (replace ("y" <$ "ab" <|> "x" <$ "a") "tabc") 188 | "tyc" 189 | , u "t3" 190 | (replace ("x" <$ "bc") "tabc") 191 | "tax" 192 | , u "t4" 193 | (replace ("y" <$ "a" <|> "x" <$ "ab") "tacabc") 194 | "tycxc" 195 | ] 196 | ] 197 | , stateQueueTests 198 | ] 199 | where 200 | t name n = localOption (SmallCheckDepth n) . testProperty name 201 | u name real ideal = testCase name (assertEqual "" real ideal) 202 | 203 | main = defaultMain tests 204 | --------------------------------------------------------------------------------