├── .github └── workflows │ └── haskell-ci.yml ├── .gitignore ├── .stylish-haskell.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── Setup.hs ├── cabal.haskell-ci ├── cabal.project ├── example.png ├── kleene.cabal ├── src ├── Kleene.hs └── Kleene │ ├── Classes.hs │ ├── DFA.hs │ ├── ERE.hs │ ├── Equiv.hs │ ├── Functor.hs │ ├── Functor │ └── NonEmpty.hs │ ├── Internal │ ├── Functor.hs │ ├── Partition.hs │ ├── Pretty.hs │ ├── RE.hs │ └── Sets.hs │ ├── Monad.hs │ └── RE.hs └── tests └── kleene-utf8.hs /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'cabal.project' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.17.20231110 12 | # 13 | # REGENDATA ("0.17.20231110",["github","cabal.project"]) 14 | # 15 | name: Haskell-CI 16 | on: 17 | push: 18 | branches: 19 | - master 20 | pull_request: 21 | branches: 22 | - master 23 | jobs: 24 | linux: 25 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 26 | runs-on: ubuntu-20.04 27 | timeout-minutes: 28 | 60 29 | container: 30 | image: buildpack-deps:bionic 31 | continue-on-error: ${{ matrix.allow-failure }} 32 | strategy: 33 | matrix: 34 | include: 35 | - compiler: ghc-9.8.1 36 | compilerKind: ghc 37 | compilerVersion: 9.8.1 38 | setup-method: ghcup 39 | allow-failure: false 40 | - compiler: ghc-9.6.3 41 | compilerKind: ghc 42 | compilerVersion: 9.6.3 43 | setup-method: ghcup 44 | allow-failure: false 45 | - compiler: ghc-9.4.8 46 | compilerKind: ghc 47 | compilerVersion: 9.4.8 48 | setup-method: ghcup 49 | allow-failure: false 50 | - compiler: ghc-9.2.8 51 | compilerKind: ghc 52 | compilerVersion: 9.2.8 53 | setup-method: ghcup 54 | allow-failure: false 55 | - compiler: ghc-9.0.2 56 | compilerKind: ghc 57 | compilerVersion: 9.0.2 58 | setup-method: ghcup 59 | allow-failure: false 60 | - compiler: ghc-8.10.7 61 | compilerKind: ghc 62 | compilerVersion: 8.10.7 63 | setup-method: ghcup 64 | allow-failure: false 65 | - compiler: ghc-8.8.4 66 | compilerKind: ghc 67 | compilerVersion: 8.8.4 68 | setup-method: hvr-ppa 69 | allow-failure: false 70 | - compiler: ghc-8.6.5 71 | compilerKind: ghc 72 | compilerVersion: 8.6.5 73 | setup-method: hvr-ppa 74 | allow-failure: false 75 | - compiler: ghc-8.4.4 76 | compilerKind: ghc 77 | compilerVersion: 8.4.4 78 | setup-method: hvr-ppa 79 | allow-failure: false 80 | - compiler: ghc-8.2.2 81 | compilerKind: ghc 82 | compilerVersion: 8.2.2 83 | setup-method: hvr-ppa 84 | allow-failure: false 85 | - compiler: ghc-8.0.2 86 | compilerKind: ghc 87 | compilerVersion: 8.0.2 88 | setup-method: hvr-ppa 89 | allow-failure: false 90 | - compiler: ghc-7.10.3 91 | compilerKind: ghc 92 | compilerVersion: 7.10.3 93 | setup-method: hvr-ppa 94 | allow-failure: false 95 | - compiler: ghc-7.8.4 96 | compilerKind: ghc 97 | compilerVersion: 7.8.4 98 | setup-method: hvr-ppa 99 | allow-failure: false 100 | fail-fast: false 101 | steps: 102 | - name: apt 103 | run: | 104 | apt-get update 105 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 106 | if [ "${{ matrix.setup-method }}" = ghcup ]; then 107 | mkdir -p "$HOME/.ghcup/bin" 108 | curl -sL https://downloads.haskell.org/ghcup/0.1.20.0/x86_64-linux-ghcup-0.1.20.0 > "$HOME/.ghcup/bin/ghcup" 109 | chmod a+x "$HOME/.ghcup/bin/ghcup" 110 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 111 | "$HOME/.ghcup/bin/ghcup" install cabal 3.10.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 112 | else 113 | apt-add-repository -y 'ppa:hvr/ghc' 114 | apt-get update 115 | apt-get install -y "$HCNAME" 116 | mkdir -p "$HOME/.ghcup/bin" 117 | curl -sL https://downloads.haskell.org/ghcup/0.1.20.0/x86_64-linux-ghcup-0.1.20.0 > "$HOME/.ghcup/bin/ghcup" 118 | chmod a+x "$HOME/.ghcup/bin/ghcup" 119 | "$HOME/.ghcup/bin/ghcup" install cabal 3.10.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 120 | fi 121 | env: 122 | HCKIND: ${{ matrix.compilerKind }} 123 | HCNAME: ${{ matrix.compiler }} 124 | HCVER: ${{ matrix.compilerVersion }} 125 | - name: Set PATH and environment variables 126 | run: | 127 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 128 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 129 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 130 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 131 | HCDIR=/opt/$HCKIND/$HCVER 132 | if [ "${{ matrix.setup-method }}" = ghcup ]; then 133 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 134 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 135 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 136 | echo "HC=$HC" >> "$GITHUB_ENV" 137 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 138 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 139 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.10.2.0 -vnormal+nowrap" >> "$GITHUB_ENV" 140 | else 141 | HC=$HCDIR/bin/$HCKIND 142 | echo "HC=$HC" >> "$GITHUB_ENV" 143 | echo "HCPKG=$HCDIR/bin/$HCKIND-pkg" >> "$GITHUB_ENV" 144 | echo "HADDOCK=$HCDIR/bin/haddock" >> "$GITHUB_ENV" 145 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.10.2.0 -vnormal+nowrap" >> "$GITHUB_ENV" 146 | fi 147 | 148 | HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))') 149 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 150 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 151 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 152 | echo "HEADHACKAGE=false" >> "$GITHUB_ENV" 153 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 154 | echo "GHCJSARITH=0" >> "$GITHUB_ENV" 155 | env: 156 | HCKIND: ${{ matrix.compilerKind }} 157 | HCNAME: ${{ matrix.compiler }} 158 | HCVER: ${{ matrix.compilerVersion }} 159 | - name: env 160 | run: | 161 | env 162 | - name: write cabal config 163 | run: | 164 | mkdir -p $CABAL_DIR 165 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 198 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 199 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 200 | rm -f cabal-plan.xz 201 | chmod a+x $HOME/.cabal/bin/cabal-plan 202 | cabal-plan --version 203 | - name: install cabal-docspec 204 | run: | 205 | mkdir -p $HOME/.cabal/bin 206 | curl -sL https://github.com/phadej/cabal-extras/releases/download/cabal-docspec-0.0.0.20230517/cabal-docspec-0.0.0.20230517-x86_64-linux.xz > cabal-docspec.xz 207 | echo '3b31bbe463ad4d671abbc103db49628562ec48a6604cab278207b5b6acd21ed7 cabal-docspec.xz' | sha256sum -c - 208 | xz -d < cabal-docspec.xz > $HOME/.cabal/bin/cabal-docspec 209 | rm -f cabal-docspec.xz 210 | chmod a+x $HOME/.cabal/bin/cabal-docspec 211 | cabal-docspec --version 212 | - name: checkout 213 | uses: actions/checkout@v3 214 | with: 215 | path: source 216 | - name: initial cabal.project for sdist 217 | run: | 218 | touch cabal.project 219 | echo "packages: $GITHUB_WORKSPACE/source/." >> cabal.project 220 | cat cabal.project 221 | - name: sdist 222 | run: | 223 | mkdir -p sdist 224 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 225 | - name: unpack 226 | run: | 227 | mkdir -p unpacked 228 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 229 | - name: generate cabal.project 230 | run: | 231 | PKGDIR_kleene="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/kleene-[0-9.]*')" 232 | echo "PKGDIR_kleene=${PKGDIR_kleene}" >> "$GITHUB_ENV" 233 | rm -f cabal.project cabal.project.local 234 | touch cabal.project 235 | touch cabal.project.local 236 | echo "packages: ${PKGDIR_kleene}" >> cabal.project 237 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package kleene" >> cabal.project ; fi 238 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods" >> cabal.project ; fi 239 | cat >> cabal.project <> cabal.project.local 242 | cat cabal.project 243 | cat cabal.project.local 244 | - name: dump install plan 245 | run: | 246 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 247 | cabal-plan 248 | - name: restore cache 249 | uses: actions/cache/restore@v3 250 | with: 251 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 252 | path: ~/.cabal/store 253 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 254 | - name: install dependencies 255 | run: | 256 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 257 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 258 | - name: build w/o tests 259 | run: | 260 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 261 | - name: build 262 | run: | 263 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 264 | - name: tests 265 | run: | 266 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 267 | - name: docspec 268 | run: | 269 | if [ $((HCNUMVER >= 71000)) -ne 0 ] ; then $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all ; fi 270 | if [ $((HCNUMVER >= 71000)) -ne 0 ] ; then cabal-docspec $ARG_COMPILER ; fi 271 | - name: cabal check 272 | run: | 273 | cd ${PKGDIR_kleene} || false 274 | ${CABAL} -vnormal check 275 | - name: haddock 276 | run: | 277 | $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all 278 | - name: unconstrained build 279 | run: | 280 | rm -f cabal.project.local 281 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 282 | - name: save cache 283 | uses: actions/cache/save@v3 284 | if: always() 285 | with: 286 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 287 | path: ~/.cabal/store 288 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | dist-newstyle/ 3 | .ghc.environment.* 4 | 5 | ref/ 6 | 7 | example.dot 8 | -------------------------------------------------------------------------------- /.stylish-haskell.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - imports: 3 | align: group 4 | list_align: after_alias 5 | long_list_align: new_line 6 | empty_list_align: right_after 7 | list_padding: module_name 8 | - language_pragmas: 9 | style: vertical 10 | remove_redundant: true 11 | - trailing_whitespace: {} 12 | columns: 80 13 | language_extensions: 14 | - DataKinds 15 | - EmptyCase 16 | - ExplicitForAll 17 | - FlexibleContexts 18 | - MultiParamTypeClasses 19 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.2 2 | 3 | * Remove MChars constructor from Monad variant 4 | 5 | ## 0.1.1 6 | 7 | * Export `Kleene.Functor.NonEmpty.few1` 8 | * Export `Kleene.RE.everything` and `Kleene.ERE.everything` 9 | * Export `Equivalent` from `Kleene` 10 | 11 | ## 0.1 12 | 13 | * Drop superclasses from `Kleene`. 14 | * Rearrange classes. Introduce `CharKleene`, `FiniteKleene`. 15 | * Add `ToLatin1` and ability to match on `ByteString`. 16 | * Add `Derivate c (DFA c)` instance. 17 | * Add `toDot` to output `DFA` to be rendered by *graphviz*. 18 | * Add `fromRE :: RE c -> ERE c` 19 | * Add `nullableProof :: RE c -> Maybe (RE c)` which returns non-nullable part 20 | of given regular expression. 21 | * Support/require `lattices-2`: `RE` is now a `Lattice`, `M` isn't. 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Patches welcome! 2 | 3 | - If you are only going to bump bounds: 4 | - If it's really **only bounds**, please simply open an issue (so you'll have a URL to refer to). I have a semi-automated process to make revisions, pull requests only disturb it. 5 | - If patch includes **source code change** (i.e. I'll need to make a release), and it's a patch to support **newer base/GHC version**: 6 | - Amend `tested-with` to include that GHC 7 | - Regenerate `.github/workflows/haskell-ci.yml` with `haskell-ci regenerate` (get the latest from [GitHub haskell-ci/haskell-ci](https://github.com/haskell-ci/haskell-ci)) 8 | 9 | - Don't edit `CHANGELOG.md`, rather include a copyable entry in your pull request description. I often process pull requests in bulk, and everyone editing the `CHANGELOG.md` causes unnecessary conflicts. 10 | - For the same reason, do not edit `version` or `x-revision` 11 | 12 | - I use [`stylish-haskell`](https://github.com/jaspervdj/stylish-haskell) to format imports. I encourage you to use it too, when contributing. 13 | - General code style is 4 spaces, just look around how it looks, it's not so strict. 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Futurice Oy, 2017-2018 Oleg Grenrus 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Oleg Grenrus nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build : 2 | cabal new-build 3 | 4 | doctest : 5 | perl -i -e 'while () { print unless /package-id\s+(base-compat)-\d+(\.\d+)*/; }' .ghc.environment.* 6 | doctest --fast src/ 7 | 8 | haddock : 9 | cabal new-haddock --haddock-hyperlink-source 10 | 11 | ghcid : 12 | ghcid -c 'cabal new-repl' 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kleene 2 | 3 | Kleene algebra 4 | 5 | read: Regular Expressions 6 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# OPTIONS_GHC -Wall #-} 3 | module Main (main) where 4 | 5 | #ifndef MIN_VERSION_cabal_doctest 6 | #define MIN_VERSION_cabal_doctest(x,y,z) 0 7 | #endif 8 | 9 | #if MIN_VERSION_cabal_doctest(1,0,0) 10 | 11 | import Distribution.Extra.Doctest ( defaultMainWithDoctests ) 12 | main :: IO () 13 | main = defaultMainWithDoctests "doctests" 14 | 15 | #else 16 | 17 | #ifdef MIN_VERSION_Cabal 18 | -- If the macro is defined, we have new cabal-install, 19 | -- but for some reason we don't have cabal-doctest in package-db 20 | -- 21 | -- Probably we are running cabal sdist, when otherwise using new-build 22 | -- workflow 23 | #warning You are configuring this package without cabal-doctest installed. \ 24 | The doctests test-suite will not work as a result. \ 25 | To fix this, install cabal-doctest before configuring. 26 | #endif 27 | 28 | import Distribution.Simple 29 | 30 | main :: IO () 31 | main = defaultMain 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | branches: master 2 | docspec: >=7.10 3 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: . 2 | tests: True 3 | -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phadej/kleene/f244cb8cedb52d9b3ac7b7001e6a9b85c62f807d/example.png -------------------------------------------------------------------------------- /kleene.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.4 2 | name: kleene 3 | version: 0.2 4 | synopsis: Kleene algebra 5 | category: Math 6 | description: 7 | Kleene algebra 8 | . 9 | Think: Regular expressions 10 | . 11 | Implements ideas from /Regular-expression derivatives re-examined/ by 12 | Scott Owens, John Reppy and Aaron Turon 13 | 14 | 15 | homepage: https://github.com/phadej/kleene 16 | bug-reports: https://github.com/phadej/kleene/issues 17 | author: Oleg Grenrus 18 | maintainer: Oleg Grenrus 19 | license: BSD-3-Clause 20 | license-file: LICENSE 21 | build-type: Simple 22 | extra-source-files: CHANGELOG.md 23 | extra-doc-files: example.png 24 | tested-with: 25 | GHC ==7.8.4 26 | || ==7.10.3 27 | || ==8.0.2 28 | || ==8.2.2 29 | || ==8.4.4 30 | || ==8.6.5 31 | || ==8.8.4 32 | || ==8.10.7 33 | || ==9.0.2 34 | || ==9.2.8 35 | || ==9.4.8 36 | || ==9.6.3 37 | || ==9.8.1 38 | 39 | source-repository head 40 | type: git 41 | location: https://github.com/phadej/kleene 42 | 43 | library 44 | default-language: Haskell2010 45 | ghc-options: -Wall 46 | hs-source-dirs: src 47 | 48 | -- GHC boot libraries 49 | build-depends: 50 | , base >=4.7.0.2 && <4.20 51 | , bytestring >=0.10.4.0 && <0.13 52 | , containers >=0.5.5.1 && <0.7 53 | , text >=1.2.3.0 && <1.3 || >=2.0 && <2.2 54 | , transformers >=0.3.0.0 && <0.7 55 | 56 | if !impl(ghc >=8.0) 57 | build-depends: semigroups >=0.18.5 && <0.21 58 | 59 | -- Other dependencies 60 | build-depends: 61 | , attoparsec 62 | , base-compat >=0.10.5 && <0.14 63 | , lattices >=2 && <2.3 64 | , MemoTrie >=0.6.9 && <0.7 65 | , QuickCheck >=2.12.6.1 && <2.15 66 | , range-set-list >=0.1.3 && <0.2 67 | , regex-applicative >=0.3.3 && <0.4 68 | , semigroupoids >=5.3.2 && <6.1 69 | , step-function >=0.2 && <0.3 70 | 71 | other-extensions: 72 | CPP 73 | DefaultSignatures 74 | DeriveFoldable 75 | DeriveFunctor 76 | DeriveTraversable 77 | FlexibleInstances 78 | FunctionalDependencies 79 | GADTs 80 | GeneralizedNewtypeDeriving 81 | OverloadedStrings 82 | StandaloneDeriving 83 | UndecidableInstances 84 | 85 | exposed-modules: 86 | Kleene 87 | Kleene.Classes 88 | Kleene.DFA 89 | Kleene.Equiv 90 | Kleene.ERE 91 | Kleene.Functor 92 | Kleene.Functor.NonEmpty 93 | Kleene.Monad 94 | Kleene.RE 95 | 96 | -- "Internal-ish" modules 97 | exposed-modules: 98 | Kleene.Internal.Functor 99 | Kleene.Internal.Partition 100 | Kleene.Internal.Pretty 101 | Kleene.Internal.RE 102 | Kleene.Internal.Sets 103 | 104 | x-docspec-options: 105 | -XOverloadedStrings --check-properties "--property-variables=c p r s t q" 106 | 107 | test-suite kleene-utf8 108 | default-language: Haskell2010 109 | ghc-options: -Wall 110 | hs-source-dirs: tests 111 | type: exitcode-stdio-1.0 112 | main-is: kleene-utf8.hs 113 | build-depends: 114 | , base 115 | , bytestring 116 | , kleene 117 | 118 | if !impl(ghc >=8.0) 119 | build-depends: semigroups >=0.18.5 && <0.21 120 | 121 | -- test dependencies 122 | build-depends: 123 | , tasty ^>=1.4.0.3 || ^>=1.5 124 | , tasty-hunit ^>=0.10.0.3 125 | , tasty-quickcheck ^>=0.10.1.2 126 | -------------------------------------------------------------------------------- /src/Kleene.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | -- | Kleene algebra. 3 | -- 4 | -- This package provides means to work with kleene algebra, 5 | -- at the moment specifically concentrating on regular expressions over 'Char'. 6 | -- 7 | -- Implements ideas from /Regular-expression derivatives re-examined/ by 8 | -- Scott Owens, John Reppy and Aaron Turon 9 | -- . 10 | -- 11 | -- >>> :set -XOverloadedStrings 12 | -- >>> import Algebra.Lattice 13 | -- >>> import Algebra.PartialOrd 14 | -- >>> import Data.Semigroup 15 | -- >>> import Kleene.Internal.Pretty (putPretty) 16 | -- 17 | -- "Kleene.RE" module provides 'RE' type. "Kleene.Classes" module provides various 18 | -- classes to work with the type. All of that is re-exported from "Kleene" module. 19 | -- 20 | -- First let's construct a regular expression value: 21 | -- 22 | -- >>> let re = star "abc" <> "def" <> ("x" \/ "yz") :: RE Char 23 | -- >>> putPretty re 24 | -- ^(abc)*def(x|yz)$ 25 | -- 26 | -- We can convert it to 'DFA' (there are 8 states) 27 | -- 28 | -- >>> let dfa = fromTM re 29 | -- >>> putPretty dfa 30 | -- 0 -> \x -> if 31 | -- | x <= '`' -> 8 32 | -- | x <= 'a' -> 5 33 | -- | x <= 'c' -> 8 34 | -- | x <= 'd' -> 3 35 | -- | otherwise -> 8 36 | -- 1 -> \x -> if 37 | -- | x <= 'w' -> 8 38 | -- | x <= 'x' -> 6 39 | -- | x <= 'y' -> 7 40 | -- | otherwise -> 8 41 | -- 2 -> ... 42 | -- ... 43 | -- 44 | -- It's also possible to graphically visualise DFAs 45 | -- 46 | -- @ 47 | -- λ> writeFile "example.dot' ('toDot' dfa) 48 | -- % dot -Tpng -oexample.png example.dot 49 | -- @ 50 | -- 51 | -- ![example.png](example.png) 52 | -- 53 | -- And we can convert back from 'DFA' to 'RE': 54 | -- 55 | -- >>> let re' = toKleene dfa :: RE Char 56 | -- >>> putPretty re' 57 | -- ^(a(bca)*bcdefx|defx|(a(bca)*bcdefy|defy)z)$ 58 | -- 59 | -- As you see, we don't get what we started with. Yet, these 60 | -- regular expressions are 'equivalent'; 61 | -- 62 | -- >>> equivalent re re' 63 | -- True 64 | -- 65 | -- or using 'Equiv' wrapper 66 | -- 67 | -- >>> Equiv re == Equiv re' 68 | -- True 69 | -- 70 | -- (The paper doesn't outline decision procedure for the equivalence, though 71 | -- it's right there - seems to be fast enough at least for toy examples like 72 | -- here). 73 | -- 74 | -- We can use regular expressions to generate word examples in the language: 75 | -- 76 | -- >>> import Data.Foldable 77 | -- >>> import qualified Test.QuickCheck as QC 78 | -- >>> import Kleene.RE (generate) 79 | -- 80 | -- >>> traverse_ print $ take 5 $ generate (curry QC.choose) 42 re 81 | -- "abcdefx" 82 | -- "abcabcdefx" 83 | -- "abcdefyz" 84 | -- "abcabcabcabcabcabcabcdefyz" 85 | -- "abcabcabcabcabcabcabcdefyz" 86 | -- 87 | -- In addition to the "normal" regular expressions, there are /extended regular expressions/. 88 | -- Regular expressions which we can 'complement', and therefore intersect: 89 | -- 90 | -- >>> let ere = star "aa" /\ star "aaa" :: ERE Char 91 | -- >>> putPretty ere 92 | -- ^~(~((aa)*)|~((aaa)*))$ 93 | -- 94 | -- We can convert 'ERE' to 'RE' via 'DFA': 95 | -- 96 | -- >>> let re'' = toKleene (fromTM ere) :: RE Char 97 | -- >>> putPretty re'' 98 | -- ^(a(aaaaaa)*aaaaa)?$ 99 | -- 100 | -- Machine works own ways, we don't (always) get as pretty results as we'd like: 101 | -- 102 | -- >>> equivalent re'' (star "aaaaaa") 103 | -- True 104 | -- 105 | -- Another feature of the library is an 'Applciative' 'Functor', 106 | -- 107 | -- >>> import Control.Applicative 108 | -- >>> import qualified Kleene.Functor as F 109 | -- 110 | -- >>> let f = (,) <$> many (F.char 'x') <* F.few F.anyChar <*> many (F.char 'z') 111 | -- >>> putPretty f 112 | -- ^x*[^]*z*$ 113 | -- 114 | -- By relying on library, 115 | -- we can match and /capture/ with regular expression. 116 | -- 117 | -- >>> F.match f "xyyzzz" 118 | -- Just ("x","zzz") 119 | -- 120 | -- Where with 'RE' we can only get 'True' or 'False': 121 | -- 122 | -- >>> match (F.toRE f) "xyyzzz" 123 | -- True 124 | -- 125 | -- Which in this case is not even interesting because: 126 | -- 127 | -- >>> equivalent (F.toRE f) everything 128 | -- True 129 | -- 130 | -- Converting from 'RE' to 'K' is also possible, which may be handy: 131 | -- 132 | -- >>> let g = (,) <$> F.few F.anyChar <*> F.fromRE re'' 133 | -- >>> putPretty g 134 | -- ^[^]*(a(aaaaaa)*aaaaa)?$ 135 | -- 136 | -- >>> F.match g (replicate 20 'a') 137 | -- Just ("aa","aaaaaaaaaaaaaaaaaa") 138 | -- 139 | -- We got longest divisible by 6 prefix of as. That's because 'F.fromRE' 140 | -- uses 'many' for 'star'. 141 | -- 142 | module Kleene ( 143 | -- * Regular expressions 144 | RE, 145 | ERE, 146 | 147 | -- * Equivalence (and partial order) 148 | Equiv (..), 149 | 150 | -- * Deterministic finite automaton 151 | DFA (..), 152 | fromTM, 153 | fromTMEquiv, 154 | toKleene, 155 | toDot, 156 | 157 | -- * Classes 158 | -- 159 | -- | Most operations are defined in following type-classes. 160 | -- 161 | -- See "Kleene.RE" module for a specific version with examples. 162 | Kleene (..), 163 | CharKleene (..), 164 | FiniteKleene (..), 165 | Derivate (..), 166 | Match (..), 167 | Equivalent (..), 168 | TransitionMap (..), 169 | Complement (..), 170 | ToLatin1 (..), 171 | 172 | -- * Functor 173 | -- 174 | -- | Only the type is exported so it can be referred to. 175 | -- 176 | -- See "Kleene.Functor" for operations. 177 | K, 178 | ) where 179 | 180 | import Kleene.Classes 181 | import Kleene.DFA (DFA (..), fromTM, fromTMEquiv, toDot, toKleene) 182 | import Kleene.Equiv 183 | import Kleene.ERE (ERE) 184 | import Kleene.Functor (K) 185 | import Kleene.RE (RE) 186 | -------------------------------------------------------------------------------- /src/Kleene/Classes.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DefaultSignatures #-} 2 | {-# LANGUAGE FunctionalDependencies #-} 3 | {-# LANGUAGE GADTs #-} 4 | module Kleene.Classes where 5 | 6 | import Prelude () 7 | import Prelude.Compat 8 | 9 | import Data.Char (ord) 10 | import Data.Foldable (toList) 11 | import Data.Function.Step.Discrete.Closed (SF) 12 | import Data.Map (Map) 13 | import Data.Maybe (mapMaybe) 14 | import Data.RangeSet.Map (RSet) 15 | import Data.Word (Word8) 16 | 17 | import qualified Data.ByteString as BS 18 | import qualified Data.RangeSet.Map as RSet 19 | 20 | import Kleene.Internal.Sets (dotRSet) 21 | 22 | -- | Kleene algebra. 23 | -- 24 | -- If 'k' is 'Monoid' it's expected that @'appends' = 'mappend'@; 25 | -- if 'k' is 'Algebra.Lattice.Lattice' it's expected that @'unions' = 'Algebra.Lattice.joins'@. 26 | -- 27 | -- [Wikipedia: Kleene Algebra](https://en.wikipedia.org/wiki/Kleene_algebra). 28 | -- 29 | class Kleene k where 30 | -- | Empty regex. Doesn't accept anything. 31 | empty :: k 32 | 33 | -- | Empty string. /Note:/ different than 'empty'. 34 | eps :: k 35 | 36 | -- | Concatenation. 37 | appends :: [k] -> k 38 | 39 | -- | Union. 40 | unions :: [k] -> k 41 | 42 | -- | Kleene star. 43 | star :: k -> k 44 | 45 | class Kleene k => CharKleene c k | k -> c where 46 | -- | Single character 47 | char :: c -> k 48 | 49 | string :: [c] -> k 50 | string = appends . map char 51 | 52 | -- | One of the characters. 53 | oneof :: (CharKleene c k, Foldable f) => f c -> k 54 | oneof = unions . map char . toList 55 | 56 | class CharKleene c k => FiniteKleene c k | k -> c where 57 | -- | Everything. \(\Sigma^\star\). 58 | everything :: k 59 | everything = star anyChar 60 | 61 | -- | @'charRange' 'a' 'z' = ^[a-z]$@. 62 | charRange :: c -> c -> k 63 | 64 | -- | Generalisation of 'charRange'. 65 | fromRSet :: RSet c -> k 66 | 67 | -- | @.@ Every character except new line @\\n@. 68 | dot :: c ~ Char => k 69 | dot = fromRSet dotRSet 70 | 71 | -- | Any character. /Note:/ different than 'dot'! 72 | anyChar :: k 73 | 74 | notChar :: c -> k 75 | default notChar :: (Ord c, Enum c, Bounded c) => c -> k 76 | notChar = fromRSet . RSet.complement . RSet.singleton 77 | 78 | class Derivate c k | k -> c where 79 | -- | Does language contain an empty string? 80 | nullable :: k -> Bool 81 | 82 | -- | Derivative of a language. 83 | derivate :: c -> k -> k 84 | 85 | -- | An @f@ can be used to match on the input. 86 | class Match c k | k -> c where 87 | match :: k -> [c] -> Bool 88 | 89 | match8 :: c ~ Word8 => k -> BS.ByteString -> Bool 90 | match8 k = match k . BS.unpack 91 | 92 | -- | Equivalence induced by 'Match'. 93 | -- 94 | -- /Law:/ 95 | -- 96 | -- @ 97 | -- 'equivalent' re1 re2 <=> forall s. 'match' re1 s == 'match' re1 s 98 | -- @ 99 | -- 100 | class Match c k => Equivalent c k | k -> c where 101 | equivalent :: k -> k -> Bool 102 | 103 | -- | Transition map. 104 | class Derivate c k => TransitionMap c k | k -> c where 105 | transitionMap :: k -> Map k (SF c k) 106 | 107 | -- | Complement of the language. 108 | -- 109 | -- /Law:/ 110 | -- 111 | -- @ 112 | -- 'match' ('complement' f) xs = 'not' ('match' f) xs 113 | -- @ 114 | class Complement c k | k -> c where 115 | complement :: k -> k 116 | 117 | class ToLatin1 k where 118 | toLatin1 :: k Char -> k Word8 119 | 120 | instance ToLatin1 RSet where 121 | toLatin1 = RSet.fromRangeList . mapMaybe f . RSet.toRangeList where 122 | f :: (Char, Char) -> Maybe (Word8, Word8) 123 | f (a, b) 124 | | ord a >= 256 = Nothing 125 | | otherwise = Just (fromIntegral (ord a), fromIntegral (min 255 (ord b))) 126 | -------------------------------------------------------------------------------- /src/Kleene/DFA.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | {-# LANGUAGE GADTs #-} 4 | {-# LANGUAGE MultiParamTypeClasses #-} 5 | {-# LANGUAGE Safe #-} 6 | {-# LANGUAGE ScopedTypeVariables #-} 7 | {-# OPTIONS_GHC -fno-warn-orphans #-} 8 | module Kleene.DFA ( 9 | DFA (..), 10 | -- * Conversions 11 | fromRE, 12 | toRE, 13 | fromERE, 14 | toERE, 15 | fromTM, 16 | fromTMEquiv, 17 | toKleene, 18 | toDot, 19 | toDot', 20 | ) where 21 | 22 | import Prelude () 23 | import Prelude.Compat 24 | 25 | import Algebra.Lattice 26 | (BoundedJoinSemiLattice (..), BoundedMeetSemiLattice (..), Lattice (..)) 27 | import Data.IntMap (IntMap) 28 | import Data.IntSet (IntSet) 29 | import Data.List (intercalate) 30 | import Data.Map (Map) 31 | import Data.Maybe (fromMaybe) 32 | import Data.RangeSet.Map (RSet) 33 | 34 | import qualified Data.ByteString as BS 35 | import qualified Data.Function.Step.Discrete.Closed as SF 36 | import qualified Data.IntMap as IntMap 37 | import qualified Data.IntSet as IntSet 38 | import qualified Data.Map as Map 39 | import qualified Data.MemoTrie as MT 40 | import qualified Data.RangeSet.Map as RSet 41 | 42 | import Kleene.Classes 43 | import qualified Kleene.ERE as ERE 44 | import Kleene.Internal.Pretty 45 | import qualified Kleene.Internal.RE as RE 46 | 47 | -- $setup 48 | -- >>> :set -XOverloadedStrings 49 | -- >>> import Data.Foldable (traverse_) 50 | -- >>> import Data.Semigroup (Semigroup (..)) 51 | -- >>> import Algebra.Lattice ((/\)) 52 | -- >>> import Kleene.Classes 53 | -- >>> import Kleene.Internal.Pretty (putPretty) 54 | -- >>> import Test.QuickCheck ((===)) 55 | -- >>> import qualified Test.QuickCheck as QC 56 | -- >>> import qualified Kleene.RE as RE 57 | -- >>> import qualified Kleene.ERE as ERE 58 | -- >>> import qualified Data.RangeSet.Map as RSet 59 | -- 60 | -- >>> newtype Smaller a = Smaller a deriving (Show) 61 | -- >>> let intLog2 = (`div` 10) 62 | -- >>> instance QC.Arbitrary a => QC.Arbitrary (Smaller a) where arbitrary = QC.scale intLog2 QC.arbitrary; shrink (Smaller a) = map Smaller (QC.shrink a) 63 | -- 64 | -- >>> let asREChar :: RE.RE Char -> RE.RE Char; asREChar = id 65 | 66 | ------------------------------------------------------------------------------- 67 | -- DFA 68 | ------------------------------------------------------------------------------- 69 | 70 | -- | Deterministic finite automaton. 71 | -- 72 | -- A deterministic finite automaton (DFA) over an alphabet \(\Sigma\) (type 73 | -- variable @c@) is 4-tuple \(Q\), \(q_0\) , \(F\), \(\delta\), where 74 | -- 75 | -- * \(Q\) is a finite set of states (subset of 's'), 76 | -- * \(q_0 \in Q\) is the distinguised start state ('dfaInitial'), 77 | -- * \(F \subset Q\) is a set of final (or accepting) states ('dfaAcceptable'), and 78 | -- * \(\delta : Q \times \Sigma \to Q\) is a function called the state 79 | -- transition function ('dfaTransition'). 80 | -- 81 | data DFA c = DFA 82 | { dfaTransition :: !(IntMap (SF.SF c Int)) 83 | -- ^ transition function 84 | , dfaInitial :: !Int 85 | -- ^ initial state 86 | , dfaAcceptable :: !IntSet 87 | -- ^ accept states 88 | , dfaBlackholes :: !IntSet 89 | -- ^ states we cannot escape 90 | } 91 | deriving Show 92 | 93 | ------------------------------------------------------------------------------- 94 | -- Construction 95 | ------------------------------------------------------------------------------- 96 | 97 | -- | Convert 'RE.RE' to 'DFA'. 98 | -- 99 | -- >>> putPretty $ fromRE $ RE.star "abc" 100 | -- 0+ -> \x -> if 101 | -- | x <= '`' -> 3 102 | -- | x <= 'a' -> 2 103 | -- | otherwise -> 3 104 | -- 1 -> \x -> if 105 | -- | x <= 'b' -> 3 106 | -- | x <= 'c' -> 0 107 | -- | otherwise -> 3 108 | -- 2 -> \x -> if 109 | -- | x <= 'a' -> 3 110 | -- | x <= 'b' -> 1 111 | -- | otherwise -> 3 112 | -- 3 -> \_ -> 3 -- black hole 113 | -- 114 | -- Everything and nothing result in blackholes: 115 | -- 116 | -- >>> traverse_ (putPretty . fromRE) [RE.empty, RE.star RE.anyChar] 117 | -- 0 -> \_ -> 0 -- black hole 118 | -- 0+ -> \_ -> 0 -- black hole 119 | -- 120 | -- Character ranges are effecient: 121 | -- 122 | -- >>> putPretty $ fromRE $ RE.charRange 'a' 'z' 123 | -- 0 -> \x -> if 124 | -- | x <= '`' -> 2 125 | -- | x <= 'z' -> 1 126 | -- | otherwise -> 2 127 | -- 1+ -> \_ -> 2 128 | -- 2 -> \_ -> 2 -- black hole 129 | -- 130 | -- An example with two blackholes: 131 | -- 132 | -- >>> putPretty $ fromRE $ "c" <> RE.star RE.anyChar 133 | -- 0 -> \x -> if 134 | -- | x <= 'b' -> 2 135 | -- | x <= 'c' -> 1 136 | -- | otherwise -> 2 137 | -- 1+ -> \_ -> 1 -- black hole 138 | -- 2 -> \_ -> 2 -- black hole 139 | -- 140 | fromRE :: forall c. (Ord c, Enum c, Bounded c) => RE.RE c -> DFA c 141 | fromRE = fromTM 142 | 143 | -- | Convert 'ERE.ERE' to 'DFA'. 144 | -- 145 | -- We don't always generate a minimal automata: 146 | -- 147 | -- >>> putPretty $ fromERE $ "a" /\ "b" 148 | -- 0 -> \_ -> 1 149 | -- 1 -> \_ -> 1 -- black hole 150 | -- 151 | -- Compare this to a 'complement' example 152 | -- 153 | -- Using 'fromTMEquiv', we can get a minimal automaton, for the cost of higher 154 | -- complexity (slow!). 155 | -- 156 | -- >>> putPretty $ fromTMEquiv $ ("a" /\ "b" :: ERE.ERE Char) 157 | -- 0 -> \_ -> 0 -- black hole 158 | -- 159 | -- >>> putPretty $ fromERE $ complement $ star "abc" 160 | -- 0 -> \x -> if 161 | -- | x <= '`' -> 3 162 | -- | x <= 'a' -> 2 163 | -- | otherwise -> 3 164 | -- 1+ -> \x -> if 165 | -- | x <= 'b' -> 3 166 | -- | x <= 'c' -> 0 167 | -- | otherwise -> 3 168 | -- 2+ -> \x -> if 169 | -- | x <= 'a' -> 3 170 | -- | x <= 'b' -> 1 171 | -- | otherwise -> 3 172 | -- 3+ -> \_ -> 3 -- black hole 173 | -- 174 | fromERE :: forall c. (Ord c, Enum c, Bounded c) => ERE.ERE c -> DFA c 175 | fromERE = fromTM 176 | 177 | -- | Create from 'TransitionMap'. 178 | -- 179 | -- See 'fromRE' for a specific example. 180 | fromTM :: forall k c. (Ord k, Ord c, TransitionMap c k) => k -> DFA c 181 | fromTM = fromTMImpl Nothing 182 | 183 | -- | Create from 'TransitonMap' minimising states with 'Equivalent'. 184 | -- 185 | -- See 'fromERE' for an example. 186 | -- 187 | fromTMEquiv :: forall k c. (Ord k, Ord c, TransitionMap c k, Equivalent c k) => k -> DFA c 188 | fromTMEquiv = fromTMImpl (Just equivalent) 189 | 190 | fromTMImpl :: forall k c. (Ord k, Ord c, TransitionMap c k) 191 | => Maybe (k -> k -> Bool) 192 | -> k 193 | -> DFA c 194 | fromTMImpl mequiv re = DFA 195 | { dfaTransition = transition 196 | , dfaInitial = 0 197 | , dfaAcceptable = IntSet.fromList 198 | [ i 199 | | (re', i) <- Map.toList lookupMap 200 | , nullable re' 201 | ] 202 | , dfaBlackholes = blackholes 203 | } 204 | where 205 | transition = IntMap.fromList 206 | [ (i, js) 207 | | (re', pm) <- Map.toList tm 208 | , let i = fromMaybe 0 $ Map.lookup re' lookupMap 209 | , let js = SF.normalise $ fmap (\re'' -> fromMaybe 0 $ Map.lookup re'' lookupMap) pm 210 | ] 211 | 212 | blackholes = IntSet.fromList 213 | [ i 214 | | (i, sf) <- IntMap.toList transition 215 | , sf == pure i 216 | ] 217 | 218 | tm = transitionMap re 219 | 220 | -- reversing makes error state go last, usually 221 | lookupMap :: Map k Int 222 | lookupMap = makeLookup 1 lookupMap' (reverse $ Map.toList $ Map.delete re tm) 223 | 224 | lookupMap' :: Map k Int 225 | lookupMap' = case Map.lookup re tm of 226 | Nothing -> Map.empty 227 | Just _ -> Map.singleton re 0 228 | 229 | makeLookup :: Int -> Map k Int -> [(k, b)] -> Map k Int 230 | makeLookup = maybe makeLookupEq makeLookupEquiv mequiv 231 | 232 | makeLookupEq :: Int -> Map k Int -> [(k, b)] -> Map k Int 233 | makeLookupEq !_ !acc [] = acc 234 | makeLookupEq !n acc ((x, _) : xs) = makeLookup (n + 1) (Map.insert x n acc) xs 235 | 236 | -- this differs from makeLookupEq. We don't insert new states right away, 237 | -- but check whether equivalent state is already in the map. 238 | -- 239 | -- This causes n^2 of exp m operations, where n = number of states and 240 | -- m size of @k@. 241 | makeLookupEquiv :: (k -> k -> Bool) -> Int -> Map k Int -> [(k, b)] -> Map k Int 242 | makeLookupEquiv _ !_ !acc [] = acc 243 | makeLookupEquiv eq !n acc ((x, _) : xs) = case ys of 244 | [] -> makeLookup (n + 1) (Map.insert x n acc) xs 245 | ((_, i) : _) -> makeLookup n (Map.insert x i acc) xs 246 | where 247 | ys = [ p | p@(y, _) <- Map.toList acc, eq x y ] 248 | 249 | ------------------------------------------------------------------------------- 250 | -- Destruction 251 | ------------------------------------------------------------------------------- 252 | 253 | -- | Convert 'DFA' to 'RE.RE'. 254 | -- 255 | -- >>> putPretty $ toRE $ fromRE "foobar" 256 | -- ^foobar$ 257 | -- 258 | -- For 'RE.string' regular expressions, @'toRE' . 'fromRE' = 'id'@: 259 | -- 260 | -- prop> let s' = take 5 s in RE.string (s' :: String) === toRE (fromRE (RE.string s')) 261 | -- 262 | -- But in general it isn't: 263 | -- 264 | -- >>> let aToZ = RE.star $ RE.charRange 'a' 'z' 265 | -- >>> traverse_ putPretty [aToZ, toRE $ fromRE aToZ] 266 | -- ^[a-z]*$ 267 | -- ^([a-z]|[a-z]?[a-z]*[a-z]?)?$ 268 | -- 269 | -- @ 270 | -- not-prop> (re :: RE.RE Char) === toRE (fromRE re) 271 | -- @ 272 | -- 273 | -- However, they are 'RE.equivalent': 274 | -- 275 | -- >>> RE.equivalent aToZ (toRE (fromRE aToZ)) 276 | -- True 277 | -- 278 | -- And so are others 279 | -- 280 | -- >>> all (\re -> RE.equivalent re (toRE (fromRE re))) [RE.star "a", RE.star "ab"] 281 | -- True 282 | -- 283 | -- @ 284 | -- expensive-prop> RE.equivalent re (toRE (fromRE (re :: RE.RE Char))) 285 | -- @ 286 | -- 287 | -- Note, that @'toRE' . 'fromRE'@ can, and usually makes regexp unrecognisable: 288 | -- 289 | -- >>> putPretty $ toRE $ fromRE $ RE.star "ab" 290 | -- ^(a(ba)*b)?$ 291 | -- 292 | -- We can 'complement' DFA, therefore we can complement 'RE.RE'. 293 | -- For example. regular expression matching string containing an @a@: 294 | -- 295 | -- >>> let withA = RE.star RE.anyChar <> "a" <> RE.star RE.anyChar 296 | -- >>> let withoutA = toRE $ complement $ fromRE withA 297 | -- >>> putPretty withoutA 298 | -- ^([^a]|[^a]?[^a]*[^a]?)?$ 299 | -- 300 | -- >>> let withoutA' = RE.star $ RE.REChars $ RSet.complement $ RSet.singleton 'a' 301 | -- >>> putPretty withoutA' 302 | -- ^[^a]*$ 303 | -- 304 | -- >>> RE.equivalent withoutA withoutA' 305 | -- True 306 | -- 307 | -- Quite small, for example 2 state DFAs can result in big regular expressions: 308 | -- 309 | -- >>> putPretty $ toRE $ complement $ fromRE $ star "ab" 310 | -- ^([^]|a(ba)*(ba)?|a(ba)*([^b]|b[^a])|([^a]|a(ba)*([^b]|b[^a]))[^]*[^]?)$ 311 | -- 312 | -- We can use @'toRE' . 'fromERE'@ to convert 'ERE.ERE' to 'RE.RE': 313 | -- 314 | -- >>> putPretty $ toRE $ fromERE $ complement $ star "ab" 315 | -- ^([^]|a(ba)*(ba)?|a(ba)*([^b]|b[^a])|([^a]|a(ba)*([^b]|b[^a]))[^]*[^]?)$ 316 | -- 317 | -- >>> putPretty $ toRE $ fromERE $ "a" /\ "b" 318 | -- ^[]$ 319 | -- 320 | -- See 321 | -- for the description of the algorithm used. 322 | -- 323 | toRE :: (Ord c, Enum c, Bounded c) => DFA c -> RE.RE c 324 | toRE = toKleene 325 | 326 | -- | Convert 'DFA' to 'ERE.ERE'. 327 | toERE :: (Ord c, Enum c, Bounded c) => DFA c -> ERE.ERE c 328 | toERE = toKleene 329 | 330 | -- | Convert to any 'Kleene'. 331 | -- 332 | -- See 'toRE' for a specific example. 333 | -- 334 | toKleene :: forall k c. (Ord c, Enum c, Bounded c, FiniteKleene c k) => DFA c -> k 335 | toKleene (DFA tr ini acc _) = unions 336 | [ re ini j maxN 337 | | j <- IntSet.toList acc 338 | ] 339 | where 340 | maxN | IntMap.null tr = 1 341 | | otherwise = succ $ fst $ IntMap.findMax tr 342 | 343 | {- 344 | -- this is useful for debug 345 | table = 346 | [ show i ++ " " ++ show j ++ " " ++ show k ++ " = " ++ pretty (re i j k) 347 | | k <- [0..pred maxN] 348 | , i <- [0..pred maxN] 349 | , j <- [0..pred maxN] 350 | ] 351 | -} 352 | 353 | re i j k = MT.memo re' (i, j, k) 354 | re' (i, j, k) 355 | | k <= 0 = if i == j then unions [eps, r] else r 356 | | otherwise = unions [re i j k', appends [re i k' k', star (re k' k' k'), re k' j k']] 357 | where 358 | r = maybe empty fromRSet $ Map.lookup (i, j) re0map 359 | k' = k - 1 360 | 361 | re0map :: Map (Int, Int) (RSet c) 362 | re0map = Map.fromListWith RSet.union 363 | [ ((i, j), RSet.singletonRange (lo, hi)) 364 | | (i, tr') <- IntMap.toList tr 365 | , (lo, hi, j) <- toPieces tr' 366 | ] 367 | 368 | toPieces :: (Enum a, Bounded a, Ord a) => SF.SF a b -> [(a, a, b)] 369 | toPieces (SF.SF m v) 370 | | maxBound `Map.member` m = toPieces' m 371 | | otherwise = toPieces' (Map.insert maxBound v m) 372 | 373 | toPieces' :: (Enum a, Bounded a) => Map a b -> [(a, a, b)] 374 | toPieces' = go minBound . Map.toList where 375 | go _lo [] = [] 376 | go lo ((k, v) : kv) = (lo, k, v) : go (succ k) kv 377 | 378 | ------------------------------------------------------------------------------- 379 | -- Operations 380 | ------------------------------------------------------------------------------- 381 | 382 | -- | Run 'DFA' on the input. 383 | -- 384 | -- Because we have analysed a language, in some cases we can determine a result 385 | -- without traversing all of the input. 386 | -- That's not the cases with 'RE.RE' 'match'. 387 | -- 388 | -- >>> let dfa = fromRE $ RE.star "abc" 389 | -- >>> map (match dfa) ["", "abc", "abcabc", "aa", 'a' : 'a' : undefined] 390 | -- [True,True,True,False,False] 391 | -- 392 | -- Holds: 393 | -- 394 | -- @ 395 | -- 'match' ('fromRE' re) xs == 'match' re xs 396 | -- @ 397 | -- 398 | -- prop> all (match (fromRE r)) $ take 10 $ RE.generate (curry QC.choose) 42 (r :: RE.RE Char) 399 | -- 400 | instance Ord c => Match c (DFA c) where 401 | match (DFA tr i acc bh) = go i where 402 | go !s _ | IntSet.member s bh = IntSet.member s acc 403 | go !s [] = IntSet.member s acc 404 | go !s (c : cs) = case IntMap.lookup s tr of 405 | Nothing -> False 406 | Just sf -> go (sf SF.! c) cs 407 | 408 | match8 (DFA tr i acc bh) = go i where 409 | go !s !_ | IntSet.member s bh = IntSet.member s acc 410 | go !s bs = case BS.uncons bs of 411 | Nothing -> IntSet.member s acc 412 | Just (c, cs) -> case IntMap.lookup s tr of 413 | Nothing -> False 414 | Just sf -> go (sf SF.! c) cs 415 | 416 | -- | Complement DFA. 417 | -- 418 | -- Complement of 'DFA' is way easier than of 'RE.RE': complement accept states. 419 | -- 420 | -- >>> let dfa = complement $ fromRE $ RE.star "abc" 421 | -- >>> putPretty dfa 422 | -- 0 -> \x -> if 423 | -- | x <= '`' -> 3 424 | -- | x <= 'a' -> 2 425 | -- | otherwise -> 3 426 | -- 1+ -> \x -> if 427 | -- | x <= 'b' -> 3 428 | -- | x <= 'c' -> 0 429 | -- | otherwise -> 3 430 | -- 2+ -> \x -> if 431 | -- | x <= 'a' -> 3 432 | -- | x <= 'b' -> 1 433 | -- | otherwise -> 3 434 | -- 3+ -> \_ -> 3 -- black hole 435 | -- 436 | -- >>> map (match dfa) ["", "abc", "abcabc", "aa","abca", 'a' : 'a' : undefined] 437 | -- [False,False,False,True,True,True] 438 | -- 439 | instance Complement c (DFA c) where 440 | complement (DFA tr ini acc bh) = DFA tr ini acc' bh where 441 | acc' = IntSet.difference (IntMap.keysSet tr) acc 442 | 443 | instance Ord c => Derivate c (DFA c) where 444 | nullable (DFA _tr ini acc _bh) = IntSet.member ini acc 445 | 446 | derivate c (DFA tr ini acc bh) = DFA tr ini' acc bh where 447 | ini' = case IntMap.lookup ini tr of 448 | Nothing -> ini -- in error case let's just stay in the same state. 449 | Just sf -> sf SF.! c 450 | 451 | ------------------------------------------------------------------------------- 452 | -- toDot 453 | ------------------------------------------------------------------------------- 454 | 455 | -- | Get Graphviz dot-code of DFA. 456 | -- 457 | -- >>> let dfa = fromRE $ RE.star "abc" 458 | -- >>> putStr $ toDot dfa 459 | -- digraph dfa { 460 | -- rankdir=LR; 461 | -- // states 462 | -- "0" [shape=doublecircle]; 463 | -- "1" [shape=circle]; 464 | -- "2" [shape=circle]; 465 | -- // initial state 466 | -- "" [shape=none]; 467 | -- "" -> "0"; 468 | -- // transitions 469 | -- "0" -> "2"[label="a"] 470 | -- "1" -> "0"[label="c"] 471 | -- "2" -> "1"[label="b"] 472 | -- } 473 | -- 474 | toDot :: DFA Char -> String 475 | toDot = toDot' show pure 476 | 477 | -- | More flexible version of 'toDot'. 478 | toDot' :: (Ord c, Enum c, Bounded c) => (Int -> String) -> (c -> String) -> DFA c -> String 479 | toDot' showS showC (DFA tr ini acc bh) 480 | = showString "digraph dfa {\n" 481 | . showString "rankdir=LR;\n" 482 | . showString "// states\n" 483 | . showStates 484 | . showString "// initial state\n" 485 | . showInitial 486 | . showString "// transitions\n" 487 | . showTransitions 488 | . showString "}\n" 489 | $ "" 490 | where 491 | showStates = foldr (.) id 492 | [ showState i 493 | | i <- IntMap.keys tr 494 | , IntSet.member i acc || IntSet.notMember i bh 495 | ] 496 | showState s = showS' s . shape where 497 | shape 498 | | IntSet.member s acc = showString " [shape=doublecircle];\n" 499 | | otherwise = showString " [shape=circle];\n" 500 | 501 | showInitial 502 | = showString "\"\" [shape=none];\n" 503 | . showString "\"\" -> " 504 | . showS' ini 505 | . showString ";\n" 506 | 507 | showTransitions = foldr (.) id 508 | [ showS' i 509 | . showString " -> " 510 | . showS' j 511 | . showString "[label=" 512 | . label 513 | . showString "]\n" 514 | | (i, sf) <- IntMap.toList tr 515 | , (lo, hi, j) <- toPieces sf 516 | , IntSet.member j acc || IntSet.notMember j bh 517 | , let label 518 | | lo == hi 519 | = shows (showC lo) 520 | | lo == minBound && hi == maxBound 521 | = shows ("-any" :: String) 522 | | otherwise 523 | = shows (showC lo ++ "-" ++ showC hi) 524 | ] 525 | 526 | showS' = shows . showS 527 | 528 | ------------------------------------------------------------------------------- 529 | -- Orphans 530 | ------------------------------------------------------------------------------- 531 | 532 | -- | __WARNING__: The '/\' is inefficient, it actually computes the conjunction: 533 | -- 534 | -- >>> putPretty $ asREChar $ "a" /\ "b" 535 | -- ^[]$ 536 | -- 537 | -- >>> putPretty $ asREChar $ "a" /\ star "a" 538 | -- ^a$ 539 | -- 540 | -- >>> putPretty $ asREChar $ star "aa" /\ star "aaa" 541 | -- ^(a(aaaaaa)*aaaaa)?$ 542 | -- 543 | instance (Ord c, Enum c, Bounded c) => Lattice (RE.RE c) where 544 | r /\ r' = toRE $ fromERE $ ERE.fromRE r /\ ERE.fromRE r' 545 | r \/ r' = unions [r, r'] 546 | 547 | instance (Ord c, Enum c, Bounded c) => BoundedJoinSemiLattice (RE.RE c) where 548 | bottom = empty 549 | 550 | instance (Ord c, Enum c, Bounded c) => BoundedMeetSemiLattice (RE.RE c) where 551 | top = RE.REStar (RE.REChars RSet.full) 552 | 553 | instance (Ord c, Enum c, Bounded c) => Complement c (RE.RE c) where 554 | complement = toRE . complement . fromRE 555 | 556 | ------------------------------------------------------------------------------- 557 | -- Debug 558 | ------------------------------------------------------------------------------- 559 | 560 | instance Show c => Pretty (DFA c) where 561 | pretty dfa = intercalate "\n" 562 | [ show i ++ acc ++ " -> " ++ SF.showSF sf ++ bh 563 | | (i, sf) <- IntMap.toList (dfaTransition dfa) 564 | , let acc = if IntSet.member i (dfaAcceptable dfa) then "+" else "" 565 | , let bh = if IntSet.member i $ dfaBlackholes dfa then " -- black hole" else "" 566 | ] 567 | -------------------------------------------------------------------------------- /src/Kleene/ERE.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | {-# LANGUAGE GADTs #-} 4 | {-# LANGUAGE MultiParamTypeClasses #-} 5 | {-# LANGUAGE Safe #-} 6 | {-# LANGUAGE ScopedTypeVariables #-} 7 | module Kleene.ERE ( 8 | ERE (..), 9 | -- * Construction 10 | -- 11 | -- | Binary operators are 12 | -- 13 | -- * '<>' for append 14 | -- * '\/' for union 15 | -- * '/\' for intersection 16 | -- 17 | empty, 18 | eps, 19 | everything, 20 | char, 21 | charRange, 22 | anyChar, 23 | appends, 24 | unions, 25 | intersections, 26 | star, 27 | string, 28 | complement, 29 | -- * Derivative 30 | nullable, 31 | derivate, 32 | -- * Conversion 33 | fromRE, 34 | -- * Transition map 35 | transitionMap, 36 | leadingChars, 37 | -- * Equivalence 38 | equivalent, 39 | -- * Other 40 | isEmpty, 41 | isEverything, 42 | ) where 43 | 44 | import Data.Semigroup (Semigroup (..)) 45 | import Prelude () 46 | import Prelude.Compat 47 | 48 | import Algebra.Lattice 49 | (BoundedJoinSemiLattice (..), BoundedMeetSemiLattice (..), Lattice (..)) 50 | import Control.Applicative (liftA2) 51 | import Data.Foldable (toList) 52 | import Data.List (foldl') 53 | import Data.Map (Map) 54 | import Data.RangeSet.Map (RSet) 55 | import Data.Set (Set) 56 | import Data.String (IsString (..)) 57 | 58 | import qualified Data.Function.Step.Discrete.Closed as SF 59 | import qualified Data.Map as Map 60 | import qualified Data.RangeSet.Map as RSet 61 | import qualified Data.Set as Set 62 | import qualified Test.QuickCheck as QC 63 | 64 | import qualified Kleene.Classes as C 65 | import qualified Kleene.Internal.Partition as P 66 | import Kleene.Internal.Pretty 67 | import qualified Kleene.Internal.RE as RE 68 | 69 | -- $setup 70 | -- >>> import Algebra.Lattice ((/\), (\/), top, bottom) 71 | -- >>> import Data.Semigroup (Semigroup (..)) 72 | -- >>> import Control.Monad (void) 73 | -- >>> import Data.Foldable (traverse_) 74 | -- >>> import Data.List (sort) 75 | -- >>> import Test.QuickCheck ((===)) 76 | -- >>> import qualified Test.QuickCheck as QC 77 | -- >>> import qualified Data.Map as Map 78 | -- >>> import qualified Data.Function.Step.Discrete.Closed as SF 79 | -- 80 | -- >>> import Kleene.Classes (match) 81 | -- >>> import Kleene.Internal.Pretty (putPretty, pretty) 82 | -- >>> let asEREChar :: ERE Char -> ERE Char; asEREChar = id 83 | 84 | -- | Extended regular expression 85 | -- 86 | -- It's both, /Kleene/ and /Boolean/ algebra. (If we add only intersections, it 87 | -- wouldn't be /Boolean/). 88 | -- 89 | -- /Note:/ we don't have special constructor for intersections. 90 | -- We use de Morgan formula \(a \land b = \neg (\neg a \lor \neg b)\). 91 | -- 92 | -- >>> putPretty $ asEREChar $ "a" /\ "b" 93 | -- ^~(~a|~b)$ 94 | -- 95 | -- There is no generator, as 'intersections' makes it hard. 96 | -- 97 | data ERE c 98 | = EREChars (RSet c) -- ^ Single character 99 | | EREAppend [ERE c] -- ^ Concatenation 100 | | EREUnion (RSet c) (Set (ERE c)) -- ^ Union 101 | | EREStar (ERE c) -- ^ Kleene star 102 | | ERENot (ERE c) -- ^ Complement 103 | deriving (Eq, Ord, Show) 104 | 105 | ------------------------------------------------------------------------------- 106 | -- fromRE 107 | ------------------------------------------------------------------------------- 108 | 109 | -- | Convert from ordinary regular expression, 'RE.RE'. 110 | -- 111 | fromRE :: Ord c => RE.RE c -> ERE c 112 | fromRE (RE.REChars rs) = EREChars rs 113 | fromRE (RE.REAppend rs) = EREAppend (map fromRE rs) 114 | fromRE (RE.REUnion r rs) = EREUnion r (Set.map fromRE rs) 115 | fromRE (RE.REStar r) = EREStar (fromRE r) 116 | 117 | ------------------------------------------------------------------------------- 118 | -- Smart constructor 119 | ------------------------------------------------------------------------------- 120 | 121 | -- | Empty regex. Doesn't accept anything. 122 | -- 123 | -- >>> putPretty (empty :: ERE Char) 124 | -- ^[]$ 125 | -- 126 | -- >>> putPretty (bottom :: ERE Char) 127 | -- ^[]$ 128 | -- 129 | -- prop> match (empty :: ERE Char) (s :: String) === False 130 | -- 131 | empty :: ERE c 132 | empty = EREChars RSet.empty 133 | 134 | -- | Everything. 135 | -- 136 | -- >>> putPretty (everything :: ERE Char) 137 | -- ^~[]$ 138 | -- 139 | -- >>> putPretty (top :: ERE Char) 140 | -- ^~[]$ 141 | -- 142 | -- prop> match (everything :: ERE Char) (s :: String) === True 143 | -- 144 | everything :: ERE c 145 | everything = complement empty 146 | 147 | -- | Empty string. /Note:/ different than 'empty'. 148 | -- 149 | -- >>> putPretty eps 150 | -- ^$ 151 | -- 152 | -- >>> putPretty (mempty :: ERE Char) 153 | -- ^$ 154 | -- 155 | -- prop> match (eps :: ERE Char) s === null (s :: String) 156 | -- 157 | eps :: ERE c 158 | eps = EREAppend [] 159 | 160 | -- | 161 | -- 162 | -- >>> putPretty (char 'x') 163 | -- ^x$ 164 | -- 165 | char :: c -> ERE c 166 | char = EREChars . RSet.singleton 167 | 168 | -- | 169 | -- 170 | -- >>> putPretty $ charRange 'a' 'z' 171 | -- ^[a-z]$ 172 | -- 173 | charRange :: Ord c => c -> c -> ERE c 174 | charRange c c' = EREChars $ RSet.singletonRange (c, c') 175 | 176 | -- | Any character. /Note:/ different than dot! 177 | -- 178 | -- >>> putPretty anyChar 179 | -- ^[^]$ 180 | -- 181 | anyChar :: Bounded c => ERE c 182 | anyChar = EREChars RSet.full 183 | 184 | -- | Concatenate regular expressions. 185 | -- 186 | -- prop> asEREChar r <> empty === empty 187 | -- prop> empty <> asEREChar r === empty 188 | -- prop> (asEREChar r <> s) <> t === r <> (s <> t) 189 | -- 190 | -- prop> asEREChar r <> eps === r 191 | -- prop> eps <> asEREChar r === r 192 | -- 193 | appends :: Eq c => [ERE c] -> ERE c 194 | appends rs0 195 | | elem empty rs1 = empty 196 | | otherwise = case rs1 of 197 | [r] -> r 198 | rs -> EREAppend rs 199 | where 200 | -- flatten one level of EREAppend 201 | rs1 = concatMap f rs0 202 | 203 | f (EREAppend rs) = rs 204 | f r = [r] 205 | 206 | -- | Union of regular expressions. 207 | -- 208 | -- prop> asEREChar r \/ r === r 209 | -- prop> asEREChar r \/ s === s \/ r 210 | -- prop> (asEREChar r \/ s) \/ t === r \/ (s \/ t) 211 | -- 212 | -- prop> empty \/ asEREChar r === r 213 | -- prop> asEREChar r \/ empty === r 214 | -- 215 | -- prop> everything \/ asEREChar r === everything 216 | -- prop> asEREChar r \/ everything === everything 217 | -- 218 | unions :: (Ord c, Enum c) => [ERE c] -> ERE c 219 | unions = uncurry mk . foldMap f where 220 | mk cs rss 221 | | Set.null rss = EREChars cs 222 | | Set.member everything rss = everything 223 | | RSet.null cs = case Set.toList rss of 224 | [] -> empty 225 | [r] -> r 226 | _ -> EREUnion cs rss 227 | | otherwise = EREUnion cs rss 228 | 229 | f (EREUnion cs rs) = (cs, rs) 230 | f (EREChars cs) = (cs, Set.empty) 231 | f r = (mempty, Set.singleton r) 232 | 233 | -- | Intersection of regular expressions. 234 | -- 235 | -- prop> asEREChar r /\ r === r 236 | -- prop> asEREChar r /\ s === s /\ r 237 | -- prop> (asEREChar r /\ s) /\ t === r /\ (s /\ t) 238 | -- 239 | -- prop> empty /\ asEREChar r === empty 240 | -- prop> asEREChar r /\ empty === empty 241 | -- 242 | -- prop> everything /\ asEREChar r === r 243 | -- prop> asEREChar r /\ everything === r 244 | -- 245 | intersections :: (Ord c, Enum c) => [ERE c] -> ERE c 246 | intersections = complement . unions . map complement 247 | 248 | -- | Complement. 249 | -- 250 | -- prop> complement (complement r) === asEREChar r 251 | -- 252 | complement :: ERE c -> ERE c 253 | complement r = case r of 254 | ERENot r' -> r' 255 | _ -> ERENot r 256 | 257 | -- | Kleene star. 258 | -- 259 | -- prop> star (star r) === star (asEREChar r) 260 | -- 261 | -- prop> star eps === asEREChar eps 262 | -- prop> star empty === asEREChar eps 263 | -- prop> star anyChar === asEREChar everything 264 | -- 265 | -- prop> star (asEREChar r \/ eps) === star r 266 | -- prop> star (char c \/ eps) === star (char (c :: Char)) 267 | -- prop> star (empty \/ eps) === eps 268 | -- 269 | star :: (Ord c, Bounded c) => ERE c -> ERE c 270 | star r = case r of 271 | EREStar _ -> r 272 | EREAppend [] -> eps 273 | EREChars cs | RSet.null cs -> eps 274 | EREChars cs | RSet.isFull cs -> everything 275 | EREUnion cs rs | Set.member eps rs -> case Set.toList rs' of 276 | [] -> star (EREChars cs) 277 | [r'] | RSet.null cs -> star r' 278 | _ -> EREStar (EREUnion cs rs') 279 | where 280 | rs' = Set.delete eps rs 281 | _ -> EREStar r 282 | 283 | -- | Literal string. 284 | -- 285 | -- >>> putPretty ("foobar" :: ERE Char) 286 | -- ^foobar$ 287 | -- 288 | -- >>> putPretty ("(.)" :: ERE Char) 289 | -- ^\(\.\)$ 290 | -- 291 | string :: [c] -> ERE c 292 | string [] = eps 293 | string [c] = EREChars (RSet.singleton c) 294 | string cs = EREAppend $ map (EREChars . RSet.singleton) cs 295 | 296 | instance (Ord c, Enum c, Bounded c) => C.Kleene (ERE c) where 297 | empty = empty 298 | eps = eps 299 | appends = appends 300 | unions = unions 301 | star = star 302 | 303 | instance (Ord c, Enum c, Bounded c) => C.CharKleene c (ERE c) where 304 | char = char 305 | 306 | instance (Ord c, Enum c, Bounded c) => C.FiniteKleene c (ERE c) where 307 | everything = everything 308 | charRange = charRange 309 | fromRSet = EREChars 310 | anyChar = anyChar 311 | 312 | instance C.Complement c (ERE c) where 313 | complement = complement 314 | 315 | ------------------------------------------------------------------------------- 316 | -- derivative 317 | ------------------------------------------------------------------------------- 318 | 319 | -- | We say that a regular expression r is nullable if the language it defines 320 | -- contains the empty string. 321 | -- 322 | -- >>> nullable eps 323 | -- True 324 | -- 325 | -- >>> nullable (star "x") 326 | -- True 327 | -- 328 | -- >>> nullable "foo" 329 | -- False 330 | -- 331 | -- >>> nullable (complement eps) 332 | -- False 333 | -- 334 | nullable :: ERE c -> Bool 335 | nullable (EREChars _) = False 336 | nullable (EREAppend rs) = all nullable rs 337 | nullable (EREUnion _cs rs) = any nullable rs 338 | nullable (EREStar _) = True 339 | nullable (ERENot r) = not (nullable r) 340 | 341 | -- | Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) 342 | -- with respect to a symbol \(a \in \Sigma\) is the language that includes only 343 | -- those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\). 344 | -- 345 | -- >>> putPretty $ derivate 'f' "foobar" 346 | -- ^oobar$ 347 | -- 348 | -- >>> putPretty $ derivate 'x' $ "xyz" \/ "abc" 349 | -- ^yz$ 350 | -- 351 | -- >>> putPretty $ derivate 'x' $ star "xyz" 352 | -- ^yz(xyz)*$ 353 | -- 354 | derivate :: (Ord c, Enum c) => c -> ERE c -> ERE c 355 | derivate c (EREChars cs) = derivateChars c cs 356 | derivate c (EREUnion cs rs) = unions $ derivateChars c cs : [ derivate c r | r <- toList rs] 357 | derivate c (EREAppend rs) = derivateAppend c rs 358 | derivate c rs@(EREStar r) = derivate c r <> rs 359 | derivate c (ERENot r) = complement (derivate c r) 360 | 361 | instance (Ord c, Enum c) => C.Derivate c (ERE c) where 362 | nullable = nullable 363 | derivate = derivate 364 | 365 | instance (Ord c, Enum c) => C.Match c (ERE c) where 366 | match r = nullable . foldl' (flip derivate) r 367 | 368 | derivateAppend :: (Enum c, Ord c) => c -> [ERE c] -> ERE c 369 | derivateAppend _ [] = empty 370 | derivateAppend c [r] = derivate c r 371 | derivateAppend c (r:rs) 372 | | nullable r = unions [r' <> appends rs, rs'] 373 | | otherwise = r' <> appends rs 374 | where 375 | r' = derivate c r 376 | rs' = derivateAppend c rs 377 | 378 | derivateChars :: Ord c => c -> RSet c -> ERE c 379 | derivateChars c cs 380 | | c `RSet.member` cs = eps 381 | | otherwise = empty 382 | 383 | ------------------------------------------------------------------------------- 384 | -- isEmpty 385 | ------------------------------------------------------------------------------- 386 | 387 | -- | Whether 'ERE' is (structurally) equal to 'empty'. 388 | isEmpty :: ERE c -> Bool 389 | isEmpty (EREChars rs) = RSet.null rs 390 | isEmpty _ = False 391 | 392 | -- | Whether 'ERE' is (structurally) equal to 'everything'. 393 | isEverything :: ERE c -> Bool 394 | isEverything (ERENot (EREChars rs)) = RSet.null rs 395 | isEverything _ = False 396 | 397 | ------------------------------------------------------------------------------- 398 | -- States 399 | ------------------------------------------------------------------------------- 400 | 401 | -- | Transition map. Used to construct 'Kleene.DFA.DFA'. 402 | -- 403 | -- >>> void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: ERE Char) 404 | -- ^[]$ : \_ -> "^[]$" 405 | -- ^b$ : \x -> if 406 | -- | x <= 'a' -> "^[]$" 407 | -- | x <= 'b' -> "^$" 408 | -- | otherwise -> "^[]$" 409 | -- ^$ : \_ -> "^[]$" 410 | -- ^ab$ : \x -> if 411 | -- | x <= '`' -> "^[]$" 412 | -- | x <= 'a' -> "^b$" 413 | -- | otherwise -> "^[]$" 414 | -- 415 | transitionMap 416 | :: forall c. (Ord c, Enum c, Bounded c) 417 | => ERE c 418 | -> Map (ERE c) (SF.SF c (ERE c)) 419 | transitionMap re = go Map.empty [re] where 420 | go :: Map (ERE c) (SF.SF c (ERE c)) 421 | -> [ERE c] 422 | -> Map (ERE c) (SF.SF c (ERE c)) 423 | go !acc [] = acc 424 | go acc (r : rs) 425 | | r `Map.member` acc = go acc rs 426 | | otherwise = go (Map.insert r pm acc) (SF.values pm ++ rs) 427 | where 428 | pm = P.toSF (\c -> derivate c r) (leadingChars r) 429 | 430 | instance (Ord c, Enum c, Bounded c) => C.TransitionMap c (ERE c) where 431 | transitionMap = transitionMap 432 | 433 | -- | Leading character sets of regular expression. 434 | -- 435 | -- >>> leadingChars "foo" 436 | -- fromSeparators "ef" 437 | -- 438 | -- >>> leadingChars (star "b" <> star "e") 439 | -- fromSeparators "abde" 440 | -- 441 | -- >>> leadingChars (charRange 'b' 'z') 442 | -- fromSeparators "az" 443 | -- 444 | leadingChars :: (Ord c, Enum c, Bounded c) => ERE c -> P.Partition c 445 | leadingChars (EREChars cs) = P.fromRSet cs 446 | leadingChars (EREUnion cs rs) = P.fromRSet cs <> foldMap leadingChars rs 447 | leadingChars (EREStar r) = leadingChars r 448 | leadingChars (EREAppend rs) = leadingCharsAppend rs 449 | leadingChars (ERENot r) = leadingChars r 450 | 451 | leadingCharsAppend :: (Ord c, Enum c, Bounded c) => [ERE c] -> P.Partition c 452 | leadingCharsAppend [] = P.whole 453 | leadingCharsAppend (r : rs) 454 | | nullable r = leadingChars r <> leadingCharsAppend rs 455 | | otherwise = leadingChars r 456 | 457 | ------------------------------------------------------------------------------- 458 | -- Equivalence 459 | ------------------------------------------------------------------------------- 460 | 461 | -- | Whether two regexps are equivalent. 462 | -- 463 | -- @ 464 | -- 'equivalent' re1 re2 <=> forall s. 'match' re1 s == 'match' re1 s 465 | -- @ 466 | -- 467 | -- >>> let re1 = star "a" <> "a" 468 | -- >>> let re2 = "a" <> star "a" 469 | -- 470 | -- These are different regular expressions, even we perform 471 | -- some normalisation-on-construction: 472 | -- 473 | -- >>> re1 == re2 474 | -- False 475 | -- 476 | -- They are however equivalent: 477 | -- 478 | -- >>> equivalent re1 re2 479 | -- True 480 | -- 481 | -- The algorithm works by executing 'states' on "product" regexp, 482 | -- and checking whether all resulting states are both accepting or rejecting. 483 | -- 484 | -- @ 485 | -- re1 == re2 ==> 'equivalent' re1 re2 486 | -- @ 487 | -- 488 | -- === More examples 489 | -- 490 | -- >>> let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2) 491 | -- >>> example re1 re2 492 | -- ^a*a$ 493 | -- ^aa*$ 494 | -- True 495 | -- 496 | -- >>> example (star "aa") (star "aaa") 497 | -- ^(aa)*$ 498 | -- ^(aaa)*$ 499 | -- False 500 | -- 501 | -- >>> example (star "aa" <> star "aaa") (star "aaa" <> star "aa") 502 | -- ^(aa)*(aaa)*$ 503 | -- ^(aaa)*(aa)*$ 504 | -- True 505 | -- 506 | -- >>> example (star ("a" \/ "b")) (star $ star "a" <> star "b") 507 | -- ^[a-b]*$ 508 | -- ^(a*b*)*$ 509 | -- True 510 | -- 511 | equivalent :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> ERE c -> Bool 512 | equivalent x0 y0 = go mempty [(x0, y0)] where 513 | go :: Set (ERE c, ERE c) -> [(ERE c, ERE c)] -> Bool 514 | go !_ [] = True 515 | go acc (p@(x, y) : zs) 516 | | p `Set.member` acc = go acc zs 517 | -- if two regexps are structurally the same, we don't need to recurse. 518 | | x == y = go (Set.insert p acc) zs 519 | | all agree ps = go (Set.insert p acc) (ps ++ zs) 520 | | otherwise = False 521 | where 522 | cs = toList $ P.examples $ leadingChars x `P.wedge` leadingChars y 523 | ps = map (\c -> (derivate c x, derivate c y)) cs 524 | 525 | agree :: (ERE c, ERE c) -> Bool 526 | agree (x, y) = nullable x == nullable y 527 | 528 | instance (Ord c, Enum c, Bounded c) => C.Equivalent c (ERE c) where 529 | equivalent = equivalent 530 | 531 | ------------------------------------------------------------------------------- 532 | -- Instances 533 | ------------------------------------------------------------------------------- 534 | 535 | instance Eq c => Semigroup (ERE c) where 536 | r <> r' = appends [r, r'] 537 | 538 | instance Eq c => Monoid (ERE c) where 539 | mempty = eps 540 | mappend = (<>) 541 | mconcat = appends 542 | 543 | instance (Ord c, Enum c) => Lattice (ERE c) where 544 | r \/ r' = unions [r, r'] 545 | r /\ r' = intersections [r, r'] 546 | 547 | instance (Ord c, Enum c) => BoundedJoinSemiLattice (ERE c) where 548 | bottom = empty 549 | 550 | instance (Ord c, Enum c) => BoundedMeetSemiLattice (ERE c) where 551 | top = everything 552 | 553 | instance c ~ Char => IsString (ERE c) where 554 | fromString = string 555 | 556 | instance (Ord c, Enum c, Bounded c, QC.Arbitrary c) => QC.Arbitrary (ERE c) where 557 | arbitrary = QC.sized arb where 558 | c :: QC.Gen (ERE c) 559 | c = EREChars . RSet.fromRangeList <$> QC.arbitrary 560 | 561 | arb :: Int -> QC.Gen (ERE c) 562 | arb n | n <= 0 = QC.oneof [c, fmap char QC.arbitrary, pure eps] 563 | | otherwise = QC.oneof 564 | [ c 565 | , pure eps 566 | , fmap char QC.arbitrary 567 | , liftA2 (<>) (arb n2) (arb n2) 568 | , liftA2 (\/) (arb n2) (arb n2) 569 | , fmap star (arb n2) 570 | , fmap complement (arb n2) 571 | ] 572 | where 573 | n2 = n `div` 2 574 | 575 | instance (QC.CoArbitrary c) => QC.CoArbitrary (ERE c) where 576 | coarbitrary (EREChars cs) = QC.variant (0 :: Int) . QC.coarbitrary (RSet.toRangeList cs) 577 | coarbitrary (EREAppend rs) = QC.variant (1 :: Int) . QC.coarbitrary rs 578 | coarbitrary (EREUnion cs rs) = QC.variant (2 :: Int) . QC.coarbitrary (RSet.toRangeList cs, Set.toList rs) 579 | coarbitrary (EREStar r) = QC.variant (3 :: Int) . QC.coarbitrary r 580 | coarbitrary (ERENot r) = QC.variant (4 :: Int) . QC.coarbitrary r 581 | 582 | ------------------------------------------------------------------------------- 583 | -- JavaScript 584 | ------------------------------------------------------------------------------- 585 | 586 | instance c ~ Char => Pretty (ERE c) where 587 | prettyS x = showChar '^' . go False x . showChar '$' 588 | where 589 | go :: Bool -> ERE Char -> ShowS 590 | go p (EREStar a) 591 | = parens p 592 | $ go True a . showChar '*' 593 | go p (EREAppend rs) 594 | = parens p $ goMany id rs 595 | go p (EREUnion cs rs) 596 | | RSet.null cs = goUnion p rs 597 | | Set.null rs = prettyS cs 598 | | otherwise = goUnion p (Set.insert (EREChars cs) rs) 599 | go _ (EREChars cs) 600 | = prettyS cs 601 | go p (ERENot r) 602 | = parens p $ showChar '~' . go True r 603 | 604 | goUnion p rs 605 | | Set.member eps rs = parens p $ goUnion' True . showChar '?' 606 | | otherwise = goUnion' p 607 | where 608 | goUnion' p' = case Set.toList (Set.delete eps rs) of 609 | [] -> go True empty 610 | [r] -> go p' r 611 | (r:rs') -> parens True $ goSome1 (showChar '|') r rs' 612 | 613 | goMany :: ShowS -> [ERE Char] -> ShowS 614 | goMany sep = foldr (\a b -> go False a . sep . b) id 615 | 616 | goSome1 :: ShowS -> ERE Char -> [ERE Char] -> ShowS 617 | goSome1 sep r = foldl (\a b -> a . sep . go False b) (go False r) 618 | 619 | parens :: Bool -> ShowS -> ShowS 620 | parens True s = showString "(" . s . showChar ')' 621 | parens False s = s 622 | 623 | ------------------------------------------------------------------------------- 624 | -- Latin1 625 | ------------------------------------------------------------------------------- 626 | 627 | instance C.ToLatin1 ERE where 628 | toLatin1 (EREChars rs) = C.fromRSet (C.toLatin1 rs) 629 | toLatin1 (EREAppend xs) = appends (map C.toLatin1 xs) 630 | toLatin1 (EREUnion rs xs) = C.fromRSet (C.toLatin1 rs) \/ unions (map C.toLatin1 (Set.toList xs)) 631 | toLatin1 (EREStar r) = star (C.toLatin1 r) 632 | toLatin1 (ERENot r) = complement (C.toLatin1 r) 633 | -------------------------------------------------------------------------------- /src/Kleene/Equiv.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances #-} 2 | {-# LANGUAGE FunctionalDependencies #-} 3 | {-# LANGUAGE GADTs #-} 4 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 5 | {-# LANGUAGE StandaloneDeriving #-} 6 | {-# LANGUAGE Trustworthy #-} 7 | {-# LANGUAGE UndecidableInstances #-} 8 | module Kleene.Equiv where 9 | 10 | import Prelude () 11 | import Prelude.Compat 12 | 13 | import Algebra.Lattice 14 | (BoundedJoinSemiLattice (..), BoundedMeetSemiLattice (..), Lattice (..), 15 | joinLeq) 16 | import Algebra.PartialOrd (PartialOrd (..)) 17 | import Data.Semigroup (Semigroup (..)) 18 | 19 | import Kleene.Classes 20 | import Kleene.Internal.Pretty 21 | 22 | -- $setup 23 | -- >>> import Kleene.RE (RE) 24 | -- >>> import Kleene.Classes 25 | -- >>> import Algebra.PartialOrd (leq) 26 | -- >>> import Data.Semigroup (Semigroup (..)) 27 | 28 | -- | Regular-expressions for which '==' is 'equivalent'. 29 | -- 30 | -- >>> let re1 = star "a" <> "a" :: RE Char 31 | -- >>> let re2 = "a" <> star "a" :: RE Char 32 | -- 33 | -- >>> re1 == re2 34 | -- False 35 | -- 36 | -- >>> Equiv re1 == Equiv re2 37 | -- True 38 | -- 39 | -- 'Equiv' is also a 'PartialOrd' (but not 'Ord'!) 40 | -- 41 | -- >>> Equiv "a" `leq` Equiv (star "a" :: RE Char) 42 | -- True 43 | -- 44 | -- Not all regular expessions are 'comparable': 45 | -- 46 | -- >>> let reA = Equiv "a" :: Equiv RE Char 47 | -- >>> let reB = Equiv "b" :: Equiv RE Char 48 | -- >>> (leq reA reB, leq reB reA) 49 | -- (False,False) 50 | -- 51 | newtype Equiv r c = Equiv (r c) 52 | deriving (Show, Semigroup, Monoid, BoundedJoinSemiLattice, BoundedMeetSemiLattice, Lattice, Pretty) 53 | 54 | instance Equivalent c (r c) => Eq (Equiv r c) where 55 | (==) = equivalent 56 | 57 | -- | \(a \preceq b := a \lor b = b \) 58 | instance (Lattice (r c), Equivalent c (r c)) => PartialOrd (Equiv r c) where 59 | leq = joinLeq 60 | 61 | deriving instance Kleene (r c) => Kleene (Equiv r c) 62 | deriving instance CharKleene c (r c) => CharKleene c (Equiv r c) 63 | deriving instance Derivate c (r c) => Derivate c (Equiv r c) 64 | deriving instance Match c (r c) => Match c (Equiv r c) 65 | deriving instance Equivalent c (r c) => Equivalent c (Equiv r c) 66 | deriving instance Complement c (r c) => Complement c (Equiv r c) 67 | -------------------------------------------------------------------------------- /src/Kleene/Functor.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | module Kleene.Functor ( 3 | K, 4 | Greediness (..), 5 | -- * Constructors 6 | few, 7 | anyChar, 8 | oneof, 9 | char, 10 | charRange, 11 | dot, 12 | everything, 13 | everything1, 14 | -- * Queries 15 | isEmpty, 16 | isEverything, 17 | -- * Matching 18 | match, 19 | -- * Conversions 20 | toRE, 21 | toKleene, 22 | fromRE, 23 | toRA, 24 | ) where 25 | 26 | import Kleene.Internal.Functor 27 | -------------------------------------------------------------------------------- /src/Kleene/Functor/NonEmpty.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE GADTs #-} 3 | {-# LANGUAGE Safe #-} 4 | module Kleene.Functor.NonEmpty ( 5 | K1, 6 | Greediness (..), 7 | -- * Constructors 8 | some1, 9 | few1, 10 | anyChar, 11 | oneof, 12 | char, 13 | charRange, 14 | dot, 15 | everything1, 16 | string, 17 | -- * Queries 18 | isEmpty, 19 | isEverything, 20 | -- * Matching 21 | match, 22 | -- * Conversions 23 | toRE, 24 | toKleene, 25 | toRA, 26 | nullableProof, 27 | ) where 28 | 29 | import Prelude () 30 | import Prelude.Compat 31 | 32 | import Control.Applicative (Alternative (..), liftA2) 33 | import Data.Foldable (toList) 34 | import Data.Functor.Alt (()) 35 | import Data.Functor.Apply (Apply (..)) 36 | import Data.List.NonEmpty (NonEmpty (..)) 37 | import Data.RangeSet.Map (RSet) 38 | 39 | import qualified Data.Functor.Alt as Alt 40 | import qualified Data.List.NonEmpty as NE 41 | import qualified Data.RangeSet.Map as RSet 42 | import qualified Text.Regex.Applicative as R 43 | 44 | import qualified Kleene.Classes as C 45 | import Kleene.Internal.Functor (Greediness (..), K (..)) 46 | import Kleene.Internal.Pretty 47 | import Kleene.Internal.Sets 48 | import qualified Kleene.RE as RE 49 | 50 | -- $setup 51 | -- 52 | -- >>> import Control.Applicative (optional, Alternative (..)) 53 | -- >>> import Data.Functor.Apply (Apply (..)) 54 | -- >>> import Data.List.NonEmpty (NonEmpty (..)) 55 | -- >>> import Kleene.Functor (Greediness (..), K (..)) 56 | -- >>> import Data.Foldable (toList) 57 | -- >>> import Kleene.Internal.Pretty (putPretty) 58 | -- >>> import qualified Kleene.RE as RE 59 | -- >>> import qualified Kleene.Classes as C 60 | -- >>> import qualified Text.Regex.Applicative as R 61 | 62 | -- | 'Applicative' 'Functor' regular expression. 63 | data K1 c a where 64 | K1Empty :: K1 c a 65 | K1Char :: (Ord c, Enum c) => RSet c -> K1 c c 66 | K1Append :: (a -> b -> r) -> K1 c a -> K1 c b -> K1 c r 67 | K1Union :: K1 c a -> K1 c a -> K1 c a 68 | KPlus :: Greediness -> K1 c a -> K1 c (NonEmpty a) 69 | 70 | -- optimisations 71 | K1Map :: (a -> b) -> K1 c a -> K1 c b -- could use Pure and Append 72 | K1String :: Eq c => NonEmpty c -> K1 c (NonEmpty c) -- could use Char and Append 73 | 74 | instance Functor (K1 c) where 75 | fmap _ K1Empty = K1Empty 76 | fmap f (K1Map g k) = K1Map (f . g) k 77 | fmap f (K1Append g a b) = K1Append (\x y -> f (g x y)) a b 78 | fmap f k = K1Map f k 79 | 80 | instance Apply (K1 c) where 81 | K1Empty <.> _ = K1Empty 82 | _ <.> K1Empty = K1Empty 83 | 84 | f <.> x = K1Append ($) f x 85 | 86 | liftF2 = K1Append 87 | 88 | instance Alt.Alt (K1 c) where 89 | K1Empty k = k 90 | k K1Empty = k 91 | K1Char a K1Char b = K1Char (RSet.union a b) 92 | 93 | a b = K1Union a b 94 | 95 | -- 96 | some1 :: K1 c a -> K1 c (NonEmpty a) 97 | some1 K1Empty = K1Empty 98 | some1 (KPlus _ k) = K1Map pure (KPlus Greedy k) 99 | some1 k = KPlus Greedy k 100 | 101 | -- | 'few1', not 'some1'. 102 | -- 103 | -- Let's define two similar regexps 104 | -- 105 | -- >>> let re1 = liftF2 (,) (few1 $ char 'a') (some1 $ char 'a') 106 | -- >>> let re2 = liftF2 (,) (some1 $ char 'a') (few1 $ char 'a') 107 | -- 108 | -- Their 'RE' behaviour is the same: 109 | -- 110 | -- >>> C.equivalent (toRE re1) (toRE re2) 111 | -- True 112 | -- 113 | -- >>> map (C.match $ toRE re1) ["aaa","bbb"] 114 | -- [True,False] 115 | -- 116 | -- However, the 'RA' behaviour is different! 117 | -- 118 | -- >>> R.match (toRA re1) "aaaaa" 119 | -- Just ('a' :| "",'a' :| "aaa") 120 | -- 121 | -- >>> R.match (toRA re2) "aaaaa" 122 | -- Just ('a' :| "aaa",'a' :| "") 123 | -- 124 | few1 :: K1 c a -> K1 c (NonEmpty a) 125 | few1 K1Empty = K1Empty 126 | few1 (KPlus _ k) = K1Map pure (KPlus NonGreedy k) 127 | few1 k = KPlus NonGreedy k 128 | 129 | ------------------------------------------------------------------------------- 130 | -- 131 | ------------------------------------------------------------------------------- 132 | 133 | -- | >>> putPretty anyChar 134 | -- ^[^]$ 135 | anyChar :: (Ord c, Enum c, Bounded c) => K1 c c 136 | anyChar = K1Char RSet.full 137 | 138 | -- | >>> putPretty $ oneof ("foobar" :: [Char]) 139 | -- ^[a-bfor]$ 140 | oneof :: (Ord c, Enum c, Foldable f) => f c -> K1 c c 141 | oneof = K1Char . RSet.fromList . toList 142 | 143 | -- | >>> putPretty $ char 'x' 144 | -- ^x$ 145 | char :: (Ord c, Enum c) => c -> K1 c c 146 | char = K1Char . RSet.singleton 147 | 148 | -- | >>> putPretty $ charRange 'a' 'z' 149 | -- ^[a-z]$ 150 | charRange :: (Enum c, Ord c) => c -> c -> K1 c c 151 | charRange a b = K1Char (RSet.singletonRange (a, b)) 152 | 153 | -- | >>> putPretty dot 154 | -- ^.$ 155 | dot :: K1 Char Char 156 | dot = K1Char dotRSet 157 | 158 | -- | >>> putPretty everything1 159 | -- ^[^][^]*$ 160 | everything1 :: (Ord c, Enum c, Bounded c) => K1 c (NonEmpty c) 161 | everything1 = some1 anyChar 162 | 163 | -- | Matches nothing? 164 | isEmpty :: (Ord c, Enum c, Bounded c) => K1 c a -> Bool 165 | isEmpty k = C.equivalent (toRE k) C.empty 166 | 167 | -- | Matches whole input? 168 | isEverything :: (Ord c, Enum c, Bounded c) => K1 c a -> Bool 169 | isEverything k = C.equivalent (toRE k) C.everything 170 | 171 | string :: String -> K1 Char (NonEmpty Char) 172 | string [] = error "panic! K1.string []" 173 | string (x : xs) = K1String (x :| xs) 174 | 175 | ------------------------------------------------------------------------------- 176 | -- Matching 177 | ------------------------------------------------------------------------------- 178 | 179 | -- | Match using @regex-applicative@ 180 | match :: K1 c a -> [c] -> Maybe a 181 | match = R.match . toRA 182 | 183 | ------------------------------------------------------------------------------- 184 | -- RE 185 | ------------------------------------------------------------------------------- 186 | 187 | -- | Convert to 'RE'. 188 | -- 189 | -- >>> putPretty (toRE $ some1 (string "foo") :: RE.RE Char) 190 | -- ^foo(foo)*$ 191 | -- 192 | toRE :: (Ord c, Enum c, Bounded c) => K1 c a -> RE.RE c 193 | toRE = toKleene 194 | 195 | -- | Convert to any 'Kleene' 196 | toKleene :: C.FiniteKleene c k => K1 c a -> k 197 | toKleene (K1Map _ a) = toKleene a 198 | toKleene (K1Union a b) = C.unions [toKleene a, toKleene b] 199 | toKleene (K1Append _ a b) = C.appends [toKleene a, toKleene b] 200 | toKleene (KPlus _ a) = let k = toKleene a in C.appends [k, C.star k] 201 | toKleene (K1String s) = C.appends (map C.char $ NE.toList s) 202 | toKleene K1Empty = C.empty 203 | toKleene (K1Char cs) = C.fromRSet cs 204 | 205 | ------------------------------------------------------------------------------- 206 | -- regex-applicative 207 | ------------------------------------------------------------------------------- 208 | 209 | -- | Convert 'K' to 'R.RE' from @regex-applicative@. 210 | -- 211 | -- >>> R.match (toRA (string "xx" .> everything1 <. string "zz" :: K1 Char (NonEmpty Char))) "xxyyzyyzz" 212 | -- Just ('y' :| "yzyy") 213 | -- 214 | -- See also 'match'. 215 | -- 216 | toRA :: K1 c a -> R.RE c a 217 | toRA K1Empty = empty 218 | toRA (K1Char cs) = R.psym (\c -> RSet.member c cs) 219 | toRA (K1Append f a b) = liftA2 f (toRA a) (toRA b) 220 | toRA (K1Union a b) = toRA a <|> toRA b 221 | toRA (KPlus Greedy a) = (:|) <$> toRA a <*> many (toRA a) 222 | toRA (KPlus NonGreedy a) = (:|) <$> toRA a <*> R.few (toRA a) 223 | toRA (K1Map f a) = fmap f (toRA a) 224 | toRA (K1String (x :| xs)) = (:|) <$> R.sym x <*> R.string xs 225 | 226 | ------------------------------------------------------------------------------- 227 | -- nullableProof 228 | ------------------------------------------------------------------------------- 229 | 230 | -- | 231 | -- >>> putPretty $ nullableProof (pure True) 232 | -- Right 1 , ^[]$ 233 | -- 234 | -- >>> putPretty $ nullableProof (many "xyz" :: K Char [String]) 235 | -- Right [] , ^xyz(xyz)*$ 236 | -- 237 | -- >>> putPretty $ nullableProof (many $ toList <$> optional "x" <|> many "yz" :: K Char [[String]]) 238 | -- Right [] , ^(x|yz(yz)*)(x|yz(yz)*)*$ 239 | -- 240 | nullableProof :: K c a -> Either (K1 c a) (a, K1 c a) 241 | nullableProof KEmpty = Left K1Empty 242 | nullableProof (KPure x) = Right (x, K1Empty) 243 | nullableProof (KChar c) = Left (K1Char c) 244 | 245 | nullableProof (KAppend f a b) = case (nullableProof a, nullableProof b) of 246 | (Left x, Left y) -> Left (K1Append f x y) 247 | (Left x, Right (y', y)) -> Left ((`f` y') <$> x K1Append f x y) 248 | (Right (x', x), Left y) -> Left (K1Append f x y f x' <$> y) 249 | (Right (x', x), Right (y', y)) -> Right 250 | (f x' y' 251 | , K1Append f x y 252 | flip f y' <$> x 253 | f x' <$> y 254 | ) 255 | 256 | nullableProof (KUnion a b) = case (nullableProof a, nullableProof b) of 257 | (Left x', Left _) -> Left x' 258 | (Right (x, x'), Left y') -> Right (x, x' y') 259 | (Left x', Right (y, y')) -> Right (y, x' y') 260 | (Right (x, x'), Right (_, y')) -> Right (x, x' y') 261 | 262 | nullableProof (KStar g a) = case nullableProof a of 263 | Left x -> Right ([], NE.toList <$> star1 x) 264 | Right (_, x) -> Right ([], NE.toList <$> star1 x) -- note, we don't left recurse 265 | where 266 | star1 = case g of 267 | Greedy -> some1 268 | NonGreedy -> few1 269 | 270 | nullableProof (KMap f a) = case nullableProof a of 271 | Right (x, x') -> Right (f x, fmap f x') 272 | Left x' -> Left (fmap f x') 273 | 274 | nullableProof (KString []) = Right ([], K1Empty) 275 | nullableProof (KString (c : cs)) = Left (NE.toList <$> K1String (c :| cs)) 276 | 277 | ------------------------------------------------------------------------------- 278 | -- JavaScript 279 | ------------------------------------------------------------------------------- 280 | 281 | -- | Convert to non-matching JavaScript string which can be used 282 | -- as an argument to @new RegExp@ 283 | -- 284 | -- >>> putPretty ("foobar" :: K Char String) 285 | -- ^foobar$ 286 | -- 287 | -- >>> putPretty $ many ("foobar" :: K Char String) 288 | -- ^(foobar)*$ 289 | -- 290 | instance c ~ Char => Pretty (K1 c a) where 291 | pretty = pretty . toRE 292 | -------------------------------------------------------------------------------- /src/Kleene/Internal/Functor.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE GADTs #-} 3 | {-# LANGUAGE Safe #-} 4 | {-# OPTIONS_HADDOCK not-home #-} 5 | module Kleene.Internal.Functor ( 6 | K (..), 7 | Greediness (..), 8 | -- * Constructors 9 | few, 10 | anyChar, 11 | oneof, 12 | char, 13 | charRange, 14 | dot, 15 | everything, 16 | everything1, 17 | -- * Queries 18 | isEmpty, 19 | isEverything, 20 | -- * Matching 21 | match, 22 | -- * Conversions 23 | toRE, 24 | toKleene, 25 | fromRE, 26 | toRA, 27 | ) where 28 | 29 | import Prelude () 30 | import Prelude.Compat 31 | 32 | import Control.Applicative (Alternative (..), liftA2) 33 | import Data.Foldable (toList) 34 | import Data.Functor.Apply (Apply (..)) 35 | import Data.RangeSet.Map (RSet) 36 | import Data.String (IsString (..)) 37 | 38 | import qualified Data.Functor.Alt as Alt 39 | import qualified Data.RangeSet.Map as RSet 40 | import qualified Text.Regex.Applicative as R 41 | 42 | import qualified Kleene.Classes as C 43 | import Kleene.Internal.Pretty 44 | import Kleene.Internal.Sets 45 | import qualified Kleene.RE as RE 46 | 47 | 48 | -- $setup 49 | -- 50 | -- >>> import Control.Applicative (Alternative (..), liftA2) 51 | -- >>> import Data.Semigroup (Semigroup (..)) 52 | -- >>> import Kleene.Internal.Pretty (putPretty) 53 | -- >>> import qualified Kleene.Classes as C 54 | -- >>> import qualified Kleene.RE as RE 55 | -- >>> import qualified Text.Regex.Applicative as R 56 | 57 | ------------------------------------------------------------------------------- 58 | -- Functor RE 59 | ------------------------------------------------------------------------------- 60 | 61 | -- | Star behaviour 62 | data Greediness 63 | = Greedy -- ^ 'many' 64 | | NonGreedy -- ^ 'few' 65 | deriving (Eq, Ord, Show, Enum, Bounded) 66 | 67 | -- | 'Applicative' 'Functor' regular expression. 68 | data K c a where 69 | KEmpty :: K c a 70 | KPure :: a -> K c a 71 | KChar :: (Ord c, Enum c) => RSet c -> K c c 72 | KAppend :: (a -> b -> r) -> K c a -> K c b -> K c r 73 | KUnion :: K c a -> K c a -> K c a 74 | KStar :: Greediness -> K c a -> K c [a] 75 | 76 | -- optimisations 77 | KMap :: (a -> b) -> K c a -> K c b -- could use Pure and Append 78 | KString :: Eq c => [c] -> K c [c] -- could use Char and Append 79 | 80 | instance (c ~ Char, IsString a) => IsString (K c a) where 81 | fromString s = KMap fromString (KString s) 82 | 83 | instance Functor (K c) where 84 | fmap _ KEmpty = KEmpty 85 | fmap f (KPure x) = KPure (f x) 86 | fmap f (KMap g k) = KMap (f . g) k 87 | fmap f (KAppend g a b) = KAppend (\x y -> f (g x y)) a b 88 | fmap f k = KMap f k 89 | 90 | instance Apply (K c) where 91 | KEmpty <.> _ = KEmpty 92 | _ <.> KEmpty = KEmpty 93 | 94 | KPure f <.> k = fmap f k 95 | k <.> KPure x = fmap ($ x) k 96 | 97 | f <.> x = KAppend ($) f x 98 | 99 | liftF2 = KAppend 100 | 101 | instance Applicative (K c) where 102 | pure = KPure 103 | (<*>) = (<.>) 104 | 105 | #if MIN_VERSION_base(4,10,0) 106 | liftA2 = liftF2 107 | #endif 108 | 109 | instance Alt.Alt (K c) where 110 | KEmpty k = k 111 | k KEmpty = k 112 | KChar a KChar b = KChar (RSet.union a b) 113 | 114 | a b = KUnion a b 115 | 116 | many KEmpty = KPure [] 117 | many (KStar _ k) = KMap pure (KStar Greedy k) 118 | many k = KStar Greedy k 119 | 120 | some KEmpty = KEmpty 121 | some (KStar _ k) = KMap pure (KStar Greedy k) 122 | some k = liftA2 (:) k (KStar Greedy k) 123 | 124 | instance Alternative (K c) where 125 | empty = KEmpty 126 | (<|>) = (Alt.) 127 | some = Alt.some 128 | many = Alt.many 129 | 130 | -- | 'few', not 'many'. 131 | -- 132 | -- Let's define two similar regexps 133 | -- 134 | -- >>> let re1 = liftA2 (,) (few $ char 'a') (many $ char 'a') 135 | -- >>> let re2 = liftA2 (,) (many $ char 'a') (few $ char 'a') 136 | -- 137 | -- Their 'RE' behaviour is the same: 138 | -- 139 | -- >>> C.equivalent (toRE re1) (toRE re2) 140 | -- True 141 | -- 142 | -- >>> map (C.match $ toRE re1) ["aaa","bbb"] 143 | -- [True,False] 144 | -- 145 | -- However, the 'RA' behaviour is different! 146 | -- 147 | -- >>> R.match (toRA re1) "aaaa" 148 | -- Just ("","aaaa") 149 | -- 150 | -- >>> R.match (toRA re2) "aaaa" 151 | -- Just ("aaaa","") 152 | -- 153 | few :: K c a -> K c [a] 154 | few KEmpty = KPure [] 155 | few (KStar _ k) = KMap pure (KStar NonGreedy k) 156 | few k = KStar NonGreedy k 157 | 158 | ------------------------------------------------------------------------------- 159 | -- 160 | ------------------------------------------------------------------------------- 161 | 162 | -- | >>> putPretty anyChar 163 | -- ^[^]$ 164 | anyChar :: (Ord c, Enum c, Bounded c) => K c c 165 | anyChar = KChar RSet.full 166 | 167 | -- | >>> putPretty $ oneof ("foobar" :: [Char]) 168 | -- ^[a-bfor]$ 169 | oneof :: (Ord c, Enum c, Foldable f) => f c -> K c c 170 | oneof = KChar . RSet.fromList . toList 171 | 172 | -- | >>> putPretty $ char 'x' 173 | -- ^x$ 174 | char :: (Ord c, Enum c) => c -> K c c 175 | char = KChar . RSet.singleton 176 | 177 | -- | >>> putPretty $ charRange 'a' 'z' 178 | -- ^[a-z]$ 179 | charRange :: (Enum c, Ord c) => c -> c -> K c c 180 | charRange a b = KChar (RSet.singletonRange (a, b)) 181 | 182 | -- | >>> putPretty dot 183 | -- ^.$ 184 | dot :: K Char Char 185 | dot = KChar dotRSet 186 | 187 | -- | >>> putPretty everything 188 | -- ^[^]*$ 189 | everything :: (Ord c, Enum c, Bounded c) => K c [c] 190 | everything = many anyChar 191 | 192 | -- | >>> putPretty everything1 193 | -- ^[^][^]*$ 194 | everything1 :: (Ord c, Enum c, Bounded c) => K c [c] 195 | everything1 = some anyChar 196 | 197 | -- | Matches nothing? 198 | isEmpty :: (Ord c, Enum c, Bounded c) => K c a -> Bool 199 | isEmpty k = C.equivalent (toRE k) C.empty 200 | 201 | -- | Matches whole input? 202 | isEverything :: (Ord c, Enum c, Bounded c) => K c a -> Bool 203 | isEverything k = C.equivalent (toRE k) C.everything 204 | 205 | ------------------------------------------------------------------------------- 206 | -- Matching 207 | ------------------------------------------------------------------------------- 208 | 209 | -- | Match using @regex-applicative@ 210 | match :: K c a -> [c] -> Maybe a 211 | match = R.match . toRA 212 | 213 | ------------------------------------------------------------------------------- 214 | -- RE 215 | ------------------------------------------------------------------------------- 216 | 217 | -- | Convert to 'RE'. 218 | -- 219 | -- >>> putPretty (toRE $ many "foo" :: RE.RE Char) 220 | -- ^(foo)*$ 221 | -- 222 | toRE :: (Ord c, Enum c, Bounded c) => K c a -> RE.RE c 223 | toRE = toKleene 224 | 225 | -- | Convert to any 'Kleene' 226 | toKleene :: C.FiniteKleene c k => K c a -> k 227 | toKleene (KMap _ a) = toKleene a 228 | toKleene (KUnion a b) = C.unions [toKleene a, toKleene b] 229 | toKleene (KAppend _ a b) = C.appends [toKleene a, toKleene b] 230 | toKleene (KStar _ a) = C.star (toKleene a) 231 | toKleene (KString s) = C.appends (map C.char s) 232 | toKleene KEmpty = C.empty 233 | toKleene (KPure _) = C.eps 234 | toKleene (KChar cs) = C.fromRSet cs 235 | 236 | -- | Convert from 'RE'. 237 | -- 238 | -- /Note:/ all 'RE.REStar's are converted to 'Greedy' ones, 239 | -- it doesn't matter, as we don't capture anything. 240 | -- 241 | -- >>> match (fromRE "foobar") "foobar" 242 | -- Just "foobar" 243 | -- 244 | -- >>> match (fromRE $ C.star "a" <> C.star "a") "aaaa" 245 | -- Just "aaaa" 246 | -- 247 | fromRE :: (Ord c, Enum c) => RE.RE c -> K c [c] 248 | fromRE (RE.REChars cs) = pure <$> KChar cs 249 | fromRE (RE.REAppend rs) = concat <$> traverse fromRE rs 250 | fromRE (RE.REUnion cs rs) = foldr (KUnion . fromRE) (pure <$> KChar cs) (toList rs) 251 | fromRE (RE.REStar r) = concat <$> KStar Greedy (fromRE r) 252 | 253 | ------------------------------------------------------------------------------- 254 | -- regex-applicative 255 | ------------------------------------------------------------------------------- 256 | 257 | -- | Convert 'K' to 'R.RE' from @regex-applicative@. 258 | -- 259 | -- >>> R.match (toRA ("xx" *> everything <* "zz" :: K Char String)) "xxyyyzz" 260 | -- Just "yyy" 261 | -- 262 | -- See also 'match'. 263 | -- 264 | toRA :: K c a -> R.RE c a 265 | toRA KEmpty = empty 266 | toRA (KPure x) = pure x 267 | toRA (KChar cs) = R.psym (\c -> RSet.member c cs) 268 | toRA (KAppend f a b) = liftA2 f (toRA a) (toRA b) 269 | toRA (KUnion a b) = toRA a <|> toRA b 270 | toRA (KStar Greedy a) = many (toRA a) 271 | toRA (KStar NonGreedy a) = R.few (toRA a) 272 | toRA (KMap f a) = fmap f (toRA a) 273 | toRA (KString s) = R.string s 274 | 275 | ------------------------------------------------------------------------------- 276 | -- JavaScript 277 | ------------------------------------------------------------------------------- 278 | 279 | -- | Convert to non-matching JavaScript string which can be used 280 | -- as an argument to @new RegExp@ 281 | -- 282 | -- >>> putPretty ("foobar" :: K Char String) 283 | -- ^foobar$ 284 | -- 285 | -- >>> putPretty $ many ("foobar" :: K Char String) 286 | -- ^(foobar)*$ 287 | -- 288 | instance c ~ Char => Pretty (K c a) where 289 | pretty = pretty . toRE 290 | -------------------------------------------------------------------------------- /src/Kleene/Internal/Partition.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | {-# OPTIONS_HADDOCK not-home #-} 3 | module Kleene.Internal.Partition where 4 | 5 | import Data.Semigroup (Semigroup (..)) 6 | import Prelude () 7 | import Prelude.Compat 8 | 9 | import Data.Foldable (toList) 10 | import Data.List.NonEmpty (NonEmpty (..)) 11 | import Data.RangeSet.Map (RSet) 12 | import Data.Set (Set) 13 | 14 | import qualified Data.Function.Step.Discrete.Closed as SF 15 | import qualified Data.List.NonEmpty as NE 16 | import qualified Data.RangeSet.Map as RSet 17 | import qualified Data.Set as Set 18 | 19 | import Test.QuickCheck 20 | 21 | -- $setup 22 | -- >>> import Data.Word 23 | -- >>> import Data.Semigroup (Semigroup (..)) 24 | -- >>> import Test.QuickCheck (Arbitrary (..), (===), (==>)) 25 | -- >>> import Data.RangeSet.Map (RSet) 26 | -- >>> import qualified Data.RangeSet.Map as RSet 27 | -- 28 | -- >>> let asPartitionChar :: Partition Char -> Partition Char; asPartitionChar = id 29 | -- >>> instance (Ord a, Enum a, Arbitrary a) => Arbitrary (RSet a) where arbitrary = fmap RSet.fromRangeList arbitrary 30 | 31 | -- | 'Partition' devides type into disjoint connected partitions. 32 | -- 33 | -- /Note:/ we could have non-connecter partitions too, 34 | -- but that would be more complicated. 35 | -- This variant is correct by construction, but less precise. 36 | -- 37 | -- It's enought to store last element of each piece. 38 | -- 39 | -- @'Partition' (fromList [x1, x2, x3]) :: 'Partition' s@ describes a partition of /Set/ @s@, as 40 | -- 41 | -- \[ 42 | -- \{ x \mid x \le x_1 \} \cup 43 | -- \{ x \mid x_1 < x \le x_2 \} \cup 44 | -- \{ x \mid x_2 < x \le x_3 \} \cup 45 | -- \{ x \mid x_3 < x \} 46 | -- \] 47 | -- 48 | -- /Note:/ it's enough to check upper bound conditions only if checks are performed in order. 49 | -- 50 | -- /Invariant:/ 'maxBound' is not in the set. 51 | -- 52 | newtype Partition a = Partition { unPartition :: Set a } 53 | deriving (Eq, Ord) 54 | 55 | -- | Check invariant. 56 | invariant :: (Ord a, Bounded a) => Partition a -> Bool 57 | invariant (Partition xs) = Set.notMember maxBound xs 58 | 59 | ------------------------------------------------------------------------------- 60 | -- Instances 61 | ------------------------------------------------------------------------------- 62 | 63 | instance Show a => Show (Partition a) where 64 | showsPrec d (Partition xs) 65 | = showParen (d > 10) 66 | $ showString "fromSeparators " 67 | . showsPrec 11 (Set.toList xs) 68 | 69 | -- | prop> invariant (asPartitionChar p) 70 | instance (Enum a, Bounded a, Ord a, Arbitrary a) => Arbitrary (Partition a) where 71 | arbitrary = fromSeparators <$> arbitrary 72 | 73 | -- | See 'wedge'. 74 | instance (Enum a, Bounded a, Ord a) => Semigroup (Partition a) where 75 | (<>) = wedge 76 | 77 | instance (Enum a, Bounded a, Ord a) => Monoid (Partition a) where 78 | mempty = whole 79 | mappend = (<>) 80 | 81 | ------------------------------------------------------------------------------- 82 | -- Constructors 83 | ------------------------------------------------------------------------------- 84 | 85 | fromSeparators :: (Enum a, Bounded a, Ord a) => [a] -> Partition a 86 | fromSeparators = Partition . Set.fromList . filter (/= maxBound) 87 | 88 | -- | Construct 'Partition' from list of 'RSet's. 89 | -- 90 | -- RSet intervals are closed on both sides. 91 | fromRSets :: (Enum a, Bounded a, Ord a) => [RSet a] -> Partition a 92 | fromRSets rs = Partition $ Set.fromList $ concat 93 | [ (if x == minBound then [] else [pred x]) ++ 94 | (if y == maxBound then [] else [y]) 95 | | r <- rs 96 | , (x, y) <- RSet.toRangeList r 97 | ] 98 | 99 | fromRSet :: (Enum a, Bounded a, Ord a) => RSet a -> Partition a 100 | fromRSet r 101 | | r == RSet.empty = whole 102 | | r == RSet.full = whole 103 | | otherwise = fromRSets [r] 104 | 105 | whole :: Partition a 106 | whole = Partition Set.empty 107 | 108 | ------------------------------------------------------------------------------- 109 | -- Querying 110 | ------------------------------------------------------------------------------- 111 | 112 | -- | Count of sets in a 'Partition'. 113 | -- 114 | -- >>> size whole 115 | -- 1 116 | -- 117 | -- >>> size $ split (10 :: Word8) 118 | -- 2 119 | -- 120 | -- prop> size (asPartitionChar p) >= 1 121 | -- 122 | size :: Partition a -> Int 123 | size (Partition xs) = 1 + length xs 124 | 125 | -- | Extract examples from each subset in a 'Partition'. 126 | -- 127 | -- >>> examples $ split (10 :: Word8) 128 | -- fromList [10,255] 129 | -- 130 | -- >>> examples $ split (10 :: Word8) <> split 20 131 | -- fromList [10,20,255] 132 | -- 133 | -- prop> invariant p ==> size (asPartitionChar p) === length (examples p) 134 | -- 135 | examples :: (Bounded a, Enum a, Ord a) => Partition a -> Set a 136 | examples (Partition xs) = Set.insert maxBound xs 137 | 138 | -- | 139 | -- 140 | -- prop> all (uncurry (<=)) $ intervals $ asPartitionChar p 141 | intervals :: (Enum a, Bounded a, Ord a) => Partition a -> NonEmpty (a, a) 142 | intervals (Partition xs) = go minBound (toList xs) where 143 | go x [] = (x, maxBound) :| [] 144 | go x (y : ys) = (x, y) `NE.cons` go y ys 145 | 146 | ------------------------------------------------------------------------------- 147 | -- 148 | -- Operations 149 | ------------------------------------------------------------------------------- 150 | 151 | -- | Wedge partitions. 152 | -- 153 | -- >>> split (10 :: Word8) <> split 20 154 | -- fromSeparators [10,20] 155 | -- 156 | -- prop> whole `wedge` (p :: Partition Char) === p 157 | -- prop> (p :: Partition Char) <> whole === p 158 | -- prop> asPartitionChar p <> q === q <> p 159 | -- prop> asPartitionChar p <> p === p 160 | -- prop> invariant $ asPartitionChar p <> q 161 | -- 162 | wedge :: Ord a => Partition a -> Partition a -> Partition a 163 | wedge (Partition as) (Partition bs) = Partition (Set.union as bs) 164 | 165 | -- | Simplest partition: given @x@ partition space into @[min..x) and [x .. max]@ 166 | -- 167 | -- >>> split (128 :: Word8) 168 | -- fromSeparators [128] 169 | -- 170 | split :: (Enum a, Bounded a, Eq a) => a -> Partition a 171 | split x 172 | | x == minBound = Partition Set.empty 173 | | otherwise = Partition (Set.singleton x) 174 | 175 | ------------------------------------------------------------------------------- 176 | -- Conversion 177 | ------------------------------------------------------------------------------- 178 | 179 | -- | Make a step function. 180 | toSF :: (Enum a, Bounded a, Ord a) => (a -> b) -> Partition a -> SF.SF a b 181 | toSF f (Partition p) = SF.fromList 182 | (map (\k -> (k, f k)) $ toList as) 183 | (f maxBound) 184 | where 185 | as = toList p 186 | -------------------------------------------------------------------------------- /src/Kleene/Internal/Pretty.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs #-} 2 | {-# LANGUAGE Safe #-} 3 | {-# OPTIONS_HADDOCK not-home #-} 4 | module Kleene.Internal.Pretty ( 5 | Pretty (..), 6 | putPretty, 7 | ) where 8 | 9 | import Prelude () 10 | import Prelude.Compat 11 | 12 | import Data.Monoid (Endo (..)) 13 | import Data.RangeSet.Map (RSet) 14 | import Kleene.Internal.Sets (dotRSet) 15 | 16 | import qualified Data.RangeSet.Map as RSet 17 | 18 | ------------------------------------------------------------------------------- 19 | -- Pretty 20 | ------------------------------------------------------------------------------- 21 | 22 | -- | Pretty class. 23 | -- 24 | -- For @'pretty' :: 'Kleene.RE.RE' -> 'String'@ gives a 25 | -- representation accepted by many regex engines. 26 | -- 27 | class Pretty a where 28 | pretty :: a -> String 29 | pretty x = prettyS x "" 30 | 31 | prettyS :: a -> ShowS 32 | prettyS = showString . pretty 33 | 34 | {-# MINIMAL pretty | prettyS #-} 35 | 36 | -- | @'putStrLn' . 'pretty'@ 37 | putPretty :: Pretty a => a -> IO () 38 | putPretty = putStrLn . pretty 39 | 40 | instance c ~ Char => Pretty (RSet c) where 41 | prettyS cs 42 | | RSet.size cs == 1 = prettyS (head (RSet.elems cs)) 43 | | cs == dotRSet = showChar '.' 44 | | ics == dotRSet = showString "[^.]" 45 | | RSet.size cs < RSet.size ics = prettyRSet True cs 46 | | otherwise = prettyRSet False ics 47 | where 48 | ics = RSet.complement cs 49 | 50 | prettyRSet :: Bool -> RSet Char -> ShowS 51 | prettyRSet c cs 52 | = showChar '[' 53 | . (if c then id else showChar '^') 54 | . appEndo (foldMap (Endo . f) (RSet.toRangeList cs)) 55 | . showChar ']' 56 | where 57 | f (a, b) 58 | | a == b = prettyS a 59 | | otherwise = prettyS a . showChar '-' . prettyS b 60 | 61 | -- | Escapes special regexp characters 62 | instance Pretty Char where 63 | prettyS '.' = showString "\\." 64 | prettyS '-' = showString "\\-" 65 | prettyS '^' = showString "\\^" 66 | prettyS '*' = showString "\\*" 67 | prettyS '+' = showString "\\+" 68 | prettyS '?' = showString "\\?" 69 | prettyS '(' = showString "\\(" 70 | prettyS ')' = showString "\\)" 71 | prettyS '[' = showString "\\[" 72 | prettyS ']' = showString "\\]" 73 | prettyS '\r' = showString "\\r" 74 | prettyS '\n' = showString "\\n" 75 | prettyS '\t' = showString "\\t" 76 | prettyS c = showChar c 77 | 78 | instance Pretty Bool where 79 | prettyS True = showChar '1' 80 | prettyS False = showChar '0' 81 | 82 | instance Pretty () where 83 | prettyS _ = showChar '.' 84 | 85 | instance Pretty a => Pretty (Maybe a) where 86 | prettyS Nothing = showString "Nothing" 87 | prettyS (Just x) = prettyS x 88 | 89 | instance (Pretty a, Pretty b) => Pretty (Either a b) where 90 | prettyS (Left x) = showString "Left " . prettyS x 91 | prettyS (Right x) = showString "Right " . prettyS x 92 | 93 | instance (Pretty a, Pretty b) => Pretty (a, b) where 94 | prettyS (x, y) = prettyS x . showString " , " . prettyS y 95 | 96 | instance Show a => Pretty [a] where 97 | prettyS = showList 98 | -------------------------------------------------------------------------------- /src/Kleene/Internal/RE.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | {-# LANGUAGE FunctionalDependencies #-} 4 | {-# LANGUAGE GADTs #-} 5 | {-# LANGUAGE Safe #-} 6 | {-# LANGUAGE ScopedTypeVariables #-} 7 | {-# OPTIONS_HADDOCK not-home #-} 8 | module Kleene.Internal.RE ( 9 | RE (..), 10 | -- * Construction 11 | -- 12 | -- | Binary operators are 13 | -- 14 | -- * '<>' for append 15 | -- * '\/' for union 16 | -- 17 | empty, 18 | eps, 19 | everything, 20 | char, 21 | charRange, 22 | anyChar, 23 | appends, 24 | unions, 25 | star, 26 | string, 27 | -- * Derivative 28 | nullable, 29 | derivate, 30 | -- * Transition map 31 | transitionMap, 32 | leadingChars, 33 | -- * Equivalence 34 | equivalent, 35 | -- * Generation 36 | generate, 37 | -- * Other 38 | isEmpty, 39 | nullableProof, 40 | ) where 41 | 42 | import Data.Semigroup (Semigroup (..)) 43 | import Prelude () 44 | import Prelude.Compat 45 | 46 | import Control.Applicative (liftA2) 47 | import Data.Foldable (toList) 48 | import Data.List (foldl') 49 | import Data.Map (Map) 50 | import Data.RangeSet.Map (RSet) 51 | import Data.Set (Set) 52 | import Data.String (IsString (..)) 53 | 54 | import qualified Data.Function.Step.Discrete.Closed as SF 55 | import qualified Data.Map as Map 56 | import qualified Data.RangeSet.Map as RSet 57 | import qualified Data.Set as Set 58 | import qualified Test.QuickCheck as QC 59 | import qualified Test.QuickCheck.Gen as QC (unGen) 60 | import qualified Test.QuickCheck.Random as QC (mkQCGen) 61 | 62 | import qualified Kleene.Classes as C 63 | import qualified Kleene.Internal.Partition as P 64 | import Kleene.Internal.Pretty 65 | 66 | -- $setup 67 | -- >>> :set -XOverloadedStrings 68 | -- >>> import Control.Monad (void) 69 | -- >>> import Data.Foldable (traverse_) 70 | -- >>> import Data.List (sort) 71 | -- >>> import Data.Maybe (isJust) 72 | -- >>> import Data.Semigroup (Semigroup (..)) 73 | -- 74 | -- >>> import Test.QuickCheck ((===)) 75 | -- >>> import qualified Test.QuickCheck as QC 76 | -- >>> import qualified Data.Map as Map 77 | -- >>> import qualified Data.Function.Step.Discrete.Closed as SF 78 | -- 79 | -- >>> import Kleene.Classes (match) 80 | -- >>> import Kleene.Internal.Pretty (putPretty, pretty) 81 | -- >>> import Algebra.Lattice (bottom, (\/)) 82 | -- >>> import Kleene.RE () 83 | -- 84 | -- >>> let asREChar :: RE Char -> RE Char; asREChar = id 85 | 86 | ------------------------------------------------------------------------------- 87 | -- RE 88 | ------------------------------------------------------------------------------- 89 | 90 | -- | Regular expression 91 | -- 92 | -- Constructors are exposed, but you should use 93 | -- smart constructors in this module to construct 'RE'. 94 | -- 95 | -- The 'Eq' and 'Ord' instances are structural. 96 | -- The 'Kleene' etc constructors do "weak normalisation", so for values 97 | -- constructed using those operations 'Eq' witnesses "weak equivalence". 98 | -- See 'equivalent' for regular-expression equivalence. 99 | -- 100 | -- Structure is exposed in "Kleene.RE" module but consider constructors as 101 | -- half-internal. There are soft-invariants, but violating them shouldn't 102 | -- break anything in the package. (e.g. 'transitionMap' will eventually 103 | -- terminate, but may create more redundant states if starting regexp is not 104 | -- "weakly normalised"). 105 | -- 106 | data RE c 107 | = REChars (RSet c) -- ^ Single character 108 | | REAppend [RE c] -- ^ Concatenation 109 | | REUnion (RSet c) (Set (RE c)) -- ^ Union 110 | | REStar (RE c) -- ^ Kleene star 111 | deriving (Eq, Ord, Show) 112 | 113 | ------------------------------------------------------------------------------- 114 | -- Smart constructor 115 | ------------------------------------------------------------------------------- 116 | 117 | -- | Empty regex. Doesn't accept anything. 118 | -- 119 | -- >>> putPretty (empty :: RE Char) 120 | -- ^[]$ 121 | -- 122 | -- >>> putPretty (bottom :: RE Char) 123 | -- ^[]$ 124 | -- 125 | -- prop> match (empty :: RE Char) (s :: String) === False 126 | -- 127 | empty :: RE c 128 | empty = REChars RSet.empty 129 | 130 | -- | Everything. 131 | -- 132 | -- >>> putPretty everything 133 | -- ^[^]*$ 134 | -- 135 | -- prop> match (everything :: RE Char) (s :: String) === True 136 | -- 137 | everything :: Bounded c => RE c 138 | everything = REStar (REChars RSet.full) 139 | 140 | -- | Empty string. /Note:/ different than 'empty'. 141 | -- 142 | -- >>> putPretty eps 143 | -- ^$ 144 | -- 145 | -- >>> putPretty (mempty :: RE Char) 146 | -- ^$ 147 | -- 148 | -- prop> match (eps :: RE Char) s === null (s :: String) 149 | -- 150 | eps :: RE c 151 | eps = REAppend [] 152 | 153 | -- | 154 | -- 155 | -- >>> putPretty (char 'x') 156 | -- ^x$ 157 | -- 158 | char :: c -> RE c 159 | char = REChars . RSet.singleton 160 | 161 | -- | 162 | -- 163 | -- >>> putPretty $ charRange 'a' 'z' 164 | -- ^[a-z]$ 165 | -- 166 | charRange :: Ord c => c -> c -> RE c 167 | charRange c c' = REChars $ RSet.singletonRange (c, c') 168 | 169 | -- | Any character. /Note:/ different than dot! 170 | -- 171 | -- >>> putPretty anyChar 172 | -- ^[^]$ 173 | -- 174 | anyChar :: Bounded c => RE c 175 | anyChar = REChars RSet.full 176 | 177 | -- | Concatenate regular expressions. 178 | -- 179 | -- prop> (asREChar r <> s) <> t === r <> (s <> t) 180 | -- 181 | -- prop> asREChar r <> empty === empty 182 | -- prop> empty <> asREChar r === empty 183 | -- 184 | -- prop> asREChar r <> eps === r 185 | -- prop> eps <> asREChar r === r 186 | -- 187 | appends :: Eq c => [RE c] -> RE c 188 | appends rs0 189 | | elem empty rs1 = empty 190 | | otherwise = case rs1 of 191 | [r] -> r 192 | rs -> REAppend rs 193 | where 194 | rs1 = concatMap f rs0 195 | 196 | f :: RE c -> [RE c] 197 | f (REAppend rs) = concatMap f rs 198 | f r = [r] 199 | 200 | -- | Union of regular expressions. 201 | -- 202 | -- prop> asREChar r \/ r === r 203 | -- prop> asREChar r \/ s === s \/ r 204 | -- prop> (asREChar r \/ s) \/ t === r \/ (s \/ t) 205 | -- 206 | -- prop> empty \/ asREChar r === r 207 | -- prop> asREChar r \/ empty === r 208 | -- 209 | -- prop> everything \/ asREChar r === everything 210 | -- prop> asREChar r \/ everything === everything 211 | -- 212 | unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c 213 | unions = uncurry mk . foldMap f where 214 | mk cs rss 215 | | Set.null rss = REChars cs 216 | | Set.member everything rss = everything 217 | | RSet.null cs = case Set.toList rss of 218 | [] -> empty 219 | [r] -> r 220 | _ -> REUnion cs rss 221 | | otherwise = REUnion cs rss 222 | 223 | f (REUnion cs rs) = (cs, rs) 224 | f (REChars cs) = (cs, Set.empty) 225 | f r = (mempty, Set.singleton r) 226 | 227 | -- | Kleene star. 228 | -- 229 | -- prop> star (star r) === star (asREChar r) 230 | -- 231 | -- prop> star eps === asREChar eps 232 | -- prop> star empty === asREChar eps 233 | -- prop> star anyChar === asREChar everything 234 | -- 235 | -- prop> star (r \/ eps) === star (asREChar r) 236 | -- prop> star (char c \/ eps) === star (asREChar (char c)) 237 | -- prop> star (empty \/ eps) === asREChar eps 238 | -- 239 | star :: Ord c => RE c -> RE c 240 | star r = case r of 241 | REStar _ -> r 242 | REAppend [] -> eps 243 | REChars cs | RSet.null cs -> eps 244 | REUnion cs rs | Set.member eps rs -> case Set.toList rs' of 245 | [] -> star (REChars cs) 246 | [r'] | RSet.null cs -> star r' 247 | _ -> REStar (REUnion cs rs') 248 | where 249 | rs' = Set.delete eps rs 250 | _ -> REStar r 251 | 252 | -- | Literal string. 253 | -- 254 | -- >>> putPretty ("foobar" :: RE Char) 255 | -- ^foobar$ 256 | -- 257 | -- >>> putPretty ("(.)" :: RE Char) 258 | -- ^\(\.\)$ 259 | -- 260 | string :: [c] -> RE c 261 | string [] = eps 262 | string [c] = REChars (RSet.singleton c) 263 | string cs = REAppend $ map (REChars . RSet.singleton) cs 264 | 265 | instance (Ord c, Enum c, Bounded c) => C.Kleene (RE c) where 266 | empty = empty 267 | eps = eps 268 | appends = appends 269 | unions = unions 270 | star = star 271 | 272 | instance (Ord c, Enum c, Bounded c) => C.CharKleene c (RE c) where 273 | char = char 274 | 275 | instance (Ord c, Enum c, Bounded c) => C.FiniteKleene c (RE c) where 276 | everything = everything 277 | charRange = charRange 278 | fromRSet = REChars 279 | anyChar = anyChar 280 | 281 | ------------------------------------------------------------------------------- 282 | -- Pseudo lattice 283 | ------------------------------------------------------------------------------- 284 | 285 | (\/) :: (Ord c, Enum c, Bounded c) => RE c -> RE c -> RE c 286 | r \/ r' = unions [r, r'] 287 | 288 | ------------------------------------------------------------------------------- 289 | -- derivative 290 | ------------------------------------------------------------------------------- 291 | 292 | -- | We say that a regular expression r is nullable if the language it defines 293 | -- contains the empty string. 294 | -- 295 | -- >>> nullable eps 296 | -- True 297 | -- 298 | -- >>> nullable (star "x") 299 | -- True 300 | -- 301 | -- >>> nullable "foo" 302 | -- False 303 | -- 304 | nullable :: RE c -> Bool 305 | nullable (REChars _) = False 306 | nullable (REAppend rs) = all nullable rs 307 | nullable (REUnion _cs rs) = any nullable rs 308 | nullable (REStar _) = True 309 | 310 | -- | Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) 311 | -- with respect to a symbol \(a \in \Sigma\) is the language that includes only 312 | -- those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\). 313 | -- 314 | -- >>> putPretty $ derivate 'f' "foobar" 315 | -- ^oobar$ 316 | -- 317 | -- >>> putPretty $ derivate 'x' $ "xyz" \/ "abc" 318 | -- ^yz$ 319 | -- 320 | -- >>> putPretty $ derivate 'x' $ star "xyz" 321 | -- ^yz(xyz)*$ 322 | -- 323 | derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c 324 | derivate c (REChars cs) = derivateChars c cs 325 | derivate c (REUnion cs rs) = unions $ derivateChars c cs : [ derivate c r | r <- toList rs] 326 | derivate c (REAppend rs) = derivateAppend c rs 327 | derivate c rs@(REStar r) = derivate c r <> rs 328 | 329 | derivateAppend :: (Ord c, Enum c, Bounded c) => c -> [RE c] -> RE c 330 | derivateAppend _ [] = empty 331 | derivateAppend c [r] = derivate c r 332 | derivateAppend c (r:rs) 333 | | nullable r = unions [r' <> appends rs, rs'] 334 | | otherwise = r' <> appends rs 335 | where 336 | r' = derivate c r 337 | rs' = derivateAppend c rs 338 | 339 | derivateChars :: Ord c => c -> RSet c -> RE c 340 | derivateChars c cs 341 | | c `RSet.member` cs = eps 342 | | otherwise = empty 343 | 344 | instance (Ord c, Enum c, Bounded c) => C.Derivate c (RE c) where 345 | nullable = nullable 346 | derivate = derivate 347 | 348 | instance (Ord c, Enum c, Bounded c) => C.Match c (RE c) where 349 | match r = nullable . foldl' (flip derivate) r 350 | 351 | ------------------------------------------------------------------------------- 352 | -- Nullable with proof 353 | ------------------------------------------------------------------------------- 354 | 355 | -- | Not only we can decide whether 'RE' is nullable, we can also 356 | -- remove the empty string: 357 | -- 358 | -- >>> putPretty $ nullableProof eps 359 | -- ^[]$ 360 | -- 361 | -- >>> putPretty $ nullableProof $ star "x" 362 | -- ^xx*$ 363 | -- 364 | -- >>> putPretty $ nullableProof "foo" 365 | -- Nothing 366 | -- 367 | -- 'nullableProof' is consistent with 'nullable': 368 | -- 369 | -- prop> isJust (nullableProof r) === nullable (asREChar r) 370 | -- 371 | -- The returned regular expression is not nullable: 372 | -- 373 | -- prop> maybe True (not . nullable) $ nullableProof $ asREChar r 374 | -- 375 | -- If we union with empty regex, we get a equivalent regular expression 376 | -- we started with: 377 | -- 378 | -- prop> maybe r (eps \/) (nullableProof r) `equivalent` (asREChar r) 379 | -- 380 | nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c) 381 | nullableProof (REChars _) = Nothing 382 | 383 | nullableProof (REAppend []) = Just empty 384 | nullableProof (REAppend xs) 385 | | Just ys <- traverse (\x -> (,) x <$> nullableProof x) xs = Just (go ys) 386 | | otherwise = Nothing 387 | where 388 | go :: [(RE c, RE c)] -> RE c 389 | go rs = unions $ map appends $ tail $ traverse (\(r,r') -> [r,r']) rs 390 | 391 | nullableProof (REUnion cs rs) 392 | | any nullable rs = Just $ REUnion cs $ Set.map (\r -> maybe r id $ nullableProof r) rs 393 | | otherwise = Nothing 394 | 395 | nullableProof (REStar r) 396 | | Just r' <- nullableProof r = Just (r' <> REStar r') 397 | | otherwise = Just (r <> REStar r) 398 | 399 | ------------------------------------------------------------------------------- 400 | -- isEmpty 401 | ------------------------------------------------------------------------------- 402 | 403 | -- | Whether 'RE' is (structurally) equal to 'empty'. 404 | -- 405 | -- prop> isEmpty r === all (not . nullable) (Map.keys $ transitionMap $ asREChar r) 406 | isEmpty :: RE c -> Bool 407 | isEmpty (REChars rs) = RSet.null rs 408 | isEmpty _ = False 409 | 410 | ------------------------------------------------------------------------------- 411 | -- States 412 | ------------------------------------------------------------------------------- 413 | 414 | -- | Transition map. Used to construct 'Kleene.DFA.DFA'. 415 | -- 416 | -- >>> void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: RE Char) 417 | -- ^[]$ : \_ -> "^[]$" 418 | -- ^b$ : \x -> if 419 | -- | x <= 'a' -> "^[]$" 420 | -- | x <= 'b' -> "^$" 421 | -- | otherwise -> "^[]$" 422 | -- ^$ : \_ -> "^[]$" 423 | -- ^ab$ : \x -> if 424 | -- | x <= '`' -> "^[]$" 425 | -- | x <= 'a' -> "^b$" 426 | -- | otherwise -> "^[]$" 427 | -- 428 | transitionMap 429 | :: forall c. (Ord c, Enum c, Bounded c) 430 | => RE c 431 | -> Map (RE c) (SF.SF c (RE c)) 432 | transitionMap re = go Map.empty [re] where 433 | go :: Map (RE c) (SF.SF c (RE c)) 434 | -> [RE c] 435 | -> Map (RE c) (SF.SF c (RE c)) 436 | go !acc [] = acc 437 | go acc (r : rs) 438 | | r `Map.member` acc = go acc rs 439 | | otherwise = go (Map.insert r pm acc) (SF.values pm ++ rs) 440 | where 441 | pm = P.toSF (\c -> derivate c r) (leadingChars r) 442 | 443 | instance (Ord c, Enum c, Bounded c) => C.TransitionMap c (RE c) where 444 | transitionMap = transitionMap 445 | 446 | -- | Leading character sets of regular expression. 447 | -- 448 | -- >>> leadingChars "foo" 449 | -- fromSeparators "ef" 450 | -- 451 | -- >>> leadingChars (star "b" <> star "e") 452 | -- fromSeparators "abde" 453 | -- 454 | -- >>> leadingChars (charRange 'b' 'z') 455 | -- fromSeparators "az" 456 | -- 457 | leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> P.Partition c 458 | leadingChars (REChars cs) = P.fromRSet cs 459 | leadingChars (REUnion cs rs) = P.fromRSet cs <> foldMap leadingChars rs 460 | leadingChars (REStar r) = leadingChars r 461 | leadingChars (REAppend rs) = leadingCharsAppend rs 462 | 463 | leadingCharsAppend :: (Ord c, Enum c, Bounded c) => [RE c] -> P.Partition c 464 | leadingCharsAppend [] = P.whole 465 | leadingCharsAppend (r : rs) 466 | | nullable r = leadingChars r <> leadingCharsAppend rs 467 | | otherwise = leadingChars r 468 | 469 | ------------------------------------------------------------------------------- 470 | -- Equivalence 471 | ------------------------------------------------------------------------------- 472 | 473 | -- | Whether two regexps are equivalent. 474 | -- 475 | -- @ 476 | -- 'equivalent' re1 re2 <=> forall s. 'match' re1 s === 'match' re1 s 477 | -- @ 478 | -- 479 | -- >>> let re1 = star "a" <> "a" 480 | -- >>> let re2 = "a" <> star "a" 481 | -- 482 | -- These are different regular expressions, even we perform 483 | -- some normalisation-on-construction: 484 | -- 485 | -- >>> re1 == re2 486 | -- False 487 | -- 488 | -- They are however equivalent: 489 | -- 490 | -- >>> equivalent re1 re2 491 | -- True 492 | -- 493 | -- The algorithm works by executing 'states' on "product" regexp, 494 | -- and checking whether all resulting states are both accepting or rejecting. 495 | -- 496 | -- @ 497 | -- re1 == re2 ==> 'equivalent' re1 re2 498 | -- @ 499 | -- 500 | -- === More examples 501 | -- 502 | -- >>> let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2) 503 | -- >>> example re1 re2 504 | -- ^a*a$ 505 | -- ^aa*$ 506 | -- True 507 | -- 508 | -- >>> example (star "aa") (star "aaa") 509 | -- ^(aa)*$ 510 | -- ^(aaa)*$ 511 | -- False 512 | -- 513 | -- >>> example (star "aa" <> star "aaa") (star "aaa" <> star "aa") 514 | -- ^(aa)*(aaa)*$ 515 | -- ^(aaa)*(aa)*$ 516 | -- True 517 | -- 518 | -- >>> example (star ("a" \/ "b")) (star $ star "a" <> star "b") 519 | -- ^[a-b]*$ 520 | -- ^(a*b*)*$ 521 | -- True 522 | -- 523 | equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool 524 | equivalent x0 y0 = go mempty [(x0, y0)] where 525 | go :: Set (RE c, RE c) -> [(RE c, RE c)] -> Bool 526 | go !_ [] = True 527 | go acc (p@(x, y) : zs) 528 | | p `Set.member` acc = go acc zs 529 | -- if two regexps are structurally the same, we don't need to recurse. 530 | | x == y = go (Set.insert p acc) zs 531 | | all agree ps = go (Set.insert p acc) (ps ++ zs) 532 | | otherwise = False 533 | where 534 | cs = toList $ P.examples $ leadingChars x `P.wedge` leadingChars y 535 | ps = map (\c -> (derivate c x, derivate c y)) cs 536 | 537 | agree :: (RE c, RE c) -> Bool 538 | agree (x, y) = nullable x == nullable y 539 | 540 | instance (Ord c, Enum c, Bounded c) => C.Equivalent c (RE c) where 541 | equivalent = equivalent 542 | 543 | ------------------------------------------------------------------------------- 544 | -- Generation 545 | ------------------------------------------------------------------------------- 546 | 547 | -- | Generate random strings of the language @RE c@ describes. 548 | -- 549 | -- >>> let example = traverse_ print . take 3 . generate (curry QC.choose) 42 550 | -- >>> example "abc" 551 | -- "abc" 552 | -- "abc" 553 | -- "abc" 554 | -- 555 | -- >>> example $ star $ "a" \/ "b" 556 | -- "" 557 | -- "bbaabbaaba" 558 | -- "b" 559 | -- 560 | -- >>> example empty 561 | -- 562 | -- prop> all (match r) $ take 10 $ generate (curry QC.choose) 42 (r :: RE Char) 563 | -- 564 | generate 565 | :: (c -> c -> QC.Gen c) -- ^ character range generator 566 | -> Int -- ^ seed 567 | -> RE c 568 | -> [[c]] -- ^ infinite list of results 569 | generate c seed re 570 | | isEmpty re = [] 571 | | otherwise = QC.unGen (QC.infiniteListOf (generator c re)) (QC.mkQCGen seed) 10 572 | 573 | generator 574 | :: (c -> c -> QC.Gen c) 575 | -> RE c 576 | -> QC.Gen [c] 577 | generator c = go where 578 | go (REChars cs) = goChars cs 579 | go (REAppend rs) = concat <$> traverse go rs 580 | go (REUnion cs rs) 581 | | RSet.null cs = QC.oneof [ go r | r <- toList rs ] 582 | | otherwise = QC.oneof $ goChars cs : [ go r | r <- toList rs ] 583 | go (REStar r) = QC.sized $ \n -> do 584 | n' <- QC.choose (0, n) 585 | concat <$> sequence (replicate n' (go r)) 586 | 587 | goChars cs = pure <$> QC.oneof [ c x y | (x,y) <- RSet.toRangeList cs ] 588 | 589 | ------------------------------------------------------------------------------- 590 | -- Instances 591 | ------------------------------------------------------------------------------- 592 | 593 | instance Eq c => Semigroup (RE c) where 594 | r <> r' = appends [r, r'] 595 | 596 | instance Eq c => Monoid (RE c) where 597 | mempty = eps 598 | mappend = (<>) 599 | mconcat = appends 600 | 601 | 602 | 603 | instance c ~ Char => IsString (RE c) where 604 | fromString = string 605 | 606 | instance (Ord c, Enum c, Bounded c, QC.Arbitrary c) => QC.Arbitrary (RE c) where 607 | arbitrary = QC.sized arb where 608 | c :: QC.Gen (RE c) 609 | c = REChars . RSet.fromRangeList <$> QC.arbitrary 610 | 611 | arb :: Int -> QC.Gen (RE c) 612 | arb n | n <= 0 = QC.oneof [c, fmap char QC.arbitrary, pure eps] 613 | | otherwise = QC.oneof 614 | [ c 615 | , pure eps 616 | , fmap char QC.arbitrary 617 | , liftA2 (<>) (arb n2) (arb n2) 618 | , liftA2 (\/) (arb n2) (arb n2) 619 | , fmap star (arb n2) 620 | ] 621 | where 622 | n2 = n `div` 2 623 | 624 | shrink (REUnion _cs rs) = Set.toList rs 625 | shrink (REAppend rs) = rs ++ map appends (QC.shrink rs) 626 | shrink (REStar r) = r : map star (QC.shrink r) 627 | shrink _ = [] 628 | 629 | instance (QC.CoArbitrary c) => QC.CoArbitrary (RE c) where 630 | coarbitrary (REChars cs) = QC.variant (0 :: Int) . QC.coarbitrary (RSet.toRangeList cs) 631 | coarbitrary (REAppend rs) = QC.variant (1 :: Int) . QC.coarbitrary rs 632 | coarbitrary (REUnion cs rs) = QC.variant (2 :: Int) . QC.coarbitrary (RSet.toRangeList cs, Set.toList rs) 633 | coarbitrary (REStar r) = QC.variant (3 :: Int) . QC.coarbitrary r 634 | 635 | ------------------------------------------------------------------------------- 636 | -- JavaScript 637 | ------------------------------------------------------------------------------- 638 | 639 | instance c ~ Char => Pretty (RE c) where 640 | prettyS x = showChar '^' . go False x . showChar '$' 641 | where 642 | go :: Bool -> RE Char -> ShowS 643 | go p (REStar a) 644 | = parens p 645 | $ go True a . showChar '*' 646 | go p (REAppend rs) 647 | = parens p $ goMany id rs 648 | go p (REUnion cs rs) 649 | | RSet.null cs = goUnion p rs 650 | | Set.null rs = prettyS cs 651 | | otherwise = goUnion p (Set.insert (REChars cs) rs) 652 | go _ (REChars cs) 653 | = prettyS cs 654 | 655 | goUnion p rs 656 | | Set.member eps rs = parens p $ goUnion' True . showChar '?' 657 | | otherwise = goUnion' p 658 | where 659 | goUnion' p' = case Set.toList (Set.delete eps rs) of 660 | [] -> go True empty 661 | [r] -> go p' r 662 | (r:rs') -> parens True $ goSome1 (showChar '|') r rs' 663 | 664 | goMany :: ShowS -> [RE Char] -> ShowS 665 | goMany sep = foldr (\a b -> go False a . sep . b) id 666 | 667 | goSome1 :: ShowS -> RE Char -> [RE Char] -> ShowS 668 | goSome1 sep r = foldl (\a b -> a . sep . go False b) (go False r) 669 | 670 | parens :: Bool -> ShowS -> ShowS 671 | parens True s = showString "(" . s . showChar ')' 672 | parens False s = s 673 | 674 | ------------------------------------------------------------------------------- 675 | -- Latin1 676 | ------------------------------------------------------------------------------- 677 | 678 | instance C.ToLatin1 RE where 679 | toLatin1 (REChars rs) = C.fromRSet (C.toLatin1 rs) 680 | toLatin1 (REAppend xs) = appends (map C.toLatin1 xs) 681 | toLatin1 (REUnion rs xs) = C.fromRSet (C.toLatin1 rs) \/ unions (map C.toLatin1 (Set.toList xs)) 682 | toLatin1 (REStar r) = star (C.toLatin1 r) 683 | -------------------------------------------------------------------------------- /src/Kleene/Internal/Sets.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | {-# OPTIONS_HADDOCK not-home #-} 3 | -- | Character sets. 4 | module Kleene.Internal.Sets ( 5 | dotRSet, 6 | ) where 7 | 8 | import Data.RangeSet.Map (RSet) 9 | 10 | import qualified Data.RangeSet.Map as RSet 11 | 12 | -- | All but the newline. 13 | dotRSet :: RSet Char 14 | dotRSet = RSet.full RSet.\\ RSet.singleton '\n' 15 | -------------------------------------------------------------------------------- /src/Kleene/Monad.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFoldable #-} 2 | {-# LANGUAGE DeriveFunctor #-} 3 | {-# LANGUAGE DeriveTraversable #-} 4 | {-# LANGUAGE FlexibleInstances #-} 5 | {-# LANGUAGE FunctionalDependencies #-} 6 | {-# LANGUAGE GADTs #-} 7 | {-# LANGUAGE Safe #-} 8 | {-# LANGUAGE ScopedTypeVariables #-} 9 | module Kleene.Monad ( 10 | M (..), 11 | -- * Construction 12 | -- 13 | -- | Binary operators are 14 | -- 15 | -- * '<>' for append 16 | -- 17 | -- There are no binary operator for union. Use 'unions'. 18 | -- 19 | empty, 20 | eps, 21 | char, 22 | charRange, 23 | anyChar, 24 | appends, 25 | unions, 26 | star, 27 | string, 28 | -- * Derivative 29 | nullable, 30 | derivate, 31 | -- * Generation 32 | generate, 33 | -- * Conversion 34 | toKleene, 35 | -- * Other 36 | isEmpty, 37 | isEps, 38 | ) where 39 | 40 | import Data.Semigroup (Semigroup (..)) 41 | import Prelude () 42 | import Prelude.Compat 43 | 44 | import Control.Applicative (liftA2) 45 | import Control.Monad (ap) 46 | import Data.Foldable (toList) 47 | import Data.List (foldl') 48 | import Data.String (IsString (..)) 49 | 50 | import qualified Test.QuickCheck as QC 51 | import qualified Test.QuickCheck.Gen as QC (unGen) 52 | import qualified Test.QuickCheck.Random as QC (mkQCGen) 53 | 54 | import qualified Kleene.Classes as C 55 | import Kleene.Internal.Pretty 56 | 57 | -- $setup 58 | -- >>> :set -XOverloadedStrings 59 | -- >>> import Data.Foldable (traverse_) 60 | -- >>> import Data.List (sort) 61 | -- >>> import Kleene.Internal.Pretty (putPretty) 62 | -- 63 | -- >>> import Test.QuickCheck ((===)) 64 | -- >>> import qualified Test.QuickCheck as QC 65 | -- 66 | -- >>> import Kleene.RE (RE) 67 | -- >>> import Kleene.Classes (match) 68 | -- >>> let asMBool :: M Bool -> M Bool; asMBool = id 69 | 70 | -- | Regular expression which has no restrictions on the elements. 71 | -- Therefore we can have 'Monad' instance, i.e. have a regexp where 72 | -- characters are regexps themselves. 73 | -- 74 | -- Because there are no optimisations, it's better to work over small alphabets. 75 | -- On the other hand, we can work over infinite alphabets, if we only 76 | -- use small amount of symbols! 77 | -- 78 | -- >>> putPretty $ string [True, False] 79 | -- ^10$ 80 | -- 81 | -- >>> let re = string [True, False, True] 82 | -- >>> let re' = re >>= \b -> if b then char () else star (char ()) 83 | -- >>> putPretty re' 84 | -- ^..*.$ 85 | -- 86 | data M c 87 | = MAppend [M c] -- ^ Concatenation 88 | | MUnion [c] [M c] -- ^ Union 89 | | MStar (M c) -- ^ Kleene star 90 | deriving (Eq, Ord, Show, Functor, Foldable, Traversable) 91 | 92 | instance Applicative M where 93 | pure = char 94 | (<*>) = ap 95 | 96 | instance Monad M where 97 | return = pure 98 | 99 | MAppend rs >>= k = appends (map (>>= k) rs) 100 | MUnion cs rs >>= k = unions (appends (map k cs) : map (>>= k) rs) 101 | MStar r >>= k = star (r >>= k) 102 | 103 | ------------------------------------------------------------------------------- 104 | -- Smart constructor 105 | ------------------------------------------------------------------------------- 106 | 107 | -- | Empty regex. Doesn't accept anything. 108 | -- 109 | -- >>> putPretty (empty :: M Bool) 110 | -- ^[]$ 111 | -- 112 | -- prop> match (empty :: M Char) (s :: String) === False 113 | -- 114 | empty :: M c 115 | empty = MUnion [] [] 116 | 117 | -- | Empty string. /Note:/ different than 'empty'. 118 | -- 119 | -- >>> putPretty (eps :: M Bool) 120 | -- ^$ 121 | -- 122 | -- >>> putPretty (mempty :: M Bool) 123 | -- ^$ 124 | -- 125 | -- prop> match (eps :: M Char) s === null (s :: String) 126 | -- 127 | eps :: M c 128 | eps = MAppend [] 129 | 130 | -- | 131 | -- 132 | -- >>> putPretty (char 'x') 133 | -- ^x$ 134 | -- 135 | char :: c -> M c 136 | char c = MUnion [c] [] 137 | 138 | -- | /Note:/ we know little about @c@. 139 | -- 140 | -- >>> putPretty $ charRange 'a' 'z' 141 | -- ^[abcdefghijklmnopqrstuvwxyz]$ 142 | -- 143 | charRange :: Enum c => c -> c -> M c 144 | charRange c c' = MUnion [c .. c'] [] 145 | 146 | 147 | -- | Any character. /Note:/ different than dot! 148 | -- 149 | -- >>> putPretty (anyChar :: M Bool) 150 | -- ^[01]$ 151 | -- 152 | anyChar :: (Bounded c, Enum c) => M c 153 | anyChar = MUnion [minBound .. maxBound] [] 154 | 155 | -- | Concatenate regular expressions. 156 | -- 157 | appends :: [M c] -> M c 158 | appends rs0 159 | | any isEmpty rs1 = empty 160 | | otherwise = case rs1 of 161 | [r] -> r 162 | rs -> MAppend rs 163 | where 164 | -- flatten one level of MAppend 165 | rs1 = concatMap f rs0 166 | 167 | f (MAppend rs) = rs 168 | f r = [r] 169 | 170 | -- | Union of regular expressions. 171 | -- 172 | -- Lattice laws don't hold structurally: 173 | -- 174 | unions :: [M c] -> M c 175 | unions = uncurry mk . foldMap f where 176 | f (MUnion cs rs) = (cs, rs) 177 | f r = ([], [r]) 178 | 179 | mk [] [r] = r 180 | mk cs rs = MUnion cs rs 181 | 182 | -- | Kleene star. 183 | -- 184 | star :: M c -> M c 185 | star r = case r of 186 | MStar _ -> r 187 | MAppend [] -> eps 188 | MUnion cs rs | any isEps rs -> case rs' of 189 | [r'] | null cs -> star r' 190 | _ -> MStar (MUnion cs rs') 191 | where 192 | rs' = filter (not . isEps) rs 193 | _ -> MStar r 194 | 195 | -- | Literal string. 196 | -- 197 | -- >>> putPretty ("foobar" :: M Char) 198 | -- ^foobar$ 199 | -- 200 | -- >>> putPretty ("(.)" :: M Char) 201 | -- ^\(\.\)$ 202 | -- 203 | -- >>> putPretty $ string [False, True] 204 | -- ^01$ 205 | -- 206 | string :: [c] -> M c 207 | string [] = eps 208 | string [c] = char c 209 | string cs = MAppend $ map char cs 210 | 211 | instance C.Kleene (M c) where 212 | empty = empty 213 | eps = eps 214 | appends = appends 215 | unions = unions 216 | star = star 217 | 218 | instance C.CharKleene c (M c) where 219 | char = char 220 | 221 | ------------------------------------------------------------------------------- 222 | -- derivative 223 | ------------------------------------------------------------------------------- 224 | 225 | -- | We say that a regular expression r is nullable if the language it defines 226 | -- contains the empty string. 227 | -- 228 | -- >>> nullable eps 229 | -- True 230 | -- 231 | -- >>> nullable (star "x") 232 | -- True 233 | -- 234 | -- >>> nullable "foo" 235 | -- False 236 | -- 237 | nullable :: M c -> Bool 238 | nullable (MAppend rs) = all nullable rs 239 | nullable (MUnion _cs rs) = any nullable rs 240 | nullable (MStar _) = True 241 | 242 | -- | Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) 243 | -- with respect to a symbol \(a \in \Sigma\) is the language that includes only 244 | -- those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\). 245 | -- 246 | -- >>> putPretty $ derivate 'f' "foobar" 247 | -- ^oobar$ 248 | -- 249 | -- >>> putPretty $ derivate 'x' $ unions ["xyz", "abc"] 250 | -- ^yz$ 251 | -- 252 | -- >>> putPretty $ derivate 'x' $ star "xyz" 253 | -- ^yz(xyz)*$ 254 | -- 255 | derivate :: (Eq c, Enum c, Bounded c) => c -> M c -> M c 256 | derivate c (MUnion cs rs) = unions $ derivateChars c cs : [ derivate c r | r <- toList rs] 257 | derivate c (MAppend rs) = derivateAppend c rs 258 | derivate c rs@(MStar r) = derivate c r <> rs 259 | 260 | derivateAppend :: (Eq c, Enum c, Bounded c) => c -> [M c] -> M c 261 | derivateAppend _ [] = empty 262 | derivateAppend c [r] = derivate c r 263 | derivateAppend c (r:rs) 264 | | nullable r = unions [r' <> appends rs, rs'] 265 | | otherwise = r' <> appends rs 266 | where 267 | r' = derivate c r 268 | rs' = derivateAppend c rs 269 | 270 | derivateChars :: Eq c => c -> [c] -> M c 271 | derivateChars c cs 272 | | c `elem` cs = eps 273 | | otherwise = empty 274 | 275 | instance (Eq c, Enum c, Bounded c) => C.Derivate c (M c) where 276 | nullable = nullable 277 | derivate = derivate 278 | 279 | instance (Eq c, Enum c, Bounded c) => C.Match c (M c) where 280 | match r = nullable . foldl' (flip derivate) r 281 | 282 | ------------------------------------------------------------------------------- 283 | -- isEmpty 284 | ------------------------------------------------------------------------------- 285 | 286 | -- | Whether 'M' is (structurally) equal to 'empty'. 287 | isEmpty :: M c -> Bool 288 | isEmpty (MUnion cs rs) = null cs && null rs 289 | isEmpty _ = False 290 | 291 | -- | Whether 'M' is (structurally) equal to 'eps'. 292 | isEps :: M c -> Bool 293 | isEps (MAppend rs) = null rs 294 | isEps _ = False 295 | 296 | ------------------------------------------------------------------------------- 297 | -- Generation 298 | ------------------------------------------------------------------------------- 299 | 300 | -- | Generate random strings of the language @M c@ describes. 301 | -- 302 | -- >>> let example = traverse_ print . take 3 . generate 42 303 | -- >>> example "abc" 304 | -- "abc" 305 | -- "abc" 306 | -- "abc" 307 | -- 308 | -- >>> example $ star $ unions ["a", "b"] 309 | -- "" 310 | -- "aaababaaab" 311 | -- "a" 312 | -- 313 | -- xx >>> example empty 314 | -- 315 | -- expensive-prop> all (match r) $ take 10 $ generate 42 (r :: M Bool) 316 | -- 317 | generate 318 | :: Int -- ^ seed 319 | -> M c 320 | -> [[c]] -- ^ infinite list of results 321 | generate seed re 322 | | isEmpty re = [] 323 | | otherwise = QC.unGen (QC.infiniteListOf (generator re)) (QC.mkQCGen seed) 10 324 | 325 | generator :: M c -> QC.Gen [c] 326 | generator = go where 327 | go (MAppend rs) = concat <$> traverse go rs 328 | go (MUnion cs rs) 329 | | null cs = QC.oneof [ go r | r <- toList rs ] 330 | | otherwise = QC.oneof $ goChars cs : [ go r | r <- toList rs ] 331 | go (MStar r) = QC.sized $ \n -> do 332 | n' <- QC.choose (0, n) 333 | concat <$> sequence (replicate n' (go r)) 334 | 335 | goChars cs = pure <$> QC.elements cs 336 | 337 | ------------------------------------------------------------------------------- 338 | -- Conversion 339 | ------------------------------------------------------------------------------- 340 | 341 | -- | Convert to 'Kleene' 342 | -- 343 | -- >>> let re = charRange 'a' 'z' 344 | -- >>> putPretty re 345 | -- ^[abcdefghijklmnopqrstuvwxyz]$ 346 | -- 347 | -- >>> putPretty (toKleene re :: RE Char) 348 | -- ^[a-z]$ 349 | -- 350 | toKleene :: C.CharKleene c k => M c -> k 351 | toKleene (MAppend rs) = C.appends (map toKleene rs) 352 | toKleene (MUnion cs rs) = C.unions (C.oneof cs : map toKleene rs) 353 | toKleene (MStar r) = C.star (toKleene r) 354 | 355 | ------------------------------------------------------------------------------- 356 | -- Instances 357 | ------------------------------------------------------------------------------- 358 | 359 | instance Semigroup (M c) where 360 | r <> r' = appends [r, r'] 361 | 362 | instance Monoid (M c) where 363 | mempty = eps 364 | mappend = (<>) 365 | mconcat = appends 366 | 367 | instance c ~ Char => IsString (M c) where 368 | fromString = string 369 | 370 | instance (Eq c, Enum c, Bounded c, QC.Arbitrary c) => QC.Arbitrary (M c) where 371 | arbitrary = QC.sized arb where 372 | c :: QC.Gen (M c) 373 | c = char <$> QC.arbitrary 374 | 375 | arb :: Int -> QC.Gen (M c) 376 | arb n | n <= 0 = QC.oneof [c, fmap char QC.arbitrary, pure eps] 377 | | otherwise = QC.oneof 378 | [ c 379 | , pure eps 380 | , fmap char QC.arbitrary 381 | , liftA2 (<>) (arb n2) (arb n2) 382 | , liftA2 (\x y -> unions [x,y]) (arb n2) (arb n2) 383 | , fmap star (arb n2) 384 | ] 385 | where 386 | n2 = n `div` 2 387 | 388 | instance (QC.CoArbitrary c) => QC.CoArbitrary (M c) where 389 | coarbitrary (MAppend rs) = QC.variant (1 :: Int) . QC.coarbitrary rs 390 | coarbitrary (MUnion cs rs) = QC.variant (2 :: Int) . QC.coarbitrary (cs, rs) 391 | coarbitrary (MStar r) = QC.variant (3 :: Int) . QC.coarbitrary r 392 | 393 | ------------------------------------------------------------------------------- 394 | -- JavaScript 395 | ------------------------------------------------------------------------------- 396 | 397 | instance (Pretty c, Eq c) => Pretty (M c) where 398 | prettyS x = showChar '^' . go False x . showChar '$' 399 | where 400 | go :: Bool -> M c -> ShowS 401 | go p (MStar a) 402 | = parens p 403 | $ go True a . showChar '*' 404 | go p (MAppend rs) 405 | = parens p $ goMany id rs 406 | go p (MUnion cs rs) 407 | | null rs = prettySList cs 408 | | null cs = goUnion p rs 409 | | otherwise = goUnion p (MUnion cs [] : rs) 410 | 411 | goUnion p rs 412 | | elem eps rs = parens p $ goUnion' True . showChar '?' 413 | | otherwise = goUnion' p 414 | where 415 | goUnion' p' = case filter (/= eps) rs of 416 | [] -> go True empty 417 | [r] -> go p' r 418 | (r:rs') -> parens True $ goSome1 (showChar '|') r rs' 419 | 420 | goMany :: ShowS -> [M c] -> ShowS 421 | goMany sep = foldr (\a b -> go False a . sep . b) id 422 | 423 | goSome1 :: ShowS -> M c -> [M c] -> ShowS 424 | goSome1 sep r = foldl (\a b -> a . sep . go False b) (go False r) 425 | 426 | parens :: Bool -> ShowS -> ShowS 427 | parens True s = showString "(" . s . showChar ')' 428 | parens False s = s 429 | 430 | prettySList :: [c] -> ShowS 431 | prettySList [c] = prettyS c 432 | prettySList xs = showChar '[' . foldr (\a b -> prettyS a . b) (showChar ']') xs 433 | -------------------------------------------------------------------------------- /src/Kleene/RE.hs: -------------------------------------------------------------------------------- 1 | module Kleene.RE ( 2 | RE (..), 3 | -- * Construction 4 | -- 5 | -- | Binary operators are 6 | -- 7 | -- * '<>' for append 8 | -- * '\/' for union 9 | -- 10 | empty, 11 | eps, 12 | everything, 13 | char, 14 | charRange, 15 | anyChar, 16 | appends, 17 | unions, 18 | star, 19 | string, 20 | -- * Derivative 21 | nullable, 22 | derivate, 23 | -- * Transition map 24 | transitionMap, 25 | leadingChars, 26 | -- * Equivalence 27 | equivalent, 28 | -- * Generation 29 | generate, 30 | -- * Other 31 | isEmpty, 32 | nullableProof, 33 | ) where 34 | 35 | -- This to include orphans. 36 | import Kleene.DFA () 37 | import Kleene.Internal.RE 38 | -------------------------------------------------------------------------------- /tests/kleene-utf8.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Main (main) where 3 | 4 | import Data.Bits (shiftR, (.&.), (.|.)) 5 | import Data.Char (ord) 6 | import Data.Semigroup ((<>)) 7 | import Data.Word (Word8) 8 | import Test.Tasty (defaultMain, testGroup) 9 | import Test.Tasty.HUnit (assertBool, testCase) 10 | 11 | import Kleene 12 | import Kleene.DFA (fromRE, toRE) 13 | 14 | #if MIN_VERSION_bytestring(0,11,2) 15 | import Test.Tasty.QuickCheck (label, testProperty, (===)) 16 | 17 | import qualified Data.ByteString as BS 18 | #endif 19 | 20 | main :: IO () 21 | main = do 22 | -- print utf8char2 23 | -- print utf8char3 24 | -- print utf8char4 25 | defaultMain $ testGroup "kleene-utf8" 26 | [ testCase "brute ≃ brute'" $ do 27 | assertBool "not equiv" (equivalent utf8char1 utf8char2) 28 | , testCase "brute' ≃ manual1" $ do 29 | assertBool "not equiv" (equivalent utf8char2 utf8char3) 30 | , testCase "manual1 ≃ manual2" $ do 31 | assertBool "not equiv" (equivalent utf8char3 utf8char4) 32 | , testCase "brute ≃ manual2" $ do 33 | assertBool "not equiv" (equivalent utf8char1 utf8char4) 34 | 35 | #if MIN_VERSION_bytestring(0,11,2) 36 | , testProperty "isValidUtf8" $ \w8s -> 37 | let bs = BS.pack w8s 38 | isValid = BS.isValidUtf8 bs 39 | in label (show isValid) $ isValid === match (star utf8char4) w8s 40 | #endif 41 | ] 42 | 43 | ------------------------------------------------------------------------------- 44 | -- Bruteforce definition 45 | ------------------------------------------------------------------------------- 46 | 47 | utf8char1 :: RE Word8 48 | utf8char1 = unions 49 | [ string (encodeStringUtf8 [c]) 50 | | c <- [ '\0' .. maxBound ] 51 | ] 52 | 53 | ------------------------------------------------------------------------------- 54 | -- Derived definition 55 | ------------------------------------------------------------------------------- 56 | 57 | utf8charDFA1 :: DFA Word8 58 | utf8charDFA1 = fromRE utf8char1 59 | 60 | utf8char2 :: RE Word8 61 | utf8char2 = toRE utf8charDFA1 62 | 63 | ------------------------------------------------------------------------------- 64 | -- Written out definition 65 | ------------------------------------------------------------------------------- 66 | 67 | utf8char3 :: RE Word8 68 | utf8char3 = unions 69 | [ charRange 0x00 0x7F 70 | , sub1 <> charRange 0x80 0xBF 71 | ] 72 | where 73 | sub1 = unions 74 | [ charRange 0xC2 0xDF 75 | , charRange 0xE0 0xE0 <> charRange 0xa0 0xBF 76 | , charRange 0xED 0xED <> charRange 0x80 0x9F 77 | , sub2 <> charRange 0x80 0xBF 78 | ] 79 | 80 | sub2 = unions 81 | [ charRange 0xE1 0xEC 82 | , charRange 0xEE 0xEF 83 | , charRange 0xF0 0xF0 <> charRange 0x90 0xBF 84 | , charRange 0xF1 0xF3 <> charRange 0x80 0xBF 85 | , charRange 0xF4 0xF4 <> charRange 0x80 0x8f 86 | ] 87 | 88 | ------------------------------------------------------------------------------- 89 | -- Manual definition, how human would written it 90 | ------------------------------------------------------------------------------- 91 | 92 | utf8char4 :: RE Word8 93 | utf8char4 = unions 94 | [ charRange 0x00 0x7F 95 | , charRange 0xC2 0xDF <> charRange 0x80 0xBF 96 | , charRange 0xE0 0xE0 <> charRange 0xa0 0xBF <> charRange 0x80 0xBF 97 | , charRange 0xE1 0xEC <> charRange 0x80 0xBF <> charRange 0x80 0xBF 98 | , charRange 0xED 0xED <> charRange 0x80 0x9F <> charRange 0x80 0xBF 99 | , charRange 0xEE 0xEF <> charRange 0x80 0xBF <> charRange 0x80 0xBF 100 | , charRange 0xF0 0xF0 <> charRange 0x90 0xBF <> charRange 0x80 0xBF <> charRange 0x80 0xBF 101 | , charRange 0xF1 0xF3 <> charRange 0x80 0xBF <> charRange 0x80 0xBF <> charRange 0x80 0xBF 102 | , charRange 0xF4 0xF4 <> charRange 0x80 0x8f <> charRange 0x80 0xBF <> charRange 0x80 0xBF 103 | ] 104 | 105 | ------------------------------------------------------------------------------- 106 | -- UTF8 encoding 107 | ------------------------------------------------------------------------------- 108 | 109 | encodeStringUtf8 :: String -> [Word8] 110 | encodeStringUtf8 [] = [] 111 | encodeStringUtf8 (c:cs) 112 | | c <= '\x07F' = w8 113 | : encodeStringUtf8 cs 114 | | c <= '\x7FF' = (0xC0 .|. w8ShiftR 6 ) 115 | : (0x80 .|. (w8 .&. 0x3F)) 116 | : encodeStringUtf8 cs 117 | | c <= '\xD7FF'= (0xE0 .|. w8ShiftR 12 ) 118 | : (0x80 .|. (w8ShiftR 6 .&. 0x3F)) 119 | : (0x80 .|. (w8 .&. 0x3F)) 120 | : encodeStringUtf8 cs 121 | | c <= '\xDFFF'= 0xEF : 0xBF : 0xBD -- U+FFFD 122 | : encodeStringUtf8 cs 123 | | c <= '\xFFFF'= (0xE0 .|. w8ShiftR 12 ) 124 | : (0x80 .|. (w8ShiftR 6 .&. 0x3F)) 125 | : (0x80 .|. (w8 .&. 0x3F)) 126 | : encodeStringUtf8 cs 127 | | otherwise = (0xf0 .|. w8ShiftR 18 ) 128 | : (0x80 .|. (w8ShiftR 12 .&. 0x3F)) 129 | : (0x80 .|. (w8ShiftR 6 .&. 0x3F)) 130 | : (0x80 .|. (w8 .&. 0x3F)) 131 | : encodeStringUtf8 cs 132 | where 133 | w8 = fromIntegral (ord c) :: Word8 134 | w8ShiftR :: Int -> Word8 135 | w8ShiftR = fromIntegral . shiftR (ord c) 136 | --------------------------------------------------------------------------------