├── .github
    └── workflows
    │   └── haskell-ci.yml
├── .gitignore
├── .stylish-haskell.yaml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── Setup.hs
├── cabal.haskell-ci
├── cabal.project
├── example.png
├── kleene.cabal
├── src
    ├── Kleene.hs
    └── Kleene
    │   ├── Classes.hs
    │   ├── DFA.hs
    │   ├── ERE.hs
    │   ├── Equiv.hs
    │   ├── Functor.hs
    │   ├── Functor
    │       └── NonEmpty.hs
    │   ├── Internal
    │       ├── Functor.hs
    │       ├── Partition.hs
    │       ├── Pretty.hs
    │       ├── RE.hs
    │       └── Sets.hs
    │   ├── Monad.hs
    │   └── RE.hs
└── tests
    └── kleene-utf8.hs


/.github/workflows/haskell-ci.yml:
--------------------------------------------------------------------------------
  1 | # This GitHub workflow config has been generated by a script via
  2 | #
  3 | #   haskell-ci 'github' 'cabal.project'
  4 | #
  5 | # To regenerate the script (for example after adjusting tested-with) run
  6 | #
  7 | #   haskell-ci regenerate
  8 | #
  9 | # For more information, see https://github.com/haskell-CI/haskell-ci
 10 | #
 11 | # version: 0.17.20231110
 12 | #
 13 | # REGENDATA ("0.17.20231110",["github","cabal.project"])
 14 | #
 15 | name: Haskell-CI
 16 | on:
 17 |   push:
 18 |     branches:
 19 |       - master
 20 |   pull_request:
 21 |     branches:
 22 |       - master
 23 | jobs:
 24 |   linux:
 25 |     name: Haskell-CI - Linux - ${{ matrix.compiler }}
 26 |     runs-on: ubuntu-20.04
 27 |     timeout-minutes:
 28 |       60
 29 |     container:
 30 |       image: buildpack-deps:bionic
 31 |     continue-on-error: ${{ matrix.allow-failure }}
 32 |     strategy:
 33 |       matrix:
 34 |         include:
 35 |           - compiler: ghc-9.8.1
 36 |             compilerKind: ghc
 37 |             compilerVersion: 9.8.1
 38 |             setup-method: ghcup
 39 |             allow-failure: false
 40 |           - compiler: ghc-9.6.3
 41 |             compilerKind: ghc
 42 |             compilerVersion: 9.6.3
 43 |             setup-method: ghcup
 44 |             allow-failure: false
 45 |           - compiler: ghc-9.4.8
 46 |             compilerKind: ghc
 47 |             compilerVersion: 9.4.8
 48 |             setup-method: ghcup
 49 |             allow-failure: false
 50 |           - compiler: ghc-9.2.8
 51 |             compilerKind: ghc
 52 |             compilerVersion: 9.2.8
 53 |             setup-method: ghcup
 54 |             allow-failure: false
 55 |           - compiler: ghc-9.0.2
 56 |             compilerKind: ghc
 57 |             compilerVersion: 9.0.2
 58 |             setup-method: ghcup
 59 |             allow-failure: false
 60 |           - compiler: ghc-8.10.7
 61 |             compilerKind: ghc
 62 |             compilerVersion: 8.10.7
 63 |             setup-method: ghcup
 64 |             allow-failure: false
 65 |           - compiler: ghc-8.8.4
 66 |             compilerKind: ghc
 67 |             compilerVersion: 8.8.4
 68 |             setup-method: hvr-ppa
 69 |             allow-failure: false
 70 |           - compiler: ghc-8.6.5
 71 |             compilerKind: ghc
 72 |             compilerVersion: 8.6.5
 73 |             setup-method: hvr-ppa
 74 |             allow-failure: false
 75 |           - compiler: ghc-8.4.4
 76 |             compilerKind: ghc
 77 |             compilerVersion: 8.4.4
 78 |             setup-method: hvr-ppa
 79 |             allow-failure: false
 80 |           - compiler: ghc-8.2.2
 81 |             compilerKind: ghc
 82 |             compilerVersion: 8.2.2
 83 |             setup-method: hvr-ppa
 84 |             allow-failure: false
 85 |           - compiler: ghc-8.0.2
 86 |             compilerKind: ghc
 87 |             compilerVersion: 8.0.2
 88 |             setup-method: hvr-ppa
 89 |             allow-failure: false
 90 |           - compiler: ghc-7.10.3
 91 |             compilerKind: ghc
 92 |             compilerVersion: 7.10.3
 93 |             setup-method: hvr-ppa
 94 |             allow-failure: false
 95 |           - compiler: ghc-7.8.4
 96 |             compilerKind: ghc
 97 |             compilerVersion: 7.8.4
 98 |             setup-method: hvr-ppa
 99 |             allow-failure: false
100 |       fail-fast: false
101 |     steps:
102 |       - name: apt
103 |         run: |
104 |           apt-get update
105 |           apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5
106 |           if [ "${{ matrix.setup-method }}" = ghcup ]; then
107 |             mkdir -p "$HOME/.ghcup/bin"
108 |             curl -sL https://downloads.haskell.org/ghcup/0.1.20.0/x86_64-linux-ghcup-0.1.20.0 > "$HOME/.ghcup/bin/ghcup"
109 |             chmod a+x "$HOME/.ghcup/bin/ghcup"
110 |             "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false)
111 |             "$HOME/.ghcup/bin/ghcup" install cabal 3.10.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false)
112 |           else
113 |             apt-add-repository -y 'ppa:hvr/ghc'
114 |             apt-get update
115 |             apt-get install -y "$HCNAME"
116 |             mkdir -p "$HOME/.ghcup/bin"
117 |             curl -sL https://downloads.haskell.org/ghcup/0.1.20.0/x86_64-linux-ghcup-0.1.20.0 > "$HOME/.ghcup/bin/ghcup"
118 |             chmod a+x "$HOME/.ghcup/bin/ghcup"
119 |             "$HOME/.ghcup/bin/ghcup" install cabal 3.10.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false)
120 |           fi
121 |         env:
122 |           HCKIND: ${{ matrix.compilerKind }}
123 |           HCNAME: ${{ matrix.compiler }}
124 |           HCVER: ${{ matrix.compilerVersion }}
125 |       - name: Set PATH and environment variables
126 |         run: |
127 |           echo "$HOME/.cabal/bin" >> $GITHUB_PATH
128 |           echo "LANG=C.UTF-8" >> "$GITHUB_ENV"
129 |           echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV"
130 |           echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV"
131 |           HCDIR=/opt/$HCKIND/$HCVER
132 |           if [ "${{ matrix.setup-method }}" = ghcup ]; then
133 |             HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER")
134 |             HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#')
135 |             HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#')
136 |             echo "HC=$HC" >> "$GITHUB_ENV"
137 |             echo "HCPKG=$HCPKG" >> "$GITHUB_ENV"
138 |             echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV"
139 |             echo "CABAL=$HOME/.ghcup/bin/cabal-3.10.2.0 -vnormal+nowrap" >> "$GITHUB_ENV"
140 |           else
141 |             HC=$HCDIR/bin/$HCKIND
142 |             echo "HC=$HC" >> "$GITHUB_ENV"
143 |             echo "HCPKG=$HCDIR/bin/$HCKIND-pkg" >> "$GITHUB_ENV"
144 |             echo "HADDOCK=$HCDIR/bin/haddock" >> "$GITHUB_ENV"
145 |             echo "CABAL=$HOME/.ghcup/bin/cabal-3.10.2.0 -vnormal+nowrap" >> "$GITHUB_ENV"
146 |           fi
147 | 
148 |           HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))')
149 |           echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV"
150 |           echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV"
151 |           echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV"
152 |           echo "HEADHACKAGE=false" >> "$GITHUB_ENV"
153 |           echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV"
154 |           echo "GHCJSARITH=0" >> "$GITHUB_ENV"
155 |         env:
156 |           HCKIND: ${{ matrix.compilerKind }}
157 |           HCNAME: ${{ matrix.compiler }}
158 |           HCVER: ${{ matrix.compilerVersion }}
159 |       - name: env
160 |         run: |
161 |           env
162 |       - name: write cabal config
163 |         run: |
164 |           mkdir -p $CABAL_DIR
165 |           cat >> $CABAL_CONFIG <<EOF
166 |           remote-build-reporting: anonymous
167 |           write-ghc-environment-files: never
168 |           remote-repo-cache: $CABAL_DIR/packages
169 |           logs-dir:          $CABAL_DIR/logs
170 |           world-file:        $CABAL_DIR/world
171 |           extra-prog-path:   $CABAL_DIR/bin
172 |           symlink-bindir:    $CABAL_DIR/bin
173 |           installdir:        $CABAL_DIR/bin
174 |           build-summary:     $CABAL_DIR/logs/build.log
175 |           store-dir:         $CABAL_DIR/store
176 |           install-dirs user
177 |             prefix: $CABAL_DIR
178 |           repository hackage.haskell.org
179 |             url: http://hackage.haskell.org/
180 |           EOF
181 |           cat >> $CABAL_CONFIG <<EOF
182 |           program-default-options
183 |             ghc-options: $GHCJOBS +RTS -M3G -RTS
184 |           EOF
185 |           cat $CABAL_CONFIG
186 |       - name: versions
187 |         run: |
188 |           $HC --version || true
189 |           $HC --print-project-git-commit-id || true
190 |           $CABAL --version || true
191 |       - name: update cabal index
192 |         run: |
193 |           $CABAL v2-update -v
194 |       - name: install cabal-plan
195 |         run: |
196 |           mkdir -p $HOME/.cabal/bin
197 |           curl -sL https://github.com/haskell-hvr/cabal-plan/releases/download/v0.7.3.0/cabal-plan-0.7.3.0-x86_64-linux.xz > cabal-plan.xz
198 |           echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2  cabal-plan.xz' | sha256sum -c -
199 |           xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan
200 |           rm -f cabal-plan.xz
201 |           chmod a+x $HOME/.cabal/bin/cabal-plan
202 |           cabal-plan --version
203 |       - name: install cabal-docspec
204 |         run: |
205 |           mkdir -p $HOME/.cabal/bin
206 |           curl -sL https://github.com/phadej/cabal-extras/releases/download/cabal-docspec-0.0.0.20230517/cabal-docspec-0.0.0.20230517-x86_64-linux.xz > cabal-docspec.xz
207 |           echo '3b31bbe463ad4d671abbc103db49628562ec48a6604cab278207b5b6acd21ed7  cabal-docspec.xz' | sha256sum -c -
208 |           xz -d < cabal-docspec.xz > $HOME/.cabal/bin/cabal-docspec
209 |           rm -f cabal-docspec.xz
210 |           chmod a+x $HOME/.cabal/bin/cabal-docspec
211 |           cabal-docspec --version
212 |       - name: checkout
213 |         uses: actions/checkout@v3
214 |         with:
215 |           path: source
216 |       - name: initial cabal.project for sdist
217 |         run: |
218 |           touch cabal.project
219 |           echo "packages: $GITHUB_WORKSPACE/source/." >> cabal.project
220 |           cat cabal.project
221 |       - name: sdist
222 |         run: |
223 |           mkdir -p sdist
224 |           $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist
225 |       - name: unpack
226 |         run: |
227 |           mkdir -p unpacked
228 |           find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \;
229 |       - name: generate cabal.project
230 |         run: |
231 |           PKGDIR_kleene="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/kleene-[0-9.]*')"
232 |           echo "PKGDIR_kleene=${PKGDIR_kleene}" >> "$GITHUB_ENV"
233 |           rm -f cabal.project cabal.project.local
234 |           touch cabal.project
235 |           touch cabal.project.local
236 |           echo "packages: ${PKGDIR_kleene}" >> cabal.project
237 |           if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package kleene" >> cabal.project ; fi
238 |           if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "    ghc-options: -Werror=missing-methods" >> cabal.project ; fi
239 |           cat >> cabal.project <<EOF
240 |           EOF
241 |           $HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: $_ installed\n" unless /^(kleene)$/; }' >> cabal.project.local
242 |           cat cabal.project
243 |           cat cabal.project.local
244 |       - name: dump install plan
245 |         run: |
246 |           $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all
247 |           cabal-plan
248 |       - name: restore cache
249 |         uses: actions/cache/restore@v3
250 |         with:
251 |           key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }}
252 |           path: ~/.cabal/store
253 |           restore-keys: ${{ runner.os }}-${{ matrix.compiler }}-
254 |       - name: install dependencies
255 |         run: |
256 |           $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all
257 |           $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all
258 |       - name: build w/o tests
259 |         run: |
260 |           $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all
261 |       - name: build
262 |         run: |
263 |           $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always
264 |       - name: tests
265 |         run: |
266 |           $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct
267 |       - name: docspec
268 |         run: |
269 |           if [ $((HCNUMVER >= 71000)) -ne 0 ] ; then $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all ; fi
270 |           if [ $((HCNUMVER >= 71000)) -ne 0 ] ; then cabal-docspec $ARG_COMPILER ; fi
271 |       - name: cabal check
272 |         run: |
273 |           cd ${PKGDIR_kleene} || false
274 |           ${CABAL} -vnormal check
275 |       - name: haddock
276 |         run: |
277 |           $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all
278 |       - name: unconstrained build
279 |         run: |
280 |           rm -f cabal.project.local
281 |           $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all
282 |       - name: save cache
283 |         uses: actions/cache/save@v3
284 |         if: always()
285 |         with:
286 |           key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }}
287 |           path: ~/.cabal/store
288 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist/
2 | dist-newstyle/
3 | .ghc.environment.*
4 | 
5 | ref/
6 | 
7 | example.dot
8 | 


--------------------------------------------------------------------------------
/.stylish-haskell.yaml:
--------------------------------------------------------------------------------
 1 | steps:
 2 |   - imports:
 3 |       align: group
 4 |       list_align: after_alias
 5 |       long_list_align: new_line
 6 |       empty_list_align: right_after
 7 |       list_padding: module_name
 8 |   - language_pragmas:
 9 |       style: vertical
10 |       remove_redundant: true
11 |   - trailing_whitespace: {}
12 | columns: 80
13 | language_extensions:
14 |   - DataKinds
15 |   - EmptyCase
16 |   - ExplicitForAll
17 |   - FlexibleContexts
18 |   - MultiParamTypeClasses
19 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ## 0.2
 2 | 
 3 | * Remove MChars constructor from Monad variant
 4 | 
 5 | ## 0.1.1
 6 | 
 7 | * Export `Kleene.Functor.NonEmpty.few1`
 8 | * Export `Kleene.RE.everything` and `Kleene.ERE.everything`
 9 | * Export `Equivalent` from `Kleene`
10 | 
11 | ## 0.1
12 | 
13 | * Drop superclasses from `Kleene`.
14 | * Rearrange classes. Introduce `CharKleene`, `FiniteKleene`.
15 | * Add `ToLatin1` and ability to match on `ByteString`.
16 | * Add `Derivate c (DFA c)` instance.
17 | * Add `toDot` to output `DFA` to be rendered by *graphviz*.
18 | * Add `fromRE :: RE c -> ERE c`
19 | * Add `nullableProof :: RE c -> Maybe (RE c)` which returns non-nullable part
20 |   of given regular expression.
21 | * Support/require `lattices-2`: `RE` is now a `Lattice`, `M` isn't.
22 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | Patches welcome!
 2 | 
 3 | - If you are only going to bump bounds:
 4 |   - If it's really **only bounds**, please simply open an issue (so you'll have a URL to refer to). I have a semi-automated process to make revisions, pull requests only disturb it.
 5 |   - If patch includes **source code change** (i.e. I'll need to make a release), and it's a patch to support **newer base/GHC version**:
 6 |     - Amend `tested-with` to include that GHC
 7 |     - Regenerate `.github/workflows/haskell-ci.yml` with `haskell-ci regenerate` (get the latest from [GitHub haskell-ci/haskell-ci](https://github.com/haskell-ci/haskell-ci))
 8 | 
 9 | - Don't edit `CHANGELOG.md`, rather include a copyable entry in your pull request description. I often process pull requests in bulk, and everyone editing the `CHANGELOG.md` causes unnecessary conflicts.
10 | - For the same reason, do not edit `version` or `x-revision`
11 | 
12 | - I use [`stylish-haskell`](https://github.com/jaspervdj/stylish-haskell) to format imports. I encourage you to use it too, when contributing.
13 | - General code style is 4 spaces, just look around how it looks, it's not so strict.
14 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017 Futurice Oy, 2017-2018 Oleg Grenrus
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 | 
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 | 
16 |     * Neither the name of Oleg Grenrus nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | build :
 2 | 	cabal new-build
 3 | 
 4 | doctest :
 5 | 	perl -i -e 'while (<ARGV>) { print unless /package-id\s+(base-compat)-\d+(\.\d+)*/; }' .ghc.environment.*
 6 | 	doctest --fast src/
 7 | 
 8 | haddock :
 9 | 	cabal new-haddock --haddock-hyperlink-source
10 | 
11 | ghcid :
12 | 	ghcid -c 'cabal new-repl'
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # kleene
2 | 
3 | Kleene algebra
4 | 
5 | read: Regular Expressions
6 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | {-# OPTIONS_GHC -Wall #-}
 3 | module Main (main) where
 4 | 
 5 | #ifndef MIN_VERSION_cabal_doctest
 6 | #define MIN_VERSION_cabal_doctest(x,y,z) 0
 7 | #endif
 8 | 
 9 | #if MIN_VERSION_cabal_doctest(1,0,0)
10 | 
11 | import Distribution.Extra.Doctest ( defaultMainWithDoctests )
12 | main :: IO ()
13 | main = defaultMainWithDoctests "doctests"
14 | 
15 | #else
16 | 
17 | #ifdef MIN_VERSION_Cabal
18 | -- If the macro is defined, we have new cabal-install,
19 | -- but for some reason we don't have cabal-doctest in package-db
20 | --
21 | -- Probably we are running cabal sdist, when otherwise using new-build
22 | -- workflow
23 | #warning You are configuring this package without cabal-doctest installed. \
24 |          The doctests test-suite will not work as a result. \
25 |          To fix this, install cabal-doctest before configuring.
26 | #endif
27 | 
28 | import Distribution.Simple
29 | 
30 | main :: IO ()
31 | main = defaultMain
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/cabal.haskell-ci:
--------------------------------------------------------------------------------
1 | branches: master
2 | docspec:  >=7.10
3 | 


--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
1 | packages: .
2 | tests:    True
3 | 


--------------------------------------------------------------------------------
/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phadej/kleene/f244cb8cedb52d9b3ac7b7001e6a9b85c62f807d/example.png


--------------------------------------------------------------------------------
/kleene.cabal:
--------------------------------------------------------------------------------
  1 | cabal-version:      2.4
  2 | name:               kleene
  3 | version:            0.2
  4 | synopsis:           Kleene algebra
  5 | category:           Math
  6 | description:
  7 |   Kleene algebra
  8 |   .
  9 |   Think: Regular expressions
 10 |   .
 11 |   Implements ideas from /Regular-expression derivatives re-examined/ by
 12 |   Scott Owens, John Reppy and Aaron Turon
 13 |   <https://doi.org/10.1017/S0956796808007090>
 14 | 
 15 | homepage:           https://github.com/phadej/kleene
 16 | bug-reports:        https://github.com/phadej/kleene/issues
 17 | author:             Oleg Grenrus <oleg.grenrus@iki.fi>
 18 | maintainer:         Oleg Grenrus <oleg.grenrus@iki.fi>
 19 | license:            BSD-3-Clause
 20 | license-file:       LICENSE
 21 | build-type:         Simple
 22 | extra-source-files: CHANGELOG.md
 23 | extra-doc-files:    example.png
 24 | tested-with:
 25 |   GHC ==7.8.4
 26 |    || ==7.10.3
 27 |    || ==8.0.2
 28 |    || ==8.2.2
 29 |    || ==8.4.4
 30 |    || ==8.6.5
 31 |    || ==8.8.4
 32 |    || ==8.10.7
 33 |    || ==9.0.2
 34 |    || ==9.2.8
 35 |    || ==9.4.8
 36 |    || ==9.6.3
 37 |    || ==9.8.1
 38 | 
 39 | source-repository head
 40 |   type:     git
 41 |   location: https://github.com/phadej/kleene
 42 | 
 43 | library
 44 |   default-language:  Haskell2010
 45 |   ghc-options:       -Wall
 46 |   hs-source-dirs:    src
 47 | 
 48 |   -- GHC boot libraries
 49 |   build-depends:
 50 |     , base          >=4.7.0.2  && <4.20
 51 |     , bytestring    >=0.10.4.0 && <0.13
 52 |     , containers    >=0.5.5.1  && <0.7
 53 |     , text          >=1.2.3.0  && <1.3  || >=2.0 && <2.2
 54 |     , transformers  >=0.3.0.0  && <0.7
 55 | 
 56 |   if !impl(ghc >=8.0)
 57 |     build-depends: semigroups >=0.18.5 && <0.21
 58 | 
 59 |   -- Other dependencies
 60 |   build-depends:
 61 |     , attoparsec
 62 |     , base-compat        >=0.10.5   && <0.14
 63 |     , lattices           >=2        && <2.3
 64 |     , MemoTrie           >=0.6.9    && <0.7
 65 |     , QuickCheck         >=2.12.6.1 && <2.15
 66 |     , range-set-list     >=0.1.3    && <0.2
 67 |     , regex-applicative  >=0.3.3    && <0.4
 68 |     , semigroupoids      >=5.3.2    && <6.1
 69 |     , step-function      >=0.2      && <0.3
 70 | 
 71 |   other-extensions:
 72 |     CPP
 73 |     DefaultSignatures
 74 |     DeriveFoldable
 75 |     DeriveFunctor
 76 |     DeriveTraversable
 77 |     FlexibleInstances
 78 |     FunctionalDependencies
 79 |     GADTs
 80 |     GeneralizedNewtypeDeriving
 81 |     OverloadedStrings
 82 |     StandaloneDeriving
 83 |     UndecidableInstances
 84 | 
 85 |   exposed-modules:
 86 |     Kleene
 87 |     Kleene.Classes
 88 |     Kleene.DFA
 89 |     Kleene.Equiv
 90 |     Kleene.ERE
 91 |     Kleene.Functor
 92 |     Kleene.Functor.NonEmpty
 93 |     Kleene.Monad
 94 |     Kleene.RE
 95 | 
 96 |   -- "Internal-ish" modules
 97 |   exposed-modules:
 98 |     Kleene.Internal.Functor
 99 |     Kleene.Internal.Partition
100 |     Kleene.Internal.Pretty
101 |     Kleene.Internal.RE
102 |     Kleene.Internal.Sets
103 | 
104 |   x-docspec-options:
105 |     -XOverloadedStrings --check-properties "--property-variables=c p r s t q"
106 | 
107 | test-suite kleene-utf8
108 |   default-language: Haskell2010
109 |   ghc-options:      -Wall
110 |   hs-source-dirs:   tests
111 |   type:             exitcode-stdio-1.0
112 |   main-is:          kleene-utf8.hs
113 |   build-depends:
114 |     , base
115 |     , bytestring
116 |     , kleene
117 | 
118 |   if !impl(ghc >=8.0)
119 |     build-depends: semigroups >=0.18.5 && <0.21
120 | 
121 |   -- test dependencies
122 |   build-depends:
123 |     , tasty             ^>=1.4.0.3  || ^>=1.5
124 |     , tasty-hunit       ^>=0.10.0.3
125 |     , tasty-quickcheck  ^>=0.10.1.2
126 | 


--------------------------------------------------------------------------------
/src/Kleene.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Safe #-}
  2 | -- | Kleene algebra.
  3 | --
  4 | -- This package provides means to work with kleene algebra,
  5 | -- at the moment specifically concentrating on regular expressions over 'Char'.
  6 | --
  7 | -- Implements ideas from /Regular-expression derivatives re-examined/ by
  8 | -- Scott Owens, John Reppy and Aaron Turon
  9 | -- <https://doi.org/10.1017/S0956796808007090>.
 10 | --
 11 | -- >>> :set -XOverloadedStrings
 12 | -- >>> import Algebra.Lattice
 13 | -- >>> import Algebra.PartialOrd
 14 | -- >>> import Data.Semigroup
 15 | -- >>> import Kleene.Internal.Pretty (putPretty)
 16 | --
 17 | -- "Kleene.RE" module provides 'RE' type. "Kleene.Classes" module provides various
 18 | -- classes to work with the type. All of that is re-exported from "Kleene" module.
 19 | --
 20 | -- First let's construct a regular expression value:
 21 | --
 22 | -- >>> let re = star "abc" <> "def" <> ("x" \/ "yz") :: RE Char
 23 | -- >>> putPretty re
 24 | -- ^(abc)*def(x|yz)$
 25 | --
 26 | -- We can convert it to 'DFA' (there are 8 states)
 27 | --
 28 | -- >>> let dfa = fromTM re
 29 | -- >>> putPretty dfa
 30 | -- 0 -> \x -> if
 31 | --     | x <= '`'  -> 8
 32 | --     | x <= 'a'  -> 5
 33 | --     | x <= 'c'  -> 8
 34 | --     | x <= 'd'  -> 3
 35 | --     | otherwise -> 8
 36 | -- 1 -> \x -> if
 37 | --     | x <= 'w'  -> 8
 38 | --     | x <= 'x'  -> 6
 39 | --     | x <= 'y'  -> 7
 40 | --     | otherwise -> 8
 41 | -- 2 -> ...
 42 | -- ...
 43 | --
 44 | -- It's also possible to graphically visualise DFAs
 45 | --
 46 | -- @
 47 | -- λ> writeFile "example.dot' ('toDot' dfa)
 48 | -- %  dot -Tpng -oexample.png example.dot
 49 | -- @
 50 | --
 51 | -- ![example.png](example.png)
 52 | --
 53 | -- And we can convert back from 'DFA' to 'RE':
 54 | --
 55 | -- >>> let re' = toKleene dfa :: RE Char
 56 | -- >>> putPretty re'
 57 | -- ^(a(bca)*bcdefx|defx|(a(bca)*bcdefy|defy)z)$
 58 | --
 59 | -- As you see, we don't get what we started with. Yet, these
 60 | -- regular expressions are 'equivalent';
 61 | --
 62 | -- >>> equivalent re re'
 63 | -- True
 64 | --
 65 | -- or using 'Equiv' wrapper
 66 | --
 67 | -- >>> Equiv re == Equiv re'
 68 | -- True
 69 | --
 70 | -- (The paper doesn't outline decision procedure for the equivalence, though
 71 | -- it's right there - seems to be fast enough at least for toy examples like
 72 | -- here).
 73 | --
 74 | -- We can use regular expressions to generate word examples in the language:
 75 | --
 76 | -- >>> import Data.Foldable
 77 | -- >>> import qualified Test.QuickCheck as QC
 78 | -- >>> import Kleene.RE (generate)
 79 | --
 80 | -- >>> traverse_ print $ take 5 $ generate (curry QC.choose) 42 re
 81 | -- "abcdefx"
 82 | -- "abcabcdefx"
 83 | -- "abcdefyz"
 84 | -- "abcabcabcabcabcabcabcdefyz"
 85 | -- "abcabcabcabcabcabcabcdefyz"
 86 | --
 87 | -- In addition to the "normal" regular expressions, there are /extended regular expressions/.
 88 | -- Regular expressions which we can 'complement', and therefore intersect:
 89 | --
 90 | -- >>> let ere = star "aa" /\ star "aaa" :: ERE Char
 91 | -- >>> putPretty ere
 92 | -- ^~(~((aa)*)|~((aaa)*))$
 93 | --
 94 | -- We can convert 'ERE' to 'RE' via 'DFA':
 95 | --
 96 | -- >>> let re'' = toKleene (fromTM ere) :: RE Char
 97 | -- >>> putPretty re''
 98 | -- ^(a(aaaaaa)*aaaaa)?$
 99 | --
100 | -- Machine works own ways, we don't (always) get as pretty results as we'd like:
101 | --
102 | -- >>> equivalent re'' (star "aaaaaa")
103 | -- True
104 | --
105 | -- Another feature of the library is an 'Applciative' 'Functor',
106 | --
107 | -- >>> import Control.Applicative
108 | -- >>> import qualified Kleene.Functor as F
109 | --
110 | -- >>> let f = (,) <$> many (F.char 'x') <* F.few F.anyChar <*> many (F.char 'z')
111 | -- >>> putPretty f
112 | -- ^x*[^]*z*$
113 | --
114 | -- By relying on <regex-applicative http://hackage.haskell.org/package/regex-applicative> library,
115 | -- we can match and /capture/ with regular expression.
116 | --
117 | -- >>> F.match f "xyyzzz"
118 | -- Just ("x","zzz")
119 | --
120 | -- Where with 'RE' we can only get 'True' or 'False':
121 | --
122 | -- >>> match (F.toRE f) "xyyzzz"
123 | -- True
124 | --
125 | -- Which in this case is not even interesting because:
126 | --
127 | -- >>> equivalent (F.toRE f) everything
128 | -- True
129 | --
130 | -- Converting from 'RE' to 'K' is also possible, which may be handy:
131 | --
132 | -- >>> let g = (,) <$> F.few F.anyChar <*> F.fromRE re''
133 | -- >>> putPretty g
134 | -- ^[^]*(a(aaaaaa)*aaaaa)?$
135 | --
136 | -- >>> F.match g (replicate 20 'a')
137 | -- Just ("aa","aaaaaaaaaaaaaaaaaa")
138 | --
139 | -- We got longest divisible by 6 prefix of as. That's because 'F.fromRE'
140 | -- uses 'many' for 'star'.
141 | --
142 | module Kleene (
143 |     -- * Regular expressions
144 |     RE,
145 |     ERE,
146 | 
147 |     -- * Equivalence (and partial order)
148 |     Equiv (..),
149 | 
150 |     -- * Deterministic finite automaton
151 |     DFA (..),
152 |     fromTM,
153 |     fromTMEquiv,
154 |     toKleene,
155 |     toDot,
156 | 
157 |     -- * Classes
158 |     --
159 |     -- | Most operations are defined in following type-classes.
160 |     --
161 |     -- See "Kleene.RE" module for a specific version with examples.
162 |     Kleene (..),
163 |     CharKleene (..),
164 |     FiniteKleene (..),
165 |     Derivate (..),
166 |     Match (..),
167 |     Equivalent (..),
168 |     TransitionMap (..),
169 |     Complement (..),
170 |     ToLatin1 (..),
171 | 
172 |     -- * Functor
173 |     --
174 |     -- | Only the type is exported so it can be referred to.
175 |     --
176 |     -- See "Kleene.Functor" for operations.
177 |     K,
178 |     ) where
179 | 
180 | import Kleene.Classes
181 | import Kleene.DFA     (DFA (..), fromTM, fromTMEquiv, toDot, toKleene)
182 | import Kleene.Equiv
183 | import Kleene.ERE     (ERE)
184 | import Kleene.Functor (K)
185 | import Kleene.RE      (RE)
186 | 


--------------------------------------------------------------------------------
/src/Kleene/Classes.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DefaultSignatures      #-}
  2 | {-# LANGUAGE FunctionalDependencies #-}
  3 | {-# LANGUAGE GADTs                  #-}
  4 | module Kleene.Classes where
  5 | 
  6 | import Prelude ()
  7 | import Prelude.Compat
  8 | 
  9 | import Data.Char                          (ord)
 10 | import Data.Foldable                      (toList)
 11 | import Data.Function.Step.Discrete.Closed (SF)
 12 | import Data.Map                           (Map)
 13 | import Data.Maybe                         (mapMaybe)
 14 | import Data.RangeSet.Map                  (RSet)
 15 | import Data.Word                          (Word8)
 16 | 
 17 | import qualified Data.ByteString   as BS
 18 | import qualified Data.RangeSet.Map as RSet
 19 | 
 20 | import Kleene.Internal.Sets (dotRSet)
 21 | 
 22 | -- | Kleene algebra.
 23 | --
 24 | -- If 'k' is 'Monoid' it's expected that @'appends' = 'mappend'@;
 25 | -- if 'k' is 'Algebra.Lattice.Lattice' it's expected that @'unions' = 'Algebra.Lattice.joins'@.
 26 | --
 27 | -- [Wikipedia: Kleene Algebra](https://en.wikipedia.org/wiki/Kleene_algebra).
 28 | --
 29 | class Kleene k where
 30 |     -- | Empty regex. Doesn't accept anything.
 31 |     empty :: k
 32 | 
 33 |     -- | Empty string. /Note:/ different than 'empty'.
 34 |     eps :: k
 35 | 
 36 |     -- | Concatenation.
 37 |     appends :: [k] -> k
 38 | 
 39 |     -- | Union.
 40 |     unions :: [k] -> k
 41 | 
 42 |     -- | Kleene star.
 43 |     star :: k -> k
 44 | 
 45 | class Kleene k => CharKleene c k | k -> c where
 46 |     -- | Single character
 47 |     char :: c -> k
 48 | 
 49 |     string :: [c] -> k
 50 |     string = appends . map char
 51 | 
 52 | -- | One of the characters.
 53 | oneof :: (CharKleene c k, Foldable f) => f c -> k
 54 | oneof = unions . map char . toList
 55 | 
 56 | class CharKleene c k => FiniteKleene c k | k -> c where
 57 |     -- | Everything. \(\Sigma^\star\).
 58 |     everything :: k
 59 |     everything = star anyChar
 60 | 
 61 |     -- | @'charRange' 'a' 'z' = ^[a-z]$@.
 62 |     charRange :: c -> c -> k
 63 | 
 64 |     -- | Generalisation of 'charRange'.
 65 |     fromRSet :: RSet c -> k
 66 | 
 67 |     -- | @.@ Every character except new line @\\n@.
 68 |     dot :: c ~ Char => k
 69 |     dot = fromRSet dotRSet
 70 | 
 71 |     -- | Any character. /Note:/ different than 'dot'!
 72 |     anyChar :: k
 73 | 
 74 |     notChar :: c -> k
 75 |     default notChar :: (Ord c, Enum c, Bounded c) => c -> k
 76 |     notChar = fromRSet . RSet.complement . RSet.singleton
 77 | 
 78 | class Derivate c k | k -> c where
 79 |     -- | Does language contain an empty string?
 80 |     nullable :: k -> Bool
 81 | 
 82 |     -- | Derivative of a language.
 83 |     derivate :: c -> k -> k
 84 | 
 85 | -- | An @f@ can be used to match on the input.
 86 | class Match c k | k -> c where
 87 |     match :: k -> [c] -> Bool
 88 | 
 89 |     match8 :: c ~ Word8 => k -> BS.ByteString -> Bool
 90 |     match8 k = match k . BS.unpack
 91 | 
 92 | -- | Equivalence induced by 'Match'.
 93 | --
 94 | -- /Law:/
 95 | --
 96 | -- @
 97 | -- 'equivalent' re1 re2 <=> forall s. 'match' re1 s == 'match' re1 s
 98 | -- @
 99 | --
100 | class Match c k => Equivalent c k | k -> c  where
101 |     equivalent :: k -> k -> Bool
102 | 
103 | -- | Transition map.
104 | class Derivate c k => TransitionMap c k | k -> c where
105 |     transitionMap :: k -> Map k (SF c k)
106 | 
107 | -- | Complement of the language.
108 | --
109 | -- /Law:/
110 | --
111 | -- @
112 | -- 'match' ('complement' f) xs = 'not' ('match' f) xs
113 | -- @
114 | class Complement c k | k -> c where
115 |     complement :: k -> k
116 | 
117 | class ToLatin1 k where
118 |     toLatin1 :: k Char -> k Word8
119 | 
120 | instance ToLatin1 RSet where
121 |     toLatin1 = RSet.fromRangeList . mapMaybe f . RSet.toRangeList where
122 |         f :: (Char, Char) -> Maybe (Word8, Word8)
123 |         f (a, b)
124 |             | ord a >= 256 = Nothing
125 |             | otherwise    = Just (fromIntegral (ord a), fromIntegral (min 255 (ord b)))
126 | 


--------------------------------------------------------------------------------
/src/Kleene/DFA.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns          #-}
  2 | {-# LANGUAGE FlexibleInstances     #-}
  3 | {-# LANGUAGE GADTs                 #-}
  4 | {-# LANGUAGE MultiParamTypeClasses #-}
  5 | {-# LANGUAGE Safe                  #-}
  6 | {-# LANGUAGE ScopedTypeVariables   #-}
  7 | {-# OPTIONS_GHC -fno-warn-orphans #-}
  8 | module Kleene.DFA (
  9 |     DFA (..),
 10 |     -- * Conversions
 11 |     fromRE,
 12 |     toRE,
 13 |     fromERE,
 14 |     toERE,
 15 |     fromTM,
 16 |     fromTMEquiv,
 17 |     toKleene,
 18 |     toDot,
 19 |     toDot',
 20 |     ) where
 21 | 
 22 | import Prelude ()
 23 | import Prelude.Compat
 24 | 
 25 | import Algebra.Lattice
 26 |        (BoundedJoinSemiLattice (..), BoundedMeetSemiLattice (..), Lattice (..))
 27 | import Data.IntMap       (IntMap)
 28 | import Data.IntSet       (IntSet)
 29 | import Data.List         (intercalate)
 30 | import Data.Map          (Map)
 31 | import Data.Maybe        (fromMaybe)
 32 | import Data.RangeSet.Map (RSet)
 33 | 
 34 | import qualified Data.ByteString                    as BS
 35 | import qualified Data.Function.Step.Discrete.Closed as SF
 36 | import qualified Data.IntMap                        as IntMap
 37 | import qualified Data.IntSet                        as IntSet
 38 | import qualified Data.Map                           as Map
 39 | import qualified Data.MemoTrie                      as MT
 40 | import qualified Data.RangeSet.Map                  as RSet
 41 | 
 42 | import           Kleene.Classes
 43 | import qualified Kleene.ERE             as ERE
 44 | import           Kleene.Internal.Pretty
 45 | import qualified Kleene.Internal.RE     as RE
 46 | 
 47 | -- $setup
 48 | -- >>> :set -XOverloadedStrings
 49 | -- >>> import Data.Foldable (traverse_)
 50 | -- >>> import Data.Semigroup (Semigroup (..))
 51 | -- >>> import Algebra.Lattice ((/\))
 52 | -- >>> import Kleene.Classes
 53 | -- >>> import Kleene.Internal.Pretty (putPretty)
 54 | -- >>> import Test.QuickCheck ((===))
 55 | -- >>> import qualified Test.QuickCheck as QC
 56 | -- >>> import qualified Kleene.RE as RE
 57 | -- >>> import qualified Kleene.ERE as ERE
 58 | -- >>> import qualified Data.RangeSet.Map as RSet
 59 | --
 60 | -- >>> newtype Smaller a = Smaller a deriving (Show)
 61 | -- >>> let intLog2 = (`div` 10)
 62 | -- >>> instance QC.Arbitrary a => QC.Arbitrary (Smaller a) where arbitrary = QC.scale intLog2 QC.arbitrary; shrink (Smaller a) = map Smaller (QC.shrink a)
 63 | --
 64 | -- >>> let asREChar :: RE.RE Char -> RE.RE Char; asREChar = id
 65 | 
 66 | -------------------------------------------------------------------------------
 67 | -- DFA
 68 | -------------------------------------------------------------------------------
 69 | 
 70 | -- | Deterministic finite automaton.
 71 | --
 72 | -- A deterministic finite automaton (DFA) over an alphabet \(\Sigma\) (type
 73 | -- variable @c@) is 4-tuple \(Q\), \(q_0\) , \(F\), \(\delta\), where
 74 | --
 75 | -- * \(Q\) is a finite set of states (subset of 's'),
 76 | -- * \(q_0 \in Q\) is the distinguised start state ('dfaInitial'),
 77 | -- * \(F \subset Q\) is a set of final (or  accepting) states ('dfaAcceptable'), and
 78 | -- * \(\delta : Q \times \Sigma \to Q\) is a function called the state
 79 | -- transition function ('dfaTransition').
 80 | --
 81 | data DFA c = DFA
 82 |     { dfaTransition   :: !(IntMap (SF.SF c Int))
 83 |       -- ^ transition function
 84 |     , dfaInitial      :: !Int
 85 |       -- ^ initial state
 86 |     , dfaAcceptable   :: !IntSet
 87 |       -- ^ accept states
 88 |     , dfaBlackholes   :: !IntSet
 89 |       -- ^ states we cannot escape
 90 |     }
 91 |   deriving Show
 92 | 
 93 | -------------------------------------------------------------------------------
 94 | -- Construction
 95 | -------------------------------------------------------------------------------
 96 | 
 97 | -- | Convert 'RE.RE' to 'DFA'.
 98 | --
 99 | -- >>> putPretty $ fromRE $ RE.star "abc"
100 | -- 0+ -> \x -> if
101 | --     | x <= '`'  -> 3
102 | --     | x <= 'a'  -> 2
103 | --     | otherwise -> 3
104 | -- 1 -> \x -> if
105 | --     | x <= 'b'  -> 3
106 | --     | x <= 'c'  -> 0
107 | --     | otherwise -> 3
108 | -- 2 -> \x -> if
109 | --     | x <= 'a'  -> 3
110 | --     | x <= 'b'  -> 1
111 | --     | otherwise -> 3
112 | -- 3 -> \_ -> 3 -- black hole
113 | --
114 | -- Everything and nothing result in blackholes:
115 | --
116 | -- >>> traverse_ (putPretty . fromRE) [RE.empty, RE.star RE.anyChar]
117 | -- 0 -> \_ -> 0 -- black hole
118 | -- 0+ -> \_ -> 0 -- black hole
119 | --
120 | -- Character ranges are effecient:
121 | --
122 | -- >>> putPretty $ fromRE $ RE.charRange 'a' 'z'
123 | -- 0 -> \x -> if
124 | --     | x <= '`'  -> 2
125 | --     | x <= 'z'  -> 1
126 | --     | otherwise -> 2
127 | -- 1+ -> \_ -> 2
128 | -- 2 -> \_ -> 2 -- black hole
129 | --
130 | -- An example with two blackholes:
131 | --
132 | -- >>> putPretty $ fromRE $ "c" <> RE.star RE.anyChar
133 | -- 0 -> \x -> if
134 | --     | x <= 'b'  -> 2
135 | --     | x <= 'c'  -> 1
136 | --     | otherwise -> 2
137 | -- 1+ -> \_ -> 1 -- black hole
138 | -- 2 -> \_ -> 2 -- black hole
139 | --
140 | fromRE :: forall c. (Ord c, Enum c, Bounded c) => RE.RE c -> DFA c
141 | fromRE = fromTM
142 | 
143 | -- | Convert 'ERE.ERE' to 'DFA'.
144 | --
145 | -- We don't always generate a minimal automata:
146 | --
147 | -- >>> putPretty $ fromERE $ "a" /\ "b"
148 | -- 0 -> \_ -> 1
149 | -- 1 -> \_ -> 1 -- black hole
150 | --
151 | -- Compare this to a 'complement' example
152 | --
153 | -- Using 'fromTMEquiv', we can get a minimal automaton, for the cost of higher
154 | -- complexity (slow!).
155 | --
156 | -- >>> putPretty $ fromTMEquiv $ ("a" /\ "b" :: ERE.ERE Char)
157 | -- 0 -> \_ -> 0 -- black hole
158 | --
159 | -- >>> putPretty $ fromERE $ complement $ star "abc"
160 | -- 0 -> \x -> if
161 | --     | x <= '`'  -> 3
162 | --     | x <= 'a'  -> 2
163 | --     | otherwise -> 3
164 | -- 1+ -> \x -> if
165 | --     | x <= 'b'  -> 3
166 | --     | x <= 'c'  -> 0
167 | --     | otherwise -> 3
168 | -- 2+ -> \x -> if
169 | --     | x <= 'a'  -> 3
170 | --     | x <= 'b'  -> 1
171 | --     | otherwise -> 3
172 | -- 3+ -> \_ -> 3 -- black hole
173 | --
174 | fromERE :: forall c. (Ord c, Enum c, Bounded c) => ERE.ERE c -> DFA c
175 | fromERE = fromTM
176 | 
177 | -- | Create from 'TransitionMap'.
178 | --
179 | -- See 'fromRE' for a specific example.
180 | fromTM :: forall k c. (Ord k, Ord c, TransitionMap c k) => k -> DFA c
181 | fromTM = fromTMImpl Nothing
182 | 
183 | -- | Create from 'TransitonMap' minimising states with 'Equivalent'.
184 | --
185 | -- See 'fromERE' for an example.
186 | --
187 | fromTMEquiv :: forall k c. (Ord k, Ord c, TransitionMap c k, Equivalent c k) => k -> DFA c
188 | fromTMEquiv = fromTMImpl (Just equivalent)
189 | 
190 | fromTMImpl :: forall k c. (Ord k, Ord c, TransitionMap c k)
191 |     => Maybe (k ->  k -> Bool)
192 |     -> k
193 |     -> DFA c
194 | fromTMImpl mequiv re = DFA
195 |     { dfaTransition = transition
196 |     , dfaInitial    = 0
197 |     , dfaAcceptable = IntSet.fromList
198 |         [ i
199 |         | (re', i) <- Map.toList lookupMap
200 |         , nullable re'
201 |         ]
202 |     , dfaBlackholes = blackholes
203 |     }
204 |   where
205 |     transition = IntMap.fromList
206 |         [ (i, js)
207 |         | (re', pm) <- Map.toList tm
208 |         , let i  = fromMaybe 0 $ Map.lookup re' lookupMap
209 |         , let js = SF.normalise $ fmap (\re'' -> fromMaybe 0 $ Map.lookup re'' lookupMap) pm
210 |         ]
211 | 
212 |     blackholes = IntSet.fromList
213 |         [ i
214 |         | (i, sf) <- IntMap.toList transition
215 |         , sf == pure i
216 |         ]
217 | 
218 |     tm = transitionMap re
219 | 
220 |     -- reversing makes error state go last, usually
221 |     lookupMap :: Map k Int
222 |     lookupMap = makeLookup 1 lookupMap' (reverse $ Map.toList $ Map.delete re tm)
223 | 
224 |     lookupMap' :: Map k Int
225 |     lookupMap' = case Map.lookup re tm of
226 |         Nothing -> Map.empty
227 |         Just _  -> Map.singleton re 0
228 | 
229 |     makeLookup :: Int -> Map k Int -> [(k, b)] -> Map k Int
230 |     makeLookup = maybe makeLookupEq makeLookupEquiv mequiv
231 | 
232 |     makeLookupEq :: Int -> Map k Int -> [(k, b)] -> Map k Int
233 |     makeLookupEq !_ !acc []            = acc
234 |     makeLookupEq !n acc ((x, _) : xs) = makeLookup (n + 1) (Map.insert x n acc) xs
235 | 
236 |     -- this differs from makeLookupEq. We don't insert new states right away,
237 |     -- but check whether equivalent state is already in the map.
238 |     --
239 |     -- This causes n^2 of exp m operations, where n = number of states and
240 |     -- m size of @k@.
241 |     makeLookupEquiv :: (k -> k -> Bool) ->  Int -> Map k Int -> [(k, b)] -> Map k Int
242 |     makeLookupEquiv _  !_ !acc []           = acc
243 |     makeLookupEquiv eq !n acc ((x, _) : xs) = case ys of
244 |         []           -> makeLookup (n + 1) (Map.insert x n acc) xs
245 |         ((_, i) : _) -> makeLookup n       (Map.insert x i acc) xs
246 |       where
247 |         ys = [ p | p@(y, _) <- Map.toList acc, eq x y ]
248 | 
249 | -------------------------------------------------------------------------------
250 | -- Destruction
251 | -------------------------------------------------------------------------------
252 | 
253 | -- | Convert 'DFA' to 'RE.RE'.
254 | --
255 | -- >>> putPretty $ toRE $ fromRE "foobar"
256 | -- ^foobar$
257 | --
258 | -- For 'RE.string' regular expressions, @'toRE' . 'fromRE' = 'id'@:
259 | --
260 | -- prop> let s' = take 5 s in RE.string (s' :: String) === toRE (fromRE (RE.string s'))
261 | --
262 | -- But in general it isn't:
263 | --
264 | -- >>> let aToZ = RE.star $ RE.charRange 'a' 'z'
265 | -- >>> traverse_ putPretty [aToZ, toRE $ fromRE aToZ]
266 | -- ^[a-z]*$
267 | -- ^([a-z]|[a-z]?[a-z]*[a-z]?)?$
268 | --
269 | -- @
270 | -- not-prop> (re :: RE.RE Char) === toRE (fromRE re)
271 | -- @
272 | --
273 | -- However, they are 'RE.equivalent':
274 | --
275 | -- >>> RE.equivalent aToZ (toRE (fromRE aToZ))
276 | -- True
277 | --
278 | -- And so are others
279 | --
280 | -- >>> all (\re -> RE.equivalent re (toRE (fromRE re))) [RE.star "a", RE.star "ab"]
281 | -- True
282 | --
283 | -- @
284 | -- expensive-prop> RE.equivalent re (toRE (fromRE (re :: RE.RE Char)))
285 | -- @
286 | --
287 | -- Note, that @'toRE' . 'fromRE'@ can, and usually makes regexp unrecognisable:
288 | --
289 | -- >>> putPretty $ toRE $ fromRE $ RE.star "ab"
290 | -- ^(a(ba)*b)?$
291 | --
292 | -- We can 'complement' DFA, therefore we can complement 'RE.RE'.
293 | -- For example. regular expression matching string containing an @a@:
294 | --
295 | -- >>> let withA = RE.star RE.anyChar <> "a" <> RE.star RE.anyChar
296 | -- >>> let withoutA = toRE $ complement $ fromRE withA
297 | -- >>> putPretty withoutA
298 | -- ^([^a]|[^a]?[^a]*[^a]?)?$
299 | --
300 | -- >>> let withoutA' = RE.star $ RE.REChars $ RSet.complement $ RSet.singleton 'a'
301 | -- >>> putPretty withoutA'
302 | -- ^[^a]*$
303 | --
304 | -- >>> RE.equivalent withoutA withoutA'
305 | -- True
306 | --
307 | -- Quite small, for example 2 state DFAs can result in big regular expressions:
308 | --
309 | -- >>> putPretty $ toRE $ complement $ fromRE $ star "ab"
310 | -- ^([^]|a(ba)*(ba)?|a(ba)*([^b]|b[^a])|([^a]|a(ba)*([^b]|b[^a]))[^]*[^]?)$
311 | --
312 | -- We can use @'toRE' . 'fromERE'@ to convert 'ERE.ERE' to 'RE.RE':
313 | --
314 | -- >>> putPretty $ toRE $ fromERE $ complement $ star "ab"
315 | -- ^([^]|a(ba)*(ba)?|a(ba)*([^b]|b[^a])|([^a]|a(ba)*([^b]|b[^a]))[^]*[^]?)$
316 | --
317 | -- >>> putPretty $ toRE $ fromERE $ "a" /\ "b"
318 | -- ^[]$
319 | --
320 | -- See <https://mathoverflow.net/questions/45149/can-regular-expressions-be-made-unambiguous>
321 | -- for the description of the algorithm used.
322 | --
323 | toRE :: (Ord c, Enum c, Bounded c) => DFA c -> RE.RE c
324 | toRE = toKleene
325 | 
326 | -- | Convert 'DFA' to 'ERE.ERE'.
327 | toERE :: (Ord c, Enum c, Bounded c) => DFA c -> ERE.ERE c
328 | toERE = toKleene
329 | 
330 | -- | Convert to any 'Kleene'.
331 | --
332 | -- See 'toRE' for a specific example.
333 | --
334 | toKleene :: forall k c. (Ord c, Enum c, Bounded c, FiniteKleene c k) => DFA c -> k
335 | toKleene (DFA tr ini acc _) = unions
336 |     [ re ini j maxN
337 |     | j <- IntSet.toList acc
338 |     ]
339 |   where
340 |     maxN | IntMap.null tr = 1
341 |          | otherwise      = succ $ fst $ IntMap.findMax tr
342 | 
343 |     {-
344 |     -- this is useful for debug
345 |     table =
346 |       [ show i ++ " " ++ show j ++ " " ++ show k ++ " = " ++ pretty (re i j k)
347 |       | k <- [0..pred maxN]
348 |       , i <- [0..pred maxN]
349 |       , j <- [0..pred maxN]
350 |       ]
351 |     -}
352 | 
353 |     re i j k = MT.memo re' (i, j, k)
354 |     re' (i, j, k)
355 |         | k <= 0    = if i == j then unions [eps, r] else r
356 |         | otherwise = unions [re i j k', appends [re i k' k', star (re k' k' k'), re k' j k']]
357 |       where
358 |         r = maybe empty fromRSet $ Map.lookup (i, j) re0map
359 |         k' = k - 1
360 | 
361 |     re0map :: Map (Int, Int) (RSet c)
362 |     re0map = Map.fromListWith RSet.union
363 |         [ ((i, j), RSet.singletonRange (lo, hi))
364 |         | (i, tr') <- IntMap.toList tr
365 |         , (lo, hi, j) <- toPieces tr'
366 |         ]
367 | 
368 | toPieces :: (Enum a, Bounded a, Ord a) => SF.SF a b -> [(a, a, b)]
369 | toPieces (SF.SF m v)
370 |     | maxBound `Map.member` m = toPieces' m
371 |     | otherwise               = toPieces' (Map.insert maxBound v m)
372 | 
373 | toPieces' :: (Enum a, Bounded a) => Map a b -> [(a, a, b)]
374 | toPieces' = go minBound . Map.toList where
375 |     go _lo []            = []
376 |     go  lo ((k, v) : kv) = (lo, k, v) : go (succ k) kv
377 | 
378 | -------------------------------------------------------------------------------
379 | -- Operations
380 | -------------------------------------------------------------------------------
381 | 
382 | -- | Run 'DFA' on the input.
383 | --
384 | -- Because we have analysed a language, in some cases we can determine a result
385 | -- without traversing all of the input.
386 | -- That's not the cases with 'RE.RE' 'match'.
387 | --
388 | -- >>> let dfa = fromRE $ RE.star "abc"
389 | -- >>> map (match dfa) ["", "abc", "abcabc", "aa", 'a' : 'a' : undefined]
390 | -- [True,True,True,False,False]
391 | --
392 | -- Holds:
393 | --
394 | -- @
395 | -- 'match' ('fromRE' re) xs == 'match' re xs
396 | -- @
397 | --
398 | -- prop> all (match (fromRE r)) $ take 10 $ RE.generate (curry QC.choose) 42 (r :: RE.RE Char)
399 | --
400 | instance Ord c => Match c (DFA c) where
401 |     match (DFA tr i acc bh) = go i where
402 |         go !s _ | IntSet.member s bh = IntSet.member s acc
403 |         go !s []                 = IntSet.member s acc
404 |         go !s (c : cs)           = case IntMap.lookup s tr of
405 |             Nothing -> False
406 |             Just sf -> go (sf SF.! c) cs
407 | 
408 |     match8 (DFA tr i acc bh) = go i where
409 |         go !s !_ | IntSet.member s bh = IntSet.member s acc
410 |         go !s bs = case BS.uncons bs of
411 |             Nothing      -> IntSet.member s acc
412 |             Just (c, cs) -> case IntMap.lookup s tr of
413 |                 Nothing -> False
414 |                 Just sf -> go (sf SF.! c) cs
415 | 
416 | -- | Complement DFA.
417 | --
418 | -- Complement of 'DFA' is way easier than of 'RE.RE': complement accept states.
419 | --
420 | -- >>> let dfa = complement $ fromRE $ RE.star "abc"
421 | -- >>> putPretty dfa
422 | -- 0 -> \x -> if
423 | --     | x <= '`'  -> 3
424 | --     | x <= 'a'  -> 2
425 | --     | otherwise -> 3
426 | -- 1+ -> \x -> if
427 | --     | x <= 'b'  -> 3
428 | --     | x <= 'c'  -> 0
429 | --     | otherwise -> 3
430 | -- 2+ -> \x -> if
431 | --     | x <= 'a'  -> 3
432 | --     | x <= 'b'  -> 1
433 | --     | otherwise -> 3
434 | -- 3+ -> \_ -> 3 -- black hole
435 | --
436 | -- >>> map (match dfa) ["", "abc", "abcabc", "aa","abca", 'a' : 'a' : undefined]
437 | -- [False,False,False,True,True,True]
438 | --
439 | instance Complement c (DFA c) where
440 |     complement (DFA tr ini acc bh) = DFA tr ini acc' bh where
441 |         acc' = IntSet.difference (IntMap.keysSet tr) acc
442 | 
443 | instance Ord c => Derivate c (DFA c) where
444 |     nullable (DFA _tr ini acc _bh) = IntSet.member ini acc
445 | 
446 |     derivate c (DFA tr ini acc bh) = DFA tr ini' acc bh where
447 |         ini' = case IntMap.lookup ini tr of
448 |             Nothing -> ini -- in error case let's just stay in the same state.
449 |             Just sf -> sf SF.! c
450 | 
451 | -------------------------------------------------------------------------------
452 | -- toDot
453 | -------------------------------------------------------------------------------
454 | 
455 | -- | Get Graphviz dot-code of DFA.
456 | --
457 | -- >>> let dfa = fromRE $ RE.star "abc"
458 | -- >>> putStr $ toDot dfa
459 | -- digraph dfa {
460 | -- rankdir=LR;
461 | -- // states
462 | -- "0" [shape=doublecircle];
463 | -- "1" [shape=circle];
464 | -- "2" [shape=circle];
465 | -- // initial state
466 | -- "" [shape=none];
467 | -- "" -> "0";
468 | -- // transitions
469 | -- "0" -> "2"[label="a"]
470 | -- "1" -> "0"[label="c"]
471 | -- "2" -> "1"[label="b"]
472 | -- }
473 | --
474 | toDot :: DFA Char -> String
475 | toDot = toDot' show pure
476 | 
477 | -- | More flexible version of 'toDot'.
478 | toDot' :: (Ord c, Enum c, Bounded c) => (Int -> String) -> (c -> String) -> DFA c -> String
479 | toDot' showS showC (DFA tr ini acc bh)
480 |     = showString "digraph dfa {\n"
481 |     . showString "rankdir=LR;\n"
482 |     . showString "// states\n"
483 |     . showStates
484 |     . showString "// initial state\n"
485 |     . showInitial
486 |     . showString "// transitions\n"
487 |     . showTransitions
488 |     . showString "}\n"
489 |     $ ""
490 |   where
491 |     showStates  = foldr (.) id
492 |         [ showState i
493 |         | i <- IntMap.keys tr
494 |         , IntSet.member i acc || IntSet.notMember i bh
495 |         ]
496 |     showState s = showS' s . shape where
497 |         shape
498 |             | IntSet.member s acc = showString " [shape=doublecircle];\n"
499 |             | otherwise        = showString " [shape=circle];\n"
500 | 
501 |     showInitial
502 |         = showString "\"\" [shape=none];\n"
503 |         . showString "\"\" -> "
504 |         . showS' ini
505 |         . showString ";\n"
506 | 
507 |     showTransitions = foldr (.) id
508 |         [ showS' i
509 |         . showString " -> "
510 |         . showS' j
511 |         . showString "[label="
512 |         . label
513 |                 . showString "]\n"
514 |         | (i, sf) <- IntMap.toList tr
515 |         , (lo, hi, j) <- toPieces sf
516 |         , IntSet.member j acc || IntSet.notMember j bh
517 |         , let label
518 |                 | lo == hi
519 |                     = shows (showC lo)
520 |                 | lo == minBound && hi == maxBound
521 |                     = shows ("-any" :: String)
522 |                 | otherwise
523 |                     = shows (showC lo ++ "-" ++ showC hi)
524 |         ]
525 | 
526 |     showS' = shows . showS
527 | 
528 | -------------------------------------------------------------------------------
529 | -- Orphans
530 | -------------------------------------------------------------------------------
531 | 
532 | -- | __WARNING__: The '/\' is inefficient, it actually computes the conjunction:
533 | --
534 | -- >>> putPretty $ asREChar $ "a" /\ "b"
535 | -- ^[]$
536 | --
537 | -- >>> putPretty $ asREChar $ "a" /\ star "a"
538 | -- ^a$
539 | --
540 | -- >>> putPretty $ asREChar $ star "aa" /\ star "aaa"
541 | -- ^(a(aaaaaa)*aaaaa)?$
542 | --
543 | instance (Ord c, Enum c, Bounded c) => Lattice (RE.RE c) where
544 |     r /\ r' = toRE $ fromERE $ ERE.fromRE r /\ ERE.fromRE r'
545 |     r \/ r' = unions [r, r']
546 | 
547 | instance (Ord c, Enum c, Bounded c) => BoundedJoinSemiLattice (RE.RE c) where
548 |     bottom = empty
549 | 
550 | instance (Ord c, Enum c, Bounded c) => BoundedMeetSemiLattice (RE.RE c) where
551 |     top = RE.REStar (RE.REChars RSet.full)
552 | 
553 | instance (Ord c, Enum c, Bounded c) => Complement c (RE.RE c) where
554 |     complement = toRE . complement . fromRE
555 | 
556 | -------------------------------------------------------------------------------
557 | -- Debug
558 | -------------------------------------------------------------------------------
559 | 
560 | instance Show c => Pretty (DFA c) where
561 |     pretty dfa = intercalate "\n"
562 |         [ show i ++ acc ++ " -> " ++ SF.showSF sf ++ bh
563 |         | (i, sf) <- IntMap.toList (dfaTransition dfa)
564 |         , let acc = if IntSet.member i (dfaAcceptable dfa) then "+" else ""
565 |         , let bh = if IntSet.member i $ dfaBlackholes dfa then " -- black hole" else ""
566 |         ]
567 | 


--------------------------------------------------------------------------------
/src/Kleene/ERE.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns          #-}
  2 | {-# LANGUAGE FlexibleInstances     #-}
  3 | {-# LANGUAGE GADTs                 #-}
  4 | {-# LANGUAGE MultiParamTypeClasses #-}
  5 | {-# LANGUAGE Safe                  #-}
  6 | {-# LANGUAGE ScopedTypeVariables   #-}
  7 | module Kleene.ERE (
  8 |     ERE (..),
  9 |     -- * Construction
 10 |     --
 11 |     -- | Binary operators are
 12 |     --
 13 |     -- * '<>' for append
 14 |     -- * '\/' for union
 15 |     -- * '/\' for intersection
 16 |     --
 17 |     empty,
 18 |     eps,
 19 |     everything,
 20 |     char,
 21 |     charRange,
 22 |     anyChar,
 23 |     appends,
 24 |     unions,
 25 |     intersections,
 26 |     star,
 27 |     string,
 28 |     complement,
 29 |     -- * Derivative
 30 |     nullable,
 31 |     derivate,
 32 |     -- * Conversion
 33 |     fromRE,
 34 |     -- * Transition map
 35 |     transitionMap,
 36 |     leadingChars,
 37 |     -- * Equivalence
 38 |     equivalent,
 39 |     -- * Other
 40 |     isEmpty,
 41 |     isEverything,
 42 |     ) where
 43 | 
 44 | import Data.Semigroup (Semigroup (..))
 45 | import Prelude ()
 46 | import Prelude.Compat
 47 | 
 48 | import Algebra.Lattice
 49 |        (BoundedJoinSemiLattice (..), BoundedMeetSemiLattice (..), Lattice (..))
 50 | import Control.Applicative (liftA2)
 51 | import Data.Foldable       (toList)
 52 | import Data.List           (foldl')
 53 | import Data.Map            (Map)
 54 | import Data.RangeSet.Map   (RSet)
 55 | import Data.Set            (Set)
 56 | import Data.String         (IsString (..))
 57 | 
 58 | import qualified Data.Function.Step.Discrete.Closed as SF
 59 | import qualified Data.Map                           as Map
 60 | import qualified Data.RangeSet.Map                  as RSet
 61 | import qualified Data.Set                           as Set
 62 | import qualified Test.QuickCheck                    as QC
 63 | 
 64 | import qualified Kleene.Classes            as C
 65 | import qualified Kleene.Internal.Partition as P
 66 | import           Kleene.Internal.Pretty
 67 | import qualified Kleene.Internal.RE        as RE
 68 | 
 69 | -- $setup
 70 | -- >>> import Algebra.Lattice ((/\), (\/), top, bottom)
 71 | -- >>> import Data.Semigroup (Semigroup (..))
 72 | -- >>> import Control.Monad (void)
 73 | -- >>> import Data.Foldable (traverse_)
 74 | -- >>> import Data.List (sort)
 75 | -- >>> import Test.QuickCheck ((===))
 76 | -- >>> import qualified Test.QuickCheck as QC
 77 | -- >>> import qualified Data.Map as Map
 78 | -- >>> import qualified Data.Function.Step.Discrete.Closed as SF
 79 | --
 80 | -- >>> import Kleene.Classes (match)
 81 | -- >>> import Kleene.Internal.Pretty (putPretty, pretty)
 82 | -- >>> let asEREChar :: ERE Char -> ERE Char; asEREChar = id
 83 | 
 84 | -- | Extended regular expression
 85 | --
 86 | -- It's both, /Kleene/ and /Boolean/ algebra. (If we add only intersections, it
 87 | -- wouldn't be /Boolean/).
 88 | --
 89 | -- /Note:/ we don't have special constructor for intersections.
 90 | -- We use de Morgan formula \(a \land b = \neg (\neg a \lor \neg b)\).
 91 | --
 92 | -- >>> putPretty $ asEREChar $ "a" /\ "b"
 93 | -- ^~(~a|~b)$
 94 | --
 95 | -- There is no generator, as 'intersections' makes it hard.
 96 | --
 97 | data ERE c
 98 |     = EREChars (RSet c)                -- ^ Single character
 99 |     | EREAppend [ERE c]                -- ^ Concatenation
100 |     | EREUnion (RSet c) (Set (ERE c))  -- ^ Union
101 |     | EREStar (ERE c)                  -- ^ Kleene star
102 |     | ERENot (ERE c)                   -- ^ Complement
103 |   deriving (Eq, Ord, Show)
104 | 
105 | -------------------------------------------------------------------------------
106 | -- fromRE
107 | -------------------------------------------------------------------------------
108 | 
109 | -- | Convert from ordinary regular expression, 'RE.RE'.
110 | --
111 | fromRE :: Ord c => RE.RE c -> ERE c
112 | fromRE (RE.REChars rs)   = EREChars rs
113 | fromRE (RE.REAppend rs)  = EREAppend (map fromRE rs)
114 | fromRE (RE.REUnion r rs) = EREUnion r (Set.map fromRE rs)
115 | fromRE (RE.REStar r)     = EREStar (fromRE r)
116 | 
117 | -------------------------------------------------------------------------------
118 | -- Smart constructor
119 | -------------------------------------------------------------------------------
120 | 
121 | -- | Empty regex. Doesn't accept anything.
122 | --
123 | -- >>> putPretty (empty :: ERE Char)
124 | -- ^[]$
125 | --
126 | -- >>> putPretty (bottom :: ERE Char)
127 | -- ^[]$
128 | --
129 | -- prop> match (empty :: ERE Char) (s :: String) === False
130 | --
131 | empty :: ERE c
132 | empty = EREChars RSet.empty
133 | 
134 | -- | Everything.
135 | --
136 | -- >>> putPretty (everything :: ERE Char)
137 | -- ^~[]$
138 | --
139 | -- >>> putPretty (top :: ERE Char)
140 | -- ^~[]$
141 | --
142 | -- prop> match (everything :: ERE Char) (s :: String) === True
143 | --
144 | everything :: ERE c
145 | everything = complement empty
146 | 
147 | -- | Empty string. /Note:/ different than 'empty'.
148 | --
149 | -- >>> putPretty eps
150 | -- ^$
151 | --
152 | -- >>> putPretty (mempty :: ERE Char)
153 | -- ^$
154 | --
155 | -- prop> match (eps :: ERE Char) s === null (s :: String)
156 | --
157 | eps :: ERE c
158 | eps = EREAppend []
159 | 
160 | -- |
161 | --
162 | -- >>> putPretty (char 'x')
163 | -- ^x$
164 | --
165 | char :: c -> ERE c
166 | char = EREChars . RSet.singleton
167 | 
168 | -- |
169 | --
170 | -- >>> putPretty $ charRange 'a' 'z'
171 | -- ^[a-z]$
172 | --
173 | charRange :: Ord c => c -> c -> ERE c
174 | charRange c c' = EREChars $ RSet.singletonRange (c, c')
175 | 
176 | -- | Any character. /Note:/ different than dot!
177 | --
178 | -- >>> putPretty anyChar
179 | -- ^[^]$
180 | --
181 | anyChar :: Bounded c => ERE c
182 | anyChar = EREChars RSet.full
183 | 
184 | -- | Concatenate regular expressions.
185 | --
186 | -- prop> asEREChar r <> empty === empty
187 | -- prop> empty <> asEREChar r === empty
188 | -- prop> (asEREChar r <> s) <> t === r <> (s <> t)
189 | --
190 | -- prop> asEREChar r <> eps === r
191 | -- prop> eps <> asEREChar r === r
192 | --
193 | appends :: Eq c => [ERE c] -> ERE c
194 | appends rs0
195 |     | elem empty rs1 = empty
196 |     | otherwise = case rs1 of
197 |         [r] -> r
198 |         rs  -> EREAppend rs
199 |   where
200 |     -- flatten one level of EREAppend
201 |     rs1 = concatMap f rs0
202 | 
203 |     f (EREAppend rs) = rs
204 |     f r              = [r]
205 | 
206 | -- | Union of regular expressions.
207 | --
208 | -- prop> asEREChar r \/ r === r
209 | -- prop> asEREChar r \/ s === s \/ r
210 | -- prop> (asEREChar r \/ s) \/ t === r \/ (s \/ t)
211 | --
212 | -- prop> empty \/ asEREChar r === r
213 | -- prop> asEREChar r \/ empty === r
214 | --
215 | -- prop> everything \/ asEREChar r === everything
216 | -- prop> asEREChar r \/ everything === everything
217 | --
218 | unions :: (Ord c, Enum c) => [ERE c] -> ERE c
219 | unions = uncurry mk . foldMap f where
220 |     mk cs rss
221 |         | Set.null rss = EREChars cs
222 |         | Set.member everything rss = everything
223 |         | RSet.null cs = case Set.toList rss of
224 |             []  -> empty
225 |             [r] -> r
226 |             _   -> EREUnion cs rss
227 |         | otherwise    = EREUnion cs rss
228 | 
229 |     f (EREUnion cs rs) = (cs, rs)
230 |     f (EREChars cs)    = (cs, Set.empty)
231 |     f r                = (mempty, Set.singleton r)
232 | 
233 | -- | Intersection of regular expressions.
234 | --
235 | -- prop> asEREChar r /\ r === r
236 | -- prop> asEREChar r /\ s === s /\ r
237 | -- prop> (asEREChar r /\ s) /\ t === r /\ (s /\ t)
238 | --
239 | -- prop> empty /\ asEREChar r === empty
240 | -- prop> asEREChar r /\ empty === empty
241 | --
242 | -- prop> everything /\ asEREChar r === r
243 | -- prop> asEREChar r /\ everything === r
244 | --
245 | intersections :: (Ord c, Enum c) => [ERE c] -> ERE c
246 | intersections = complement . unions . map complement
247 | 
248 | -- | Complement.
249 | --
250 | -- prop> complement (complement r) === asEREChar r
251 | --
252 | complement :: ERE c -> ERE c
253 | complement r = case r of
254 |     ERENot r' -> r'
255 |     _ -> ERENot r
256 | 
257 | -- | Kleene star.
258 | --
259 | -- prop> star (star r) === star (asEREChar r)
260 | --
261 | -- prop> star eps     === asEREChar eps
262 | -- prop> star empty   === asEREChar eps
263 | -- prop> star anyChar === asEREChar everything
264 | --
265 | -- prop> star (asEREChar r \/ eps) === star r
266 | -- prop> star (char c \/ eps) === star (char (c :: Char))
267 | -- prop> star (empty \/ eps) === eps
268 | --
269 | star :: (Ord c, Bounded c) => ERE c -> ERE c
270 | star r = case r of
271 |     EREStar _                          -> r
272 |     EREAppend []                       -> eps
273 |     EREChars cs | RSet.null cs         -> eps
274 |     EREChars cs | RSet.isFull cs       -> everything
275 |     EREUnion cs rs | Set.member eps rs -> case Set.toList rs' of
276 |         []                  -> star (EREChars cs)
277 |         [r'] | RSet.null cs -> star r'
278 |         _                   -> EREStar (EREUnion cs rs')
279 |       where
280 |         rs' = Set.delete eps rs
281 |     _                                  -> EREStar r
282 | 
283 | -- | Literal string.
284 | --
285 | -- >>> putPretty ("foobar" :: ERE Char)
286 | -- ^foobar$
287 | --
288 | -- >>> putPretty ("(.)" :: ERE Char)
289 | -- ^\(\.\)$
290 | --
291 | string :: [c] -> ERE c
292 | string []  = eps
293 | string [c] = EREChars (RSet.singleton c)
294 | string cs  = EREAppend $ map (EREChars . RSet.singleton) cs
295 | 
296 | instance (Ord c, Enum c, Bounded c) => C.Kleene (ERE c) where
297 |     empty      = empty
298 |     eps        = eps
299 |     appends    = appends
300 |     unions     = unions
301 |     star       = star
302 | 
303 | instance (Ord c, Enum c, Bounded c) => C.CharKleene c (ERE c) where
304 |     char       = char
305 | 
306 | instance (Ord c, Enum c, Bounded c) => C.FiniteKleene c (ERE c) where
307 |     everything = everything
308 |     charRange  = charRange
309 |     fromRSet   = EREChars
310 |     anyChar    = anyChar
311 | 
312 | instance C.Complement c (ERE c) where
313 |     complement = complement
314 | 
315 | -------------------------------------------------------------------------------
316 | -- derivative
317 | -------------------------------------------------------------------------------
318 | 
319 | -- | We say that a regular expression r is nullable if the language it defines
320 | -- contains the empty string.
321 | --
322 | -- >>> nullable eps
323 | -- True
324 | --
325 | -- >>> nullable (star "x")
326 | -- True
327 | --
328 | -- >>> nullable "foo"
329 | -- False
330 | --
331 | -- >>> nullable (complement eps)
332 | -- False
333 | --
334 | nullable :: ERE c -> Bool
335 | nullable (EREChars _)      = False
336 | nullable (EREAppend rs)    = all nullable rs
337 | nullable (EREUnion _cs rs) = any nullable rs
338 | nullable (EREStar _)       = True
339 | nullable (ERENot r)        = not (nullable r)
340 | 
341 | -- | Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\)
342 | -- with respect to a symbol \(a \in \Sigma\) is the language that includes only
343 | -- those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
344 | --
345 | -- >>> putPretty $ derivate 'f' "foobar"
346 | -- ^oobar$
347 | --
348 | -- >>> putPretty $ derivate 'x' $ "xyz" \/ "abc"
349 | -- ^yz$
350 | --
351 | -- >>> putPretty $ derivate 'x' $ star "xyz"
352 | -- ^yz(xyz)*$
353 | --
354 | derivate :: (Ord c, Enum c) => c -> ERE c -> ERE c
355 | derivate c (EREChars cs)     = derivateChars c cs
356 | derivate c (EREUnion cs rs)  = unions $ derivateChars c cs : [ derivate c r | r <- toList rs]
357 | derivate c (EREAppend rs)    = derivateAppend c rs
358 | derivate c rs@(EREStar r)    = derivate c r <> rs
359 | derivate c (ERENot r)        = complement (derivate c r)
360 | 
361 | instance (Ord c, Enum c) => C.Derivate c (ERE c) where
362 |     nullable = nullable
363 |     derivate = derivate
364 | 
365 | instance (Ord c, Enum c) => C.Match c (ERE c) where
366 |     match r = nullable . foldl' (flip derivate) r
367 | 
368 | derivateAppend :: (Enum c, Ord c) => c -> [ERE c] -> ERE c
369 | derivateAppend _ []      = empty
370 | derivateAppend c [r]     = derivate c r
371 | derivateAppend c (r:rs)
372 |     | nullable r         = unions [r' <> appends rs, rs']
373 |     | otherwise          = r' <> appends rs
374 |   where
375 |     r'  = derivate c r
376 |     rs' = derivateAppend c rs
377 | 
378 | derivateChars :: Ord c =>  c -> RSet c -> ERE c
379 | derivateChars c cs
380 |     | c `RSet.member` cs      = eps
381 |     | otherwise               = empty
382 | 
383 | -------------------------------------------------------------------------------
384 | -- isEmpty
385 | -------------------------------------------------------------------------------
386 | 
387 | -- | Whether 'ERE' is (structurally) equal to 'empty'.
388 | isEmpty :: ERE c -> Bool
389 | isEmpty (EREChars rs) = RSet.null rs
390 | isEmpty _            = False
391 | 
392 | -- | Whether 'ERE' is (structurally) equal to 'everything'.
393 | isEverything :: ERE c -> Bool
394 | isEverything (ERENot (EREChars rs)) = RSet.null rs
395 | isEverything _                      = False
396 | 
397 | -------------------------------------------------------------------------------
398 | -- States
399 | -------------------------------------------------------------------------------
400 | 
401 | -- | Transition map. Used to construct 'Kleene.DFA.DFA'.
402 | --
403 | -- >>> void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: ERE Char)
404 | -- ^[]$ : \_ -> "^[]$"
405 | -- ^b$ : \x -> if
406 | --     | x <= 'a'  -> "^[]$"
407 | --     | x <= 'b'  -> "^$"
408 | --     | otherwise -> "^[]$"
409 | -- ^$ : \_ -> "^[]$"
410 | -- ^ab$ : \x -> if
411 | --     | x <= '`'  -> "^[]$"
412 | --     | x <= 'a'  -> "^b$"
413 | --     | otherwise -> "^[]$"
414 | --
415 | transitionMap
416 |     :: forall c. (Ord c, Enum c, Bounded c)
417 |     => ERE c
418 |     -> Map (ERE c) (SF.SF c (ERE c))
419 | transitionMap re = go Map.empty [re] where
420 |     go :: Map (ERE c) (SF.SF c (ERE c))
421 |        -> [ERE c]
422 |        -> Map (ERE c) (SF.SF c (ERE c))
423 |     go !acc [] = acc
424 |     go acc (r : rs)
425 |         | r `Map.member` acc = go acc rs
426 |         | otherwise = go (Map.insert r pm acc) (SF.values pm ++ rs)
427 |       where
428 |         pm = P.toSF (\c -> derivate c r) (leadingChars r)
429 | 
430 | instance (Ord c, Enum c, Bounded c) => C.TransitionMap c (ERE c) where
431 |     transitionMap = transitionMap
432 | 
433 | -- | Leading character sets of regular expression.
434 | --
435 | -- >>> leadingChars "foo"
436 | -- fromSeparators "ef"
437 | --
438 | -- >>> leadingChars (star "b" <> star "e")
439 | -- fromSeparators "abde"
440 | --
441 | -- >>> leadingChars (charRange 'b' 'z')
442 | -- fromSeparators "az"
443 | --
444 | leadingChars :: (Ord c, Enum c, Bounded c) => ERE c -> P.Partition c
445 | leadingChars (EREChars cs)    = P.fromRSet cs
446 | leadingChars (EREUnion cs rs) = P.fromRSet cs <> foldMap leadingChars rs
447 | leadingChars (EREStar r)      = leadingChars r
448 | leadingChars (EREAppend rs)   = leadingCharsAppend rs
449 | leadingChars (ERENot r)       = leadingChars r
450 | 
451 | leadingCharsAppend :: (Ord c, Enum c, Bounded c) => [ERE c] -> P.Partition c
452 | leadingCharsAppend [] = P.whole
453 | leadingCharsAppend (r : rs)
454 |     | nullable r = leadingChars r <> leadingCharsAppend rs
455 |     | otherwise  = leadingChars r
456 | 
457 | -------------------------------------------------------------------------------
458 | -- Equivalence
459 | -------------------------------------------------------------------------------
460 | 
461 | -- | Whether two regexps are equivalent.
462 | --
463 | -- @
464 | -- 'equivalent' re1 re2 <=> forall s. 'match' re1 s == 'match' re1 s
465 | -- @
466 | --
467 | -- >>> let re1 = star "a" <> "a"
468 | -- >>> let re2 = "a" <> star "a"
469 | --
470 | -- These are different regular expressions, even we perform
471 | -- some normalisation-on-construction:
472 | --
473 | -- >>> re1 == re2
474 | -- False
475 | --
476 | -- They are however equivalent:
477 | --
478 | -- >>> equivalent re1 re2
479 | -- True
480 | --
481 | -- The algorithm works by executing 'states' on "product" regexp,
482 | -- and checking whether all resulting states are both accepting or rejecting.
483 | --
484 | -- @
485 | -- re1 == re2 ==> 'equivalent' re1 re2
486 | -- @
487 | --
488 | -- === More examples
489 | --
490 | -- >>> let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
491 | -- >>> example re1 re2
492 | -- ^a*a$
493 | -- ^aa*$
494 | -- True
495 | --
496 | -- >>> example (star "aa") (star "aaa")
497 | -- ^(aa)*$
498 | -- ^(aaa)*$
499 | -- False
500 | --
501 | -- >>> example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
502 | -- ^(aa)*(aaa)*$
503 | -- ^(aaa)*(aa)*$
504 | -- True
505 | --
506 | -- >>> example (star ("a" \/ "b")) (star $ star "a" <> star "b")
507 | -- ^[a-b]*$
508 | -- ^(a*b*)*$
509 | -- True
510 | --
511 | equivalent :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> ERE c -> Bool
512 | equivalent x0 y0 = go mempty [(x0, y0)] where
513 |     go :: Set (ERE c, ERE c) -> [(ERE c, ERE c)] -> Bool
514 |     go !_ [] = True
515 |     go acc (p@(x, y) : zs)
516 |         | p `Set.member` acc = go acc zs
517 |         -- if two regexps are structurally the same, we don't need to recurse.
518 |         | x == y             = go (Set.insert p acc) zs
519 |         | all agree ps       = go (Set.insert p acc) (ps ++ zs)
520 |         | otherwise = False
521 |       where
522 |         cs = toList $ P.examples $ leadingChars x `P.wedge` leadingChars y
523 |         ps = map (\c -> (derivate c x, derivate c y)) cs
524 | 
525 |     agree :: (ERE c, ERE c) -> Bool
526 |     agree (x, y) = nullable x == nullable y
527 | 
528 | instance (Ord c, Enum c, Bounded c) => C.Equivalent c (ERE c) where
529 |     equivalent = equivalent
530 | 
531 | -------------------------------------------------------------------------------
532 | -- Instances
533 | -------------------------------------------------------------------------------
534 | 
535 | instance Eq c => Semigroup (ERE c) where
536 |     r <> r' = appends [r, r']
537 | 
538 | instance Eq c => Monoid (ERE c) where
539 |     mempty  = eps
540 |     mappend = (<>)
541 |     mconcat = appends
542 | 
543 | instance (Ord c, Enum c) => Lattice (ERE c) where
544 |     r \/ r' = unions [r, r']
545 |     r /\ r' = intersections [r, r']
546 | 
547 | instance (Ord c, Enum c) => BoundedJoinSemiLattice (ERE c) where
548 |     bottom = empty
549 | 
550 | instance (Ord c, Enum c) => BoundedMeetSemiLattice (ERE c) where
551 |     top = everything
552 | 
553 | instance c ~ Char => IsString (ERE c) where
554 |     fromString = string
555 | 
556 | instance (Ord c, Enum c, Bounded c, QC.Arbitrary c) => QC.Arbitrary (ERE c) where
557 |     arbitrary = QC.sized arb where
558 |         c :: QC.Gen (ERE c)
559 |         c = EREChars . RSet.fromRangeList <$> QC.arbitrary
560 | 
561 |         arb :: Int -> QC.Gen (ERE c)
562 |         arb n | n <= 0    = QC.oneof [c, fmap char QC.arbitrary, pure eps]
563 |               | otherwise = QC.oneof
564 |             [ c
565 |             , pure eps
566 |             , fmap char QC.arbitrary
567 |             , liftA2 (<>) (arb n2) (arb n2)
568 |             , liftA2 (\/) (arb n2) (arb n2)
569 |             , fmap star (arb n2)
570 |             , fmap complement (arb n2)
571 |             ]
572 |           where
573 |             n2 = n `div` 2
574 | 
575 | instance (QC.CoArbitrary c) => QC.CoArbitrary (ERE c) where
576 |     coarbitrary (EREChars cs)    = QC.variant (0 :: Int) . QC.coarbitrary (RSet.toRangeList cs)
577 |     coarbitrary (EREAppend rs)   = QC.variant (1 :: Int) . QC.coarbitrary rs
578 |     coarbitrary (EREUnion cs rs) = QC.variant (2 :: Int) . QC.coarbitrary (RSet.toRangeList cs, Set.toList rs)
579 |     coarbitrary (EREStar r)      = QC.variant (3 :: Int) . QC.coarbitrary r
580 |     coarbitrary (ERENot r)       = QC.variant (4 :: Int) . QC.coarbitrary r
581 | 
582 | -------------------------------------------------------------------------------
583 | -- JavaScript
584 | -------------------------------------------------------------------------------
585 | 
586 | instance c ~ Char => Pretty (ERE c) where
587 |     prettyS x = showChar '^' . go False x . showChar '$'
588 |       where
589 |         go :: Bool -> ERE Char -> ShowS
590 |         go p (EREStar a)
591 |             = parens p
592 |             $ go True a . showChar '*'
593 |         go p (EREAppend rs)
594 |             = parens p $ goMany id rs
595 |         go p (EREUnion cs rs)
596 |             | RSet.null cs = goUnion p rs
597 |             | Set.null rs  = prettyS cs
598 |             | otherwise    = goUnion p (Set.insert (EREChars cs) rs)
599 |         go _ (EREChars cs)
600 |             = prettyS cs
601 |         go p (ERENot r)
602 |             = parens p $ showChar '~' . go True r
603 | 
604 |         goUnion p rs
605 |             | Set.member eps rs = parens p $ goUnion' True . showChar '?'
606 |             | otherwise         = goUnion' p
607 |           where
608 |             goUnion' p' = case Set.toList (Set.delete eps rs) of
609 |                 [] -> go True empty
610 |                 [r] -> go p' r
611 |                 (r:rs') -> parens True $ goSome1 (showChar '|') r rs'
612 | 
613 |         goMany :: ShowS -> [ERE Char] -> ShowS
614 |         goMany sep = foldr (\a b -> go False a . sep . b) id
615 | 
616 |         goSome1 :: ShowS -> ERE Char -> [ERE Char] -> ShowS
617 |         goSome1 sep r = foldl (\a b -> a . sep . go False b) (go False r)
618 | 
619 |         parens :: Bool -> ShowS -> ShowS
620 |         parens True  s = showString "(" . s . showChar ')'
621 |         parens False s = s
622 | 
623 | -------------------------------------------------------------------------------
624 | -- Latin1
625 | -------------------------------------------------------------------------------
626 | 
627 | instance C.ToLatin1 ERE where
628 |     toLatin1 (EREChars rs)    = C.fromRSet (C.toLatin1 rs)
629 |     toLatin1 (EREAppend xs)   = appends (map C.toLatin1 xs)
630 |     toLatin1 (EREUnion rs xs) = C.fromRSet (C.toLatin1 rs) \/ unions (map C.toLatin1 (Set.toList  xs))
631 |     toLatin1 (EREStar r)      = star (C.toLatin1 r)
632 |     toLatin1 (ERENot r)       = complement (C.toLatin1 r)
633 | 


--------------------------------------------------------------------------------
/src/Kleene/Equiv.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE FlexibleInstances          #-}
 2 | {-# LANGUAGE FunctionalDependencies     #-}
 3 | {-# LANGUAGE GADTs                      #-}
 4 | {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 5 | {-# LANGUAGE StandaloneDeriving         #-}
 6 | {-# LANGUAGE Trustworthy                #-}
 7 | {-# LANGUAGE UndecidableInstances       #-}
 8 | module Kleene.Equiv where
 9 | 
10 | import Prelude ()
11 | import Prelude.Compat
12 | 
13 | import Algebra.Lattice
14 |        (BoundedJoinSemiLattice (..), BoundedMeetSemiLattice (..), Lattice (..),
15 |        joinLeq)
16 | import Algebra.PartialOrd (PartialOrd (..))
17 | import Data.Semigroup     (Semigroup (..))
18 | 
19 | import Kleene.Classes
20 | import Kleene.Internal.Pretty
21 | 
22 | -- $setup
23 | -- >>> import Kleene.RE (RE)
24 | -- >>> import Kleene.Classes
25 | -- >>> import Algebra.PartialOrd (leq)
26 | -- >>> import Data.Semigroup (Semigroup (..))
27 | 
28 | -- | Regular-expressions for which '==' is 'equivalent'.
29 | --
30 | -- >>> let re1 = star "a" <> "a" :: RE Char
31 | -- >>> let re2 = "a" <> star "a" :: RE Char
32 | --
33 | -- >>> re1 == re2
34 | -- False
35 | --
36 | -- >>> Equiv re1 == Equiv re2
37 | -- True
38 | --
39 | -- 'Equiv' is also a 'PartialOrd' (but not 'Ord'!)
40 | --
41 | -- >>> Equiv "a" `leq` Equiv (star "a" :: RE Char)
42 | -- True
43 | --
44 | -- Not all regular expessions are 'comparable':
45 | --
46 | -- >>> let reA = Equiv "a" :: Equiv RE Char
47 | -- >>> let reB = Equiv "b" :: Equiv RE Char
48 | -- >>> (leq reA reB, leq reB reA)
49 | -- (False,False)
50 | --
51 | newtype Equiv r c = Equiv (r c)
52 |   deriving (Show, Semigroup, Monoid, BoundedJoinSemiLattice, BoundedMeetSemiLattice, Lattice, Pretty)
53 | 
54 | instance Equivalent c (r c) => Eq (Equiv r c) where
55 |     (==) = equivalent
56 | 
57 | -- | \(a \preceq b := a \lor b = b \)
58 | instance (Lattice (r c), Equivalent c (r c)) => PartialOrd (Equiv r c) where
59 |     leq = joinLeq
60 | 
61 | deriving instance Kleene       (r c) => Kleene       (Equiv r c)
62 | deriving instance CharKleene c (r c) => CharKleene c (Equiv r c)
63 | deriving instance Derivate   c (r c) => Derivate   c (Equiv r c)
64 | deriving instance Match      c (r c) => Match      c (Equiv r c)
65 | deriving instance Equivalent c (r c) => Equivalent c (Equiv r c)
66 | deriving instance Complement c (r c) => Complement c (Equiv r c)
67 | 


--------------------------------------------------------------------------------
/src/Kleene/Functor.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE Safe #-}
 2 | module Kleene.Functor (
 3 |     K,
 4 |     Greediness (..),
 5 |     -- * Constructors
 6 |     few,
 7 |     anyChar,
 8 |     oneof,
 9 |     char,
10 |     charRange,
11 |     dot,
12 |     everything,
13 |     everything1,
14 |     -- * Queries
15 |     isEmpty,
16 |     isEverything,
17 |     -- * Matching
18 |     match,
19 |     -- * Conversions
20 |     toRE,
21 |     toKleene,
22 |     fromRE,
23 |     toRA,
24 |     ) where
25 | 
26 | import Kleene.Internal.Functor
27 | 


--------------------------------------------------------------------------------
/src/Kleene/Functor/NonEmpty.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP   #-}
  2 | {-# LANGUAGE GADTs #-}
  3 | {-# LANGUAGE Safe  #-}
  4 | module Kleene.Functor.NonEmpty (
  5 |     K1,
  6 |     Greediness (..),
  7 |     -- * Constructors
  8 |     some1,
  9 |     few1,
 10 |     anyChar,
 11 |     oneof,
 12 |     char,
 13 |     charRange,
 14 |     dot,
 15 |     everything1,
 16 |     string,
 17 |     -- * Queries
 18 |     isEmpty,
 19 |     isEverything,
 20 |     -- * Matching
 21 |     match,
 22 |     -- * Conversions
 23 |     toRE,
 24 |     toKleene,
 25 |     toRA,
 26 |     nullableProof,
 27 |     ) where
 28 | 
 29 | import Prelude ()
 30 | import Prelude.Compat
 31 | 
 32 | import Control.Applicative (Alternative (..), liftA2)
 33 | import Data.Foldable       (toList)
 34 | import Data.Functor.Alt    ((<!>))
 35 | import Data.Functor.Apply  (Apply (..))
 36 | import Data.List.NonEmpty  (NonEmpty (..))
 37 | import Data.RangeSet.Map   (RSet)
 38 | 
 39 | import qualified Data.Functor.Alt       as Alt
 40 | import qualified Data.List.NonEmpty     as NE
 41 | import qualified Data.RangeSet.Map      as RSet
 42 | import qualified Text.Regex.Applicative as R
 43 | 
 44 | import qualified Kleene.Classes          as C
 45 | import           Kleene.Internal.Functor (Greediness (..), K (..))
 46 | import           Kleene.Internal.Pretty
 47 | import           Kleene.Internal.Sets
 48 | import qualified Kleene.RE               as RE
 49 | 
 50 | -- $setup
 51 | --
 52 | -- >>> import Control.Applicative (optional, Alternative (..))
 53 | -- >>> import Data.Functor.Apply (Apply (..))
 54 | -- >>> import Data.List.NonEmpty (NonEmpty (..))
 55 | -- >>> import Kleene.Functor (Greediness (..), K (..))
 56 | -- >>> import Data.Foldable (toList)
 57 | -- >>> import Kleene.Internal.Pretty (putPretty)
 58 | -- >>> import qualified Kleene.RE as RE
 59 | -- >>> import qualified Kleene.Classes as C
 60 | -- >>> import qualified Text.Regex.Applicative as R
 61 | 
 62 | -- | 'Applicative' 'Functor' regular expression.
 63 | data K1 c a where
 64 |     K1Empty  :: K1 c a
 65 |     K1Char   :: (Ord c, Enum c) => RSet c -> K1 c c
 66 |     K1Append :: (a -> b -> r) -> K1 c a -> K1 c b -> K1 c r
 67 |     K1Union  :: K1 c a -> K1 c a -> K1 c a
 68 |     KPlus    :: Greediness -> K1 c a -> K1 c (NonEmpty a)
 69 | 
 70 |     -- optimisations
 71 |     K1Map    :: (a -> b) -> K1 c a -> K1 c b -- could use Pure and Append
 72 |     K1String :: Eq c => NonEmpty c -> K1 c (NonEmpty c)     -- could use Char and Append
 73 | 
 74 | instance Functor (K1 c) where
 75 |     fmap _ K1Empty          = K1Empty
 76 |     fmap f (K1Map g k)      = K1Map (f . g) k
 77 |     fmap f (K1Append g a b) = K1Append (\x y -> f (g x y)) a b
 78 |     fmap f k                = K1Map f k
 79 | 
 80 | instance Apply (K1 c) where
 81 |     K1Empty <.> _ = K1Empty
 82 |     _ <.> K1Empty = K1Empty
 83 | 
 84 |     f <.> x = K1Append ($) f x
 85 | 
 86 |     liftF2 = K1Append
 87 | 
 88 | instance Alt.Alt (K1 c) where
 89 |     K1Empty <!> k = k
 90 |     k <!> K1Empty = k
 91 |     K1Char a <!> K1Char b = K1Char (RSet.union a b)
 92 | 
 93 |     a <!> b = K1Union a b
 94 | 
 95 | --
 96 | some1 :: K1 c a -> K1 c (NonEmpty a)
 97 | some1 K1Empty     = K1Empty
 98 | some1 (KPlus _ k) = K1Map pure (KPlus Greedy k)
 99 | some1 k           = KPlus Greedy k
100 | 
101 | -- | 'few1', not 'some1'.
102 | --
103 | -- Let's define two similar regexps
104 | --
105 | -- >>> let re1 = liftF2 (,) (few1 $ char 'a')  (some1 $ char 'a')
106 | -- >>> let re2 = liftF2 (,) (some1 $ char 'a') (few1  $ char 'a')
107 | --
108 | -- Their 'RE' behaviour is the same:
109 | --
110 | -- >>> C.equivalent (toRE re1) (toRE re2)
111 | -- True
112 | --
113 | -- >>> map (C.match $ toRE re1) ["aaa","bbb"]
114 | -- [True,False]
115 | --
116 | -- However, the 'RA' behaviour is different!
117 | --
118 | -- >>> R.match (toRA re1) "aaaaa"
119 | -- Just ('a' :| "",'a' :| "aaa")
120 | --
121 | -- >>> R.match (toRA re2) "aaaaa"
122 | -- Just ('a' :| "aaa",'a' :| "")
123 | --
124 | few1 :: K1 c a -> K1 c (NonEmpty a)
125 | few1 K1Empty     = K1Empty
126 | few1 (KPlus _ k) = K1Map pure (KPlus NonGreedy k)
127 | few1 k           = KPlus NonGreedy k
128 | 
129 | -------------------------------------------------------------------------------
130 | --
131 | -------------------------------------------------------------------------------
132 | 
133 | -- | >>> putPretty anyChar
134 | -- ^[^]$
135 | anyChar :: (Ord c, Enum c, Bounded c) => K1 c c
136 | anyChar = K1Char RSet.full
137 | 
138 | -- | >>> putPretty $ oneof ("foobar" :: [Char])
139 | -- ^[a-bfor]$
140 | oneof :: (Ord c, Enum c, Foldable f) => f c -> K1 c c
141 | oneof = K1Char . RSet.fromList . toList
142 | 
143 | -- | >>> putPretty $ char 'x'
144 | -- ^x$
145 | char :: (Ord c, Enum c) => c -> K1 c c
146 | char = K1Char . RSet.singleton
147 | 
148 | -- | >>> putPretty $ charRange 'a' 'z'
149 | -- ^[a-z]$
150 | charRange :: (Enum c, Ord c) => c -> c -> K1 c c
151 | charRange a b = K1Char (RSet.singletonRange (a, b))
152 | 
153 | -- | >>> putPretty dot
154 | -- ^.$
155 | dot :: K1 Char Char
156 | dot = K1Char dotRSet
157 | 
158 | -- | >>> putPretty everything1
159 | -- ^[^][^]*$
160 | everything1 :: (Ord c, Enum c, Bounded c) => K1 c (NonEmpty c)
161 | everything1 = some1 anyChar
162 | 
163 | -- | Matches nothing?
164 | isEmpty :: (Ord c, Enum c, Bounded c) => K1 c a -> Bool
165 | isEmpty k = C.equivalent (toRE k) C.empty
166 | 
167 | -- | Matches whole input?
168 | isEverything :: (Ord c, Enum c, Bounded c) => K1 c a -> Bool
169 | isEverything k = C.equivalent (toRE k) C.everything
170 | 
171 | string :: String -> K1 Char (NonEmpty Char)
172 | string []       = error "panic! K1.string []"
173 | string (x : xs) = K1String (x :| xs)
174 | 
175 | -------------------------------------------------------------------------------
176 | -- Matching
177 | -------------------------------------------------------------------------------
178 | 
179 | -- | Match using @regex-applicative@
180 | match :: K1 c a -> [c] -> Maybe a
181 | match = R.match . toRA
182 | 
183 | -------------------------------------------------------------------------------
184 | -- RE
185 | -------------------------------------------------------------------------------
186 | 
187 | -- | Convert to 'RE'.
188 | --
189 | -- >>> putPretty (toRE $ some1 (string "foo") :: RE.RE Char)
190 | -- ^foo(foo)*$
191 | --
192 | toRE :: (Ord c, Enum c, Bounded c) => K1 c a -> RE.RE c
193 | toRE = toKleene
194 | 
195 | -- | Convert to any 'Kleene'
196 | toKleene :: C.FiniteKleene c k => K1 c a -> k
197 | toKleene (K1Map _ a)      = toKleene a
198 | toKleene (K1Union a b)    = C.unions [toKleene a, toKleene b]
199 | toKleene (K1Append _ a b) = C.appends [toKleene a, toKleene b]
200 | toKleene (KPlus _ a)      = let k = toKleene a in C.appends [k, C.star k]
201 | toKleene (K1String s)     = C.appends (map C.char $ NE.toList s)
202 | toKleene K1Empty          = C.empty
203 | toKleene (K1Char cs)      = C.fromRSet cs
204 | 
205 | -------------------------------------------------------------------------------
206 | -- regex-applicative
207 | -------------------------------------------------------------------------------
208 | 
209 | -- | Convert 'K' to 'R.RE' from @regex-applicative@.
210 | --
211 | -- >>> R.match (toRA (string "xx" .> everything1 <. string "zz" :: K1 Char (NonEmpty Char))) "xxyyzyyzz"
212 | -- Just ('y' :| "yzyy")
213 | --
214 | -- See also 'match'.
215 | --
216 | toRA :: K1 c a -> R.RE c a
217 | toRA K1Empty              = empty
218 | toRA (K1Char cs)          = R.psym (\c -> RSet.member c cs)
219 | toRA (K1Append f a b)     = liftA2 f (toRA a) (toRA b)
220 | toRA (K1Union a b)        = toRA a <|> toRA b
221 | toRA (KPlus Greedy a)     = (:|) <$> toRA a <*> many (toRA a)
222 | toRA (KPlus NonGreedy a)  = (:|) <$> toRA a <*> R.few (toRA a)
223 | toRA (K1Map f a)          = fmap f (toRA a)
224 | toRA (K1String (x :| xs)) = (:|) <$> R.sym x <*> R.string xs
225 | 
226 | -------------------------------------------------------------------------------
227 | -- nullableProof
228 | -------------------------------------------------------------------------------
229 | 
230 | -- |
231 | -- >>> putPretty $ nullableProof (pure True)
232 | -- Right 1 , ^[]$
233 | --
234 | -- >>> putPretty $ nullableProof (many "xyz" :: K Char [String])
235 | -- Right [] , ^xyz(xyz)*$
236 | --
237 | -- >>> putPretty $ nullableProof (many $ toList <$> optional "x" <|> many "yz" :: K Char [[String]])
238 | -- Right [] , ^(x|yz(yz)*)(x|yz(yz)*)*$
239 | --
240 | nullableProof :: K c a -> Either (K1 c a) (a, K1 c a)
241 | nullableProof KEmpty    = Left K1Empty
242 | nullableProof (KPure x) = Right (x, K1Empty)
243 | nullableProof (KChar c) = Left (K1Char c)
244 | 
245 | nullableProof (KAppend f a b) = case (nullableProof a, nullableProof b) of
246 |     (Left x, Left y)               -> Left (K1Append f x y)
247 |     (Left x, Right (y', y))        -> Left ((`f` y') <$> x <!> K1Append f x y)
248 |     (Right (x', x), Left y)        -> Left (K1Append f x y <!> f x' <$> y)
249 |     (Right (x', x), Right (y', y)) -> Right
250 |         (f x' y'
251 |         , K1Append f x y
252 |         <!> flip f y' <$> x
253 |         <!> f x' <$> y
254 |         )
255 | 
256 | nullableProof (KUnion a b) = case (nullableProof a, nullableProof b) of
257 |     (Left x', Left _)              -> Left x'
258 |     (Right (x, x'), Left y')       -> Right (x, x' <!> y')
259 |     (Left x', Right (y, y'))       -> Right (y, x' <!> y')
260 |     (Right (x, x'), Right (_, y')) -> Right (x, x' <!> y')
261 | 
262 | nullableProof (KStar g a) = case nullableProof a of
263 |     Left x       -> Right ([], NE.toList <$> star1 x)
264 |     Right (_, x) -> Right ([], NE.toList <$> star1 x) -- note, we don't left recurse
265 |   where
266 |     star1 = case g of
267 |         Greedy    -> some1
268 |         NonGreedy -> few1
269 | 
270 | nullableProof (KMap f a) = case nullableProof a of
271 |     Right (x, x') -> Right (f x, fmap f x')
272 |     Left x'       -> Left (fmap f x')
273 | 
274 | nullableProof (KString [])       = Right ([], K1Empty)
275 | nullableProof (KString (c : cs)) = Left (NE.toList <$> K1String (c :| cs))
276 | 
277 | -------------------------------------------------------------------------------
278 | -- JavaScript
279 | -------------------------------------------------------------------------------
280 | 
281 | -- | Convert to non-matching JavaScript string which can be used
282 | -- as an argument to @new RegExp@
283 | --
284 | -- >>> putPretty ("foobar" :: K Char String)
285 | -- ^foobar$
286 | --
287 | -- >>> putPretty $ many ("foobar" :: K Char String)
288 | -- ^(foobar)*$
289 | --
290 | instance c ~ Char => Pretty (K1 c a) where
291 |     pretty = pretty . toRE
292 | 


--------------------------------------------------------------------------------
/src/Kleene/Internal/Functor.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP   #-}
  2 | {-# LANGUAGE GADTs #-}
  3 | {-# LANGUAGE Safe  #-}
  4 | {-# OPTIONS_HADDOCK not-home #-}
  5 | module Kleene.Internal.Functor (
  6 |     K (..),
  7 |     Greediness (..),
  8 |     -- * Constructors
  9 |     few,
 10 |     anyChar,
 11 |     oneof,
 12 |     char,
 13 |     charRange,
 14 |     dot,
 15 |     everything,
 16 |     everything1,
 17 |     -- * Queries
 18 |     isEmpty,
 19 |     isEverything,
 20 |     -- * Matching
 21 |     match,
 22 |     -- * Conversions
 23 |     toRE,
 24 |     toKleene,
 25 |     fromRE,
 26 |     toRA,
 27 |     ) where
 28 | 
 29 | import Prelude ()
 30 | import Prelude.Compat
 31 | 
 32 | import Control.Applicative (Alternative (..), liftA2)
 33 | import Data.Foldable       (toList)
 34 | import Data.Functor.Apply  (Apply (..))
 35 | import Data.RangeSet.Map   (RSet)
 36 | import Data.String         (IsString (..))
 37 | 
 38 | import qualified Data.Functor.Alt       as Alt
 39 | import qualified Data.RangeSet.Map      as RSet
 40 | import qualified Text.Regex.Applicative as R
 41 | 
 42 | import qualified Kleene.Classes         as C
 43 | import           Kleene.Internal.Pretty
 44 | import           Kleene.Internal.Sets
 45 | import qualified Kleene.RE              as RE
 46 | 
 47 | 
 48 | -- $setup
 49 | --
 50 | -- >>> import Control.Applicative (Alternative (..), liftA2)
 51 | -- >>> import Data.Semigroup (Semigroup (..))
 52 | -- >>> import Kleene.Internal.Pretty (putPretty)
 53 | -- >>> import qualified Kleene.Classes as C
 54 | -- >>> import qualified Kleene.RE as RE
 55 | -- >>> import qualified Text.Regex.Applicative as R
 56 | 
 57 | -------------------------------------------------------------------------------
 58 | -- Functor RE
 59 | -------------------------------------------------------------------------------
 60 | 
 61 | -- | Star behaviour
 62 | data Greediness
 63 |     = Greedy    -- ^ 'many'
 64 |     | NonGreedy -- ^ 'few'
 65 |   deriving (Eq, Ord, Show, Enum, Bounded)
 66 | 
 67 | -- | 'Applicative' 'Functor' regular expression.
 68 | data K c a where
 69 |     KEmpty  :: K c a
 70 |     KPure   :: a -> K c a
 71 |     KChar   :: (Ord c, Enum c) => RSet c -> K c c
 72 |     KAppend :: (a -> b -> r) -> K c a -> K c b -> K c r
 73 |     KUnion  :: K c a -> K c a -> K c a
 74 |     KStar   :: Greediness -> K c a -> K c [a]
 75 | 
 76 |     -- optimisations
 77 |     KMap    :: (a -> b) -> K c a -> K c b -- could use Pure and Append
 78 |     KString :: Eq c => [c] -> K c [c]     -- could use Char and Append
 79 | 
 80 | instance (c ~ Char, IsString a) => IsString (K c a) where
 81 |     fromString s = KMap fromString (KString s)
 82 | 
 83 | instance Functor (K c) where
 84 |     fmap _ KEmpty          = KEmpty
 85 |     fmap f (KPure x)       = KPure (f x)
 86 |     fmap f (KMap g k)      = KMap (f . g) k
 87 |     fmap f (KAppend g a b) = KAppend (\x y -> f (g x y)) a b
 88 |     fmap f k               = KMap f k
 89 | 
 90 | instance Apply (K c) where
 91 |     KEmpty <.> _ = KEmpty
 92 |     _ <.> KEmpty = KEmpty
 93 | 
 94 |     KPure f <.> k = fmap f k
 95 |     k <.> KPure x = fmap ($ x) k
 96 | 
 97 |     f <.> x = KAppend ($) f x
 98 | 
 99 |     liftF2 = KAppend
100 | 
101 | instance Applicative (K c) where
102 |     pure  = KPure
103 |     (<*>) = (<.>)
104 | 
105 | #if MIN_VERSION_base(4,10,0)
106 |     liftA2 = liftF2
107 | #endif
108 | 
109 | instance Alt.Alt (K c) where
110 |     KEmpty <!> k = k
111 |     k <!> KEmpty = k
112 |     KChar a <!> KChar b = KChar (RSet.union a b)
113 | 
114 |     a <!> b = KUnion a b
115 | 
116 |     many KEmpty      = KPure []
117 |     many (KStar _ k) = KMap pure (KStar Greedy k)
118 |     many k           = KStar Greedy k
119 | 
120 |     some KEmpty      = KEmpty
121 |     some (KStar _ k) = KMap pure (KStar Greedy k)
122 |     some k           = liftA2 (:) k (KStar Greedy k)
123 | 
124 | instance Alternative (K c) where
125 |     empty = KEmpty
126 |     (<|>) = (Alt.<!>)
127 |     some  = Alt.some
128 |     many  = Alt.many
129 | 
130 | -- | 'few', not 'many'.
131 | --
132 | -- Let's define two similar regexps
133 | --
134 | -- >>> let re1 = liftA2 (,) (few  $ char 'a') (many $ char 'a')
135 | -- >>> let re2 = liftA2 (,) (many $ char 'a') (few  $ char 'a')
136 | --
137 | -- Their 'RE' behaviour is the same:
138 | --
139 | -- >>> C.equivalent (toRE re1) (toRE re2)
140 | -- True
141 | --
142 | -- >>> map (C.match $ toRE re1) ["aaa","bbb"]
143 | -- [True,False]
144 | --
145 | -- However, the 'RA' behaviour is different!
146 | --
147 | -- >>> R.match (toRA re1) "aaaa"
148 | -- Just ("","aaaa")
149 | --
150 | -- >>> R.match (toRA re2) "aaaa"
151 | -- Just ("aaaa","")
152 | --
153 | few :: K c a -> K c [a]
154 | few KEmpty      = KPure []
155 | few (KStar _ k) = KMap pure (KStar NonGreedy k)
156 | few k           = KStar NonGreedy k
157 | 
158 | -------------------------------------------------------------------------------
159 | --
160 | -------------------------------------------------------------------------------
161 | 
162 | -- | >>> putPretty anyChar
163 | -- ^[^]$
164 | anyChar :: (Ord c, Enum c, Bounded c) => K c c
165 | anyChar = KChar RSet.full
166 | 
167 | -- | >>> putPretty $ oneof ("foobar" :: [Char])
168 | -- ^[a-bfor]$
169 | oneof :: (Ord c, Enum c, Foldable f) => f c -> K c c
170 | oneof = KChar . RSet.fromList . toList
171 | 
172 | -- | >>> putPretty $ char 'x'
173 | -- ^x$
174 | char :: (Ord c, Enum c) => c -> K c c
175 | char = KChar . RSet.singleton
176 | 
177 | -- | >>> putPretty $ charRange 'a' 'z'
178 | -- ^[a-z]$
179 | charRange :: (Enum c, Ord c) => c -> c -> K c c
180 | charRange a b = KChar (RSet.singletonRange (a, b))
181 | 
182 | -- | >>> putPretty dot
183 | -- ^.$
184 | dot :: K Char Char
185 | dot = KChar dotRSet
186 | 
187 | -- | >>> putPretty everything
188 | -- ^[^]*$
189 | everything :: (Ord c, Enum c, Bounded c) => K c [c]
190 | everything = many anyChar
191 | 
192 | -- | >>> putPretty everything1
193 | -- ^[^][^]*$
194 | everything1 :: (Ord c, Enum c, Bounded c) => K c [c]
195 | everything1 = some anyChar
196 | 
197 | -- | Matches nothing?
198 | isEmpty :: (Ord c, Enum c, Bounded c) => K c a -> Bool
199 | isEmpty k = C.equivalent (toRE k) C.empty
200 | 
201 | -- | Matches whole input?
202 | isEverything :: (Ord c, Enum c, Bounded c) => K c a -> Bool
203 | isEverything k = C.equivalent (toRE k) C.everything
204 | 
205 | -------------------------------------------------------------------------------
206 | -- Matching
207 | -------------------------------------------------------------------------------
208 | 
209 | -- | Match using @regex-applicative@
210 | match :: K c a -> [c] -> Maybe a
211 | match = R.match . toRA
212 | 
213 | -------------------------------------------------------------------------------
214 | -- RE
215 | -------------------------------------------------------------------------------
216 | 
217 | -- | Convert to 'RE'.
218 | --
219 | -- >>> putPretty (toRE $ many "foo" :: RE.RE Char)
220 | -- ^(foo)*$
221 | --
222 | toRE :: (Ord c, Enum c, Bounded c) => K c a -> RE.RE c
223 | toRE = toKleene
224 | 
225 | -- | Convert to any 'Kleene'
226 | toKleene :: C.FiniteKleene c k => K c a -> k
227 | toKleene (KMap _ a)      = toKleene a
228 | toKleene (KUnion a b)    = C.unions [toKleene a, toKleene b]
229 | toKleene (KAppend _ a b) = C.appends [toKleene a, toKleene b]
230 | toKleene (KStar _ a)     = C.star (toKleene a)
231 | toKleene (KString s)     = C.appends (map C.char s)
232 | toKleene KEmpty          = C.empty
233 | toKleene (KPure _)       = C.eps
234 | toKleene (KChar cs)      = C.fromRSet cs
235 | 
236 | -- | Convert from 'RE'.
237 | --
238 | -- /Note:/ all 'RE.REStar's are converted to 'Greedy' ones,
239 | -- it doesn't matter, as we don't capture anything.
240 | --
241 | -- >>> match (fromRE "foobar") "foobar"
242 | -- Just "foobar"
243 | --
244 | -- >>> match (fromRE $ C.star "a" <> C.star "a") "aaaa"
245 | -- Just "aaaa"
246 | --
247 | fromRE :: (Ord c, Enum c) => RE.RE c -> K c [c]
248 | fromRE (RE.REChars cs)    = pure <$> KChar cs
249 | fromRE (RE.REAppend rs)   = concat <$> traverse fromRE rs
250 | fromRE (RE.REUnion cs rs) = foldr (KUnion . fromRE) (pure <$> KChar cs) (toList rs)
251 | fromRE (RE.REStar r)      = concat <$> KStar Greedy (fromRE r)
252 | 
253 | -------------------------------------------------------------------------------
254 | -- regex-applicative
255 | -------------------------------------------------------------------------------
256 | 
257 | -- | Convert 'K' to 'R.RE' from @regex-applicative@.
258 | --
259 | -- >>> R.match (toRA ("xx" *> everything <* "zz" :: K Char String)) "xxyyyzz"
260 | -- Just "yyy"
261 | --
262 | -- See also 'match'.
263 | --
264 | toRA :: K c a -> R.RE c a
265 | toRA KEmpty              = empty
266 | toRA (KPure x)           = pure x
267 | toRA (KChar cs)          = R.psym (\c -> RSet.member c cs)
268 | toRA (KAppend f a b)     = liftA2 f (toRA a) (toRA b)
269 | toRA (KUnion a b)        = toRA a <|> toRA b
270 | toRA (KStar Greedy a)    = many (toRA a)
271 | toRA (KStar NonGreedy a) = R.few (toRA a)
272 | toRA (KMap f a)          = fmap f (toRA a)
273 | toRA (KString s)         = R.string s
274 | 
275 | -------------------------------------------------------------------------------
276 | -- JavaScript
277 | -------------------------------------------------------------------------------
278 | 
279 | -- | Convert to non-matching JavaScript string which can be used
280 | -- as an argument to @new RegExp@
281 | --
282 | -- >>> putPretty ("foobar" :: K Char String)
283 | -- ^foobar$
284 | --
285 | -- >>> putPretty $ many ("foobar" :: K Char String)
286 | -- ^(foobar)*$
287 | --
288 | instance c ~ Char => Pretty (K c a) where
289 |     pretty = pretty . toRE
290 | 


--------------------------------------------------------------------------------
/src/Kleene/Internal/Partition.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Safe #-}
  2 | {-# OPTIONS_HADDOCK not-home #-}
  3 | module Kleene.Internal.Partition where
  4 | 
  5 | import Data.Semigroup (Semigroup (..))
  6 | import Prelude ()
  7 | import Prelude.Compat
  8 | 
  9 | import Data.Foldable      (toList)
 10 | import Data.List.NonEmpty (NonEmpty (..))
 11 | import Data.RangeSet.Map  (RSet)
 12 | import Data.Set           (Set)
 13 | 
 14 | import qualified Data.Function.Step.Discrete.Closed as SF
 15 | import qualified Data.List.NonEmpty                 as NE
 16 | import qualified Data.RangeSet.Map                  as RSet
 17 | import qualified Data.Set                           as Set
 18 | 
 19 | import Test.QuickCheck
 20 | 
 21 | -- $setup
 22 | -- >>> import Data.Word
 23 | -- >>> import Data.Semigroup (Semigroup (..))
 24 | -- >>> import Test.QuickCheck (Arbitrary (..), (===), (==>))
 25 | -- >>> import Data.RangeSet.Map (RSet)
 26 | -- >>> import qualified Data.RangeSet.Map as RSet
 27 | --
 28 | -- >>> let asPartitionChar :: Partition Char -> Partition Char; asPartitionChar = id
 29 | -- >>> instance (Ord a, Enum a, Arbitrary a) => Arbitrary (RSet a) where arbitrary = fmap RSet.fromRangeList arbitrary
 30 | 
 31 | -- | 'Partition' devides type into disjoint connected partitions.
 32 | --
 33 | -- /Note:/ we could have non-connecter partitions too,
 34 | -- but that would be more complicated.
 35 | -- This variant is correct by construction, but less precise.
 36 | --
 37 | -- It's enought to store last element of each piece.
 38 | --
 39 | -- @'Partition' (fromList [x1, x2, x3]) :: 'Partition' s@ describes a partition of /Set/ @s@, as
 40 | --
 41 | -- \[
 42 | -- \{ x \mid x \le x_1 \} \cup
 43 | -- \{ x \mid x_1 < x \le x_2 \} \cup
 44 | -- \{ x \mid x_2 < x \le x_3 \} \cup
 45 | -- \{ x \mid x_3 < x \}
 46 | -- \]
 47 | --
 48 | -- /Note:/ it's enough to check upper bound conditions only if checks are performed in order.
 49 | --
 50 | -- /Invariant:/ 'maxBound' is not in the set.
 51 | --
 52 | newtype Partition a  = Partition { unPartition :: Set a }
 53 |   deriving (Eq, Ord)
 54 | 
 55 | -- | Check invariant.
 56 | invariant :: (Ord a, Bounded a) => Partition a -> Bool
 57 | invariant (Partition xs) = Set.notMember maxBound xs
 58 | 
 59 | -------------------------------------------------------------------------------
 60 | -- Instances
 61 | -------------------------------------------------------------------------------
 62 | 
 63 | instance Show a => Show (Partition a) where
 64 |     showsPrec d (Partition xs)
 65 |         = showParen (d > 10)
 66 |         $ showString "fromSeparators "
 67 |         . showsPrec 11 (Set.toList xs)
 68 | 
 69 | -- | prop> invariant (asPartitionChar p)
 70 | instance (Enum a, Bounded a, Ord a, Arbitrary a) => Arbitrary (Partition a) where
 71 |     arbitrary = fromSeparators <$> arbitrary
 72 | 
 73 | -- | See 'wedge'.
 74 | instance (Enum a, Bounded a, Ord a) => Semigroup (Partition a) where
 75 |     (<>) = wedge
 76 | 
 77 | instance (Enum a, Bounded a, Ord a) => Monoid (Partition a) where
 78 |     mempty = whole
 79 |     mappend = (<>)
 80 | 
 81 | -------------------------------------------------------------------------------
 82 | -- Constructors
 83 | -------------------------------------------------------------------------------
 84 | 
 85 | fromSeparators :: (Enum a, Bounded a, Ord a) => [a] -> Partition a
 86 | fromSeparators = Partition . Set.fromList . filter (/= maxBound)
 87 | 
 88 | -- | Construct 'Partition' from list of 'RSet's.
 89 | --
 90 | -- RSet intervals are closed on both sides.
 91 | fromRSets :: (Enum a, Bounded a, Ord a) => [RSet a] -> Partition a
 92 | fromRSets rs = Partition $ Set.fromList $ concat
 93 |     [ (if x == minBound then [] else [pred x]) ++
 94 |       (if y == maxBound then [] else [y])
 95 |     | r <- rs
 96 |     , (x, y) <- RSet.toRangeList r
 97 |     ]
 98 | 
 99 | fromRSet :: (Enum a, Bounded a, Ord a) => RSet a -> Partition a
100 | fromRSet r
101 |     | r == RSet.empty = whole
102 |     | r == RSet.full  = whole
103 |     | otherwise       = fromRSets [r]
104 | 
105 | whole :: Partition a
106 | whole = Partition Set.empty
107 | 
108 | -------------------------------------------------------------------------------
109 | -- Querying
110 | -------------------------------------------------------------------------------
111 | 
112 | -- | Count of sets in a 'Partition'.
113 | --
114 | -- >>> size whole
115 | -- 1
116 | --
117 | -- >>> size $ split (10 :: Word8)
118 | -- 2
119 | --
120 | -- prop> size (asPartitionChar p) >= 1
121 | --
122 | size :: Partition a -> Int
123 | size (Partition xs) = 1 + length xs
124 | 
125 | -- | Extract examples from each subset in a 'Partition'.
126 | --
127 | -- >>> examples $ split (10 :: Word8)
128 | -- fromList [10,255]
129 | --
130 | -- >>> examples $ split (10 :: Word8) <> split 20
131 | -- fromList [10,20,255]
132 | --
133 | -- prop> invariant p ==> size (asPartitionChar p) === length (examples p)
134 | --
135 | examples :: (Bounded a, Enum a, Ord a) => Partition a -> Set a
136 | examples (Partition xs) = Set.insert maxBound xs
137 | 
138 | -- |
139 | --
140 | -- prop> all (uncurry (<=)) $ intervals $ asPartitionChar p
141 | intervals :: (Enum a, Bounded a, Ord a) => Partition a -> NonEmpty (a, a)
142 | intervals (Partition xs) = go minBound (toList xs) where
143 |     go x []       = (x, maxBound) :| []
144 |     go x (y : ys) = (x, y) `NE.cons` go y ys
145 | 
146 | -------------------------------------------------------------------------------
147 | --
148 | -- Operations
149 | -------------------------------------------------------------------------------
150 | 
151 | -- | Wedge partitions.
152 | --
153 | -- >>> split (10 :: Word8) <> split 20
154 | -- fromSeparators [10,20]
155 | --
156 | -- prop> whole `wedge` (p :: Partition Char) === p
157 | -- prop> (p :: Partition Char) <> whole === p
158 | -- prop> asPartitionChar p <> q === q <> p
159 | -- prop> asPartitionChar p <> p === p
160 | -- prop> invariant $ asPartitionChar p <> q
161 | --
162 | wedge :: Ord a => Partition a -> Partition a -> Partition a
163 | wedge (Partition as) (Partition bs) = Partition (Set.union as bs)
164 | 
165 | -- | Simplest partition: given @x@ partition space into @[min..x) and [x .. max]@
166 | --
167 | -- >>> split (128 :: Word8)
168 | -- fromSeparators [128]
169 | --
170 | split :: (Enum a, Bounded a, Eq a) => a -> Partition a
171 | split x
172 |     | x == minBound = Partition Set.empty
173 |     | otherwise     = Partition (Set.singleton x)
174 | 
175 | -------------------------------------------------------------------------------
176 | -- Conversion
177 | -------------------------------------------------------------------------------
178 | 
179 | -- | Make a step function.
180 | toSF :: (Enum a, Bounded a, Ord a) => (a -> b) -> Partition a -> SF.SF a b
181 | toSF f (Partition p) = SF.fromList
182 |     (map (\k -> (k, f k)) $ toList as)
183 |     (f maxBound)
184 |   where
185 |     as = toList p
186 | 


--------------------------------------------------------------------------------
/src/Kleene/Internal/Pretty.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE GADTs #-}
 2 | {-# LANGUAGE Safe  #-}
 3 | {-# OPTIONS_HADDOCK not-home #-}
 4 | module Kleene.Internal.Pretty (
 5 |     Pretty (..),
 6 |     putPretty,
 7 |     ) where
 8 | 
 9 | import Prelude ()
10 | import Prelude.Compat
11 | 
12 | import Data.Monoid          (Endo (..))
13 | import Data.RangeSet.Map    (RSet)
14 | import Kleene.Internal.Sets (dotRSet)
15 | 
16 | import qualified Data.RangeSet.Map as RSet
17 | 
18 | -------------------------------------------------------------------------------
19 | -- Pretty
20 | -------------------------------------------------------------------------------
21 | 
22 | -- | Pretty class.
23 | --
24 | -- For @'pretty' :: 'Kleene.RE.RE' -> 'String'@ gives a
25 | -- representation accepted by many regex engines.
26 | --
27 | class Pretty a where
28 |     pretty :: a -> String
29 |     pretty x = prettyS x ""
30 | 
31 |     prettyS :: a -> ShowS
32 |     prettyS = showString . pretty
33 | 
34 |     {-# MINIMAL pretty | prettyS #-}
35 | 
36 | -- | @'putStrLn' . 'pretty'@
37 | putPretty :: Pretty a => a -> IO ()
38 | putPretty = putStrLn . pretty
39 | 
40 | instance c ~ Char => Pretty (RSet c) where
41 |     prettyS cs
42 |         | RSet.size cs == 1 = prettyS (head (RSet.elems cs))
43 |         | cs == dotRSet  = showChar '.'
44 |         | ics == dotRSet = showString "[^.]"
45 |         | RSet.size cs < RSet.size ics = prettyRSet True cs
46 |         | otherwise                    = prettyRSet False ics
47 |       where
48 |         ics = RSet.complement cs
49 | 
50 | prettyRSet :: Bool -> RSet Char -> ShowS
51 | prettyRSet c cs
52 |     = showChar '['
53 |     . (if c then id else showChar '^')
54 |     . appEndo (foldMap (Endo . f) (RSet.toRangeList cs))
55 |     . showChar ']'
56 |   where
57 |     f (a, b)
58 |       | a == b = prettyS a
59 |       | otherwise = prettyS a . showChar '-' . prettyS b
60 | 
61 | -- | Escapes special regexp characters
62 | instance Pretty Char where
63 |     prettyS '.' = showString "\\."
64 |     prettyS '-' = showString "\\-"
65 |     prettyS '^' = showString "\\^"
66 |     prettyS '*' = showString "\\*"
67 |     prettyS '+' = showString "\\+"
68 |     prettyS '?' = showString "\\?"
69 |     prettyS '(' = showString "\\("
70 |     prettyS ')' = showString "\\)"
71 |     prettyS '[' = showString "\\["
72 |     prettyS ']' = showString "\\]"
73 |     prettyS '\r' = showString "\\r"
74 |     prettyS '\n' = showString "\\n"
75 |     prettyS '\t' = showString "\\t"
76 |     prettyS c   = showChar c
77 | 
78 | instance Pretty Bool where
79 |     prettyS True  = showChar '1'
80 |     prettyS False = showChar '0'
81 | 
82 | instance Pretty () where
83 |     prettyS _ = showChar '.'
84 | 
85 | instance Pretty a => Pretty (Maybe a) where
86 |     prettyS Nothing  = showString "Nothing"
87 |     prettyS (Just x) = prettyS x
88 | 
89 | instance (Pretty a, Pretty b) => Pretty (Either a b) where
90 |     prettyS (Left x)  = showString "Left " . prettyS x
91 |     prettyS (Right x) = showString "Right " . prettyS x
92 | 
93 | instance (Pretty a, Pretty b) => Pretty (a, b) where
94 |     prettyS (x, y) = prettyS x . showString " , " . prettyS y
95 | 
96 | instance Show a => Pretty [a] where
97 |     prettyS = showList
98 | 


--------------------------------------------------------------------------------
/src/Kleene/Internal/RE.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns           #-}
  2 | {-# LANGUAGE FlexibleInstances      #-}
  3 | {-# LANGUAGE FunctionalDependencies #-}
  4 | {-# LANGUAGE GADTs                  #-}
  5 | {-# LANGUAGE Safe                   #-}
  6 | {-# LANGUAGE ScopedTypeVariables    #-}
  7 | {-# OPTIONS_HADDOCK not-home #-}
  8 | module Kleene.Internal.RE (
  9 |     RE (..),
 10 |     -- * Construction
 11 |     --
 12 |     -- | Binary operators are
 13 |     --
 14 |     -- * '<>' for append
 15 |     -- * '\/' for union
 16 |     --
 17 |     empty,
 18 |     eps,
 19 |     everything,
 20 |     char,
 21 |     charRange,
 22 |     anyChar,
 23 |     appends,
 24 |     unions,
 25 |     star,
 26 |     string,
 27 |     -- * Derivative
 28 |     nullable,
 29 |     derivate,
 30 |     -- * Transition map
 31 |     transitionMap,
 32 |     leadingChars,
 33 |     -- * Equivalence
 34 |     equivalent,
 35 |     -- * Generation
 36 |     generate,
 37 |     -- * Other
 38 |     isEmpty,
 39 |     nullableProof,
 40 |     ) where
 41 | 
 42 | import Data.Semigroup (Semigroup (..))
 43 | import Prelude ()
 44 | import Prelude.Compat
 45 | 
 46 | import Control.Applicative (liftA2)
 47 | import Data.Foldable       (toList)
 48 | import Data.List           (foldl')
 49 | import Data.Map            (Map)
 50 | import Data.RangeSet.Map   (RSet)
 51 | import Data.Set            (Set)
 52 | import Data.String         (IsString (..))
 53 | 
 54 | import qualified Data.Function.Step.Discrete.Closed as SF
 55 | import qualified Data.Map                           as Map
 56 | import qualified Data.RangeSet.Map                  as RSet
 57 | import qualified Data.Set                           as Set
 58 | import qualified Test.QuickCheck                    as QC
 59 | import qualified Test.QuickCheck.Gen                as QC (unGen)
 60 | import qualified Test.QuickCheck.Random             as QC (mkQCGen)
 61 | 
 62 | import qualified Kleene.Classes            as C
 63 | import qualified Kleene.Internal.Partition as P
 64 | import           Kleene.Internal.Pretty
 65 | 
 66 | -- $setup
 67 | -- >>> :set -XOverloadedStrings
 68 | -- >>> import Control.Monad (void)
 69 | -- >>> import Data.Foldable (traverse_)
 70 | -- >>> import Data.List (sort)
 71 | -- >>> import Data.Maybe (isJust)
 72 | -- >>> import Data.Semigroup (Semigroup (..))
 73 | --
 74 | -- >>> import Test.QuickCheck ((===))
 75 | -- >>> import qualified Test.QuickCheck as QC
 76 | -- >>> import qualified Data.Map as Map
 77 | -- >>> import qualified Data.Function.Step.Discrete.Closed as SF
 78 | --
 79 | -- >>> import Kleene.Classes (match)
 80 | -- >>> import Kleene.Internal.Pretty (putPretty, pretty)
 81 | -- >>> import Algebra.Lattice (bottom, (\/))
 82 | -- >>> import Kleene.RE ()
 83 | --
 84 | -- >>> let asREChar :: RE Char -> RE Char; asREChar = id
 85 | 
 86 | -------------------------------------------------------------------------------
 87 | -- RE
 88 | -------------------------------------------------------------------------------
 89 | 
 90 | -- | Regular expression
 91 | --
 92 | -- Constructors are exposed, but you should use
 93 | -- smart constructors in this module to construct 'RE'.
 94 | --
 95 | -- The 'Eq' and 'Ord' instances are structural.
 96 | -- The 'Kleene' etc constructors do "weak normalisation", so for values
 97 | -- constructed using those operations 'Eq' witnesses "weak equivalence".
 98 | -- See 'equivalent' for regular-expression equivalence.
 99 | --
100 | -- Structure is exposed in "Kleene.RE" module but consider constructors as
101 | -- half-internal.  There are soft-invariants, but violating them shouldn't
102 | -- break anything in the package. (e.g. 'transitionMap' will eventually
103 | -- terminate, but may create more redundant states if starting regexp is not
104 | -- "weakly normalised").
105 | --
106 | data RE c
107 |     = REChars (RSet c)               -- ^ Single character
108 |     | REAppend [RE c]                -- ^ Concatenation
109 |     | REUnion (RSet c) (Set (RE c))  -- ^ Union
110 |     | REStar (RE c)                  -- ^ Kleene star
111 |   deriving (Eq, Ord, Show)
112 | 
113 | -------------------------------------------------------------------------------
114 | -- Smart constructor
115 | -------------------------------------------------------------------------------
116 | 
117 | -- | Empty regex. Doesn't accept anything.
118 | --
119 | -- >>> putPretty (empty :: RE Char)
120 | -- ^[]$
121 | --
122 | -- >>> putPretty (bottom :: RE Char)
123 | -- ^[]$
124 | --
125 | -- prop> match (empty :: RE Char) (s :: String) === False
126 | --
127 | empty :: RE c
128 | empty = REChars RSet.empty
129 | 
130 | -- | Everything.
131 | --
132 | -- >>> putPretty everything
133 | -- ^[^]*$
134 | --
135 | -- prop> match (everything :: RE Char) (s :: String) === True
136 | --
137 | everything :: Bounded c => RE c
138 | everything = REStar (REChars RSet.full)
139 | 
140 | -- | Empty string. /Note:/ different than 'empty'.
141 | --
142 | -- >>> putPretty eps
143 | -- ^$
144 | --
145 | -- >>> putPretty (mempty :: RE Char)
146 | -- ^$
147 | --
148 | -- prop> match (eps :: RE Char) s === null (s :: String)
149 | --
150 | eps :: RE c
151 | eps = REAppend []
152 | 
153 | -- |
154 | --
155 | -- >>> putPretty (char 'x')
156 | -- ^x$
157 | --
158 | char :: c -> RE c
159 | char = REChars . RSet.singleton
160 | 
161 | -- |
162 | --
163 | -- >>> putPretty $ charRange 'a' 'z'
164 | -- ^[a-z]$
165 | --
166 | charRange :: Ord c => c -> c -> RE c
167 | charRange c c' = REChars $ RSet.singletonRange (c, c')
168 | 
169 | -- | Any character. /Note:/ different than dot!
170 | --
171 | -- >>> putPretty anyChar
172 | -- ^[^]$
173 | --
174 | anyChar :: Bounded c => RE c
175 | anyChar = REChars RSet.full
176 | 
177 | -- | Concatenate regular expressions.
178 | --
179 | -- prop> (asREChar r <> s) <> t === r <> (s <> t)
180 | --
181 | -- prop> asREChar r <> empty === empty
182 | -- prop> empty <> asREChar r === empty
183 | --
184 | -- prop> asREChar r <> eps === r
185 | -- prop> eps <> asREChar r === r
186 | --
187 | appends :: Eq c => [RE c] -> RE c
188 | appends rs0
189 |     | elem empty rs1 = empty
190 |     | otherwise = case rs1 of
191 |         [r] -> r
192 |         rs  -> REAppend rs
193 |   where
194 |     rs1 = concatMap f rs0
195 | 
196 |     f :: RE c -> [RE c]
197 |     f (REAppend rs) = concatMap f rs
198 |     f r             = [r]
199 | 
200 | -- | Union of regular expressions.
201 | --
202 | -- prop> asREChar r \/ r === r
203 | -- prop> asREChar r \/ s === s \/ r
204 | -- prop> (asREChar r \/ s) \/ t === r \/ (s \/ t)
205 | --
206 | -- prop> empty \/ asREChar r === r
207 | -- prop> asREChar r \/ empty === r
208 | --
209 | -- prop> everything \/ asREChar r === everything
210 | -- prop> asREChar r \/ everything === everything
211 | --
212 | unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c
213 | unions = uncurry mk . foldMap f where
214 |     mk cs rss
215 |         | Set.null rss = REChars cs
216 |         | Set.member everything rss = everything
217 |         | RSet.null cs = case Set.toList rss of
218 |             []  -> empty
219 |             [r] -> r
220 |             _   -> REUnion cs rss
221 |         | otherwise    = REUnion cs rss
222 | 
223 |     f (REUnion cs rs) = (cs, rs)
224 |     f (REChars cs)    = (cs, Set.empty)
225 |     f r               = (mempty, Set.singleton r)
226 | 
227 | -- | Kleene star.
228 | --
229 | -- prop> star (star r) === star (asREChar r)
230 | --
231 | -- prop> star eps     === asREChar eps
232 | -- prop> star empty   === asREChar eps
233 | -- prop> star anyChar === asREChar everything
234 | --
235 | -- prop> star (r      \/ eps) === star (asREChar r)
236 | -- prop> star (char c \/ eps) === star (asREChar (char c))
237 | -- prop> star (empty  \/ eps) === asREChar eps
238 | --
239 | star :: Ord c => RE c -> RE c
240 | star r = case r of
241 |     REStar _                          -> r
242 |     REAppend []                       -> eps
243 |     REChars cs | RSet.null cs         -> eps
244 |     REUnion cs rs | Set.member eps rs -> case Set.toList rs' of
245 |         []                  -> star (REChars cs)
246 |         [r'] | RSet.null cs -> star r'
247 |         _                   -> REStar (REUnion cs rs')
248 |       where
249 |         rs' = Set.delete eps rs
250 |     _                                 -> REStar r
251 | 
252 | -- | Literal string.
253 | --
254 | -- >>> putPretty ("foobar" :: RE Char)
255 | -- ^foobar$
256 | --
257 | -- >>> putPretty ("(.)" :: RE Char)
258 | -- ^\(\.\)$
259 | --
260 | string :: [c] -> RE c
261 | string []  = eps
262 | string [c] = REChars (RSet.singleton c)
263 | string cs  = REAppend $ map (REChars . RSet.singleton) cs
264 | 
265 | instance (Ord c, Enum c, Bounded c) => C.Kleene (RE c) where
266 |     empty      = empty
267 |     eps        = eps
268 |     appends    = appends
269 |     unions     = unions
270 |     star       = star
271 | 
272 | instance (Ord c, Enum c, Bounded c) => C.CharKleene c (RE c) where
273 |     char       = char
274 | 
275 | instance (Ord c, Enum c, Bounded c) => C.FiniteKleene c (RE c) where
276 |     everything = everything
277 |     charRange  = charRange
278 |     fromRSet   = REChars
279 |     anyChar    = anyChar
280 | 
281 | -------------------------------------------------------------------------------
282 | -- Pseudo lattice
283 | -------------------------------------------------------------------------------
284 | 
285 | (\/) :: (Ord c, Enum c, Bounded c) => RE c -> RE c -> RE c
286 | r \/ r' = unions [r, r']
287 | 
288 | -------------------------------------------------------------------------------
289 | -- derivative
290 | -------------------------------------------------------------------------------
291 | 
292 | -- | We say that a regular expression r is nullable if the language it defines
293 | -- contains the empty string.
294 | --
295 | -- >>> nullable eps
296 | -- True
297 | --
298 | -- >>> nullable (star "x")
299 | -- True
300 | --
301 | -- >>> nullable "foo"
302 | -- False
303 | --
304 | nullable :: RE c -> Bool
305 | nullable (REChars _)      = False
306 | nullable (REAppend rs)    = all nullable rs
307 | nullable (REUnion _cs rs) = any nullable rs
308 | nullable (REStar _)       = True
309 | 
310 | -- | Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\)
311 | -- with respect to a symbol \(a \in \Sigma\) is the language that includes only
312 | -- those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
313 | --
314 | -- >>> putPretty $ derivate 'f' "foobar"
315 | -- ^oobar$
316 | --
317 | -- >>> putPretty $ derivate 'x' $ "xyz" \/ "abc"
318 | -- ^yz$
319 | --
320 | -- >>> putPretty $ derivate 'x' $ star "xyz"
321 | -- ^yz(xyz)*$
322 | --
323 | derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c
324 | derivate c (REChars cs)     = derivateChars c cs
325 | derivate c (REUnion cs rs)  = unions $ derivateChars c cs : [ derivate c r | r <- toList rs]
326 | derivate c (REAppend rs)    = derivateAppend c rs
327 | derivate c rs@(REStar r)    = derivate c r <> rs
328 | 
329 | derivateAppend :: (Ord c, Enum c, Bounded c) => c -> [RE c] -> RE c
330 | derivateAppend _ []      = empty
331 | derivateAppend c [r]     = derivate c r
332 | derivateAppend c (r:rs)
333 |     | nullable r         = unions [r' <> appends rs, rs']
334 |     | otherwise          = r' <> appends rs
335 |   where
336 |     r'  = derivate c r
337 |     rs' = derivateAppend c rs
338 | 
339 | derivateChars :: Ord c =>  c -> RSet c -> RE c
340 | derivateChars c cs
341 |     | c `RSet.member` cs      = eps
342 |     | otherwise               = empty
343 | 
344 | instance (Ord c, Enum c, Bounded c) => C.Derivate c (RE c) where
345 |     nullable = nullable
346 |     derivate = derivate
347 | 
348 | instance (Ord c, Enum c, Bounded c) => C.Match c (RE c) where
349 |     match r = nullable . foldl' (flip derivate) r
350 | 
351 | -------------------------------------------------------------------------------
352 | -- Nullable with proof
353 | -------------------------------------------------------------------------------
354 | 
355 | -- | Not only we can decide whether 'RE' is nullable, we can also
356 | -- remove the empty string:
357 | --
358 | -- >>> putPretty $ nullableProof eps
359 | -- ^[]$
360 | --
361 | -- >>> putPretty $ nullableProof $ star "x"
362 | -- ^xx*$
363 | --
364 | -- >>> putPretty $ nullableProof "foo"
365 | -- Nothing
366 | --
367 | -- 'nullableProof' is consistent with 'nullable':
368 | --
369 | -- prop> isJust (nullableProof r) === nullable (asREChar r)
370 | --
371 | -- The returned regular expression is not nullable:
372 | --
373 | -- prop> maybe True (not . nullable) $ nullableProof $ asREChar r
374 | --
375 | -- If we union with empty regex, we get a equivalent regular expression
376 | -- we started with:
377 | --
378 | -- prop> maybe r (eps \/) (nullableProof r) `equivalent` (asREChar r)
379 | --
380 | nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c)
381 | nullableProof (REChars _)   = Nothing
382 | 
383 | nullableProof (REAppend []) = Just empty
384 | nullableProof (REAppend xs)
385 |     | Just ys <- traverse (\x -> (,) x <$> nullableProof x) xs = Just (go ys)
386 |     | otherwise = Nothing
387 |   where
388 |     go :: [(RE c, RE c)] -> RE c
389 |     go rs = unions $ map appends $ tail $ traverse (\(r,r') -> [r,r']) rs
390 | 
391 | nullableProof (REUnion cs rs)
392 |     | any nullable rs = Just $ REUnion cs $ Set.map (\r -> maybe r id $ nullableProof r) rs
393 |     | otherwise       = Nothing
394 | 
395 | nullableProof (REStar r)
396 |     | Just r' <- nullableProof r = Just (r' <> REStar r')
397 |     | otherwise                  = Just (r <> REStar r)
398 | 
399 | -------------------------------------------------------------------------------
400 | -- isEmpty
401 | -------------------------------------------------------------------------------
402 | 
403 | -- | Whether 'RE' is (structurally) equal to 'empty'.
404 | --
405 | -- prop> isEmpty r === all (not . nullable) (Map.keys $ transitionMap $ asREChar r)
406 | isEmpty :: RE c -> Bool
407 | isEmpty (REChars rs) = RSet.null rs
408 | isEmpty _            = False
409 | 
410 | -------------------------------------------------------------------------------
411 | -- States
412 | -------------------------------------------------------------------------------
413 | 
414 | -- | Transition map. Used to construct 'Kleene.DFA.DFA'.
415 | --
416 | -- >>> void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: RE Char)
417 | -- ^[]$ : \_ -> "^[]$"
418 | -- ^b$ : \x -> if
419 | --     | x <= 'a'  -> "^[]$"
420 | --     | x <= 'b'  -> "^$"
421 | --     | otherwise -> "^[]$"
422 | -- ^$ : \_ -> "^[]$"
423 | -- ^ab$ : \x -> if
424 | --     | x <= '`'  -> "^[]$"
425 | --     | x <= 'a'  -> "^b$"
426 | --     | otherwise -> "^[]$"
427 | --
428 | transitionMap
429 |     :: forall c. (Ord c, Enum c, Bounded c)
430 |     => RE c
431 |     -> Map (RE c) (SF.SF c (RE c))
432 | transitionMap re = go Map.empty [re] where
433 |     go :: Map (RE c) (SF.SF c (RE c))
434 |        -> [RE c]
435 |        -> Map (RE c) (SF.SF c (RE c))
436 |     go !acc [] = acc
437 |     go acc (r : rs)
438 |         | r `Map.member` acc = go acc rs
439 |         | otherwise = go (Map.insert r pm acc) (SF.values pm ++ rs)
440 |       where
441 |         pm = P.toSF (\c -> derivate c r) (leadingChars r)
442 | 
443 | instance (Ord c, Enum c, Bounded c) => C.TransitionMap c (RE c) where
444 |     transitionMap = transitionMap
445 | 
446 | -- | Leading character sets of regular expression.
447 | --
448 | -- >>> leadingChars "foo"
449 | -- fromSeparators "ef"
450 | --
451 | -- >>> leadingChars (star "b" <> star "e")
452 | -- fromSeparators "abde"
453 | --
454 | -- >>> leadingChars (charRange 'b' 'z')
455 | -- fromSeparators "az"
456 | --
457 | leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> P.Partition c
458 | leadingChars (REChars cs)    = P.fromRSet cs
459 | leadingChars (REUnion cs rs) = P.fromRSet cs <> foldMap leadingChars rs
460 | leadingChars (REStar r)      = leadingChars r
461 | leadingChars (REAppend rs)   = leadingCharsAppend rs
462 | 
463 | leadingCharsAppend :: (Ord c, Enum c, Bounded c) => [RE c] -> P.Partition c
464 | leadingCharsAppend [] = P.whole
465 | leadingCharsAppend (r : rs)
466 |     | nullable r = leadingChars r <> leadingCharsAppend rs
467 |     | otherwise  = leadingChars r
468 | 
469 | -------------------------------------------------------------------------------
470 | -- Equivalence
471 | -------------------------------------------------------------------------------
472 | 
473 | -- | Whether two regexps are equivalent.
474 | --
475 | -- @
476 | -- 'equivalent' re1 re2 <=> forall s. 'match' re1 s === 'match' re1 s
477 | -- @
478 | --
479 | -- >>> let re1 = star "a" <> "a"
480 | -- >>> let re2 = "a" <> star "a"
481 | --
482 | -- These are different regular expressions, even we perform
483 | -- some normalisation-on-construction:
484 | --
485 | -- >>> re1 == re2
486 | -- False
487 | --
488 | -- They are however equivalent:
489 | --
490 | -- >>> equivalent re1 re2
491 | -- True
492 | --
493 | -- The algorithm works by executing 'states' on "product" regexp,
494 | -- and checking whether all resulting states are both accepting or rejecting.
495 | --
496 | -- @
497 | -- re1 == re2 ==> 'equivalent' re1 re2
498 | -- @
499 | --
500 | -- === More examples
501 | --
502 | -- >>> let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
503 | -- >>> example re1 re2
504 | -- ^a*a$
505 | -- ^aa*$
506 | -- True
507 | --
508 | -- >>> example (star "aa") (star "aaa")
509 | -- ^(aa)*$
510 | -- ^(aaa)*$
511 | -- False
512 | --
513 | -- >>> example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
514 | -- ^(aa)*(aaa)*$
515 | -- ^(aaa)*(aa)*$
516 | -- True
517 | --
518 | -- >>> example (star ("a" \/ "b")) (star $ star "a" <> star "b")
519 | -- ^[a-b]*$
520 | -- ^(a*b*)*$
521 | -- True
522 | --
523 | equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool
524 | equivalent x0 y0 = go mempty [(x0, y0)] where
525 |     go :: Set (RE c, RE c) -> [(RE c, RE c)] -> Bool
526 |     go !_ [] = True
527 |     go acc (p@(x, y) : zs)
528 |         | p `Set.member` acc = go acc zs
529 |         -- if two regexps are structurally the same, we don't need to recurse.
530 |         | x == y             = go (Set.insert p acc) zs
531 |         | all agree ps       = go (Set.insert p acc) (ps ++ zs)
532 |         | otherwise = False
533 |       where
534 |         cs = toList $ P.examples $ leadingChars x `P.wedge` leadingChars y
535 |         ps = map (\c -> (derivate c x, derivate c y)) cs
536 | 
537 |     agree :: (RE c, RE c) -> Bool
538 |     agree (x, y) = nullable x == nullable y
539 | 
540 | instance (Ord c, Enum c, Bounded c) => C.Equivalent c (RE c) where
541 |     equivalent = equivalent
542 | 
543 | -------------------------------------------------------------------------------
544 | -- Generation
545 | -------------------------------------------------------------------------------
546 | 
547 | -- | Generate random strings of the language @RE c@ describes.
548 | --
549 | -- >>> let example = traverse_ print . take 3 . generate (curry QC.choose) 42
550 | -- >>> example "abc"
551 | -- "abc"
552 | -- "abc"
553 | -- "abc"
554 | --
555 | -- >>> example $ star $ "a" \/ "b"
556 | -- ""
557 | -- "bbaabbaaba"
558 | -- "b"
559 | --
560 | -- >>> example empty
561 | --
562 | -- prop> all (match r) $ take 10 $ generate (curry QC.choose) 42 (r :: RE Char)
563 | --
564 | generate
565 |     :: (c -> c -> QC.Gen c) -- ^ character range generator
566 |     -> Int    -- ^ seed
567 |     -> RE c
568 |     -> [[c]]  -- ^ infinite list of results
569 | generate c seed re
570 |     | isEmpty re = []
571 |     | otherwise  = QC.unGen (QC.infiniteListOf (generator c re)) (QC.mkQCGen seed) 10
572 | 
573 | generator
574 |     :: (c -> c -> QC.Gen c)
575 |     -> RE c
576 |     -> QC.Gen [c]
577 | generator c = go where
578 |     go (REChars cs)    = goChars cs
579 |     go (REAppend rs)   = concat <$> traverse go rs
580 |     go (REUnion cs rs)
581 |         | RSet.null  cs = QC.oneof [ go r | r <- toList rs ]
582 |         | otherwise     = QC.oneof $ goChars cs : [ go r | r <- toList rs ]
583 |     go (REStar r)      = QC.sized $ \n -> do
584 |         n' <- QC.choose (0, n)
585 |         concat <$> sequence (replicate n' (go r))
586 | 
587 |     goChars cs = pure <$> QC.oneof [ c x y | (x,y) <- RSet.toRangeList cs ]
588 | 
589 | -------------------------------------------------------------------------------
590 | -- Instances
591 | -------------------------------------------------------------------------------
592 | 
593 | instance Eq c => Semigroup (RE c) where
594 |     r <> r' = appends [r, r']
595 | 
596 | instance Eq c => Monoid (RE c) where
597 |     mempty  = eps
598 |     mappend = (<>)
599 |     mconcat = appends
600 | 
601 | 
602 | 
603 | instance c ~ Char => IsString (RE c) where
604 |     fromString = string
605 | 
606 | instance (Ord c, Enum c, Bounded c, QC.Arbitrary c) => QC.Arbitrary (RE c) where
607 |     arbitrary = QC.sized arb where
608 |         c :: QC.Gen (RE c)
609 |         c = REChars . RSet.fromRangeList <$> QC.arbitrary
610 | 
611 |         arb :: Int -> QC.Gen (RE c)
612 |         arb n | n <= 0    = QC.oneof [c, fmap char QC.arbitrary, pure eps]
613 |               | otherwise = QC.oneof
614 |             [ c
615 |             , pure eps
616 |             , fmap char QC.arbitrary
617 |             , liftA2 (<>) (arb n2) (arb n2)
618 |             , liftA2 (\/) (arb n2) (arb n2)
619 |             , fmap star (arb n2)
620 |             ]
621 |           where
622 |             n2 = n `div` 2
623 | 
624 |     shrink (REUnion _cs rs) = Set.toList rs
625 |     shrink (REAppend rs)    = rs ++ map appends (QC.shrink rs)
626 |     shrink (REStar r)       = r : map star (QC.shrink r)
627 |     shrink _                = []
628 | 
629 | instance (QC.CoArbitrary c) => QC.CoArbitrary (RE c) where
630 |     coarbitrary (REChars cs)    = QC.variant (0 :: Int) . QC.coarbitrary (RSet.toRangeList cs)
631 |     coarbitrary (REAppend rs)   = QC.variant (1 :: Int) . QC.coarbitrary rs
632 |     coarbitrary (REUnion cs rs) = QC.variant (2 :: Int) . QC.coarbitrary (RSet.toRangeList cs, Set.toList rs)
633 |     coarbitrary (REStar r)      = QC.variant (3 :: Int) . QC.coarbitrary r
634 | 
635 | -------------------------------------------------------------------------------
636 | -- JavaScript
637 | -------------------------------------------------------------------------------
638 | 
639 | instance c ~ Char => Pretty (RE c) where
640 |     prettyS x = showChar '^' . go False x . showChar '$'
641 |       where
642 |         go :: Bool -> RE Char -> ShowS
643 |         go p (REStar a)
644 |             = parens p
645 |             $ go True a . showChar '*'
646 |         go p (REAppend rs)
647 |             = parens p $ goMany id rs
648 |         go p (REUnion cs rs)
649 |             | RSet.null cs = goUnion p rs
650 |             | Set.null rs  = prettyS cs
651 |             | otherwise    = goUnion p (Set.insert (REChars cs) rs)
652 |         go _ (REChars cs)
653 |             = prettyS cs
654 | 
655 |         goUnion p rs
656 |             | Set.member eps rs = parens p $ goUnion' True . showChar '?'
657 |             | otherwise         = goUnion' p
658 |           where
659 |             goUnion' p' = case Set.toList (Set.delete eps rs) of
660 |                 [] -> go True empty
661 |                 [r] -> go p' r
662 |                 (r:rs') -> parens True $ goSome1 (showChar '|') r rs'
663 | 
664 |         goMany :: ShowS -> [RE Char] -> ShowS
665 |         goMany sep = foldr (\a b -> go False a . sep . b) id
666 | 
667 |         goSome1 :: ShowS -> RE Char -> [RE Char] -> ShowS
668 |         goSome1 sep r = foldl (\a b -> a . sep . go False b) (go False r)
669 | 
670 |         parens :: Bool -> ShowS -> ShowS
671 |         parens True  s = showString "(" . s . showChar ')'
672 |         parens False s = s
673 | 
674 | -------------------------------------------------------------------------------
675 | -- Latin1
676 | -------------------------------------------------------------------------------
677 | 
678 | instance C.ToLatin1 RE where
679 |     toLatin1 (REChars rs)    = C.fromRSet (C.toLatin1 rs)
680 |     toLatin1 (REAppend xs)   = appends (map C.toLatin1 xs)
681 |     toLatin1 (REUnion rs xs) = C.fromRSet (C.toLatin1 rs) \/ unions (map C.toLatin1 (Set.toList  xs))
682 |     toLatin1 (REStar r)      = star (C.toLatin1 r)
683 | 


--------------------------------------------------------------------------------
/src/Kleene/Internal/Sets.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE Safe #-}
 2 | {-# OPTIONS_HADDOCK not-home #-}
 3 | -- | Character sets.
 4 | module Kleene.Internal.Sets (
 5 |     dotRSet,
 6 | ) where
 7 | 
 8 | import Data.RangeSet.Map (RSet)
 9 | 
10 | import qualified Data.RangeSet.Map as RSet
11 | 
12 | -- | All but the newline.
13 | dotRSet :: RSet Char
14 | dotRSet = RSet.full RSet.\\ RSet.singleton '\n'
15 | 


--------------------------------------------------------------------------------
/src/Kleene/Monad.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveFoldable         #-}
  2 | {-# LANGUAGE DeriveFunctor          #-}
  3 | {-# LANGUAGE DeriveTraversable      #-}
  4 | {-# LANGUAGE FlexibleInstances      #-}
  5 | {-# LANGUAGE FunctionalDependencies #-}
  6 | {-# LANGUAGE GADTs                  #-}
  7 | {-# LANGUAGE Safe                   #-}
  8 | {-# LANGUAGE ScopedTypeVariables    #-}
  9 | module Kleene.Monad (
 10 |     M (..),
 11 |     -- * Construction
 12 |     --
 13 |     -- | Binary operators are
 14 |     --
 15 |     -- * '<>' for append
 16 |     --
 17 |     -- There are no binary operator for union. Use 'unions'.
 18 |     --
 19 |     empty,
 20 |     eps,
 21 |     char,
 22 |     charRange,
 23 |     anyChar,
 24 |     appends,
 25 |     unions,
 26 |     star,
 27 |     string,
 28 |     -- * Derivative
 29 |     nullable,
 30 |     derivate,
 31 |     -- * Generation
 32 |     generate,
 33 |     -- * Conversion
 34 |     toKleene,
 35 |     -- * Other
 36 |     isEmpty,
 37 |     isEps,
 38 |     ) where
 39 | 
 40 | import Data.Semigroup (Semigroup (..))
 41 | import Prelude ()
 42 | import Prelude.Compat
 43 | 
 44 | import Control.Applicative (liftA2)
 45 | import Control.Monad       (ap)
 46 | import Data.Foldable       (toList)
 47 | import Data.List           (foldl')
 48 | import Data.String         (IsString (..))
 49 | 
 50 | import qualified Test.QuickCheck        as QC
 51 | import qualified Test.QuickCheck.Gen    as QC (unGen)
 52 | import qualified Test.QuickCheck.Random as QC (mkQCGen)
 53 | 
 54 | import qualified Kleene.Classes         as C
 55 | import           Kleene.Internal.Pretty
 56 | 
 57 | -- $setup
 58 | -- >>> :set -XOverloadedStrings
 59 | -- >>> import Data.Foldable (traverse_)
 60 | -- >>> import Data.List (sort)
 61 | -- >>> import Kleene.Internal.Pretty (putPretty)
 62 | --
 63 | -- >>> import Test.QuickCheck ((===))
 64 | -- >>> import qualified Test.QuickCheck as QC
 65 | --
 66 | -- >>> import Kleene.RE (RE)
 67 | -- >>> import Kleene.Classes (match)
 68 | -- >>> let asMBool :: M Bool -> M Bool; asMBool = id
 69 | 
 70 | -- | Regular expression which has no restrictions on the elements.
 71 | -- Therefore we can have 'Monad' instance, i.e. have a regexp where
 72 | -- characters are regexps themselves.
 73 | --
 74 | -- Because there are no optimisations, it's better to work over small alphabets.
 75 | -- On the other hand, we can work over infinite alphabets, if we only
 76 | -- use small amount of symbols!
 77 | --
 78 | -- >>> putPretty $ string [True, False]
 79 | -- ^10$
 80 | --
 81 | -- >>> let re  = string [True, False, True]
 82 | -- >>> let re' = re >>= \b -> if b then char () else star (char ())
 83 | -- >>> putPretty re'
 84 | -- ^..*.$
 85 | --
 86 | data M c
 87 |     = MAppend [M c]     -- ^ Concatenation
 88 |     | MUnion [c] [M c]  -- ^ Union
 89 |     | MStar (M c)       -- ^ Kleene star
 90 |   deriving (Eq, Ord, Show, Functor, Foldable, Traversable)
 91 | 
 92 | instance Applicative M where
 93 |     pure = char
 94 |     (<*>) = ap
 95 | 
 96 | instance Monad M where
 97 |     return = pure
 98 | 
 99 |     MAppend rs   >>= k  = appends (map (>>= k) rs)
100 |     MUnion cs rs >>= k  = unions (appends (map k cs) : map (>>= k) rs)
101 |     MStar r      >>= k  = star (r >>= k)
102 | 
103 | -------------------------------------------------------------------------------
104 | -- Smart constructor
105 | -------------------------------------------------------------------------------
106 | 
107 | -- | Empty regex. Doesn't accept anything.
108 | --
109 | -- >>> putPretty (empty :: M Bool)
110 | -- ^[]$
111 | --
112 | -- prop> match (empty :: M Char) (s :: String) === False
113 | --
114 | empty :: M c
115 | empty = MUnion [] []
116 | 
117 | -- | Empty string. /Note:/ different than 'empty'.
118 | --
119 | -- >>> putPretty (eps :: M Bool)
120 | -- ^$
121 | --
122 | -- >>> putPretty (mempty :: M Bool)
123 | -- ^$
124 | --
125 | -- prop> match (eps :: M Char) s === null (s :: String)
126 | --
127 | eps :: M c
128 | eps = MAppend []
129 | 
130 | -- |
131 | --
132 | -- >>> putPretty (char 'x')
133 | -- ^x$
134 | --
135 | char :: c -> M c
136 | char c = MUnion [c] []
137 | 
138 | -- | /Note:/ we know little about @c@.
139 | --
140 | -- >>> putPretty $ charRange 'a' 'z'
141 | -- ^[abcdefghijklmnopqrstuvwxyz]$
142 | --
143 | charRange :: Enum c => c -> c -> M c
144 | charRange c c' = MUnion [c .. c'] []
145 | 
146 | 
147 | -- | Any character. /Note:/ different than dot!
148 | --
149 | -- >>> putPretty (anyChar :: M Bool)
150 | -- ^[01]$
151 | --
152 | anyChar :: (Bounded c, Enum c) => M c
153 | anyChar = MUnion [minBound .. maxBound] []
154 | 
155 | -- | Concatenate regular expressions.
156 | --
157 | appends :: [M c] -> M c
158 | appends rs0
159 |     | any isEmpty rs1 = empty
160 |     | otherwise = case rs1 of
161 |         [r] -> r
162 |         rs  -> MAppend rs
163 |   where
164 |     -- flatten one level of MAppend
165 |     rs1 = concatMap f rs0
166 | 
167 |     f (MAppend rs) = rs
168 |     f r             = [r]
169 | 
170 | -- | Union of regular expressions.
171 | --
172 | -- Lattice laws don't hold structurally:
173 | --
174 | unions :: [M c] -> M c
175 | unions = uncurry mk . foldMap f where
176 |     f (MUnion cs rs) = (cs, rs)
177 |     f r              = ([], [r])
178 | 
179 |     mk [] [r] = r
180 |     mk cs rs = MUnion cs rs
181 | 
182 | -- | Kleene star.
183 | --
184 | star :: M c -> M c
185 | star r = case r of
186 |     MStar _                    -> r
187 |     MAppend []                 -> eps
188 |     MUnion cs rs | any isEps rs -> case rs' of
189 |         [r'] | null cs -> star r'
190 |         _              -> MStar (MUnion cs rs')
191 |       where
192 |         rs' = filter (not . isEps) rs
193 |     _                          -> MStar r
194 | 
195 | -- | Literal string.
196 | --
197 | -- >>> putPretty ("foobar" :: M Char)
198 | -- ^foobar$
199 | --
200 | -- >>> putPretty ("(.)" :: M Char)
201 | -- ^\(\.\)$
202 | --
203 | -- >>> putPretty $ string [False, True]
204 | -- ^01$
205 | --
206 | string :: [c] -> M c
207 | string []  = eps
208 | string [c] = char c
209 | string cs  = MAppend $ map char cs
210 | 
211 | instance C.Kleene  (M c) where
212 |     empty      = empty
213 |     eps        = eps
214 |     appends    = appends
215 |     unions     = unions
216 |     star       = star
217 | 
218 | instance C.CharKleene c (M c) where
219 |     char       = char
220 | 
221 | -------------------------------------------------------------------------------
222 | -- derivative
223 | -------------------------------------------------------------------------------
224 | 
225 | -- | We say that a regular expression r is nullable if the language it defines
226 | -- contains the empty string.
227 | --
228 | -- >>> nullable eps
229 | -- True
230 | --
231 | -- >>> nullable (star "x")
232 | -- True
233 | --
234 | -- >>> nullable "foo"
235 | -- False
236 | --
237 | nullable :: M c -> Bool
238 | nullable (MAppend rs)    = all nullable rs
239 | nullable (MUnion _cs rs) = any nullable rs
240 | nullable (MStar _)       = True
241 | 
242 | -- | Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\)
243 | -- with respect to a symbol \(a \in \Sigma\) is the language that includes only
244 | -- those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
245 | --
246 | -- >>> putPretty $ derivate 'f' "foobar"
247 | -- ^oobar$
248 | --
249 | -- >>> putPretty $ derivate 'x' $ unions ["xyz", "abc"]
250 | -- ^yz$
251 | --
252 | -- >>> putPretty $ derivate 'x' $ star "xyz"
253 | -- ^yz(xyz)*$
254 | --
255 | derivate :: (Eq c, Enum c, Bounded c) => c -> M c -> M c
256 | derivate c (MUnion cs rs)  = unions $ derivateChars c cs : [ derivate c r | r <- toList rs]
257 | derivate c (MAppend rs)    = derivateAppend c rs
258 | derivate c rs@(MStar r)    = derivate c r <> rs
259 | 
260 | derivateAppend :: (Eq c, Enum c, Bounded c) => c -> [M c] -> M c
261 | derivateAppend _ []      = empty
262 | derivateAppend c [r]     = derivate c r
263 | derivateAppend c (r:rs)
264 |     | nullable r         = unions [r' <> appends rs, rs']
265 |     | otherwise          = r' <> appends rs
266 |   where
267 |     r'  = derivate c r
268 |     rs' = derivateAppend c rs
269 | 
270 | derivateChars :: Eq c =>  c -> [c] -> M c
271 | derivateChars c cs
272 |     | c `elem` cs = eps
273 |     | otherwise   = empty
274 | 
275 | instance (Eq c, Enum c, Bounded c) => C.Derivate c (M c) where
276 |     nullable = nullable
277 |     derivate = derivate
278 | 
279 | instance (Eq c, Enum c, Bounded c) => C.Match c (M c) where
280 |     match r = nullable . foldl' (flip derivate) r
281 | 
282 | -------------------------------------------------------------------------------
283 | -- isEmpty
284 | -------------------------------------------------------------------------------
285 | 
286 | -- | Whether 'M' is (structurally) equal to 'empty'.
287 | isEmpty :: M c -> Bool
288 | isEmpty (MUnion cs rs) = null cs && null rs
289 | isEmpty _              = False
290 | 
291 | -- | Whether 'M' is (structurally) equal to 'eps'.
292 | isEps :: M c -> Bool
293 | isEps (MAppend rs) = null rs
294 | isEps _            = False
295 | 
296 | -------------------------------------------------------------------------------
297 | -- Generation
298 | -------------------------------------------------------------------------------
299 | 
300 | -- | Generate random strings of the language @M c@ describes.
301 | --
302 | -- >>> let example = traverse_ print . take 3 . generate 42
303 | -- >>> example "abc"
304 | -- "abc"
305 | -- "abc"
306 | -- "abc"
307 | --
308 | -- >>> example $ star $ unions ["a", "b"]
309 | -- ""
310 | -- "aaababaaab"
311 | -- "a"
312 | --
313 | -- xx >>> example empty
314 | --
315 | -- expensive-prop> all (match r) $ take 10 $ generate 42 (r :: M Bool)
316 | --
317 | generate
318 |     :: Int    -- ^ seed
319 |     -> M c
320 |     -> [[c]]  -- ^ infinite list of results
321 | generate seed re
322 |     | isEmpty re = []
323 |     | otherwise  = QC.unGen (QC.infiniteListOf (generator re)) (QC.mkQCGen seed) 10
324 | 
325 | generator :: M c -> QC.Gen [c]
326 | generator = go where
327 |     go (MAppend rs)   = concat <$> traverse go rs
328 |     go (MUnion cs rs)
329 |         | null cs   = QC.oneof [ go r | r <- toList rs ]
330 |         | otherwise = QC.oneof $ goChars cs : [ go r | r <- toList rs ]
331 |     go (MStar r)      = QC.sized $ \n -> do
332 |         n' <- QC.choose (0, n)
333 |         concat <$> sequence (replicate n' (go r))
334 | 
335 |     goChars cs = pure <$> QC.elements cs
336 | 
337 | -------------------------------------------------------------------------------
338 | -- Conversion
339 | -------------------------------------------------------------------------------
340 | 
341 | -- | Convert to 'Kleene'
342 | --
343 | -- >>> let re = charRange 'a' 'z'
344 | -- >>> putPretty re
345 | -- ^[abcdefghijklmnopqrstuvwxyz]$
346 | --
347 | -- >>> putPretty (toKleene re :: RE Char)
348 | -- ^[a-z]$
349 | --
350 | toKleene :: C.CharKleene c k => M c -> k
351 | toKleene (MAppend rs)   = C.appends (map toKleene rs)
352 | toKleene (MUnion cs rs) = C.unions (C.oneof cs : map toKleene rs)
353 | toKleene (MStar r)      = C.star (toKleene r)
354 | 
355 | -------------------------------------------------------------------------------
356 | -- Instances
357 | -------------------------------------------------------------------------------
358 | 
359 | instance Semigroup (M c) where
360 |     r <> r' = appends [r, r']
361 | 
362 | instance Monoid (M c) where
363 |     mempty  = eps
364 |     mappend = (<>)
365 |     mconcat = appends
366 | 
367 | instance c ~ Char => IsString (M c) where
368 |     fromString = string
369 | 
370 | instance (Eq c, Enum c, Bounded c, QC.Arbitrary c) => QC.Arbitrary (M c) where
371 |     arbitrary = QC.sized arb where
372 |         c :: QC.Gen (M c)
373 |         c = char <$> QC.arbitrary
374 | 
375 |         arb :: Int -> QC.Gen (M c)
376 |         arb n | n <= 0    = QC.oneof [c, fmap char QC.arbitrary, pure eps]
377 |               | otherwise = QC.oneof
378 |             [ c
379 |             , pure eps
380 |             , fmap char QC.arbitrary
381 |             , liftA2 (<>) (arb n2) (arb n2)
382 |             , liftA2 (\x y -> unions [x,y]) (arb n2) (arb n2)
383 |             , fmap star (arb n2)
384 |             ]
385 |           where
386 |             n2 = n `div` 2
387 | 
388 | instance (QC.CoArbitrary c) => QC.CoArbitrary (M c) where
389 |     coarbitrary (MAppend rs)   = QC.variant (1 :: Int) . QC.coarbitrary rs
390 |     coarbitrary (MUnion cs rs) = QC.variant (2 :: Int) . QC.coarbitrary (cs, rs)
391 |     coarbitrary (MStar r)      = QC.variant (3 :: Int) . QC.coarbitrary r
392 | 
393 | -------------------------------------------------------------------------------
394 | -- JavaScript
395 | -------------------------------------------------------------------------------
396 | 
397 | instance (Pretty c, Eq c) => Pretty (M c) where
398 |     prettyS x = showChar '^' . go False x . showChar '$'
399 |       where
400 |         go :: Bool -> M c -> ShowS
401 |         go p (MStar a)
402 |             = parens p
403 |             $ go True a . showChar '*'
404 |         go p (MAppend rs)
405 |             = parens p $ goMany id rs
406 |         go p (MUnion cs rs)
407 |             | null rs   = prettySList cs
408 |             | null cs   = goUnion p rs
409 |             | otherwise = goUnion p (MUnion cs [] : rs)
410 | 
411 |         goUnion p rs
412 |             | elem eps rs = parens p $ goUnion' True . showChar '?'
413 |             | otherwise   = goUnion' p
414 |           where
415 |             goUnion' p' = case filter (/= eps) rs of
416 |                 []      -> go True empty
417 |                 [r]     -> go p' r
418 |                 (r:rs') -> parens True $ goSome1 (showChar '|') r rs'
419 | 
420 |         goMany :: ShowS -> [M c] -> ShowS
421 |         goMany sep = foldr (\a b -> go False a . sep . b) id
422 | 
423 |         goSome1 :: ShowS -> M c -> [M c] -> ShowS
424 |         goSome1 sep r = foldl (\a b -> a . sep . go False b) (go False r)
425 | 
426 |         parens :: Bool -> ShowS -> ShowS
427 |         parens True  s = showString "(" . s . showChar ')'
428 |         parens False s = s
429 | 
430 |         prettySList :: [c] -> ShowS
431 |         prettySList [c] = prettyS c
432 |         prettySList xs  = showChar '[' . foldr (\a b -> prettyS a . b) (showChar ']') xs
433 | 


--------------------------------------------------------------------------------
/src/Kleene/RE.hs:
--------------------------------------------------------------------------------
 1 | module Kleene.RE (
 2 |     RE (..),
 3 |     -- * Construction
 4 |     --
 5 |     -- | Binary operators are
 6 |     --
 7 |     -- * '<>' for append
 8 |     -- * '\/' for union
 9 |     --
10 |     empty,
11 |     eps,
12 |     everything,
13 |     char,
14 |     charRange,
15 |     anyChar,
16 |     appends,
17 |     unions,
18 |     star,
19 |     string,
20 |     -- * Derivative
21 |     nullable,
22 |     derivate,
23 |     -- * Transition map
24 |     transitionMap,
25 |     leadingChars,
26 |     -- * Equivalence
27 |     equivalent,
28 |     -- * Generation
29 |     generate,
30 |     -- * Other
31 |     isEmpty,
32 |     nullableProof,
33 |     ) where
34 | 
35 | -- This to include orphans.
36 | import Kleene.DFA ()
37 | import Kleene.Internal.RE
38 | 


--------------------------------------------------------------------------------
/tests/kleene-utf8.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | module Main (main) where
  3 | 
  4 | import Data.Bits        (shiftR, (.&.), (.|.))
  5 | import Data.Char        (ord)
  6 | import Data.Semigroup   ((<>))
  7 | import Data.Word        (Word8)
  8 | import Test.Tasty       (defaultMain, testGroup)
  9 | import Test.Tasty.HUnit (assertBool, testCase)
 10 | 
 11 | import Kleene
 12 | import Kleene.DFA (fromRE, toRE)
 13 | 
 14 | #if MIN_VERSION_bytestring(0,11,2)
 15 | import Test.Tasty.QuickCheck (label, testProperty, (===))
 16 | 
 17 | import qualified Data.ByteString as BS
 18 | #endif
 19 | 
 20 | main :: IO ()
 21 | main = do
 22 |     -- print utf8char2
 23 |     -- print utf8char3
 24 |     -- print utf8char4
 25 |     defaultMain $ testGroup "kleene-utf8"
 26 |         [ testCase "brute ≃ brute'" $ do
 27 |             assertBool "not equiv" (equivalent utf8char1 utf8char2)
 28 |         , testCase "brute' ≃ manual1" $ do
 29 |             assertBool "not equiv" (equivalent utf8char2 utf8char3)
 30 |         , testCase "manual1 ≃ manual2" $ do
 31 |             assertBool "not equiv" (equivalent utf8char3 utf8char4)
 32 |         , testCase "brute ≃ manual2" $ do
 33 |             assertBool "not equiv" (equivalent utf8char1 utf8char4)
 34 | 
 35 | #if MIN_VERSION_bytestring(0,11,2)
 36 |         , testProperty "isValidUtf8" $ \w8s ->
 37 |             let bs = BS.pack w8s
 38 |                 isValid = BS.isValidUtf8 bs
 39 |             in label (show isValid) $ isValid === match (star utf8char4) w8s
 40 | #endif
 41 |         ]
 42 | 
 43 | -------------------------------------------------------------------------------
 44 | -- Bruteforce definition
 45 | -------------------------------------------------------------------------------
 46 | 
 47 | utf8char1 :: RE Word8
 48 | utf8char1 = unions
 49 |     [ string (encodeStringUtf8 [c])
 50 |     | c <- [ '\0' .. maxBound ]
 51 |     ]
 52 | 
 53 | -------------------------------------------------------------------------------
 54 | -- Derived definition
 55 | -------------------------------------------------------------------------------
 56 | 
 57 | utf8charDFA1 :: DFA Word8
 58 | utf8charDFA1 = fromRE utf8char1
 59 | 
 60 | utf8char2 :: RE Word8
 61 | utf8char2 = toRE utf8charDFA1
 62 | 
 63 | -------------------------------------------------------------------------------
 64 | -- Written out definition
 65 | -------------------------------------------------------------------------------
 66 | 
 67 | utf8char3 :: RE Word8
 68 | utf8char3 = unions
 69 |     [ charRange 0x00 0x7F
 70 |     , sub1 <> charRange 0x80 0xBF
 71 |     ]
 72 |   where
 73 |     sub1 = unions
 74 |         [ charRange 0xC2 0xDF
 75 |         , charRange 0xE0 0xE0 <> charRange 0xa0 0xBF
 76 |         , charRange 0xED 0xED <> charRange 0x80 0x9F
 77 |         , sub2 <> charRange 0x80 0xBF
 78 |         ]
 79 | 
 80 |     sub2 = unions
 81 |         [ charRange 0xE1 0xEC
 82 |         , charRange 0xEE 0xEF
 83 |         , charRange 0xF0 0xF0 <> charRange 0x90 0xBF
 84 |         , charRange 0xF1 0xF3 <> charRange 0x80 0xBF
 85 |         , charRange 0xF4 0xF4 <> charRange 0x80 0x8f
 86 |         ]
 87 | 
 88 | -------------------------------------------------------------------------------
 89 | -- Manual definition, how human would written it
 90 | -------------------------------------------------------------------------------
 91 | 
 92 | utf8char4 :: RE Word8
 93 | utf8char4 = unions
 94 |     [ charRange 0x00 0x7F
 95 |     , charRange 0xC2 0xDF <> charRange 0x80 0xBF
 96 |     , charRange 0xE0 0xE0 <> charRange 0xa0 0xBF <> charRange 0x80 0xBF
 97 |     , charRange 0xE1 0xEC <> charRange 0x80 0xBF <> charRange 0x80 0xBF
 98 |     , charRange 0xED 0xED <> charRange 0x80 0x9F <> charRange 0x80 0xBF
 99 |     , charRange 0xEE 0xEF <> charRange 0x80 0xBF <> charRange 0x80 0xBF
100 |     , charRange 0xF0 0xF0 <> charRange 0x90 0xBF <> charRange 0x80 0xBF <> charRange 0x80 0xBF
101 |     , charRange 0xF1 0xF3 <> charRange 0x80 0xBF <> charRange 0x80 0xBF <> charRange 0x80 0xBF
102 |     , charRange 0xF4 0xF4 <> charRange 0x80 0x8f <> charRange 0x80 0xBF <> charRange 0x80 0xBF
103 |     ]
104 | 
105 | -------------------------------------------------------------------------------
106 | -- UTF8 encoding
107 | -------------------------------------------------------------------------------
108 | 
109 | encodeStringUtf8 :: String -> [Word8]
110 | encodeStringUtf8 []        = []
111 | encodeStringUtf8 (c:cs)
112 |   | c <= '\x07F' = w8
113 |                  : encodeStringUtf8 cs
114 |   | c <= '\x7FF' = (0xC0 .|.  w8ShiftR  6          )
115 |                  : (0x80 .|. (w8          .&. 0x3F))
116 |                  : encodeStringUtf8 cs
117 |   | c <= '\xD7FF'= (0xE0 .|.  w8ShiftR 12          )
118 |                  : (0x80 .|. (w8ShiftR  6 .&. 0x3F))
119 |                  : (0x80 .|. (w8          .&. 0x3F))
120 |                  : encodeStringUtf8 cs
121 |   | c <= '\xDFFF'= 0xEF : 0xBF : 0xBD -- U+FFFD
122 |                  : encodeStringUtf8 cs
123 |   | c <= '\xFFFF'= (0xE0 .|.  w8ShiftR 12          )
124 |                  : (0x80 .|. (w8ShiftR  6 .&. 0x3F))
125 |                  : (0x80 .|. (w8          .&. 0x3F))
126 |                  : encodeStringUtf8 cs
127 |   | otherwise    = (0xf0 .|.  w8ShiftR 18          )
128 |                  : (0x80 .|. (w8ShiftR 12 .&. 0x3F))
129 |                  : (0x80 .|. (w8ShiftR  6 .&. 0x3F))
130 |                  : (0x80 .|. (w8          .&. 0x3F))
131 |                  : encodeStringUtf8 cs
132 |   where
133 |     w8 = fromIntegral (ord c) :: Word8
134 |     w8ShiftR :: Int -> Word8
135 |     w8ShiftR = fromIntegral . shiftR (ord c)
136 | 


--------------------------------------------------------------------------------