├── .github ├── haskell-ci.patch └── workflows │ ├── ci.yml │ ├── emulated.yml │ └── fix-whitespace.yml ├── .gitignore ├── .mailmap ├── .readthedocs.yaml ├── CHANGELOG.md ├── CONTRIBUTING.rst ├── LICENSE ├── Makefile ├── NOTE.txt ├── README.md ├── Setup.hs ├── TODO ├── alex.cabal ├── build-windows-dist.sh ├── cabal.project ├── data ├── AlexTemplate.hs └── AlexWrappers.hs ├── doc ├── .gitignore ├── Makefile ├── about.rst ├── api.rst ├── conf.py ├── contributing.rst ├── index.rst ├── introduction.rst ├── invoking.rst ├── make.bat ├── obtaining.rst ├── regex.rst ├── requirements.txt └── syntax.rst ├── examples ├── Makefile ├── Tokens.x ├── Tokens_gscan.x ├── Tokens_posn.x ├── examples.x ├── haskell.x ├── lit.x ├── pp.x ├── state.x ├── tiger.x ├── tiger │ └── Literals.txt ├── tiny.y ├── words.x ├── words_monad.x └── words_posn.x ├── fix-whitespace.yaml ├── make-sdist.sh ├── src ├── AbsSyn.hs ├── CharSet.hs ├── DFA.hs ├── DFAMin.hs ├── DFS.hs ├── Data │ ├── LICENSE.txt │ ├── Ranged.hs │ └── Ranged │ │ ├── Boundaries.hs │ │ ├── RangedSet.hs │ │ └── Ranges.hs ├── Info.hs ├── Main.hs ├── NFA.hs ├── Output.hs ├── ParseMonad.hs ├── Parser.y ├── Parser.y.boot ├── Scan.x ├── Scan.x.boot ├── UTF8.hs ├── Util.hs └── ghc_hooks.c ├── stack.yaml ├── test-debug.hs ├── test.hs └── tests ├── Makefile ├── basic_typeclass.x ├── basic_typeclass_bytestring.x ├── default_typeclass.x ├── gscan_typeclass.x ├── issue_119.x ├── issue_141.x ├── issue_197.x ├── issue_262.x ├── issue_269_part1.x ├── issue_269_part2.x ├── issue_71.x ├── monadUserState_typeclass.x ├── monadUserState_typeclass_bytestring.x ├── monad_typeclass.x ├── monad_typeclass_bytestring.x ├── null.x ├── posn_typeclass.x ├── posn_typeclass_bytestring.x ├── posn_typeclass_strict_text.x ├── simple.x ├── strict_text_typeclass.x ├── strict_typeclass.x ├── tokens.x ├── tokens_bytestring.x ├── tokens_bytestring_unicode.x ├── tokens_gscan.x ├── tokens_monadUserState_bytestring.x ├── tokens_monadUserState_strict_text.x ├── 
tokens_monad_bytestring.x ├── tokens_posn.x ├── tokens_posn_bytestring.x ├── tokens_scan_user.x ├── tokens_strict_bytestring.x └── unicode.x /.github/haskell-ci.patch: -------------------------------------------------------------------------------- 1 | --- .github/workflows/haskell-ci.yml 2023-02-08 20:09:03.000000000 +0100 2 | +++ .github/workflows/haskell-ci.yml-patched 2023-02-08 20:08:57.000000000 +0100 3 | @@ -226,10 +226,23 @@ 4 | rm -f cabal-plan.xz 5 | chmod a+x $HOME/.cabal/bin/cabal-plan 6 | cabal-plan --version 7 | + 8 | + - name: install alex and happy 9 | + run: | 10 | + $CABAL v2-install $ARG_COMPILER alex happy 11 | + 12 | - name: checkout 13 | uses: actions/checkout@v3 14 | with: 15 | path: source 16 | + 17 | + - name: generate Parser.hs and Scan.hs 18 | + run: | 19 | + happy -agc $GITHUB_WORKSPACE/source/src/Parser.y -o $GITHUB_WORKSPACE/source/src/Parser.hs 20 | + alex -g $GITHUB_WORKSPACE/source/src/Scan.x -o $GITHUB_WORKSPACE/source/src/Scan.hs 21 | + mv $GITHUB_WORKSPACE/source/src/Parser.y $GITHUB_WORKSPACE/source/src/Parser.y.boot 22 | + mv $GITHUB_WORKSPACE/source/src/Scan.x $GITHUB_WORKSPACE/source/src/Scan.x.boot 23 | + 24 | - name: initial cabal.project for sdist 25 | run: | 26 | touch cabal.project 27 | @@ -275,15 +288,21 @@ 28 | run: | 29 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 30 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 31 | - - name: build w/o tests 32 | + 33 | + - name: build w/o tests and install 34 | run: | 35 | - $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 36 | + $CABAL v2-install --reinstall --overwrite-policy=always $ARG_COMPILER --disable-tests --disable-benchmarks all 37 | + 38 | - name: build 39 | run: | 40 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 41 | - name: tests 42 | run: | 43 | + # echo "ALEX=$HOME/.cabal/bin/alex" >> "$GITHUB_ENV" 44 | + export 
ALEX=$HOME/.cabal/bin/alex 45 | + export HC=$HC 46 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 47 | + 48 | - name: cabal check 49 | run: | 50 | cd ${PKGDIR_alex} || false 51 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | main: 16 | name: GHC ${{ matrix.ghc }} on ${{ matrix.os }} 17 | runs-on: ${{ matrix.os }} 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | os: [ubuntu-latest] 22 | ghc: 23 | - "8.0.2" 24 | - "8.2.2" 25 | - "8.4.4" 26 | - "8.6.5" 27 | - "8.8.4" 28 | - "8.10.7" 29 | - "9.0.2" 30 | - "9.2.8" 31 | - "9.4.8" 32 | - "9.6.7" 33 | - "9.8.4" 34 | - "9.10.1" 35 | - "9.12.2" 36 | include: 37 | - ghc: "9.12.2" 38 | os: macos-latest 39 | - ghc: "9.12.2" 40 | os: windows-latest 41 | 42 | steps: 43 | - uses: actions/checkout@v4 44 | 45 | - uses: haskell-actions/setup@v2 46 | id: setup 47 | with: 48 | ghc-version: ${{ matrix.ghc }} 49 | cabal-version: "latest" 50 | cabal-update: true 51 | 52 | - uses: actions/cache@v4 53 | name: Cache cabal stuff 54 | with: 55 | path: | 56 | ${{ steps.setup.outputs.cabal-store }} 57 | dist-newstyle 58 | key: ${{ runner.os }}-${{ matrix.ghc }} 59 | 60 | - name: Versions 61 | run: | 62 | cabal --version 63 | 64 | - name: Install alex & happy 65 | run: | 66 | cd ../ 67 | cabal install alex happy 68 | 69 | - name: Unpack 70 | run: | 71 | cp src/Parser.y src/Parser.y.boot 72 | cp src/Scan.x src/Scan.x.boot 73 | cabal sdist --ignore-project --output-directory . 
74 | cabal get alex-*.tar.gz 75 | 76 | - name: Build 77 | run: | 78 | cd alex-*/ 79 | cabal build all --enable-tests --enable-benchmarks 80 | 81 | - name: Test 82 | run: | 83 | cd alex-*/ 84 | ALEX="$(cabal list-bin alex)" 85 | export ALEX 86 | cabal run --enable-tests alex:test:tests 87 | 88 | - name: Test (with debugging) 89 | run: | 90 | cd alex-*/ 91 | ALEX="$(cabal list-bin alex)" 92 | export ALEX 93 | cabal run --enable-tests alex:test:tests-debug 94 | 95 | - name: Haddock 96 | run: | 97 | cd alex-*/ 98 | cabal haddock --disable-documentation --haddock-all all 99 | 100 | - name: Cabal check 101 | run: | 102 | cd alex-*/ 103 | cabal check 104 | -------------------------------------------------------------------------------- /.github/workflows/emulated.yml: -------------------------------------------------------------------------------- 1 | name: emulated 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | # Emulation is incredibly slow and memory demanding. It seems that any 16 | # executable with GHC RTS takes at least 7-8 Gb of RAM, so we can run 17 | # `cabal` or `ghc` on their own, but cannot run them both at the same time, 18 | # striking out `cabal test`. Instead we rely on system packages and invoke 19 | # `ghc --make` manually, and even so `ghc -O` is prohibitively expensive. 
20 | emulated: 21 | runs-on: ubuntu-latest 22 | strategy: 23 | fail-fast: true 24 | matrix: 25 | arch: ['s390x', 'ppc64le'] 26 | steps: 27 | - uses: actions/checkout@v4 28 | - uses: uraimo/run-on-arch-action@v3 29 | timeout-minutes: 60 30 | with: 31 | arch: ${{ matrix.arch }} 32 | distro: ubuntu_latest 33 | githubToken: ${{ github.token }} 34 | install: | 35 | apt-get update -y 36 | apt-get install -y ghc alex happy 37 | run: | 38 | (cd src/; alex -g Scan.x; happy -ag Parser.y) 39 | # Need to remove mention of the Cabal path module, and then substitutes 40 | # getDataDir |-> return "/pwd/data/" 41 | # version |-> undefined 42 | sed -i '/^import Paths_alex.*$/d' src/Main.hs # 43 | # The nested sed here escapes / into \/ so that the outer sed doesn't 44 | # interpret the forward slashes. You're welcome. 45 | sed -i "s/getDataDir/\(return \"$(pwd | sed 's/\//\\\//g')\\/data\"\)/g" src/Main.hs 46 | sed -i "s/version/undefined/g" src/Main.hs 47 | ghc -XHaskell2010 -XPatternSynonyms -XFlexibleContexts -XMagicHash -XCPP -XNondecreasingIndentation -XScopedTypeVariables -XTupleSections -XDeriveFunctor \ 48 | -package array -package containers -package directory \ 49 | -isrc src/Main.hs \ 50 | -o alex 51 | ./alex -g tests/simple.x 52 | ghc -package array tests/simple.hs -o simple 53 | ./simple +RTS -s 54 | -------------------------------------------------------------------------------- /.github/workflows/fix-whitespace.yml: -------------------------------------------------------------------------------- 1 | name: Whitespace 2 | on: 3 | push: 4 | pull_request: 5 | 6 | jobs: 7 | check-whitespace: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout sources 12 | uses: actions/checkout@v4 13 | 14 | - name: Check for whitespace violations 15 | uses: andreasabel/fix-whitespace-action@v1 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 
*.hi 2 | *.prof 3 | *~ 4 | *.o 5 | *.dyn_hi 6 | *.dyn_o 7 | *.info 8 | /.ghc.environment.* 9 | /data/ 10 | /dist 11 | /dist-newstyle 12 | /examples/*.alex.hs 13 | /examples/*.happy.hs 14 | /examples/*.bin 15 | /examples/*.exe 16 | /old-*/ 17 | /tests/*.[dgn].hs 18 | /tests/*.[dgn].bin 19 | /tests/*.[dgn].exe 20 | .cabal-sandbox 21 | .stack-work 22 | cabal.sandbox.config 23 | /src/TAGS 24 | /TAGS 25 | /stack.yaml.lock 26 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | # Fill-in/fix authorship information for migrated commits. See 2 | # git-shortlog(1) for details. 3 | 4 | Alain Cremieux alcremi@pobox.com 5 | 6 | Alexander Biehl alexbiehl 7 | 8 | Duncan Coutts Duncan Coutts 9 | Duncan Coutts 10 | 11 | Gwern gwern0@gmail.com 12 | 13 | John Ericson 14 | 15 | Mike Thomas mthomas 16 | 17 | Neil Mitchell 18 | 19 | Omer Agacan osa1 20 | 21 | Ross Paterson ross 22 | 23 | Sebastian Graf 24 | 25 | Sergey Vinokurov <1149355+sergv@users.noreply.github.com> 26 | 27 | Simon Marlow simonmar 28 | Simon Marlow 29 | 30 | Sven Panne panne 31 | Sven Panne sven.panne@aedion.de 32 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3" 12 | # Latest 3.x 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | 18 | # Build documentation in the "doc/" directory with Sphinx 19 | sphinx: 20 | configuration: doc/conf.py 21 | 22 | # Optionally build your docs in additional formats such 
as PDF and ePub 23 | # formats: 24 | # - pdf 25 | # - epub 26 | 27 | # Optional but recommended, declare the Python requirements required 28 | # to build your documentation 29 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 30 | python: 31 | install: 32 | - requirements: doc/requirements.txt 33 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | doc/contributing.rst -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 1995-2011, Chris Dornan and Simon Marlow 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the copyright holders, nor the names of the 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CABAL = cabal 2 | 3 | HAPPY = happy 4 | HAPPY_OPTS = -agc 5 | 6 | ALEX = alex 7 | ALEX_OPTS = -g 8 | ALEX_VER = `awk '/^version:/ { print $$2 }' alex.cabal` 9 | 10 | SDIST_DIR=dist-newstyle/sdist 11 | 12 | sdist :: 13 | @case "`$(CABAL) --numeric-version`" in \ 14 | 2.[2-9].* | [3-9].* ) ;; \ 15 | * ) echo "Error: needs cabal 2.2.0.0 or later (but got : `$(CABAL) --numeric-version`)" ; exit 1 ;; \ 16 | esac 17 | @if [ "`git status -s`" != '' ]; then \ 18 | echo "Error: Tree is not clean"; \ 19 | exit 1; \ 20 | fi 21 | $(HAPPY) $(HAPPY_OPTS) src/Parser.y -o src/Parser.hs 22 | $(ALEX) $(ALEX_OPTS) src/Scan.x -o src/Scan.hs 23 | mv src/Parser.y src/Parser.y.boot 24 | mv src/Scan.x src/Scan.x.boot 25 | $(CABAL) v2-sdist 26 | @if [ ! -f "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" ]; then \ 27 | echo "Error: source tarball not found: dist/alex-$(ALEX_VER).tar.gz"; \ 28 | exit 1; \ 29 | fi 30 | git checkout . 31 | git clean -f 32 | 33 | sdist-test :: sdist sdist-test-only 34 | @rm -rf "${SDIST_DIR}/alex-${ALEX_VER}/" 35 | 36 | sdist-test-only :: 37 | @if [ ! 
-f "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" ]; then \ 38 | echo "Error: source tarball not found: ${SDIST_DIR}/alex-$(ALEX_VER).tar.gz"; \ 39 | exit 1; \ 40 | fi 41 | rm -rf "${SDIST_DIR}/alex-$(ALEX_VER)/" 42 | tar -xf "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" -C ${SDIST_DIR}/ 43 | echo "packages: ." > "${SDIST_DIR}/alex-$(ALEX_VER)/cabal.project" 44 | cd "${SDIST_DIR}/alex-$(ALEX_VER)/" && cabal v2-test --enable-tests all 45 | @echo "" 46 | @echo "Success! ${SDIST_DIR}/alex-$(ALEX_VER).tar.gz is ready for distribution!" 47 | @echo "" 48 | -------------------------------------------------------------------------------- /NOTE.txt: -------------------------------------------------------------------------------- 1 | Note: 2 | 3 | The contents of package Ranged-sets-0.3.0 has been copied into this 4 | package, in order to allow it to be part of the Haskell Platform, 5 | without introducing additional dependencies. 6 | 7 | The original license agreement has been included in the src/Data 8 | subdirectory, as required by the package source. 9 | 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Alex: A Lexical Analyser Generator 2 | 3 | [![CI](https://github.com/haskell/alex/actions/workflows/ci.yml/badge.svg)](https://github.com/haskell/alex/actions/workflows/ci.yml) 4 | 5 | Alex is a tool for generating lexical analysers, also known as "lexers" and "scanners", in Haskell. 6 | The lexical analysers implement a description of the tokens to be recognised in the form of regular expressions. 7 | It is similar to the tools "lex" and "flex" for C/C++. 8 | 9 | Share and enjoy! 
10 | 11 | ## Documentation 12 | 13 | Documentation is hosted on [Read the Docs](https://haskell-alex.readthedocs.io): 14 | 15 | - [Online (HTML)](https://haskell-alex.readthedocs.io) 16 | - [PDF](https://haskell-alex.readthedocs.io/_/downloads/en/latest/pdf/) 17 | - [Downloadable HTML](https://haskell-alex.readthedocs.io/_/downloads/en/latest/htmlzip/) 18 | 19 | For basic information of the sort typically found in a read-me, see the following sections of the docs: 20 | 21 | - [About Alex](https://haskell-alex.readthedocs.io/en/latest/about.html) 22 | - [Obtaining Alex](https://haskell-alex.readthedocs.io/en/latest/obtaining.html) 23 | - [Contributing](https://haskell-alex.readthedocs.io/en/latest/contributing.html) 24 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - Option for pure Haskell 98 output? 2 | - maybe Haskell 2010 at this point? 3 | - how about an option to use Data.Array.Unboxed? 4 | 5 | - Put in {-# LINE #-} pragmas for token actions 6 | 7 | - Prune states that aren't reachable? 8 | 9 | - Issue a warning for tokens that can't be generated? 10 | 11 | - Info file? 12 | - start codes 13 | - accepting states 14 | 15 | - More compact lexer table encoding: 16 | - equivalence classes? 17 | 18 | - Improve performance of Alex itself 19 | 20 | - AlexEOF doesn't provide a way to get at the text position of the EOF. 21 | 22 | - Allow user-defined wrappers? Wrappers in files relative to the 23 | current directory, for example? 24 | 25 | - case-insensitivity option (like flex's -i). 
26 | 27 | -------------------------------------------------------------------------------- /alex.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: >= 1.10 2 | name: alex 3 | version: 3.5.3.0 4 | -- don't forget updating changelog.md! 5 | license: BSD3 6 | license-file: LICENSE 7 | copyright: (c) Chis Dornan, Simon Marlow 8 | author: Chris Dornan and Simon Marlow 9 | maintainer: https://github.com/haskell/alex 10 | bug-reports: https://github.com/haskell/alex/issues 11 | stability: stable 12 | homepage: http://www.haskell.org/alex/ 13 | synopsis: Alex is a tool for generating lexical analysers in Haskell 14 | description: 15 | Alex is a tool for generating lexical analysers in Haskell. 16 | It takes a description of tokens based on regular 17 | expressions and generates a Haskell module containing code 18 | for scanning text efficiently. It is similar to the tool 19 | lex or flex for C/C++. 20 | 21 | category: Development 22 | build-type: Simple 23 | 24 | tested-with: 25 | GHC == 9.12.2 26 | GHC == 9.10.1 27 | GHC == 9.8.4 28 | GHC == 9.6.7 29 | GHC == 9.4.8 30 | GHC == 9.2.8 31 | GHC == 9.0.2 32 | GHC == 8.10.7 33 | GHC == 8.8.4 34 | GHC == 8.6.5 35 | GHC == 8.4.4 36 | GHC == 8.2.2 37 | GHC == 8.0.2 38 | 39 | data-dir: data/ 40 | 41 | data-files: 42 | AlexTemplate.hs 43 | AlexWrappers.hs 44 | 45 | extra-source-files: 46 | CHANGELOG.md 47 | README.md 48 | examples/Makefile 49 | examples/Tokens.x 50 | examples/Tokens_gscan.x 51 | examples/Tokens_posn.x 52 | examples/examples.x 53 | examples/haskell.x 54 | examples/lit.x 55 | examples/pp.x 56 | examples/state.x 57 | examples/tiny.y 58 | examples/words.x 59 | examples/words_monad.x 60 | examples/words_posn.x 61 | src/Parser.y.boot 62 | src/Scan.x.boot 63 | src/ghc_hooks.c 64 | tests/Makefile 65 | tests/simple.x 66 | tests/null.x 67 | tests/tokens.x 68 | tests/tokens_gscan.x 69 | tests/tokens_posn.x 70 | tests/tokens_bytestring.x 71 | 
tests/tokens_posn_bytestring.x 72 | tests/tokens_scan_user.x 73 | tests/tokens_strict_bytestring.x 74 | tests/tokens_monad_bytestring.x 75 | tests/tokens_monadUserState_bytestring.x 76 | tests/tokens_bytestring_unicode.x 77 | tests/basic_typeclass.x 78 | tests/basic_typeclass_bytestring.x 79 | tests/default_typeclass.x 80 | tests/gscan_typeclass.x 81 | tests/posn_typeclass.x 82 | tests/monad_typeclass.x 83 | tests/monad_typeclass_bytestring.x 84 | tests/monadUserState_typeclass.x 85 | tests/monadUserState_typeclass_bytestring.x 86 | tests/posn_typeclass_bytestring.x 87 | tests/strict_typeclass.x 88 | tests/unicode.x 89 | tests/issue_71.x 90 | tests/issue_119.x 91 | tests/issue_141.x 92 | tests/issue_197.x 93 | tests/issue_262.x 94 | tests/issue_269_part1.x 95 | tests/issue_269_part2.x 96 | tests/strict_text_typeclass.x 97 | tests/posn_typeclass_strict_text.x 98 | tests/tokens_monadUserState_strict_text.x 99 | 100 | source-repository head 101 | type: git 102 | location: https://github.com/haskell/alex.git 103 | 104 | executable alex 105 | hs-source-dirs: src 106 | main-is: Main.hs 107 | 108 | build-depends: 109 | base >= 4.9 && < 5 110 | -- Data.List.NonEmpty enters `base` at 4.9 111 | , array 112 | , containers 113 | , directory 114 | 115 | default-language: 116 | Haskell2010 117 | default-extensions: 118 | DeriveFunctor 119 | PatternSynonyms 120 | ScopedTypeVariables 121 | TupleSections 122 | other-extensions: 123 | CPP 124 | FlexibleContexts 125 | MagicHash 126 | NondecreasingIndentation 127 | OverloadedLists 128 | ghc-options: -Wall -Wcompat -rtsopts 129 | 130 | other-modules: 131 | AbsSyn 132 | CharSet 133 | DFA 134 | DFAMin 135 | DFS 136 | Info 137 | NFA 138 | Output 139 | Paths_alex 140 | Parser 141 | ParseMonad 142 | Scan 143 | Util 144 | UTF8 145 | Data.Ranged 146 | Data.Ranged.Boundaries 147 | Data.Ranged.RangedSet 148 | Data.Ranged.Ranges 149 | 150 | test-suite tests 151 | type: exitcode-stdio-1.0 152 | main-is: test.hs 153 | -- This line is important as 
it ensures that the local `exe:alex` component declared above is built before the test-suite component is invoked, as well as making sure that `alex` is made available on $PATH and `$alex_datadir` is set accordingly before invoking `test.hs` 154 | build-tools: alex 155 | 156 | default-language: Haskell2010 157 | 158 | build-depends: 159 | base < 5 160 | , process 161 | 162 | test-suite tests-debug 163 | type: exitcode-stdio-1.0 164 | main-is: test-debug.hs 165 | -- This line is important as it ensures that the local `exe:alex` component declared above is built before the test-suite component is invoked, as well as making sure that `alex` is made available on $PATH and `$alex_datadir` is set accordingly before invoking `test.hs` 166 | build-tools: alex 167 | 168 | default-language: Haskell2010 169 | 170 | build-depends: 171 | base < 5 172 | , process 173 | -------------------------------------------------------------------------------- /build-windows-dist.sh: -------------------------------------------------------------------------------- 1 | # mini script for building the relocatable Windows binary distribution. 2 | # 3 | # sh build-windows-dist.sh 4 | # 5 | # NB. the Cabal that shipped with GHC 6.6 isn't enough for this, because it 6 | # is missing this patch: 7 | # 8 | # Fri Oct 13 11:09:41 BST 2006 Simon Marlow 9 | # * Fix getDataDir etc. when bindir=$prefix 10 | # 11 | # So you need to use a more recent Cabal. GHC 6.6 is fine for building the 12 | # package, though. 
13 | 14 | ghc --make Setup 15 | ./Setup configure --prefix=`pwd`/install --bindir='$prefix' --libdir='$prefix' --datadir='$prefix' 16 | ./Setup build 17 | ./Setup install 18 | echo Now zip up `pwd`/install as "alex--Win32.zip" 19 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | ./ 3 | -------------------------------------------------------------------------------- /data/AlexTemplate.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- ALEX TEMPLATE 3 | -- 4 | -- This code is in the PUBLIC DOMAIN; you may copy it freely and use 5 | -- it for any purpose whatsoever. 6 | 7 | -- ----------------------------------------------------------------------------- 8 | -- INTERNALS and main scanner engine 9 | 10 | #ifdef ALEX_GHC 11 | # define ILIT(n) n# 12 | # define IBOX(n) (I# (n)) 13 | # define FAST_INT Int# 14 | -- Do not remove this comment. Required to fix CPP parsing when using GCC and a clang-compiled alex. 
15 | # if __GLASGOW_HASKELL__ > 706 16 | # define GTE(n,m) (GHC.Exts.tagToEnum# (n >=# m)) 17 | # define EQ(n,m) (GHC.Exts.tagToEnum# (n ==# m)) 18 | # else 19 | # define GTE(n,m) (n >=# m) 20 | # define EQ(n,m) (n ==# m) 21 | # endif 22 | # define PLUS(n,m) (n +# m) 23 | # define MINUS(n,m) (n -# m) 24 | # define TIMES(n,m) (n *# m) 25 | # define NEGATE(n) (negateInt# (n)) 26 | # define IF_GHC(x) (x) 27 | #else 28 | # define ILIT(n) (n) 29 | # define IBOX(n) (n) 30 | # define FAST_INT Int 31 | # define GTE(n,m) (n >= m) 32 | # define EQ(n,m) (n == m) 33 | # define PLUS(n,m) (n + m) 34 | # define MINUS(n,m) (n - m) 35 | # define TIMES(n,m) (n * m) 36 | # define NEGATE(n) (negate (n)) 37 | # define IF_GHC(x) 38 | #endif 39 | 40 | #ifdef ALEX_GHC 41 | data AlexAddr = AlexA# Addr# 42 | -- Do not remove this comment. Required to fix CPP parsing when using GCC and a clang-compiled alex. 43 | 44 | {-# INLINE alexIndexInt16OffAddr #-} 45 | alexIndexInt16OffAddr :: AlexAddr -> Int# -> Int# 46 | alexIndexInt16OffAddr (AlexA# arr) off = 47 | #if __GLASGOW_HASKELL__ >= 901 48 | GHC.Exts.int16ToInt# -- qualified import because it doesn't exist on older GHC's 49 | #endif 50 | #ifdef WORDS_BIGENDIAN 51 | (GHC.Exts.word16ToInt16# (GHC.Exts.wordToWord16# (GHC.Exts.byteSwap16# (GHC.Exts.word16ToWord# (GHC.Exts.int16ToWord16# 52 | #endif 53 | (indexInt16OffAddr# arr off) 54 | #ifdef WORDS_BIGENDIAN 55 | ))))) 56 | #endif 57 | #else 58 | alexIndexInt16OffAddr = (Data.Array.!) 
59 | #endif 60 | 61 | #ifdef ALEX_GHC 62 | {-# INLINE alexIndexInt32OffAddr #-} 63 | alexIndexInt32OffAddr :: AlexAddr -> Int# -> Int# 64 | alexIndexInt32OffAddr (AlexA# arr) off = 65 | #if __GLASGOW_HASKELL__ >= 901 66 | GHC.Exts.int32ToInt# -- qualified import because it doesn't exist on older GHC's 67 | #endif 68 | #ifdef WORDS_BIGENDIAN 69 | (GHC.Exts.word32ToInt32# (GHC.Exts.wordToWord32# (GHC.Exts.byteSwap32# (GHC.Exts.word32ToWord# (GHC.Exts.int32ToWord32# 70 | #endif 71 | (indexInt32OffAddr# arr off) 72 | #ifdef WORDS_BIGENDIAN 73 | ))))) 74 | #endif 75 | #else 76 | alexIndexInt32OffAddr = (Data.Array.!) 77 | #endif 78 | 79 | #ifdef ALEX_GHC 80 | -- GHC >= 503, unsafeAt is available from Data.Array.Base. 81 | quickIndex = unsafeAt 82 | #else 83 | quickIndex = (Data.Array.!) 84 | #endif 85 | 86 | -- ----------------------------------------------------------------------------- 87 | -- Main lexing routines 88 | 89 | data AlexReturn a 90 | = AlexEOF 91 | | AlexError !AlexInput 92 | | AlexSkip !AlexInput !Int 93 | | AlexToken !AlexInput !Int a 94 | 95 | -- alexScan :: AlexInput -> StartCode -> AlexReturn a 96 | alexScan input__ IBOX(sc) 97 | = alexScanUser (error "alex rule requiring context was invoked by alexScan; use alexScanUser instead?") input__ IBOX(sc) 98 | 99 | -- If the generated alexScan/alexScanUser functions are called multiple times 100 | -- in the same file, alexScanUser gets broken out into a separate function and 101 | -- increases memory usage. Make sure GHC inlines this function and optimizes it. 
102 | {-# INLINE alexScanUser #-} 103 | 104 | alexScanUser user__ input__ IBOX(sc) 105 | = case alex_scan_tkn user__ input__ ILIT(0) input__ sc AlexNone of 106 | (AlexNone, input__') -> 107 | case alexGetByte input__ of 108 | Nothing -> 109 | #ifdef ALEX_DEBUG 110 | Debug.Trace.trace ("End of input.") $ 111 | #endif 112 | AlexEOF 113 | Just _ -> 114 | #ifdef ALEX_DEBUG 115 | Debug.Trace.trace ("Error.") $ 116 | #endif 117 | AlexError input__' 118 | 119 | (AlexLastSkip input__'' len, _) -> 120 | #ifdef ALEX_DEBUG 121 | Debug.Trace.trace ("Skipping.") $ 122 | #endif 123 | AlexSkip input__'' len 124 | 125 | (AlexLastAcc k input__''' len, _) -> 126 | #ifdef ALEX_DEBUG 127 | Debug.Trace.trace ("Accept.") $ 128 | #endif 129 | AlexToken input__''' len ((Data.Array.!) alex_actions k) 130 | 131 | 132 | -- Push the input through the DFA, remembering the most recent accepting 133 | -- state it encountered. 134 | 135 | alex_scan_tkn user__ orig_input len input__ s last_acc = 136 | input__ `seq` -- strict in the input 137 | let 138 | new_acc = (check_accs (alex_accept `quickIndex` IBOX(s))) 139 | in 140 | new_acc `seq` 141 | case alexGetByte input__ of 142 | Nothing -> (new_acc, input__) 143 | Just (c, new_input) -> 144 | #ifdef ALEX_DEBUG 145 | Debug.Trace.trace ("State: " ++ show IBOX(s) ++ ", char: " ++ show c ++ " " ++ (show . chr . fromIntegral) c) $ 146 | #endif 147 | case fromIntegral c of { IBOX(ord_c) -> 148 | let 149 | base = alexIndexInt32OffAddr alex_base s 150 | offset = PLUS(base,ord_c) 151 | 152 | new_s = if GTE(offset,ILIT(0)) 153 | && let check = alexIndexInt16OffAddr alex_check offset 154 | in EQ(check,ord_c) 155 | then alexIndexInt16OffAddr alex_table offset 156 | else alexIndexInt16OffAddr alex_deflt s 157 | in 158 | case new_s of 159 | ILIT(-1) -> (new_acc, input__) 160 | -- on an error, we want to keep the input *before* the 161 | -- character that failed, not after. 
162 | _ -> alex_scan_tkn user__ orig_input 163 | #ifdef ALEX_LATIN1 164 | PLUS(len,ILIT(1)) 165 | -- issue 119: in the latin1 encoding, *each* byte is one character 166 | #else 167 | (if c < 0x80 || c >= 0xC0 then PLUS(len,ILIT(1)) else len) 168 | -- note that the length is increased ONLY if this is the 1st byte in a char encoding) 169 | #endif 170 | new_input new_s new_acc 171 | } 172 | where 173 | check_accs (AlexAccNone) = last_acc 174 | check_accs (AlexAcc a ) = AlexLastAcc a input__ IBOX(len) 175 | check_accs (AlexAccSkip) = AlexLastSkip input__ IBOX(len) 176 | #ifndef ALEX_NOPRED 177 | check_accs (AlexAccPred a predx rest) 178 | | predx user__ orig_input IBOX(len) input__ 179 | = AlexLastAcc a input__ IBOX(len) 180 | | otherwise 181 | = check_accs rest 182 | check_accs (AlexAccSkipPred predx rest) 183 | | predx user__ orig_input IBOX(len) input__ 184 | = AlexLastSkip input__ IBOX(len) 185 | | otherwise 186 | = check_accs rest 187 | #endif 188 | 189 | data AlexLastAcc 190 | = AlexNone 191 | | AlexLastAcc !Int !AlexInput !Int 192 | | AlexLastSkip !AlexInput !Int 193 | 194 | data AlexAcc user 195 | = AlexAccNone 196 | | AlexAcc Int 197 | | AlexAccSkip 198 | #ifndef ALEX_NOPRED 199 | | AlexAccPred Int (AlexAccPred user) (AlexAcc user) 200 | | AlexAccSkipPred (AlexAccPred user) (AlexAcc user) 201 | 202 | type AlexAccPred user = user -> AlexInput -> Int -> AlexInput -> Bool 203 | 204 | -- ----------------------------------------------------------------------------- 205 | -- Predicates on a rule 206 | 207 | alexAndPred p1 p2 user__ in1 len in2 208 | = p1 user__ in1 len in2 && p2 user__ in1 len in2 209 | 210 | --alexPrevCharIsPred :: Char -> AlexAccPred _ 211 | alexPrevCharIs c _ input__ _ _ = c == alexInputPrevChar input__ 212 | 213 | alexPrevCharMatches f _ input__ _ _ = f (alexInputPrevChar input__) 214 | 215 | --alexPrevCharIsOneOfPred :: Array Char Bool -> AlexAccPred _ 216 | alexPrevCharIsOneOf arr _ input__ _ _ = arr Data.Array.! 
alexInputPrevChar input__ 217 | 218 | --alexRightContext :: Int -> AlexAccPred _ 219 | alexRightContext IBOX(sc) user__ _ _ input__ = 220 | case alex_scan_tkn user__ input__ ILIT(0) input__ sc AlexNone of 221 | (AlexNone, _) -> False 222 | _ -> True 223 | -- TODO: there's no need to find the longest 224 | -- match when checking the right context, just 225 | -- the first match will do. 226 | #endif 227 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Flag -n ("nitpick") warns about broken references 12 | # Flag -W turns warnings into errors 13 | # Flag --keep-going continues after errors 14 | SPHINXOPTS := -n -W --keep-going -E 15 | 16 | .PHONY: help html Makefile 17 | 18 | # default goal, first 19 | html: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | help: 23 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | 25 | # Catch-all target: route all unknown targets to Sphinx using the new 26 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 27 | %: Makefile 28 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 29 | -------------------------------------------------------------------------------- /doc/about.rst: -------------------------------------------------------------------------------- 1 | .. 
_about: 2 | 3 | About Alex 4 | ========== 5 | 6 | Alex can always be obtained from its `home page `__. 7 | The latest source code lives in the `git repository `__ on ``GitHub``. 8 | 9 | Releases 10 | -------- 11 | 12 | Releases of Alex are published on `Hackage `__. 13 | They are also given `Git tags `__ in the repository. 14 | 15 | .. _bug-reports: 16 | 17 | Reporting issues with Alex 18 | -------------------------- 19 | 20 | Please report bugs on the `Alex issue tracker `__. 21 | There are no specific mailing lists for the discussion of Alex-related matters, 22 | but such topics should be fine on the `Haskell Cafe `__ mailing list. 23 | 24 | License 25 | ------- 26 | 27 | Copyright (c) 1995-2011, Chris Dornan and Simon Marlow. 28 | All rights reserved. 29 | 30 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 31 | 32 | - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 33 | 34 | - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the 35 | documentation and/or other materials provided with the distribution. 36 | 37 | - Neither the name of the copyright holders, nor the names of the contributors may be used to endorse or promote products derived from this software without specific prior written permission. 38 | 39 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
40 | IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 41 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 42 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | 44 | Acknowledgments 45 | --------------- 46 | 47 | Original authors 48 | ~~~~~~~~~~~~~~~~ 49 | 50 | - Chris Dornan: cdornan@arm.com 51 | 52 | - Isaac Jones: ijones@syntaxpolice.org 53 | 54 | - Simon Marlow: simonmar@microsoft.com 55 | 56 | Current Maintainers 57 | ~~~~~~~~~~~~~~~~~~~ 58 | 59 | - Andreas Abel (@andreasabel) 60 | 61 | - John Ericson (@Ericson2314) 62 | 63 | - Simon Marlow (@simonmar) 64 | 65 | Other contributors 66 | ~~~~~~~~~~~~~~~~~~ 67 | 68 | The data is in the Git history. 69 | GitHub can render that in `various ways `__. 70 | 71 | The documentation itself 72 | ~~~~~~~~~~~~~~~~~~~~~~~~ 73 | 74 | This documentation is based on the original documentation written by Alex's original authors using DocBook. 75 | 76 | It was converted to reStructuredText / Sphinx / readthedocs.org by Andreas Abel in February 2022. 77 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options.
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'Alex' 21 | copyright = '2022, Simon Marlow and the Alex developers' 22 | author = 'Simon Marlow and the Alex developers' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | ] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # List of patterns, relative to source directory, that match files and 37 | # directories to ignore when looking for source files. 38 | # This pattern also affects html_static_path and html_extra_path. 39 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 40 | 41 | 42 | # -- Options for HTML output ------------------------------------------------- 43 | 44 | # The theme to use for HTML and HTML Help pages. See the documentation for 45 | # a list of builtin themes. 46 | # 47 | html_theme = 'sphinx_rtd_theme' 48 | 49 | # Add any paths that contain custom static files (such as style sheets) here, 50 | # relative to this directory. They are copied after the builtin static files, 51 | # so a file named "default.css" will overwrite the builtin "default.css". 
52 | html_static_path = ['_static'] 53 | 54 | # The name of the Pygments (syntax highlighting) style to use. 55 | pygments_style = 'sphinx' 56 | highlight_language = 'Peg' 57 | # Andreas Abel, 2022-02-27: Peg looked best from the "grammar" highlighters. 58 | # I also tried 'Bnf' and 'Abnf'. 59 | -------------------------------------------------------------------------------- /doc/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | 3 | Contributing to Alex 4 | ==================== 5 | 6 | .. highlight:: bash 7 | 8 | Source Code Repository 9 | ---------------------- 10 | 11 | Alex is hosted on `GitHub `__. 12 | As previously discussed in :ref:`bug-reports`, we use the built-in `GitHub issue tracker `__ for Alex. 13 | We also use `GitHub pull requests `__ for managing changes; 14 | feel free to submit them! 15 | 16 | Repo Layout 17 | ----------- 18 | 19 | - ``src``: The source code for Alex itself 20 | - ``doc``: The documentation 21 | - ``examples``: Various examples of using Alex 22 | 23 | Contributor Build Instructions 24 | ------------------------------ 25 | 26 | Alex is built using `GHC `__ and 27 | `Cabal Install `__ (version 2.0 or later). 28 | Make sure they are already installed first. 29 | 30 | Since Alex itself is implemented in terms of an Alex scanner, 31 | bootstrapping Alex is a bit tricky: 32 | 33 | You need to have the build-tools ``alex`` and ``happy`` manually installed; 34 | either via your system package manager distribution, the Haskell Platform, or e.g. via (run this outside the Git repository!):: 35 | 36 | $ cabal install alex happy 37 | 38 | which installs them into ``${HOME}/.cabal/bin`` by default. 39 | (make sure they are in your ``$PATH`` for the next steps!) 40 | 41 | Variant A 42 | ~~~~~~~~~ 43 | 44 | You can install ``alex`` simply by invoking:: 45 | 46 | $ cabal install 47 | 48 | from inside the Git folder. 
49 | 50 | Variant B 51 | ~~~~~~~~~ 52 | 53 | Alternatively, you can use the ``Makefile`` which automates the steps of producing a self-contained pre-bootstrapped source distribution with pre-generated lexer/scanners:: 54 | 55 | $ make sdist 56 | $ cabal install dist/alex-*.tar.gz 57 | 58 | For convenience, there is also a ``make sdist-test`` target which builds the source tarball and runs the test-suite from within the source dist. 59 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Alex User Guide 3 | =============== 4 | 5 | Alex is a tool for generating lexical analysers, also known as "lexers" and "scanners", in Haskell. 6 | The lexical analysers implement a description of the tokens to be recognised in the form of regular expressions. 7 | It is similar to the tools "lex" and "flex" for C/C++. 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents 12 | 13 | about 14 | obtaining 15 | introduction 16 | syntax 17 | regex 18 | api 19 | invoking 20 | contributing 21 | 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /doc/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | Alex is a tool for generating lexical analysers, also known as "lexers" and "scanners", in Haskell. 5 | The lexical analysers implement a description of the tokens to be recognised in the form of regular expressions. 6 | It is similar to the tools "lex" and "flex" for C/C++. 7 | 8 | Alex takes a description of tokens based on regular expressions and generates a Haskell module containing code for scanning text 9 | efficiently.
10 | Alex is designed to be familiar to existing lex users, 11 | although it does depart from lex in a number of ways. 12 | 13 | A sample specification would be the following: 14 | 15 | .. code-block:: none 16 | 17 | { 18 | module Main (main) where 19 | } 20 | 21 | %wrapper "basic" 22 | 23 | $digit = 0-9 -- digits 24 | $alpha = [a-zA-Z] -- alphabetic characters 25 | 26 | tokens :- 27 | 28 | $white+ ; 29 | "--".* ; 30 | let { \s -> Let } 31 | in { \s -> In } 32 | $digit+ { \s -> Int (read s) } 33 | [\=\+\-\*\/\(\)] { \s -> Sym (head s) } 34 | $alpha [$alpha $digit \_ \']* { \s -> Var s } 35 | 36 | { 37 | -- Each action has type :: String -> Token 38 | 39 | -- The token type: 40 | data Token 41 | = Let 42 | | In 43 | | Sym Char 44 | | Var String 45 | | Int Int 46 | deriving (Eq, Show) 47 | 48 | main = do 49 | s <- getContents 50 | print (alexScanTokens s) 51 | } 52 | 53 | The first few lines between the ``{`` and ``}`` provide a code scrap (some inlined Haskell code) to be placed directly in the output, 54 | the scrap at the top of the module is normally used to declare the module name for the generated Haskell module, in this case ``Main``. 55 | 56 | The next line, ``%wrapper "basic"`` controls what kind of support code Alex should produce along with the basic scanner. 57 | The ``basic`` wrapper selects a scanner that tokenises a ``String`` and returns a list of tokens. 58 | Wrappers are described fully in :ref:`The Interface to an Alex-generated lexer `. 59 | 60 | The next two lines define the ``$digit`` and ``$alpha`` macros for use in the token definitions. 61 | 62 | The ‘\ ``tokens :-``\ ’ line ends the macro definitions and starts the definition of the scanner. 63 | 64 | The scanner is specified as a series of token definitions where each token specification takes the form of 65 | 66 | :: 67 | 68 | regexp { code } 69 | 70 | The meaning of this rule is 71 | "if the input matches , then return ". 
72 | The code part along with the braces can be replaced by simply ‘\ ``;``\ ’, 73 | meaning that this token should be ignored in the input stream. 74 | As you can see, we've used this to ignore whitespace in our example. 75 | 76 | Our scanner is set up so that the actions are all functions with type ``String->Token``. 77 | When the token is matched, the portion of the input stream that it matched is passed to the appropriate action function as a ``String``. 78 | 79 | At the bottom of the file we have another code fragment, surrounded by braces ``{ ... }``. 80 | In this fragment, we declare the type of the tokens, and give a ``main`` function that we can use for testing it; 81 | the ``main`` function just tokenises the input and prints the results to standard output. 82 | 83 | Alex has kindly provided the following function which we can use to invoke the scanner: 84 | 85 | .. code-block:: haskell 86 | 87 | alexScanTokens :: String -> [Token] 88 | 89 | Alex arranges for the input stream to be tokenised, 90 | each of the action functions to be passed the appropriate ``String``, 91 | and a list of ``Token``\ s returned as the result. 92 | If the input stream is lazy, the output stream will also be produced lazily [1]_. 93 | 94 | We have demonstrated the simplest form of scanner here, 95 | which was selected by the ``%wrapper "basic"`` line near the top of the file. 96 | In general, actions do not have to have type ``String->Token``, 97 | and there's no requirement for the scanner to return a list of tokens. 98 | 99 | With this specification in the file ``Tokens.x``, 100 | Alex can be used to generate ``Tokens.hs``: 101 | 102 | .. code-block:: sh 103 | 104 | $ alex Tokens.x 105 | 106 | If the module needed to be placed in a different file, ``Main.hs`` for example, 107 | then the output filename can be specified using the ``-o`` option: 108 | 109 | .. code-block:: sh 110 | 111 | $ alex Tokens.x -o Main.hs 112 | 113 | The resulting module is Haskell 98 compatible. 
114 | It can also be readily used with a `Happy `__ parser. 115 | 116 | .. [1] 117 | That is, unless you have any patterns that require a long lookahead. 118 | -------------------------------------------------------------------------------- /doc/invoking.rst: -------------------------------------------------------------------------------- 1 | .. _invoking: 2 | 3 | Invoking Alex 4 | ============= 5 | 6 | The command line syntax for Alex is entirely standard: 7 | 8 | .. code-block:: sh 9 | 10 | $ alex { option } file.x { option } 11 | 12 | Alex expects a single ``file.x`` to be named on the command line. 13 | By default, Alex will create ``file.hs`` containing the Haskell source for the lexer. 14 | 15 | The options that Alex accepts are listed below: 16 | 17 | ``-o`` ; ``--outfile``\ = 18 | Specifies the filename in which the output is to be placed. 19 | By default, this is the name of the input file with the ``.x`` suffix replaced by ``.hs``. 20 | 21 | ``-i`` []; ``--info`` [<=file>] 22 | Produces a human-readable rendition of the state machine (DFA) that Alex derives from the lexer, in 23 | (default: ``file.info`` where the input file is ``file.x``). 24 | 25 | The format of the info file is currently a bit basic, and not particularly informative. 26 | 27 | ``-t`` []; ``--template``\ = 28 | Look in for template files. 29 | 30 | ``-g``; ``--ghc`` 31 | Causes Alex to produce a lexer which is optimised for compiling with GHC. 32 | The lexer will be significantly more efficient, 33 | both in terms of the size of the compiled lexer and its runtime. 34 | 35 | ``-d``; ``--debug`` 36 | Causes Alex to produce a lexer which will output debugging messages as it runs. 37 | 38 | ``-l``; ``--latin1`` 39 | Disables the use of UTF-8 encoding in the generated lexer. 
40 | This has two consequences: 41 | 42 | - The Alex source file is still assumed to be UTF-8 encoded, 43 | but any Unicode characters outside the range 0-255 are mapped to Latin-1 characters by taking the code point modulo 256. 44 | 45 | - The built-in macros ``$printable`` and '``.``' range over the Latin-1 character set, not the Unicode character set. 46 | 47 | Note that this currently does not disable the UTF-8 encoding that happens in the "basic" wrappers, 48 | so ``--latin1`` does not make sense in conjunction with these wrappers 49 | (not that you would want to do that, anyway). 50 | Alternatively, a ``%encoding "latin1"`` declaration can be used inside the Alex source file to request a Latin-1 mapping. 51 | See also :ref:`Unicode and UTF-8 ` for more information about the ``%encoding`` declaration. 52 | 53 | ``-?``; ``--help`` 54 | Display help and exit. 55 | 56 | ``-V``; ``--version`` 57 | Output version information and exit. 58 | Note that for legacy reasons ``-v`` is supported, too, but the use of it is deprecated. 59 | ``-v`` will be used for verbose mode when it is actually implemented. 60 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/obtaining.rst: -------------------------------------------------------------------------------- 1 | .. _installing: 2 | 3 | Obtaining Alex 4 | ============== 5 | 6 | .. highlight:: bash 7 | 8 | If you just want to *use* Alex, you can build from a release. 9 | This should work the same as any other Haskell package. 10 | 11 | Alex itself and its examples are intended to work with GHC >= 7.0. 12 | 13 | Haskell-specific way 14 | -------------------- 15 | 16 | From `Hackage `__ via `Cabal Install `__:: 17 | 18 | $ cabal install alex 19 | 20 | From `Stackage `__ via `Stack `__:: 21 | 22 | $ stack install alex 23 | 24 | Moreover, recent versions of ``cabal`` will automatically install the required version of ``alex`` based on ``build-tools``/``build-tool-depends`` `declarations `__. 25 | 26 | Operating System way 27 | -------------------- 28 | 29 | Because Alex is a dependency of GHC, it is often packaged by operating systems. 30 | `Repology `__ aggregates this info across many distros and operating systems, and Alex is actually listed twice: 31 | 32 | - https://repology.org/project/haskell:alex/versions 33 | - https://repology.org/project/alex/versions 34 | 35 | The table contains links to the individual OS packages, which should provide installation instructions. 36 | -------------------------------------------------------------------------------- /doc/regex.rst: -------------------------------------------------------------------------------- 1 | .. 
_regexps: 2 | 3 | Regular Expression 4 | ================== 5 | 6 | Regular expressions are the patterns that Alex uses to match tokens in the input stream. 7 | 8 | .. _regexp-syntax: 9 | 10 | Syntax of regular expressions 11 | ----------------------------- 12 | 13 | :: 14 | 15 | regexp := rexp2 { '|' rexp2 } 16 | 17 | rexp2 := rexp1 { rexp1 } 18 | 19 | rexp1 := rexp0 [ '*' | '+' | '?' | repeat ] 20 | 21 | rexp0 := set 22 | | @rmac 23 | | @string 24 | | '(' [ regexp ] ')' 25 | 26 | repeat := '{' $digit+ '}' 27 | | '{' $digit+ ',' '}' 28 | | '{' $digit+ ',' $digit+ '}' 29 | 30 | The syntax of regular expressions is fairly standard, 31 | the only difference from normal lex-style regular expressions being that we allow the sequence ``()`` to denote the regular expression that matches the empty string. 32 | 33 | Spaces are ignored in a regular expression, 34 | so feel free to space out your regular expression as much as you like, 35 | even split it over multiple lines and include comments. 36 | Literal whitespace can be included by surrounding it with quotes ``" "``, or by escaping each whitespace character with ``\``. 37 | 38 | ``set`` 39 | Matches any of the characters in . See :ref:`Syntax of character sets ` for the syntax of sets. 40 | 41 | ``@foo`` 42 | Expands to the definition of the appropriate regular expression macro. 43 | 44 | ``"..."`` 45 | Matches the sequence of characters in the string, in that order. 46 | 47 | ``r*`` 48 | Matches zero or more occurrences of . 49 | 50 | ``r+`` 51 | Matches one or more occurrences of . 52 | 53 | ``r?`` 54 | Matches zero or one occurrences of . 55 | 56 | ``r{n}`` 57 | Matches occurrences of . 58 | 59 | ``r{n,}`` 60 | Matches or more occurrences of . 61 | 62 | ``r{n,m}`` 63 | Matches between and (inclusive) occurrences of . 64 | 65 | .. _charsets: 66 | 67 | Syntax of character sets 68 | ------------------------ 69 | 70 | Character sets are the fundamental elements in a regular expression. 
71 | A character set is a pattern that matches a single character. 72 | The syntax of character sets is as follows: 73 | 74 | :: 75 | 76 | set := set '#' set0 77 | | set0 78 | 79 | set0 := @char [ '-' @char ] 80 | | '.' 81 | | @smac 82 | | '[' [^] { set } ']' 83 | | '~' set0 84 | 85 | The various character set constructions are: 86 | 87 | ``char`` 88 | The simplest character set is a single Unicode character. 89 | Note that special characters such as ``[`` and ``.`` must be escaped by prefixing them with ``\`` 90 | (see the lexical syntax, :ref:`Lexical syntax `, for the list of special characters). 91 | 92 | Certain non-printable characters have special escape sequences. 93 | These are: ``\a``, ``\b``, ``\f``, ``\n``, ``\r``, ``\t``, and ``\v``. 94 | Other characters can be represented by using their numerical character values 95 | (although this may be non-portable): 96 | ``\x0A`` is equivalent to ``\n``, for example. 97 | 98 | Whitespace characters are ignored; 99 | to represent a literal space, escape it with ``\``. 100 | 101 | ``char-char`` 102 | A range of characters can be expressed by separating the characters with a ‘\ ``-``\ ’, 103 | all the characters with codes in the given range are included in the set. 104 | Character ranges can also be non-portable. 105 | 106 | ``.`` 107 | The built-in set ‘\ ``.``\ ’ matches all characters except newline (``\n``). 108 | 109 | Equivalent to the set ``[\x00-\x10ffff] # \n``. 110 | 111 | ``set0 # set1`` 112 | Matches all the characters in that are not in . 113 | 114 | ``[sets]`` 115 | The union of . 116 | 117 | ``[^sets]`` 118 | The complement of the union of the . Equivalent to 119 | ‘\ ``. # [sets]``\ ’. 120 | 121 | ``~set`` 122 | The complement of . 123 | Equivalent to ‘\ ``. # set``\ ’ 124 | 125 | A set macro is written as ``$`` followed by an identifier. 126 | There are some builtin character set macros: 127 | 128 | ``$white`` 129 | Matches all whitespace characters, including newline. 
130 | 131 | Equivalent to the set ``[\ \t\n\f\v\r]``. 132 | 133 | ``$printable`` 134 | Matches all "printable characters". 135 | Currently this corresponds to Unicode code points 32 to 0x10ffff, 136 | although strictly speaking there are many non-printable code points in this region. 137 | In the future Alex may use a more precise definition of ``$printable``. 138 | 139 | Character set macros can be defined at the top of the file at the same time as regular expression macros 140 | (see :ref:`Regular Expression `). 141 | Here are some example character set macros: 142 | 143 | :: 144 | 145 | $lls = a-z -- little letters 146 | $not_lls = ~a-z -- anything but little letters 147 | $ls_ds = [a-zA-Z0-9] -- letters and digits 148 | $sym = [ \! \@ \# \$ ] -- the symbols !, @, #, and $ 149 | $sym_q_nl = [ \' \! \@ \# \$ \n ] -- the above symbols with ' and newline 150 | $quotable = $printable # \' -- any graphic character except ' 151 | $del = \127 -- ASCII DEL 152 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx >= 7.2.5 2 | sphinx_rtd_theme >= 1.3.0 3 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | ALEX=../dist/build/alex/alex 2 | HC=ghc -Wall -fno-warn-unused-binds -fno-warn-missing-signatures -fno-warn-unused-matches -fno-warn-name-shadowing -fno-warn-unused-imports -fno-warn-tabs 3 | 4 | HAPPY=happy 5 | HAPPY_OPTS=-agc 6 | 7 | ifeq "$(TARGETPLATFORM)" "i386-unknown-mingw32" 8 | exeext=.exe 9 | else 10 | exeext=.bin 11 | endif 12 | 13 | PROGS = lit Tokens Tokens_gscan words words_posn words_monad tiny haskell tiger 14 | 15 | ALEX_OPTS = --template=../data/ -g 16 | 17 | %.alex.hs : %.x 18 | $(ALEX) $(ALEX_OPTS) $< -o $@ 19 | 20 | %.happy.hs : %.y 21 | $(HAPPY) $(HAPPY_OPTS) $< -o $@ 22 | 
23 | %.o : %.hs 24 | $(HC) $(HC_OPTS) -c -o $@ $< 25 | 26 | CLEAN_FILES += *.info *.hi *.o *.bin *.exe 27 | 28 | all : $(addsuffix $(exeext),$(PROGS)) 29 | 30 | tiny$(exeext) : tiny.happy.hs Tokens_posn.alex.hs 31 | $(HC) $(HC_OPTS) -o $@ $^ 32 | 33 | lit$(exeext) : lit.alex.hs 34 | $(HC) $(HC_OPTS) -o $@ $^ 35 | 36 | Tokens$(exeext) : Tokens.alex.hs 37 | $(HC) $(HC_OPTS) -o $@ $^ 38 | 39 | Tokens_gscan$(exeext) : Tokens_gscan.alex.hs 40 | $(HC) $(HC_OPTS) -o $@ $^ 41 | 42 | words$(exeext) : words.alex.hs 43 | $(HC) $(HC_OPTS) -o $@ $^ 44 | 45 | words_posn$(exeext) : words_posn.alex.hs 46 | $(HC) $(HC_OPTS) -o $@ $^ 47 | 48 | words_monad$(exeext) : words_monad.alex.hs 49 | $(HC) $(HC_OPTS) -o $@ $^ 50 | 51 | haskell$(exeext) : haskell.alex.hs 52 | $(HC) $(HC_OPTS) -o $@ $^ 53 | 54 | tiger$(exeext) : tiger.alex.hs 55 | $(HC) $(HC_OPTS) -main-is TigerLexer -o $@ $^ 56 | 57 | .PHONY: clean 58 | clean: 59 | rm -f *.o *.hi $(addsuffix $(exeext),$(PROGS)) \ 60 | *.alex.hs *.happy.hs 61 | -------------------------------------------------------------------------------- /examples/Tokens.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main where 3 | } 4 | 5 | %wrapper "basic" 6 | 7 | $digit = 0-9 -- digits 8 | $alpha = [a-zA-Z] -- alphabetic characters 9 | 10 | tokens :- 11 | 12 | $white+ { \s -> White } 13 | "--".* { \s -> Comment } 14 | let { \s -> Let } 15 | in { \s -> In } 16 | $digit+ { \s -> Int (read s) } 17 | [\=\+\-\*\/\(\)] { \s -> Sym (head s) } 18 | $alpha [$alpha $digit \_ \']* { \s -> Var s } 19 | 20 | { 21 | -- Each right-hand side has type :: String -> Token 22 | 23 | -- The token type: 24 | data Token = 25 | White | 26 | Comment | 27 | Let | 28 | In | 29 | Sym Char | 30 | Var String | 31 | Int Int | 32 | Err 33 | deriving (Eq,Show) 34 | 35 | main = do 36 | s <- getContents 37 | print (alexScanTokens s) 38 | } 39 | -------------------------------------------------------------------------------- 
/examples/Tokens_gscan.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | } 4 | 5 | %wrapper "gscan" 6 | 7 | $digit = 0-9 -- digits 8 | $alpha = [a-zA-Z] -- alphabetic characters 9 | 10 | tokens :- 11 | 12 | $white+ ; 13 | "--".* ; 14 | let { tok (\p s -> Let p) } 15 | in { tok (\p s -> In p) } 16 | $digit+ { tok (\p s -> Int p (read s)) } 17 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head s)) } 18 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p s) } 19 | 20 | { 21 | -- Some action helpers: 22 | tok f p c str len cont (sc,state) = f p (take len str) : cont (sc,state) 23 | 24 | -- The token type: 25 | data Token = 26 | Let AlexPosn | 27 | In AlexPosn | 28 | Sym AlexPosn Char | 29 | Var AlexPosn String | 30 | Int AlexPosn Int | 31 | Err AlexPosn 32 | deriving (Eq,Show) 33 | 34 | main = do 35 | s <- getContents 36 | print (alexGScan stop undefined s) 37 | where 38 | stop p c "" (sc,s) = [] 39 | stop p c _ (sc,s) = error "lexical error" 40 | } 41 | -------------------------------------------------------------------------------- /examples/Tokens_posn.x: -------------------------------------------------------------------------------- 1 | { 2 | module Tokens_posn (Token(..), AlexPosn(..), alexScanTokens, token_posn) where 3 | } 4 | 5 | %wrapper "posn" 6 | 7 | $digit = 0-9 -- digits 8 | $alpha = [a-zA-Z] -- alphabetic characters 9 | 10 | tokens :- 11 | 12 | $white+ ; 13 | "--".* ; 14 | let { tok (\p s -> Let p) } 15 | in { tok (\p s -> In p) } 16 | $digit+ { tok (\p s -> Int p (read s)) } 17 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head s)) } 18 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p s) } 19 | 20 | { 21 | -- Each right-hand side has type :: AlexPosn -> String -> Token 22 | 23 | -- Some action helpers: 24 | tok f p s = f p s 25 | 26 | -- The token type: 27 | data Token = 28 | Let AlexPosn | 29 | In AlexPosn | 30 | Sym AlexPosn Char | 31 | Var AlexPosn String | 32 | Int AlexPosn Int 33 | 
deriving (Eq,Show) 34 | 35 | token_posn (Let p) = p 36 | token_posn (In p) = p 37 | token_posn (Sym p _) = p 38 | token_posn (Var p _) = p 39 | token_posn (Int p _) = p 40 | } 41 | -------------------------------------------------------------------------------- /examples/examples.x: -------------------------------------------------------------------------------- 1 | "example_rexps":- 2 | 3 | ::= $ | a+ -- = a*, zero or more as 4 | ::= aa* -- = a+, one or more as 5 | ::= $ | a -- = a?, zero or one as 6 | ::= a{3} -- = aaa, three as 7 | ::= a{3,5} -- = a{3}a?a? 8 | ::= a{3,} -- = a{3}a* 9 | 10 | 11 | "example_sets":- 12 | 13 | ::= a-z -- little letters 14 | ::= ~a-z -- anything but little letters 15 | ::= [a-zA-Z0-9] -- letters and digits 16 | ::= `!@@#$' -- the symbols !, @@, # and $ 17 | ::= [`!#@@$'^'^n] -- the above symbols with ' and newline 18 | ::= ^p#^' -- any graphic character except ' 19 | ::= ^127 -- ASCII DEL 20 | -------------------------------------------------------------------------------- /examples/haskell.x: -------------------------------------------------------------------------------- 1 | -- 2 | -- Lexical syntax for Haskell 98. 3 | -- 4 | -- (c) Simon Marlow 2003, with the caveat that much of this is 5 | -- translated directly from the syntax in the Haskell 98 report. 6 | -- 7 | -- This isn't a complete Haskell 98 lexer - it doesn't handle layout 8 | -- for one thing. However, it could be adapted with a small 9 | -- amount of effort. 
10 | -- 11 | 12 | { 13 | module Main (main) where 14 | import Data.Char (chr) 15 | } 16 | 17 | %wrapper "monad" 18 | 19 | $whitechar = [ \t\n\r\f\v] 20 | $special = [\(\)\,\;\[\]\`\{\}] 21 | 22 | $ascdigit = 0-9 23 | $unidigit = [] -- TODO 24 | $digit = [$ascdigit $unidigit] 25 | 26 | $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] 27 | $unisymbol = [] -- TODO 28 | $symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] 29 | 30 | $large = [A-Z \xc0-\xd6 \xd8-\xde] 31 | $small = [a-z \xdf-\xf6 \xf8-\xff \_] 32 | $alpha = [$small $large] 33 | 34 | $graphic = [$small $large $symbol $digit $special \:\"\'] 35 | 36 | $octit = 0-7 37 | $hexit = [0-9 A-F a-f] 38 | $idchar = [$alpha $digit \'] 39 | $symchar = [$symbol \:] 40 | $nl = [\n\r] 41 | 42 | @reservedid = 43 | as|case|class|data|default|deriving|do|else|hiding|if| 44 | import|in|infix|infixl|infixr|instance|let|module|newtype| 45 | of|qualified|then|type|where 46 | 47 | @reservedop = 48 | ".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>" 49 | 50 | @varid = $small $idchar* 51 | @conid = $large $idchar* 52 | @varsym = $symbol $symchar* 53 | @consym = \: $symchar* 54 | 55 | @decimal = $digit+ 56 | @octal = $octit+ 57 | @hexadecimal = $hexit+ 58 | @exponent = [eE] [\-\+] @decimal 59 | 60 | $cntrl = [$large \@\[\\\]\^\_] 61 | @ascii = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK 62 | | BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE 63 | | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM 64 | | SUB | ESC | FS | GS | RS | US | SP | DEL 65 | $charesc = [abfnrtv\\\"\'\&] 66 | @escape = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal) 67 | @gap = \\ $whitechar+ \\ 68 | @string = $graphic # [\"\\] | " " | @escape | @gap 69 | 70 | haskell :- 71 | 72 | <0> $white+ { skip } 73 | <0> "--"\-*[^$symbol].* { skip } 74 | 75 | "{-" { nested_comment } 76 | 77 | <0> $special { mkL LSpecial } 78 | 79 | <0> @reservedid { mkL LReservedId } 80 | <0> @conid \. 
@varid { mkL LQVarId } 81 | <0> @conid \. @conid { mkL LQConId } 82 | <0> @varid { mkL LVarId } 83 | <0> @conid { mkL LConId } 84 | 85 | <0> @reservedop { mkL LReservedOp } 86 | <0> @conid \. @varsym { mkL LVarSym } 87 | <0> @conid \. @consym { mkL LConSym } 88 | <0> @varsym { mkL LVarSym } 89 | <0> @consym { mkL LConSym } 90 | 91 | <0> @decimal 92 | | 0[oO] @octal 93 | | 0[xX] @hexadecimal { mkL LInteger } 94 | 95 | <0> @decimal \. @decimal @exponent? 96 | | @decimal @exponent { mkL LFloat } 97 | 98 | <0> \' ($graphic # [\'\\] | " " | @escape) \' 99 | { mkL LChar } 100 | 101 | <0> \" @string* \" { mkL LString } 102 | 103 | { 104 | data Lexeme = L AlexPosn LexemeClass String 105 | 106 | data LexemeClass 107 | = LInteger 108 | | LFloat 109 | | LChar 110 | | LString 111 | | LSpecial 112 | | LReservedId 113 | | LReservedOp 114 | | LVarId 115 | | LQVarId 116 | | LConId 117 | | LQConId 118 | | LVarSym 119 | | LQVarSym 120 | | LConSym 121 | | LQConSym 122 | | LEOF 123 | deriving Eq 124 | 125 | mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme 126 | mkL c (p,_,_,str) len = return (L p c (take len str)) 127 | 128 | nested_comment :: AlexInput -> Int -> Alex Lexeme 129 | nested_comment _ _ = do 130 | input <- alexGetInput 131 | go 1 input 132 | where go 0 input = do alexSetInput input; alexMonadScan 133 | go n input = do 134 | case alexGetByte input of 135 | Nothing -> err input 136 | Just (c,input) -> do 137 | case chr (fromIntegral c) of 138 | '-' -> do 139 | let temp = input 140 | case alexGetByte input of 141 | Nothing -> err input 142 | Just (125,input) -> go (n-1) input 143 | Just (45, input) -> go n temp 144 | Just (c,input) -> go n input 145 | '\123' -> do 146 | case alexGetByte input of 147 | Nothing -> err input 148 | Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input 149 | Just (c,input) -> go n input 150 | c -> go n input 151 | 152 | err input = do alexSetInput input; lexError "error in nested comment" 153 | 154 | lexError s = do 155 | 
(p,c,_,input) <- alexGetInput 156 | alexError (showPosn p ++ ": " ++ s ++ 157 | (if (not (null input)) 158 | then " before " ++ show (head input) 159 | else " at end of file")) 160 | 161 | scanner str = runAlex str $ do 162 | let loop i = do tok@(L _ cl _) <- alexMonadScan; 163 | if cl == LEOF 164 | then return i 165 | else do loop $! (i+1) 166 | loop 0 167 | 168 | alexEOF = return (L undefined LEOF "") 169 | 170 | showPosn (AlexPn _ line col) = show line ++ ':': show col 171 | 172 | main = do 173 | s <- getContents 174 | print (scanner s) 175 | } 176 | -------------------------------------------------------------------------------- /examples/lit.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE NPlusKPatterns #-} 3 | module Main (main) where 4 | } 5 | 6 | %wrapper "gscan" 7 | 8 | $space = $white # \n 9 | @blank = \n $space* 10 | @scrap = \n \> .* 11 | @comment = \n ( [^ \> $white] | $space+ ~$white ) .* 12 | 13 | lit :- 14 | 15 | @blank @scrap+ { scrap } 16 | @blank @comment* { comment } 17 | 18 | { 19 | scrap _ _ inp len cont st = strip len inp 20 | where 21 | strip 0 _ = cont st 22 | strip (n+1) (c:rst) = 23 | if c=='\n' 24 | then '\n':strip_nl n rst 25 | else c:strip n rst 26 | 27 | strip_nl (n+1) ('>':rst) = ' ':strip n rst 28 | strip_nl n rst = strip n rst 29 | 30 | comment _ _ inp len cont st = strip len inp 31 | where 32 | strip 0 _ = cont st 33 | strip (n+1) (c:rst) = if c=='\n' then c:strip n rst else strip n rst 34 | 35 | 36 | main:: IO () 37 | main = interact literate 38 | 39 | literate:: String -> String 40 | literate inp = drop 2 (alexGScan stop_act () ('\n':'\n':inp)) 41 | 42 | stop_act p _ "" st = [] 43 | stop_act p _ _ _ = error (msg ++ loc p ++ "\n") 44 | 45 | msg = "literate preprocessing error at " 46 | 47 | loc (AlexPn _ l c) = "line " ++ show(l-2) ++ ", column " ++ show c 48 | } 49 | -------------------------------------------------------------------------------- /examples/pp.x: 
-------------------------------------------------------------------------------- 1 | %{ 2 | import System 3 | import Char 4 | import Alex 5 | %} 6 | 7 | 8 | "pp_lx"/"pp_acts":- 9 | 10 | { ^s = ^w#^n } -- spaces and tabs, etc. 11 | { ^f = [A-Za-z0-9`~%-_.,/'] } -- file name character 12 | 13 | ::= ^#include^s+^"^f+^"^s*^n 14 | ::= .*^n 15 | 16 | 17 | %{ 18 | inc p c inp len cont st = pp fn >> cont st 19 | where 20 | fn = (takeWhile ('"'/=) . tail . dropWhile isSpace . drop 8) inp 21 | 22 | txt p c inp len cont st = putStr (take len inp) >> cont st 23 | 24 | 25 | main:: IO () 26 | main = getArgs >>= \args -> 27 | case args of 28 | [fn] -> pp fn 29 | _ -> error "usage: pp file\n" 30 | 31 | pp:: String -> IO () 32 | pp fn = readFile fn >>= \cts -> gscan pp_scan () cts 33 | 34 | pp_scan:: GScan () (IO ()) 35 | pp_scan = load_gscan (pp_acts,stop_act) pp_lx 36 | where 37 | stop_act _ _ _ _ = return () 38 | %} 39 | -------------------------------------------------------------------------------- /examples/state.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | } 4 | 5 | %wrapper "gscan" 6 | 7 | state :- 8 | 9 | $white+ { skip } 10 | \{ [^\}]* \} { code } 11 | [A-Za-z]+ { ide } 12 | 13 | { 14 | code _ _ inp len cont (sc,frags) = cont (sc,frag:frags) 15 | where 16 | frag = take (len-4) (drop 2 inp) 17 | 18 | ide _ _ inp len cont st = Ide (take len inp):cont st 19 | 20 | skip _ _ inp len cont st = cont st 21 | 22 | data Token = Ide String | Eof String | Err deriving Show 23 | 24 | stop_act _ _ "" (_,frags) = [Eof (unlines(reverse frags))] 25 | stop_act _ _ _ _ = [Err] 26 | 27 | tokens:: String -> [Token] 28 | tokens inp = alexGScan stop_act [] inp 29 | 30 | main:: IO () 31 | main = interact (show.tokens) 32 | } 33 | -------------------------------------------------------------------------------- /examples/tiger/Literals.txt: -------------------------------------------------------------------------------- 
1 | 123 "Hello, world!" 2 | -------------------------------------------------------------------------------- /examples/tiny.y: -------------------------------------------------------------------------------- 1 | -- An example demonstrating how to connect a Happy parser to an Alex lexer. 2 | { 3 | import Tokens_posn 4 | } 5 | 6 | %name calc 7 | %tokentype { Token } 8 | 9 | %token let { Let _ } 10 | in { In _ } 11 | int { Int _ $$ } 12 | var { Var _ $$ } 13 | '=' { Sym _ '=' } 14 | '+' { Sym _ '+' } 15 | '-' { Sym _ '-' } 16 | '*' { Sym _ '*' } 17 | '/' { Sym _ '/' } 18 | '(' { Sym _ '(' } 19 | ')' { Sym _ ')' } 20 | 21 | %% 22 | 23 | Exp :: { Exp } 24 | Exp : let var '=' Exp in Exp { LetE $2 $4 $6 } 25 | | Exp1 { $1 } 26 | 27 | Exp1 : Exp1 '+' Term { PlusE $1 $3 } 28 | | Exp1 '-' Term { MinusE $1 $3 } 29 | | Term { $1 } 30 | 31 | Term : Term '*' Factor { TimesE $1 $3 } 32 | | Term '/' Factor { DivE $1 $3 } 33 | | Factor { $1 } 34 | 35 | Factor : '-' Atom { NegE $2 } 36 | | Atom { $1 } 37 | 38 | Atom : int { IntE $1 } 39 | | var { VarE $1 } 40 | | '(' Exp ')' { $2 } 41 | 42 | { 43 | data Exp = 44 | LetE String Exp Exp | 45 | PlusE Exp Exp | 46 | MinusE Exp Exp | 47 | TimesE Exp Exp | 48 | DivE Exp Exp | 49 | NegE Exp | 50 | IntE Int | 51 | VarE String 52 | deriving Show 53 | 54 | 55 | main:: IO () 56 | main = interact (show.runCalc) 57 | 58 | runCalc :: String -> Exp 59 | runCalc = calc . 
alexScanTokens 60 | 61 | happyError :: [Token] -> a 62 | happyError tks = error ("Parse error at " ++ lcn ++ "\n") 63 | where 64 | lcn = case tks of 65 | [] -> "end of file" 66 | tk:_ -> "line " ++ show l ++ ", column " ++ show c 67 | where 68 | AlexPn _ l c = token_posn tk 69 | } 70 | -------------------------------------------------------------------------------- /examples/words.x: -------------------------------------------------------------------------------- 1 | -- Performance test; run with input /usr/dict/words, for example 2 | { 3 | module Main (main) where 4 | } 5 | 6 | %wrapper "basic" 7 | 8 | words :- 9 | 10 | $white+ ; 11 | [A-Za-z0-9\'\-]+ { \s -> () } 12 | 13 | { 14 | main = do 15 | s <- getContents 16 | print (length (alexScanTokens s)) 17 | } 18 | -------------------------------------------------------------------------------- /examples/words_monad.x: -------------------------------------------------------------------------------- 1 | -- Performance test; run with input /usr/dict/words, for example 2 | { 3 | module Main (main) where 4 | } 5 | 6 | %wrapper "monad" 7 | 8 | words :- 9 | 10 | $white+ { skip } 11 | [A-Za-z0-9\'\-]+ { word } 12 | 13 | { 14 | word (_,_,_,input) len = return (take len input) 15 | 16 | scanner str = runAlex str $ do 17 | let loop i = do tok <- alexMonadScan 18 | if tok == "stopped." || tok == "error." 19 | then return i 20 | else do let i' = i+1 in i' `seq` loop i' 21 | loop 0 22 | 23 | alexEOF = return "stopped." 
24 | 25 | main = do 26 | s <- getContents 27 | print (scanner s) 28 | } 29 | -------------------------------------------------------------------------------- /examples/words_posn.x: -------------------------------------------------------------------------------- 1 | -- Performance test; run with input /usr/dict/words, for example 2 | { 3 | module Main (main) where 4 | } 5 | 6 | %wrapper "posn" 7 | 8 | words :- 9 | 10 | $white+ ; 11 | [A-Za-z0-9\'\-]+ { \p s -> () } 12 | 13 | { 14 | main = do 15 | s <- getContents 16 | print (length (alexScanTokens s)) 17 | } 18 | -------------------------------------------------------------------------------- /fix-whitespace.yaml: -------------------------------------------------------------------------------- 1 | # Files checked by `fix-whitespace` for whitespace violations. 2 | # 3 | # - Trailing whitespace. 4 | # - Trailing empty lines. 5 | # - Final line not ending in a newline characters. 6 | # - Tabs. 7 | 8 | excluded-dirs: 9 | - .git 10 | - .stack-work 11 | - dist 12 | - dist-newstyle 13 | - old-docbook 14 | 15 | included-files: 16 | # Some file types make use of tabs which we do not convert just now (2023-06-20), 17 | # so these are commented out. 18 | - .gitignore 19 | # - .mailmap 20 | - LICENSE 21 | # - "*.bat" 22 | - "*.c" 23 | - "*.cabal" 24 | - "*.hs" 25 | - "*.md" 26 | - "*.project" 27 | - "*.py" 28 | - "*.rst" 29 | - "*.sh" 30 | - "*.txt" 31 | # - "*.x" 32 | # - "*.y" 33 | -------------------------------------------------------------------------------- /make-sdist.sh: -------------------------------------------------------------------------------- 1 | # Put the Happy-generated .hs files in the right place in the source dist. 2 | 3 | # Not necessary any more, Cabal does this: 4 | 5 | # set -e 6 | # rm -f dist/alex-*.tar.gz 7 | # rm -rf dist/alex-*/ 8 | # ./Setup sdist 9 | # cd dist 10 | # tar xvzf alex-*.tar.gz 11 | # cd alex-*/ 12 | # mkdir dist 13 | # mkdir dist/build 14 | # mv alex dist/build 15 | # cd .. 
16 | # tar cvzf alex-*.tar.gz alex-*/ 17 | 18 | # Steps for doing a release: 19 | # * Source: 20 | # - do the above 21 | # - upload the dist to haskell.org:alex/dist/${version} 22 | # * Documentation: 23 | # - cd doc 24 | # - make html 25 | # - mv alex alex-html 26 | # - tar cvzf alex-doc-html-${version}.tar.gz alex-html 27 | # - scp alex-doc-html-${version}.tar.gz haskell.org:alex/doc 28 | # - ssh haskell.org 29 | # - cd alex/doc 30 | # - tar xvzf alex-doc-html-${version}.tar.gz 31 | # - rm -rf html-OLD 32 | # - mv html html-OLD && mv alex-html html 33 | # * Update the web page (~/darcs/www/alex/index.html), and push it 34 | -------------------------------------------------------------------------------- /src/CharSet.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- 3 | -- CharSet.hs, part of Alex 4 | -- 5 | -- (c) Chris Dornan 1995-2000, Simon Marlow 2003 6 | -- 7 | -- An abstract CharSet type for Alex. To begin with we'll use Alex's 8 | -- original definition of sets as functions, then later will 9 | -- transition to something that will work better with Unicode. 
10 | -- 11 | -- ----------------------------------------------------------------------------} 12 | 13 | {-# LANGUAGE OverloadedLists #-} 14 | 15 | module CharSet ( 16 | setSingleton, 17 | 18 | Encoding(..), 19 | 20 | Byte, 21 | ByteSet, 22 | byteSetSingleton, 23 | byteRanges, 24 | byteSetRange, 25 | 26 | CharSet, -- abstract 27 | emptyCharSet, 28 | charSetSingleton, 29 | charSet, 30 | charSetMinus, 31 | charSetComplement, 32 | charSetRange, 33 | charSetUnion, 34 | charSetQuote, 35 | setUnions, 36 | byteSetToArray, 37 | byteSetElems, 38 | byteSetElem 39 | ) where 40 | 41 | import Data.Array ( Array, array ) 42 | import Data.Char ( chr, ord ) 43 | import Data.Maybe ( catMaybes ) 44 | import Data.Word ( Word8 ) 45 | import Data.List.NonEmpty ( pattern (:|), (<|) ) 46 | import qualified Data.List.NonEmpty as List1 47 | 48 | import UTF8 ( List1, encode ) 49 | import Data.Ranged 50 | ( Boundary( BoundaryAbove, BoundaryAboveAll, BoundaryBelow, BoundaryBelowAll ) 51 | , DiscreteOrdered, Range( Range ), RSet 52 | , makeRangedSet 53 | , rSetDifference, rSetEmpty, rSetHas, rSetNegation, rSetRanges, rSetUnion, rSingleton 54 | ) 55 | 56 | -- import Data.Semigroup (sconcat) 57 | -- import qualified Data.Foldable as Fold 58 | 59 | type Byte = Word8 60 | -- Implementation as functions 61 | type CharSet = RSet Char 62 | type ByteSet = RSet Byte 63 | -- type Utf8Set = RSet [Byte] 64 | type Utf8Range = Span (List1 Byte) 65 | 66 | data Encoding = Latin1 | UTF8 67 | deriving (Eq, Show) 68 | 69 | emptyCharSet :: CharSet 70 | emptyCharSet = rSetEmpty 71 | 72 | byteSetElem :: ByteSet -> Byte -> Bool 73 | byteSetElem = rSetHas 74 | 75 | charSetSingleton :: Char -> CharSet 76 | charSetSingleton = rSingleton 77 | 78 | setSingleton :: DiscreteOrdered a => a -> RSet a 79 | setSingleton = rSingleton 80 | 81 | charSet :: [Char] -> CharSet 82 | charSet = setUnions . 
fmap charSetSingleton 83 | 84 | charSetMinus :: CharSet -> CharSet -> CharSet 85 | charSetMinus = rSetDifference 86 | 87 | charSetUnion :: CharSet -> CharSet -> CharSet 88 | charSetUnion = rSetUnion 89 | 90 | setUnions :: DiscreteOrdered a => [RSet a] -> RSet a 91 | setUnions = foldr rSetUnion rSetEmpty 92 | 93 | charSetComplement :: CharSet -> CharSet 94 | charSetComplement = rSetNegation 95 | 96 | charSetRange :: Char -> Char -> CharSet 97 | charSetRange c1 c2 = makeRangedSet [Range (BoundaryBelow c1) (BoundaryAbove c2)] 98 | 99 | {-# INLINE bytes #-} 100 | bytes :: [Byte] 101 | bytes = [minBound..maxBound] 102 | 103 | byteSetToArray :: ByteSet -> Array Byte Bool 104 | byteSetToArray set = array (minBound, maxBound) [(c, rSetHas set c) | c <- bytes] 105 | 106 | byteSetElems :: ByteSet -> [Byte] 107 | byteSetElems set = filter (rSetHas set) bytes 108 | 109 | charToRanges :: Encoding -> CharSet -> [Utf8Range] 110 | charToRanges Latin1 = 111 | map (fmap ((:| []) . fromIntegral . ord)) -- Span [Byte] 112 | . catMaybes 113 | . fmap (charRangeToCharSpan False) 114 | . rSetRanges 115 | charToRanges UTF8 = 116 | concat -- Span [Byte] 117 | . fmap toUtfRange -- [Span [Byte]] 118 | . fmap (fmap UTF8.encode) -- Span [Byte] 119 | . catMaybes 120 | . fmap (charRangeToCharSpan True) 121 | . 
rSetRanges 122 | 123 | -- | Turns a range of characters expressed as a pair of UTF-8 byte sequences into a set of ranges, in which each range of the resulting set is between pairs of sequences of the same length 124 | toUtfRange :: Span (List1 Byte) -> [Span (List1 Byte)] 125 | toUtfRange (Span x y) = List1.toList $ fix x y 126 | 127 | fix :: List1 Byte -> List1 Byte -> List1 (Span (List1 Byte)) 128 | fix x y 129 | | length x == length y = [Span x y] 130 | | length x == 1 = Span x [0x7F] <| fix [0xC2,0x80] y 131 | | length x == 2 = Span x [0xDF,0xBF] <| fix [0xE0,0x80,0x80] y 132 | | length x == 3 = Span x [0xEF,0xBF,0xBF] <| fix [0xF0,0x80,0x80,0x80] y 133 | | otherwise = error "fix: incorrect input given" 134 | 135 | 136 | byteRangeToBytePair :: Span a -> (a, a) 137 | byteRangeToBytePair (Span x y) = (x, y) 138 | 139 | data Span a = Span a a -- lower bound inclusive, higher bound exclusive 140 | -- (SDM: upper bound inclusive, surely?) 141 | instance Functor Span where 142 | fmap f (Span x y) = Span (f x) (f y) 143 | 144 | charRangeToCharSpan :: Bool -> Range Char -> Maybe (Span Char) 145 | charRangeToCharSpan _ (Range BoundaryAboveAll _) = Nothing 146 | charRangeToCharSpan _ (Range (BoundaryAbove c) _) | c == maxBound = Nothing 147 | charRangeToCharSpan _ (Range _ BoundaryBelowAll) = Nothing 148 | charRangeToCharSpan _ (Range _ (BoundaryBelow c)) | c == minBound = Nothing 149 | charRangeToCharSpan uni (Range x y) = Just (Span (l x) (h y)) 150 | where l b = case b of 151 | BoundaryBelowAll -> '\0' 152 | BoundaryBelow a -> a 153 | BoundaryAbove a -> succ a 154 | BoundaryAboveAll -> error "panic: charRangeToCharSpan" 155 | h b = case b of 156 | BoundaryBelowAll -> error "panic: charRangeToCharSpan" 157 | BoundaryBelow a -> pred a 158 | BoundaryAbove a -> a 159 | BoundaryAboveAll | uni -> chr 0x10ffff 160 | | otherwise -> chr 0xff 161 | 162 | byteRanges :: Encoding -> CharSet -> [(List1 Byte, List1 Byte)] 163 | byteRanges enc = fmap byteRangeToBytePair . 
charToRanges enc 164 | 165 | byteSetRange :: Byte -> Byte -> ByteSet 166 | byteSetRange c1 c2 = makeRangedSet [Range (BoundaryBelow c1) (BoundaryAbove c2)] 167 | 168 | byteSetSingleton :: Byte -> ByteSet 169 | byteSetSingleton = rSingleton 170 | 171 | -- TODO: More efficient generated code! 172 | charSetQuote :: CharSet -> String 173 | charSetQuote s = "(\\c -> " ++ foldr (\x y -> x ++ " || " ++ y) "False" (map quoteRange (rSetRanges s)) ++ ")" 174 | where quoteRange (Range l h) = quoteL l ++ " && " ++ quoteH h 175 | quoteL (BoundaryAbove a) = "c > " ++ show a 176 | quoteL (BoundaryBelow a) = "c >= " ++ show a 177 | quoteL (BoundaryAboveAll) = "False" 178 | quoteL (BoundaryBelowAll) = "True" 179 | quoteH (BoundaryAbove a) = "c <= " ++ show a 180 | quoteH (BoundaryBelow a) = "c < " ++ show a 181 | quoteH (BoundaryAboveAll) = "True" 182 | quoteH (BoundaryBelowAll) = "False" 183 | -------------------------------------------------------------------------------- /src/DFA.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- 3 | -- DFA.hs, part of Alex 4 | -- 5 | -- (c) Chris Dornan 1995-2000, Simon Marlow 2003 6 | -- 7 | -- This module generates a DFA from a scanner by first converting it 8 | -- to an NFA and then converting the NFA with the subset construction. 9 | -- 10 | -- See the chapter on `Finite Automata and Lexical Analysis' in the 11 | -- dragon book for an excellent overview of the algorithms in this 12 | -- module. 13 | -- 14 | -- ----------------------------------------------------------------------------} 15 | 16 | module DFA (scanner2dfa) where 17 | 18 | import Data.Array ( (!) 
) 19 | import Data.Function ( on ) 20 | import Data.Maybe ( fromJust ) 21 | 22 | import qualified Data.IntMap as IntMap 23 | import qualified Data.IntSet as IntSet 24 | import qualified Data.Map as Map 25 | import qualified Data.List as List 26 | 27 | import AbsSyn 28 | import NFA 29 | import CharSet 30 | 31 | {- Defined in the Scan Module 32 | 33 | -- (This section should logically belong to the DFA module but it has been 34 | -- placed here to make this module self-contained.) 35 | -- 36 | -- `DFA' provides an alternative to `Scanner' (described in the RExp module); 37 | -- it can be used directly to scan text efficiently. Additionally it has an 38 | -- extra place holder for holding action functions for generating 39 | -- application-specific tokens. When this place holder is not being used, the 40 | -- unit type will be used. 41 | -- 42 | -- Each state in the automaton consist of a list of `Accept' values, descending 43 | -- in priority, and an array mapping characters to new states. As the array 44 | -- may only cover a sub-range of the characters, a default state number is 45 | -- given in the third field. By convention, all transitions to the -1 state 46 | -- represent invalid transitions. 47 | -- 48 | -- A list of accept states is provided for as the original specification may 49 | -- have been ambiguous, in which case the highest priority token should be 50 | -- taken (the one appearing earliest in the specification); this can not be 51 | -- calculated when the DFA is generated in all cases as some of the tokens may 52 | -- be associated with leading or trailing context or start codes. 53 | -- 54 | -- `scan_token' (see above) can deal with unconditional accept states more 55 | -- efficiently than those associated with context; to save it testing each time 56 | -- whether the list of accept states contains an unconditional state, the flag 57 | -- in the first field of `St' is set to true whenever the list contains an 58 | -- unconditional state. 
59 | -- 60 | -- The `Accept' structure contains the priority of the token being accepted 61 | -- (lower numbers => higher priorities), the name of the token, a place holder 62 | -- that can be used for storing the `action' function for constructing the 63 | -- token from the input text and the scanner's state, a list of start codes 64 | -- (listing the start codes that the scanner must be in for the token to be 65 | -- accepted; empty => no restriction), the leading and trailing context (both 66 | -- `Nothing' if there is none). 67 | -- 68 | -- The leading context consists simply of a character predicate that will 69 | -- return true if the last character read is acceptable. The trailing context 70 | -- consists of an alternative starting state within the DFA; if this `sub-dfa' 71 | -- turns up any accepting state when applied to the residual input then the 72 | -- trailing context is acceptable (see `scan_token' above). 73 | 74 | type DFA a = Array SNum (State a) 75 | 76 | type SNum = Int 77 | 78 | data State a = St Bool [Accept a] SNum (Array Char SNum) 79 | 80 | data Accept a = Acc Int String a [StartCode] (MB(Char->Bool)) (MB SNum) 81 | 82 | type StartCode = Int 83 | -} 84 | 85 | 86 | -- Scanners are converted to DFAs by converting them to NFAs first. Converting 87 | -- an NFA to a DFA works by identifying the states of the DFA with subsets of 88 | -- the NFA. The PartDFA is used to construct the DFA; it is essentially a DFA 89 | -- in which the states are represented directly by state sets of the NFA. 90 | -- `nfa2pdfa' constructs the partial DFA from the NFA by searching for all the 91 | -- transitions from a given list of state sets, initially containing the start 92 | -- state of the partial DFA, until all possible state sets have been considered 93 | -- The final DFA is then constructed with a `mk_dfa'. 
94 | 95 | scanner2dfa:: Encoding -> Scanner -> [StartCode] -> DFA SNum Code 96 | scanner2dfa enc scanner scs = nfa2dfa scs (scanner2nfa enc scanner scs) 97 | 98 | nfa2dfa:: [StartCode] -> NFA -> DFA SNum Code 99 | nfa2dfa scs nfa = mk_int_dfa nfa (nfa2pdfa nfa pdfa (dfa_start_states pdfa)) 100 | where 101 | pdfa = new_pdfa n_starts nfa 102 | n_starts = length scs -- number of start states 103 | 104 | -- `nfa2pdfa' works by taking the next outstanding state set to be considered 105 | -- and ignoring it if the state is already in the partial DFA, otherwise 106 | -- generating all possible transitions from it, adding the new state to the 107 | -- partial DFA and continuing the closure with the extra states. Note the way 108 | -- it incorporates the trailing context references into the search (by 109 | -- including `rctx_ss' in the search). 110 | 111 | nfa2pdfa:: NFA -> DFA StateSet Code -> [StateSet] -> DFA StateSet Code 112 | nfa2pdfa _ pdfa [] = pdfa 113 | nfa2pdfa nfa pdfa (ss:umkd) 114 | | ss `in_pdfa` pdfa = nfa2pdfa nfa pdfa umkd 115 | | otherwise = nfa2pdfa nfa pdfa' umkd' 116 | where 117 | pdfa' = add_pdfa ss (State accs (IntMap.fromList ss_outs)) pdfa 118 | 119 | umkd' = rctx_sss ++ map snd ss_outs ++ umkd 120 | 121 | -- for each character, the set of states that character would take 122 | -- us to from the current set of states in the NFA. 123 | ss_outs :: [(Int, StateSet)] 124 | ss_outs = [ (fromIntegral ch, mk_ss nfa ss') 125 | | ch <- byteSetElems $ setUnions [p | (p,_) <- outs], 126 | let ss' = [ s' | (p,s') <- outs, byteSetElem p ch ], 127 | not (null ss') 128 | ] 129 | 130 | rctx_sss = [ mk_ss nfa [s] 131 | | Acc _ _ _ (RightContextRExp s) <- accs ] 132 | 133 | outs :: [(ByteSet,SNum)] 134 | outs = [ out | s <- ss, out <- nst_outs (nfa ! s) ] 135 | 136 | accs = sort_accs [ acc | s <- ss, acc <- nst_accs (nfa ! 
s) ] 137 | 138 | -- `sort_accs' sorts a list of accept values into descending order of priority, 139 | -- eliminating any elements that follow an unconditional accept value. 140 | 141 | sort_accs :: [Accept a] -> [Accept a] 142 | sort_accs accs = foldr chk [] $ List.sortBy (compare `on` accPrio) accs 143 | where 144 | chk acc@(Acc _ _ Nothing NoRightContext) _ = [acc] 145 | chk acc rst = acc:rst 146 | 147 | 148 | {------------------------------------------------------------------------------ 149 | State Sets and Partial DFAs 150 | ------------------------------------------------------------------------------} 151 | 152 | 153 | 154 | -- A `PartDFA' is a partially constructed DFA in which the states are 155 | -- represented by sets of states of the original NFA. It is represented by a 156 | -- triple consisting of the start state of the partial DFA, the NFA from which 157 | -- it is derived and a map from state sets to states of the partial DFA. The 158 | -- state set for a given list of NFA states is calculated by taking the epsilon 159 | -- closure of all the states, sorting the result with duplicates eliminated. 160 | 161 | type StateSet = [SNum] 162 | 163 | new_pdfa :: Int -> NFA -> DFA StateSet a 164 | new_pdfa starts nfa 165 | = DFA { dfa_start_states = [ List.sort $ nst_cl $ nfa ! n | n <- [0 .. starts - 1] ] 166 | , dfa_states = Map.empty 167 | } 168 | 169 | -- starts is the number of start states 170 | 171 | -- constructs the epsilon-closure of a set of NFA states 172 | mk_ss :: NFA -> [SNum] -> StateSet 173 | mk_ss nfa l = IntSet.toAscList $ IntSet.fromList [ s' | s <- l, s' <- nst_cl (nfa ! 
s) ] 174 | 175 | add_pdfa:: StateSet -> State StateSet a -> DFA StateSet a -> DFA StateSet a 176 | add_pdfa ss pst (DFA st mp) = DFA st (Map.insert ss pst mp) 177 | 178 | in_pdfa:: StateSet -> DFA StateSet a -> Bool 179 | in_pdfa ss (DFA _ mp) = ss `Map.member` mp 180 | 181 | -- Construct a DFA with numbered states, from a DFA whose states are 182 | -- sets of states from the original NFA. 183 | 184 | mk_int_dfa:: NFA -> DFA StateSet a -> DFA SNum a 185 | mk_int_dfa nfa (DFA start_states mp) 186 | = DFA [0 .. length start_states-1] 187 | (Map.fromList [ (lookup' st, cnv pds) | (st, pds) <- Map.toAscList mp ]) 188 | where 189 | mp' = Map.fromList (zip (start_states ++ 190 | (map fst . Map.toAscList) (foldr Map.delete mp start_states)) [0..]) 191 | 192 | lookup' = fromJust . flip Map.lookup mp' 193 | 194 | cnv :: State StateSet a -> State SNum a 195 | cnv (State accs as) = State accs' as' 196 | where 197 | as' = IntMap.mapWithKey (\_ch s -> lookup' s) as 198 | 199 | accs' = map cnv_acc accs 200 | cnv_acc (Acc p a lctx rctx) = Acc p a lctx rctx' 201 | where rctx' = 202 | case rctx of 203 | RightContextRExp s -> 204 | RightContextRExp (lookup' (mk_ss nfa [s])) 205 | other -> other 206 | -------------------------------------------------------------------------------- /src/DFS.hs: -------------------------------------------------------------------------------- 1 | {------------------------------------------------------------------------------ 2 | DFS 3 | 4 | This module is a portable version of the ghc-specific `DFS.g.hs', which is 5 | itself a straightforward encoding of the Launchbury/King paper on linear graph 6 | algorithms. This module uses balanced binary trees instead of mutable arrays 7 | to implement the depth-first search so the complexity of the algorithms is 8 | n.log(n) instead of linear. 9 | 10 | The vertices of the graphs manipulated by these modules are labelled with the 11 | integers from 0 to n-1 where n is the number of vertices in the graph. 
12 | 13 | The module's principle products are `mk_graph' for constructing a graph from an 14 | edge list, `t_close' for taking the transitive closure of a graph and `scc' 15 | for generating a list of strongly connected components; the components are 16 | listed in dependency order and each component takes the form of a `dfs tree' 17 | (see Launchberry and King). Thus if each edge (fid,fid') encodes the fact that 18 | function `fid' references function `fid'' in a program then `scc' performs a 19 | dependency analysis. 20 | 21 | Chris Dornan, 23-Jun-94, 2-Jul-96, 29-Aug-96, 29-Sep-97 22 | ------------------------------------------------------------------------------} 23 | 24 | module DFS where 25 | 26 | import Data.Array ( (!), accumArray, listArray ) 27 | import Data.Set ( Set ) 28 | import qualified Data.Set as Set 29 | 30 | -- The result of a depth-first search of a graph is a list of trees, 31 | -- `GForest'. `post_order' provides a post-order traversal of a forest. 32 | 33 | type GForest = [GTree] 34 | data GTree = GNode Int GForest 35 | 36 | postorder:: GForest -> [Int] 37 | postorder ts = po ts [] 38 | where 39 | po ts' l = foldr po_tree l ts' 40 | 41 | po_tree (GNode a ts') l = po ts' (a:l) 42 | 43 | list_tree:: GTree -> [Int] 44 | list_tree t = l_t t [] 45 | where 46 | l_t (GNode x ts) l = foldr l_t (x:l) ts 47 | 48 | 49 | -- Graphs are represented by a pair of an integer, giving the number of nodes 50 | -- in the graph, and function mapping each vertex (0..n-1, n=size of graph) to 51 | -- its neighbouring nodes. `mk_graph' takes a size and an edge list and 52 | -- constructs a graph. 
53 | 54 | type Graph = (Int,Int->[Int]) 55 | type Edge = (Int,Int) 56 | 57 | mk_graph:: Int -> [Edge] -> Graph 58 | mk_graph sz es = (sz,\v->ar!v) 59 | where 60 | ar = accumArray (flip (:)) [] (0,sz-1) [(v,v')| (v,v')<-es] 61 | 62 | vertices:: Graph -> [Int] 63 | vertices (sz,_) = [0..sz-1] 64 | 65 | out:: Graph -> Int -> [Int] 66 | out (_,f) = f 67 | 68 | edges:: Graph -> [Edge] 69 | edges g = [(v,v')| v<-vertices g, v'<-out g v] 70 | 71 | rev_edges:: Graph -> [Edge] 72 | rev_edges g = [(v',v)| v<-vertices g, v'<-out g v] 73 | 74 | reverse_graph:: Graph -> Graph 75 | reverse_graph g@(sz,_) = mk_graph sz (rev_edges g) 76 | 77 | 78 | -- `t_close' takes the transitive closure of a graph; `scc' returns the 79 | -- strongly connected components of the graph and `top_sort' topologically 80 | -- sorts the graph. Note that the array is given one more element in order 81 | -- to avoid problems with empty arrays. 82 | 83 | t_close:: Graph -> Graph 84 | t_close g@(sz,_) = (sz,\v->ar!v) 85 | where 86 | ar = listArray (0,sz) ([postorder(dff' [v] g)| v<-vertices g]++[und]) 87 | und = error "t_close" 88 | 89 | scc:: Graph -> GForest 90 | scc g = dff' (reverse (top_sort (reverse_graph g))) g 91 | 92 | top_sort:: Graph -> [Int] 93 | top_sort = postorder . dff 94 | 95 | 96 | -- `dff' computes the depth-first forest. It works by unrolling the 97 | -- potentially infinite tree from each of the vertices with `generate_g' and 98 | -- then pruning out the duplicates. 
99 | 100 | dff:: Graph -> GForest 101 | dff g = dff' (vertices g) g 102 | 103 | dff':: [Int] -> Graph -> GForest 104 | dff' vs (_bs, f) = prune (map (generate_g f) vs) 105 | 106 | generate_g:: (Int->[Int]) -> Int -> GTree 107 | generate_g f v = GNode v (map (generate_g f) (f v)) 108 | 109 | prune:: GForest -> GForest 110 | prune ts = snd(chop(empty_int,ts)) 111 | where 112 | empty_int:: Set Int 113 | empty_int = Set.empty 114 | 115 | chop:: (Set Int,GForest) -> (Set Int,GForest) 116 | chop p@(_, []) = p 117 | chop (vstd,GNode v ts:us) = 118 | if v `Set.member` vstd 119 | then chop (vstd,us) 120 | else let vstd1 = Set.insert v vstd 121 | (vstd2,ts') = chop (vstd1,ts) 122 | (vstd3,us') = chop (vstd2,us) 123 | in 124 | (vstd3,GNode v ts' : us') 125 | 126 | 127 | {-- Some simple test functions 128 | 129 | test:: Graph Char 130 | test = mk_graph (char_bds ('a','h')) (mk_pairs "eefggfgegdhfhged") 131 | where 132 | mk_pairs [] = [] 133 | mk_pairs (a:b:l) = (a,b):mk_pairs l 134 | 135 | -} 136 | -------------------------------------------------------------------------------- /src/Data/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005, Paul Johnson 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in 13 | the documentation and/or other materials provided with the 14 | distribution. 15 | 16 | * Neither the name of the Ranged Sets project nor the names of its 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 28 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /src/Data/Ranged.hs: -------------------------------------------------------------------------------- 1 | module Data.Ranged ( 2 | module Data.Ranged.Boundaries, 3 | module Data.Ranged.Ranges, 4 | module Data.Ranged.RangedSet 5 | ) where 6 | 7 | import Data.Ranged.Boundaries 8 | import Data.Ranged.Ranges 9 | import Data.Ranged.RangedSet 10 | -------------------------------------------------------------------------------- /src/Data/Ranged/Boundaries.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- | 3 | -- Module : Data.Ranged.Boundaries 4 | -- Copyright : (c) Paul Johnson 2006 5 | -- License : BSD-style 6 | -- Maintainer : paul@cogito.org.uk 7 | -- Stability : experimental 8 | -- Portability : portable 9 | -- 10 | ----------------------------------------------------------------------------- 11 | 12 | module Data.Ranged.Boundaries ( 13 | DiscreteOrdered (..), 14 | enumAdjacent, 15 | boundedAdjacent, 16 | boundedBelow, 17 | Boundary (..), 18 | above, 19 | (/>/) 
20 | ) where 21 | 22 | import Data.Ratio 23 | import Data.Word 24 | 25 | infix 4 />/ 26 | 27 | {- | 28 | Distinguish between dense and sparse ordered types. A dense type is 29 | one in which any two values @v1 < v2@ have a third value @v3@ such that 30 | @v1 < v3 < v2@. 31 | 32 | In theory the floating types are dense, although in practice they can only have 33 | finitely many values. This class treats them as dense. 34 | 35 | Tuples up to 4 members are declared as instances. Larger tuples may be added 36 | if necessary. 37 | 38 | Most values of sparse types have an @adjacentBelow@, such that, for all x: 39 | 40 | > case adjacentBelow x of 41 | > Just x1 -> adjacent x1 x 42 | > Nothing -> True 43 | 44 | The exception is for bounded types when @x == lowerBound@. For dense types 45 | @adjacentBelow@ always returns 'Nothing'. 46 | 47 | This approach was suggested by Ben Rudiak-Gould on comp.lang.functional. 48 | -} 49 | 50 | class Ord a => DiscreteOrdered a where 51 | -- | Two values @x@ and @y@ are adjacent if @x < y@ and there does not 52 | -- exist a third value between them. Always @False@ for dense types. 53 | adjacent :: a -> a -> Bool 54 | -- | The value immediately below the argument, if it can be determined. 55 | adjacentBelow :: a -> Maybe a 56 | 57 | 58 | -- Implementation note: the precise rules about unbounded enumerated vs 59 | -- bounded enumerated types are difficult to express using Haskell 98, so 60 | -- the prelude types are listed individually here. 
-- Bounded enumerations share the boundedAdjacent / boundedBelow helpers;
-- dense types (floats, ratios, lists) are never adjacent.

instance DiscreteOrdered Bool where
   adjacent = boundedAdjacent
   adjacentBelow = boundedBelow

instance DiscreteOrdered Ordering where
   adjacent = boundedAdjacent
   adjacentBelow = boundedBelow

instance DiscreteOrdered Char where
   adjacent = boundedAdjacent
   adjacentBelow = boundedBelow

instance DiscreteOrdered Int where
   adjacent = boundedAdjacent
   adjacentBelow = boundedBelow

instance DiscreteOrdered Integer where
   adjacent = enumAdjacent
   adjacentBelow = Just . pred

instance DiscreteOrdered Double where
   adjacent _ _ = False
   adjacentBelow = const Nothing

instance DiscreteOrdered Float where
   adjacent _ _ = False
   adjacentBelow = const Nothing

instance (Integral a) => DiscreteOrdered (Ratio a) where
   adjacent _ _ = False
   adjacentBelow = const Nothing

instance Ord a => DiscreteOrdered [a] where
   adjacent _ _ = False
   adjacentBelow = const Nothing

-- Tuples are adjacent when all but the last component are equal and the
-- last components are themselves adjacent.
instance (Ord a, DiscreteOrdered b) => DiscreteOrdered (a, b)
   where
   adjacent (x1, x2) (y1, y2) = (x1 == y1) && adjacent x2 y2
   adjacentBelow (x1, x2) = fmap (\x2' -> (x1, x2')) (adjacentBelow x2)

instance (Ord a, Ord b, DiscreteOrdered c) => DiscreteOrdered (a, b, c)
   where
   adjacent (x1, x2, x3) (y1, y2, y3) =
      (x1 == y1) && (x2 == y2) && adjacent x3 y3
   adjacentBelow (x1, x2, x3) = fmap (\x3' -> (x1, x2, x3')) (adjacentBelow x3)

instance (Ord a, Ord b, Ord c, DiscreteOrdered d) =>
      DiscreteOrdered (a, b, c, d)
   where
   adjacent (x1, x2, x3, x4) (y1, y2, y3, y4) =
      (x1 == y1) && (x2 == y2) && (x3 == y3) && adjacent x4 y4
   adjacentBelow (x1, x2, x3, x4) =
      fmap (\x4' -> (x1, x2, x3, x4')) (adjacentBelow x4)

instance DiscreteOrdered Word8 where
   -- NOTE: deliberately uses wrapping arithmetic rather than a bounds check;
   -- `x + 1 == y` at x == maxBound compares 0 to y, which is False unless
   -- y == 0, matching the original semantics exactly.
   adjacent x y = x + 1 == y
   adjacentBelow x = if x == 0 then Nothing else Just (pred x)


-- | Check adjacency for sparse enumerated types (i.e. where there
-- is no value between @x@ and @succ x@).
enumAdjacent :: (Ord a, Enum a) => a -> a -> Bool
enumAdjacent x y = succ x == y

-- | Check adjacency, allowing for case where x = maxBound.  Use as the
-- definition of "adjacent" for bounded enumerated types such as Int and Char.
-- The @x < y@ guard ensures @succ@ is never taken at @maxBound@.
boundedAdjacent :: (Ord a, Enum a) => a -> a -> Bool
boundedAdjacent x y = x < y && succ x == y


-- | The usual implementation of 'adjacentBelow' for bounded enumerated types.
boundedBelow :: (Eq a, Enum a, Bounded a) => a -> Maybe a
boundedBelow x
   | x == minBound = Nothing
   | otherwise     = Just (pred x)

{- |
A Boundary is a division of an ordered type into values above
and below the boundary.  No value can sit on a boundary.

Known bug: for Bounded types

   * @BoundaryAbove maxBound < BoundaryAboveAll@

   * @BoundaryBelow minBound > BoundaryBelowAll@

This is incorrect because there are no possible values in
between the left and right sides of these inequalities.
-}

data Boundary a =
   -- | The argument is the highest value below the boundary.
   BoundaryAbove a |
   -- | The argument is the lowest value above the boundary.
   BoundaryBelow a |
   -- | The boundary above all values.
   BoundaryAboveAll |
   -- | The boundary below all values.
   BoundaryBelowAll
   deriving (Show)

-- | True if the value is above the boundary, false otherwise.
169 | above :: Ord v => Boundary v -> v -> Bool 170 | above (BoundaryAbove b) v = v > b 171 | above (BoundaryBelow b) v = v >= b 172 | above BoundaryAboveAll _ = False 173 | above BoundaryBelowAll _ = True 174 | 175 | -- | Same as 'above', but with the arguments reversed for more intuitive infix 176 | -- usage. 177 | (/>/) :: Ord v => v -> Boundary v -> Bool 178 | (/>/) = flip above 179 | 180 | instance (DiscreteOrdered a) => Eq (Boundary a) where 181 | b1 == b2 = compare b1 b2 == EQ 182 | 183 | instance (DiscreteOrdered a) => Ord (Boundary a) where 184 | -- Comparison alogrithm based on brute force and ignorance: 185 | -- enumerate all combinations. 186 | 187 | compare boundary1 boundary2 = 188 | case boundary1 of 189 | BoundaryAbove b1 -> 190 | case boundary2 of 191 | BoundaryAbove b2 -> compare b1 b2 192 | BoundaryBelow b2 -> 193 | if b1 < b2 194 | then 195 | if adjacent b1 b2 then EQ else LT 196 | else GT 197 | BoundaryAboveAll -> LT 198 | BoundaryBelowAll -> GT 199 | BoundaryBelow b1 -> 200 | case boundary2 of 201 | BoundaryAbove b2 -> 202 | if b1 > b2 203 | then 204 | if adjacent b2 b1 then EQ else GT 205 | else LT 206 | BoundaryBelow b2 -> compare b1 b2 207 | BoundaryAboveAll -> LT 208 | BoundaryBelowAll -> GT 209 | BoundaryAboveAll -> 210 | case boundary2 of 211 | BoundaryAboveAll -> EQ 212 | _ -> GT 213 | BoundaryBelowAll -> 214 | case boundary2 of 215 | BoundaryBelowAll -> EQ 216 | _ -> LT 217 | -------------------------------------------------------------------------------- /src/Data/Ranged/RangedSet.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | module Data.Ranged.RangedSet ( 4 | -- ** Ranged Set Type 5 | RSet, 6 | rSetRanges, 7 | -- ** Ranged Set construction functions and their preconditions 8 | makeRangedSet, 9 | unsafeRangedSet, 10 | validRangeList, 11 | normaliseRangeList, 12 | rSingleton, 13 | rSetUnfold, 14 | -- ** Predicates 15 | rSetIsEmpty, 16 | rSetIsFull, 17 | (-?-), 
rSetHas, 18 | (-<=-), rSetIsSubset, 19 | (-<-), rSetIsSubsetStrict, 20 | -- ** Set Operations 21 | (-\/-), rSetUnion, 22 | (-/\-), rSetIntersection, 23 | (-!-), rSetDifference, 24 | rSetNegation, 25 | -- ** Useful Sets 26 | rSetEmpty, 27 | rSetFull, 28 | ) where 29 | 30 | import Data.Ranged.Boundaries 31 | import Data.Ranged.Ranges 32 | #if !MIN_VERSION_base(4,11,0) 33 | import Data.Semigroup 34 | #endif 35 | 36 | import qualified Data.List as List 37 | 38 | infixl 7 -/\- 39 | infixl 6 -\/-, -!- 40 | infixl 5 -<=-, -<-, -?- 41 | 42 | -- | An RSet (for Ranged Set) is a list of ranges. The ranges must be sorted 43 | -- and not overlap. 44 | newtype DiscreteOrdered v => RSet v = RSet {rSetRanges :: [Range v]} 45 | deriving (Eq, Show, Ord) 46 | 47 | instance DiscreteOrdered a => Semigroup (RSet a) where 48 | (<>) = rSetUnion 49 | 50 | instance DiscreteOrdered a => Monoid (RSet a) where 51 | mempty = rSetEmpty 52 | mappend = (<>) 53 | 54 | -- | Determine if the ranges in the list are both in order and non-overlapping. 55 | -- If so then they are suitable input for the unsafeRangedSet function. 56 | validRangeList :: DiscreteOrdered v => [Range v] -> Bool 57 | validRangeList rs = and $ 58 | all (\ (Range lower upper) -> lower <= upper) rs : 59 | zipWith (\ (Range _ upper1) (Range lower2 _) -> upper1 <= lower2) rs (drop 1 rs) 60 | 61 | 62 | -- | Rearrange and merge the ranges in the list so that they are in order and 63 | -- non-overlapping. 64 | normaliseRangeList :: DiscreteOrdered v => [Range v] -> [Range v] 65 | normaliseRangeList = normalise . List.sort . filter (not . rangeIsEmpty) 66 | 67 | 68 | -- Private routine: normalise a range list that is known to be already sorted. 69 | -- This precondition is not checked. 
normalise :: DiscreteOrdered v => [Range v] -> [Range v]
normalise (r1:r2:rest)
   | overlapping = normalise (merged : rest)
   | otherwise   = r1 : normalise (r2 : rest)
   where
      -- The input is sorted, so r1 and r2 overlap exactly when r1's upper
      -- bound reaches r2's lower bound.
      overlapping = rangeUpper r1 >= rangeLower r2
      merged = Range (rangeLower r1) (max (rangeUpper r1) (rangeUpper r2))
normalise rs = rs


-- | Create a new Ranged Set from a list of ranges.  The list may contain
-- ranges that overlap or are not in ascending order.
makeRangedSet :: DiscreteOrdered v => [Range v] -> RSet v
makeRangedSet = RSet . normaliseRangeList


-- | Create a new Ranged Set from a list of ranges.  @validRangeList ranges@
-- must return @True@.  This precondition is not checked.
unsafeRangedSet :: DiscreteOrdered v => [Range v] -> RSet v
unsafeRangedSet = RSet

-- | Create a Ranged Set from a single element.
rSingleton :: DiscreteOrdered v => v -> RSet v
rSingleton = unsafeRangedSet . (:[]) . singletonRange

-- | True if the set has no members.
rSetIsEmpty :: DiscreteOrdered v => RSet v -> Bool
rSetIsEmpty = null . rSetRanges


-- | True if the negation of the set has no members.
rSetIsFull :: DiscreteOrdered v => RSet v -> Bool
rSetIsFull = rSetIsEmpty . rSetNegation


-- | True if the value is within the ranged set.  Infix precedence is left 5.
rSetHas, (-?-) :: DiscreteOrdered v => RSet v -> v -> Bool
rSetHas (RSet rs) value = go rs
   where
      -- The ranges are sorted, so searching can stop as soon as the value
      -- is no longer above the lower bound of the current range.
      go [] = False
      go (r:rest)
         | value />/ rangeLower r = rangeHas r value || go rest
         | otherwise              = False

(-?-) = rSetHas

-- | True if the first argument is a subset of the second argument, or is
-- equal.
--
-- Infix precedence is left 5.
rSetIsSubset, (-<=-) :: DiscreteOrdered v => RSet v -> RSet v -> Bool
rSetIsSubset rs1 rs2 = rSetIsEmpty (rs1 -!- rs2)
(-<=-) = rSetIsSubset


-- | True if the first argument is a strict subset of the second argument.
--
-- Infix precedence is left 5.
rSetIsSubsetStrict, (-<-) :: DiscreteOrdered v => RSet v -> RSet v -> Bool
rSetIsSubsetStrict rs1 rs2 =
   rSetIsEmpty (rs1 -!- rs2) && not (rSetIsEmpty (rs2 -!- rs1))

(-<-) = rSetIsSubsetStrict

-- | Set union for ranged sets.  Infix precedence is left 6.
rSetUnion, (-\/-) :: DiscreteOrdered v => RSet v -> RSet v -> RSet v
-- Implementation note: rSetUnion merges the two lists into a single
-- sorted list and then calls normalise to combine overlapping ranges.
rSetUnion (RSet ls1) (RSet ls2) = RSet (normalise (merge ls1 ls2))
   where
      merge xs [] = xs
      merge [] ys = ys
      merge xs@(x:xt) ys@(y:yt)
         | x < y     = x : merge xt ys
         | otherwise = y : merge xs yt

(-\/-) = rSetUnion

-- | Set intersection for ranged sets.  Infix precedence is left 7.
rSetIntersection, (-/\-) :: DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetIntersection (RSet ls1) (RSet ls2) =
   RSet (filter (not . rangeIsEmpty) (merge ls1 ls2))
   where
      -- Intersect the heads, then advance whichever list's head range
      -- finishes first; empty intersections are filtered out above.
      merge xs@(x:xt) ys@(y:yt) =
         rangeIntersection x y
            : if rangeUpper x < rangeUpper y
                 then merge xt ys
                 else merge xs yt
      merge _ _ = []

(-/\-) = rSetIntersection


-- | Set difference.  Infix precedence is left 6.
rSetDifference, (-!-) :: DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetDifference rs1 rs2 = rs1 -/\- rSetNegation rs2
(-!-) = rSetDifference


-- | Set negation.
176 | rSetNegation :: DiscreteOrdered a => RSet a -> RSet a 177 | rSetNegation set = RSet $ ranges1 $ setBounds1 178 | where 179 | ranges1 (b1:b2:bs) = Range b1 b2 : ranges1 bs 180 | ranges1 [BoundaryAboveAll] = [] 181 | ranges1 [b] = [Range b BoundaryAboveAll] 182 | ranges1 _ = [] 183 | setBounds1 = case setBounds of 184 | (BoundaryBelowAll : bs) -> bs 185 | _ -> BoundaryBelowAll : setBounds 186 | setBounds = bounds $ rSetRanges set 187 | bounds (r:rs) = rangeLower r : rangeUpper r : bounds rs 188 | bounds _ = [] 189 | 190 | -- | The empty set. 191 | rSetEmpty :: DiscreteOrdered a => RSet a 192 | rSetEmpty = RSet [] 193 | 194 | -- | The set that contains everything. 195 | rSetFull :: DiscreteOrdered a => RSet a 196 | rSetFull = RSet [Range BoundaryBelowAll BoundaryAboveAll] 197 | 198 | -- | Construct a range set. 199 | rSetUnfold :: DiscreteOrdered a => 200 | Boundary a 201 | -- ^ A first lower boundary. 202 | -> (Boundary a -> Boundary a) 203 | -- ^ A function from a lower boundary to an upper boundary, which must 204 | -- return a result greater than the argument (not checked). 205 | -> (Boundary a -> Maybe (Boundary a)) 206 | -- ^ A function from a lower boundary to @Maybe@ the successor lower 207 | -- boundary, which must return a result greater than the argument 208 | -- (not checked). If ranges overlap then they will be merged. 
209 | -> RSet a 210 | rSetUnfold bound upperFunc succFunc = RSet $ normalise $ ranges1 bound 211 | where 212 | ranges1 b = 213 | Range b (upperFunc b) 214 | : case succFunc b of 215 | Just b2 -> ranges1 b2 216 | Nothing -> [] 217 | -------------------------------------------------------------------------------- /src/Data/Ranged/Ranges.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- 3 | -- Module : Data.Ranged.Ranges 4 | -- Copyright : (c) Paul Johnson 2006 5 | -- License : BSD-style 6 | -- Maintainer : paul@cogito.org.uk 7 | -- Stability : experimental 8 | -- Portability : portable 9 | -- 10 | ----------------------------------------------------------------------------- 11 | 12 | -- | A range has an upper and lower boundary. 13 | module Data.Ranged.Ranges ( 14 | -- ** Construction 15 | Range (..), 16 | emptyRange, 17 | fullRange, 18 | -- ** Predicates 19 | rangeIsEmpty, 20 | rangeIsFull, 21 | rangeOverlap, 22 | rangeEncloses, 23 | rangeSingletonValue, 24 | -- ** Membership 25 | rangeHas, 26 | rangeListHas, 27 | -- ** Set Operations 28 | singletonRange, 29 | rangeIntersection, 30 | rangeUnion, 31 | rangeDifference, 32 | ) where 33 | 34 | import Data.Ranged.Boundaries 35 | 36 | -- | A Range has upper and lower boundaries. 
data Range v = Range {rangeLower, rangeUpper :: Boundary v}

-- Two ranges are equal when both are empty, or when their boundaries agree.
instance (DiscreteOrdered a) => Eq (Range a) where
   r1 == r2 =
      (rangeIsEmpty r1 && rangeIsEmpty r2)
      || (rangeLower r1 == rangeLower r2 && rangeUpper r1 == rangeUpper r2)


-- Empty ranges sort before all non-empty ones; otherwise compare the
-- boundary pairs lexicographically.
instance (DiscreteOrdered a) => Ord (Range a) where
   compare r1 r2
      | r1 == r2        = EQ
      | rangeIsEmpty r1 = LT
      | rangeIsEmpty r2 = GT
      | otherwise       =
         compare (rangeLower r1, rangeUpper r1) (rangeLower r2, rangeUpper r2)

instance (Show a, DiscreteOrdered a) => Show (Range a) where
   show r
      | rangeIsEmpty r = "Empty"
      | rangeIsFull r  = "All x"
      | otherwise      =
         case rangeSingletonValue r of
            Just v  -> "x == " ++ show v
            Nothing -> lowerBound ++ "x" ++ upperBound
      where
         lowerBound = case rangeLower r of
            BoundaryBelowAll -> ""
            BoundaryBelow v  -> show v ++ " <= "
            BoundaryAbove v  -> show v ++ " < "
            BoundaryAboveAll -> error "show Range: lower bound is BoundaryAboveAll"
         upperBound = case rangeUpper r of
            BoundaryBelowAll -> error "show Range: upper bound is BoundaryBelowAll"
            BoundaryBelow v  -> " < " ++ show v
            BoundaryAbove v  -> " <= " ++ show v
            BoundaryAboveAll -> ""


-- | True if the value is within the range.
rangeHas :: Ord v => Range v -> v -> Bool
rangeHas (Range b1 b2) v = (v />/ b1) && not (v />/ b2)


-- | True if the value is within one of the ranges.
rangeListHas :: Ord v => [Range v] -> v -> Bool
rangeListHas rs v = any (\r -> rangeHas r v) rs


-- | The empty range
emptyRange :: Range v
emptyRange = Range BoundaryAboveAll BoundaryBelowAll


-- | The full range.  All values are within it.
fullRange :: Range v
fullRange = Range BoundaryBelowAll BoundaryAboveAll


-- | A range containing a single value
singletonRange :: v -> Range v
singletonRange v = Range (BoundaryBelow v) (BoundaryAbove v)


-- | If the range is a singleton, returns @Just@ the value.  Otherwise returns
-- @Nothing@.
--
-- Known bug: This always returns @Nothing@ for ranges including
-- @BoundaryBelowAll@ or @BoundaryAboveAll@.  For bounded types this can be
-- incorrect.  For instance, the following range only contains one value:
--
-- > Range (BoundaryBelow maxBound) BoundaryAboveAll
rangeSingletonValue :: DiscreteOrdered v => Range v -> Maybe v
rangeSingletonValue (Range (BoundaryBelow lo) (BoundaryBelow hi))
   | adjacent lo hi = Just lo
   | otherwise      = Nothing
rangeSingletonValue (Range (BoundaryBelow lo) (BoundaryAbove hi))
   | lo == hi  = Just lo
   | otherwise = Nothing
rangeSingletonValue (Range (BoundaryAbove lo) (BoundaryBelow hi)) =
   do -- Maybe monad: the only candidate is the value just below hi; it is
      -- the singleton exactly when the value below *that* is lo.
      hi'  <- adjacentBelow hi
      hi'' <- adjacentBelow hi'
      if lo == hi'' then return hi' else Nothing
rangeSingletonValue (Range (BoundaryAbove lo) (BoundaryAbove hi))
   | adjacent lo hi = Just hi
   | otherwise      = Nothing
rangeSingletonValue (Range _ _) = Nothing

-- | A range is empty unless its upper boundary is greater than its lower
-- boundary.
rangeIsEmpty :: DiscreteOrdered v => Range v -> Bool
rangeIsEmpty (Range lower upper) = upper <= lower


-- | A range is full if it contains every possible value.
rangeIsFull :: DiscreteOrdered v => Range v -> Bool
rangeIsFull = (== fullRange)

-- | Two ranges overlap if their intersection is non-empty.
rangeOverlap :: DiscreteOrdered v => Range v -> Range v -> Bool
rangeOverlap r1 r2 =
   not (rangeIsEmpty r1)
   && not (rangeIsEmpty r2)
   && not (rangeUpper r1 <= rangeLower r2 || rangeUpper r2 <= rangeLower r1)


-- | The first range encloses the second if every value in the second range is
-- also within the first range.  If the second range is empty then this is
-- always true.
rangeEncloses :: DiscreteOrdered v => Range v -> Range v -> Bool
rangeEncloses r1 r2 =
   (rangeLower r1 <= rangeLower r2 && rangeUpper r2 <= rangeUpper r1)
   || rangeIsEmpty r2


-- | Intersection of two ranges, if any.
rangeIntersection :: DiscreteOrdered v => Range v -> Range v -> Range v
rangeIntersection r1@(Range lower1 upper1) r2@(Range lower2 upper2)
   | rangeIsEmpty r1 || rangeIsEmpty r2 = emptyRange
   | otherwise = Range (max lower1 lower2) (min upper1 upper2)


-- | Union of two ranges.  Returns one or two results.
--
-- If there are two results then they are guaranteed to have a non-empty
-- gap in between, but may not be in ascending order.
rangeUnion :: DiscreteOrdered v => Range v -> Range v -> [Range v]
rangeUnion r1@(Range lower1 upper1) r2@(Range lower2 upper2)
   | rangeIsEmpty r1 = [r2]
   | rangeIsEmpty r2 = [r1]
   | touching        = [Range (min lower1 lower2) (max upper1 upper2)]
   | otherwise       = [r1, r2]
   where
      -- The ranges touch or overlap when there is no gap between them.
      touching = max lower1 lower2 <= min upper1 upper2


-- | @range1@ minus @range2@.  Returns zero, one or two results.  Multiple
-- results are guaranteed to have non-empty gaps in between, but may not be in
-- ascending order.
180 | rangeDifference :: DiscreteOrdered v => Range v -> Range v -> [Range v] 181 | 182 | rangeDifference r1@(Range lower1 upper1) (Range lower2 upper2) = 183 | -- There are six possibilities 184 | -- 1: r2 completely less than r1 185 | -- 2: r2 overlaps bottom of r1 186 | -- 3: r2 encloses r1 187 | -- 4: r1 encloses r2 188 | -- 5: r2 overlaps top of r1 189 | -- 6: r2 completely greater than r1 190 | if intersects 191 | then -- Cases 2,3,4,5 192 | filter (not . rangeIsEmpty) [Range lower1 lower2, Range upper2 upper1] 193 | else -- Cases 1, 6 194 | [r1] 195 | where 196 | intersects = (max lower1 lower2) < (min upper1 upper2) 197 | -------------------------------------------------------------------------------- /src/Info.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- 3 | -- Info.hs, part of Alex 4 | -- 5 | -- (c) Simon Marlow 2003 6 | -- 7 | -- Generate a human-readable rendition of the state machine. 8 | -- 9 | -- ----------------------------------------------------------------------------} 10 | 11 | module Info (infoDFA) where 12 | 13 | import AbsSyn 14 | import qualified Data.Map as Map 15 | import qualified Data.IntMap as IntMap 16 | import Util 17 | 18 | -- ----------------------------------------------------------------------------- 19 | -- Generate a human readable dump of the state machine 20 | 21 | infoDFA :: Int -> String -> DFA SNum Code -> ShowS 22 | infoDFA _ func_nm dfa 23 | = str "Scanner : " . str func_nm . nl 24 | . str "States : " . shows (length dfa_list) . nl 25 | . nl . infoDFA' 26 | where 27 | dfa_list = Map.toAscList (dfa_states dfa) 28 | 29 | infoDFA' = interleave_shows nl (map infoStateN dfa_list) 30 | 31 | infoStateN (i,s) = str "State " . shows i . nl . infoState s 32 | 33 | infoState :: State SNum Code -> ShowS 34 | infoState (State accs out) 35 | = foldr (.) id (map infoAccept accs) 36 | . infoArr out . 
nl 37 | 38 | infoArr out 39 | = char '\t' . interleave_shows (str "\n\t") 40 | (map infoTransition (IntMap.toAscList out)) 41 | 42 | infoAccept (Acc p act lctx rctx) 43 | = str "\tAccept" . paren (shows p) . space 44 | . outputLCtx lctx . space 45 | . showRCtx rctx 46 | . (case act of 47 | Nothing -> id 48 | Just code -> str " { " . str code . str " }") 49 | . nl 50 | 51 | infoTransition (char',state) 52 | = str (ljustify 8 (show char')) 53 | . str " -> " 54 | . shows state 55 | 56 | outputLCtx Nothing 57 | = id 58 | outputLCtx (Just set) 59 | = paren (show set ++) . char '^' 60 | 61 | -- outputArr arr 62 | -- = str "Array.array " . shows (bounds arr) . space 63 | -- . shows (assocs arr) 64 | -------------------------------------------------------------------------------- /src/ParseMonad.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- 3 | -- ParseMonad.hs, part of Alex 4 | -- 5 | -- (c) Simon Marlow 2003 6 | -- 7 | -- ----------------------------------------------------------------------------} 8 | 9 | module ParseMonad ( 10 | AlexInput, alexInputPrevChar, alexGetChar, alexGetByte, 11 | AlexPosn(..), alexStartPos, 12 | Warning(..), warnIfNullable, 13 | P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac, 14 | setStartCode, getStartCode, getInput, setInput, 15 | ) where 16 | 17 | import Control.Monad ( liftM, ap, when ) 18 | import Data.Map ( Map ) 19 | import Data.List.NonEmpty ( pattern (:|) ) 20 | import Data.Word ( Word8 ) 21 | import qualified Data.Map as Map 22 | 23 | import AbsSyn hiding ( StartCode ) 24 | import CharSet ( CharSet ) 25 | import UTF8 26 | 27 | -- ----------------------------------------------------------------------------- 28 | -- The input type 29 | --import Codec.Binary.UTF8.Light as UTF8 30 | 31 | type Byte = Word8 32 | 33 | type AlexInput = (AlexPosn, -- current position, 34 | Char, -- previous 
char
                  [Byte],   -- pending UTF-8 bytes of the current char (see 'alexGetByte')
                  String)   -- current input string

-- | The character that was read immediately before the current position.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,c,_,_) = c


-- | Decode the next whole character from the input.
--   Only defined while the byte buffer is empty: the scanner consumes either
--   whole characters (this function) or single bytes ('alexGetByte') at any
--   given position, never a mix — hence the 'undefined' catch-all below.
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (_, _, [], []) = Nothing
alexGetChar (p, _, [], c:s) = p' `seq` Just (c, (p', c, [], s))
  where
    p' = alexMove p c
alexGetChar (_, _ , _:_, _) = undefined -- hide compiler warning

-- | Fetch the next byte of the input.  Characters are expanded to their
--   UTF-8 encoding on demand ('UTF8.encode'); any bytes beyond the first are
--   buffered in the 'AlexInput' and drained by subsequent calls before the
--   next character is decoded.
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (p, c, b:bs, s) = Just (b, (p, c, bs, s))
alexGetByte (_, _, [], []) = Nothing
alexGetByte (p, _, [], c:s) = p' `seq` Just (b, (p', c, bs, s))
  where
    p' = alexMove p c
    b :| bs = UTF8.encode c

-- -----------------------------------------------------------------------------
-- Token positions

-- `Posn' records the location of a token in the input text.  It has three
-- fields: the address (number of characters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
-- assuming the usual eight character tab stops.

data AlexPosn = AlexPn !Int !Int !Int
  deriving (Eq, Show, Ord)

-- | Position of the start of the file: address 0, line 1, column 1.
alexStartPos :: AlexPosn
alexStartPos = AlexPn 0 1 1

-- | Advance a position over one character: a tab jumps to the next 8-column
--   tab stop, a newline bumps the line and resets the column to 1, anything
--   else advances the column by one.  The address always advances by one
--   character (not byte).
alexMove :: AlexPosn -> Char -> AlexPosn
alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1)
alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1) 1
alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)

-- -----------------------------------------------------------------------------
-- Alex lexing/parsing monad

data Warning
  = WarnNullableRExp
    { _warnPos  :: AlexPosn -- ^ The position of the code following the regex.
    , _warnText :: String   -- ^ Warning text.
    }

-- | A parse error: optional position plus message.  The position is 'Nothing'
--   when none is available (see 'lookupRMac').
type ParseError = (Maybe AlexPosn, String)
type StartCode = Int

-- | Internal state threaded through the 'P' monad.
data PState = PState
  { warnings  :: [Warning]          -- ^ Stack of warnings, top = last warning.
  , smac_env  :: Map String CharSet -- ^ Definitions of set macros.
  , rmac_env  :: Map String RExp    -- ^ Definitions of regex macros.
  , startcode :: Int                -- ^ Current lexer start code.
  , input     :: AlexInput          -- ^ Remaining input.
  }

-- | The parse monad: a state transformer over 'PState' that can fail with a
--   'ParseError' (the 'Either' short-circuits the whole computation).
newtype P a = P { unP :: PState -> Either ParseError (PState,a) }

instance Functor P where
  fmap = liftM

instance Applicative P where
  pure a = P $ \env -> Right (env,a)
  (<*>) = ap

instance Monad P where
  (P m) >>= k = P $ \env -> case m env of
    Left err -> Left err
    Right (env',ok) -> unP (k ok) env'
  return = pure

-- | Run the parser on given input.
runP :: String
     -- ^ Input string.
     -> (Map String CharSet, Map String RExp)
     -- ^ Character set and regex definitions.
     -> P a
     -- ^ Parsing computation.
     -> Either ParseError ([Warning], a)
     -- ^ List of warnings in first-to-last order, result.
runP str (senv,renv) (P p)
  = case p initial_state of
      Left err -> Left err
      -- Warnings are pushed onto a stack, so reverse them to get
      -- chronological (first-to-last) order.
      Right (s, a) -> Right (reverse (warnings s), a)
  where
    initial_state = PState
      { warnings  = []
      , smac_env  = senv
      , rmac_env  = renv
      , startcode = 0
      , input     = (alexStartPos, '\n', [], str)
      }

-- | Fail with the given message at the current input position.
failP :: String -> P a
failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)

-- Macros are expanded during parsing, to simplify the abstract
-- syntax.  The parsing monad passes around two environments mapping
-- macro names to sets and regexps respectively.

-- | Look up a set macro (written @$foo@ in the surface syntax); fails at the
--   given position if the macro is not defined.
lookupSMac :: (AlexPosn,String) -> P CharSet
lookupSMac (posn,smac)
  = P $ \s@PState{ smac_env = senv } ->
      case Map.lookup smac senv of
        Just ok -> Right (s,ok)
        Nothing -> Left (Just posn, "unknown set macro: $" ++ smac)

-- | Look up a regex macro (written @foo in the surface syntax).
--   NOTE(review): no position is available here, so the error carries
--   'Nothing'; consider taking an 'AlexPosn' as 'lookupSMac' does.
lookupRMac :: String -> P RExp
lookupRMac rmac
  = P $ \s@PState{ rmac_env = renv } ->
      case Map.lookup rmac renv of
        Just ok -> Right (s,ok)
        -- Regex macros are introduced by '@' (see @rmac in Scan.x), matching
        -- the "$" sigil used in the set-macro message above.  The message
        -- previously said "%", which does not match the surface syntax.
        Nothing -> Left (Nothing, "unknown regex macro: @" ++ rmac)

-- | Define a new set macro.
newSMac :: String -> CharSet -> P ()
newSMac smac set
  = P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ())

-- | Define a new regex macro.
newRMac :: String -> RExp -> P ()
newRMac rmac rexp
  = P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ())

-- | Set the current lexer start code.
setStartCode :: StartCode -> P ()
setStartCode sc = P $ \s -> Right (s{ startcode = sc }, ())

-- | Get the current lexer start code.
getStartCode :: P StartCode
getStartCode = P $ \s -> Right (s, startcode s)

-- | Get the remaining input.
getInput :: P AlexInput
getInput = P $ \s -> Right (s, input s)

-- | Replace the remaining input.
setInput :: AlexInput -> P ()
setInput inp = P $ \s -> Right (s{ input = inp }, ())

-- | Add a warning if given regular expression is nullable
-- unless the user wrote the regex 'Eps'.
warnIfNullable
  :: RExp     -- ^ Regular expression.
  -> AlexPosn -- ^ Position associated to regular expression.
  -> P ()
-- If the user wrote @()@, they wanted to match the empty sequence!
-- Thus, skip the warning then.
warnIfNullable Eps _ = return ()
warnIfNullable r pos = when (nullable r) $ P $ \ s ->
  Right (s{ warnings = WarnNullableRExp pos w : warnings s}, ())
  where
  w = unwords
    [ "Regular expression"
    , show r
    , "matches the empty string."
192 | ] 193 | -------------------------------------------------------------------------------- /src/Parser.y: -------------------------------------------------------------------------------- 1 | { 2 | -- ----------------------------------------------------------------------------- 3 | -- 4 | -- Parser.y, part of Alex 5 | -- 6 | -- (c) Simon Marlow 2003 7 | -- 8 | -- ----------------------------------------------------------------------------- 9 | 10 | {-# OPTIONS_GHC -w #-} 11 | 12 | module Parser ( parse, P ) where 13 | import AbsSyn 14 | import Scan 15 | import CharSet 16 | import ParseMonad hiding ( StartCode ) 17 | 18 | import Data.Char 19 | --import Debug.Trace 20 | } 21 | 22 | %tokentype { Token } 23 | 24 | %name parse 25 | 26 | %monad { P } { (>>=) } { return } 27 | %lexer { lexer } { T _ EOFT } 28 | 29 | %token 30 | '.' { T _ (SpecialT '.') } 31 | ';' { T _ (SpecialT ';') } 32 | '<' { T _ (SpecialT '<') } 33 | '>' { T _ (SpecialT '>') } 34 | ',' { T _ (SpecialT ',') } 35 | '$' { T _ (SpecialT '$') } 36 | '|' { T _ (SpecialT '|') } 37 | '*' { T _ (SpecialT '*') } 38 | '+' { T _ (SpecialT '+') } 39 | '?' 
{ T _ (SpecialT '?') } 40 | '{' { T _ (SpecialT '{') } 41 | '}' { T _ (SpecialT '}') } 42 | '(' { T _ (SpecialT '(') } 43 | ')' { T _ (SpecialT ')') } 44 | '#' { T _ (SpecialT '#') } 45 | '~' { T _ (SpecialT '~') } 46 | '-' { T _ (SpecialT '-') } 47 | '[' { T _ (SpecialT '[') } 48 | ']' { T _ (SpecialT ']') } 49 | '^' { T _ (SpecialT '^') } 50 | '/' { T _ (SpecialT '/') } 51 | ZERO { T _ ZeroT } 52 | STRING { T _ (StringT $$) } 53 | BIND { T _ (BindT $$) } 54 | ID { T _ (IdT $$) } 55 | CODE { T _ (CodeT _) } 56 | CHAR { T _ (CharT $$) } 57 | NUM { T _ (NumT $$) } 58 | SMAC { T _ (SMacT _) } 59 | RMAC { T _ (RMacT $$) } 60 | SMAC_DEF { T _ (SMacDefT $$) } 61 | RMAC_DEF { T _ (RMacDefT $$) } 62 | WRAPPER { T _ WrapperT } 63 | ENCODING { T _ EncodingT } 64 | ACTIONTYPE { T _ ActionTypeT } 65 | TOKENTYPE { T _ TokenTypeT } 66 | TYPECLASS { T _ TypeClassT } 67 | %% 68 | 69 | alex :: { (Maybe (AlexPosn,Code), [Directive], Scanner, Maybe (AlexPosn,Code)) } 70 | : maybe_code directives macdefs scanner maybe_code { ($1,$2,$4,$5) } 71 | 72 | maybe_code :: { Maybe (AlexPosn,Code) } 73 | : CODE { case $1 of T pos (CodeT code) -> 74 | Just (pos,code) } 75 | | {- empty -} { Nothing } 76 | 77 | directives :: { [Directive] } 78 | : directive directives { $1 : $2 } 79 | | {- empty -} { [] } 80 | 81 | directive :: { Directive } 82 | : WRAPPER STRING { WrapperDirective $2 } 83 | | ENCODING encoding { EncodingDirective $2 } 84 | | ACTIONTYPE STRING { ActionType $2 } 85 | | TOKENTYPE STRING { TokenType $2 } 86 | | TYPECLASS STRING { TypeClass $2 } 87 | 88 | encoding :: { Encoding } 89 | : STRING {% lookupEncoding $1 } 90 | 91 | macdefs :: { () } 92 | : macdef macdefs { () } 93 | | {- empty -} { () } 94 | 95 | -- hack: the lexer looks for the '=' in a macro definition, because there 96 | -- doesn't seem to be a way to formulate the grammar here to avoid a 97 | -- conflict (it needs LR(2) rather than LR(1) to find the '=' and distinguish 98 | -- an SMAC/RMAC at the beginning of a 
definition from an SMAC/RMAC that is 99 | -- part of a regexp in the previous definition). 100 | macdef :: { () } 101 | : SMAC_DEF set {% newSMac $1 $2 } 102 | | RMAC_DEF rexp {% newRMac $1 $2 } 103 | 104 | scanner :: { Scanner } 105 | : BIND tokendefs { Scanner $1 $2 } 106 | 107 | tokendefs :: { [RECtx] } 108 | : tokendef tokendefs { $1 ++ $2 } 109 | | {- empty -} { [] } 110 | 111 | tokendef :: { [RECtx] } 112 | : startcodes rule { [ replaceCodes $1 (snd $2) ] } 113 | | startcodes '{' rules '}' { map (replaceCodes $1) $3 } 114 | | rule {% do 115 | let (pos, res@(RECtx _ _ e _ _)) = $1 116 | warnIfNullable e pos 117 | return [ res ] 118 | } 119 | 120 | rule :: { (AlexPosn, RECtx) } 121 | : context rhs { let 122 | (l, e, r) = $1 123 | (pos, code) = $2 124 | in (pos, RECtx [] l e r code) 125 | } 126 | 127 | rules :: { [RECtx] } 128 | : rule rules { snd $1 : $2 } 129 | | {- empty -} { [] } 130 | 131 | startcodes :: { [(String,StartCode)] } 132 | : '<' startcodes0 '>' { $2 } 133 | 134 | startcodes0 :: { [(String,StartCode)] } 135 | : startcode ',' startcodes0 { ($1,0) : $3 } 136 | | startcode { [($1,0)] } 137 | 138 | startcode :: { String } 139 | : ZERO { "0" } 140 | | ID { $1 } 141 | 142 | rhs :: { (AlexPosn, Maybe Code) } 143 | : CODE { case $1 of T pos (CodeT code) -> (pos, Just code) } 144 | | ';' { (tokPosn $1, Nothing) } 145 | 146 | context :: { Maybe CharSet, RExp, RightContext RExp } 147 | : left_ctx rexp right_ctx { (Just $1,$2,$3) } 148 | | rexp right_ctx { (Nothing,$1,$2) } 149 | 150 | left_ctx :: { CharSet } 151 | : '^' { charSetSingleton '\n' } 152 | | set '^' { $1 } 153 | 154 | right_ctx :: { RightContext RExp } 155 | : '$' { RightContextRExp (Ch (charSetSingleton '\n')) } 156 | | '/' rexp { RightContextRExp $2 } 157 | | '/' CODE { RightContextCode (case $2 of 158 | T _ (CodeT code) -> code) } 159 | | {- empty -} { NoRightContext } 160 | 161 | rexp :: { RExp } 162 | : alt '|' rexp { $1 :|| $3 } 163 | | alt { $1 } 164 | 165 | alt :: { RExp } 166 | : alt 
term { $1 :%% $2 } 167 | | term { $1 } 168 | 169 | term :: { RExp } 170 | : rexp0 rep { $2 $1 } 171 | | rexp0 { $1 } 172 | 173 | rep :: { RExp -> RExp } 174 | : '*' { Star } 175 | | '+' { Plus } 176 | | '?' { Ques } 177 | | begin_mult '{' mult '}' { $3 } 178 | -- A bit counterintuitively, we need @begin_mult@ already before the left brace, 179 | -- not just before @mult@. This might be due to the lookahead in the parser. 180 | 181 | -- Enter the "multiplicity" lexer mode to scan number literals 182 | begin_mult :: { () } 183 | : {- empty -} {% setStartCode multiplicity } 184 | 185 | -- Parse a numeric multiplicity. 186 | mult :: { RExp -> RExp } 187 | : NUM { repeat_rng $1 Nothing } 188 | | NUM ',' { repeat_rng $1 (Just Nothing) } 189 | | NUM ',' NUM { repeat_rng $1 (Just (Just $3)) } 190 | 191 | rexp0 :: { RExp } 192 | : '(' ')' { Eps } 193 | | STRING { foldr (:%%) Eps 194 | (map (Ch . charSetSingleton) $1) } 195 | | RMAC {% lookupRMac $1 } 196 | | set { Ch $1 } 197 | | '(' rexp ')' { $2 } 198 | 199 | set :: { CharSet } 200 | : set '#' set0 { $1 `charSetMinus` $3 } 201 | | set0 { $1 } 202 | 203 | set0 :: { CharSet } 204 | : CHAR { charSetSingleton $1 } 205 | | CHAR '-' CHAR { charSetRange $1 $3 } 206 | | smac {% lookupSMac $1 } 207 | | '[' sets ']' { foldr charSetUnion emptyCharSet $2 } 208 | 209 | -- [^sets] is the same as '. # [sets]' 210 | -- The upshot is that [^set] does *not* match a newline character, 211 | -- which seems much more useful than just taking the complement. 212 | | '[' '^' sets ']' 213 | {% do { dot <- lookupSMac (tokPosn $1, "."); 214 | return (dot `charSetMinus` 215 | foldr charSetUnion emptyCharSet $3) }} 216 | 217 | -- ~set is the same as '. # set' 218 | | '~' set0 {% do { dot <- lookupSMac (tokPosn $1, "."); 219 | return (dot `charSetMinus` $2) } } 220 | 221 | sets :: { [CharSet] } 222 | : set sets { $1 : $2 } 223 | | {- empty -} { [] } 224 | 225 | smac :: { (AlexPosn,String) } 226 | : '.' 
{ (tokPosn $1, ".") } 227 | | SMAC { case $1 of T p (SMacT s) -> (p, s) } 228 | 229 | { 230 | happyError :: P a 231 | happyError = failP "parse error" 232 | 233 | -- ----------------------------------------------------------------------------- 234 | -- Utils 235 | 236 | digit c = ord c - ord '0' 237 | 238 | repeat_rng :: Int -> Maybe (Maybe Int) -> (RExp->RExp) 239 | repeat_rng n (Nothing) re = foldr (:%%) Eps (replicate n re) 240 | repeat_rng n (Just Nothing) re = foldr (:%%) (Star re) (replicate n re) 241 | repeat_rng n (Just (Just m)) re = intl :%% rst 242 | where 243 | intl = repeat_rng n Nothing re 244 | rst = foldr (\re re'->Ques(re :%% re')) Eps (replicate (m-n) re) 245 | 246 | replaceCodes codes rectx = rectx{ reCtxStartCodes = codes } 247 | 248 | lookupEncoding :: String -> P Encoding 249 | lookupEncoding s = case map toLower s of 250 | "iso-8859-1" -> return Latin1 251 | "latin1" -> return Latin1 252 | "utf-8" -> return UTF8 253 | "utf8" -> return UTF8 254 | _ -> failP ("encoding " ++ show s ++ " not supported") 255 | 256 | } 257 | -------------------------------------------------------------------------------- /src/Parser.y.boot: -------------------------------------------------------------------------------- 1 | -- placeholder 2 | -------------------------------------------------------------------------------- /src/Scan.x: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | -- ALEX SCANNER AND LITERATE PREPROCESSOR 3 | -- 4 | -- This Script defines the grammar used to generate the Alex scanner and a 5 | -- preprocessing scanner for dealing with literate scripts. The actions for 6 | -- the Alex scanner are given separately in the Alex module. 7 | -- 8 | -- See the Alex manual for a discussion of the scanners defined here. 
9 | -- 10 | -- Chris Dornan, Aug-95, 4-Jun-96, 10-Jul-96, 29-Sep-97 11 | ------------------------------------------------------------------------------- 12 | 13 | { 14 | {-# LANGUAGE CPP #-} 15 | 16 | -- Switch off partiality warning about 'head' and 'tail' 17 | #if __GLASGOW_HASKELL__ >= 908 18 | {-# OPTIONS_GHC -Wno-x-partial #-} 19 | #endif 20 | 21 | module Scan (lexer, AlexPosn(..), Token(..), Tkn(..), tokPosn, multiplicity) where 22 | 23 | import Data.Char 24 | import ParseMonad 25 | -- import Debug.Trace 26 | } 27 | 28 | $digit = 0-9 29 | $hexdig = [0-9 A-F a-f] 30 | $octal = 0-7 31 | $lower = a-z 32 | $upper = A-Z 33 | $alpha = [$upper $lower] 34 | $alphanum = [$alpha $digit] 35 | $idchar = [$alphanum \_ \'] 36 | 37 | $special = [\.\;\,\$\|\*\+\?\#\~\-\{\}\(\)\[\]\^\/] 38 | $graphic = $printable # $white 39 | $nonspecial = $graphic # [$special \%] 40 | 41 | @id = $alpha $idchar* 42 | @smac = \$ @id | \$ \{ @id \} 43 | @rmac = \@ @id | \@ \{ @id \} 44 | 45 | @comment = "--".* 46 | @ws = $white+ | @comment 47 | 48 | alex :- 49 | 50 | @ws { skip } -- white space; ignore 51 | 52 | <0> \" [^\"]* \" { string } 53 | <0> (@id @ws?)? \:\- { bind } 54 | <0> \{ / (\n | [^$digit]) { code } 55 | <0> $special { special } -- note: matches { 56 | <0> \% "wrapper" { wrapper } 57 | <0> \% "encoding" { encoding } 58 | <0> \% "action" { actionty } 59 | <0> \% "token" { tokenty } 60 | <0> \% "typeclass" { typeclass } 61 | 62 | <0> \\ $digit+ { decch } 63 | <0> \\ x $hexdig+ { hexch } 64 | <0> \\ o $octal+ { octch } 65 | <0> \\ $printable { escape } 66 | <0> $nonspecial # [\<] { char } 67 | <0> @smac { smac } 68 | <0> @rmac { rmac } 69 | 70 | <0> @smac @ws? \= { smacdef } 71 | <0> @rmac @ws? 
\= { rmacdef } 72 | 73 | -- identifiers are allowed to be unquoted in startcode lists 74 | <0> \< { special `andBegin` startcodes } 75 | 0 { zero } 76 | @id { startcode } 77 | \, { special } 78 | \> { special `andBegin` afterstartcodes } 79 | 80 | -- After a <..> startcode sequence, we can have a {...} grouping of rules, 81 | -- so don't try to interpret the opening { as a code block. 82 | \{ (\n | [^$digit ]) { special `andBegin` 0 } 83 | () { skip `andBegin` 0 } -- note: empty pattern 84 | 85 | -- Numeric literals are only lexed in multiplicity braces e.g. {nnn,mmm}. 86 | -- Switching to the @multiplicity@ lexer state happens in the parser. 87 | $digit+ { num } 88 | \, { special } 89 | \} { special `andBegin` 0 } 90 | 91 | { 92 | 93 | -- ----------------------------------------------------------------------------- 94 | -- Token type 95 | 96 | data Token = T AlexPosn Tkn 97 | deriving Show 98 | 99 | tokPosn (T p _) = p 100 | 101 | data Tkn 102 | = SpecialT Char 103 | | CodeT String 104 | | ZeroT 105 | | IdT String 106 | | StringT String 107 | | BindT String 108 | | CharT Char 109 | | SMacT String 110 | | RMacT String 111 | | SMacDefT String 112 | | RMacDefT String 113 | | NumT Int 114 | | WrapperT 115 | | EncodingT 116 | | ActionTypeT 117 | | TokenTypeT 118 | | TypeClassT 119 | | EOFT 120 | deriving Show 121 | 122 | -- ----------------------------------------------------------------------------- 123 | -- Token functions 124 | 125 | special, zero, string, bind, escape, decch, hexch, octch, char :: Action 126 | smac, rmac, smacdef, rmacdef, startcode, wrapper, encoding :: Action 127 | actionty, tokenty, typeclass :: Action 128 | special (p,_,str) _ = return $ T p (SpecialT (head str)) 129 | zero (p,_,_) _ = return $ T p ZeroT 130 | string (p,_,str) ln = return $ T p (StringT (extract ln str)) 131 | bind (p,_,str) _ = return $ T p (BindT (takeWhile isIdChar str)) 132 | escape (p,_,str) _ = return $ T p (CharT (esc str)) 133 | decch (p,_,str) ln = return $ T p (CharT 
(do_ech 10 ln (take (ln-1) (drop 1 str)))) 134 | hexch (p,_,str) ln = return $ T p (CharT (do_ech 16 ln (take (ln-2) (drop 2 str)))) 135 | octch (p,_,str) ln = return $ T p (CharT (do_ech 8 ln (take (ln-2) (drop 2 str)))) 136 | char (p,_,str) _ = return $ T p (CharT (head str)) 137 | num (p,_,str) ln = return $ T p $ NumT $ parseInt 10 $ take ln str 138 | smac (p,_,str) ln = return $ T p (SMacT (mac ln str)) 139 | rmac (p,_,str) ln = return $ T p (RMacT (mac ln str)) 140 | smacdef (p,_,str) ln = return $ T p (SMacDefT (macdef ln str)) 141 | rmacdef (p,_,str) ln = return $ T p (RMacDefT (macdef ln str)) 142 | startcode (p,_,str) ln = return $ T p (IdT (take ln str)) 143 | wrapper (p,_,_) _ = return $ T p WrapperT 144 | encoding (p,_,_) _ = return $ T p EncodingT 145 | actionty (p,_,_) _ = return $ T p ActionTypeT 146 | tokenty (p,_,_) _ = return $ T p TokenTypeT 147 | typeclass (p,_,_) _ = return $ T p TypeClassT 148 | 149 | isIdChar :: Char -> Bool 150 | isIdChar c = isAlphaNum c || c `elem` "_'" 151 | 152 | extract :: Int -> String -> String 153 | extract ln str = take (ln-2) (drop 1 str) 154 | 155 | do_ech :: Int -> Int -> String -> Char 156 | do_ech radix _ln str = chr (parseInt radix str) 157 | 158 | mac :: Int -> String -> String 159 | mac ln str = take (ln-1) $ tail str 160 | 161 | -- TODO : replace not . 
isSpace with (\c -> not (isSpace c) && c /= '=') 162 | macdef :: Int -> String -> String 163 | macdef _ln str = takeWhile (\c -> not (isSpace c) && c /= '=') $ tail str 164 | 165 | esc :: String -> Char 166 | esc str = 167 | case head $ tail str of 168 | 'a' -> '\a' 169 | 'b' -> '\b' 170 | 'f' -> '\f' 171 | 'n' -> '\n' 172 | 'r' -> '\r' 173 | 't' -> '\t' 174 | 'v' -> '\v' 175 | c -> c 176 | 177 | parseInt :: Int -> String -> Int 178 | parseInt radix ds = foldl1 (\n d -> n * radix + d) (map digitToInt ds) 179 | 180 | -- In brace-delimited code, we have to be careful to match braces 181 | -- within the code, but ignore braces inside strings and character 182 | -- literals. We do an approximate job (doing it properly requires 183 | -- implementing a large chunk of the Haskell lexical syntax). 184 | 185 | code :: Action 186 | code (p, _, _inp) _ = do 187 | currentInput <- getInput 188 | go currentInput 1 "" 189 | where 190 | go :: AlexInput -> Int -> String -> P Token 191 | go inp 0 cs = do 192 | setInput inp 193 | return $ T p $ CodeT $ triml $ reverse $ triml $ tail cs 194 | go inp n cs = do 195 | case alexGetChar inp of 196 | Nothing -> err inp 197 | Just (c,inp2) -> 198 | case c of 199 | '{' -> go inp2 (n+1) (c:cs) 200 | '}' -> go inp2 (n-1) (c:cs) 201 | '\'' -> go_char inp2 n (c:cs) 202 | '\"' -> go_str inp2 n (c:cs) '\"' 203 | c2 -> go inp2 n (c2:cs) 204 | 205 | go_char :: AlexInput -> Int -> String -> P Token 206 | -- try to catch multiple occurrences of ' at identifier end 207 | go_char inp n cs@('\'':'\'':_) = go inp n cs 208 | -- try to catch occurrences of ' within an identifier 209 | go_char inp n cs@('\'':c2:_) 210 | | isAlphaNum c2 = go inp n cs 211 | go_char inp n cs = go_str inp n cs '\'' 212 | 213 | go_str :: AlexInput -> Int -> String -> Char -> P Token 214 | go_str inp n cs end = do 215 | case alexGetChar inp of 216 | Nothing -> err inp 217 | Just (c,inp2) 218 | | c == end -> go inp2 n (c:cs) 219 | | otherwise -> 220 | case c of 221 | '\\' -> case 
alexGetChar inp2 of 222 | Nothing -> err inp2 223 | Just (d,inp3) -> go_str inp3 n (d:c:cs) end 224 | c2 -> go_str inp2 n (c2:cs) end 225 | 226 | err inp = do setInput inp; lexError "lexical error in code fragment" 227 | 228 | triml = dropWhile isSpace 229 | 230 | lexError :: String -> P a 231 | lexError s = do 232 | (_, _, _, input) <- getInput 233 | failP $ s ++ " at " ++ case input of 234 | c:_ -> show c 235 | [] -> "end of file" 236 | 237 | lexer :: (Token -> P a) -> P a 238 | lexer cont = lexToken >>= cont 239 | 240 | lexToken :: P Token 241 | lexToken = do 242 | inp@(p,c,_,s) <- getInput 243 | sc <- getStartCode 244 | case alexScan inp sc of 245 | AlexEOF -> return (T p EOFT) 246 | AlexError _ -> lexError "lexical error" 247 | AlexSkip inp1 _ -> do 248 | setInput inp1 249 | lexToken 250 | AlexToken inp1 len t -> do 251 | setInput inp1 252 | t (p,c,s) len 253 | 254 | type Action = (AlexPosn,Char,String) -> Int -> P Token 255 | 256 | skip :: Action 257 | skip _ _ = lexToken 258 | 259 | andBegin :: Action -> StartCode -> Action 260 | andBegin act sc inp len = setStartCode sc >> act inp len 261 | } 262 | -------------------------------------------------------------------------------- /src/Scan.x.boot: -------------------------------------------------------------------------------- 1 | -- placeholder 2 | -------------------------------------------------------------------------------- /src/UTF8.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE OverloadedLists #-} 3 | 4 | module UTF8 where 5 | 6 | import Data.Word ( Word8 ) 7 | import Data.Bits ( (.&.), shiftR ) 8 | import Data.Char ( ord ) 9 | 10 | import qualified Data.List.NonEmpty as List1 11 | type List1 = List1.NonEmpty 12 | 13 | {- 14 | -- Could also be imported: 15 | 16 | import Codec.Binary.UTF8.Light as UTF8 17 | 18 | encode :: Char -> [Word8] 19 | encode c = head (UTF8.encodeUTF8' [UTF8.c2w c]) 20 | 21 | -} 22 | 23 | 
-- | Encode a single Haskell 'Char' as its UTF-8 byte sequence (1-4 bytes).
--   (NB: this encodes one 'Char', not a whole 'String'.)
encode :: Char -> List1 Word8
encode = fmap fromIntegral . go . ord
  where
  go oc
   | oc <= 0x7f   = [oc]                                -- 1 byte: plain ASCII

   | oc <= 0x7ff  = [ 0xc0 + (oc `shiftR` 6)            -- 2 bytes
                    , 0x80 + oc .&. 0x3f
                    ]

   | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12)           -- 3 bytes
                    , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
                    , 0x80 + oc .&. 0x3f
                    ]
   | otherwise    = [ 0xf0 + (oc `shiftR` 18)           -- 4 bytes
                    , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
                    , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
                    , 0x80 + oc .&. 0x3f
                    ]
-------------------------------------------------------------------------------- /src/Util.hs: --------------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- Util.hs, part of Alex
--
-- (c) Simon Marlow 2003
--
-- General utilities used in various parts of Alex
--
-- -----------------------------------------------------------------------------

module Util
  ( str
  , char
  , nl
  , paren
  , interleave_shows
  , space
  , cjustify
  , ljustify
  , rjustify
  , spaces
  , hline
  ) where

-- Pretty-printing utilities, in the ShowS (difference-list) style:
-- each combinator has type String -> String and is composed with (.).

str :: String -> String -> String
str = showString

-- | Prepend a single character.
char :: Char -> String -> String
char c = (c :)

-- | Prepend a newline.
nl :: String -> String
nl = char '\n'

-- | Wrap a rendering in parentheses.
paren :: (String -> String) -> String -> String
paren s = char '(' . s . char ')'

-- | Interpose the separator @s@ between the given renderings.
interleave_shows :: (String -> String) -> [String -> String] -> String -> String
interleave_shows _ [] = id
interleave_shows s xs = foldr1 (\a b -> a . s .
b) xs 42 | 43 | space :: String -> String 44 | space = char ' ' 45 | 46 | cjustify, ljustify, rjustify :: Int -> String -> String 47 | cjustify n s = spaces halfm ++ s ++ spaces (m - halfm) 48 | where 49 | m = n - length s 50 | halfm = m `div` 2 51 | ljustify n s = s ++ spaces (max 0 (n - length s)) 52 | rjustify n s = spaces (n - length s) ++ s 53 | 54 | spaces :: Int -> String 55 | spaces n = replicate n ' ' 56 | 57 | hline :: String 58 | hline = replicate 77 '-' 59 | -------------------------------------------------------------------------------- /src/ghc_hooks.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void ErrorHdrHook(chan) 4 | FILE *chan; 5 | {} 6 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-21.25 2 | packages: 3 | - . 4 | -------------------------------------------------------------------------------- /test-debug.hs: -------------------------------------------------------------------------------- 1 | import System.Process (system) 2 | import System.Exit (exitWith) 3 | 4 | main = system "make -k -C tests clean tests-debug" >>= exitWith 5 | -------------------------------------------------------------------------------- /test.hs: -------------------------------------------------------------------------------- 1 | import System.Process (system) 2 | import System.Exit (exitWith) 3 | 4 | main = system "make -k -C tests clean tests" >>= exitWith 5 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | # NOTE: `cabal test` will take care to build the local `alex` 2 | # executable and place it into $PATH for us to pick up. 
3 | # 4 | # If it doesn't look like the alex binary in $PATH comes from the 5 | # build tree, then we'll fall back to pointing to 6 | # ../dist/build/alex/alex to support running tests via "runghc 7 | # Setup.hs test". 8 | # 9 | # If ALEX has been set outside, e.g. in the environment, we trust this setting. 10 | # This way, we can pass in the correct Alex executable from a CI environment 11 | # without danger of it being "fixed" by the logic below. 12 | # [2021-06-15, PR #189](https://github.com/simonmar/alex/pull/189) 13 | # 14 | ifndef ALEX 15 | ALEX=$(shell which alex) 16 | ifeq "$(filter $(dir $(shell pwd))%,$(ALEX))" "" 17 | ALEX=../dist/build/alex/alex 18 | endif 19 | endif 20 | 21 | # NOTE: This assumes that a working `ghc` is on $PATH; this may not 22 | # necessarily be the same GHC used by `cabal` for building `alex`. 23 | # 24 | # Again, if HC has been set in the environment (e.g. by the CI), we keep this setting. 25 | # [2021-06-15, PR #189](https://github.com/simonmar/alex/pull/189) 26 | # 27 | HC ?= ghc 28 | 29 | # Some GHC warnings are only available from a certain version on 30 | # Get the GHC version 31 | GHC_VERSION:=$(shell $(HC) --numeric-version) 32 | GHC_VERSION_WORDS=$(subst ., ,$(GHC_VERSION)) 33 | GHC_MAJOR_VERSION=$(word 1,$(GHC_VERSION_WORDS)) 34 | GHC_MINOR_VERSION=$(word 2,$(GHC_VERSION_WORDS)) 35 | 36 | # Text dependency comes with GHC from 8.4 onwards 37 | GHC_SHIPS_WITH_TEXT:=$(shell if [ $(GHC_MAJOR_VERSION) -gt 8 -o $(GHC_MAJOR_VERSION) -ge 8 -a $(GHC_MINOR_VERSION) -ge 4 ]; then echo "yes"; else echo "no"; fi) 38 | 39 | # Turn off x-partial warning (new in GHC 9.8) 40 | WARNS_DEP_GHC_GTEQ_9_8:=$(shell if [ $(GHC_MAJOR_VERSION) -gt 9 -o $(GHC_MAJOR_VERSION) -ge 9 -a $(GHC_MINOR_VERSION) -ge 8 ]; then echo "-Wno-x-partial"; fi) 41 | 42 | HC_OPTS=-Wall $(WARNS_DEP_GHC_GTEQ_9_8) -fwarn-incomplete-uni-patterns -Werror 43 | 44 | .PRECIOUS: %.n.hs %.g.hs %.o %.exe %.bin 45 | 46 | ifeq "$(TARGETPLATFORM)" "i386-unknown-mingw32" 47 | 
HS_PROG_EXT = .exe 48 | else 49 | HS_PROG_EXT = .bin 50 | endif 51 | 52 | TESTS = \ 53 | basic_typeclass.x \ 54 | basic_typeclass_bytestring.x \ 55 | default_typeclass.x \ 56 | gscan_typeclass.x \ 57 | issue_71.x \ 58 | issue_119.x \ 59 | issue_141.x \ 60 | issue_197.x \ 61 | issue_262.x \ 62 | issue_269_part1.x \ 63 | issue_269_part2.x \ 64 | monad_typeclass.x \ 65 | monad_typeclass_bytestring.x \ 66 | monadUserState_typeclass.x \ 67 | monadUserState_typeclass_bytestring.x \ 68 | null.x \ 69 | posn_typeclass.x \ 70 | posn_typeclass_bytestring.x \ 71 | strict_typeclass.x \ 72 | simple.x \ 73 | tokens.x \ 74 | tokens_bytestring.x \ 75 | tokens_bytestring_unicode.x \ 76 | tokens_gscan.x \ 77 | tokens_monad_bytestring.x \ 78 | tokens_monadUserState_bytestring.x \ 79 | tokens_posn.x \ 80 | tokens_posn_bytestring.x \ 81 | tokens_scan_user.x \ 82 | tokens_strict_bytestring.x \ 83 | unicode.x 84 | 85 | ifeq "$(GHC_SHIPS_WITH_TEXT)" "yes" 86 | TEXT_DEP = -package text 87 | 88 | TEXT_TESTS = \ 89 | strict_text_typeclass.x \ 90 | posn_typeclass_strict_text.x \ 91 | tokens_monadUserState_strict_text.x 92 | else 93 | TEXT_DEP = 94 | 95 | TEXT_TESTS = 96 | endif 97 | 98 | # NOTE: `cabal` will set the `alex_datadir` env-var accordingly before invoking the test-suite 99 | #TEST_ALEX_OPTS = --template=../data/ 100 | TEST_ALEX_OPTS= 101 | 102 | %.n.hs : %.x 103 | $(ALEX) $(TEST_ALEX_OPTS) $< -o $@ 104 | 105 | %.g.hs : %.x 106 | $(ALEX) $(TEST_ALEX_OPTS) -g $< -o $@ 107 | 108 | %.d.hs : %.x 109 | $(ALEX) $(TEST_ALEX_OPTS) --debug $< -o $@ 110 | 111 | CLEAN_FILES += *.n.hs *.g.hs *.d.hs *.info *.hi *.o *.bin *.exe 112 | 113 | TESTS_HS = $(shell echo $(TESTS) $(TEXT_TESTS) | sed -e 's/\([^\. ]*\)\.\(l\)\{0,1\}x/\1.n.hs \1.g.hs/g') 114 | TESTS_HS_DEBUG = $(shell echo $(TESTS) $(TEXT_TESTS) | sed -e 's/\([^\. 
]*\)\.\(l\)\{0,1\}x/\1.d.hs/g') 115 | TESTS_HS_ALL = $(TESTS_HS) $(TESTS_HS_DEBUG) 116 | 117 | BASIC_TESTS = $(patsubst %.hs, %.run, $(TESTS_HS)) 118 | DEBUG_TESTS = $(patsubst %.hs, %.run, $(TESTS_HS_DEBUG)) 119 | ALL_TESTS = $(BASIC_TESTS) $(DEBUG_TESTS) 120 | 121 | %.run : %$(HS_PROG_EXT) 122 | ./$< 123 | 124 | %$(HS_PROG_EXT) : %.hs 125 | $(HC) $(HC_OPTS) -package array -package bytestring $(TEXT_DEP) $($*_LD_OPTS) $< -o $@ 126 | 127 | all :: $(ALL_TESTS) 128 | 129 | tests :: $(BASIC_TESTS) 130 | 131 | tests-debug :: $(DEBUG_TESTS) 132 | 133 | .PHONY: clean debug 134 | 135 | clean: 136 | rm -f $(CLEAN_FILES) 137 | 138 | debug : 139 | @echo ALEX = $(ALEX) 140 | @echo HC_OPTS = $(HC_OPTS) 141 | @echo ALL_TESTS = $(ALL_TESTS) 142 | -------------------------------------------------------------------------------- /tests/basic_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "basic" 10 | %token "Token s" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." ; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. 
; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 48 | tokpred _ _ _ _ = True 49 | 50 | idtoken :: Read s => Int -> String -> Token s 51 | idtoken n s = Id n (read ("\"" ++ s ++ "\"")) 52 | 53 | data Token s = Id Int s 54 | deriving (Show, Ord, Eq) 55 | 56 | lex :: Read s => String -> [Token s] 57 | lex = alexScanTokens 58 | 59 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 60 | 61 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 62 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 63 | Id 10 "uuvu", Id 11 "xxw"] 64 | 65 | main :: IO () 66 | main = 67 | let 68 | result :: [Token String] 69 | result = lex input 70 | in do 71 | if result /= tokens 72 | then exitFailure 73 | else exitWith ExitSuccess 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tests/basic_typeclass_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import Data.ByteString.Lazy.Char8 as Lazy 10 | 11 | } 12 | 13 | %wrapper "basic-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." 
; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> Lazy.ByteString -> Token s 55 | idtoken n s = Id n (read ("\"" ++ (Lazy.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s 58 | deriving (Show, Ord, Eq) 59 | 60 | lex :: Read s => Lazy.ByteString -> [Token s] 61 | lex = alexScanTokens 62 | 63 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 64 | 65 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 66 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 67 | Id 10 "uuvu", Id 11 "xxw"] 68 | 69 | main :: IO () 70 | main = 71 | let 72 | result :: [Token String] 73 | result = lex input 74 | in do 75 | if result /= tokens 76 | then exitFailure 77 | else exitWith ExitSuccess 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tests/gscan_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "gscan" 10 | %token "[Token s]" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." 
; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. ; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 48 | tokpred _ _ _ _ = True 49 | 50 | idtoken :: Read s => Int -> AlexPosn -> Char -> String -> Int -> 51 | ((Int,state) -> [Token s]) -> (Int,state) -> [Token s] 52 | idtoken n _ _ s len cont st = Id n (read ("\"" ++ take len s ++ "\"")) : cont st 53 | 54 | data Token s = Id Int s deriving Eq 55 | 56 | lex :: Read s => String -> [Token s] 57 | lex str = alexGScan (\_ _ _ _ -> []) (0 :: Int) str 58 | 59 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 60 | 61 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 62 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 63 | Id 10 "uuvu", Id 11 "xxw"] 64 | 65 | main :: IO () 66 | main = 67 | let 68 | result :: [Token String] 69 | result = lex input 70 | in do 71 | if result /= tokens 72 | then exitFailure 73 | else exitWith ExitSuccess 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tests/issue_119.x: -------------------------------------------------------------------------------- 1 | -- -*- haskell -*- 2 | { 3 | -- Issue 119, 4 | -- reported 2017-10-11 by Herbert Valerio Riedel, 5 | -- fixed 2020-01-26 by Andreas Abel. 6 | -- 7 | -- Problem was: the computed token length (in number of characters) 8 | -- attached to AlexToken is tailored to UTF8 encoding and wrong 9 | -- for LATIN1 encoding. 
10 | 11 | module Main where 12 | 13 | import Control.Monad (unless) 14 | import qualified Data.ByteString as B 15 | import Data.Word 16 | import System.Exit (exitFailure) 17 | } 18 | 19 | %encoding "latin1" 20 | 21 | :- 22 | 23 | [\x01-\xff]+ { False } 24 | [\x00] { True } 25 | 26 | { 27 | type AlexInput = B.ByteString 28 | 29 | alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) 30 | alexGetByte = B.uncons 31 | 32 | alexInputPrevChar :: AlexInput -> Char 33 | alexInputPrevChar = undefined 34 | 35 | -- generated by @alex@ 36 | alexScan :: AlexInput -> Int -> AlexReturn Bool 37 | 38 | {- 39 | 40 | GOOD cases: 41 | 42 | ("012\NUL3","012","\NUL3",3,3,False) 43 | ("\NUL0","\NUL","0",1,1,True) 44 | ("012","012","",3,3,False) 45 | 46 | BAD case: 47 | 48 | ("0@P`p\128\144\160","0@P`p","",5,8,False) 49 | 50 | expected: 51 | 52 | ("0@P`p\128\144\160","0@P`p\128\144\160","",8,8,False) 53 | 54 | -} 55 | main :: IO () 56 | main = do 57 | go (B.pack [0x30,0x31,0x32,0x00,0x33]) -- GOOD 58 | go (B.pack [0x00,0x30]) -- GOOD 59 | go (B.pack [0x30,0x31,0x32]) -- GOOD 60 | 61 | go (B.pack [0x30,0x40,0x50,0x60,0x70,0x80,0x90,0xa0]) -- WAS: BAD 62 | where 63 | go inp = do 64 | case (alexScan inp 0) of 65 | -- expected invariant: len == B.length inp - B.length inp' 66 | AlexToken inp' len b -> do 67 | let diff = B.length inp - B.length inp' 68 | unless (len == diff) $ do 69 | putStrLn $ "ERROR: reported length and consumed length differ!" 70 | print (inp, B.take len inp, inp', len, diff, b) 71 | exitFailure 72 | _ -> undefined 73 | } 74 | -------------------------------------------------------------------------------- /tests/issue_141.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Issue #141 3 | -- reported 2015-10-20 by Iavor S. Diatchki 4 | -- fixed 2020-01-31 by Andreas Abel 5 | -- 6 | -- Problem was: 7 | -- Only one-digit numbers were accepted in repetition ranges. 
8 | 9 | module Main (main) where 10 | 11 | import System.Exit 12 | } 13 | 14 | %wrapper "posn" 15 | %token "Token" 16 | 17 | :- 18 | 19 | -- allow several digits in repetition ranges, e.g. 14 20 | "a"{14,14} { \ _ _ -> A } 21 | [\ \n\t]+ ; 22 | 23 | { 24 | data Token = A 25 | deriving (Eq, Show) 26 | 27 | -- 12345678901234 28 | input = "aaaaaaaaaaaaaa\n" -- fourteen a's 29 | expected_result = [A] 30 | 31 | main :: IO () 32 | main 33 | | result == expected_result = do 34 | exitWith ExitSuccess 35 | | otherwise = do 36 | print $ take 20 result 37 | exitFailure 38 | where 39 | result = alexScanTokens input 40 | } 41 | -------------------------------------------------------------------------------- /tests/issue_197.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Issue #197 3 | -- reported 2022-01-21 by https://github.com/Commelina 4 | -- fixed 2022-01-23 by Andreas Abel & John Ericson 5 | -- 6 | -- Problem was: 7 | -- Surface syntax regressed and could no longer handle character strings 8 | -- that looked like numbers. 9 | 10 | module Main (main) where 11 | 12 | import System.Exit 13 | } 14 | 15 | %wrapper "posn" 16 | %token "Token" 17 | 18 | @iec60559suffix = (32|64|128)[x]? 19 | @any = [01-89]+[x]? 
20 | 21 | :- 22 | 23 | $white+ ; 24 | @iec60559suffix { \ _ -> Good } 25 | @any { \ _ -> Bad } 26 | 27 | { 28 | data Token = Good String | Bad String 29 | deriving (Eq, Show) 30 | 31 | input = "32 32x 99 99x 128x" 32 | expected_result = [Good "32", Good "32x", Bad "99", Bad "99x", Good "128x"] 33 | 34 | main :: IO () 35 | main 36 | | result == expected_result = do 37 | exitWith ExitSuccess 38 | | otherwise = do 39 | print result 40 | exitFailure 41 | where 42 | result = alexScanTokens input 43 | } 44 | -------------------------------------------------------------------------------- /tests/issue_262.x: -------------------------------------------------------------------------------- 1 | { 2 | -- https://github.com/haskell/alex/pull/262 3 | -- https://gitlab.haskell.org/ghc/ghc/-/issues/25609 4 | -- 5 | -- Error happens when using alexScan with a lexer that 6 | -- inspects the context. 7 | 8 | {-# LANGUAGE ScopedTypeVariables #-} 9 | 10 | import Control.Exception 11 | import Data.List (isInfixOf) 12 | } 13 | 14 | %wrapper "basic" 15 | 16 | :- 17 | .* / { \state _ _ _ -> state == 'x' } { id } 18 | 19 | { 20 | main :: IO () 21 | main = do 22 | result <- try $ evaluate $ alexScan ('\n', [], "") 0 `seq` () 23 | case result of 24 | Left (e :: SomeException) 25 | | "use alexScanUser instead" `isInfixOf` show e 26 | -> pure () 27 | _ -> error $ "Got unexpected result: " ++ show result 28 | } 29 | -------------------------------------------------------------------------------- /tests/issue_269_part1.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | -- Issue #269 4 | -- reported 2025-04-02 by Antoine Leblanc (https://github.com/nicuveo) 5 | -- fixed 2025-04-03 by Antoine Leblanc 6 | -- 7 | -- Problem was: 8 | -- The minimizer was not initialized with the proper subsets of 9 | -- states, which could result in different states being erroneously 10 | -- considered equivalent, which in turn could result in the wrong 11 | -- rule 
being selected at runtime. 12 | -- 13 | -- This version of the test fails with the minimizer as implemented 14 | -- after the changes in 4f0b51b8c370d0dd0f9c65af98282789f1cb035f. 15 | 16 | import Control.Monad (when) 17 | import System.Exit 18 | 19 | } 20 | 21 | %wrapper "basic" 22 | 23 | tokens :- 24 | [abc] { Left } 25 | "abc" { const $ Right "abc" } 26 | 27 | { 28 | 29 | test :: String -> [Either String String] -> IO () 30 | test input expected = 31 | when (expected /= alexScanTokens input) 32 | exitFailure 33 | 34 | main :: IO () 35 | main = do 36 | test "abc" [Right "abc"] 37 | test "bbb" [Left "b", Left "b", Left "b"] 38 | test "bbc" [Left "b", Left "b", Left "c"] 39 | 40 | } 41 | -------------------------------------------------------------------------------- /tests/issue_269_part2.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | -- Issue #269 4 | -- reported 2025-04-02 by Antoine Leblanc (https://github.com/nicuveo) 5 | -- fixed 2025-04-03 by Antoine Leblanc 6 | -- 7 | -- Problem was: 8 | -- The minimizer was not initialized with the proper subsets of 9 | -- states, which could result in different states being erroneously 10 | -- considered equivalent, which in turn could result in the wrong 11 | -- rule being selected at runtime. 12 | -- 13 | -- This version of the test fails with the minimizer as implemented 14 | -- pre 4f0b51b8c370d0dd0f9c65af98282789f1cb035f. 
15 | 16 | import Control.Monad (when) 17 | import System.Exit 18 | 19 | } 20 | 21 | %wrapper "basic" 22 | 23 | tokens :- 24 | "abc" { const $ Right "abc" } 25 | [abc] { Left } 26 | 27 | { 28 | 29 | test :: String -> [Either String String] -> IO () 30 | test input expected = 31 | when (expected /= alexScanTokens input) 32 | exitFailure 33 | 34 | main :: IO () 35 | main = do 36 | test "abc" [Right "abc"] 37 | test "bbb" [Left "b", Left "b", Left "b"] 38 | test "bbc" [Left "b", Left "b", Left "c"] 39 | 40 | } 41 | -------------------------------------------------------------------------------- /tests/issue_71.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Issue #71 3 | -- reported 2015-10-20 by Ian Duncan 4 | -- fixed 2020-01-22 by Andreas Abel 5 | -- 6 | -- Problem was: 7 | -- DFA minimization crashed with "Prelude head: empty list" because 8 | -- empty set of non-accepting states was treated as empty equivalence 9 | -- class of states. 10 | -- 11 | -- Issue #258, 2024-02-27, Andreas Abel: 12 | -- Since GHC 9.4, type 'Symbol' conflicts with 'GHC.Exts.Symbol' 13 | -- which was imported by alex <= 3.5.0.0 14 | -- because of an unqualified import of 'GHC.Exts'. 15 | 16 | module Main (main) where 17 | 18 | import System.Exit 19 | } 20 | 21 | %wrapper "posn" 22 | %token "Symbol" 23 | 24 | $whitespace = [\ \n\t] 25 | @whitespaces = $whitespace* 26 | 27 | :- 28 | 29 | @whitespaces { \ _ _ -> Whitespaces } 30 | "a" { \ _ _ -> A } 31 | 32 | { 33 | -- Calling the token type 'Symbol' will trigger a clash with GHC.Exts.Symbol 34 | -- if the lexer is built with alex <= 3.5.0.0. 35 | 36 | data Symbol = Whitespaces | A 37 | deriving (Eq, Show) 38 | 39 | input = "aa \n\taa \t \n a" 40 | expected_result = [A,A,Whitespaces,A,A,Whitespaces,A] 41 | 42 | main :: IO () 43 | main 44 | -- Since the whitespaces token is nullable, Alex 45 | -- will recognize an infinite number of those 46 | -- at the end of file. 
This behavior is problematic, 47 | -- but we don't fix it here. 48 | -- We just test here whether the expected result 49 | -- is a prefix of the produced result. 50 | | take (length expected_result) result == expected_result = do 51 | exitWith ExitSuccess 52 | | otherwise = do 53 | print $ take 20 result 54 | exitFailure 55 | where 56 | result = alexScanTokens input 57 | } 58 | -------------------------------------------------------------------------------- /tests/monadUserState_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "monadUserState" 10 | %token "Token s" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." ; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. 
; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | type AlexUserState = Int 48 | 49 | alexInitUserState = 0 50 | 51 | alexEOF :: Alex (Token s) 52 | alexEOF = return EOF 53 | 54 | tokpred :: AlexUserState -> AlexInput -> Int -> AlexInput -> Bool 55 | tokpred _ _ _ _ = True 56 | 57 | idtoken :: Read s => Int -> AlexInput -> Int -> Alex (Token s) 58 | idtoken n (_, _, _, s) len = return (Id n (read ("\"" ++ take len s ++ "\""))) 59 | 60 | data Token s = Id Int s | EOF deriving Eq 61 | 62 | lex :: Read s => String -> Either String [Token s] 63 | lex inp = 64 | let 65 | lexAll = 66 | do 67 | res <- alexMonadScan 68 | case res of 69 | EOF -> return [] 70 | tok -> 71 | do 72 | rest <- lexAll 73 | return (tok : rest) 74 | in 75 | runAlex inp lexAll 76 | 77 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 78 | 79 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 80 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 81 | Id 10 "uuvu", Id 11 "xxw"] 82 | 83 | main :: IO () 84 | main = 85 | let 86 | result = lex input 87 | in do 88 | case result of 89 | Left _ -> exitFailure 90 | Right toks -> 91 | do 92 | if toks /= tokens 93 | then exitFailure 94 | else exitWith ExitSuccess 95 | 96 | } 97 | -------------------------------------------------------------------------------- /tests/monadUserState_typeclass_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import qualified Data.ByteString.Lazy.Char8 as Lazy 10 | 11 | } 12 | 13 | %wrapper "monadUserState-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." 
{ idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | type AlexUserState = Int 52 | 53 | alexInitUserState = 0 54 | 55 | alexEOF :: Alex (Token s) 56 | alexEOF = return EOF 57 | 58 | tokpred :: AlexUserState -> AlexInput -> Int -> AlexInput -> Bool 59 | tokpred _ _ _ _ = True 60 | 61 | idtoken :: Read s => Int -> AlexInput -> Int64 -> Alex (Token s) 62 | idtoken n (_, _, s, _) len = 63 | return (Id n (read ("\"" ++ Lazy.unpack (Lazy.take (fromIntegral len) s) ++ 64 | "\""))) 65 | 66 | data Token s = Id Int s | EOF deriving Eq 67 | 68 | lex :: Read s => Lazy.ByteString -> Either String [Token s] 69 | lex inp = 70 | let 71 | lexAll = 72 | do 73 | -- Andreas Abel, 2023-12-30, issue #220: 74 | -- Test that alex{G,S}etUserState are in scope. 
75 | u <- alexGetUserState 76 | alexSetUserState (u + 1) 77 | 78 | res <- alexMonadScan 79 | case res of 80 | EOF -> return [] 81 | tok -> 82 | do 83 | rest <- lexAll 84 | return (tok : rest) 85 | in 86 | runAlex inp lexAll 87 | 88 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 89 | 90 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 91 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 92 | Id 10 "uuvu", Id 11 "xxw"] 93 | 94 | main :: IO () 95 | main = 96 | let 97 | result :: Either String [Token String] 98 | result = lex input 99 | in do 100 | case result of 101 | Left _ -> exitFailure 102 | Right toks -> 103 | do 104 | if toks /= tokens 105 | then exitFailure 106 | else exitWith ExitSuccess 107 | 108 | } 109 | -------------------------------------------------------------------------------- /tests/monad_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "monad" 10 | %token "Token s" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." ; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. 
; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | alexEOF :: Alex (Token s) 48 | alexEOF = return EOF 49 | 50 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 51 | tokpred _ _ _ _ = True 52 | 53 | idtoken :: Read s => Int -> AlexInput -> Int -> Alex (Token s) 54 | idtoken n (_, _, _, s) len = return (Id n (read ("\"" ++ take len s ++ "\""))) 55 | 56 | data Token s = Id Int s | EOF deriving Eq 57 | 58 | lex :: Read s => String -> Either String [Token s] 59 | lex inp = 60 | let 61 | lexAll = 62 | do 63 | res <- alexMonadScan 64 | case res of 65 | EOF -> return [] 66 | tok -> 67 | do 68 | rest <- lexAll 69 | return (tok : rest) 70 | in 71 | runAlex inp lexAll 72 | 73 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 74 | 75 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 76 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 77 | Id 10 "uuvu", Id 11 "xxw"] 78 | 79 | main :: IO () 80 | main = 81 | let 82 | result = lex input 83 | in do 84 | case result of 85 | Left _ -> exitFailure 86 | Right toks -> 87 | do 88 | if toks /= tokens 89 | then exitFailure 90 | else exitWith ExitSuccess 91 | 92 | } 93 | -------------------------------------------------------------------------------- /tests/monad_typeclass_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import qualified Data.ByteString.Lazy.Char8 as Lazy 10 | 11 | } 12 | 13 | %wrapper "monad-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." 
{ idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | alexEOF :: Alex (Token s) 52 | alexEOF = return EOF 53 | 54 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 55 | tokpred _ _ _ _ = True 56 | 57 | idtoken :: Read s => Int -> AlexInput -> Int64 -> Alex (Token s) 58 | idtoken n (_, _, s, _) len = 59 | return (Id n (read ("\"" ++ Lazy.unpack (Lazy.take (fromIntegral len) s) ++ 60 | "\""))) 61 | 62 | data Token s = Id Int s | EOF deriving Eq 63 | 64 | lex :: Read s => Lazy.ByteString -> Either String [Token s] 65 | lex inp = 66 | let 67 | lexAll = 68 | do 69 | res <- alexMonadScan 70 | case res of 71 | EOF -> return [] 72 | tok -> 73 | do 74 | rest <- lexAll 75 | return (tok : rest) 76 | in 77 | runAlex inp lexAll 78 | 79 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 80 | 81 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 82 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 83 | Id 10 "uuvu", Id 11 "xxw"] 84 | 85 | main :: IO () 86 | main = 87 | let 88 | result :: Either String [Token String] 89 | result = lex input 90 | in do 91 | case result of 92 | Left _ -> exitFailure 93 | Right toks -> 94 | do 95 | if toks /= tokens 96 | then exitFailure 97 | else exitWith ExitSuccess 98 | 99 | } 100 | -------------------------------------------------------------------------------- /tests/null.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Tests the basic operation. 
3 | module Main where 4 | 5 | import Data.Char (toUpper) 6 | import Control.Monad 7 | import System.Exit 8 | import System.IO 9 | import Prelude hiding (null) 10 | } 11 | 12 | %wrapper "monad" 13 | 14 | @word = [A-Za-z]+ 15 | @null = \0 16 | 17 | $escchars = [abfnrtv\\"\'&] 18 | @escape = \\ ($escchars | \0) 19 | @gap = \\ $white+ \\ 20 | @string = $printable # [\"] | " " | @escape | @gap 21 | 22 | @inComment = ([^\*] | $white)+ | ([\*]+ ([\x00-\xff] # [\/])) 23 | 24 | tokens :- 25 | 26 | $white+ ; 27 | 28 | <0> { 29 | @null { null } 30 | @word { word } 31 | \" @string \" { string } 32 | "--" @inComment \n { word } 33 | } 34 | 35 | { 36 | {- we can now have comments in source code? -} 37 | word (_,_,_,input) len = return (take len input) 38 | 39 | null (_,_,_,_) _ = return "\0" 40 | 41 | string (_,_,_,input) _ = return (drop 1 (reverse (drop 1 (reverse input)))) 42 | 43 | alexEOF = return "stopped." 44 | 45 | scanner str = runAlex str $ do 46 | let loop = do tok <- alexMonadScan 47 | if tok == "stopped." || tok == "error." 48 | then return [tok] 49 | else do toks <- loop 50 | return (tok:toks) 51 | loop 52 | 53 | main = do 54 | let test1 = scanner str1 55 | when (test1 /= out1) $ 56 | do hPutStrLn stderr "Test 1 failed:" 57 | print test1 58 | exitFailure 59 | 60 | let test2 = scanner str2 61 | when (test2 /= out2) $ 62 | do hPutStrLn stderr "Test 2 failed:" 63 | print test2 64 | exitFailure 65 | 66 | str1 = "a\0bb\0ccc\0\0\"\\\0\"" 67 | out1 = Right ["a","\NUL","bb","\NUL","ccc","\NUL","\NUL","\\\NUL", "stopped."] 68 | 69 | str2 = "." 
70 | out2 = Left "lexical error at line 1, column 1" 71 | } 72 | -------------------------------------------------------------------------------- /tests/posn_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "posn" 10 | %token "Token s" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." ; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. 
; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 48 | tokpred _ _ _ _ = True 49 | 50 | idtoken :: Read s => Int -> AlexPosn -> String -> Token s 51 | idtoken n _ s = Id n (read ("\"" ++ s ++ "\"")) 52 | 53 | data Token s = Id Int s 54 | deriving (Show, Ord, Eq) 55 | 56 | lex :: Read s => String -> [Token s] 57 | lex = alexScanTokens 58 | 59 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 60 | 61 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 62 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 63 | Id 10 "uuvu", Id 11 "xxw"] 64 | 65 | main :: IO () 66 | main = 67 | let 68 | result :: [Token String] 69 | result = lex input 70 | in do 71 | if result /= tokens 72 | then exitFailure 73 | else exitWith ExitSuccess 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tests/posn_typeclass_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import Data.ByteString.Lazy.Char8 as Lazy 10 | 11 | } 12 | 13 | %wrapper "posn-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." 
; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> AlexPosn -> Lazy.ByteString -> Token s 55 | idtoken n _ s = Id n (read ("\"" ++ (Lazy.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s 58 | deriving (Show, Ord, Eq) 59 | 60 | lex :: Read s => Lazy.ByteString -> [Token s] 61 | lex = alexScanTokens 62 | 63 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 64 | 65 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 66 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 67 | Id 10 "uuvu", Id 11 "xxw"] 68 | 69 | main :: IO () 70 | main = 71 | let 72 | result :: [Token String] 73 | result = lex input 74 | in do 75 | if result /= tokens 76 | then exitFailure 77 | else exitWith ExitSuccess 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tests/posn_typeclass_strict_text.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import qualified Data.Text as Text 10 | 11 | } 12 | 13 | %wrapper "posn-strict-text" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." 
; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> AlexPosn -> Text.Text -> Token s 55 | idtoken n _ s = Id n (read ("\"" ++ (Text.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s 58 | deriving (Show, Ord, Eq) 59 | 60 | lex :: Read s => Text.Text -> [Token s] 61 | lex = alexScanTokens 62 | 63 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 64 | 65 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 66 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 67 | Id 10 "uuvu", Id 11 "xxw"] 68 | 69 | main :: IO () 70 | main = 71 | let 72 | result :: [Token String] 73 | result = lex input 74 | in do 75 | if result /= tokens 76 | then exitFailure 77 | else exitWith ExitSuccess 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tests/simple.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Tests the basic operation. 3 | module Main where 4 | 5 | import Data.Char (toUpper) 6 | import Control.Monad 7 | import System.Exit 8 | import System.IO 9 | } 10 | 11 | %wrapper "monad" 12 | 13 | @word = [A-Za-z]+ 14 | 15 | tokens :- 16 | 17 | $white+ ; 18 | 19 | <0> { 20 | "magic" { magic } -- should override later patterns 21 | ^ @word $ { both } -- test both trailing and left context 22 | @word $ { eol } -- test trailing context 23 | ^ @word { bol } -- test left context 24 | @word { word } 25 | } 26 | 27 | <0> \( { begin parens } 28 | [A-Za-z]+ { parenword } 29 | \) { begin 0 } 30 | 31 | { 32 | {- we can now have comments in source code? 
-} 33 | word (_,_,_,input) len = return (take len input) 34 | 35 | both (_,_,_,input) len = return ("BOTH:"++ take len input) 36 | 37 | eol (_,_,_,input) len = return ("EOL:"++ take len input) 38 | 39 | bol (_,_,_,input) len = return ("BOL:"++ take len input) 40 | 41 | parenword (_,_,_,input) len = return (map toUpper (take len input)) 42 | 43 | magic (_,_,_,_) _ = return "PING!" 44 | 45 | alexEOF = return "stopped." 46 | 47 | scanner str = runAlex str $ do 48 | let loop = do tok <- alexMonadScan 49 | if tok == "stopped." || tok == "error." 50 | then return [tok] 51 | else do toks <- loop 52 | return (tok:toks) 53 | loop 54 | 55 | main = do 56 | let test1 = scanner str1 57 | when (test1 /= out1) $ 58 | do hPutStrLn stderr "Test 1 failed:" 59 | print test1 60 | exitFailure 61 | 62 | let test2 = scanner str2 63 | when (test2 /= out2) $ 64 | do hPutStrLn stderr "Test 2 failed:" 65 | print test2 66 | exitFailure 67 | 68 | str1 = "a b c (d e f) magic (magic) eol\nbol \nboth\n" 69 | out1 = Right ["BOL:a","b","c","D","E","F","PING!","MAGIC","EOL:eol", "BOL:bol", "BOTH:both", "stopped."] 70 | 71 | str2 = "." 72 | out2 = Left "lexical error at line 1, column 1" 73 | } 74 | -------------------------------------------------------------------------------- /tests/strict_text_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import qualified Data.Text as Text 10 | 11 | } 12 | 13 | %wrapper "strict-text" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." 
{ idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> Text.Text -> Token s 55 | idtoken n s = Id n (read ("\"" ++ (Text.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s 58 | deriving (Show, Ord, Eq) 59 | 60 | lex :: Read s => Text.Text -> [Token s] 61 | lex = alexScanTokens 62 | 63 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 64 | 65 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 66 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 67 | Id 10 "uuvu", Id 11 "xxw"] 68 | 69 | main :: IO () 70 | main = 71 | let 72 | result :: [Token String] 73 | result = lex input 74 | in do 75 | if result /= tokens 76 | then exitFailure 77 | else exitWith ExitSuccess 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tests/strict_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import Data.ByteString.Char8 as Strict 10 | 11 | } 12 | 13 | %wrapper "strict-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." 
{ idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> Strict.ByteString -> Token s 55 | idtoken n s = Id n (read ("\"" ++ (Strict.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s deriving Eq 58 | 59 | lex :: Read s => Strict.ByteString -> [Token s] 60 | lex = alexScanTokens 61 | 62 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 63 | 64 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 65 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 66 | Id 10 "uuvu", Id 11 "xxw"] 67 | 68 | main :: IO () 69 | main = 70 | let 71 | result :: [Token String] 72 | result = lex input 73 | in do 74 | if result /= tokens 75 | then exitFailure 76 | else exitWith ExitSuccess 77 | 78 | } 79 | -------------------------------------------------------------------------------- /tests/tokens.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | import System.Exit 4 | } 5 | 6 | %wrapper "basic" 7 | 8 | $digit=0-9 -- digits 9 | $alpha = [a-zA-Z] -- alphabetic characters 10 | 11 | tokens :- 12 | 13 | $white+ ; 14 | "--".* ; 15 | let { \_ -> Let } 16 | in { \_ -> In } 17 | $digit+ { \s -> Int (read s) } 18 | [\=\+\-\*\/\(\)] { \s -> Sym (head s) } 19 | $alpha [$alpha $digit \_ \']* { \s -> 
Var s } 20 | 21 | -- a left-context pattern for testing 22 | ^ \# ; 23 | 24 | { 25 | -- Each right-hand side has type :: String -> Token 26 | 27 | -- The token type: 28 | data Token = 29 | Let | 30 | In | 31 | Sym Char | 32 | Var String | 33 | Int Int | 34 | Err 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 41 | result1 = identifierWithLotsOfQuotes'' 42 | 43 | identifierWithLotsOfQuotes'' :: [Token] 44 | identifierWithLotsOfQuotes'' = 45 | [Let,In,Int 12334,Sym '=',Sym '+',Sym '*',Var "foo",Var "bar__'"] 46 | 47 | } 48 | -------------------------------------------------------------------------------- /tests/tokens_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import Data.ByteString.Lazy.Char8 (unpack) 6 | } 7 | 8 | %wrapper "basic-bytestring" 9 | %encoding "latin1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { \_ -> Let } 19 | in { \_ -> In } 20 | $digit+ { \s -> Int (read (unpack s)) } 21 | [\=\+\-\*\/\(\)] { \s -> Sym (head (unpack s)) } 22 | $alpha [$alpha $digit \_ \']* { \s -> Var (unpack s) } 23 | 24 | { 25 | -- Each right-hand side has type :: ByteString -> Token 26 | 27 | -- The token type: 28 | data Token = 29 | Let | 30 | In | 31 | Sym Char | 32 | Var String | 33 | Int Int | 34 | Err 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 41 | result1 = [Let,In,Int 12334,Sym '=',Sym '+',Sym '*',Var "foo",Var "bar__'"] 42 | 43 | } 44 | -------------------------------------------------------------------------------- /tests/tokens_bytestring_unicode.x: 
-------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import Data.ByteString.Lazy.Char8 (unpack) 6 | } 7 | 8 | %wrapper "basic-bytestring" 9 | %encoding "utf-8" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Zαβ] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { \_ -> Let } 19 | in { \_ -> In } 20 | $digit+ { \s -> Int (read (unpack s)) } 21 | [\=\+\-\*\/\(\)] { \s -> Sym (head (unpack s)) } 22 | $alpha [$alpha $digit \_ \']* { \s -> Var (unpack s) } 23 | 24 | { 25 | -- Each right-hand side has type :: ByteString -> Token 26 | 27 | -- The token type: 28 | data Token = 29 | Let | 30 | In | 31 | Sym Char | 32 | Var String | 33 | Int Int | 34 | Err 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | -- \206\177\206\178\206\178 is "αββ" utf-8 encoded 41 | test1 = alexScanTokens " let in 012334\n=+*foo \206\177\206\178\206\178 bar__'" 42 | result1 = [Let,In,Int 12334,Sym '=',Sym '+',Sym '*',Var "foo",Var "\206\177\206\178\206\178",Var "bar__'"] 43 | } 44 | -------------------------------------------------------------------------------- /tests/tokens_gscan.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | import System.Exit 4 | } 5 | 6 | %wrapper "gscan" 7 | 8 | $digit = 0-9 -- digits 9 | $alpha = [a-zA-Z] -- alphabetic characters 10 | 11 | tokens :- 12 | 13 | $white+ ; 14 | "--".* ; 15 | let { tok (\p _ -> Let p) } 16 | in { tok (\p _ -> In p) } 17 | $digit+ { tok (\p s -> Int p (read s)) } 18 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head s)) } 19 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p s) } 20 | 21 | { 22 | -- Some action helpers: 23 | tok f p _ str len cont (sc,state) = f p (take len str) : cont (sc,state) 24 | 25 | -- The token type: 26 | data 
Token = 27 | Let AlexPosn | 28 | In AlexPosn | 29 | Sym AlexPosn Char | 30 | Var AlexPosn String | 31 | Int AlexPosn Int | 32 | Err AlexPosn 33 | deriving (Eq,Show) 34 | 35 | main = if test1 /= result1 then exitFailure 36 | else exitWith ExitSuccess 37 | 38 | test1 = alexGScan stop undefined " let in 012334\n=+*foo bar__'" 39 | 40 | stop _ _ "" (_,_) = [] 41 | stop _ _ _ (_,_) = error "lexical error" 42 | 43 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'"] 44 | } 45 | -------------------------------------------------------------------------------- /tests/tokens_monadUserState_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import qualified Data.ByteString.Lazy.Char8 as B 6 | } 7 | 8 | %wrapper "monadUserState-bytestring" 9 | %encoding "iso-8859-1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { tok (\p _ -> Let p) } 19 | in { tok (\p _ -> In p) } 20 | $digit+ { tok (\p s -> Int p (read (B.unpack s))) } 21 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (B.unpack s))) } 22 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (B.unpack s)) } 23 | 24 | { 25 | -- Each right-hand side has type :: AlexPosn -> String -> Token 26 | 27 | -- Some action helpers: 28 | tok f (p,_,input,_) len = return (f p (B.take (fromIntegral len) input)) 29 | 30 | -- The token type: 31 | data Token 32 | = Let AlexPosn 33 | | In AlexPosn 34 | | Sym AlexPosn Char 35 | | Var AlexPosn String 36 | | Int AlexPosn Int 37 | | Err AlexPosn 38 | | EOF 39 | deriving (Eq,Show) 40 | 41 | alexEOF = return EOF 42 | 43 | main = if test1 /= result1 then do print test1; exitFailure 44 | else exitWith ExitSuccess 45 | 
46 | type AlexUserState = () 47 | alexInitUserState = () 48 | 49 | scanner str = runAlex str $ do 50 | let 51 | loop = do 52 | 53 | -- Andreas Abel, 2023-12-30, issue #220: 54 | -- Test that alex{G,S}etUserState are in scope. 55 | () <- alexGetUserState 56 | alexSetUserState () 57 | 58 | tk <- alexMonadScan 59 | if tk == EOF 60 | then return [tk] 61 | else do 62 | toks <- loop 63 | return (tk:toks) 64 | loop 65 | 66 | test1 = case scanner " let in 012334\n=+*foo bar__'" of 67 | Left err -> error err 68 | Right toks -> toks 69 | 70 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'", EOF] 71 | 72 | 73 | } 74 | -------------------------------------------------------------------------------- /tests/tokens_monadUserState_strict_text.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import qualified Data.Text 6 | } 7 | 8 | %wrapper "monadUserState-strict-text" 9 | %encoding "iso-8859-1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { tok (\p _ -> Let p) } 19 | in { tok (\p _ -> In p) } 20 | $digit+ { tok (\p s -> Int p (read (Data.Text.unpack s))) } 21 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (Data.Text.unpack s))) } 22 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (Data.Text.unpack s)) } 23 | 24 | { 25 | -- Each right-hand side has type :: AlexPosn -> Data.Text.Text -> Token 26 | 27 | -- Some action helpers: 28 | tok f (p,_,_,input) len = return (f p (Data.Text.take (fromIntegral len) input)) 29 | 30 | -- The token type: 31 | data Token = 32 | Let AlexPosn | 33 | In AlexPosn | 34 | Sym AlexPosn Char | 35 | Var AlexPosn String | 36 | Int AlexPosn Int | 37 | Err AlexPosn | 38 | EOF 
39 | deriving (Eq,Show) 40 | 41 | alexEOF = return EOF 42 | 43 | main = if test1 /= result1 then do print test1; exitFailure 44 | else exitWith ExitSuccess 45 | 46 | type AlexUserState = () 47 | alexInitUserState = () 48 | 49 | scanner str = runAlex str $ do 50 | let loop = do tk <- alexMonadScan 51 | if tk == EOF 52 | then return [tk] 53 | else do toks <- loop 54 | return (tk:toks) 55 | loop 56 | 57 | test1 = case scanner " let in 012334\n=+*foo bar__'" of 58 | Left err -> error err 59 | Right toks -> toks 60 | 61 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'", EOF] 62 | 63 | 64 | } 65 | -------------------------------------------------------------------------------- /tests/tokens_monad_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import qualified Data.ByteString.Lazy.Char8 as B 6 | } 7 | 8 | %wrapper "monad-bytestring" 9 | %encoding "Latin1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { tok (\p _ -> Let p) } 19 | in { tok (\p _ -> In p) } 20 | $digit+ { tok (\p s -> Int p (read (B.unpack s))) } 21 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (B.unpack s))) } 22 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (B.unpack s)) } 23 | 24 | { 25 | -- Each right-hand side has type :: AlexPosn -> String -> Token 26 | 27 | -- Some action helpers: 28 | tok f (p,_,input,_) len = return (f p (B.take (fromIntegral len) input)) 29 | 30 | -- The token type: 31 | data Token = 32 | Let AlexPosn | 33 | In AlexPosn | 34 | Sym AlexPosn Char | 35 | Var AlexPosn String | 36 | Int AlexPosn Int | 37 | Err AlexPosn | 38 | EOF 39 | deriving (Eq,Ord,Show) 40 | -- Adding 'Ord' is 
possible since 3.3.0 (issue #233) 41 | -- where an 'Ord' instance for 'AlexPosn' is generated by default. 42 | 43 | alexEOF = return EOF 44 | 45 | main = if test1 /= result1 then do print test1; exitFailure 46 | else exitWith ExitSuccess 47 | 48 | scanner str = runAlex str $ do 49 | let loop = do tk <- alexMonadScan 50 | if tk == EOF 51 | then return [tk] 52 | else do toks <- loop 53 | return (tk:toks) 54 | loop 55 | 56 | test1 = case scanner " let in 012334\n=+*foo bar__'" of 57 | Left err -> error err 58 | Right toks -> toks 59 | 60 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'", EOF] 61 | 62 | 63 | } 64 | -------------------------------------------------------------------------------- /tests/tokens_posn.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | import System.Exit 4 | } 5 | 6 | %wrapper "posn" 7 | 8 | $digit = 0-9 -- digits 9 | $alpha = [a-zA-Z] -- alphabetic characters 10 | 11 | tokens :- 12 | 13 | $white+ ; 14 | "--".* ; 15 | let { tok (\p _ -> Let p) } 16 | in { tok (\p _ -> In p) } 17 | $digit+ { tok (\p s -> Int p (read s)) } 18 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head s)) } 19 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p s) } 20 | 21 | { 22 | -- Each right-hand side has type :: AlexPosn -> String -> Token 23 | 24 | -- Some action helpers: 25 | tok f p s = f p s 26 | 27 | -- The token type: 28 | data Token = 29 | Let AlexPosn | 30 | In AlexPosn | 31 | Sym AlexPosn Char | 32 | Var AlexPosn String | 33 | Int AlexPosn Int | 34 | Err AlexPosn 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 41 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) 
'=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'"] 42 | 43 | 44 | } 45 | -------------------------------------------------------------------------------- /tests/tokens_posn_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import Data.ByteString.Lazy.Char8 (unpack) 6 | } 7 | 8 | %wrapper "posn-bytestring" 9 | %encoding "UTF8" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { tok (\p _ -> Let p) } 19 | in { tok (\p _ -> In p) } 20 | $digit+ { tok (\p s -> Int p (read (unpack s))) } 21 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (unpack s))) } 22 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (unpack s)) } 23 | 24 | { 25 | -- Each right-hand side has type :: AlexPosn -> String -> Token 26 | 27 | -- Some action helpers: 28 | tok f p s = f p s 29 | 30 | -- The token type: 31 | data Token = 32 | Let AlexPosn | 33 | In AlexPosn | 34 | Sym AlexPosn Char | 35 | Var AlexPosn String | 36 | Int AlexPosn Int | 37 | Err AlexPosn 38 | deriving (Eq,Show) 39 | 40 | main = if test1 /= result1 then exitFailure 41 | else exitWith ExitSuccess 42 | 43 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 44 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'"] 45 | 46 | 47 | } 48 | -------------------------------------------------------------------------------- /tests/tokens_scan_user.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | import System.Exit 4 | } 5 | 6 | %wrapper "basic" -- Defines: AlexInput, alexGetByte, alexPrevChar 7 | 8 | $digit = 0-9 9 | $alpha 
= [a-zA-Z] 10 | $ws = [\ \t\n] 11 | 12 | tokens :- 13 | 14 | 5 / {\ u _ibt _l _iat -> u == FiveIsMagic} { \s -> TFive (head s) } 15 | $digit { \s -> TDigit (head s) } 16 | $alpha { \s -> TAlpha (head s) } 17 | $ws { \s -> TWSpace (head s) } 18 | 19 | { 20 | 21 | data Token = TDigit Char 22 | | TAlpha Char 23 | | TWSpace Char 24 | | TFive Char -- Predicated only 25 | | TLexError 26 | deriving (Eq,Show) 27 | 28 | data UserLexerMode = NormalMode 29 | | FiveIsMagic 30 | deriving Eq 31 | 32 | main | test1 /= result1 = exitFailure 33 | | test2 /= result2 = exitFailure 34 | -- all succeeded 35 | | otherwise = exitWith ExitSuccess 36 | 37 | run_lexer :: UserLexerMode -> String -> [Token] 38 | run_lexer m s = go ('\n', [], s) 39 | where go i@(_,_,s') = case alexScanUser m i 0 of 40 | AlexEOF -> [] 41 | AlexError _i -> [TLexError] 42 | AlexSkip i' _len -> go i' 43 | AlexToken i' len t -> t (take len s') : go i' 44 | 45 | test1 = run_lexer FiveIsMagic "5 x" 46 | result1 = [TFive '5',TWSpace ' ',TAlpha 'x'] 47 | 48 | test2 = run_lexer NormalMode "5 x" 49 | result2 = [TDigit '5',TWSpace ' ',TAlpha 'x'] 50 | } 51 | -------------------------------------------------------------------------------- /tests/tokens_strict_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import Data.ByteString.Char8 (unpack) 6 | } 7 | 8 | %wrapper "strict-bytestring" 9 | %encoding "ISO-8859-1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { \_ -> Let } 19 | in { \_ -> In } 20 | $digit+ { \s -> Int (read (unpack s)) } 21 | [\=\+\-\*\/\(\)] { \s -> Sym (head (unpack s)) } 22 | $alpha [$alpha $digit \_ \']* { \s -> Var (unpack s) } 23 | 24 | { 25 | -- Each right-hand side has type :: ByteString -> Token 26 | 27 | -- The token type: 28 | data Token = 29 | Let | 30 | 
In | 31 | Sym Char | 32 | Var String | 33 | Int Int | 34 | Err 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 41 | result1 = [Let,In,Int 12334,Sym '=',Sym '+',Sym '*',Var "foo",Var "bar__'"] 42 | 43 | } 44 | -------------------------------------------------------------------------------- /tests/unicode.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Tests the basic operation. 3 | module Main where 4 | 5 | import Data.Char (toUpper) 6 | import Control.Monad 7 | import System.Exit 8 | import System.IO 9 | } 10 | 11 | %wrapper "monad" 12 | 13 | @word = [A-Za-z]+ 14 | 15 | tokens :- 16 | 17 | <0> { 18 | "αω" { string } 19 | [AΓ] { character } 20 | . { other } 21 | } 22 | 23 | 24 | { 25 | string :: AlexInput -> Int -> Alex String 26 | string (_,_,_,_) _ = return "string!" 27 | 28 | other :: AlexInput -> Int -> Alex String 29 | other (_,_,_,input) len = return (take len input) 30 | 31 | character :: AlexInput -> Int -> Alex String 32 | character (_,_,_,_) _ = return "PING!" 33 | 34 | alexEOF :: Alex String 35 | alexEOF = return "stopped." 36 | 37 | scanner :: String -> Either String [String] 38 | scanner str = runAlex str $ do 39 | let loop = do tok <- alexMonadScan 40 | if tok == "stopped." || tok == "error." 
41 | then return [tok] 42 | else do toks <- loop 43 | return (tok:toks) 44 | loop 45 | 46 | main :: IO () 47 | main = do 48 | let test1 = scanner str1 49 | when (test1 /= out1) $ 50 | do hPutStrLn stderr "Test 1 failed:" 51 | print test1 52 | exitFailure 53 | 54 | let test2 = scanner str2 55 | when (test2 /= out2) $ 56 | do hPutStrLn stderr "Test 2 failed:" 57 | print test2 58 | exitFailure 59 | 60 | let test3 = scanner str3 61 | when (test3 /= out3) $ 62 | do hPutStrLn stderr "Test 3 failed:" 63 | print test3 64 | exitFailure 65 | 66 | let test4 = scanner str4 67 | when (test4 /= out4) $ 68 | do hPutStrLn stderr "Test 4 failed:" 69 | print test4 70 | exitFailure 71 | 72 | 73 | 74 | str1 = "A." 75 | out1 = Right ["PING!",".","stopped."] 76 | 77 | str2 = "\n" 78 | out2 = Left "lexical error at line 1, column 1" 79 | 80 | 81 | str3 = "αω --" 82 | out3 = Right ["string!"," ","-","-","stopped."] 83 | 84 | str4 = "βΓ" 85 | out4 = Right ["β","PING!","stopped."] 86 | 87 | } 88 | --------------------------------------------------------------------------------