├── .github ├── haskell-ci.patch └── workflows │ ├── ci.yml │ ├── emulated.yml │ └── fix-whitespace.yml ├── .gitignore ├── .mailmap ├── .readthedocs.yaml ├── CHANGELOG.md ├── CONTRIBUTING.rst ├── LICENSE ├── Makefile ├── NOTE.txt ├── README.md ├── Setup.hs ├── TODO ├── alex.cabal ├── build-windows-dist.sh ├── cabal.project ├── data ├── AlexTemplate.hs └── AlexWrappers.hs ├── doc ├── .gitignore ├── Makefile ├── about.rst ├── api.rst ├── conf.py ├── contributing.rst ├── index.rst ├── introduction.rst ├── invoking.rst ├── make.bat ├── obtaining.rst ├── regex.rst ├── requirements.txt └── syntax.rst ├── examples ├── Makefile ├── Tokens.x ├── Tokens_gscan.x ├── Tokens_posn.x ├── examples.x ├── haskell.x ├── lit.x ├── pp.x ├── state.x ├── tiger.x ├── tiger │ └── Literals.txt ├── tiny.y ├── words.x ├── words_monad.x └── words_posn.x ├── fix-whitespace.yaml ├── make-sdist.sh ├── src ├── AbsSyn.hs ├── CharSet.hs ├── DFA.hs ├── DFAMin.hs ├── DFS.hs ├── Data │ ├── LICENSE.txt │ ├── Ranged.hs │ └── Ranged │ │ ├── Boundaries.hs │ │ ├── RangedSet.hs │ │ └── Ranges.hs ├── Info.hs ├── Main.hs ├── NFA.hs ├── Output.hs ├── ParseMonad.hs ├── Parser.y ├── Parser.y.boot ├── Scan.x ├── Scan.x.boot ├── UTF8.hs ├── Util.hs └── ghc_hooks.c ├── stack.yaml ├── test-debug.hs ├── test.hs └── tests ├── Makefile ├── basic_typeclass.x ├── basic_typeclass_bytestring.x ├── default_typeclass.x ├── gscan_typeclass.x ├── issue_119.x ├── issue_141.x ├── issue_197.x ├── issue_262.x ├── issue_269_part1.x ├── issue_269_part2.x ├── issue_71.x ├── monadUserState_typeclass.x ├── monadUserState_typeclass_bytestring.x ├── monad_typeclass.x ├── monad_typeclass_bytestring.x ├── null.x ├── posn_typeclass.x ├── posn_typeclass_bytestring.x ├── posn_typeclass_strict_text.x ├── simple.x ├── strict_text_typeclass.x ├── strict_typeclass.x ├── tokens.x ├── tokens_bytestring.x ├── tokens_bytestring_unicode.x ├── tokens_gscan.x ├── tokens_monadUserState_bytestring.x ├── tokens_monadUserState_strict_text.x ├── 
tokens_monad_bytestring.x ├── tokens_posn.x ├── tokens_posn_bytestring.x ├── tokens_scan_user.x ├── tokens_strict_bytestring.x └── unicode.x /.github/haskell-ci.patch: -------------------------------------------------------------------------------- 1 | --- .github/workflows/haskell-ci.yml 2023-02-08 20:09:03.000000000 +0100 2 | +++ .github/workflows/haskell-ci.yml-patched 2023-02-08 20:08:57.000000000 +0100 3 | @@ -226,10 +226,23 @@ 4 | rm -f cabal-plan.xz 5 | chmod a+x $HOME/.cabal/bin/cabal-plan 6 | cabal-plan --version 7 | + 8 | + - name: install alex and happy 9 | + run: | 10 | + $CABAL v2-install $ARG_COMPILER alex happy 11 | + 12 | - name: checkout 13 | uses: actions/checkout@v3 14 | with: 15 | path: source 16 | + 17 | + - name: generate Parser.hs and Scan.hs 18 | + run: | 19 | + happy -agc $GITHUB_WORKSPACE/source/src/Parser.y -o $GITHUB_WORKSPACE/source/src/Parser.hs 20 | + alex -g $GITHUB_WORKSPACE/source/src/Scan.x -o $GITHUB_WORKSPACE/source/src/Scan.hs 21 | + mv $GITHUB_WORKSPACE/source/src/Parser.y $GITHUB_WORKSPACE/source/src/Parser.y.boot 22 | + mv $GITHUB_WORKSPACE/source/src/Scan.x $GITHUB_WORKSPACE/source/src/Scan.x.boot 23 | + 24 | - name: initial cabal.project for sdist 25 | run: | 26 | touch cabal.project 27 | @@ -275,15 +288,21 @@ 28 | run: | 29 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 30 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 31 | - - name: build w/o tests 32 | + 33 | + - name: build w/o tests and install 34 | run: | 35 | - $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 36 | + $CABAL v2-install --reinstall --overwrite-policy=always $ARG_COMPILER --disable-tests --disable-benchmarks all 37 | + 38 | - name: build 39 | run: | 40 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 41 | - name: tests 42 | run: | 43 | + # echo "ALEX=$HOME/.cabal/bin/alex" >> "$GITHUB_ENV" 44 | + export 
ALEX=$HOME/.cabal/bin/alex 45 | + export HC=$HC 46 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 47 | + 48 | - name: cabal check 49 | run: | 50 | cd ${PKGDIR_alex} || false 51 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | main: 16 | name: GHC ${{ matrix.ghc }} on ${{ matrix.os }} 17 | runs-on: ${{ matrix.os }} 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | os: [ubuntu-latest] 22 | ghc: 23 | - "8.0.2" 24 | - "8.2.2" 25 | - "8.4.4" 26 | - "8.6.5" 27 | - "8.8.4" 28 | - "8.10.7" 29 | - "9.0.2" 30 | - "9.2.8" 31 | - "9.4.8" 32 | - "9.6.7" 33 | - "9.8.4" 34 | - "9.10.1" 35 | - "9.12.2" 36 | include: 37 | - ghc: "9.12.2" 38 | os: macos-latest 39 | - ghc: "9.12.2" 40 | os: windows-latest 41 | 42 | steps: 43 | - uses: actions/checkout@v4 44 | 45 | - uses: haskell-actions/setup@v2 46 | id: setup 47 | with: 48 | ghc-version: ${{ matrix.ghc }} 49 | cabal-version: "latest" 50 | cabal-update: true 51 | 52 | - uses: actions/cache@v4 53 | name: Cache cabal stuff 54 | with: 55 | path: | 56 | ${{ steps.setup.outputs.cabal-store }} 57 | dist-newstyle 58 | key: ${{ runner.os }}-${{ matrix.ghc }} 59 | 60 | - name: Versions 61 | run: | 62 | cabal --version 63 | 64 | - name: Install alex & happy 65 | run: | 66 | cd ../ 67 | cabal install alex happy 68 | 69 | - name: Unpack 70 | run: | 71 | cp src/Parser.y src/Parser.y.boot 72 | cp src/Scan.x src/Scan.x.boot 73 | cabal sdist --ignore-project --output-directory . 
74 | cabal get alex-*.tar.gz 75 | 76 | - name: Build 77 | run: | 78 | cd alex-*/ 79 | cabal build all --enable-tests --enable-benchmarks 80 | 81 | - name: Test 82 | run: | 83 | cd alex-*/ 84 | ALEX="$(cabal list-bin alex)" 85 | export ALEX 86 | cabal run --enable-tests alex:test:tests 87 | 88 | - name: Test (with debugging) 89 | run: | 90 | cd alex-*/ 91 | ALEX="$(cabal list-bin alex)" 92 | export ALEX 93 | cabal run --enable-tests alex:test:tests-debug 94 | 95 | - name: Haddock 96 | run: | 97 | cd alex-*/ 98 | cabal haddock --disable-documentation --haddock-all all 99 | 100 | - name: Cabal check 101 | run: | 102 | cd alex-*/ 103 | cabal check 104 | -------------------------------------------------------------------------------- /.github/workflows/emulated.yml: -------------------------------------------------------------------------------- 1 | name: emulated 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | # Emulation is incredibly slow and memory demanding. It seems that any 16 | # executable with GHC RTS takes at least 7-8 Gb of RAM, so we can run 17 | # `cabal` or `ghc` on their own, but cannot run them both at the same time, 18 | # striking out `cabal test`. Instead we rely on system packages and invoke 19 | # `ghc --make` manually, and even so `ghc -O` is prohibitively expensive. 
20 | emulated: 21 | runs-on: ubuntu-latest 22 | strategy: 23 | fail-fast: true 24 | matrix: 25 | arch: ['s390x', 'ppc64le'] 26 | steps: 27 | - uses: actions/checkout@v4 28 | - uses: uraimo/run-on-arch-action@v3 29 | timeout-minutes: 60 30 | with: 31 | arch: ${{ matrix.arch }} 32 | distro: ubuntu_latest 33 | githubToken: ${{ github.token }} 34 | install: | 35 | apt-get update -y 36 | apt-get install -y ghc alex happy 37 | run: | 38 | (cd src/; alex -g Scan.x; happy -ag Parser.y) 39 | # Need to remove mention of the Cabal path module, and then substitutes 40 | # getDataDir |-> return "/pwd/data/" 41 | # version |-> undefined 42 | sed -i '/^import Paths_alex.*$/d' src/Main.hs # 43 | # The nested sed here escapes / into \/ so that the outer sed doesn't 44 | # interpret the forward slashes. You're welcome. 45 | sed -i "s/getDataDir/\(return \"$(pwd | sed 's/\//\\\//g')\\/data\"\)/g" src/Main.hs 46 | sed -i "s/version/undefined/g" src/Main.hs 47 | ghc -XHaskell2010 -XPatternSynonyms -XFlexibleContexts -XMagicHash -XCPP -XNondecreasingIndentation -XScopedTypeVariables -XTupleSections -XDeriveFunctor \ 48 | -package array -package containers -package directory \ 49 | -isrc src/Main.hs \ 50 | -o alex 51 | ./alex -g tests/simple.x 52 | ghc -package array tests/simple.hs -o simple 53 | ./simple +RTS -s 54 | -------------------------------------------------------------------------------- /.github/workflows/fix-whitespace.yml: -------------------------------------------------------------------------------- 1 | name: Whitespace 2 | on: 3 | push: 4 | pull_request: 5 | 6 | jobs: 7 | check-whitespace: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout sources 12 | uses: actions/checkout@v4 13 | 14 | - name: Check for whitespace violations 15 | uses: andreasabel/fix-whitespace-action@v1 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 
*.hi 2 | *.prof 3 | *~ 4 | *.o 5 | *.dyn_hi 6 | *.dyn_o 7 | *.info 8 | /.ghc.environment.* 9 | /data/ 10 | /dist 11 | /dist-newstyle 12 | /examples/*.alex.hs 13 | /examples/*.happy.hs 14 | /examples/*.bin 15 | /examples/*.exe 16 | /old-*/ 17 | /tests/*.[dgn].hs 18 | /tests/*.[dgn].bin 19 | /tests/*.[dgn].exe 20 | .cabal-sandbox 21 | .stack-work 22 | cabal.sandbox.config 23 | /src/TAGS 24 | /TAGS 25 | /stack.yaml.lock 26 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | # Fill-in/fix authorship information for migrated commits. See 2 | # git-shortlog(1) for details. 3 | 4 | Alain Cremieux alcremi@pobox.com 5 | 6 | Alexander Biehl alexbiehl 7 | 8 | Duncan Coutts Duncan Coutts 9 | Duncan Coutts 10 | 11 | Gwern gwern0@gmail.com 12 | 13 | John Ericson 14 | 15 | Mike Thomas mthomas 16 | 17 | Neil Mitchell 18 | 19 | Omer Agacan osa1 20 | 21 | Ross Paterson ross 22 | 23 | Sebastian Graf 24 | 25 | Sergey Vinokurov <1149355+sergv@users.noreply.github.com> 26 | 27 | Simon Marlow simonmar 28 | Simon Marlow 29 | 30 | Sven Panne panne 31 | Sven Panne sven.panne@aedion.de 32 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3" 12 | # Latest 3.x 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | 18 | # Build documentation in the "doc/" directory with Sphinx 19 | sphinx: 20 | configuration: doc/conf.py 21 | 22 | # Optionally build your docs in additional formats such 
as PDF and ePub 23 | # formats: 24 | # - pdf 25 | # - epub 26 | 27 | # Optional but recommended, declare the Python requirements required 28 | # to build your documentation 29 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 30 | python: 31 | install: 32 | - requirements: doc/requirements.txt 33 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | doc/contributing.rst -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 1995-2011, Chris Dornan and Simon Marlow 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the copyright holders, nor the names of the 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CABAL = cabal 2 | 3 | HAPPY = happy 4 | HAPPY_OPTS = -agc 5 | 6 | ALEX = alex 7 | ALEX_OPTS = -g 8 | ALEX_VER = `awk '/^version:/ { print $$2 }' alex.cabal` 9 | 10 | SDIST_DIR=dist-newstyle/sdist 11 | 12 | sdist :: 13 | @case "`$(CABAL) --numeric-version`" in \ 14 | 2.[2-9].* | [3-9].* ) ;; \ 15 | * ) echo "Error: needs cabal 2.2.0.0 or later (but got : `$(CABAL) --numeric-version`)" ; exit 1 ;; \ 16 | esac 17 | @if [ "`git status -s`" != '' ]; then \ 18 | echo "Error: Tree is not clean"; \ 19 | exit 1; \ 20 | fi 21 | $(HAPPY) $(HAPPY_OPTS) src/Parser.y -o src/Parser.hs 22 | $(ALEX) $(ALEX_OPTS) src/Scan.x -o src/Scan.hs 23 | mv src/Parser.y src/Parser.y.boot 24 | mv src/Scan.x src/Scan.x.boot 25 | $(CABAL) v2-sdist 26 | @if [ ! -f "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" ]; then \ 27 | echo "Error: source tarball not found: dist/alex-$(ALEX_VER).tar.gz"; \ 28 | exit 1; \ 29 | fi 30 | git checkout . 31 | git clean -f 32 | 33 | sdist-test :: sdist sdist-test-only 34 | @rm -rf "${SDIST_DIR}/alex-${ALEX_VER}/" 35 | 36 | sdist-test-only :: 37 | @if [ ! 
-f "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" ]; then \ 38 | echo "Error: source tarball not found: ${SDIST_DIR}/alex-$(ALEX_VER).tar.gz"; \ 39 | exit 1; \ 40 | fi 41 | rm -rf "${SDIST_DIR}/alex-$(ALEX_VER)/" 42 | tar -xf "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" -C ${SDIST_DIR}/ 43 | echo "packages: ." > "${SDIST_DIR}/alex-$(ALEX_VER)/cabal.project" 44 | cd "${SDIST_DIR}/alex-$(ALEX_VER)/" && cabal v2-test --enable-tests all 45 | @echo "" 46 | @echo "Success! ${SDIST_DIR}/alex-$(ALEX_VER).tar.gz is ready for distribution!" 47 | @echo "" 48 | -------------------------------------------------------------------------------- /NOTE.txt: -------------------------------------------------------------------------------- 1 | Note: 2 | 3 | The contents of package Ranged-sets-0.3.0 has been copied into this 4 | package, in order to allow it to be part of the Haskell Platform, 5 | without introducing additional dependencies. 6 | 7 | The original license agreement has been included in the src/Data 8 | subdirectory, as required by the package source. 9 | 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Alex: A Lexical Analyser Generator 2 | 3 | [![CI](https://github.com/haskell/alex/actions/workflows/ci.yml/badge.svg)](https://github.com/haskell/alex/actions/workflows/ci.yml) 4 | 5 | Alex is a tool for generating lexical analysers, also known as "lexers" and "scanners", in Haskell. 6 | The lexical analysers implement a description of the tokens to be recognised in the form of regular expressions. 7 | It is similar to the tools "lex" and "flex" for C/C++. 8 | 9 | Share and enjoy! 
10 | 11 | ## Documentation 12 | 13 | Documentation is hosted on [Read the Docs](https://haskell-alex.readthedocs.io): 14 | 15 | - [Online (HTML)](https://haskell-alex.readthedocs.io) 16 | - [PDF](https://haskell-alex.readthedocs.io/_/downloads/en/latest/pdf/) 17 | - [Downloadable HTML](https://haskell-alex.readthedocs.io/_/downloads/en/latest/htmlzip/) 18 | 19 | For basic information of the sort typically found in a read-me, see the following sections of the docs: 20 | 21 | - [About Alex](https://haskell-alex.readthedocs.io/en/latest/about.html) 22 | - [Obtaining Alex](https://haskell-alex.readthedocs.io/en/latest/obtaining.html) 23 | - [Contributing](https://haskell-alex.readthedocs.io/en/latest/contributing.html) 24 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - Option for pure Haskell 98 output? 2 | - maybe Haskell 2010 at this point? 3 | - how about an option to use Data.Array.Unboxed? 4 | 5 | - Put in {-# LINE #-} pragmas for token actions 6 | 7 | - Prune states that aren't reachable? 8 | 9 | - Issue a warning for tokens that can't be generated? 10 | 11 | - Info file? 12 | - start codes 13 | - accepting states 14 | 15 | - More compact lexer table encoding: 16 | - equivalence classes? 17 | 18 | - Improve performance of Alex itself 19 | 20 | - AlexEOF doesn't provide a way to get at the text position of the EOF. 21 | 22 | - Allow user-defined wrappers? Wrappers in files relative to the 23 | current directory, for example? 24 | 25 | - case-insensitivity option (like flex's -i). 
26 | 27 | -------------------------------------------------------------------------------- /alex.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: >= 1.10 2 | name: alex 3 | version: 3.5.3.0 4 | -- don't forget updating changelog.md! 5 | license: BSD3 6 | license-file: LICENSE 7 | copyright: (c) Chis Dornan, Simon Marlow 8 | author: Chris Dornan and Simon Marlow 9 | maintainer: https://github.com/haskell/alex 10 | bug-reports: https://github.com/haskell/alex/issues 11 | stability: stable 12 | homepage: http://www.haskell.org/alex/ 13 | synopsis: Alex is a tool for generating lexical analysers in Haskell 14 | description: 15 | Alex is a tool for generating lexical analysers in Haskell. 16 | It takes a description of tokens based on regular 17 | expressions and generates a Haskell module containing code 18 | for scanning text efficiently. It is similar to the tool 19 | lex or flex for C/C++. 20 | 21 | category: Development 22 | build-type: Simple 23 | 24 | tested-with: 25 | GHC == 9.12.2 26 | GHC == 9.10.1 27 | GHC == 9.8.4 28 | GHC == 9.6.7 29 | GHC == 9.4.8 30 | GHC == 9.2.8 31 | GHC == 9.0.2 32 | GHC == 8.10.7 33 | GHC == 8.8.4 34 | GHC == 8.6.5 35 | GHC == 8.4.4 36 | GHC == 8.2.2 37 | GHC == 8.0.2 38 | 39 | data-dir: data/ 40 | 41 | data-files: 42 | AlexTemplate.hs 43 | AlexWrappers.hs 44 | 45 | extra-source-files: 46 | CHANGELOG.md 47 | README.md 48 | examples/Makefile 49 | examples/Tokens.x 50 | examples/Tokens_gscan.x 51 | examples/Tokens_posn.x 52 | examples/examples.x 53 | examples/haskell.x 54 | examples/lit.x 55 | examples/pp.x 56 | examples/state.x 57 | examples/tiny.y 58 | examples/words.x 59 | examples/words_monad.x 60 | examples/words_posn.x 61 | src/Parser.y.boot 62 | src/Scan.x.boot 63 | src/ghc_hooks.c 64 | tests/Makefile 65 | tests/simple.x 66 | tests/null.x 67 | tests/tokens.x 68 | tests/tokens_gscan.x 69 | tests/tokens_posn.x 70 | tests/tokens_bytestring.x 71 | 
tests/tokens_posn_bytestring.x 72 | tests/tokens_scan_user.x 73 | tests/tokens_strict_bytestring.x 74 | tests/tokens_monad_bytestring.x 75 | tests/tokens_monadUserState_bytestring.x 76 | tests/tokens_bytestring_unicode.x 77 | tests/basic_typeclass.x 78 | tests/basic_typeclass_bytestring.x 79 | tests/default_typeclass.x 80 | tests/gscan_typeclass.x 81 | tests/posn_typeclass.x 82 | tests/monad_typeclass.x 83 | tests/monad_typeclass_bytestring.x 84 | tests/monadUserState_typeclass.x 85 | tests/monadUserState_typeclass_bytestring.x 86 | tests/posn_typeclass_bytestring.x 87 | tests/strict_typeclass.x 88 | tests/unicode.x 89 | tests/issue_71.x 90 | tests/issue_119.x 91 | tests/issue_141.x 92 | tests/issue_197.x 93 | tests/issue_262.x 94 | tests/issue_269_part1.x 95 | tests/issue_269_part2.x 96 | tests/strict_text_typeclass.x 97 | tests/posn_typeclass_strict_text.x 98 | tests/tokens_monadUserState_strict_text.x 99 | 100 | source-repository head 101 | type: git 102 | location: https://github.com/haskell/alex.git 103 | 104 | executable alex 105 | hs-source-dirs: src 106 | main-is: Main.hs 107 | 108 | build-depends: 109 | base >= 4.9 && < 5 110 | -- Data.List.NonEmpty enters `base` at 4.9 111 | , array 112 | , containers 113 | , directory 114 | 115 | default-language: 116 | Haskell2010 117 | default-extensions: 118 | DeriveFunctor 119 | PatternSynonyms 120 | ScopedTypeVariables 121 | TupleSections 122 | other-extensions: 123 | CPP 124 | FlexibleContexts 125 | MagicHash 126 | NondecreasingIndentation 127 | OverloadedLists 128 | ghc-options: -Wall -Wcompat -rtsopts 129 | 130 | other-modules: 131 | AbsSyn 132 | CharSet 133 | DFA 134 | DFAMin 135 | DFS 136 | Info 137 | NFA 138 | Output 139 | Paths_alex 140 | Parser 141 | ParseMonad 142 | Scan 143 | Util 144 | UTF8 145 | Data.Ranged 146 | Data.Ranged.Boundaries 147 | Data.Ranged.RangedSet 148 | Data.Ranged.Ranges 149 | 150 | test-suite tests 151 | type: exitcode-stdio-1.0 152 | main-is: test.hs 153 | -- This line is important as 
it ensures that the local `exe:alex` component declared above is built before the test-suite component is invoked, as well as making sure that `alex` is made available on $PATH and `$alex_datadir` is set accordingly before invoking `test.hs` 154 | build-tools: alex 155 | 156 | default-language: Haskell2010 157 | 158 | build-depends: 159 | base < 5 160 | , process 161 | 162 | test-suite tests-debug 163 | type: exitcode-stdio-1.0 164 | main-is: test-debug.hs 165 | -- This line is important as it ensures that the local `exe:alex` component declared above is built before the test-suite component is invoked, as well as making sure that `alex` is made available on $PATH and `$alex_datadir` is set accordingly before invoking `test.hs` 166 | build-tools: alex 167 | 168 | default-language: Haskell2010 169 | 170 | build-depends: 171 | base < 5 172 | , process 173 | -------------------------------------------------------------------------------- /build-windows-dist.sh: -------------------------------------------------------------------------------- 1 | # mini script for building the relocatable Windows binary distribution. 2 | # 3 | # sh build-windows-dist.sh 4 | # 5 | # NB. the Cabal that shipped with GHC 6.6 isn't enough for this, because it 6 | # is missing this patch: 7 | # 8 | # Fri Oct 13 11:09:41 BST 2006 Simon Marlow 9 | # * Fix getDataDir etc. when bindir=$prefix 10 | # 11 | # So you need to use a more recent Cabal. GHC 6.6 is fine for building the 12 | # package, though. 
13 | 14 | ghc --make Setup 15 | ./Setup configure --prefix=`pwd`/install --bindir='$prefix' --libdir='$prefix' --datadir='$prefix' 16 | ./Setup build 17 | ./Setup install 18 | echo Now zip up `pwd`/install as "alex--Win32.zip" 19 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | ./ 3 | -------------------------------------------------------------------------------- /data/AlexTemplate.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- ALEX TEMPLATE 3 | -- 4 | -- This code is in the PUBLIC DOMAIN; you may copy it freely and use 5 | -- it for any purpose whatsoever. 6 | 7 | -- ----------------------------------------------------------------------------- 8 | -- INTERNALS and main scanner engine 9 | 10 | #ifdef ALEX_GHC 11 | # define ILIT(n) n# 12 | # define IBOX(n) (I# (n)) 13 | # define FAST_INT Int# 14 | -- Do not remove this comment. Required to fix CPP parsing when using GCC and a clang-compiled alex. 
15 | # if __GLASGOW_HASKELL__ > 706 16 | # define GTE(n,m) (GHC.Exts.tagToEnum# (n >=# m)) 17 | # define EQ(n,m) (GHC.Exts.tagToEnum# (n ==# m)) 18 | # else 19 | # define GTE(n,m) (n >=# m) 20 | # define EQ(n,m) (n ==# m) 21 | # endif 22 | # define PLUS(n,m) (n +# m) 23 | # define MINUS(n,m) (n -# m) 24 | # define TIMES(n,m) (n *# m) 25 | # define NEGATE(n) (negateInt# (n)) 26 | # define IF_GHC(x) (x) 27 | #else 28 | # define ILIT(n) (n) 29 | # define IBOX(n) (n) 30 | # define FAST_INT Int 31 | # define GTE(n,m) (n >= m) 32 | # define EQ(n,m) (n == m) 33 | # define PLUS(n,m) (n + m) 34 | # define MINUS(n,m) (n - m) 35 | # define TIMES(n,m) (n * m) 36 | # define NEGATE(n) (negate (n)) 37 | # define IF_GHC(x) 38 | #endif 39 | 40 | #ifdef ALEX_GHC 41 | data AlexAddr = AlexA# Addr# 42 | -- Do not remove this comment. Required to fix CPP parsing when using GCC and a clang-compiled alex. 43 | 44 | {-# INLINE alexIndexInt16OffAddr #-} 45 | alexIndexInt16OffAddr :: AlexAddr -> Int# -> Int# 46 | alexIndexInt16OffAddr (AlexA# arr) off = 47 | #if __GLASGOW_HASKELL__ >= 901 48 | GHC.Exts.int16ToInt# -- qualified import because it doesn't exist on older GHC's 49 | #endif 50 | #ifdef WORDS_BIGENDIAN 51 | (GHC.Exts.word16ToInt16# (GHC.Exts.wordToWord16# (GHC.Exts.byteSwap16# (GHC.Exts.word16ToWord# (GHC.Exts.int16ToWord16# 52 | #endif 53 | (indexInt16OffAddr# arr off) 54 | #ifdef WORDS_BIGENDIAN 55 | ))))) 56 | #endif 57 | #else 58 | alexIndexInt16OffAddr = (Data.Array.!) 
59 | #endif 60 | 61 | #ifdef ALEX_GHC 62 | {-# INLINE alexIndexInt32OffAddr #-} 63 | alexIndexInt32OffAddr :: AlexAddr -> Int# -> Int# 64 | alexIndexInt32OffAddr (AlexA# arr) off = 65 | #if __GLASGOW_HASKELL__ >= 901 66 | GHC.Exts.int32ToInt# -- qualified import because it doesn't exist on older GHC's 67 | #endif 68 | #ifdef WORDS_BIGENDIAN 69 | (GHC.Exts.word32ToInt32# (GHC.Exts.wordToWord32# (GHC.Exts.byteSwap32# (GHC.Exts.word32ToWord# (GHC.Exts.int32ToWord32# 70 | #endif 71 | (indexInt32OffAddr# arr off) 72 | #ifdef WORDS_BIGENDIAN 73 | ))))) 74 | #endif 75 | #else 76 | alexIndexInt32OffAddr = (Data.Array.!) 77 | #endif 78 | 79 | #ifdef ALEX_GHC 80 | -- GHC >= 503, unsafeAt is available from Data.Array.Base. 81 | quickIndex = unsafeAt 82 | #else 83 | quickIndex = (Data.Array.!) 84 | #endif 85 | 86 | -- ----------------------------------------------------------------------------- 87 | -- Main lexing routines 88 | 89 | data AlexReturn a 90 | = AlexEOF 91 | | AlexError !AlexInput 92 | | AlexSkip !AlexInput !Int 93 | | AlexToken !AlexInput !Int a 94 | 95 | -- alexScan :: AlexInput -> StartCode -> AlexReturn a 96 | alexScan input__ IBOX(sc) 97 | = alexScanUser (error "alex rule requiring context was invoked by alexScan; use alexScanUser instead?") input__ IBOX(sc) 98 | 99 | -- If the generated alexScan/alexScanUser functions are called multiple times 100 | -- in the same file, alexScanUser gets broken out into a separate function and 101 | -- increases memory usage. Make sure GHC inlines this function and optimizes it. 
102 | {-# INLINE alexScanUser #-} 103 | 104 | alexScanUser user__ input__ IBOX(sc) 105 | = case alex_scan_tkn user__ input__ ILIT(0) input__ sc AlexNone of 106 | (AlexNone, input__') -> 107 | case alexGetByte input__ of 108 | Nothing -> 109 | #ifdef ALEX_DEBUG 110 | Debug.Trace.trace ("End of input.") $ 111 | #endif 112 | AlexEOF 113 | Just _ -> 114 | #ifdef ALEX_DEBUG 115 | Debug.Trace.trace ("Error.") $ 116 | #endif 117 | AlexError input__' 118 | 119 | (AlexLastSkip input__'' len, _) -> 120 | #ifdef ALEX_DEBUG 121 | Debug.Trace.trace ("Skipping.") $ 122 | #endif 123 | AlexSkip input__'' len 124 | 125 | (AlexLastAcc k input__''' len, _) -> 126 | #ifdef ALEX_DEBUG 127 | Debug.Trace.trace ("Accept.") $ 128 | #endif 129 | AlexToken input__''' len ((Data.Array.!) alex_actions k) 130 | 131 | 132 | -- Push the input through the DFA, remembering the most recent accepting 133 | -- state it encountered. 134 | 135 | alex_scan_tkn user__ orig_input len input__ s last_acc = 136 | input__ `seq` -- strict in the input 137 | let 138 | new_acc = (check_accs (alex_accept `quickIndex` IBOX(s))) 139 | in 140 | new_acc `seq` 141 | case alexGetByte input__ of 142 | Nothing -> (new_acc, input__) 143 | Just (c, new_input) -> 144 | #ifdef ALEX_DEBUG 145 | Debug.Trace.trace ("State: " ++ show IBOX(s) ++ ", char: " ++ show c ++ " " ++ (show . chr . fromIntegral) c) $ 146 | #endif 147 | case fromIntegral c of { IBOX(ord_c) -> 148 | let 149 | base = alexIndexInt32OffAddr alex_base s 150 | offset = PLUS(base,ord_c) 151 | 152 | new_s = if GTE(offset,ILIT(0)) 153 | && let check = alexIndexInt16OffAddr alex_check offset 154 | in EQ(check,ord_c) 155 | then alexIndexInt16OffAddr alex_table offset 156 | else alexIndexInt16OffAddr alex_deflt s 157 | in 158 | case new_s of 159 | ILIT(-1) -> (new_acc, input__) 160 | -- on an error, we want to keep the input *before* the 161 | -- character that failed, not after. 
162 | _ -> alex_scan_tkn user__ orig_input 163 | #ifdef ALEX_LATIN1 164 | PLUS(len,ILIT(1)) 165 | -- issue 119: in the latin1 encoding, *each* byte is one character 166 | #else 167 | (if c < 0x80 || c >= 0xC0 then PLUS(len,ILIT(1)) else len) 168 | -- note that the length is increased ONLY if this is the 1st byte in a char encoding) 169 | #endif 170 | new_input new_s new_acc 171 | } 172 | where 173 | check_accs (AlexAccNone) = last_acc 174 | check_accs (AlexAcc a ) = AlexLastAcc a input__ IBOX(len) 175 | check_accs (AlexAccSkip) = AlexLastSkip input__ IBOX(len) 176 | #ifndef ALEX_NOPRED 177 | check_accs (AlexAccPred a predx rest) 178 | | predx user__ orig_input IBOX(len) input__ 179 | = AlexLastAcc a input__ IBOX(len) 180 | | otherwise 181 | = check_accs rest 182 | check_accs (AlexAccSkipPred predx rest) 183 | | predx user__ orig_input IBOX(len) input__ 184 | = AlexLastSkip input__ IBOX(len) 185 | | otherwise 186 | = check_accs rest 187 | #endif 188 | 189 | data AlexLastAcc 190 | = AlexNone 191 | | AlexLastAcc !Int !AlexInput !Int 192 | | AlexLastSkip !AlexInput !Int 193 | 194 | data AlexAcc user 195 | = AlexAccNone 196 | | AlexAcc Int 197 | | AlexAccSkip 198 | #ifndef ALEX_NOPRED 199 | | AlexAccPred Int (AlexAccPred user) (AlexAcc user) 200 | | AlexAccSkipPred (AlexAccPred user) (AlexAcc user) 201 | 202 | type AlexAccPred user = user -> AlexInput -> Int -> AlexInput -> Bool 203 | 204 | -- ----------------------------------------------------------------------------- 205 | -- Predicates on a rule 206 | 207 | alexAndPred p1 p2 user__ in1 len in2 208 | = p1 user__ in1 len in2 && p2 user__ in1 len in2 209 | 210 | --alexPrevCharIsPred :: Char -> AlexAccPred _ 211 | alexPrevCharIs c _ input__ _ _ = c == alexInputPrevChar input__ 212 | 213 | alexPrevCharMatches f _ input__ _ _ = f (alexInputPrevChar input__) 214 | 215 | --alexPrevCharIsOneOfPred :: Array Char Bool -> AlexAccPred _ 216 | alexPrevCharIsOneOf arr _ input__ _ _ = arr Data.Array.! 
alexInputPrevChar input__ 217 | 218 | --alexRightContext :: Int -> AlexAccPred _ 219 | alexRightContext IBOX(sc) user__ _ _ input__ = 220 | case alex_scan_tkn user__ input__ ILIT(0) input__ sc AlexNone of 221 | (AlexNone, _) -> False 222 | _ -> True 223 | -- TODO: there's no need to find the longest 224 | -- match when checking the right context, just 225 | -- the first match will do. 226 | #endif 227 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Flag -n ("nitpick") warns about broken references 12 | # Flag -W turns warnings into errors 13 | # Flag --keep-going continues after errors 14 | SPHINXOPTS := -n -W --keep-going -E 15 | 16 | .PHONY: help html Makefile 17 | 18 | # default goal, first 19 | html: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | help: 23 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | 25 | # Catch-all target: route all unknown targets to Sphinx using the new 26 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 27 | %: Makefile 28 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 29 | -------------------------------------------------------------------------------- /doc/about.rst: -------------------------------------------------------------------------------- 1 | .. 
_about: 2 | 3 | About Alex 4 | ========== 5 | 6 | Alex can always be obtained from its `home page `__. 7 | The latest source code lives in the `git repository `__ on ``GitHub``. 8 | 9 | Releases 10 | -------- 11 | 12 | Releases of Alex are published on `Hackage `__. 13 | They are also given `Git tags `__ in the repository. 14 | 15 | .. _bug-reports: 16 | 17 | Reporting issues with Alex 18 | -------------------------- 19 | 20 | Please report bugs on the `Alex issue tracker `__. 21 | There are no specific mailing lists for the discussion of Alex-related matters, 22 | but such topics should be fine on the `Haskell Cafe `__ mailing list. 23 | 24 | License 25 | ------- 26 | 27 | Copyright (c) 1995-2011, Chris Dornan and Simon Marlow. 28 | All rights reserved. 29 | 30 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 31 | 32 | - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 33 | 34 | - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the 35 | documentation and/or other materials provided with the distribution. 36 | 37 | - Neither the name of the copyright holders, nor the names of the contributors may be used to endorse or promote products derived from this software without specific prior written permission. 38 | 39 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
40 | IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 41 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 42 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | 44 | Acknowledgments 45 | --------------- 46 | 47 | Original authors 48 | ~~~~~~~~~~~~~~~~ 49 | 50 | - Chris Dornan: cdornan@arm.com 51 | 52 | - Isaac Jones: ijones@syntaxpolice.org 53 | 54 | - Simon Marlow: simonmar@microsoft.com 55 | 56 | Current Maintainers 57 | ~~~~~~~~~~~~~~~~~~~ 58 | 59 | - Andreas Abel (@andreasabel) 60 | 61 | - John Ericson (@Ericson2314) 62 | 63 | - Simon Marlow (@simonmar) 64 | 65 | Other contributors 66 | ~~~~~~~~~~~~~~~~~~ 67 | 68 | The data is in the Git history. 69 | GitHub can render that in `various ways `__. 70 | 71 | The documentation itself 72 | ~~~~~~~~~~~~~~~~~~~~~~~~ 73 | 74 | This documentation is based on the original documentation written by Alex's original authors using DocBook. 75 | 76 | It was converted to reStructuredText / Sphinx / readthedocs.org by Andreas Abel in February 2022. 77 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options.
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'Alex' 21 | copyright = '2022, Simon Marlow and the Alex developers' 22 | author = 'Simon Marlow and the Alex developers' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | ] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # List of patterns, relative to source directory, that match files and 37 | # directories to ignore when looking for source files. 38 | # This pattern also affects html_static_path and html_extra_path. 39 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 40 | 41 | 42 | # -- Options for HTML output ------------------------------------------------- 43 | 44 | # The theme to use for HTML and HTML Help pages. See the documentation for 45 | # a list of builtin themes. 46 | # 47 | html_theme = 'sphinx_rtd_theme' 48 | 49 | # Add any paths that contain custom static files (such as style sheets) here, 50 | # relative to this directory. They are copied after the builtin static files, 51 | # so a file named "default.css" will overwrite the builtin "default.css". 
52 | html_static_path = ['_static'] 53 | 54 | # The name of the Pygments (syntax highlighting) style to use. 55 | pygments_style = 'sphinx' 56 | highlight_language = 'Peg' 57 | # Andreas Abel, 2022-02-27: Peg looked best from the "grammar" highlighters. 58 | # I also tried 'Bnf' and 'Abnf'. 59 | -------------------------------------------------------------------------------- /doc/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | 3 | Contributing to Alex 4 | ==================== 5 | 6 | .. highlight:: bash 7 | 8 | Source Code Repository 9 | ---------------------- 10 | 11 | Alex is hosted on `GitHub `__. 12 | As previously discussed in :ref:`bug-reports`, we use the built-in `GitHub issue tracker `__ for Alex. 13 | We also use `GitHub pull requests `__ for managing changes; 14 | feel free to submit them! 15 | 16 | Repo Layout 17 | ----------- 18 | 19 | - ``src``: The source code for Alex itself 20 | - ``doc``: The documentation 21 | - ``examples``: Various examples of using Alex 22 | 23 | Contributor Build Instructions 24 | ------------------------------ 25 | 26 | Alex is built using `GHC `__ and 27 | `Cabal Install `__ (version 2.0 or later). 28 | Make sure they are already installed first. 29 | 30 | Since Alex itself is implemented in terms of an Alex scanner, 31 | bootstrapping Alex is a bit tricky: 32 | 33 | You need to have the build-tools ``alex`` and ``happy`` manually installed; 34 | either via your system package manager distribution, the Haskell Platform, or e.g. via (run this outside the Git repository!):: 35 | 36 | $ cabal install alex happy 37 | 38 | which installs them into ``${HOME}/.cabal/bin`` by default. 39 | (make sure they are in your ``$PATH`` for the next steps!) 40 | 41 | Variant A 42 | ~~~~~~~~~ 43 | 44 | You can install ``alex`` simply by invoking:: 45 | 46 | $ cabal install 47 | 48 | from inside the Git folder. 
49 | 50 | Variant B 51 | ~~~~~~~~~ 52 | 53 | Alternatively, you can use the ``Makefile`` which automates the steps of producing a self-contained pre-bootstrapped source distribution with pre-generated lexer/scanners:: 54 | 55 | $ make sdist 56 | $ cabal install dist/alex-*.tar.gz 57 | 58 | For convenience, there is also a ``make sdist-test`` target which builds the source tarball and runs the test-suite from within the source dist. 59 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Alex User Guide 3 | =============== 4 | 5 | Alex is a tool for generating lexical analysers, also known as "lexers" and "scanners", in Haskell. 6 | The lexical analysers implement a description of the tokens to be recognised in the form of regular expressions. 7 | It is similar to the tools "lex" and "flex" for C/C++. 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents 12 | 13 | about 14 | obtaining 15 | introduction 16 | syntax 17 | regex 18 | api 19 | invoking 20 | contributing 21 | 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /doc/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | Alex is a tool for generating lexical analysers, also known as "lexers" and "scanners", in Haskell. 5 | The lexical analysers implement a description of the tokens to be recognised in the form of regular expressions. 6 | It is similar to the tools "lex" and "flex" for C/C++. 7 | 8 | Alex takes a description of tokens based on regular expressions and generates a Haskell module containing code for scanning text 9 | efficiently.
10 | Alex is designed to be familiar to existing lex users, 11 | although it does depart from lex in a number of ways. 12 | 13 | A sample specification would be the following: 14 | 15 | .. code-block:: none 16 | 17 | { 18 | module Main (main) where 19 | } 20 | 21 | %wrapper "basic" 22 | 23 | $digit = 0-9 -- digits 24 | $alpha = [a-zA-Z] -- alphabetic characters 25 | 26 | tokens :- 27 | 28 | $white+ ; 29 | "--".* ; 30 | let { \s -> Let } 31 | in { \s -> In } 32 | $digit+ { \s -> Int (read s) } 33 | [\=\+\-\*\/\(\)] { \s -> Sym (head s) } 34 | $alpha [$alpha $digit \_ \']* { \s -> Var s } 35 | 36 | { 37 | -- Each action has type :: String -> Token 38 | 39 | -- The token type: 40 | data Token 41 | = Let 42 | | In 43 | | Sym Char 44 | | Var String 45 | | Int Int 46 | deriving (Eq, Show) 47 | 48 | main = do 49 | s <- getContents 50 | print (alexScanTokens s) 51 | } 52 | 53 | The first few lines between the ``{`` and ``}`` provide a code scrap (some inlined Haskell code) to be placed directly in the output, 54 | the scrap at the top of the module is normally used to declare the module name for the generated Haskell module, in this case ``Main``. 55 | 56 | The next line, ``%wrapper "basic"`` controls what kind of support code Alex should produce along with the basic scanner. 57 | The ``basic`` wrapper selects a scanner that tokenises a ``String`` and returns a list of tokens. 58 | Wrappers are described fully in :ref:`The Interface to an Alex-generated lexer `. 59 | 60 | The next two lines define the ``$digit`` and ``$alpha`` macros for use in the token definitions. 61 | 62 | The ‘\ ``tokens :-``\ ’ line ends the macro definitions and starts the definition of the scanner. 63 | 64 | The scanner is specified as a series of token definitions where each token specification takes the form of 65 | 66 | :: 67 | 68 | regexp { code } 69 | 70 | The meaning of this rule is 71 | "if the input matches , then return ". 
72 | The code part along with the braces can be replaced by simply ‘\ ``;``\ ’, 73 | meaning that this token should be ignored in the input stream. 74 | As you can see, we've used this to ignore whitespace in our example. 75 | 76 | Our scanner is set up so that the actions are all functions with type ``String->Token``. 77 | When the token is matched, the portion of the input stream that it matched is passed to the appropriate action function as a ``String``. 78 | 79 | At the bottom of the file we have another code fragment, surrounded by braces ``{ ... }``. 80 | In this fragment, we declare the type of the tokens, and give a ``main`` function that we can use for testing it; 81 | the ``main`` function just tokenises the input and prints the results to standard output. 82 | 83 | Alex has kindly provided the following function which we can use to invoke the scanner: 84 | 85 | .. code-block:: haskell 86 | 87 | alexScanTokens :: String -> [Token] 88 | 89 | Alex arranges for the input stream to be tokenised, 90 | each of the action functions to be passed the appropriate ``String``, 91 | and a list of ``Token``\ s returned as the result. 92 | If the input stream is lazy, the output stream will also be produced lazily [1]_. 93 | 94 | We have demonstrated the simplest form of scanner here, 95 | which was selected by the ``%wrapper "basic"`` line near the top of the file. 96 | In general, actions do not have to have type ``String->Token``, 97 | and there's no requirement for the scanner to return a list of tokens. 98 | 99 | With this specification in the file ``Tokens.x``, 100 | Alex can be used to generate ``Tokens.hs``: 101 | 102 | .. code-block:: sh 103 | 104 | $ alex Tokens.x 105 | 106 | If the module needed to be placed in a different file, ``Main.hs`` for example, 107 | then the output filename can be specified using the ``-o`` option: 108 | 109 | .. code-block:: sh 110 | 111 | $ alex Tokens.x -o Main.hs 112 | 113 | The resulting module is Haskell 98 compatible. 
114 | It can also be readily used with a `Happy `__ parser. 115 | 116 | .. [1] 117 | That is, unless you have any patterns that require a long lookahead. 118 | -------------------------------------------------------------------------------- /doc/invoking.rst: -------------------------------------------------------------------------------- 1 | .. _invoking: 2 | 3 | Invoking Alex 4 | ============= 5 | 6 | The command line syntax for Alex is entirely standard: 7 | 8 | .. code-block:: sh 9 | 10 | $ alex { option } file.x { option } 11 | 12 | Alex expects a single ``file.x`` to be named on the command line. 13 | By default, Alex will create ``file.hs`` containing the Haskell source for the lexer. 14 | 15 | The options that Alex accepts are listed below: 16 | 17 | ``-o`` ; ``--outfile``\ = 18 | Specifies the filename in which the output is to be placed. 19 | By default, this is the name of the input file with the ``.x`` suffix replaced by ``.hs``. 20 | 21 | ``-i`` []; ``--info`` [<=file>] 22 | Produces a human-readable rendition of the state machine (DFA) that Alex derives from the lexer, in 23 | (default: ``file.info`` where the input file is ``file.x``). 24 | 25 | The format of the info file is currently a bit basic, and not particularly informative. 26 | 27 | ``-t`` []; ``--template``\ = 28 | Look in for template files. 29 | 30 | ``-g``; ``--ghc`` 31 | Causes Alex to produce a lexer which is optimised for compiling with GHC. 32 | The lexer will be significantly more efficient, 33 | both in terms of the size of the compiled lexer and its runtime. 34 | 35 | ``-d``; ``--debug`` 36 | Causes Alex to produce a lexer which will output debugging messages as it runs. 37 | 38 | ``-l``; ``--latin1`` 39 | Disables the use of UTF-8 encoding in the generated lexer. 
40 | This has two consequences: 41 | 42 | - The Alex source file is still assumed to be UTF-8 encoded, 43 | but any Unicode characters outside the range 0-255 are mapped to Latin-1 characters by taking the code point modulo 256. 44 | 45 | - The built-in macros ``$printable`` and '``.``' range over the Latin-1 character set, not the Unicode character set. 46 | 47 | Note that this currently does not disable the UTF-8 encoding that happens in the "basic" wrappers, 48 | so ``--latin1`` does not make sense in conjunction with these wrappers 49 | (not that you would want to do that, anyway). 50 | Alternatively, a ``%encoding "latin1"`` declaration can be used inside the Alex source file to request a Latin-1 mapping. 51 | See also :ref:`Unicode and UTF-8 ` for more information about the ``%encoding`` declaration. 52 | 53 | ``-?``; ``--help`` 54 | Display help and exit. 55 | 56 | ``-V``; ``--version`` 57 | Output version information and exit. 58 | Note that for legacy reasons ``-v`` is supported, too, but the use of it is deprecated. 59 | ``-v`` will be used for verbose mode when it is actually implemented. 60 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/obtaining.rst: -------------------------------------------------------------------------------- 1 | .. _installing: 2 | 3 | Obtaining Alex 4 | ============== 5 | 6 | .. highlight:: bash 7 | 8 | If you just want to *use* Alex, you can build from a release. 9 | This should work the same as any other Haskell package. 10 | 11 | Alex itself and its examples are intended to work with GHC >= 7.0. 12 | 13 | Haskell-specific way 14 | -------------------- 15 | 16 | From `Hackage `__ via `Cabal Install `__:: 17 | 18 | $ cabal install alex 19 | 20 | From `Stackage `__ via `Stack `__:: 21 | 22 | $ stack install alex 23 | 24 | Moreover, recent versions of ``cabal`` will automatically install the required version of ``alex`` based on ``build-tools``/``build-tool-depends`` `declarations `__. 25 | 26 | Operating System way 27 | -------------------- 28 | 29 | Because Alex is a dependency of GHC, it is often packaged by operating systems. 30 | `Repology `__ aggregates this info across many distros and operating systems, and Alex is actually listed twice: 31 | 32 | - https://repology.org/project/haskell:alex/versions 33 | - https://repology.org/project/alex/versions 34 | 35 | The table contains links to the individual OS packages, which should provide installation instructions. 36 | -------------------------------------------------------------------------------- /doc/regex.rst: -------------------------------------------------------------------------------- 1 | .. 
_regexps: 2 | 3 | Regular Expression 4 | ================== 5 | 6 | Regular expressions are the patterns that Alex uses to match tokens in the input stream. 7 | 8 | .. _regexp-syntax: 9 | 10 | Syntax of regular expressions 11 | ----------------------------- 12 | 13 | :: 14 | 15 | regexp := rexp2 { '|' rexp2 } 16 | 17 | rexp2 := rexp1 { rexp1 } 18 | 19 | rexp1 := rexp0 [ '*' | '+' | '?' | repeat ] 20 | 21 | rexp0 := set 22 | | @rmac 23 | | @string 24 | | '(' [ regexp ] ')' 25 | 26 | repeat := '{' $digit+ '}' 27 | | '{' $digit+ ',' '}' 28 | | '{' $digit+ ',' $digit+ '}' 29 | 30 | The syntax of regular expressions is fairly standard, 31 | the only difference from normal lex-style regular expressions being that we allow the sequence ``()`` to denote the regular expression that matches the empty string. 32 | 33 | Spaces are ignored in a regular expression, 34 | so feel free to space out your regular expression as much as you like, 35 | even split it over multiple lines and include comments. 36 | Literal whitespace can be included by surrounding it with quotes ``" "``, or by escaping each whitespace character with ``\``. 37 | 38 | ``set`` 39 | Matches any of the characters in . See :ref:`Syntax of character sets ` for the syntax of sets. 40 | 41 | ``@foo`` 42 | Expands to the definition of the appropriate regular expression macro. 43 | 44 | ``"..."`` 45 | Matches the sequence of characters in the string, in that order. 46 | 47 | ``r*`` 48 | Matches zero or more occurrences of . 49 | 50 | ``r+`` 51 | Matches one or more occurrences of . 52 | 53 | ``r?`` 54 | Matches zero or one occurrences of . 55 | 56 | ``r{n}`` 57 | Matches occurrences of . 58 | 59 | ``r{n,}`` 60 | Matches or more occurrences of . 61 | 62 | ``r{n,m}`` 63 | Matches between and (inclusive) occurrences of . 64 | 65 | .. _charsets: 66 | 67 | Syntax of character sets 68 | ------------------------ 69 | 70 | Character sets are the fundamental elements in a regular expression. 
71 | A character set is a pattern that matches a single character. 72 | The syntax of character sets is as follows: 73 | 74 | :: 75 | 76 | set := set '#' set0 77 | | set0 78 | 79 | set0 := @char [ '-' @char ] 80 | | '.' 81 | | @smac 82 | | '[' [^] { set } ']' 83 | | '~' set0 84 | 85 | The various character set constructions are: 86 | 87 | ``char`` 88 | The simplest character set is a single Unicode character. 89 | Note that special characters such as ``[`` and ``.`` must be escaped by prefixing them with ``\`` 90 | (see the lexical syntax, :ref:`Lexical syntax `, for the list of special characters). 91 | 92 | Certain non-printable characters have special escape sequences. 93 | These are: ``\a``, ``\b``, ``\f``, ``\n``, ``\r``, ``\t``, and ``\v``. 94 | Other characters can be represented by using their numerical character values 95 | (although this may be non-portable): 96 | ``\x0A`` is equivalent to ``\n``, for example. 97 | 98 | Whitespace characters are ignored; 99 | to represent a literal space, escape it with ``\``. 100 | 101 | ``char-char`` 102 | A range of characters can be expressed by separating the characters with a ‘\ ``-``\ ’, 103 | all the characters with codes in the given range are included in the set. 104 | Character ranges can also be non-portable. 105 | 106 | ``.`` 107 | The built-in set ‘\ ``.``\ ’ matches all characters except newline (``\n``). 108 | 109 | Equivalent to the set ``[\x00-\x10ffff] # \n``. 110 | 111 | ``set0 # set1`` 112 | Matches all the characters in that are not in . 113 | 114 | ``[sets]`` 115 | The union of . 116 | 117 | ``[^sets]`` 118 | The complement of the union of the . Equivalent to 119 | ‘\ ``. # [sets]``\ ’. 120 | 121 | ``~set`` 122 | The complement of . 123 | Equivalent to ‘\ ``. # set``\ ’ 124 | 125 | A set macro is written as ``$`` followed by an identifier. 126 | There are some builtin character set macros: 127 | 128 | ``$white`` 129 | Matches all whitespace characters, including newline. 
130 | 131 | Equivalent to the set ``[\ \t\n\f\v\r]``. 132 | 133 | ``$printable`` 134 | Matches all "printable characters". 135 | Currently this corresponds to Unicode code points 32 to 0x10ffff, 136 | although strictly speaking there are many non-printable code points in this region. 137 | In the future Alex may use a more precise definition of ``$printable``. 138 | 139 | Character set macros can be defined at the top of the file at the same time as regular expression macros 140 | (see :ref:`Regular Expression `). 141 | Here are some example character set macros: 142 | 143 | :: 144 | 145 | $lls = a-z -- little letters 146 | $not_lls = ~a-z -- anything but little letters 147 | $ls_ds = [a-zA-Z0-9] -- letters and digits 148 | $sym = [ \! \@ \# \$ ] -- the symbols !, @, #, and $ 149 | $sym_q_nl = [ \' \! \@ \# \$ \n ] -- the above symbols with ' and newline 150 | $quotable = $printable # \' -- any graphic character except ' 151 | $del = \127 -- ASCII DEL 152 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx >= 7.2.5 2 | sphinx_rtd_theme >= 1.3.0 3 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | ALEX=../dist/build/alex/alex 2 | HC=ghc -Wall -fno-warn-unused-binds -fno-warn-missing-signatures -fno-warn-unused-matches -fno-warn-name-shadowing -fno-warn-unused-imports -fno-warn-tabs 3 | 4 | HAPPY=happy 5 | HAPPY_OPTS=-agc 6 | 7 | ifeq "$(TARGETPLATFORM)" "i386-unknown-mingw32" 8 | exeext=.exe 9 | else 10 | exeext=.bin 11 | endif 12 | 13 | PROGS = lit Tokens Tokens_gscan words words_posn words_monad tiny haskell tiger 14 | 15 | ALEX_OPTS = --template=../data/ -g 16 | 17 | %.alex.hs : %.x 18 | $(ALEX) $(ALEX_OPTS) $< -o $@ 19 | 20 | %.happy.hs : %.y 21 | $(HAPPY) $(HAPPY_OPTS) $< -o $@ 22 | 
23 | %.o : %.hs 24 | $(HC) $(HC_OPTS) -c -o $@ $< 25 | 26 | CLEAN_FILES += *.info *.hi *.o *.bin *.exe 27 | 28 | all : $(addsuffix $(exeext),$(PROGS)) 29 | 30 | tiny$(exeext) : tiny.happy.hs Tokens_posn.alex.hs 31 | $(HC) $(HC_OPTS) -o $@ $^ 32 | 33 | lit$(exeext) : lit.alex.hs 34 | $(HC) $(HC_OPTS) -o $@ $^ 35 | 36 | Tokens$(exeext) : Tokens.alex.hs 37 | $(HC) $(HC_OPTS) -o $@ $^ 38 | 39 | Tokens_gscan$(exeext) : Tokens_gscan.alex.hs 40 | $(HC) $(HC_OPTS) -o $@ $^ 41 | 42 | words$(exeext) : words.alex.hs 43 | $(HC) $(HC_OPTS) -o $@ $^ 44 | 45 | words_posn$(exeext) : words_posn.alex.hs 46 | $(HC) $(HC_OPTS) -o $@ $^ 47 | 48 | words_monad$(exeext) : words_monad.alex.hs 49 | $(HC) $(HC_OPTS) -o $@ $^ 50 | 51 | haskell$(exeext) : haskell.alex.hs 52 | $(HC) $(HC_OPTS) -o $@ $^ 53 | 54 | tiger$(exeext) : tiger.alex.hs 55 | $(HC) $(HC_OPTS) -main-is TigerLexer -o $@ $^ 56 | 57 | .PHONY: clean 58 | clean: 59 | rm -f *.o *.hi $(addsuffix $(exeext),$(PROGS)) \ 60 | *.alex.hs *.happy.hs 61 | -------------------------------------------------------------------------------- /examples/Tokens.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main where 3 | } 4 | 5 | %wrapper "basic" 6 | 7 | $digit = 0-9 -- digits 8 | $alpha = [a-zA-Z] -- alphabetic characters 9 | 10 | tokens :- 11 | 12 | $white+ { \s -> White } 13 | "--".* { \s -> Comment } 14 | let { \s -> Let } 15 | in { \s -> In } 16 | $digit+ { \s -> Int (read s) } 17 | [\=\+\-\*\/\(\)] { \s -> Sym (head s) } 18 | $alpha [$alpha $digit \_ \']* { \s -> Var s } 19 | 20 | { 21 | -- Each right-hand side has type :: String -> Token 22 | 23 | -- The token type: 24 | data Token = 25 | White | 26 | Comment | 27 | Let | 28 | In | 29 | Sym Char | 30 | Var String | 31 | Int Int | 32 | Err 33 | deriving (Eq,Show) 34 | 35 | main = do 36 | s <- getContents 37 | print (alexScanTokens s) 38 | } 39 | -------------------------------------------------------------------------------- 
/examples/Tokens_gscan.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | } 4 | 5 | %wrapper "gscan" 6 | 7 | $digit = 0-9 -- digits 8 | $alpha = [a-zA-Z] -- alphabetic characters 9 | 10 | tokens :- 11 | 12 | $white+ ; 13 | "--".* ; 14 | let { tok (\p s -> Let p) } 15 | in { tok (\p s -> In p) } 16 | $digit+ { tok (\p s -> Int p (read s)) } 17 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head s)) } 18 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p s) } 19 | 20 | { 21 | -- Some action helpers: 22 | tok f p c str len cont (sc,state) = f p (take len str) : cont (sc,state) 23 | 24 | -- The token type: 25 | data Token = 26 | Let AlexPosn | 27 | In AlexPosn | 28 | Sym AlexPosn Char | 29 | Var AlexPosn String | 30 | Int AlexPosn Int | 31 | Err AlexPosn 32 | deriving (Eq,Show) 33 | 34 | main = do 35 | s <- getContents 36 | print (alexGScan stop undefined s) 37 | where 38 | stop p c "" (sc,s) = [] 39 | stop p c _ (sc,s) = error "lexical error" 40 | } 41 | -------------------------------------------------------------------------------- /examples/Tokens_posn.x: -------------------------------------------------------------------------------- 1 | { 2 | module Tokens_posn (Token(..), AlexPosn(..), alexScanTokens, token_posn) where 3 | } 4 | 5 | %wrapper "posn" 6 | 7 | $digit = 0-9 -- digits 8 | $alpha = [a-zA-Z] -- alphabetic characters 9 | 10 | tokens :- 11 | 12 | $white+ ; 13 | "--".* ; 14 | let { tok (\p s -> Let p) } 15 | in { tok (\p s -> In p) } 16 | $digit+ { tok (\p s -> Int p (read s)) } 17 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head s)) } 18 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p s) } 19 | 20 | { 21 | -- Each right-hand side has type :: AlexPosn -> String -> Token 22 | 23 | -- Some action helpers: 24 | tok f p s = f p s 25 | 26 | -- The token type: 27 | data Token = 28 | Let AlexPosn | 29 | In AlexPosn | 30 | Sym AlexPosn Char | 31 | Var AlexPosn String | 32 | Int AlexPosn Int 33 | 
deriving (Eq,Show) 34 | 35 | token_posn (Let p) = p 36 | token_posn (In p) = p 37 | token_posn (Sym p _) = p 38 | token_posn (Var p _) = p 39 | token_posn (Int p _) = p 40 | } 41 | -------------------------------------------------------------------------------- /examples/examples.x: -------------------------------------------------------------------------------- 1 | "example_rexps":- 2 | 3 | ::= $ | a+ -- = a*, zero or more as 4 | ::= aa* -- = a+, one or more as 5 | ::= $ | a -- = a?, zero or one as 6 | ::= a{3} -- = aaa, three as 7 | ::= a{3,5} -- = a{3}a?a? 8 | ::= a{3,} -- = a{3}a* 9 | 10 | 11 | "example_sets":- 12 | 13 | ::= a-z -- little letters 14 | ::= ~a-z -- anything but little letters 15 | ::= [a-zA-Z0-9] -- letters and digits 16 | ::= `!@@#$' -- the symbols !, @@, # and $ 17 | ::= [`!#@@$'^'^n] -- the above symbols with ' and newline 18 | ::= ^p#^' -- any graphic character except ' 19 | ::= ^127 -- ASCII DEL 20 | -------------------------------------------------------------------------------- /examples/haskell.x: -------------------------------------------------------------------------------- 1 | -- 2 | -- Lexical syntax for Haskell 98. 3 | -- 4 | -- (c) Simon Marlow 2003, with the caveat that much of this is 5 | -- translated directly from the syntax in the Haskell 98 report. 6 | -- 7 | -- This isn't a complete Haskell 98 lexer - it doesn't handle layout 8 | -- for one thing. However, it could be adapted with a small 9 | -- amount of effort. 
10 | -- 11 | 12 | { 13 | module Main (main) where 14 | import Data.Char (chr) 15 | } 16 | 17 | %wrapper "monad" 18 | 19 | $whitechar = [ \t\n\r\f\v] 20 | $special = [\(\)\,\;\[\]\`\{\}] 21 | 22 | $ascdigit = 0-9 23 | $unidigit = [] -- TODO 24 | $digit = [$ascdigit $unidigit] 25 | 26 | $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] 27 | $unisymbol = [] -- TODO 28 | $symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] 29 | 30 | $large = [A-Z \xc0-\xd6 \xd8-\xde] 31 | $small = [a-z \xdf-\xf6 \xf8-\xff \_] 32 | $alpha = [$small $large] 33 | 34 | $graphic = [$small $large $symbol $digit $special \:\"\'] 35 | 36 | $octit = 0-7 37 | $hexit = [0-9 A-F a-f] 38 | $idchar = [$alpha $digit \'] 39 | $symchar = [$symbol \:] 40 | $nl = [\n\r] 41 | 42 | @reservedid = 43 | as|case|class|data|default|deriving|do|else|hiding|if| 44 | import|in|infix|infixl|infixr|instance|let|module|newtype| 45 | of|qualified|then|type|where 46 | 47 | @reservedop = 48 | ".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>" 49 | 50 | @varid = $small $idchar* 51 | @conid = $large $idchar* 52 | @varsym = $symbol $symchar* 53 | @consym = \: $symchar* 54 | 55 | @decimal = $digit+ 56 | @octal = $octit+ 57 | @hexadecimal = $hexit+ 58 | @exponent = [eE] [\-\+] @decimal 59 | 60 | $cntrl = [$large \@\[\\\]\^\_] 61 | @ascii = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK 62 | | BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE 63 | | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM 64 | | SUB | ESC | FS | GS | RS | US | SP | DEL 65 | $charesc = [abfnrtv\\\"\'\&] 66 | @escape = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal) 67 | @gap = \\ $whitechar+ \\ 68 | @string = $graphic # [\"\\] | " " | @escape | @gap 69 | 70 | haskell :- 71 | 72 | <0> $white+ { skip } 73 | <0> "--"\-*[^$symbol].* { skip } 74 | 75 | "{-" { nested_comment } 76 | 77 | <0> $special { mkL LSpecial } 78 | 79 | <0> @reservedid { mkL LReservedId } 80 | <0> @conid \. 
@varid { mkL LQVarId } 81 | <0> @conid \. @conid { mkL LQConId } 82 | <0> @varid { mkL LVarId } 83 | <0> @conid { mkL LConId } 84 | 85 | <0> @reservedop { mkL LReservedOp } 86 | <0> @conid \. @varsym { mkL LVarSym } 87 | <0> @conid \. @consym { mkL LConSym } 88 | <0> @varsym { mkL LVarSym } 89 | <0> @consym { mkL LConSym } 90 | 91 | <0> @decimal 92 | | 0[oO] @octal 93 | | 0[xX] @hexadecimal { mkL LInteger } 94 | 95 | <0> @decimal \. @decimal @exponent? 96 | | @decimal @exponent { mkL LFloat } 97 | 98 | <0> \' ($graphic # [\'\\] | " " | @escape) \' 99 | { mkL LChar } 100 | 101 | <0> \" @string* \" { mkL LString } 102 | 103 | { 104 | data Lexeme = L AlexPosn LexemeClass String 105 | 106 | data LexemeClass 107 | = LInteger 108 | | LFloat 109 | | LChar 110 | | LString 111 | | LSpecial 112 | | LReservedId 113 | | LReservedOp 114 | | LVarId 115 | | LQVarId 116 | | LConId 117 | | LQConId 118 | | LVarSym 119 | | LQVarSym 120 | | LConSym 121 | | LQConSym 122 | | LEOF 123 | deriving Eq 124 | 125 | mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme 126 | mkL c (p,_,_,str) len = return (L p c (take len str)) 127 | 128 | nested_comment :: AlexInput -> Int -> Alex Lexeme 129 | nested_comment _ _ = do 130 | input <- alexGetInput 131 | go 1 input 132 | where go 0 input = do alexSetInput input; alexMonadScan 133 | go n input = do 134 | case alexGetByte input of 135 | Nothing -> err input 136 | Just (c,input) -> do 137 | case chr (fromIntegral c) of 138 | '-' -> do 139 | let temp = input 140 | case alexGetByte input of 141 | Nothing -> err input 142 | Just (125,input) -> go (n-1) input 143 | Just (45, input) -> go n temp 144 | Just (c,input) -> go n input 145 | '\123' -> do 146 | case alexGetByte input of 147 | Nothing -> err input 148 | Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input 149 | Just (c,input) -> go n input 150 | c -> go n input 151 | 152 | err input = do alexSetInput input; lexError "error in nested comment" 153 | 154 | lexError s = do 155 | 
(p,c,_,input) <- alexGetInput 156 | alexError (showPosn p ++ ": " ++ s ++ 157 | (if (not (null input)) 158 | then " before " ++ show (head input) 159 | else " at end of file")) 160 | 161 | scanner str = runAlex str $ do 162 | let loop i = do tok@(L _ cl _) <- alexMonadScan; 163 | if cl == LEOF 164 | then return i 165 | else do loop $! (i+1) 166 | loop 0 167 | 168 | alexEOF = return (L undefined LEOF "") 169 | 170 | showPosn (AlexPn _ line col) = show line ++ ':': show col 171 | 172 | main = do 173 | s <- getContents 174 | print (scanner s) 175 | } 176 | -------------------------------------------------------------------------------- /examples/lit.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE NPlusKPatterns #-} 3 | module Main (main) where 4 | } 5 | 6 | %wrapper "gscan" 7 | 8 | $space = $white # \n 9 | @blank = \n $space* 10 | @scrap = \n \> .* 11 | @comment = \n ( [^ \> $white] | $space+ ~$white ) .* 12 | 13 | lit :- 14 | 15 | @blank @scrap+ { scrap } 16 | @blank @comment* { comment } 17 | 18 | { 19 | scrap _ _ inp len cont st = strip len inp 20 | where 21 | strip 0 _ = cont st 22 | strip (n+1) (c:rst) = 23 | if c=='\n' 24 | then '\n':strip_nl n rst 25 | else c:strip n rst 26 | 27 | strip_nl (n+1) ('>':rst) = ' ':strip n rst 28 | strip_nl n rst = strip n rst 29 | 30 | comment _ _ inp len cont st = strip len inp 31 | where 32 | strip 0 _ = cont st 33 | strip (n+1) (c:rst) = if c=='\n' then c:strip n rst else strip n rst 34 | 35 | 36 | main:: IO () 37 | main = interact literate 38 | 39 | literate:: String -> String 40 | literate inp = drop 2 (alexGScan stop_act () ('\n':'\n':inp)) 41 | 42 | stop_act p _ "" st = [] 43 | stop_act p _ _ _ = error (msg ++ loc p ++ "\n") 44 | 45 | msg = "literate preprocessing error at " 46 | 47 | loc (AlexPn _ l c) = "line " ++ show(l-2) ++ ", column " ++ show c 48 | } 49 | -------------------------------------------------------------------------------- /examples/pp.x: 
-------------------------------------------------------------------------------- 1 | %{ 2 | import System 3 | import Char 4 | import Alex 5 | %} 6 | 7 | 8 | "pp_lx"/"pp_acts":- 9 | 10 | { ^s = ^w#^n } -- spaces and tabs, etc. 11 | { ^f = [A-Za-z0-9`~%-_.,/'] } -- file name character 12 | 13 | ::= ^#include^s+^"^f+^"^s*^n 14 | ::= .*^n 15 | 16 | 17 | %{ 18 | inc p c inp len cont st = pp fn >> cont st 19 | where 20 | fn = (takeWhile ('"'/=) . tail . dropWhile isSpace . drop 8) inp 21 | 22 | txt p c inp len cont st = putStr (take len inp) >> cont st 23 | 24 | 25 | main:: IO () 26 | main = getArgs >>= \args -> 27 | case args of 28 | [fn] -> pp fn 29 | _ -> error "usage: pp file\n" 30 | 31 | pp:: String -> IO () 32 | pp fn = readFile fn >>= \cts -> gscan pp_scan () cts 33 | 34 | pp_scan:: GScan () (IO ()) 35 | pp_scan = load_gscan (pp_acts,stop_act) pp_lx 36 | where 37 | stop_act _ _ _ _ = return () 38 | %} 39 | -------------------------------------------------------------------------------- /examples/state.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | } 4 | 5 | %wrapper "gscan" 6 | 7 | state :- 8 | 9 | $white+ { skip } 10 | \{ [^\}]* \} { code } 11 | [A-Za-z]+ { ide } 12 | 13 | { 14 | code _ _ inp len cont (sc,frags) = cont (sc,frag:frags) 15 | where 16 | frag = take (len-4) (drop 2 inp) 17 | 18 | ide _ _ inp len cont st = Ide (take len inp):cont st 19 | 20 | skip _ _ inp len cont st = cont st 21 | 22 | data Token = Ide String | Eof String | Err deriving Show 23 | 24 | stop_act _ _ "" (_,frags) = [Eof (unlines(reverse frags))] 25 | stop_act _ _ _ _ = [Err] 26 | 27 | tokens:: String -> [Token] 28 | tokens inp = alexGScan stop_act [] inp 29 | 30 | main:: IO () 31 | main = interact (show.tokens) 32 | } 33 | -------------------------------------------------------------------------------- /examples/tiger/Literals.txt: -------------------------------------------------------------------------------- 
1 | 123 "Hello, world!" 2 | -------------------------------------------------------------------------------- /examples/tiny.y: -------------------------------------------------------------------------------- 1 | -- An example demonstrating how to connect a Happy parser to an Alex lexer. 2 | { 3 | import Tokens_posn 4 | } 5 | 6 | %name calc 7 | %tokentype { Token } 8 | 9 | %token let { Let _ } 10 | in { In _ } 11 | int { Int _ $$ } 12 | var { Var _ $$ } 13 | '=' { Sym _ '=' } 14 | '+' { Sym _ '+' } 15 | '-' { Sym _ '-' } 16 | '*' { Sym _ '*' } 17 | '/' { Sym _ '/' } 18 | '(' { Sym _ '(' } 19 | ')' { Sym _ ')' } 20 | 21 | %% 22 | 23 | Exp :: { Exp } 24 | Exp : let var '=' Exp in Exp { LetE $2 $4 $6 } 25 | | Exp1 { $1 } 26 | 27 | Exp1 : Exp1 '+' Term { PlusE $1 $3 } 28 | | Exp1 '-' Term { MinusE $1 $3 } 29 | | Term { $1 } 30 | 31 | Term : Term '*' Factor { TimesE $1 $3 } 32 | | Term '/' Factor { DivE $1 $3 } 33 | | Factor { $1 } 34 | 35 | Factor : '-' Atom { NegE $2 } 36 | | Atom { $1 } 37 | 38 | Atom : int { IntE $1 } 39 | | var { VarE $1 } 40 | | '(' Exp ')' { $2 } 41 | 42 | { 43 | data Exp = 44 | LetE String Exp Exp | 45 | PlusE Exp Exp | 46 | MinusE Exp Exp | 47 | TimesE Exp Exp | 48 | DivE Exp Exp | 49 | NegE Exp | 50 | IntE Int | 51 | VarE String 52 | deriving Show 53 | 54 | 55 | main:: IO () 56 | main = interact (show.runCalc) 57 | 58 | runCalc :: String -> Exp 59 | runCalc = calc . 
alexScanTokens 60 | 61 | happyError :: [Token] -> a 62 | happyError tks = error ("Parse error at " ++ lcn ++ "\n") 63 | where 64 | lcn = case tks of 65 | [] -> "end of file" 66 | tk:_ -> "line " ++ show l ++ ", column " ++ show c 67 | where 68 | AlexPn _ l c = token_posn tk 69 | } 70 | -------------------------------------------------------------------------------- /examples/words.x: -------------------------------------------------------------------------------- 1 | -- Performance test; run with input /usr/dict/words, for example 2 | { 3 | module Main (main) where 4 | } 5 | 6 | %wrapper "basic" 7 | 8 | words :- 9 | 10 | $white+ ; 11 | [A-Za-z0-9\'\-]+ { \s -> () } 12 | 13 | { 14 | main = do 15 | s <- getContents 16 | print (length (alexScanTokens s)) 17 | } 18 | -------------------------------------------------------------------------------- /examples/words_monad.x: -------------------------------------------------------------------------------- 1 | -- Performance test; run with input /usr/dict/words, for example 2 | { 3 | module Main (main) where 4 | } 5 | 6 | %wrapper "monad" 7 | 8 | words :- 9 | 10 | $white+ { skip } 11 | [A-Za-z0-9\'\-]+ { word } 12 | 13 | { 14 | word (_,_,_,input) len = return (take len input) 15 | 16 | scanner str = runAlex str $ do 17 | let loop i = do tok <- alexMonadScan 18 | if tok == "stopped." || tok == "error." 19 | then return i 20 | else do let i' = i+1 in i' `seq` loop i' 21 | loop 0 22 | 23 | alexEOF = return "stopped." 
24 | 25 | main = do 26 | s <- getContents 27 | print (scanner s) 28 | } 29 | -------------------------------------------------------------------------------- /examples/words_posn.x: -------------------------------------------------------------------------------- 1 | -- Performance test; run with input /usr/dict/words, for example 2 | { 3 | module Main (main) where 4 | } 5 | 6 | %wrapper "posn" 7 | 8 | words :- 9 | 10 | $white+ ; 11 | [A-Za-z0-9\'\-]+ { \p s -> () } 12 | 13 | { 14 | main = do 15 | s <- getContents 16 | print (length (alexScanTokens s)) 17 | } 18 | -------------------------------------------------------------------------------- /fix-whitespace.yaml: -------------------------------------------------------------------------------- 1 | # Files checked by `fix-whitespace` for whitespace violations. 2 | # 3 | # - Trailing whitespace. 4 | # - Trailing empty lines. 5 | # - Final line not ending in a newline characters. 6 | # - Tabs. 7 | 8 | excluded-dirs: 9 | - .git 10 | - .stack-work 11 | - dist 12 | - dist-newstyle 13 | - old-docbook 14 | 15 | included-files: 16 | # Some file types make use of tabs which we do not convert just now (2023-06-20), 17 | # so these are commented out. 18 | - .gitignore 19 | # - .mailmap 20 | - LICENSE 21 | # - "*.bat" 22 | - "*.c" 23 | - "*.cabal" 24 | - "*.hs" 25 | - "*.md" 26 | - "*.project" 27 | - "*.py" 28 | - "*.rst" 29 | - "*.sh" 30 | - "*.txt" 31 | # - "*.x" 32 | # - "*.y" 33 | -------------------------------------------------------------------------------- /make-sdist.sh: -------------------------------------------------------------------------------- 1 | # Put the Happy-generated .hs files in the right place in the source dist. 2 | 3 | # Not necessary any more, Cabal does this: 4 | 5 | # set -e 6 | # rm -f dist/alex-*.tar.gz 7 | # rm -rf dist/alex-*/ 8 | # ./Setup sdist 9 | # cd dist 10 | # tar xvzf alex-*.tar.gz 11 | # cd alex-*/ 12 | # mkdir dist 13 | # mkdir dist/build 14 | # mv alex dist/build 15 | # cd .. 
16 | # tar cvzf alex-*.tar.gz alex-*/ 17 | 18 | # Steps for doing a release: 19 | # * Source: 20 | # - do the above 21 | # - upload the dist to haskell.org:alex/dist/${version} 22 | # * Documentation: 23 | # - cd doc 24 | # - make html 25 | # - mv alex alex-html 26 | # - tar cvzf alex-doc-html-${version}.tar.gz alex-html 27 | # - scp alex-doc-html-${version}.tar.gz haskell.org:alex/doc 28 | # - ssh haskell.org 29 | # - cd alex/doc 30 | # - tar xvzf alex-doc-html-${version}.tar.gz 31 | # - rm -rf html-OLD 32 | # - mv html html-OLD && mv alex-html html 33 | # * Update the web page (~/darcs/www/alex/index.html), and push it 34 | -------------------------------------------------------------------------------- /src/CharSet.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- 3 | -- CharSet.hs, part of Alex 4 | -- 5 | -- (c) Chris Dornan 1995-2000, Simon Marlow 2003 6 | -- 7 | -- An abstract CharSet type for Alex. To begin with we'll use Alex's 8 | -- original definition of sets as functions, then later will 9 | -- transition to something that will work better with Unicode. 
10 | -- 11 | -- ----------------------------------------------------------------------------} 12 | 13 | {-# LANGUAGE OverloadedLists #-} 14 | 15 | module CharSet ( 16 | setSingleton, 17 | 18 | Encoding(..), 19 | 20 | Byte, 21 | ByteSet, 22 | byteSetSingleton, 23 | byteRanges, 24 | byteSetRange, 25 | 26 | CharSet, -- abstract 27 | emptyCharSet, 28 | charSetSingleton, 29 | charSet, 30 | charSetMinus, 31 | charSetComplement, 32 | charSetRange, 33 | charSetUnion, 34 | charSetQuote, 35 | setUnions, 36 | byteSetToArray, 37 | byteSetElems, 38 | byteSetElem 39 | ) where 40 | 41 | import Data.Array ( Array, array ) 42 | import Data.Char ( chr, ord ) 43 | import Data.Maybe ( catMaybes ) 44 | import Data.Word ( Word8 ) 45 | import Data.List.NonEmpty ( pattern (:|), (<|) ) 46 | import qualified Data.List.NonEmpty as List1 47 | 48 | import UTF8 ( List1, encode ) 49 | import Data.Ranged 50 | ( Boundary( BoundaryAbove, BoundaryAboveAll, BoundaryBelow, BoundaryBelowAll ) 51 | , DiscreteOrdered, Range( Range ), RSet 52 | , makeRangedSet 53 | , rSetDifference, rSetEmpty, rSetHas, rSetNegation, rSetRanges, rSetUnion, rSingleton 54 | ) 55 | 56 | -- import Data.Semigroup (sconcat) 57 | -- import qualified Data.Foldable as Fold 58 | 59 | type Byte = Word8 60 | -- Implementation as functions 61 | type CharSet = RSet Char 62 | type ByteSet = RSet Byte 63 | -- type Utf8Set = RSet [Byte] 64 | type Utf8Range = Span (List1 Byte) 65 | 66 | data Encoding = Latin1 | UTF8 67 | deriving (Eq, Show) 68 | 69 | emptyCharSet :: CharSet 70 | emptyCharSet = rSetEmpty 71 | 72 | byteSetElem :: ByteSet -> Byte -> Bool 73 | byteSetElem = rSetHas 74 | 75 | charSetSingleton :: Char -> CharSet 76 | charSetSingleton = rSingleton 77 | 78 | setSingleton :: DiscreteOrdered a => a -> RSet a 79 | setSingleton = rSingleton 80 | 81 | charSet :: [Char] -> CharSet 82 | charSet = setUnions . 
fmap charSetSingleton 83 | 84 | charSetMinus :: CharSet -> CharSet -> CharSet 85 | charSetMinus = rSetDifference 86 | 87 | charSetUnion :: CharSet -> CharSet -> CharSet 88 | charSetUnion = rSetUnion 89 | 90 | setUnions :: DiscreteOrdered a => [RSet a] -> RSet a 91 | setUnions = foldr rSetUnion rSetEmpty 92 | 93 | charSetComplement :: CharSet -> CharSet 94 | charSetComplement = rSetNegation 95 | 96 | charSetRange :: Char -> Char -> CharSet 97 | charSetRange c1 c2 = makeRangedSet [Range (BoundaryBelow c1) (BoundaryAbove c2)] 98 | 99 | {-# INLINE bytes #-} 100 | bytes :: [Byte] 101 | bytes = [minBound..maxBound] 102 | 103 | byteSetToArray :: ByteSet -> Array Byte Bool 104 | byteSetToArray set = array (minBound, maxBound) [(c, rSetHas set c) | c <- bytes] 105 | 106 | byteSetElems :: ByteSet -> [Byte] 107 | byteSetElems set = filter (rSetHas set) bytes 108 | 109 | charToRanges :: Encoding -> CharSet -> [Utf8Range] 110 | charToRanges Latin1 = 111 | map (fmap ((:| []) . fromIntegral . ord)) -- Span [Byte] 112 | . catMaybes 113 | . fmap (charRangeToCharSpan False) 114 | . rSetRanges 115 | charToRanges UTF8 = 116 | concat -- Span [Byte] 117 | . fmap toUtfRange -- [Span [Byte]] 118 | . fmap (fmap UTF8.encode) -- Span [Byte] 119 | . catMaybes 120 | . fmap (charRangeToCharSpan True) 121 | . 
rSetRanges 122 | 123 | -- | Turns a range of characters expressed as a pair of UTF-8 byte sequences into a set of ranges, in which each range of the resulting set is between pairs of sequences of the same length 124 | toUtfRange :: Span (List1 Byte) -> [Span (List1 Byte)] 125 | toUtfRange (Span x y) = List1.toList $ fix x y 126 | 127 | fix :: List1 Byte -> List1 Byte -> List1 (Span (List1 Byte)) 128 | fix x y 129 | | length x == length y = [Span x y] 130 | | length x == 1 = Span x [0x7F] <| fix [0xC2,0x80] y 131 | | length x == 2 = Span x [0xDF,0xBF] <| fix [0xE0,0x80,0x80] y 132 | | length x == 3 = Span x [0xEF,0xBF,0xBF] <| fix [0xF0,0x80,0x80,0x80] y 133 | | otherwise = error "fix: incorrect input given" 134 | 135 | 136 | byteRangeToBytePair :: Span a -> (a, a) 137 | byteRangeToBytePair (Span x y) = (x, y) 138 | 139 | data Span a = Span a a -- lower bound inclusive, higher bound exclusive 140 | -- (SDM: upper bound inclusive, surely?) 141 | instance Functor Span where 142 | fmap f (Span x y) = Span (f x) (f y) 143 | 144 | charRangeToCharSpan :: Bool -> Range Char -> Maybe (Span Char) 145 | charRangeToCharSpan _ (Range BoundaryAboveAll _) = Nothing 146 | charRangeToCharSpan _ (Range (BoundaryAbove c) _) | c == maxBound = Nothing 147 | charRangeToCharSpan _ (Range _ BoundaryBelowAll) = Nothing 148 | charRangeToCharSpan _ (Range _ (BoundaryBelow c)) | c == minBound = Nothing 149 | charRangeToCharSpan uni (Range x y) = Just (Span (l x) (h y)) 150 | where l b = case b of 151 | BoundaryBelowAll -> '\0' 152 | BoundaryBelow a -> a 153 | BoundaryAbove a -> succ a 154 | BoundaryAboveAll -> error "panic: charRangeToCharSpan" 155 | h b = case b of 156 | BoundaryBelowAll -> error "panic: charRangeToCharSpan" 157 | BoundaryBelow a -> pred a 158 | BoundaryAbove a -> a 159 | BoundaryAboveAll | uni -> chr 0x10ffff 160 | | otherwise -> chr 0xff 161 | 162 | byteRanges :: Encoding -> CharSet -> [(List1 Byte, List1 Byte)] 163 | byteRanges enc = fmap byteRangeToBytePair . 
charToRanges enc 164 | 165 | byteSetRange :: Byte -> Byte -> ByteSet 166 | byteSetRange c1 c2 = makeRangedSet [Range (BoundaryBelow c1) (BoundaryAbove c2)] 167 | 168 | byteSetSingleton :: Byte -> ByteSet 169 | byteSetSingleton = rSingleton 170 | 171 | -- TODO: More efficient generated code! 172 | charSetQuote :: CharSet -> String 173 | charSetQuote s = "(\\c -> " ++ foldr (\x y -> x ++ " || " ++ y) "False" (map quoteRange (rSetRanges s)) ++ ")" 174 | where quoteRange (Range l h) = quoteL l ++ " && " ++ quoteH h 175 | quoteL (BoundaryAbove a) = "c > " ++ show a 176 | quoteL (BoundaryBelow a) = "c >= " ++ show a 177 | quoteL (BoundaryAboveAll) = "False" 178 | quoteL (BoundaryBelowAll) = "True" 179 | quoteH (BoundaryAbove a) = "c <= " ++ show a 180 | quoteH (BoundaryBelow a) = "c < " ++ show a 181 | quoteH (BoundaryAboveAll) = "True" 182 | quoteH (BoundaryBelowAll) = "False" 183 | -------------------------------------------------------------------------------- /src/DFA.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- 3 | -- DFA.hs, part of Alex 4 | -- 5 | -- (c) Chris Dornan 1995-2000, Simon Marlow 2003 6 | -- 7 | -- This module generates a DFA from a scanner by first converting it 8 | -- to an NFA and then converting the NFA with the subset construction. 9 | -- 10 | -- See the chapter on `Finite Automata and Lexical Analysis' in the 11 | -- dragon book for an excellent overview of the algorithms in this 12 | -- module. 13 | -- 14 | -- ----------------------------------------------------------------------------} 15 | 16 | module DFA (scanner2dfa) where 17 | 18 | import Data.Array ( (!) 
) 19 | import Data.Function ( on ) 20 | import Data.Maybe ( fromJust ) 21 | 22 | import qualified Data.IntMap as IntMap 23 | import qualified Data.IntSet as IntSet 24 | import qualified Data.Map as Map 25 | import qualified Data.List as List 26 | 27 | import AbsSyn 28 | import NFA 29 | import CharSet 30 | 31 | {- Defined in the Scan Module 32 | 33 | -- (This section should logically belong to the DFA module but it has been 34 | -- placed here to make this module self-contained.) 35 | -- 36 | -- `DFA' provides an alternative to `Scanner' (described in the RExp module); 37 | -- it can be used directly to scan text efficiently. Additionally it has an 38 | -- extra place holder for holding action functions for generating 39 | -- application-specific tokens. When this place holder is not being used, the 40 | -- unit type will be used. 41 | -- 42 | -- Each state in the automaton consist of a list of `Accept' values, descending 43 | -- in priority, and an array mapping characters to new states. As the array 44 | -- may only cover a sub-range of the characters, a default state number is 45 | -- given in the third field. By convention, all transitions to the -1 state 46 | -- represent invalid transitions. 47 | -- 48 | -- A list of accept states is provided for as the original specification may 49 | -- have been ambiguous, in which case the highest priority token should be 50 | -- taken (the one appearing earliest in the specification); this can not be 51 | -- calculated when the DFA is generated in all cases as some of the tokens may 52 | -- be associated with leading or trailing context or start codes. 53 | -- 54 | -- `scan_token' (see above) can deal with unconditional accept states more 55 | -- efficiently than those associated with context; to save it testing each time 56 | -- whether the list of accept states contains an unconditional state, the flag 57 | -- in the first field of `St' is set to true whenever the list contains an 58 | -- unconditional state. 
59 | -- 60 | -- The `Accept' structure contains the priority of the token being accepted 61 | -- (lower numbers => higher priorities), the name of the token, a place holder 62 | -- that can be used for storing the `action' function for constructing the 63 | -- token from the input text and the scanner's state, a list of start codes 64 | -- (listing the start codes that the scanner must be in for the token to be 65 | -- accepted; empty => no restriction), the leading and trailing context (both 66 | -- `Nothing' if there is none). 67 | -- 68 | -- The leading context consists simply of a character predicate that will 69 | -- return true if the last character read is acceptable. The trailing context 70 | -- consists of an alternative starting state within the DFA; if this `sub-dfa' 71 | -- turns up any accepting state when applied to the residual input then the 72 | -- trailing context is acceptable (see `scan_token' above). 73 | 74 | type DFA a = Array SNum (State a) 75 | 76 | type SNum = Int 77 | 78 | data State a = St Bool [Accept a] SNum (Array Char SNum) 79 | 80 | data Accept a = Acc Int String a [StartCode] (MB(Char->Bool)) (MB SNum) 81 | 82 | type StartCode = Int 83 | -} 84 | 85 | 86 | -- Scanners are converted to DFAs by converting them to NFAs first. Converting 87 | -- an NFA to a DFA works by identifying the states of the DFA with subsets of 88 | -- the NFA. The PartDFA is used to construct the DFA; it is essentially a DFA 89 | -- in which the states are represented directly by state sets of the NFA. 90 | -- `nfa2pdfa' constructs the partial DFA from the NFA by searching for all the 91 | -- transitions from a given list of state sets, initially containing the start 92 | -- state of the partial DFA, until all possible state sets have been considered 93 | -- The final DFA is then constructed with a `mk_dfa'. 
94 | 95 | scanner2dfa:: Encoding -> Scanner -> [StartCode] -> DFA SNum Code 96 | scanner2dfa enc scanner scs = nfa2dfa scs (scanner2nfa enc scanner scs) 97 | 98 | nfa2dfa:: [StartCode] -> NFA -> DFA SNum Code 99 | nfa2dfa scs nfa = mk_int_dfa nfa (nfa2pdfa nfa pdfa (dfa_start_states pdfa)) 100 | where 101 | pdfa = new_pdfa n_starts nfa 102 | n_starts = length scs -- number of start states 103 | 104 | -- `nfa2pdfa' works by taking the next outstanding state set to be considered 105 | -- and ignoring it if the state is already in the partial DFA, otherwise 106 | -- generating all possible transitions from it, adding the new state to the 107 | -- partial DFA and continuing the closure with the extra states. Note the way 108 | -- it incorporates the trailing context references into the search (by 109 | -- including `rctx_ss' in the search). 110 | 111 | nfa2pdfa:: NFA -> DFA StateSet Code -> [StateSet] -> DFA StateSet Code 112 | nfa2pdfa _ pdfa [] = pdfa 113 | nfa2pdfa nfa pdfa (ss:umkd) 114 | | ss `in_pdfa` pdfa = nfa2pdfa nfa pdfa umkd 115 | | otherwise = nfa2pdfa nfa pdfa' umkd' 116 | where 117 | pdfa' = add_pdfa ss (State accs (IntMap.fromList ss_outs)) pdfa 118 | 119 | umkd' = rctx_sss ++ map snd ss_outs ++ umkd 120 | 121 | -- for each character, the set of states that character would take 122 | -- us to from the current set of states in the NFA. 123 | ss_outs :: [(Int, StateSet)] 124 | ss_outs = [ (fromIntegral ch, mk_ss nfa ss') 125 | | ch <- byteSetElems $ setUnions [p | (p,_) <- outs], 126 | let ss' = [ s' | (p,s') <- outs, byteSetElem p ch ], 127 | not (null ss') 128 | ] 129 | 130 | rctx_sss = [ mk_ss nfa [s] 131 | | Acc _ _ _ (RightContextRExp s) <- accs ] 132 | 133 | outs :: [(ByteSet,SNum)] 134 | outs = [ out | s <- ss, out <- nst_outs (nfa ! s) ] 135 | 136 | accs = sort_accs [ acc | s <- ss, acc <- nst_accs (nfa ! 
s) ] 137 | 138 | -- `sort_accs' sorts a list of accept values into descending order of priority, 139 | -- eliminating any elements that follow an unconditional accept value. 140 | 141 | sort_accs :: [Accept a] -> [Accept a] 142 | sort_accs accs = foldr chk [] $ List.sortBy (compare `on` accPrio) accs 143 | where 144 | chk acc@(Acc _ _ Nothing NoRightContext) _ = [acc] 145 | chk acc rst = acc:rst 146 | 147 | 148 | {------------------------------------------------------------------------------ 149 | State Sets and Partial DFAs 150 | ------------------------------------------------------------------------------} 151 | 152 | 153 | 154 | -- A `PartDFA' is a partially constructed DFA in which the states are 155 | -- represented by sets of states of the original NFA. It is represented by a 156 | -- triple consisting of the start state of the partial DFA, the NFA from which 157 | -- it is derived and a map from state sets to states of the partial DFA. The 158 | -- state set for a given list of NFA states is calculated by taking the epsilon 159 | -- closure of all the states, sorting the result with duplicates eliminated. 160 | 161 | type StateSet = [SNum] 162 | 163 | new_pdfa :: Int -> NFA -> DFA StateSet a 164 | new_pdfa starts nfa 165 | = DFA { dfa_start_states = [ List.sort $ nst_cl $ nfa ! n | n <- [0 .. starts - 1] ] 166 | , dfa_states = Map.empty 167 | } 168 | 169 | -- starts is the number of start states 170 | 171 | -- constructs the epsilon-closure of a set of NFA states 172 | mk_ss :: NFA -> [SNum] -> StateSet 173 | mk_ss nfa l = IntSet.toAscList $ IntSet.fromList [ s' | s <- l, s' <- nst_cl (nfa ! 
s) ] 174 | 175 | add_pdfa:: StateSet -> State StateSet a -> DFA StateSet a -> DFA StateSet a 176 | add_pdfa ss pst (DFA st mp) = DFA st (Map.insert ss pst mp) 177 | 178 | in_pdfa:: StateSet -> DFA StateSet a -> Bool 179 | in_pdfa ss (DFA _ mp) = ss `Map.member` mp 180 | 181 | -- Construct a DFA with numbered states, from a DFA whose states are 182 | -- sets of states from the original NFA. 183 | 184 | mk_int_dfa:: NFA -> DFA StateSet a -> DFA SNum a 185 | mk_int_dfa nfa (DFA start_states mp) 186 | = DFA [0 .. length start_states-1] 187 | (Map.fromList [ (lookup' st, cnv pds) | (st, pds) <- Map.toAscList mp ]) 188 | where 189 | mp' = Map.fromList (zip (start_states ++ 190 | (map fst . Map.toAscList) (foldr Map.delete mp start_states)) [0..]) 191 | 192 | lookup' = fromJust . flip Map.lookup mp' 193 | 194 | cnv :: State StateSet a -> State SNum a 195 | cnv (State accs as) = State accs' as' 196 | where 197 | as' = IntMap.mapWithKey (\_ch s -> lookup' s) as 198 | 199 | accs' = map cnv_acc accs 200 | cnv_acc (Acc p a lctx rctx) = Acc p a lctx rctx' 201 | where rctx' = 202 | case rctx of 203 | RightContextRExp s -> 204 | RightContextRExp (lookup' (mk_ss nfa [s])) 205 | other -> other 206 | -------------------------------------------------------------------------------- /src/DFS.hs: -------------------------------------------------------------------------------- 1 | {------------------------------------------------------------------------------ 2 | DFS 3 | 4 | This module is a portable version of the ghc-specific `DFS.g.hs', which is 5 | itself a straightforward encoding of the Launchbury/King paper on linear graph 6 | algorithms. This module uses balanced binary trees instead of mutable arrays 7 | to implement the depth-first search so the complexity of the algorithms is 8 | n.log(n) instead of linear. 9 | 10 | The vertices of the graphs manipulated by these modules are labelled with the 11 | integers from 0 to n-1 where n is the number of vertices in the graph. 
12 | 13 | The module's principle products are `mk_graph' for constructing a graph from an 14 | edge list, `t_close' for taking the transitive closure of a graph and `scc' 15 | for generating a list of strongly connected components; the components are 16 | listed in dependency order and each component takes the form of a `dfs tree' 17 | (see Launchberry and King). Thus if each edge (fid,fid') encodes the fact that 18 | function `fid' references function `fid'' in a program then `scc' performs a 19 | dependency analysis. 20 | 21 | Chris Dornan, 23-Jun-94, 2-Jul-96, 29-Aug-96, 29-Sep-97 22 | ------------------------------------------------------------------------------} 23 | 24 | module DFS where 25 | 26 | import Data.Array ( (!), accumArray, listArray ) 27 | import Data.Set ( Set ) 28 | import qualified Data.Set as Set 29 | 30 | -- The result of a depth-first search of a graph is a list of trees, 31 | -- `GForest'. `post_order' provides a post-order traversal of a forest. 32 | 33 | type GForest = [GTree] 34 | data GTree = GNode Int GForest 35 | 36 | postorder:: GForest -> [Int] 37 | postorder ts = po ts [] 38 | where 39 | po ts' l = foldr po_tree l ts' 40 | 41 | po_tree (GNode a ts') l = po ts' (a:l) 42 | 43 | list_tree:: GTree -> [Int] 44 | list_tree t = l_t t [] 45 | where 46 | l_t (GNode x ts) l = foldr l_t (x:l) ts 47 | 48 | 49 | -- Graphs are represented by a pair of an integer, giving the number of nodes 50 | -- in the graph, and function mapping each vertex (0..n-1, n=size of graph) to 51 | -- its neighbouring nodes. `mk_graph' takes a size and an edge list and 52 | -- constructs a graph. 
53 | 54 | type Graph = (Int,Int->[Int]) 55 | type Edge = (Int,Int) 56 | 57 | mk_graph:: Int -> [Edge] -> Graph 58 | mk_graph sz es = (sz,\v->ar!v) 59 | where 60 | ar = accumArray (flip (:)) [] (0,sz-1) [(v,v')| (v,v')<-es] 61 | 62 | vertices:: Graph -> [Int] 63 | vertices (sz,_) = [0..sz-1] 64 | 65 | out:: Graph -> Int -> [Int] 66 | out (_,f) = f 67 | 68 | edges:: Graph -> [Edge] 69 | edges g = [(v,v')| v<-vertices g, v'<-out g v] 70 | 71 | rev_edges:: Graph -> [Edge] 72 | rev_edges g = [(v',v)| v<-vertices g, v'<-out g v] 73 | 74 | reverse_graph:: Graph -> Graph 75 | reverse_graph g@(sz,_) = mk_graph sz (rev_edges g) 76 | 77 | 78 | -- `t_close' takes the transitive closure of a graph; `scc' returns the 79 | -- strongly connected components of the graph and `top_sort' topologically 80 | -- sorts the graph. Note that the array is given one more element in order 81 | -- to avoid problems with empty arrays. 82 | 83 | t_close:: Graph -> Graph 84 | t_close g@(sz,_) = (sz,\v->ar!v) 85 | where 86 | ar = listArray (0,sz) ([postorder(dff' [v] g)| v<-vertices g]++[und]) 87 | und = error "t_close" 88 | 89 | scc:: Graph -> GForest 90 | scc g = dff' (reverse (top_sort (reverse_graph g))) g 91 | 92 | top_sort:: Graph -> [Int] 93 | top_sort = postorder . dff 94 | 95 | 96 | -- `dff' computes the depth-first forest. It works by unrolling the 97 | -- potentially infinite tree from each of the vertices with `generate_g' and 98 | -- then pruning out the duplicates. 
99 | 100 | dff:: Graph -> GForest 101 | dff g = dff' (vertices g) g 102 | 103 | dff':: [Int] -> Graph -> GForest 104 | dff' vs (_bs, f) = prune (map (generate_g f) vs) 105 | 106 | generate_g:: (Int->[Int]) -> Int -> GTree 107 | generate_g f v = GNode v (map (generate_g f) (f v)) 108 | 109 | prune:: GForest -> GForest 110 | prune ts = snd(chop(empty_int,ts)) 111 | where 112 | empty_int:: Set Int 113 | empty_int = Set.empty 114 | 115 | chop:: (Set Int,GForest) -> (Set Int,GForest) 116 | chop p@(_, []) = p 117 | chop (vstd,GNode v ts:us) = 118 | if v `Set.member` vstd 119 | then chop (vstd,us) 120 | else let vstd1 = Set.insert v vstd 121 | (vstd2,ts') = chop (vstd1,ts) 122 | (vstd3,us') = chop (vstd2,us) 123 | in 124 | (vstd3,GNode v ts' : us') 125 | 126 | 127 | {-- Some simple test functions 128 | 129 | test:: Graph Char 130 | test = mk_graph (char_bds ('a','h')) (mk_pairs "eefggfgegdhfhged") 131 | where 132 | mk_pairs [] = [] 133 | mk_pairs (a:b:l) = (a,b):mk_pairs l 134 | 135 | -} 136 | -------------------------------------------------------------------------------- /src/Data/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005, Paul Johnson 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in 13 | the documentation and/or other materials provided with the 14 | distribution. 15 | 16 | * Neither the name of the Ranged Sets project nor the names of its 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 28 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /src/Data/Ranged.hs: -------------------------------------------------------------------------------- 1 | module Data.Ranged ( 2 | module Data.Ranged.Boundaries, 3 | module Data.Ranged.Ranges, 4 | module Data.Ranged.RangedSet 5 | ) where 6 | 7 | import Data.Ranged.Boundaries 8 | import Data.Ranged.Ranges 9 | import Data.Ranged.RangedSet 10 | -------------------------------------------------------------------------------- /src/Data/Ranged/Boundaries.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- | 3 | -- Module : Data.Ranged.Boundaries 4 | -- Copyright : (c) Paul Johnson 2006 5 | -- License : BSD-style 6 | -- Maintainer : paul@cogito.org.uk 7 | -- Stability : experimental 8 | -- Portability : portable 9 | -- 10 | ----------------------------------------------------------------------------- 11 | 12 | module Data.Ranged.Boundaries ( 13 | DiscreteOrdered (..), 14 | enumAdjacent, 15 | boundedAdjacent, 16 | boundedBelow, 17 | Boundary (..), 18 | above, 19 | (/>/) 
20 | ) where 21 | 22 | import Data.Ratio 23 | import Data.Word 24 | 25 | infix 4 />/ 26 | 27 | {- | 28 | Distinguish between dense and sparse ordered types. A dense type is 29 | one in which any two values @v1 < v2@ have a third value @v3@ such that 30 | @v1 < v3 < v2@. 31 | 32 | In theory the floating types are dense, although in practice they can only have 33 | finitely many values. This class treats them as dense. 34 | 35 | Tuples up to 4 members are declared as instances. Larger tuples may be added 36 | if necessary. 37 | 38 | Most values of sparse types have an @adjacentBelow@, such that, for all x: 39 | 40 | > case adjacentBelow x of 41 | > Just x1 -> adjacent x1 x 42 | > Nothing -> True 43 | 44 | The exception is for bounded types when @x == lowerBound@. For dense types 45 | @adjacentBelow@ always returns 'Nothing'. 46 | 47 | This approach was suggested by Ben Rudiak-Gould on comp.lang.functional. 48 | -} 49 | 50 | class Ord a => DiscreteOrdered a where 51 | -- | Two values @x@ and @y@ are adjacent if @x < y@ and there does not 52 | -- exist a third value between them. Always @False@ for dense types. 53 | adjacent :: a -> a -> Bool 54 | -- | The value immediately below the argument, if it can be determined. 55 | adjacentBelow :: a -> Maybe a 56 | 57 | 58 | -- Implementation note: the precise rules about unbounded enumerated vs 59 | -- bounded enumerated types are difficult to express using Haskell 98, so 60 | -- the prelude types are listed individually here. 
-- Bounded enumerations share the boundedAdjacent / boundedBelow helpers;
-- dense types (floats, ratios, lists) are never adjacent.

instance DiscreteOrdered Bool where
   adjacent = boundedAdjacent
   adjacentBelow = boundedBelow

instance DiscreteOrdered Ordering where
   adjacent = boundedAdjacent
   adjacentBelow = boundedBelow

instance DiscreteOrdered Char where
   adjacent = boundedAdjacent
   adjacentBelow = boundedBelow

instance DiscreteOrdered Int where
   adjacent = boundedAdjacent
   adjacentBelow = boundedBelow

instance DiscreteOrdered Integer where
   adjacent = enumAdjacent
   adjacentBelow = Just . pred

instance DiscreteOrdered Double where
   adjacent _ _ = False
   adjacentBelow = const Nothing

instance DiscreteOrdered Float where
   adjacent _ _ = False
   adjacentBelow = const Nothing

instance (Integral a) => DiscreteOrdered (Ratio a) where
   adjacent _ _ = False
   adjacentBelow = const Nothing

instance Ord a => DiscreteOrdered [a] where
   adjacent _ _ = False
   adjacentBelow = const Nothing

-- Tuples are adjacent when all but the last component are equal and the
-- last components are themselves adjacent.
instance (Ord a, DiscreteOrdered b) => DiscreteOrdered (a, b)
   where
   adjacent (x1, x2) (y1, y2) = (x1 == y1) && adjacent x2 y2
   adjacentBelow (x1, x2) = fmap (\x2' -> (x1, x2')) (adjacentBelow x2)

instance (Ord a, Ord b, DiscreteOrdered c) => DiscreteOrdered (a, b, c)
   where
   adjacent (x1, x2, x3) (y1, y2, y3) =
      (x1 == y1) && (x2 == y2) && adjacent x3 y3
   adjacentBelow (x1, x2, x3) = fmap (\x3' -> (x1, x2, x3')) (adjacentBelow x3)

instance (Ord a, Ord b, Ord c, DiscreteOrdered d) =>
      DiscreteOrdered (a, b, c, d)
   where
   adjacent (x1, x2, x3, x4) (y1, y2, y3, y4) =
      (x1 == y1) && (x2 == y2) && (x3 == y3) && adjacent x4 y4
   adjacentBelow (x1, x2, x3, x4) =
      fmap (\x4' -> (x1, x2, x3, x4')) (adjacentBelow x4)

instance DiscreteOrdered Word8 where
   -- NOTE: deliberately uses wrapping arithmetic rather than a bounds check;
   -- `x + 1 == y` at x == maxBound compares 0 to y, which is False unless
   -- y == 0, matching the original semantics exactly.
   adjacent x y = x + 1 == y
   adjacentBelow x = if x == 0 then Nothing else Just (pred x)


-- | Check adjacency for sparse enumerated types (i.e. where there
-- is no value between @x@ and @succ x@).
enumAdjacent :: (Ord a, Enum a) => a -> a -> Bool
enumAdjacent x y = succ x == y

-- | Check adjacency, allowing for case where x = maxBound.  Use as the
-- definition of "adjacent" for bounded enumerated types such as Int and Char.
-- The @x < y@ guard ensures @succ@ is never taken at @maxBound@.
boundedAdjacent :: (Ord a, Enum a) => a -> a -> Bool
boundedAdjacent x y = x < y && succ x == y


-- | The usual implementation of 'adjacentBelow' for bounded enumerated types.
boundedBelow :: (Eq a, Enum a, Bounded a) => a -> Maybe a
boundedBelow x
   | x == minBound = Nothing
   | otherwise     = Just (pred x)

{- |
A Boundary is a division of an ordered type into values above
and below the boundary.  No value can sit on a boundary.

Known bug: for Bounded types

   * @BoundaryAbove maxBound < BoundaryAboveAll@

   * @BoundaryBelow minBound > BoundaryBelowAll@

This is incorrect because there are no possible values in
between the left and right sides of these inequalities.
-}

data Boundary a =
   -- | The argument is the highest value below the boundary.
   BoundaryAbove a |
   -- | The argument is the lowest value above the boundary.
   BoundaryBelow a |
   -- | The boundary above all values.
   BoundaryAboveAll |
   -- | The boundary below all values.
   BoundaryBelowAll
   deriving (Show)

-- | True if the value is above the boundary, false otherwise.
169 | above :: Ord v => Boundary v -> v -> Bool 170 | above (BoundaryAbove b) v = v > b 171 | above (BoundaryBelow b) v = v >= b 172 | above BoundaryAboveAll _ = False 173 | above BoundaryBelowAll _ = True 174 | 175 | -- | Same as 'above', but with the arguments reversed for more intuitive infix 176 | -- usage. 177 | (/>/) :: Ord v => v -> Boundary v -> Bool 178 | (/>/) = flip above 179 | 180 | instance (DiscreteOrdered a) => Eq (Boundary a) where 181 | b1 == b2 = compare b1 b2 == EQ 182 | 183 | instance (DiscreteOrdered a) => Ord (Boundary a) where 184 | -- Comparison alogrithm based on brute force and ignorance: 185 | -- enumerate all combinations. 186 | 187 | compare boundary1 boundary2 = 188 | case boundary1 of 189 | BoundaryAbove b1 -> 190 | case boundary2 of 191 | BoundaryAbove b2 -> compare b1 b2 192 | BoundaryBelow b2 -> 193 | if b1 < b2 194 | then 195 | if adjacent b1 b2 then EQ else LT 196 | else GT 197 | BoundaryAboveAll -> LT 198 | BoundaryBelowAll -> GT 199 | BoundaryBelow b1 -> 200 | case boundary2 of 201 | BoundaryAbove b2 -> 202 | if b1 > b2 203 | then 204 | if adjacent b2 b1 then EQ else GT 205 | else LT 206 | BoundaryBelow b2 -> compare b1 b2 207 | BoundaryAboveAll -> LT 208 | BoundaryBelowAll -> GT 209 | BoundaryAboveAll -> 210 | case boundary2 of 211 | BoundaryAboveAll -> EQ 212 | _ -> GT 213 | BoundaryBelowAll -> 214 | case boundary2 of 215 | BoundaryBelowAll -> EQ 216 | _ -> LT 217 | -------------------------------------------------------------------------------- /src/Data/Ranged/RangedSet.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | module Data.Ranged.RangedSet ( 4 | -- ** Ranged Set Type 5 | RSet, 6 | rSetRanges, 7 | -- ** Ranged Set construction functions and their preconditions 8 | makeRangedSet, 9 | unsafeRangedSet, 10 | validRangeList, 11 | normaliseRangeList, 12 | rSingleton, 13 | rSetUnfold, 14 | -- ** Predicates 15 | rSetIsEmpty, 16 | rSetIsFull, 17 | (-?-), 
rSetHas, 18 | (-<=-), rSetIsSubset, 19 | (-<-), rSetIsSubsetStrict, 20 | -- ** Set Operations 21 | (-\/-), rSetUnion, 22 | (-/\-), rSetIntersection, 23 | (-!-), rSetDifference, 24 | rSetNegation, 25 | -- ** Useful Sets 26 | rSetEmpty, 27 | rSetFull, 28 | ) where 29 | 30 | import Data.Ranged.Boundaries 31 | import Data.Ranged.Ranges 32 | #if !MIN_VERSION_base(4,11,0) 33 | import Data.Semigroup 34 | #endif 35 | 36 | import qualified Data.List as List 37 | 38 | infixl 7 -/\- 39 | infixl 6 -\/-, -!- 40 | infixl 5 -<=-, -<-, -?- 41 | 42 | -- | An RSet (for Ranged Set) is a list of ranges. The ranges must be sorted 43 | -- and not overlap. 44 | newtype DiscreteOrdered v => RSet v = RSet {rSetRanges :: [Range v]} 45 | deriving (Eq, Show, Ord) 46 | 47 | instance DiscreteOrdered a => Semigroup (RSet a) where 48 | (<>) = rSetUnion 49 | 50 | instance DiscreteOrdered a => Monoid (RSet a) where 51 | mempty = rSetEmpty 52 | mappend = (<>) 53 | 54 | -- | Determine if the ranges in the list are both in order and non-overlapping. 55 | -- If so then they are suitable input for the unsafeRangedSet function. 56 | validRangeList :: DiscreteOrdered v => [Range v] -> Bool 57 | validRangeList rs = and $ 58 | all (\ (Range lower upper) -> lower <= upper) rs : 59 | zipWith (\ (Range _ upper1) (Range lower2 _) -> upper1 <= lower2) rs (drop 1 rs) 60 | 61 | 62 | -- | Rearrange and merge the ranges in the list so that they are in order and 63 | -- non-overlapping. 64 | normaliseRangeList :: DiscreteOrdered v => [Range v] -> [Range v] 65 | normaliseRangeList = normalise . List.sort . filter (not . rangeIsEmpty) 66 | 67 | 68 | -- Private routine: normalise a range list that is known to be already sorted. 69 | -- This precondition is not checked. 
normalise :: DiscreteOrdered v => [Range v] -> [Range v]
normalise (r1:r2:rest)
   | overlapping = normalise (merged : rest)
   | otherwise   = r1 : normalise (r2 : rest)
   where
      -- The input is sorted, so r1 and r2 overlap exactly when r1's upper
      -- bound reaches r2's lower bound.
      overlapping = rangeUpper r1 >= rangeLower r2
      merged = Range (rangeLower r1) (max (rangeUpper r1) (rangeUpper r2))
normalise rs = rs


-- | Create a new Ranged Set from a list of ranges.  The list may contain
-- ranges that overlap or are not in ascending order.
makeRangedSet :: DiscreteOrdered v => [Range v] -> RSet v
makeRangedSet = RSet . normaliseRangeList


-- | Create a new Ranged Set from a list of ranges.  @validRangeList ranges@
-- must return @True@.  This precondition is not checked.
unsafeRangedSet :: DiscreteOrdered v => [Range v] -> RSet v
unsafeRangedSet = RSet

-- | Create a Ranged Set from a single element.
rSingleton :: DiscreteOrdered v => v -> RSet v
rSingleton = unsafeRangedSet . (:[]) . singletonRange

-- | True if the set has no members.
rSetIsEmpty :: DiscreteOrdered v => RSet v -> Bool
rSetIsEmpty = null . rSetRanges


-- | True if the negation of the set has no members.
rSetIsFull :: DiscreteOrdered v => RSet v -> Bool
rSetIsFull = rSetIsEmpty . rSetNegation


-- | True if the value is within the ranged set.  Infix precedence is left 5.
rSetHas, (-?-) :: DiscreteOrdered v => RSet v -> v -> Bool
rSetHas (RSet rs) value = go rs
   where
      -- The ranges are sorted, so searching can stop as soon as the value
      -- is no longer above the lower bound of the current range.
      go [] = False
      go (r:rest)
         | value />/ rangeLower r = rangeHas r value || go rest
         | otherwise              = False

(-?-) = rSetHas

-- | True if the first argument is a subset of the second argument, or is
-- equal.
--
-- Infix precedence is left 5.
rSetIsSubset, (-<=-) :: DiscreteOrdered v => RSet v -> RSet v -> Bool
rSetIsSubset rs1 rs2 = rSetIsEmpty (rs1 -!- rs2)
(-<=-) = rSetIsSubset


-- | True if the first argument is a strict subset of the second argument.
--
-- Infix precedence is left 5.
rSetIsSubsetStrict, (-<-) :: DiscreteOrdered v => RSet v -> RSet v -> Bool
rSetIsSubsetStrict rs1 rs2 =
   rSetIsEmpty (rs1 -!- rs2) && not (rSetIsEmpty (rs2 -!- rs1))

(-<-) = rSetIsSubsetStrict

-- | Set union for ranged sets.  Infix precedence is left 6.
rSetUnion, (-\/-) :: DiscreteOrdered v => RSet v -> RSet v -> RSet v
-- Implementation note: rSetUnion merges the two lists into a single
-- sorted list and then calls normalise to combine overlapping ranges.
rSetUnion (RSet ls1) (RSet ls2) = RSet (normalise (merge ls1 ls2))
   where
      merge xs [] = xs
      merge [] ys = ys
      merge xs@(x:xt) ys@(y:yt)
         | x < y     = x : merge xt ys
         | otherwise = y : merge xs yt

(-\/-) = rSetUnion

-- | Set intersection for ranged sets.  Infix precedence is left 7.
rSetIntersection, (-/\-) :: DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetIntersection (RSet ls1) (RSet ls2) =
   RSet (filter (not . rangeIsEmpty) (merge ls1 ls2))
   where
      -- Intersect the heads, then advance whichever list's head range
      -- finishes first; empty intersections are filtered out above.
      merge xs@(x:xt) ys@(y:yt) =
         rangeIntersection x y
            : if rangeUpper x < rangeUpper y
                 then merge xt ys
                 else merge xs yt
      merge _ _ = []

(-/\-) = rSetIntersection


-- | Set difference.  Infix precedence is left 6.
rSetDifference, (-!-) :: DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetDifference rs1 rs2 = rs1 -/\- rSetNegation rs2
(-!-) = rSetDifference


-- | Set negation.
176 | rSetNegation :: DiscreteOrdered a => RSet a -> RSet a 177 | rSetNegation set = RSet $ ranges1 $ setBounds1 178 | where 179 | ranges1 (b1:b2:bs) = Range b1 b2 : ranges1 bs 180 | ranges1 [BoundaryAboveAll] = [] 181 | ranges1 [b] = [Range b BoundaryAboveAll] 182 | ranges1 _ = [] 183 | setBounds1 = case setBounds of 184 | (BoundaryBelowAll : bs) -> bs 185 | _ -> BoundaryBelowAll : setBounds 186 | setBounds = bounds $ rSetRanges set 187 | bounds (r:rs) = rangeLower r : rangeUpper r : bounds rs 188 | bounds _ = [] 189 | 190 | -- | The empty set. 191 | rSetEmpty :: DiscreteOrdered a => RSet a 192 | rSetEmpty = RSet [] 193 | 194 | -- | The set that contains everything. 195 | rSetFull :: DiscreteOrdered a => RSet a 196 | rSetFull = RSet [Range BoundaryBelowAll BoundaryAboveAll] 197 | 198 | -- | Construct a range set. 199 | rSetUnfold :: DiscreteOrdered a => 200 | Boundary a 201 | -- ^ A first lower boundary. 202 | -> (Boundary a -> Boundary a) 203 | -- ^ A function from a lower boundary to an upper boundary, which must 204 | -- return a result greater than the argument (not checked). 205 | -> (Boundary a -> Maybe (Boundary a)) 206 | -- ^ A function from a lower boundary to @Maybe@ the successor lower 207 | -- boundary, which must return a result greater than the argument 208 | -- (not checked). If ranges overlap then they will be merged. 
209 | -> RSet a 210 | rSetUnfold bound upperFunc succFunc = RSet $ normalise $ ranges1 bound 211 | where 212 | ranges1 b = 213 | Range b (upperFunc b) 214 | : case succFunc b of 215 | Just b2 -> ranges1 b2 216 | Nothing -> [] 217 | -------------------------------------------------------------------------------- /src/Data/Ranged/Ranges.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- 3 | -- Module : Data.Ranged.Ranges 4 | -- Copyright : (c) Paul Johnson 2006 5 | -- License : BSD-style 6 | -- Maintainer : paul@cogito.org.uk 7 | -- Stability : experimental 8 | -- Portability : portable 9 | -- 10 | ----------------------------------------------------------------------------- 11 | 12 | -- | A range has an upper and lower boundary. 13 | module Data.Ranged.Ranges ( 14 | -- ** Construction 15 | Range (..), 16 | emptyRange, 17 | fullRange, 18 | -- ** Predicates 19 | rangeIsEmpty, 20 | rangeIsFull, 21 | rangeOverlap, 22 | rangeEncloses, 23 | rangeSingletonValue, 24 | -- ** Membership 25 | rangeHas, 26 | rangeListHas, 27 | -- ** Set Operations 28 | singletonRange, 29 | rangeIntersection, 30 | rangeUnion, 31 | rangeDifference, 32 | ) where 33 | 34 | import Data.Ranged.Boundaries 35 | 36 | -- | A Range has upper and lower boundaries. 
data Range v = Range {rangeLower, rangeUpper :: Boundary v}

-- Two ranges are equal when both are empty, or when their boundaries agree.
instance (DiscreteOrdered a) => Eq (Range a) where
   r1 == r2 =
      (rangeIsEmpty r1 && rangeIsEmpty r2)
      || (rangeLower r1 == rangeLower r2 && rangeUpper r1 == rangeUpper r2)


-- Empty ranges sort before all non-empty ones; otherwise compare the
-- boundary pairs lexicographically.
instance (DiscreteOrdered a) => Ord (Range a) where
   compare r1 r2
      | r1 == r2        = EQ
      | rangeIsEmpty r1 = LT
      | rangeIsEmpty r2 = GT
      | otherwise       =
         compare (rangeLower r1, rangeUpper r1) (rangeLower r2, rangeUpper r2)

instance (Show a, DiscreteOrdered a) => Show (Range a) where
   show r
      | rangeIsEmpty r = "Empty"
      | rangeIsFull r  = "All x"
      | otherwise      =
         case rangeSingletonValue r of
            Just v  -> "x == " ++ show v
            Nothing -> lowerBound ++ "x" ++ upperBound
      where
         lowerBound = case rangeLower r of
            BoundaryBelowAll -> ""
            BoundaryBelow v  -> show v ++ " <= "
            BoundaryAbove v  -> show v ++ " < "
            BoundaryAboveAll -> error "show Range: lower bound is BoundaryAboveAll"
         upperBound = case rangeUpper r of
            BoundaryBelowAll -> error "show Range: upper bound is BoundaryBelowAll"
            BoundaryBelow v  -> " < " ++ show v
            BoundaryAbove v  -> " <= " ++ show v
            BoundaryAboveAll -> ""


-- | True if the value is within the range.
rangeHas :: Ord v => Range v -> v -> Bool
rangeHas (Range b1 b2) v = (v />/ b1) && not (v />/ b2)


-- | True if the value is within one of the ranges.
rangeListHas :: Ord v => [Range v] -> v -> Bool
rangeListHas rs v = any (\r -> rangeHas r v) rs


-- | The empty range
emptyRange :: Range v
emptyRange = Range BoundaryAboveAll BoundaryBelowAll


-- | The full range.  All values are within it.
fullRange :: Range v
fullRange = Range BoundaryBelowAll BoundaryAboveAll


-- | A range containing a single value
singletonRange :: v -> Range v
singletonRange v = Range (BoundaryBelow v) (BoundaryAbove v)


-- | If the range is a singleton, returns @Just@ the value.  Otherwise returns
-- @Nothing@.
--
-- Known bug: This always returns @Nothing@ for ranges including
-- @BoundaryBelowAll@ or @BoundaryAboveAll@.  For bounded types this can be
-- incorrect.  For instance, the following range only contains one value:
--
-- > Range (BoundaryBelow maxBound) BoundaryAboveAll
rangeSingletonValue :: DiscreteOrdered v => Range v -> Maybe v
rangeSingletonValue (Range (BoundaryBelow lo) (BoundaryBelow hi))
   | adjacent lo hi = Just lo
   | otherwise      = Nothing
rangeSingletonValue (Range (BoundaryBelow lo) (BoundaryAbove hi))
   | lo == hi  = Just lo
   | otherwise = Nothing
rangeSingletonValue (Range (BoundaryAbove lo) (BoundaryBelow hi)) =
   do -- Maybe monad: the only candidate is the value just below hi; it is
      -- the singleton exactly when the value below *that* is lo.
      hi'  <- adjacentBelow hi
      hi'' <- adjacentBelow hi'
      if lo == hi'' then return hi' else Nothing
rangeSingletonValue (Range (BoundaryAbove lo) (BoundaryAbove hi))
   | adjacent lo hi = Just hi
   | otherwise      = Nothing
rangeSingletonValue (Range _ _) = Nothing

-- | A range is empty unless its upper boundary is greater than its lower
-- boundary.
rangeIsEmpty :: DiscreteOrdered v => Range v -> Bool
rangeIsEmpty (Range lower upper) = upper <= lower


-- | A range is full if it contains every possible value.
rangeIsFull :: DiscreteOrdered v => Range v -> Bool
rangeIsFull = (== fullRange)

-- | Two ranges overlap if their intersection is non-empty.
rangeOverlap :: DiscreteOrdered v => Range v -> Range v -> Bool
rangeOverlap r1 r2 =
   not (rangeIsEmpty r1)
   && not (rangeIsEmpty r2)
   && not (rangeUpper r1 <= rangeLower r2 || rangeUpper r2 <= rangeLower r1)


-- | The first range encloses the second if every value in the second range is
-- also within the first range.  If the second range is empty then this is
-- always true.
rangeEncloses :: DiscreteOrdered v => Range v -> Range v -> Bool
rangeEncloses r1 r2 =
   (rangeLower r1 <= rangeLower r2 && rangeUpper r2 <= rangeUpper r1)
   || rangeIsEmpty r2


-- | Intersection of two ranges, if any.
rangeIntersection :: DiscreteOrdered v => Range v -> Range v -> Range v
rangeIntersection r1@(Range lower1 upper1) r2@(Range lower2 upper2)
   | rangeIsEmpty r1 || rangeIsEmpty r2 = emptyRange
   | otherwise = Range (max lower1 lower2) (min upper1 upper2)


-- | Union of two ranges.  Returns one or two results.
--
-- If there are two results then they are guaranteed to have a non-empty
-- gap in between, but may not be in ascending order.
rangeUnion :: DiscreteOrdered v => Range v -> Range v -> [Range v]
rangeUnion r1@(Range lower1 upper1) r2@(Range lower2 upper2)
   | rangeIsEmpty r1 = [r2]
   | rangeIsEmpty r2 = [r1]
   | touching        = [Range (min lower1 lower2) (max upper1 upper2)]
   | otherwise       = [r1, r2]
   where
      -- The ranges touch or overlap when there is no gap between them.
      touching = max lower1 lower2 <= min upper1 upper2


-- | @range1@ minus @range2@.  Returns zero, one or two results.  Multiple
-- results are guaranteed to have non-empty gaps in between, but may not be in
-- ascending order.
180 | rangeDifference :: DiscreteOrdered v => Range v -> Range v -> [Range v] 181 | 182 | rangeDifference r1@(Range lower1 upper1) (Range lower2 upper2) = 183 | -- There are six possibilities 184 | -- 1: r2 completely less than r1 185 | -- 2: r2 overlaps bottom of r1 186 | -- 3: r2 encloses r1 187 | -- 4: r1 encloses r2 188 | -- 5: r2 overlaps top of r1 189 | -- 6: r2 completely greater than r1 190 | if intersects 191 | then -- Cases 2,3,4,5 192 | filter (not . rangeIsEmpty) [Range lower1 lower2, Range upper2 upper1] 193 | else -- Cases 1, 6 194 | [r1] 195 | where 196 | intersects = (max lower1 lower2) < (min upper1 upper2) 197 | -------------------------------------------------------------------------------- /src/Info.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- 3 | -- Info.hs, part of Alex 4 | -- 5 | -- (c) Simon Marlow 2003 6 | -- 7 | -- Generate a human-readable rendition of the state machine. 8 | -- 9 | -- ----------------------------------------------------------------------------} 10 | 11 | module Info (infoDFA) where 12 | 13 | import AbsSyn 14 | import qualified Data.Map as Map 15 | import qualified Data.IntMap as IntMap 16 | import Util 17 | 18 | -- ----------------------------------------------------------------------------- 19 | -- Generate a human readable dump of the state machine 20 | 21 | infoDFA :: Int -> String -> DFA SNum Code -> ShowS 22 | infoDFA _ func_nm dfa 23 | = str "Scanner : " . str func_nm . nl 24 | . str "States : " . shows (length dfa_list) . nl 25 | . nl . infoDFA' 26 | where 27 | dfa_list = Map.toAscList (dfa_states dfa) 28 | 29 | infoDFA' = interleave_shows nl (map infoStateN dfa_list) 30 | 31 | infoStateN (i,s) = str "State " . shows i . nl . infoState s 32 | 33 | infoState :: State SNum Code -> ShowS 34 | infoState (State accs out) 35 | = foldr (.) id (map infoAccept accs) 36 | . infoArr out . 
nl 37 | 38 | infoArr out 39 | = char '\t' . interleave_shows (str "\n\t") 40 | (map infoTransition (IntMap.toAscList out)) 41 | 42 | infoAccept (Acc p act lctx rctx) 43 | = str "\tAccept" . paren (shows p) . space 44 | . outputLCtx lctx . space 45 | . showRCtx rctx 46 | . (case act of 47 | Nothing -> id 48 | Just code -> str " { " . str code . str " }") 49 | . nl 50 | 51 | infoTransition (char',state) 52 | = str (ljustify 8 (show char')) 53 | . str " -> " 54 | . shows state 55 | 56 | outputLCtx Nothing 57 | = id 58 | outputLCtx (Just set) 59 | = paren (show set ++) . char '^' 60 | 61 | -- outputArr arr 62 | -- = str "Array.array " . shows (bounds arr) . space 63 | -- . shows (assocs arr) 64 | -------------------------------------------------------------------------------- /src/ParseMonad.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- 3 | -- ParseMonad.hs, part of Alex 4 | -- 5 | -- (c) Simon Marlow 2003 6 | -- 7 | -- ----------------------------------------------------------------------------} 8 | 9 | module ParseMonad ( 10 | AlexInput, alexInputPrevChar, alexGetChar, alexGetByte, 11 | AlexPosn(..), alexStartPos, 12 | Warning(..), warnIfNullable, 13 | P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac, 14 | setStartCode, getStartCode, getInput, setInput, 15 | ) where 16 | 17 | import Control.Monad ( liftM, ap, when ) 18 | import Data.Map ( Map ) 19 | import Data.List.NonEmpty ( pattern (:|) ) 20 | import Data.Word ( Word8 ) 21 | import qualified Data.Map as Map 22 | 23 | import AbsSyn hiding ( StartCode ) 24 | import CharSet ( CharSet ) 25 | import UTF8 26 | 27 | -- ----------------------------------------------------------------------------- 28 | -- The input type 29 | --import Codec.Binary.UTF8.Light as UTF8 30 | 31 | type Byte = Word8 32 | 33 | type AlexInput = (AlexPosn, -- current position, 34 | Char, -- previous 
char
                  [Byte],   -- pending UTF-8 bytes of the current char (see 'alexGetByte')
                  String)   -- current input string

-- | The character that was read immediately before the current position.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,c,_,_) = c


-- | Decode the next whole character from the input.
--   Only defined while the byte buffer is empty: the scanner consumes either
--   whole characters (this function) or single bytes ('alexGetByte') at any
--   given position, never a mix — hence the 'undefined' catch-all below.
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (_, _, [], []) = Nothing
alexGetChar (p, _, [], c:s) = p' `seq` Just (c, (p', c, [], s))
  where
    p' = alexMove p c
alexGetChar (_, _ , _:_, _) = undefined -- hide compiler warning

-- | Fetch the next byte of the input.  Characters are expanded to their
--   UTF-8 encoding on demand ('UTF8.encode'); any bytes beyond the first are
--   buffered in the 'AlexInput' and drained by subsequent calls before the
--   next character is decoded.
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (p, c, b:bs, s) = Just (b, (p, c, bs, s))
alexGetByte (_, _, [], []) = Nothing
alexGetByte (p, _, [], c:s) = p' `seq` Just (b, (p', c, bs, s))
  where
    p' = alexMove p c
    b :| bs = UTF8.encode c

-- -----------------------------------------------------------------------------
-- Token positions

-- `Posn' records the location of a token in the input text.  It has three
-- fields: the address (number of characters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
-- assuming the usual eight character tab stops.

data AlexPosn = AlexPn !Int !Int !Int
  deriving (Eq, Show, Ord)

-- | Position of the start of the file: address 0, line 1, column 1.
alexStartPos :: AlexPosn
alexStartPos = AlexPn 0 1 1

-- | Advance a position over one character: a tab jumps to the next 8-column
--   tab stop, a newline bumps the line and resets the column to 1, anything
--   else advances the column by one.  The address always advances by one
--   character (not byte).
alexMove :: AlexPosn -> Char -> AlexPosn
alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1)
alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1) 1
alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)

-- -----------------------------------------------------------------------------
-- Alex lexing/parsing monad

data Warning
  = WarnNullableRExp
    { _warnPos  :: AlexPosn -- ^ The position of the code following the regex.
    , _warnText :: String   -- ^ Warning text.
    }

-- | A parse error: optional position plus message.  The position is 'Nothing'
--   when none is available (see 'lookupRMac').
type ParseError = (Maybe AlexPosn, String)
type StartCode = Int

-- | Internal state threaded through the 'P' monad.
data PState = PState
  { warnings  :: [Warning]          -- ^ Stack of warnings, top = last warning.
  , smac_env  :: Map String CharSet -- ^ Definitions of set macros.
  , rmac_env  :: Map String RExp    -- ^ Definitions of regex macros.
  , startcode :: Int                -- ^ Current lexer start code.
  , input     :: AlexInput          -- ^ Remaining input.
  }

-- | The parse monad: a state transformer over 'PState' that can fail with a
--   'ParseError' (the 'Either' short-circuits the whole computation).
newtype P a = P { unP :: PState -> Either ParseError (PState,a) }

instance Functor P where
  fmap = liftM

instance Applicative P where
  pure a = P $ \env -> Right (env,a)
  (<*>) = ap

instance Monad P where
  (P m) >>= k = P $ \env -> case m env of
    Left err -> Left err
    Right (env',ok) -> unP (k ok) env'
  return = pure

-- | Run the parser on given input.
runP :: String
     -- ^ Input string.
     -> (Map String CharSet, Map String RExp)
     -- ^ Character set and regex definitions.
     -> P a
     -- ^ Parsing computation.
     -> Either ParseError ([Warning], a)
     -- ^ List of warnings in first-to-last order, result.
runP str (senv,renv) (P p)
  = case p initial_state of
      Left err -> Left err
      -- Warnings are pushed onto a stack, so reverse them to get
      -- chronological (first-to-last) order.
      Right (s, a) -> Right (reverse (warnings s), a)
  where
    initial_state = PState
      { warnings  = []
      , smac_env  = senv
      , rmac_env  = renv
      , startcode = 0
      , input     = (alexStartPos, '\n', [], str)
      }

-- | Fail with the given message at the current input position.
failP :: String -> P a
failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)

-- Macros are expanded during parsing, to simplify the abstract
-- syntax.  The parsing monad passes around two environments mapping
-- macro names to sets and regexps respectively.

-- | Look up a set macro (written @$foo@ in the surface syntax); fails at the
--   given position if the macro is not defined.
lookupSMac :: (AlexPosn,String) -> P CharSet
lookupSMac (posn,smac)
  = P $ \s@PState{ smac_env = senv } ->
      case Map.lookup smac senv of
        Just ok -> Right (s,ok)
        Nothing -> Left (Just posn, "unknown set macro: $" ++ smac)

-- | Look up a regex macro (written @foo in the surface syntax).
--   NOTE(review): no position is available here, so the error carries
--   'Nothing'; consider taking an 'AlexPosn' as 'lookupSMac' does.
lookupRMac :: String -> P RExp
lookupRMac rmac
  = P $ \s@PState{ rmac_env = renv } ->
      case Map.lookup rmac renv of
        Just ok -> Right (s,ok)
        -- Regex macros are introduced by '@' (see @rmac in Scan.x), matching
        -- the "$" sigil used in the set-macro message above.  The message
        -- previously said "%", which does not match the surface syntax.
        Nothing -> Left (Nothing, "unknown regex macro: @" ++ rmac)

-- | Define a new set macro.
newSMac :: String -> CharSet -> P ()
newSMac smac set
  = P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ())

-- | Define a new regex macro.
newRMac :: String -> RExp -> P ()
newRMac rmac rexp
  = P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ())

-- | Set the current lexer start code.
setStartCode :: StartCode -> P ()
setStartCode sc = P $ \s -> Right (s{ startcode = sc }, ())

-- | Get the current lexer start code.
getStartCode :: P StartCode
getStartCode = P $ \s -> Right (s, startcode s)

-- | Get the remaining input.
getInput :: P AlexInput
getInput = P $ \s -> Right (s, input s)

-- | Replace the remaining input.
setInput :: AlexInput -> P ()
setInput inp = P $ \s -> Right (s{ input = inp }, ())

-- | Add a warning if given regular expression is nullable
-- unless the user wrote the regex 'Eps'.
warnIfNullable
  :: RExp     -- ^ Regular expression.
  -> AlexPosn -- ^ Position associated to regular expression.
  -> P ()
-- If the user wrote @()@, they wanted to match the empty sequence!
-- Thus, skip the warning then.
warnIfNullable Eps _ = return ()
warnIfNullable r pos = when (nullable r) $ P $ \ s ->
  Right (s{ warnings = WarnNullableRExp pos w : warnings s}, ())
  where
  w = unwords
    [ "Regular expression"
    , show r
    , "matches the empty string."
192 | ] 193 | -------------------------------------------------------------------------------- /src/Parser.y: -------------------------------------------------------------------------------- 1 | { 2 | -- ----------------------------------------------------------------------------- 3 | -- 4 | -- Parser.y, part of Alex 5 | -- 6 | -- (c) Simon Marlow 2003 7 | -- 8 | -- ----------------------------------------------------------------------------- 9 | 10 | {-# OPTIONS_GHC -w #-} 11 | 12 | module Parser ( parse, P ) where 13 | import AbsSyn 14 | import Scan 15 | import CharSet 16 | import ParseMonad hiding ( StartCode ) 17 | 18 | import Data.Char 19 | --import Debug.Trace 20 | } 21 | 22 | %tokentype { Token } 23 | 24 | %name parse 25 | 26 | %monad { P } { (>>=) } { return } 27 | %lexer { lexer } { T _ EOFT } 28 | 29 | %token 30 | '.' { T _ (SpecialT '.') } 31 | ';' { T _ (SpecialT ';') } 32 | '<' { T _ (SpecialT '<') } 33 | '>' { T _ (SpecialT '>') } 34 | ',' { T _ (SpecialT ',') } 35 | '$' { T _ (SpecialT '$') } 36 | '|' { T _ (SpecialT '|') } 37 | '*' { T _ (SpecialT '*') } 38 | '+' { T _ (SpecialT '+') } 39 | '?' 
{ T _ (SpecialT '?') } 40 | '{' { T _ (SpecialT '{') } 41 | '}' { T _ (SpecialT '}') } 42 | '(' { T _ (SpecialT '(') } 43 | ')' { T _ (SpecialT ')') } 44 | '#' { T _ (SpecialT '#') } 45 | '~' { T _ (SpecialT '~') } 46 | '-' { T _ (SpecialT '-') } 47 | '[' { T _ (SpecialT '[') } 48 | ']' { T _ (SpecialT ']') } 49 | '^' { T _ (SpecialT '^') } 50 | '/' { T _ (SpecialT '/') } 51 | ZERO { T _ ZeroT } 52 | STRING { T _ (StringT $$) } 53 | BIND { T _ (BindT $$) } 54 | ID { T _ (IdT $$) } 55 | CODE { T _ (CodeT _) } 56 | CHAR { T _ (CharT $$) } 57 | NUM { T _ (NumT $$) } 58 | SMAC { T _ (SMacT _) } 59 | RMAC { T _ (RMacT $$) } 60 | SMAC_DEF { T _ (SMacDefT $$) } 61 | RMAC_DEF { T _ (RMacDefT $$) } 62 | WRAPPER { T _ WrapperT } 63 | ENCODING { T _ EncodingT } 64 | ACTIONTYPE { T _ ActionTypeT } 65 | TOKENTYPE { T _ TokenTypeT } 66 | TYPECLASS { T _ TypeClassT } 67 | %% 68 | 69 | alex :: { (Maybe (AlexPosn,Code), [Directive], Scanner, Maybe (AlexPosn,Code)) } 70 | : maybe_code directives macdefs scanner maybe_code { ($1,$2,$4,$5) } 71 | 72 | maybe_code :: { Maybe (AlexPosn,Code) } 73 | : CODE { case $1 of T pos (CodeT code) -> 74 | Just (pos,code) } 75 | | {- empty -} { Nothing } 76 | 77 | directives :: { [Directive] } 78 | : directive directives { $1 : $2 } 79 | | {- empty -} { [] } 80 | 81 | directive :: { Directive } 82 | : WRAPPER STRING { WrapperDirective $2 } 83 | | ENCODING encoding { EncodingDirective $2 } 84 | | ACTIONTYPE STRING { ActionType $2 } 85 | | TOKENTYPE STRING { TokenType $2 } 86 | | TYPECLASS STRING { TypeClass $2 } 87 | 88 | encoding :: { Encoding } 89 | : STRING {% lookupEncoding $1 } 90 | 91 | macdefs :: { () } 92 | : macdef macdefs { () } 93 | | {- empty -} { () } 94 | 95 | -- hack: the lexer looks for the '=' in a macro definition, because there 96 | -- doesn't seem to be a way to formulate the grammar here to avoid a 97 | -- conflict (it needs LR(2) rather than LR(1) to find the '=' and distinguish 98 | -- an SMAC/RMAC at the beginning of a 
definition from an SMAC/RMAC that is 99 | -- part of a regexp in the previous definition). 100 | macdef :: { () } 101 | : SMAC_DEF set {% newSMac $1 $2 } 102 | | RMAC_DEF rexp {% newRMac $1 $2 } 103 | 104 | scanner :: { Scanner } 105 | : BIND tokendefs { Scanner $1 $2 } 106 | 107 | tokendefs :: { [RECtx] } 108 | : tokendef tokendefs { $1 ++ $2 } 109 | | {- empty -} { [] } 110 | 111 | tokendef :: { [RECtx] } 112 | : startcodes rule { [ replaceCodes $1 (snd $2) ] } 113 | | startcodes '{' rules '}' { map (replaceCodes $1) $3 } 114 | | rule {% do 115 | let (pos, res@(RECtx _ _ e _ _)) = $1 116 | warnIfNullable e pos 117 | return [ res ] 118 | } 119 | 120 | rule :: { (AlexPosn, RECtx) } 121 | : context rhs { let 122 | (l, e, r) = $1 123 | (pos, code) = $2 124 | in (pos, RECtx [] l e r code) 125 | } 126 | 127 | rules :: { [RECtx] } 128 | : rule rules { snd $1 : $2 } 129 | | {- empty -} { [] } 130 | 131 | startcodes :: { [(String,StartCode)] } 132 | : '<' startcodes0 '>' { $2 } 133 | 134 | startcodes0 :: { [(String,StartCode)] } 135 | : startcode ',' startcodes0 { ($1,0) : $3 } 136 | | startcode { [($1,0)] } 137 | 138 | startcode :: { String } 139 | : ZERO { "0" } 140 | | ID { $1 } 141 | 142 | rhs :: { (AlexPosn, Maybe Code) } 143 | : CODE { case $1 of T pos (CodeT code) -> (pos, Just code) } 144 | | ';' { (tokPosn $1, Nothing) } 145 | 146 | context :: { Maybe CharSet, RExp, RightContext RExp } 147 | : left_ctx rexp right_ctx { (Just $1,$2,$3) } 148 | | rexp right_ctx { (Nothing,$1,$2) } 149 | 150 | left_ctx :: { CharSet } 151 | : '^' { charSetSingleton '\n' } 152 | | set '^' { $1 } 153 | 154 | right_ctx :: { RightContext RExp } 155 | : '$' { RightContextRExp (Ch (charSetSingleton '\n')) } 156 | | '/' rexp { RightContextRExp $2 } 157 | | '/' CODE { RightContextCode (case $2 of 158 | T _ (CodeT code) -> code) } 159 | | {- empty -} { NoRightContext } 160 | 161 | rexp :: { RExp } 162 | : alt '|' rexp { $1 :|| $3 } 163 | | alt { $1 } 164 | 165 | alt :: { RExp } 166 | : alt 
term { $1 :%% $2 } 167 | | term { $1 } 168 | 169 | term :: { RExp } 170 | : rexp0 rep { $2 $1 } 171 | | rexp0 { $1 } 172 | 173 | rep :: { RExp -> RExp } 174 | : '*' { Star } 175 | | '+' { Plus } 176 | | '?' { Ques } 177 | | begin_mult '{' mult '}' { $3 } 178 | -- A bit counterintuitively, we need @begin_mult@ already before the left brace, 179 | -- not just before @mult@. This might be due to the lookahead in the parser. 180 | 181 | -- Enter the "multiplicity" lexer mode to scan number literals 182 | begin_mult :: { () } 183 | : {- empty -} {% setStartCode multiplicity } 184 | 185 | -- Parse a numeric multiplicity. 186 | mult :: { RExp -> RExp } 187 | : NUM { repeat_rng $1 Nothing } 188 | | NUM ',' { repeat_rng $1 (Just Nothing) } 189 | | NUM ',' NUM { repeat_rng $1 (Just (Just $3)) } 190 | 191 | rexp0 :: { RExp } 192 | : '(' ')' { Eps } 193 | | STRING { foldr (:%%) Eps 194 | (map (Ch . charSetSingleton) $1) } 195 | | RMAC {% lookupRMac $1 } 196 | | set { Ch $1 } 197 | | '(' rexp ')' { $2 } 198 | 199 | set :: { CharSet } 200 | : set '#' set0 { $1 `charSetMinus` $3 } 201 | | set0 { $1 } 202 | 203 | set0 :: { CharSet } 204 | : CHAR { charSetSingleton $1 } 205 | | CHAR '-' CHAR { charSetRange $1 $3 } 206 | | smac {% lookupSMac $1 } 207 | | '[' sets ']' { foldr charSetUnion emptyCharSet $2 } 208 | 209 | -- [^sets] is the same as '. # [sets]' 210 | -- The upshot is that [^set] does *not* match a newline character, 211 | -- which seems much more useful than just taking the complement. 212 | | '[' '^' sets ']' 213 | {% do { dot <- lookupSMac (tokPosn $1, "."); 214 | return (dot `charSetMinus` 215 | foldr charSetUnion emptyCharSet $3) }} 216 | 217 | -- ~set is the same as '. # set' 218 | | '~' set0 {% do { dot <- lookupSMac (tokPosn $1, "."); 219 | return (dot `charSetMinus` $2) } } 220 | 221 | sets :: { [CharSet] } 222 | : set sets { $1 : $2 } 223 | | {- empty -} { [] } 224 | 225 | smac :: { (AlexPosn,String) } 226 | : '.' 
{ (tokPosn $1, ".") } 227 | | SMAC { case $1 of T p (SMacT s) -> (p, s) } 228 | 229 | { 230 | happyError :: P a 231 | happyError = failP "parse error" 232 | 233 | -- ----------------------------------------------------------------------------- 234 | -- Utils 235 | 236 | digit c = ord c - ord '0' 237 | 238 | repeat_rng :: Int -> Maybe (Maybe Int) -> (RExp->RExp) 239 | repeat_rng n (Nothing) re = foldr (:%%) Eps (replicate n re) 240 | repeat_rng n (Just Nothing) re = foldr (:%%) (Star re) (replicate n re) 241 | repeat_rng n (Just (Just m)) re = intl :%% rst 242 | where 243 | intl = repeat_rng n Nothing re 244 | rst = foldr (\re re'->Ques(re :%% re')) Eps (replicate (m-n) re) 245 | 246 | replaceCodes codes rectx = rectx{ reCtxStartCodes = codes } 247 | 248 | lookupEncoding :: String -> P Encoding 249 | lookupEncoding s = case map toLower s of 250 | "iso-8859-1" -> return Latin1 251 | "latin1" -> return Latin1 252 | "utf-8" -> return UTF8 253 | "utf8" -> return UTF8 254 | _ -> failP ("encoding " ++ show s ++ " not supported") 255 | 256 | } 257 | -------------------------------------------------------------------------------- /src/Parser.y.boot: -------------------------------------------------------------------------------- 1 | -- placeholder 2 | -------------------------------------------------------------------------------- /src/Scan.x: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | -- ALEX SCANNER AND LITERATE PREPROCESSOR 3 | -- 4 | -- This Script defines the grammar used to generate the Alex scanner and a 5 | -- preprocessing scanner for dealing with literate scripts. The actions for 6 | -- the Alex scanner are given separately in the Alex module. 7 | -- 8 | -- See the Alex manual for a discussion of the scanners defined here. 
9 | -- 10 | -- Chris Dornan, Aug-95, 4-Jun-96, 10-Jul-96, 29-Sep-97 11 | ------------------------------------------------------------------------------- 12 | 13 | { 14 | {-# LANGUAGE CPP #-} 15 | 16 | -- Switch off partiality warning about 'head' and 'tail' 17 | #if __GLASGOW_HASKELL__ >= 908 18 | {-# OPTIONS_GHC -Wno-x-partial #-} 19 | #endif 20 | 21 | module Scan (lexer, AlexPosn(..), Token(..), Tkn(..), tokPosn, multiplicity) where 22 | 23 | import Data.Char 24 | import ParseMonad 25 | -- import Debug.Trace 26 | } 27 | 28 | $digit = 0-9 29 | $hexdig = [0-9 A-F a-f] 30 | $octal = 0-7 31 | $lower = a-z 32 | $upper = A-Z 33 | $alpha = [$upper $lower] 34 | $alphanum = [$alpha $digit] 35 | $idchar = [$alphanum \_ \'] 36 | 37 | $special = [\.\;\,\$\|\*\+\?\#\~\-\{\}\(\)\[\]\^\/] 38 | $graphic = $printable # $white 39 | $nonspecial = $graphic # [$special \%] 40 | 41 | @id = $alpha $idchar* 42 | @smac = \$ @id | \$ \{ @id \} 43 | @rmac = \@ @id | \@ \{ @id \} 44 | 45 | @comment = "--".* 46 | @ws = $white+ | @comment 47 | 48 | alex :- 49 | 50 | @ws { skip } -- white space; ignore 51 | 52 | <0> \" [^\"]* \" { string } 53 | <0> (@id @ws?)? \:\- { bind } 54 | <0> \{ / (\n | [^$digit]) { code } 55 | <0> $special { special } -- note: matches { 56 | <0> \% "wrapper" { wrapper } 57 | <0> \% "encoding" { encoding } 58 | <0> \% "action" { actionty } 59 | <0> \% "token" { tokenty } 60 | <0> \% "typeclass" { typeclass } 61 | 62 | <0> \\ $digit+ { decch } 63 | <0> \\ x $hexdig+ { hexch } 64 | <0> \\ o $octal+ { octch } 65 | <0> \\ $printable { escape } 66 | <0> $nonspecial # [\<] { char } 67 | <0> @smac { smac } 68 | <0> @rmac { rmac } 69 | 70 | <0> @smac @ws? \= { smacdef } 71 | <0> @rmac @ws? 
\= { rmacdef } 72 | 73 | -- identifiers are allowed to be unquoted in startcode lists 74 | <0> \< { special `andBegin` startcodes } 75 | 0 { zero } 76 | @id { startcode } 77 | \, { special } 78 | \> { special `andBegin` afterstartcodes } 79 | 80 | -- After a <..> startcode sequence, we can have a {...} grouping of rules, 81 | -- so don't try to interpret the opening { as a code block. 82 | \{ (\n | [^$digit ]) { special `andBegin` 0 } 83 | () { skip `andBegin` 0 } -- note: empty pattern 84 | 85 | -- Numeric literals are only lexed in multiplicity braces e.g. {nnn,mmm}. 86 | -- Switching to the @multiplicity@ lexer state happens in the parser. 87 | $digit+ { num } 88 | \, { special } 89 | \} { special `andBegin` 0 } 90 | 91 | { 92 | 93 | -- ----------------------------------------------------------------------------- 94 | -- Token type 95 | 96 | data Token = T AlexPosn Tkn 97 | deriving Show 98 | 99 | tokPosn (T p _) = p 100 | 101 | data Tkn 102 | = SpecialT Char 103 | | CodeT String 104 | | ZeroT 105 | | IdT String 106 | | StringT String 107 | | BindT String 108 | | CharT Char 109 | | SMacT String 110 | | RMacT String 111 | | SMacDefT String 112 | | RMacDefT String 113 | | NumT Int 114 | | WrapperT 115 | | EncodingT 116 | | ActionTypeT 117 | | TokenTypeT 118 | | TypeClassT 119 | | EOFT 120 | deriving Show 121 | 122 | -- ----------------------------------------------------------------------------- 123 | -- Token functions 124 | 125 | special, zero, string, bind, escape, decch, hexch, octch, char :: Action 126 | smac, rmac, smacdef, rmacdef, startcode, wrapper, encoding :: Action 127 | actionty, tokenty, typeclass :: Action 128 | special (p,_,str) _ = return $ T p (SpecialT (head str)) 129 | zero (p,_,_) _ = return $ T p ZeroT 130 | string (p,_,str) ln = return $ T p (StringT (extract ln str)) 131 | bind (p,_,str) _ = return $ T p (BindT (takeWhile isIdChar str)) 132 | escape (p,_,str) _ = return $ T p (CharT (esc str)) 133 | decch (p,_,str) ln = return $ T p (CharT 
(do_ech 10 ln (take (ln-1) (drop 1 str)))) 134 | hexch (p,_,str) ln = return $ T p (CharT (do_ech 16 ln (take (ln-2) (drop 2 str)))) 135 | octch (p,_,str) ln = return $ T p (CharT (do_ech 8 ln (take (ln-2) (drop 2 str)))) 136 | char (p,_,str) _ = return $ T p (CharT (head str)) 137 | num (p,_,str) ln = return $ T p $ NumT $ parseInt 10 $ take ln str 138 | smac (p,_,str) ln = return $ T p (SMacT (mac ln str)) 139 | rmac (p,_,str) ln = return $ T p (RMacT (mac ln str)) 140 | smacdef (p,_,str) ln = return $ T p (SMacDefT (macdef ln str)) 141 | rmacdef (p,_,str) ln = return $ T p (RMacDefT (macdef ln str)) 142 | startcode (p,_,str) ln = return $ T p (IdT (take ln str)) 143 | wrapper (p,_,_) _ = return $ T p WrapperT 144 | encoding (p,_,_) _ = return $ T p EncodingT 145 | actionty (p,_,_) _ = return $ T p ActionTypeT 146 | tokenty (p,_,_) _ = return $ T p TokenTypeT 147 | typeclass (p,_,_) _ = return $ T p TypeClassT 148 | 149 | isIdChar :: Char -> Bool 150 | isIdChar c = isAlphaNum c || c `elem` "_'" 151 | 152 | extract :: Int -> String -> String 153 | extract ln str = take (ln-2) (drop 1 str) 154 | 155 | do_ech :: Int -> Int -> String -> Char 156 | do_ech radix _ln str = chr (parseInt radix str) 157 | 158 | mac :: Int -> String -> String 159 | mac ln str = take (ln-1) $ tail str 160 | 161 | -- TODO : replace not . 
isSpace with (\c -> not (isSpace c) && c /= '=') 162 | macdef :: Int -> String -> String 163 | macdef _ln str = takeWhile (\c -> not (isSpace c) && c /= '=') $ tail str 164 | 165 | esc :: String -> Char 166 | esc str = 167 | case head $ tail str of 168 | 'a' -> '\a' 169 | 'b' -> '\b' 170 | 'f' -> '\f' 171 | 'n' -> '\n' 172 | 'r' -> '\r' 173 | 't' -> '\t' 174 | 'v' -> '\v' 175 | c -> c 176 | 177 | parseInt :: Int -> String -> Int 178 | parseInt radix ds = foldl1 (\n d -> n * radix + d) (map digitToInt ds) 179 | 180 | -- In brace-delimited code, we have to be careful to match braces 181 | -- within the code, but ignore braces inside strings and character 182 | -- literals. We do an approximate job (doing it properly requires 183 | -- implementing a large chunk of the Haskell lexical syntax). 184 | 185 | code :: Action 186 | code (p, _, _inp) _ = do 187 | currentInput <- getInput 188 | go currentInput 1 "" 189 | where 190 | go :: AlexInput -> Int -> String -> P Token 191 | go inp 0 cs = do 192 | setInput inp 193 | return $ T p $ CodeT $ triml $ reverse $ triml $ tail cs 194 | go inp n cs = do 195 | case alexGetChar inp of 196 | Nothing -> err inp 197 | Just (c,inp2) -> 198 | case c of 199 | '{' -> go inp2 (n+1) (c:cs) 200 | '}' -> go inp2 (n-1) (c:cs) 201 | '\'' -> go_char inp2 n (c:cs) 202 | '\"' -> go_str inp2 n (c:cs) '\"' 203 | c2 -> go inp2 n (c2:cs) 204 | 205 | go_char :: AlexInput -> Int -> String -> P Token 206 | -- try to catch multiple occurrences of ' at identifier end 207 | go_char inp n cs@('\'':'\'':_) = go inp n cs 208 | -- try to catch occurrences of ' within an identifier 209 | go_char inp n cs@('\'':c2:_) 210 | | isAlphaNum c2 = go inp n cs 211 | go_char inp n cs = go_str inp n cs '\'' 212 | 213 | go_str :: AlexInput -> Int -> String -> Char -> P Token 214 | go_str inp n cs end = do 215 | case alexGetChar inp of 216 | Nothing -> err inp 217 | Just (c,inp2) 218 | | c == end -> go inp2 n (c:cs) 219 | | otherwise -> 220 | case c of 221 | '\\' -> case 
alexGetChar inp2 of 222 | Nothing -> err inp2 223 | Just (d,inp3) -> go_str inp3 n (d:c:cs) end 224 | c2 -> go_str inp2 n (c2:cs) end 225 | 226 | err inp = do setInput inp; lexError "lexical error in code fragment" 227 | 228 | triml = dropWhile isSpace 229 | 230 | lexError :: String -> P a 231 | lexError s = do 232 | (_, _, _, input) <- getInput 233 | failP $ s ++ " at " ++ case input of 234 | c:_ -> show c 235 | [] -> "end of file" 236 | 237 | lexer :: (Token -> P a) -> P a 238 | lexer cont = lexToken >>= cont 239 | 240 | lexToken :: P Token 241 | lexToken = do 242 | inp@(p,c,_,s) <- getInput 243 | sc <- getStartCode 244 | case alexScan inp sc of 245 | AlexEOF -> return (T p EOFT) 246 | AlexError _ -> lexError "lexical error" 247 | AlexSkip inp1 _ -> do 248 | setInput inp1 249 | lexToken 250 | AlexToken inp1 len t -> do 251 | setInput inp1 252 | t (p,c,s) len 253 | 254 | type Action = (AlexPosn,Char,String) -> Int -> P Token 255 | 256 | skip :: Action 257 | skip _ _ = lexToken 258 | 259 | andBegin :: Action -> StartCode -> Action 260 | andBegin act sc inp len = setStartCode sc >> act inp len 261 | } 262 | -------------------------------------------------------------------------------- /src/Scan.x.boot: -------------------------------------------------------------------------------- 1 | -- placeholder 2 | -------------------------------------------------------------------------------- /src/UTF8.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE OverloadedLists #-} 3 | 4 | module UTF8 where 5 | 6 | import Data.Word ( Word8 ) 7 | import Data.Bits ( (.&.), shiftR ) 8 | import Data.Char ( ord ) 9 | 10 | import qualified Data.List.NonEmpty as List1 11 | type List1 = List1.NonEmpty 12 | 13 | {- 14 | -- Could also be imported: 15 | 16 | import Codec.Binary.UTF8.Light as UTF8 17 | 18 | encode :: Char -> [Word8] 19 | encode c = head (UTF8.encodeUTF8' [UTF8.c2w c]) 20 | 21 | -} 22 | 23 | 
-- | Encode a single Haskell 'Char' as its UTF-8 byte sequence (1-4 bytes).
--   (NB: this encodes one 'Char', not a whole 'String'.)
encode :: Char -> List1 Word8
encode = fmap fromIntegral . go . ord
  where
  go oc
   | oc <= 0x7f   = [oc]                                -- 1 byte: plain ASCII

   | oc <= 0x7ff  = [ 0xc0 + (oc `shiftR` 6)            -- 2 bytes
                    , 0x80 + oc .&. 0x3f
                    ]

   | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12)           -- 3 bytes
                    , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
                    , 0x80 + oc .&. 0x3f
                    ]
   | otherwise    = [ 0xf0 + (oc `shiftR` 18)           -- 4 bytes
                    , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
                    , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
                    , 0x80 + oc .&. 0x3f
                    ]
-------------------------------------------------------------------------------- /src/Util.hs: --------------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- Util.hs, part of Alex
--
-- (c) Simon Marlow 2003
--
-- General utilities used in various parts of Alex
--
-- -----------------------------------------------------------------------------

module Util
  ( str
  , char
  , nl
  , paren
  , interleave_shows
  , space
  , cjustify
  , ljustify
  , rjustify
  , spaces
  , hline
  ) where

-- Pretty-printing utilities, in the ShowS (difference-list) style:
-- each combinator has type String -> String and is composed with (.).

str :: String -> String -> String
str = showString

-- | Prepend a single character.
char :: Char -> String -> String
char c = (c :)

-- | Prepend a newline.
nl :: String -> String
nl = char '\n'

-- | Wrap a rendering in parentheses.
paren :: (String -> String) -> String -> String
paren s = char '(' . s . char ')'

-- | Interpose the separator @s@ between the given renderings.
interleave_shows :: (String -> String) -> [String -> String] -> String -> String
interleave_shows _ [] = id
interleave_shows s xs = foldr1 (\a b -> a . s .
b) xs 42 | 43 | space :: String -> String 44 | space = char ' ' 45 | 46 | cjustify, ljustify, rjustify :: Int -> String -> String 47 | cjustify n s = spaces halfm ++ s ++ spaces (m - halfm) 48 | where 49 | m = n - length s 50 | halfm = m `div` 2 51 | ljustify n s = s ++ spaces (max 0 (n - length s)) 52 | rjustify n s = spaces (n - length s) ++ s 53 | 54 | spaces :: Int -> String 55 | spaces n = replicate n ' ' 56 | 57 | hline :: String 58 | hline = replicate 77 '-' 59 | -------------------------------------------------------------------------------- /src/ghc_hooks.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void ErrorHdrHook(chan) 4 | FILE *chan; 5 | {} 6 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-21.25 2 | packages: 3 | - . 4 | -------------------------------------------------------------------------------- /test-debug.hs: -------------------------------------------------------------------------------- 1 | import System.Process (system) 2 | import System.Exit (exitWith) 3 | 4 | main = system "make -k -C tests clean tests-debug" >>= exitWith 5 | -------------------------------------------------------------------------------- /test.hs: -------------------------------------------------------------------------------- 1 | import System.Process (system) 2 | import System.Exit (exitWith) 3 | 4 | main = system "make -k -C tests clean tests" >>= exitWith 5 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | # NOTE: `cabal test` will take care to build the local `alex` 2 | # executable and place it into $PATH for us to pick up. 
3 | # 4 | # If it doesn't look like the alex binary in $PATH comes from the 5 | # build tree, then we'll fall back to pointing to 6 | # ../dist/build/alex/alex to support running tests via "runghc 7 | # Setup.hs test". 8 | # 9 | # If ALEX has been set outside, e.g. in the environment, we trust this setting. 10 | # This way, we can pass in the correct Alex executable from a CI environment 11 | # without danger of it being "fixed" by the logic below. 12 | # [2021-06-15, PR #189](https://github.com/simonmar/alex/pull/189) 13 | # 14 | ifndef ALEX 15 | ALEX=$(shell which alex) 16 | ifeq "$(filter $(dir $(shell pwd))%,$(ALEX))" "" 17 | ALEX=../dist/build/alex/alex 18 | endif 19 | endif 20 | 21 | # NOTE: This assumes that a working `ghc` is on $PATH; this may not 22 | # necessarily be the same GHC used by `cabal` for building `alex`. 23 | # 24 | # Again, if HC has been set in the environment (e.g. by the CI), we keep this setting. 25 | # [2021-06-15, PR #189](https://github.com/simonmar/alex/pull/189) 26 | # 27 | HC ?= ghc 28 | 29 | # Some GHC warnings are only available from a certain version on 30 | # Get the GHC version 31 | GHC_VERSION:=$(shell $(HC) --numeric-version) 32 | GHC_VERSION_WORDS=$(subst ., ,$(GHC_VERSION)) 33 | GHC_MAJOR_VERSION=$(word 1,$(GHC_VERSION_WORDS)) 34 | GHC_MINOR_VERSION=$(word 2,$(GHC_VERSION_WORDS)) 35 | 36 | # Text dependency comes with GHC from 8.4 onwards 37 | GHC_SHIPS_WITH_TEXT:=$(shell if [ $(GHC_MAJOR_VERSION) -gt 8 -o $(GHC_MAJOR_VERSION) -ge 8 -a $(GHC_MINOR_VERSION) -ge 4 ]; then echo "yes"; else echo "no"; fi) 38 | 39 | # Turn off x-partial warning (new in GHC 9.8) 40 | WARNS_DEP_GHC_GTEQ_9_8:=$(shell if [ $(GHC_MAJOR_VERSION) -gt 9 -o $(GHC_MAJOR_VERSION) -ge 9 -a $(GHC_MINOR_VERSION) -ge 8 ]; then echo "-Wno-x-partial"; fi) 41 | 42 | HC_OPTS=-Wall $(WARNS_DEP_GHC_GTEQ_9_8) -fwarn-incomplete-uni-patterns -Werror 43 | 44 | .PRECIOUS: %.n.hs %.g.hs %.o %.exe %.bin 45 | 46 | ifeq "$(TARGETPLATFORM)" "i386-unknown-mingw32" 47 | 
HS_PROG_EXT = .exe 48 | else 49 | HS_PROG_EXT = .bin 50 | endif 51 | 52 | TESTS = \ 53 | basic_typeclass.x \ 54 | basic_typeclass_bytestring.x \ 55 | default_typeclass.x \ 56 | gscan_typeclass.x \ 57 | issue_71.x \ 58 | issue_119.x \ 59 | issue_141.x \ 60 | issue_197.x \ 61 | issue_262.x \ 62 | issue_269_part1.x \ 63 | issue_269_part2.x \ 64 | monad_typeclass.x \ 65 | monad_typeclass_bytestring.x \ 66 | monadUserState_typeclass.x \ 67 | monadUserState_typeclass_bytestring.x \ 68 | null.x \ 69 | posn_typeclass.x \ 70 | posn_typeclass_bytestring.x \ 71 | strict_typeclass.x \ 72 | simple.x \ 73 | tokens.x \ 74 | tokens_bytestring.x \ 75 | tokens_bytestring_unicode.x \ 76 | tokens_gscan.x \ 77 | tokens_monad_bytestring.x \ 78 | tokens_monadUserState_bytestring.x \ 79 | tokens_posn.x \ 80 | tokens_posn_bytestring.x \ 81 | tokens_scan_user.x \ 82 | tokens_strict_bytestring.x \ 83 | unicode.x 84 | 85 | ifeq "$(GHC_SHIPS_WITH_TEXT)" "yes" 86 | TEXT_DEP = -package text 87 | 88 | TEXT_TESTS = \ 89 | strict_text_typeclass.x \ 90 | posn_typeclass_strict_text.x \ 91 | tokens_monadUserState_strict_text.x 92 | else 93 | TEXT_DEP = 94 | 95 | TEXT_TESTS = 96 | endif 97 | 98 | # NOTE: `cabal` will set the `alex_datadir` env-var accordingly before invoking the test-suite 99 | #TEST_ALEX_OPTS = --template=../data/ 100 | TEST_ALEX_OPTS= 101 | 102 | %.n.hs : %.x 103 | $(ALEX) $(TEST_ALEX_OPTS) $< -o $@ 104 | 105 | %.g.hs : %.x 106 | $(ALEX) $(TEST_ALEX_OPTS) -g $< -o $@ 107 | 108 | %.d.hs : %.x 109 | $(ALEX) $(TEST_ALEX_OPTS) --debug $< -o $@ 110 | 111 | CLEAN_FILES += *.n.hs *.g.hs *.d.hs *.info *.hi *.o *.bin *.exe 112 | 113 | TESTS_HS = $(shell echo $(TESTS) $(TEXT_TESTS) | sed -e 's/\([^\. ]*\)\.\(l\)\{0,1\}x/\1.n.hs \1.g.hs/g') 114 | TESTS_HS_DEBUG = $(shell echo $(TESTS) $(TEXT_TESTS) | sed -e 's/\([^\. 
]*\)\.\(l\)\{0,1\}x/\1.d.hs/g') 115 | TESTS_HS_ALL = $(TESTS_HS) $(TESTS_HS_DEBUG) 116 | 117 | BASIC_TESTS = $(patsubst %.hs, %.run, $(TESTS_HS)) 118 | DEBUG_TESTS = $(patsubst %.hs, %.run, $(TESTS_HS_DEBUG)) 119 | ALL_TESTS = $(BASIC_TESTS) $(DEBUG_TESTS) 120 | 121 | %.run : %$(HS_PROG_EXT) 122 | ./$< 123 | 124 | %$(HS_PROG_EXT) : %.hs 125 | $(HC) $(HC_OPTS) -package array -package bytestring $(TEXT_DEP) $($*_LD_OPTS) $< -o $@ 126 | 127 | all :: $(ALL_TESTS) 128 | 129 | tests :: $(BASIC_TESTS) 130 | 131 | tests-debug :: $(DEBUG_TESTS) 132 | 133 | .PHONY: clean debug 134 | 135 | clean: 136 | rm -f $(CLEAN_FILES) 137 | 138 | debug : 139 | @echo ALEX = $(ALEX) 140 | @echo HC_OPTS = $(HC_OPTS) 141 | @echo ALL_TESTS = $(ALL_TESTS) 142 | -------------------------------------------------------------------------------- /tests/basic_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "basic" 10 | %token "Token s" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." ; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. 
; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 48 | tokpred _ _ _ _ = True 49 | 50 | idtoken :: Read s => Int -> String -> Token s 51 | idtoken n s = Id n (read ("\"" ++ s ++ "\"")) 52 | 53 | data Token s = Id Int s 54 | deriving (Show, Ord, Eq) 55 | 56 | lex :: Read s => String -> [Token s] 57 | lex = alexScanTokens 58 | 59 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 60 | 61 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 62 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 63 | Id 10 "uuvu", Id 11 "xxw"] 64 | 65 | main :: IO () 66 | main = 67 | let 68 | result :: [Token String] 69 | result = lex input 70 | in do 71 | if result /= tokens 72 | then exitFailure 73 | else exitWith ExitSuccess 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tests/basic_typeclass_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import Data.ByteString.Lazy.Char8 as Lazy 10 | 11 | } 12 | 13 | %wrapper "basic-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." 
; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> Lazy.ByteString -> Token s 55 | idtoken n s = Id n (read ("\"" ++ (Lazy.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s 58 | deriving (Show, Ord, Eq) 59 | 60 | lex :: Read s => Lazy.ByteString -> [Token s] 61 | lex = alexScanTokens 62 | 63 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 64 | 65 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 66 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 67 | Id 10 "uuvu", Id 11 "xxw"] 68 | 69 | main :: IO () 70 | main = 71 | let 72 | result :: [Token String] 73 | result = lex input 74 | in do 75 | if result /= tokens 76 | then exitFailure 77 | else exitWith ExitSuccess 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tests/gscan_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "gscan" 10 | %token "[Token s]" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." 
; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. ; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 48 | tokpred _ _ _ _ = True 49 | 50 | idtoken :: Read s => Int -> AlexPosn -> Char -> String -> Int -> 51 | ((Int,state) -> [Token s]) -> (Int,state) -> [Token s] 52 | idtoken n _ _ s len cont st = Id n (read ("\"" ++ take len s ++ "\"")) : cont st 53 | 54 | data Token s = Id Int s deriving Eq 55 | 56 | lex :: Read s => String -> [Token s] 57 | lex str = alexGScan (\_ _ _ _ -> []) (0 :: Int) str 58 | 59 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 60 | 61 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 62 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 63 | Id 10 "uuvu", Id 11 "xxw"] 64 | 65 | main :: IO () 66 | main = 67 | let 68 | result :: [Token String] 69 | result = lex input 70 | in do 71 | if result /= tokens 72 | then exitFailure 73 | else exitWith ExitSuccess 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tests/issue_119.x: -------------------------------------------------------------------------------- 1 | -- -*- haskell -*- 2 | { 3 | -- Issue 119, 4 | -- reported 2017-10-11 by Herbert Valerio Riedel, 5 | -- fixed 2020-01-26 by Andreas Abel. 6 | -- 7 | -- Problem was: the computed token length (in number of characters) 8 | -- attached to AlexToken is tailored to UTF8 encoding and wrong 9 | -- for LATIN1 encoding. 
10 | 11 | module Main where 12 | 13 | import Control.Monad (unless) 14 | import qualified Data.ByteString as B 15 | import Data.Word 16 | import System.Exit (exitFailure) 17 | } 18 | 19 | %encoding "latin1" 20 | 21 | :- 22 | 23 | [\x01-\xff]+ { False } 24 | [\x00] { True } 25 | 26 | { 27 | type AlexInput = B.ByteString 28 | 29 | alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) 30 | alexGetByte = B.uncons 31 | 32 | alexInputPrevChar :: AlexInput -> Char 33 | alexInputPrevChar = undefined 34 | 35 | -- generated by @alex@ 36 | alexScan :: AlexInput -> Int -> AlexReturn Bool 37 | 38 | {- 39 | 40 | GOOD cases: 41 | 42 | ("012\NUL3","012","\NUL3",3,3,False) 43 | ("\NUL0","\NUL","0",1,1,True) 44 | ("012","012","",3,3,False) 45 | 46 | BAD case: 47 | 48 | ("0@P`p\128\144\160","0@P`p","",5,8,False) 49 | 50 | expected: 51 | 52 | ("0@P`p\128\144\160","0@P`p\128\144\160","",8,8,False) 53 | 54 | -} 55 | main :: IO () 56 | main = do 57 | go (B.pack [0x30,0x31,0x32,0x00,0x33]) -- GOOD 58 | go (B.pack [0x00,0x30]) -- GOOD 59 | go (B.pack [0x30,0x31,0x32]) -- GOOD 60 | 61 | go (B.pack [0x30,0x40,0x50,0x60,0x70,0x80,0x90,0xa0]) -- WAS: BAD 62 | where 63 | go inp = do 64 | case (alexScan inp 0) of 65 | -- expected invariant: len == B.length inp - B.length inp' 66 | AlexToken inp' len b -> do 67 | let diff = B.length inp - B.length inp' 68 | unless (len == diff) $ do 69 | putStrLn $ "ERROR: reported length and consumed length differ!" 70 | print (inp, B.take len inp, inp', len, diff, b) 71 | exitFailure 72 | _ -> undefined 73 | } 74 | -------------------------------------------------------------------------------- /tests/issue_141.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Issue #141 3 | -- reported 2015-10-20 by Iavor S. Diatchki 4 | -- fixed 2020-01-31 by Andreas Abel 5 | -- 6 | -- Problem was: 7 | -- Only one-digit numbers were accepted in repetition ranges. 
8 | 9 | module Main (main) where 10 | 11 | import System.Exit 12 | } 13 | 14 | %wrapper "posn" 15 | %token "Token" 16 | 17 | :- 18 | 19 | -- allow several digits in repetition ranges, e.g. 14 20 | "a"{14,14} { \ _ _ -> A } 21 | [\ \n\t]+ ; 22 | 23 | { 24 | data Token = A 25 | deriving (Eq, Show) 26 | 27 | -- 12345678901234 28 | input = "aaaaaaaaaaaaaa\n" -- fourteen a's 29 | expected_result = [A] 30 | 31 | main :: IO () 32 | main 33 | | result == expected_result = do 34 | exitWith ExitSuccess 35 | | otherwise = do 36 | print $ take 20 result 37 | exitFailure 38 | where 39 | result = alexScanTokens input 40 | } 41 | -------------------------------------------------------------------------------- /tests/issue_197.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Issue #197 3 | -- reported 2022-01-21 by https://github.com/Commelina 4 | -- fixed 2022-01-23 by Andreas Abel & John Ericson 5 | -- 6 | -- Problem was: 7 | -- Surface syntax regressed and could no longer handle character strings 8 | -- that looked like numbers. 9 | 10 | module Main (main) where 11 | 12 | import System.Exit 13 | } 14 | 15 | %wrapper "posn" 16 | %token "Token" 17 | 18 | @iec60559suffix = (32|64|128)[x]? 19 | @any = [01-89]+[x]? 
20 | 21 | :- 22 | 23 | $white+ ; 24 | @iec60559suffix { \ _ -> Good } 25 | @any { \ _ -> Bad } 26 | 27 | { 28 | data Token = Good String | Bad String 29 | deriving (Eq, Show) 30 | 31 | input = "32 32x 99 99x 128x" 32 | expected_result = [Good "32", Good "32x", Bad "99", Bad "99x", Good "128x"] 33 | 34 | main :: IO () 35 | main 36 | | result == expected_result = do 37 | exitWith ExitSuccess 38 | | otherwise = do 39 | print result 40 | exitFailure 41 | where 42 | result = alexScanTokens input 43 | } 44 | -------------------------------------------------------------------------------- /tests/issue_262.x: -------------------------------------------------------------------------------- 1 | { 2 | -- https://github.com/haskell/alex/pull/262 3 | -- https://gitlab.haskell.org/ghc/ghc/-/issues/25609 4 | -- 5 | -- Error happens when using alexScan with a lexer that 6 | -- inspects the context. 7 | 8 | {-# LANGUAGE ScopedTypeVariables #-} 9 | 10 | import Control.Exception 11 | import Data.List (isInfixOf) 12 | } 13 | 14 | %wrapper "basic" 15 | 16 | :- 17 | .* / { \state _ _ _ -> state == 'x' } { id } 18 | 19 | { 20 | main :: IO () 21 | main = do 22 | result <- try $ evaluate $ alexScan ('\n', [], "") 0 `seq` () 23 | case result of 24 | Left (e :: SomeException) 25 | | "use alexScanUser instead" `isInfixOf` show e 26 | -> pure () 27 | _ -> error $ "Got unexpected result: " ++ show result 28 | } 29 | -------------------------------------------------------------------------------- /tests/issue_269_part1.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | -- Issue #269 4 | -- reported 2025-04-02 by Antoine Leblanc (https://github.com/nicuveo) 5 | -- fixed 2025-04-03 by Antoine Leblanc 6 | -- 7 | -- Problem was: 8 | -- The minimizer was not initialized with the proper subsets of 9 | -- states, which could result in different states being erroneously 10 | -- considered equivalent, which in turn could result in the wrong 11 | -- rule 
being selected at runtime. 12 | -- 13 | -- This version of the test fails with the minimizer as implemented 14 | -- after the changes in 4f0b51b8c370d0dd0f9c65af98282789f1cb035f. 15 | 16 | import Control.Monad (when) 17 | import System.Exit 18 | 19 | } 20 | 21 | %wrapper "basic" 22 | 23 | tokens :- 24 | [abc] { Left } 25 | "abc" { const $ Right "abc" } 26 | 27 | { 28 | 29 | test :: String -> [Either String String] -> IO () 30 | test input expected = 31 | when (expected /= alexScanTokens input) 32 | exitFailure 33 | 34 | main :: IO () 35 | main = do 36 | test "abc" [Right "abc"] 37 | test "bbb" [Left "b", Left "b", Left "b"] 38 | test "bbc" [Left "b", Left "b", Left "c"] 39 | 40 | } 41 | -------------------------------------------------------------------------------- /tests/issue_269_part2.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | -- Issue #269 4 | -- reported 2025-04-02 by Antoine Leblanc (https://github.com/nicuveo) 5 | -- fixed 2025-04-03 by Antoine Leblanc 6 | -- 7 | -- Problem was: 8 | -- The minimizer was not initialized with the proper subsets of 9 | -- states, which could result in different states being erroneously 10 | -- considered equivalent, which in turn could result in the wrong 11 | -- rule being selected at runtime. 12 | -- 13 | -- This version of the test fails with the minimizer as implemented 14 | -- pre 4f0b51b8c370d0dd0f9c65af98282789f1cb035f. 
15 | 16 | import Control.Monad (when) 17 | import System.Exit 18 | 19 | } 20 | 21 | %wrapper "basic" 22 | 23 | tokens :- 24 | "abc" { const $ Right "abc" } 25 | [abc] { Left } 26 | 27 | { 28 | 29 | test :: String -> [Either String String] -> IO () 30 | test input expected = 31 | when (expected /= alexScanTokens input) 32 | exitFailure 33 | 34 | main :: IO () 35 | main = do 36 | test "abc" [Right "abc"] 37 | test "bbb" [Left "b", Left "b", Left "b"] 38 | test "bbc" [Left "b", Left "b", Left "c"] 39 | 40 | } 41 | -------------------------------------------------------------------------------- /tests/issue_71.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Issue #71 3 | -- reported 2015-10-20 by Ian Duncan 4 | -- fixed 2020-01-22 by Andreas Abel 5 | -- 6 | -- Problem was: 7 | -- DFA minimization crashed with "Prelude head: empty list" because 8 | -- empty set of non-accepting states was treated as empty equivalence 9 | -- class of states. 10 | -- 11 | -- Issue #258, 2024-02-27, Andreas Abel: 12 | -- Since GHC 9.4, type 'Symbol' conflicts with 'GHC.Exts.Symbol' 13 | -- which was imported by alex <= 3.5.0.0 14 | -- because of an unqualified import of 'GHC.Exts'. 15 | 16 | module Main (main) where 17 | 18 | import System.Exit 19 | } 20 | 21 | %wrapper "posn" 22 | %token "Symbol" 23 | 24 | $whitespace = [\ \n\t] 25 | @whitespaces = $whitespace* 26 | 27 | :- 28 | 29 | @whitespaces { \ _ _ -> Whitespaces } 30 | "a" { \ _ _ -> A } 31 | 32 | { 33 | -- Calling the token type 'Symbol' will trigger a clash with GHC.Exts.Symbol 34 | -- if the lexer is built with alex <= 3.5.0.0. 35 | 36 | data Symbol = Whitespaces | A 37 | deriving (Eq, Show) 38 | 39 | input = "aa \n\taa \t \n a" 40 | expected_result = [A,A,Whitespaces,A,A,Whitespaces,A] 41 | 42 | main :: IO () 43 | main 44 | -- Since the whitespaces token is nullable, Alex 45 | -- will recognize an infinite number of those 46 | -- at the end of file. 
This behavior is problematic, 47 | -- but we don't fix it here. 48 | -- We just test here whether the expected result 49 | -- is a prefix of the produced result. 50 | | take (length expected_result) result == expected_result = do 51 | exitWith ExitSuccess 52 | | otherwise = do 53 | print $ take 20 result 54 | exitFailure 55 | where 56 | result = alexScanTokens input 57 | } 58 | -------------------------------------------------------------------------------- /tests/monadUserState_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "monadUserState" 10 | %token "Token s" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." ; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. 
; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | type AlexUserState = Int 48 | 49 | alexInitUserState = 0 50 | 51 | alexEOF :: Alex (Token s) 52 | alexEOF = return EOF 53 | 54 | tokpred :: AlexUserState -> AlexInput -> Int -> AlexInput -> Bool 55 | tokpred _ _ _ _ = True 56 | 57 | idtoken :: Read s => Int -> AlexInput -> Int -> Alex (Token s) 58 | idtoken n (_, _, _, s) len = return (Id n (read ("\"" ++ take len s ++ "\""))) 59 | 60 | data Token s = Id Int s | EOF deriving Eq 61 | 62 | lex :: Read s => String -> Either String [Token s] 63 | lex inp = 64 | let 65 | lexAll = 66 | do 67 | res <- alexMonadScan 68 | case res of 69 | EOF -> return [] 70 | tok -> 71 | do 72 | rest <- lexAll 73 | return (tok : rest) 74 | in 75 | runAlex inp lexAll 76 | 77 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 78 | 79 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 80 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 81 | Id 10 "uuvu", Id 11 "xxw"] 82 | 83 | main :: IO () 84 | main = 85 | let 86 | result = lex input 87 | in do 88 | case result of 89 | Left _ -> exitFailure 90 | Right toks -> 91 | do 92 | if toks /= tokens 93 | then exitFailure 94 | else exitWith ExitSuccess 95 | 96 | } 97 | -------------------------------------------------------------------------------- /tests/monadUserState_typeclass_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import qualified Data.ByteString.Lazy.Char8 as Lazy 10 | 11 | } 12 | 13 | %wrapper "monadUserState-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." 
{ idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | type AlexUserState = Int 52 | 53 | alexInitUserState = 0 54 | 55 | alexEOF :: Alex (Token s) 56 | alexEOF = return EOF 57 | 58 | tokpred :: AlexUserState -> AlexInput -> Int -> AlexInput -> Bool 59 | tokpred _ _ _ _ = True 60 | 61 | idtoken :: Read s => Int -> AlexInput -> Int64 -> Alex (Token s) 62 | idtoken n (_, _, s, _) len = 63 | return (Id n (read ("\"" ++ Lazy.unpack (Lazy.take (fromIntegral len) s) ++ 64 | "\""))) 65 | 66 | data Token s = Id Int s | EOF deriving Eq 67 | 68 | lex :: Read s => Lazy.ByteString -> Either String [Token s] 69 | lex inp = 70 | let 71 | lexAll = 72 | do 73 | -- Andreas Abel, 2023-12-30, issue #220: 74 | -- Test that alex{G,S}etUserState are in scope. 
75 | u <- alexGetUserState 76 | alexSetUserState (u + 1) 77 | 78 | res <- alexMonadScan 79 | case res of 80 | EOF -> return [] 81 | tok -> 82 | do 83 | rest <- lexAll 84 | return (tok : rest) 85 | in 86 | runAlex inp lexAll 87 | 88 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 89 | 90 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 91 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 92 | Id 10 "uuvu", Id 11 "xxw"] 93 | 94 | main :: IO () 95 | main = 96 | let 97 | result :: Either String [Token String] 98 | result = lex input 99 | in do 100 | case result of 101 | Left _ -> exitFailure 102 | Right toks -> 103 | do 104 | if toks /= tokens 105 | then exitFailure 106 | else exitWith ExitSuccess 107 | 108 | } 109 | -------------------------------------------------------------------------------- /tests/monad_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "monad" 10 | %token "Token s" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." ; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. 
; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | alexEOF :: Alex (Token s) 48 | alexEOF = return EOF 49 | 50 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 51 | tokpred _ _ _ _ = True 52 | 53 | idtoken :: Read s => Int -> AlexInput -> Int -> Alex (Token s) 54 | idtoken n (_, _, _, s) len = return (Id n (read ("\"" ++ take len s ++ "\""))) 55 | 56 | data Token s = Id Int s | EOF deriving Eq 57 | 58 | lex :: Read s => String -> Either String [Token s] 59 | lex inp = 60 | let 61 | lexAll = 62 | do 63 | res <- alexMonadScan 64 | case res of 65 | EOF -> return [] 66 | tok -> 67 | do 68 | rest <- lexAll 69 | return (tok : rest) 70 | in 71 | runAlex inp lexAll 72 | 73 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 74 | 75 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 76 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 77 | Id 10 "uuvu", Id 11 "xxw"] 78 | 79 | main :: IO () 80 | main = 81 | let 82 | result = lex input 83 | in do 84 | case result of 85 | Left _ -> exitFailure 86 | Right toks -> 87 | do 88 | if toks /= tokens 89 | then exitFailure 90 | else exitWith ExitSuccess 91 | 92 | } 93 | -------------------------------------------------------------------------------- /tests/monad_typeclass_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import qualified Data.ByteString.Lazy.Char8 as Lazy 10 | 11 | } 12 | 13 | %wrapper "monad-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." 
{ idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | alexEOF :: Alex (Token s) 52 | alexEOF = return EOF 53 | 54 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 55 | tokpred _ _ _ _ = True 56 | 57 | idtoken :: Read s => Int -> AlexInput -> Int64 -> Alex (Token s) 58 | idtoken n (_, _, s, _) len = 59 | return (Id n (read ("\"" ++ Lazy.unpack (Lazy.take (fromIntegral len) s) ++ 60 | "\""))) 61 | 62 | data Token s = Id Int s | EOF deriving Eq 63 | 64 | lex :: Read s => Lazy.ByteString -> Either String [Token s] 65 | lex inp = 66 | let 67 | lexAll = 68 | do 69 | res <- alexMonadScan 70 | case res of 71 | EOF -> return [] 72 | tok -> 73 | do 74 | rest <- lexAll 75 | return (tok : rest) 76 | in 77 | runAlex inp lexAll 78 | 79 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 80 | 81 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 82 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 83 | Id 10 "uuvu", Id 11 "xxw"] 84 | 85 | main :: IO () 86 | main = 87 | let 88 | result :: Either String [Token String] 89 | result = lex input 90 | in do 91 | case result of 92 | Left _ -> exitFailure 93 | Right toks -> 94 | do 95 | if toks /= tokens 96 | then exitFailure 97 | else exitWith ExitSuccess 98 | 99 | } 100 | -------------------------------------------------------------------------------- /tests/null.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Tests the basic operation. 
3 | module Main where 4 | 5 | import Data.Char (toUpper) 6 | import Control.Monad 7 | import System.Exit 8 | import System.IO 9 | import Prelude hiding (null) 10 | } 11 | 12 | %wrapper "monad" 13 | 14 | @word = [A-Za-z]+ 15 | @null = \0 16 | 17 | $escchars = [abfnrtv\\"\'&] 18 | @escape = \\ ($escchars | \0) 19 | @gap = \\ $white+ \\ 20 | @string = $printable # [\"] | " " | @escape | @gap 21 | 22 | @inComment = ([^\*] | $white)+ | ([\*]+ ([\x00-\xff] # [\/])) 23 | 24 | tokens :- 25 | 26 | $white+ ; 27 | 28 | <0> { 29 | @null { null } 30 | @word { word } 31 | \" @string \" { string } 32 | "--" @inComment \n { word } 33 | } 34 | 35 | { 36 | {- we can now have comments in source code? -} 37 | word (_,_,_,input) len = return (take len input) 38 | 39 | null (_,_,_,_) _ = return "\0" 40 | 41 | string (_,_,_,input) _ = return (drop 1 (reverse (drop 1 (reverse input)))) 42 | 43 | alexEOF = return "stopped." 44 | 45 | scanner str = runAlex str $ do 46 | let loop = do tok <- alexMonadScan 47 | if tok == "stopped." || tok == "error." 48 | then return [tok] 49 | else do toks <- loop 50 | return (tok:toks) 51 | loop 52 | 53 | main = do 54 | let test1 = scanner str1 55 | when (test1 /= out1) $ 56 | do hPutStrLn stderr "Test 1 failed:" 57 | print test1 58 | exitFailure 59 | 60 | let test2 = scanner str2 61 | when (test2 /= out2) $ 62 | do hPutStrLn stderr "Test 2 failed:" 63 | print test2 64 | exitFailure 65 | 66 | str1 = "a\0bb\0ccc\0\0\"\\\0\"" 67 | out1 = Right ["a","\NUL","bb","\NUL","ccc","\NUL","\NUL","\\\NUL", "stopped."] 68 | 69 | str2 = "." 
70 | out2 = Left "lexical error at line 1, column 1" 71 | } 72 | -------------------------------------------------------------------------------- /tests/posn_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Main (main) where 4 | import System.Exit 5 | import Prelude hiding (lex) 6 | 7 | } 8 | 9 | %wrapper "posn" 10 | %token "Token s" 11 | %typeclass "Read s" 12 | 13 | tokens :- 14 | 15 | [a-b]+$ { idtoken 0 } 16 | [c-d]+/"." { idtoken 1 } 17 | [e-f]+/{ tokpred } { idtoken 2 } 18 | ^[g-h]+$ { idtoken 3 } 19 | ^[i-j]+/"." { idtoken 4 } 20 | ^[k-l]+/{ tokpred } { idtoken 5 } 21 | [m-n]+$ { idtoken 6 } 22 | [o-p]+/"." { idtoken 7 } 23 | [q-r]+/{ tokpred } { idtoken 8 } 24 | [0-1]^[s-t]+$ { idtoken 9 } 25 | [2-3]^[u-v]+/"." { idtoken 10 } 26 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 27 | [y-z]+ { idtoken 12 } 28 | [A-B]+$ ; 29 | [C-D]+/"." ; 30 | [E-F]+/{ tokpred } ; 31 | ^[G-H]+$ ; 32 | ^[I-J]+/"." ; 33 | ^[K-L]+/{ tokpred } ; 34 | [M-N]+$ ; 35 | [O-P]+/"." ; 36 | [Q-R]+/{ tokpred } ; 37 | [0-1]^[S-T]+$ ; 38 | [2-3]^[U-V]+/"." ; 39 | [4-5]^[W-X]+/{ tokpred } ; 40 | [Y-Z]+ ; 41 | \. 
; 42 | [ \n\t\r]+ ; 43 | [0-9] ; 44 | 45 | { 46 | 47 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 48 | tokpred _ _ _ _ = True 49 | 50 | idtoken :: Read s => Int -> AlexPosn -> String -> Token s 51 | idtoken n _ s = Id n (read ("\"" ++ s ++ "\"")) 52 | 53 | data Token s = Id Int s 54 | deriving (Show, Ord, Eq) 55 | 56 | lex :: Read s => String -> [Token s] 57 | lex = alexScanTokens 58 | 59 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 60 | 61 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 62 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 63 | Id 10 "uuvu", Id 11 "xxw"] 64 | 65 | main :: IO () 66 | main = 67 | let 68 | result :: [Token String] 69 | result = lex input 70 | in do 71 | if result /= tokens 72 | then exitFailure 73 | else exitWith ExitSuccess 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tests/posn_typeclass_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import Data.ByteString.Lazy.Char8 as Lazy 10 | 11 | } 12 | 13 | %wrapper "posn-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." 
; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> AlexPosn -> Lazy.ByteString -> Token s 55 | idtoken n _ s = Id n (read ("\"" ++ (Lazy.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s 58 | deriving (Show, Ord, Eq) 59 | 60 | lex :: Read s => Lazy.ByteString -> [Token s] 61 | lex = alexScanTokens 62 | 63 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 64 | 65 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 66 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 67 | Id 10 "uuvu", Id 11 "xxw"] 68 | 69 | main :: IO () 70 | main = 71 | let 72 | result :: [Token String] 73 | result = lex input 74 | in do 75 | if result /= tokens 76 | then exitFailure 77 | else exitWith ExitSuccess 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tests/posn_typeclass_strict_text.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import qualified Data.Text as Text 10 | 11 | } 12 | 13 | %wrapper "posn-strict-text" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." 
; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> AlexPosn -> Text.Text -> Token s 55 | idtoken n _ s = Id n (read ("\"" ++ (Text.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s 58 | deriving (Show, Ord, Eq) 59 | 60 | lex :: Read s => Text.Text -> [Token s] 61 | lex = alexScanTokens 62 | 63 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 64 | 65 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 66 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 67 | Id 10 "uuvu", Id 11 "xxw"] 68 | 69 | main :: IO () 70 | main = 71 | let 72 | result :: [Token String] 73 | result = lex input 74 | in do 75 | if result /= tokens 76 | then exitFailure 77 | else exitWith ExitSuccess 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tests/simple.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Tests the basic operation. 3 | module Main where 4 | 5 | import Data.Char (toUpper) 6 | import Control.Monad 7 | import System.Exit 8 | import System.IO 9 | } 10 | 11 | %wrapper "monad" 12 | 13 | @word = [A-Za-z]+ 14 | 15 | tokens :- 16 | 17 | $white+ ; 18 | 19 | <0> { 20 | "magic" { magic } -- should override later patterns 21 | ^ @word $ { both } -- test both trailing and left context 22 | @word $ { eol } -- test trailing context 23 | ^ @word { bol } -- test left context 24 | @word { word } 25 | } 26 | 27 | <0> \( { begin parens } 28 | [A-Za-z]+ { parenword } 29 | \) { begin 0 } 30 | 31 | { 32 | {- we can now have comments in source code? 
-} 33 | word (_,_,_,input) len = return (take len input) 34 | 35 | both (_,_,_,input) len = return ("BOTH:"++ take len input) 36 | 37 | eol (_,_,_,input) len = return ("EOL:"++ take len input) 38 | 39 | bol (_,_,_,input) len = return ("BOL:"++ take len input) 40 | 41 | parenword (_,_,_,input) len = return (map toUpper (take len input)) 42 | 43 | magic (_,_,_,_) _ = return "PING!" 44 | 45 | alexEOF = return "stopped." 46 | 47 | scanner str = runAlex str $ do 48 | let loop = do tok <- alexMonadScan 49 | if tok == "stopped." || tok == "error." 50 | then return [tok] 51 | else do toks <- loop 52 | return (tok:toks) 53 | loop 54 | 55 | main = do 56 | let test1 = scanner str1 57 | when (test1 /= out1) $ 58 | do hPutStrLn stderr "Test 1 failed:" 59 | print test1 60 | exitFailure 61 | 62 | let test2 = scanner str2 63 | when (test2 /= out2) $ 64 | do hPutStrLn stderr "Test 2 failed:" 65 | print test2 66 | exitFailure 67 | 68 | str1 = "a b c (d e f) magic (magic) eol\nbol \nboth\n" 69 | out1 = Right ["BOL:a","b","c","D","E","F","PING!","MAGIC","EOL:eol", "BOL:bol", "BOTH:both", "stopped."] 70 | 71 | str2 = "." 72 | out2 = Left "lexical error at line 1, column 1" 73 | } 74 | -------------------------------------------------------------------------------- /tests/strict_text_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import qualified Data.Text as Text 10 | 11 | } 12 | 13 | %wrapper "strict-text" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." { idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." 
{ idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> Text.Text -> Token s 55 | idtoken n s = Id n (read ("\"" ++ (Text.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s 58 | deriving (Show, Ord, Eq) 59 | 60 | lex :: Read s => Text.Text -> [Token s] 61 | lex = alexScanTokens 62 | 63 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 64 | 65 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 66 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 67 | Id 10 "uuvu", Id 11 "xxw"] 68 | 69 | main :: IO () 70 | main = 71 | let 72 | result :: [Token String] 73 | result = lex input 74 | in do 75 | if result /= tokens 76 | then exitFailure 77 | else exitWith ExitSuccess 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tests/strict_typeclass.x: -------------------------------------------------------------------------------- 1 | { 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module Main (main) where 6 | import System.Exit 7 | import Prelude hiding (lex) 8 | 9 | import Data.ByteString.Char8 as Strict 10 | 11 | } 12 | 13 | %wrapper "strict-bytestring" 14 | %token "Token s" 15 | %typeclass "Read s" 16 | 17 | tokens :- 18 | 19 | [a-b]+$ { idtoken 0 } 20 | [c-d]+/"." { idtoken 1 } 21 | [e-f]+/{ tokpred } { idtoken 2 } 22 | ^[g-h]+$ { idtoken 3 } 23 | ^[i-j]+/"." 
{ idtoken 4 } 24 | ^[k-l]+/{ tokpred } { idtoken 5 } 25 | [m-n]+$ { idtoken 6 } 26 | [o-p]+/"." { idtoken 7 } 27 | [q-r]+/{ tokpred } { idtoken 8 } 28 | [0-1]^[s-t]+$ { idtoken 9 } 29 | [2-3]^[u-v]+/"." { idtoken 10 } 30 | [4-5]^[w-x]+/{ tokpred } { idtoken 11 } 31 | [y-z]+ { idtoken 12 } 32 | [A-B]+$ ; 33 | [C-D]+/"." ; 34 | [E-F]+/{ tokpred } ; 35 | ^[G-H]+$ ; 36 | ^[I-J]+/"." ; 37 | ^[K-L]+/{ tokpred } ; 38 | [M-N]+$ ; 39 | [O-P]+/"." ; 40 | [Q-R]+/{ tokpred } ; 41 | [0-1]^[S-T]+$ ; 42 | [2-3]^[U-V]+/"." ; 43 | [4-5]^[W-X]+/{ tokpred } ; 44 | [Y-Z]+ ; 45 | \. ; 46 | [ \n\t\r]+ ; 47 | [0-9] ; 48 | 49 | { 50 | 51 | tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool 52 | tokpred _ _ _ _ = True 53 | 54 | idtoken :: Read s => Int -> Strict.ByteString -> Token s 55 | idtoken n s = Id n (read ("\"" ++ (Strict.unpack s) ++ "\"")) 56 | 57 | data Token s = Id Int s deriving Eq 58 | 59 | lex :: Read s => Strict.ByteString -> [Token s] 60 | lex = alexScanTokens 61 | 62 | input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" 63 | 64 | tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", 65 | Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", 66 | Id 10 "uuvu", Id 11 "xxw"] 67 | 68 | main :: IO () 69 | main = 70 | let 71 | result :: [Token String] 72 | result = lex input 73 | in do 74 | if result /= tokens 75 | then exitFailure 76 | else exitWith ExitSuccess 77 | 78 | } 79 | -------------------------------------------------------------------------------- /tests/tokens.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | import System.Exit 4 | } 5 | 6 | %wrapper "basic" 7 | 8 | $digit=0-9 -- digits 9 | $alpha = [a-zA-Z] -- alphabetic characters 10 | 11 | tokens :- 12 | 13 | $white+ ; 14 | "--".* ; 15 | let { \_ -> Let } 16 | in { \_ -> In } 17 | $digit+ { \s -> Int (read s) } 18 | [\=\+\-\*\/\(\)] { \s -> Sym (head s) } 19 | $alpha [$alpha $digit \_ \']* { \s -> 
Var s } 20 | 21 | -- a left-context pattern for testing 22 | ^ \# ; 23 | 24 | { 25 | -- Each right-hand side has type :: String -> Token 26 | 27 | -- The token type: 28 | data Token = 29 | Let | 30 | In | 31 | Sym Char | 32 | Var String | 33 | Int Int | 34 | Err 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 41 | result1 = identifierWithLotsOfQuotes'' 42 | 43 | identifierWithLotsOfQuotes'' :: [Token] 44 | identifierWithLotsOfQuotes'' = 45 | [Let,In,Int 12334,Sym '=',Sym '+',Sym '*',Var "foo",Var "bar__'"] 46 | 47 | } 48 | -------------------------------------------------------------------------------- /tests/tokens_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import Data.ByteString.Lazy.Char8 (unpack) 6 | } 7 | 8 | %wrapper "basic-bytestring" 9 | %encoding "latin1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { \_ -> Let } 19 | in { \_ -> In } 20 | $digit+ { \s -> Int (read (unpack s)) } 21 | [\=\+\-\*\/\(\)] { \s -> Sym (head (unpack s)) } 22 | $alpha [$alpha $digit \_ \']* { \s -> Var (unpack s) } 23 | 24 | { 25 | -- Each right-hand side has type :: ByteString -> Token 26 | 27 | -- The token type: 28 | data Token = 29 | Let | 30 | In | 31 | Sym Char | 32 | Var String | 33 | Int Int | 34 | Err 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 41 | result1 = [Let,In,Int 12334,Sym '=',Sym '+',Sym '*',Var "foo",Var "bar__'"] 42 | 43 | } 44 | -------------------------------------------------------------------------------- /tests/tokens_bytestring_unicode.x: 
-------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import Data.ByteString.Lazy.Char8 (unpack) 6 | } 7 | 8 | %wrapper "basic-bytestring" 9 | %encoding "utf-8" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Zαβ] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { \_ -> Let } 19 | in { \_ -> In } 20 | $digit+ { \s -> Int (read (unpack s)) } 21 | [\=\+\-\*\/\(\)] { \s -> Sym (head (unpack s)) } 22 | $alpha [$alpha $digit \_ \']* { \s -> Var (unpack s) } 23 | 24 | { 25 | -- Each right-hand side has type :: ByteString -> Token 26 | 27 | -- The token type: 28 | data Token = 29 | Let | 30 | In | 31 | Sym Char | 32 | Var String | 33 | Int Int | 34 | Err 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | -- \206\177\206\178\206\178 is "αββ" utf-8 encoded 41 | test1 = alexScanTokens " let in 012334\n=+*foo \206\177\206\178\206\178 bar__'" 42 | result1 = [Let,In,Int 12334,Sym '=',Sym '+',Sym '*',Var "foo",Var "\206\177\206\178\206\178",Var "bar__'"] 43 | } 44 | -------------------------------------------------------------------------------- /tests/tokens_gscan.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | import System.Exit 4 | } 5 | 6 | %wrapper "gscan" 7 | 8 | $digit = 0-9 -- digits 9 | $alpha = [a-zA-Z] -- alphabetic characters 10 | 11 | tokens :- 12 | 13 | $white+ ; 14 | "--".* ; 15 | let { tok (\p _ -> Let p) } 16 | in { tok (\p _ -> In p) } 17 | $digit+ { tok (\p s -> Int p (read s)) } 18 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head s)) } 19 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p s) } 20 | 21 | { 22 | -- Some action helpers: 23 | tok f p _ str len cont (sc,state) = f p (take len str) : cont (sc,state) 24 | 25 | -- The token type: 26 | data 
Token = 27 | Let AlexPosn | 28 | In AlexPosn | 29 | Sym AlexPosn Char | 30 | Var AlexPosn String | 31 | Int AlexPosn Int | 32 | Err AlexPosn 33 | deriving (Eq,Show) 34 | 35 | main = if test1 /= result1 then exitFailure 36 | else exitWith ExitSuccess 37 | 38 | test1 = alexGScan stop undefined " let in 012334\n=+*foo bar__'" 39 | 40 | stop _ _ "" (_,_) = [] 41 | stop _ _ _ (_,_) = error "lexical error" 42 | 43 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'"] 44 | } 45 | -------------------------------------------------------------------------------- /tests/tokens_monadUserState_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import qualified Data.ByteString.Lazy.Char8 as B 6 | } 7 | 8 | %wrapper "monadUserState-bytestring" 9 | %encoding "iso-8859-1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { tok (\p _ -> Let p) } 19 | in { tok (\p _ -> In p) } 20 | $digit+ { tok (\p s -> Int p (read (B.unpack s))) } 21 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (B.unpack s))) } 22 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (B.unpack s)) } 23 | 24 | { 25 | -- Each right-hand side has type :: AlexPosn -> String -> Token 26 | 27 | -- Some action helpers: 28 | tok f (p,_,input,_) len = return (f p (B.take (fromIntegral len) input)) 29 | 30 | -- The token type: 31 | data Token 32 | = Let AlexPosn 33 | | In AlexPosn 34 | | Sym AlexPosn Char 35 | | Var AlexPosn String 36 | | Int AlexPosn Int 37 | | Err AlexPosn 38 | | EOF 39 | deriving (Eq,Show) 40 | 41 | alexEOF = return EOF 42 | 43 | main = if test1 /= result1 then do print test1; exitFailure 44 | else exitWith ExitSuccess 45 | 
46 | type AlexUserState = () 47 | alexInitUserState = () 48 | 49 | scanner str = runAlex str $ do 50 | let 51 | loop = do 52 | 53 | -- Andreas Abel, 2023-12-30, issue #220: 54 | -- Test that alex{G,S}etUserState are in scope. 55 | () <- alexGetUserState 56 | alexSetUserState () 57 | 58 | tk <- alexMonadScan 59 | if tk == EOF 60 | then return [tk] 61 | else do 62 | toks <- loop 63 | return (tk:toks) 64 | loop 65 | 66 | test1 = case scanner " let in 012334\n=+*foo bar__'" of 67 | Left err -> error err 68 | Right toks -> toks 69 | 70 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'", EOF] 71 | 72 | 73 | } 74 | -------------------------------------------------------------------------------- /tests/tokens_monadUserState_strict_text.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import qualified Data.Text 6 | } 7 | 8 | %wrapper "monadUserState-strict-text" 9 | %encoding "iso-8859-1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { tok (\p _ -> Let p) } 19 | in { tok (\p _ -> In p) } 20 | $digit+ { tok (\p s -> Int p (read (Data.Text.unpack s))) } 21 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (Data.Text.unpack s))) } 22 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (Data.Text.unpack s)) } 23 | 24 | { 25 | -- Each right-hand side has type :: AlexPosn -> Data.Text.Text -> Token 26 | 27 | -- Some action helpers: 28 | tok f (p,_,_,input) len = return (f p (Data.Text.take (fromIntegral len) input)) 29 | 30 | -- The token type: 31 | data Token = 32 | Let AlexPosn | 33 | In AlexPosn | 34 | Sym AlexPosn Char | 35 | Var AlexPosn String | 36 | Int AlexPosn Int | 37 | Err AlexPosn | 38 | EOF 
39 | deriving (Eq,Show) 40 | 41 | alexEOF = return EOF 42 | 43 | main = if test1 /= result1 then do print test1; exitFailure 44 | else exitWith ExitSuccess 45 | 46 | type AlexUserState = () 47 | alexInitUserState = () 48 | 49 | scanner str = runAlex str $ do 50 | let loop = do tk <- alexMonadScan 51 | if tk == EOF 52 | then return [tk] 53 | else do toks <- loop 54 | return (tk:toks) 55 | loop 56 | 57 | test1 = case scanner " let in 012334\n=+*foo bar__'" of 58 | Left err -> error err 59 | Right toks -> toks 60 | 61 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'", EOF] 62 | 63 | 64 | } 65 | -------------------------------------------------------------------------------- /tests/tokens_monad_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import qualified Data.ByteString.Lazy.Char8 as B 6 | } 7 | 8 | %wrapper "monad-bytestring" 9 | %encoding "Latin1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { tok (\p _ -> Let p) } 19 | in { tok (\p _ -> In p) } 20 | $digit+ { tok (\p s -> Int p (read (B.unpack s))) } 21 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (B.unpack s))) } 22 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (B.unpack s)) } 23 | 24 | { 25 | -- Each right-hand side has type :: AlexPosn -> String -> Token 26 | 27 | -- Some action helpers: 28 | tok f (p,_,input,_) len = return (f p (B.take (fromIntegral len) input)) 29 | 30 | -- The token type: 31 | data Token = 32 | Let AlexPosn | 33 | In AlexPosn | 34 | Sym AlexPosn Char | 35 | Var AlexPosn String | 36 | Int AlexPosn Int | 37 | Err AlexPosn | 38 | EOF 39 | deriving (Eq,Ord,Show) 40 | -- Adding 'Ord' is 
possible since 3.3.0 (issue #233) 41 | -- where an 'Ord' instance for 'AlexPosn' is generated by default. 42 | 43 | alexEOF = return EOF 44 | 45 | main = if test1 /= result1 then do print test1; exitFailure 46 | else exitWith ExitSuccess 47 | 48 | scanner str = runAlex str $ do 49 | let loop = do tk <- alexMonadScan 50 | if tk == EOF 51 | then return [tk] 52 | else do toks <- loop 53 | return (tk:toks) 54 | loop 55 | 56 | test1 = case scanner " let in 012334\n=+*foo bar__'" of 57 | Left err -> error err 58 | Right toks -> toks 59 | 60 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'", EOF] 61 | 62 | 63 | } 64 | -------------------------------------------------------------------------------- /tests/tokens_posn.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | import System.Exit 4 | } 5 | 6 | %wrapper "posn" 7 | 8 | $digit = 0-9 -- digits 9 | $alpha = [a-zA-Z] -- alphabetic characters 10 | 11 | tokens :- 12 | 13 | $white+ ; 14 | "--".* ; 15 | let { tok (\p _ -> Let p) } 16 | in { tok (\p _ -> In p) } 17 | $digit+ { tok (\p s -> Int p (read s)) } 18 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head s)) } 19 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p s) } 20 | 21 | { 22 | -- Each right-hand side has type :: AlexPosn -> String -> Token 23 | 24 | -- Some action helpers: 25 | tok f p s = f p s 26 | 27 | -- The token type: 28 | data Token = 29 | Let AlexPosn | 30 | In AlexPosn | 31 | Sym AlexPosn Char | 32 | Var AlexPosn String | 33 | Int AlexPosn Int | 34 | Err AlexPosn 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 41 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) 
'=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'"] 42 | 43 | 44 | } 45 | -------------------------------------------------------------------------------- /tests/tokens_posn_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import Data.ByteString.Lazy.Char8 (unpack) 6 | } 7 | 8 | %wrapper "posn-bytestring" 9 | %encoding "UTF8" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { tok (\p _ -> Let p) } 19 | in { tok (\p _ -> In p) } 20 | $digit+ { tok (\p s -> Int p (read (unpack s))) } 21 | [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (unpack s))) } 22 | $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (unpack s)) } 23 | 24 | { 25 | -- Each right-hand side has type :: AlexPosn -> String -> Token 26 | 27 | -- Some action helpers: 28 | tok f p s = f p s 29 | 30 | -- The token type: 31 | data Token = 32 | Let AlexPosn | 33 | In AlexPosn | 34 | Sym AlexPosn Char | 35 | Var AlexPosn String | 36 | Int AlexPosn Int | 37 | Err AlexPosn 38 | deriving (Eq,Show) 39 | 40 | main = if test1 /= result1 then exitFailure 41 | else exitWith ExitSuccess 42 | 43 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 44 | result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'"] 45 | 46 | 47 | } 48 | -------------------------------------------------------------------------------- /tests/tokens_scan_user.x: -------------------------------------------------------------------------------- 1 | { 2 | module Main (main) where 3 | import System.Exit 4 | } 5 | 6 | %wrapper "basic" -- Defines: AlexInput, alexGetByte, alexPrevChar 7 | 8 | $digit = 0-9 9 | $alpha 
= [a-zA-Z] 10 | $ws = [\ \t\n] 11 | 12 | tokens :- 13 | 14 | 5 / {\ u _ibt _l _iat -> u == FiveIsMagic} { \s -> TFive (head s) } 15 | $digit { \s -> TDigit (head s) } 16 | $alpha { \s -> TAlpha (head s) } 17 | $ws { \s -> TWSpace (head s) } 18 | 19 | { 20 | 21 | data Token = TDigit Char 22 | | TAlpha Char 23 | | TWSpace Char 24 | | TFive Char -- Predicated only 25 | | TLexError 26 | deriving (Eq,Show) 27 | 28 | data UserLexerMode = NormalMode 29 | | FiveIsMagic 30 | deriving Eq 31 | 32 | main | test1 /= result1 = exitFailure 33 | | test2 /= result2 = exitFailure 34 | -- all succeeded 35 | | otherwise = exitWith ExitSuccess 36 | 37 | run_lexer :: UserLexerMode -> String -> [Token] 38 | run_lexer m s = go ('\n', [], s) 39 | where go i@(_,_,s') = case alexScanUser m i 0 of 40 | AlexEOF -> [] 41 | AlexError _i -> [TLexError] 42 | AlexSkip i' _len -> go i' 43 | AlexToken i' len t -> t (take len s') : go i' 44 | 45 | test1 = run_lexer FiveIsMagic "5 x" 46 | result1 = [TFive '5',TWSpace ' ',TAlpha 'x'] 47 | 48 | test2 = run_lexer NormalMode "5 x" 49 | result2 = [TDigit '5',TWSpace ' ',TAlpha 'x'] 50 | } 51 | -------------------------------------------------------------------------------- /tests/tokens_strict_bytestring.x: -------------------------------------------------------------------------------- 1 | { 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Main (main) where 4 | import System.Exit 5 | import Data.ByteString.Char8 (unpack) 6 | } 7 | 8 | %wrapper "strict-bytestring" 9 | %encoding "ISO-8859-1" 10 | 11 | $digit = 0-9 -- digits 12 | $alpha = [a-zA-Z] -- alphabetic characters 13 | 14 | tokens :- 15 | 16 | $white+ ; 17 | "--".* ; 18 | let { \_ -> Let } 19 | in { \_ -> In } 20 | $digit+ { \s -> Int (read (unpack s)) } 21 | [\=\+\-\*\/\(\)] { \s -> Sym (head (unpack s)) } 22 | $alpha [$alpha $digit \_ \']* { \s -> Var (unpack s) } 23 | 24 | { 25 | -- Each right-hand side has type :: ByteString -> Token 26 | 27 | -- The token type: 28 | data Token = 29 | Let | 30 | 
In | 31 | Sym Char | 32 | Var String | 33 | Int Int | 34 | Err 35 | deriving (Eq,Show) 36 | 37 | main = if test1 /= result1 then exitFailure 38 | else exitWith ExitSuccess 39 | 40 | test1 = alexScanTokens " let in 012334\n=+*foo bar__'" 41 | result1 = [Let,In,Int 12334,Sym '=',Sym '+',Sym '*',Var "foo",Var "bar__'"] 42 | 43 | } 44 | -------------------------------------------------------------------------------- /tests/unicode.x: -------------------------------------------------------------------------------- 1 | { 2 | -- Tests the basic operation. 3 | module Main where 4 | 5 | import Data.Char (toUpper) 6 | import Control.Monad 7 | import System.Exit 8 | import System.IO 9 | } 10 | 11 | %wrapper "monad" 12 | 13 | @word = [A-Za-z]+ 14 | 15 | tokens :- 16 | 17 | <0> { 18 | "αω" { string } 19 | [AΓ] { character } 20 | . { other } 21 | } 22 | 23 | 24 | { 25 | string :: AlexInput -> Int -> Alex String 26 | string (_,_,_,_) _ = return "string!" 27 | 28 | other :: AlexInput -> Int -> Alex String 29 | other (_,_,_,input) len = return (take len input) 30 | 31 | character :: AlexInput -> Int -> Alex String 32 | character (_,_,_,_) _ = return "PING!" 33 | 34 | alexEOF :: Alex String 35 | alexEOF = return "stopped." 36 | 37 | scanner :: String -> Either String [String] 38 | scanner str = runAlex str $ do 39 | let loop = do tok <- alexMonadScan 40 | if tok == "stopped." || tok == "error." 
41 | then return [tok] 42 | else do toks <- loop 43 | return (tok:toks) 44 | loop 45 | 46 | main :: IO () 47 | main = do 48 | let test1 = scanner str1 49 | when (test1 /= out1) $ 50 | do hPutStrLn stderr "Test 1 failed:" 51 | print test1 52 | exitFailure 53 | 54 | let test2 = scanner str2 55 | when (test2 /= out2) $ 56 | do hPutStrLn stderr "Test 2 failed:" 57 | print test2 58 | exitFailure 59 | 60 | let test3 = scanner str3 61 | when (test3 /= out3) $ 62 | do hPutStrLn stderr "Test 3 failed:" 63 | print test3 64 | exitFailure 65 | 66 | let test4 = scanner str4 67 | when (test4 /= out4) $ 68 | do hPutStrLn stderr "Test 4 failed:" 69 | print test4 70 | exitFailure 71 | 72 | 73 | 74 | str1 = "A." 75 | out1 = Right ["PING!",".","stopped."] 76 | 77 | str2 = "\n" 78 | out2 = Left "lexical error at line 1, column 1" 79 | 80 | 81 | str3 = "αω --" 82 | out3 = Right ["string!"," ","-","-","stopped."] 83 | 84 | str4 = "βΓ" 85 | out4 = Right ["β","PING!","stopped."] 86 | 87 | } 88 | --------------------------------------------------------------------------------