├── .gitignore ├── ChangeLog.md ├── LICENSE ├── README.md ├── Setup.hs ├── bin ├── make-pkg └── test-all ├── docs ├── IntPair_submachine.png ├── MyChar_ATN.png ├── PExpression_ATN.png ├── PTuple_1.png ├── PTuple_2.png ├── PTuple_3.png ├── PTuple_4.png └── pads_to_atn_conversion.md ├── lang_demo.hs ├── package.yaml ├── src ├── Data │ └── Set │ │ └── Monad.hs ├── Language │ ├── ANTLR4.hs │ └── ANTLR4 │ │ ├── Boot │ │ ├── Parser.hs.boot │ │ ├── Quote.hs │ │ ├── SplicedParser.hs │ │ └── Syntax.hs │ │ ├── FileOpener.hs │ │ ├── G4.hs │ │ ├── Parser.hs │ │ ├── Parser.hs.bak │ │ ├── Regex.hs.boot │ │ └── Syntax.hs └── Text │ └── ANTLR │ ├── Allstar.hs │ ├── Allstar │ ├── ATN.hs │ ├── ParserGenerator.hs │ └── Stacks.hs │ ├── Common.hs │ ├── Grammar.hs │ ├── LL1.hs │ ├── LR.hs │ ├── Language.hs │ ├── Lex.hs │ ├── Lex │ ├── Automata.hs │ ├── DFA.hs │ ├── NFA.hs │ ├── Regex.hs │ └── Tokenizer.hs │ ├── MultiMap.hs │ ├── Parser.hs │ ├── Pretty.hs │ └── Set.hs ├── stack.yaml └── test ├── GrammarReader ├── GrammarReader.hs ├── output │ └── generated_atn_lookup.txt └── sample_grammars │ └── sample_bnf_grammar.txt ├── allstar ├── AllStarTests.hs ├── ConvertDFA.hs ├── ConvertP.hs ├── Main.hs └── README.md ├── atn ├── ATN.hs └── Main.hs ├── c ├── C.g4 ├── CParser.hs └── Main.hs ├── chisel ├── Language │ └── Chisel │ │ ├── Grammar.hs │ │ ├── Parser.hs │ │ └── Syntax.hs └── Main.hs ├── coreg4 ├── G4.hs ├── G4Fast.hs ├── G4Parser.hs ├── Hello.hs ├── HelloParser.hs └── Main.hs ├── g4 ├── DoubleSemi.hs ├── DoubleSemiP.hs ├── Empty.hs ├── EmptyP.hs ├── Main.hs ├── Optional.hs └── OptionalParser.hs ├── lexer └── Main.hs ├── ll └── Main.hs ├── lr ├── EOF.hs ├── EOFGrammar.hs ├── GLRInc.hs ├── GLRIncGrammar.hs ├── GLRPartial.hs ├── GLRPartialGrammar.hs └── Main.hs ├── sexpression ├── Grammar.hs ├── Parser.hs └── sexpression.hs ├── shared-hunit └── Text │ └── ANTLR │ └── HUnit.hs ├── shared └── Example │ └── Grammar.hs ├── simple ├── Grammar.hs └── Main.hs ├── swift ├── Grammar.hs ├── Parser.hs ├── Swift.g4 └── swift.hs ├── template └── Main.hs ├── unit ├── Main.hs └── PlusBug0.hs └── unit0 ├── DupTerms.hs ├── DupTermsGrammar.hs ├── Main.hs ├── Star0.hs ├── Star0Grammar.hs ├── Star1.hs └── Star1Grammar.hs /.gitignore: -------------------------------------------------------------------------------- 1 | /*.local 2 | /stack.yaml.lock 3 | cabal.config 4 | antlr-haskell.cabal 5 | /books 6 | .stack-work/ 7 | dist 8 | /dist-* 9 | cabal-dev 10 | *.o 11 | *.hi 12 | *.chi 13 | *.chs.h 14 | *.dyn_o 15 | *.dyn_hi 16 | .virtualenv 17 | .hpc 18 | .hsenv 19 | .cabal-sandbox/ 20 | cabal.sandbox.config 21 | *.prof 22 | *.aux 23 | *.hp 24 | *~ 25 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | * April 14, 2018: Moved to hpack's package.yaml format instead of native cabal 4 | file. 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2018, Karl Cronburg & Sam Lasser 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 
10 | 
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 | 
16 | * Neither the name of Karl Cronburg, Sam Lasser, nor the names of other
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # antlr-haskell
2 | A Haskell implementation of ANTLR.
3 | 
4 | In implementing ANTLR we referenced the behavior of the original Java version
5 | (ANTLR4):
6 | [The Definitive ANTLR 4 Reference.](https://pragprog.com/book/tpantlr2/the-definitive-antlr-4-reference)
7 | However, we have taken considerable liberty in the design of this library
8 | compared to the workflow of the original Java version. In particular, the
9 | Haskell implementation follows these principles:
10 | 
11 | - Parsing backends should be interchangeable
12 | - GLR, LR, SLR, LL, ALL(\*)
13 | - Code should be first class and declarative
14 | - The implementation of G4 is metacircular
15 | - Regular expressions are interpreted
16 | - Implement algorithms from first principles
17 | - Set notation is used in implementing LL and LR algorithms.
18 | - Pure functional implementations of parsing algorithms can eventually support
19 | embedding of arbitrary (including IO) actions without breaking the predictive
20 | parsing abstraction.
21 | 
22 | More info can be found here:
23 | [https://www.cronburg.com/2018/antlr-haskell-project/](https://www.cronburg.com/2018/antlr-haskell-project/)
24 | 
25 | ## Build instructions
26 | 
27 | The library can be built with:
28 | 
29 | ```bash
30 | $ stack build # stack version 2.3.3
31 | $ stack test :simple
32 | ```
33 | 
34 | Or with cabal (tested on 3.0.0.0) like so:
35 | 
36 | ```bash
37 | $ hpack
38 | $ cabal update
39 | $ cabal configure
40 | $ cabal new-build
41 | $ cabal test sexpression
42 | ...
43 | Test suite sexpression: RUNNING...
44 | Test suite sexpression: PASS
45 | Test suite logged to:
46 | /antlr-haskell/dist-newstyle/build/x86_64-linux/ghc-8.6.5/antlr-haskell-0.1.0.1/t/sexpression/test/antlr-haskell-0.1.0.1-sexpression.log
47 | 1 of 1 test suites (1 of 1 test cases) passed. 
48 | ```
49 | 
50 | Here's a good set of tests to run when making changes to the library and
51 | you're unsure what those changes may affect:
52 | 
53 | ```bash
54 | stack test :simple :atn :ll :lr :sexpression :allstar :c
55 | ```
56 | 
57 | Then compare the results with those of the upstream branch. Some of the
58 | GLR features (incremental and partial tokenization, notably) are still experimental,
59 | and so there are known test cases which currently fail.
60 | 
61 | ## Version History
62 | 
63 | - September 25th, 2020. Released version 0.1.0.1: bug fixes, documentation, and
64 | library versioning updates.
65 | 
66 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple
2 | main = defaultMain
3 | -------------------------------------------------------------------------------- /bin/make-pkg: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 | SCRIPTDIR="$(cd "`dirname $0`"; pwd)"
3 | GITDIR="$(dirname "$SCRIPTDIR")"
4 | 
5 | # =============================================================================
6 | # MODIFY THESE PARAMETERS EACH TIME YOU PUBLISH A NEW VERSION:
7 | arch=x86_64-linux-tinfo6
8 | cVersion=3.0.1.0
9 | pVersion=0.1.0.1
10 | # =============================================================================
11 | 
12 | gzDir=$GITDIR/.stack-work/dist/$arch/Cabal-$cVersion
13 | gzFile=antlr-haskell-$pVersion.tar.gz
14 | 
15 | echo "Changing to directory $GITDIR"
16 | cd "$GITDIR"
17 | 
18 | # Generates .stack-work/dist/[arch]/Cabal-[version]/antlr-haskell-[version].tar.gz :
19 | echo "Generating $gzFile"
20 | stack sdist --pvp-bounds both
21 | 
22 | # Generates .stack-work/dist/[arch]/Cabal-[version]/doc/html/antlr-haskell/* :
23 | echo "Generating haddock documentation"
24 | stack haddock --haddock-hyperlink-source # --no-haddock-deps
25 | 
26 | # Copy source gzipped tar file to dist:
27 | mkdir -p dist
28 | cp $gzDir/$gzFile dist/
29 | 
30 | # Copy over the haddock documentation:
31 | destDir=dist/antlr-haskell-$pVersion/antlr-haskell-$pVersion-docs
32 | mkdir -p $destDir
33 | cp -r $gzDir/doc/html/antlr-haskell $destDir
34 | 
35 | # Generate tar file to upload to hackage:
36 | echo "Making haddock tar file for *manual* upload to https://hackage.haskell.org/package/antlr-haskell-$pVersion/maintain/docs"
37 | tarFile=antlr-haskell-$pVersion-docs.tar
38 | tar --format=ustar -cvf dist/$tarFile $destDir | grep index.html
39 | 
40 | -------------------------------------------------------------------------------- /bin/test-all: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 | 
3 | # && stack test antlr-haskell:Stacks \
4 | 
5 | OPTS=--ghc-options="-ddump-splices"
6 | 
7 | #TESTS="simple template atn coreg4 ll lr lexer g4 sexpression simple allstar chisel"
8 | TESTS=$(cd test
9 | for f in *; do
10 | if ! [[ "$f" = "shared"* ]]; then
11 | echo -n "$f "
12 | fi
13 | done)
14 | count=0; failures=""
15 | echo "Running tests: $TESTS"
16 | 
17 | for t in $TESTS; do
18 | stack test antlr-haskell:$t $OPTS
19 | if [ $? 
-eq 1 ]; then 20 | count=$((count + 1)) 21 | failures="$t $failures" 22 | fi 23 | done 24 | echo "Total number of failures: $count" 25 | echo "Failed on: " $failures 26 | 27 | -------------------------------------------------------------------------------- /docs/IntPair_submachine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/IntPair_submachine.png -------------------------------------------------------------------------------- /docs/MyChar_ATN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/MyChar_ATN.png -------------------------------------------------------------------------------- /docs/PExpression_ATN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PExpression_ATN.png -------------------------------------------------------------------------------- /docs/PTuple_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_1.png -------------------------------------------------------------------------------- /docs/PTuple_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_2.png -------------------------------------------------------------------------------- /docs/PTuple_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_3.png -------------------------------------------------------------------------------- /docs/PTuple_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_4.png -------------------------------------------------------------------------------- /lang_demo.hs: -------------------------------------------------------------------------------- 1 | {-#LANGUAGE TypeFamilies #-} 2 | module LangDemo where 3 | 4 | -- If you never change the base value and are only adding expressions 5 | data BaseVal = IntV Int 6 | 7 | data BaseExp = ValE BaseVal 8 | | AddE BaseExp BaseExp 9 | 10 | data ExtExp = BaseE BaseExp 11 | | SubE ExtExp ExtExp 12 | 13 | 14 | 15 | class Lang lang where 16 | eval :: lang -> BaseVal 17 | 18 | instance Lang BaseExp where 19 | eval (ValE i) = i 20 | eval (AddE be1 be2) = 21 | let IntV i1 = eval be1 22 | IntV i2 = eval be2 23 | in IntV $ i1 + i2 24 | 25 | instance Lang ExtExp where 26 | eval (BaseE b) = eval b 27 | eval (SubE ee1 ee2) = 28 | let IntV i1 = eval ee1 29 | IntV i2 = eval ee2 30 | in IntV $ i1 - i2 31 | 32 | -- if you want to be able to change the set of values you can evaluate to when extending the language 33 | 34 | data BaseVal2 = IntV2 Int 35 | 36 | data BaseExp2 = ValBE2 BaseVal2 37 | | AddE2 BaseExp2 BaseExp2 38 | 39 | data ExtVal2 = StringV2 String 40 | 41 | data ExtExp2 = BaseE2 BaseExp2 42 | | ValEE2 ExtVal2 43 | | ConcatE2 ExtExp2 ExtExp2 44 | 45 | -- can use type 
families to suspend the value kind until later, indexing the family
46 | -- by the expression type so each language picks its own value representation.
47 | data family Val2 lang
48 | 
49 | class Lang2 lang where
50 | eval2 :: lang -> Val2 lang
51 | 
52 | -- define at some time t1 for the base language
53 | data instance Val2 BaseExp2 = BV2 BaseVal2
54 | 
55 | instance Lang2 BaseExp2 where
56 | eval2 (ValBE2 bv2) = BV2 bv2
57 | eval2 (AddE2 be1 be2) =
58 | let BV2 (IntV2 i1) = eval2 be1
59 | BV2 (IntV2 i2) = eval2 be2
60 | in BV2 $ IntV2 $ i1 + i2
61 | 
62 | -- extend at some later time t2 for the extended language (base values must
63 | -- still be representable, hence the BaseV2 constructor)
64 | data instance Val2 ExtExp2 = BaseV2 BaseVal2 | EV2 ExtVal2
65 | 
66 | instance Lang2 ExtExp2 where
67 | eval2 (BaseE2 be2) = case eval2 be2 of BV2 bv2 -> BaseV2 bv2
68 | eval2 (ValEE2 ev2) = EV2 ev2
69 | eval2 (ConcatE2 ee1 ee2) =
70 | case (eval2 ee1, eval2 ee2) of
71 | (EV2 (StringV2 s1), EV2 (StringV2 s2)) -> EV2 $ StringV2 $ s1 ++ s2
72 | _ -> error "trying to concat something other than strings!"
73 | 
-------------------------------------------------------------------------------- /src/Language/ANTLR4.hs: -------------------------------------------------------------------------------- 1 | {-|
2 | Module : Language.ANTLR4
3 | Description : Primary entrypoint for top-level antlr-haskell users
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 | -}
10 | module Language.ANTLR4 (
11 | -- * Functions
12 | -- | Compile-time support for expanding LR-specific data types:
13 | mkLRParser
14 | -- | Other basic functions used in generated code:
15 | , (&&&)
16 | -- * Module exports
17 | -- | Most importantly for the Grammar type so that the quasiquoter can generate
18 | -- new grammars itself:
19 | , module Text.ANTLR.Grammar
20 | -- | Supporting data types and instances so that the spliced AST translator
21 | -- functions can talk about parse events, tokens, and EOF:
22 | , module Text.ANTLR.Parser
23 | -- | Regular expressions used during tokenization, as opposed to
24 | -- 'Language.ANTLR4.Regex' which are regexes used for G4 parsing:
25 | , module Text.ANTLR.Lex.Regex
26 | -- | The G4 quasiquoter and accompanying grammar:
27 | , g4, g4_parsers
28 | -- | For defining pretty-printable instances of quasiquoter-generated data types:
29 | , module Text.ANTLR.Pretty
30 | -- | Tokenizer:
31 | , module T
32 | -- * Type exports
33 | -- | Typeclass instances for quasiquoter-generated data types:
34 | , Hashable(..), Generic(..), Data(..), Lift(..)
35 | -- | Parser interface data types:
36 | , S.Set(..), T.Token(..), LRResult(..)
37 | -- | Grammar interface data types:
38 | , Directive(..), PRHS(..), TermAnnot(..) 
39 | ) 40 | where 41 | 42 | import Text.ANTLR.Grammar 43 | import Text.ANTLR.Parser 44 | 45 | import Text.ANTLR.LR as LR 46 | import Text.ANTLR.Lex.Tokenizer as T 47 | import Text.ANTLR.Set as S 48 | 49 | import Text.ANTLR.Set (Hashable(..), Generic(..)) 50 | import Text.ANTLR.Pretty 51 | import Control.Arrow ( (&&&) ) 52 | import Text.ANTLR.Lex.Regex 53 | 54 | import Language.ANTLR4.G4 55 | import Language.ANTLR4.Parser 56 | import Language.ANTLR4.Boot.Quote (mkLRParser, g4_parsers) 57 | 58 | import Data.Data (Data(..)) 59 | import Language.Haskell.TH.Lift (Lift(..)) 60 | 61 | import Language.ANTLR4.Boot.Syntax 62 | (Directive(..), PRHS(..), TermAnnot(..), ProdElem(..)) 63 | 64 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Boot/Parser.hs.boot: -------------------------------------------------------------------------------- 1 | {-#LANGUAGE QuasiQuotes, TemplateHaskell#-} 2 | {-| 3 | Module : Language.ANTLR4.Boot.Parser 4 | Description : ANTLR4 boot parser written with parsec 5 | Copyright : (c) Karl Cronburg, 2018 6 | License : BSD3 7 | Maintainer : karl@cs.tufts.edu 8 | Stability : experimental 9 | Portability : POSIX 10 | -} 11 | module Language.ANTLR4.Boot.Parser where 12 | -- syntax (Exp) 13 | import Language.Haskell.TH 14 | import Language.Haskell.TH.Syntax 15 | import qualified Debug.Trace as D (trace, traceM) 16 | 17 | import qualified Language.Haskell.Meta as LHM 18 | 19 | -- monadic ops 20 | import Control.Monad (mapM) 21 | -- parsec 22 | import Text.ParserCombinators.Parsec 23 | import qualified Text.Parsec.String as PS 24 | import qualified Text.Parsec.Prim as PP 25 | import qualified Text.Parsec.Token as PT 26 | import qualified Text.Parsec.Expr as PE 27 | import qualified Text.Parsec.Combinator as PC 28 | import Text.ParserCombinators.Parsec.Language (haskellStyle, reservedOpNames, reservedNames 29 | , commentLine, commentStart, commentEnd) 30 | import Text.ParserCombinators.Parsec.Pos (newPos) 31 | -- text munging 32 | import Data.Char 33 | 34 | import Language.ANTLR4.Boot.Syntax 35 | import Language.ANTLR4.Regex (parseRegex, regexP) 36 | 37 | --traceM s = D.traceM ("[ANTLR4.Boot.Parser] " ++ s) 38 | traceM = return 39 | 40 | ------------------------------------------------------------------------------ 41 | -- Or-Try Combinator (tries two parsers, one after the other) 42 | (<||>) a b = try a <|> try b 43 | 44 | parseANTLR :: SourceName -> Line -> Column -> String -> Either ParseError [G4] 45 | parseANTLR fileName line column input = 46 | PP.parse result fileName input 47 | where 48 | 49 | result = do 50 | setPosition (newPos fileName line column) 51 | whiteSpace 52 | x <- gExps 53 | traceM $ show x 54 | eof <|> errorParse 55 | return x 56 | 57 | errorParse = do 58 | rest <- manyTill anyToken eof 59 | unexpected $ '"' : rest ++ "\"" 60 | 61 | gExps :: PS.Parser [G4] 62 | gExps = concat <$> many1 gExp 63 | 64 | gExp :: PS.Parser [G4] 65 | gExp = do 66 | traceM "gExp" 67 | whiteSpace 68 | xs <- grammarP <||> lexerP <||> prodP 69 | traceM $ show xs 70 | return xs 71 | 72 | grammarP :: PS.Parser [G4] 73 | grammarP = do 74 | reserved "grammar" 75 | h <- upper 76 | t <- manyTill anyToken (reservedOp ";") 77 | traceM $ show $ Grammar (h : t) 78 | return [Grammar (h : t)] 79 | 80 | -- | Assumptions: 81 | -- 82 | -- * Directives must be on a single line. 
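--   (For example, a directive such as @-> G4S.Grammar@ in a rule like
--   @decl1 : 'grammar' UpperID -> G4S.Grammar@ must not wrap onto a second
--   line, since 'directiveP' below consumes characters up to the first newline.)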
83 | -- * 84 | prodP :: PS.Parser [G4] 85 | prodP = do 86 | h <- lower 87 | t <- manyTill anyChar (reservedOp ":") 88 | traceM $ "[prodP] " ++ trim (h : t) 89 | rhsList <- sepBy1 rhsP (traceM "rhsList..." >> reservedOp "|") 90 | traceM $ "[prodP.rhsList] " ++ show rhsList 91 | reservedOp ";" 92 | traceM "prodP returning..." 93 | return [Prod (trim (h : t)) (concat rhsList)] 94 | where 95 | rhsP = do 96 | mPred <- optionMaybe predP 97 | traceM $ "[rhsP0] " ++ show mPred 98 | alphaList <- many alphaP 99 | traceM $ "[rhsP] " ++ show alphaList 100 | mMute <- optionMaybe muteP 101 | pDir <- optionMaybe directiveP 102 | whiteSpace 103 | return [PRHS alphaList mPred mMute pDir] 104 | alphaP = termP <||> nonTermP 105 | termP = do 106 | whiteSpace 107 | char '\'' 108 | traceM "[prodP.termP.s] " 109 | s <- manyTill anyChar $ char '\'' 110 | whiteSpace 111 | traceM $ "[prodP.termP.s] " ++ show s 112 | return $ GTerm NoAnnot s 113 | nonTermP = do 114 | s <- identifier 115 | traceM $ "[nonTermP] " ++ s 116 | return $ GNonTerm NoAnnot s 117 | predP = do 118 | traceM "[predP]" 119 | reservedOp "{" 120 | haskellParseExpTill "}?" 121 | muteP = do 122 | traceM "[muteP]" 123 | reservedOp "{" 124 | haskellParseExpTill "}" 125 | directiveP = do 126 | whiteSpace 127 | symbol "->" 128 | whiteSpace 129 | str <- manyTill anyChar (char '\n') 130 | whiteSpace 131 | traceM $ "[directiveP]" ++ show str 132 | return (toDirective $ trim $ str) 133 | 134 | -- TODO: not use getInput 135 | rEOF = do 136 | y <- getInput 137 | return (case y of 138 | '-':'>':_ -> True 139 | ';':_ -> True 140 | _ -> False) 141 | 142 | toDirective [] = LowerD [] 143 | toDirective s@(h:rst) 144 | | isUpper h = UpperD s 145 | | otherwise = LowerD s 146 | 147 | lexerP :: PS.Parser [G4] 148 | lexerP = do 149 | mAnnot <- optionMaybe annot 150 | h <- upper 151 | t <- manyTill anyChar (reservedOp ":") 152 | traceM $ "Lexeme Name: " ++ (h:t) 153 | r <- regexP rEOF 154 | traceM $ "Regex: " ++ show r 155 | optionMaybe $ symbol "->" 156 | mDir <- optionMaybe $ manyTill anyToken (reservedOp ";") 157 | return $ [Lex mAnnot (trim (h : t)) (LRHS r (toDirective <$> trim <$> mDir))] 158 | where 159 | annot = fragment -- <||> .... 
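-- (Further lexical annotations, if ever added, would be chained onto 'annot'
-- above with <||>; 'Fragment' is currently the only annotation recognized.)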
160 | fragment = do
161 | reserved "fragment"
162 | return Fragment
163 | 
164 | -- Parser combinators end
165 | haskellParseExpTill :: String -> PS.Parser Exp
166 | haskellParseExpTill op = do {
167 | _ <- whiteSpace
168 | ; str <- manyTill anyChar (reservedOp op)
169 | ; haskellParseExp str
170 | }
171 | haskellParseExp :: String -> PS.Parser Exp
172 | haskellParseExp str =
173 | case LHM.parseExp str of
174 | Left err -> error err -- PP.parserZero
175 | Right expTH -> return expTH
176 | 
177 | whiteSpaceOrComment = comment <||> whiteSpace
178 | where
179 | comment = do
180 | whiteSpace
181 | reservedOp "//"
182 | (manyTill anyChar $ try $ string "\n") <||> (manyTill anyChar $ try $ string "\r")
183 | return ()
184 | 
185 | ------------------------------------------------------------------------------
186 | -- Lexer
187 | lexer :: PT.TokenParser ()
188 | lexer = PT.makeTokenParser $ haskellStyle
189 | { reservedOpNames = [";", "|", ":", "{", "}", "}?", "'"]
190 | , reservedNames = ["grammar"]
191 | , commentLine = "//"
192 | , commentStart = "/*"
193 | , commentEnd = "*/"
194 | }
195 | 
196 | whiteSpace = PT.whiteSpace lexer
197 | identifier = PT.identifier lexer
198 | operator = PT.operator lexer
199 | reserved = PT.reserved lexer
200 | reservedOp = PT.reservedOp lexer
201 | charLiteral = PT.charLiteral lexer
202 | stringLiteral = PT.stringLiteral lexer
203 | integer = PT.integer lexer
204 | natural = PT.natural lexer
205 | commaSep1 = PT.commaSep1 lexer
206 | parens = PT.parens lexer
207 | braces = PT.braces lexer
208 | brackets = PT.brackets lexer
209 | symbol = PT.symbol lexer
210 | 
211 | expr = PE.buildExpressionParser table term
212 | <?> "expression"
213 | term = natural
214 | <?> "simple expression"
215 | table = [ [prefix "-" negate, prefix "+" id ] ]
216 | prefix name fun = PE.Prefix $ reservedOp name >> return fun
217 | 
218 | -- http://stackoverflow.com/a/6270382
219 | trim xs = dropSpaceTail "" $ dropWhile isSpace xs
220 | 
221 | dropSpaceTail maybeStuff "" = ""
222 | dropSpaceTail maybeStuff (x:xs)
223 | | isSpace x = dropSpaceTail (x:maybeStuff) xs
224 | | null maybeStuff = x : dropSpaceTail "" xs
225 | | otherwise = reverse maybeStuff ++ x : dropSpaceTail "" xs
226 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Boot/Syntax.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveLift, DeriveAnyClass, DeriveGeneric, OverloadedStrings #-}
2 | {-|
3 | Module : Language.ANTLR4.Boot.Syntax
4 | Description : Both the boot and core syntax data types for G4
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 | -}
11 | module Language.ANTLR4.Boot.Syntax
12 | ( G4(..), PRHS(..), ProdElem(..), GAnnot(..)
13 | , Directive(..)
14 | , LRHS(..), Regex(..), isGTerm, isGNonTerm
15 | , TermAnnot(..), isMaybeAnnot, isNoAnnot, annot
16 | , prodElemSymbol
17 | ) where
18 | import Text.ANTLR.Grammar ()
19 | import Language.Haskell.TH.Lift (Lift(..))
20 | 
21 | import Language.Haskell.TH.Syntax (Exp)
22 | import qualified Language.Haskell.TH.Syntax as S
23 | 
24 | import Text.ANTLR.Set ( Hashable(..), Generic(..) 
) 25 | import Text.ANTLR.Pretty -- (rshow, Prettify(..), pStr, pshow, pStr') 26 | 27 | -- | .g4 style syntax representation 28 | data G4 = -- | Grammar name declaration in g4 29 | Grammar { gName :: String -- ^ Name 30 | } 31 | -- | One or more g4 productions 32 | | Prod { pName :: String -- ^ Production's name 33 | , patterns :: [PRHS] -- ^ List of rules to match on 34 | } 35 | -- | A single, possibly annotated, g4 lexical rule 36 | | Lex { annotation :: Maybe GAnnot -- ^ Lexical annotation (@fragment@) 37 | , lName :: String -- ^ Lexical rule name 38 | , pattern :: LRHS -- ^ The regex to match on 39 | } 40 | deriving (Show, Eq, Lift, Generic, Hashable) 41 | 42 | instance Prettify G4 where 43 | prettify (Grammar gn) = do 44 | pStr "grammar " 45 | pStr' gn 46 | prettify (Prod n ps) = do 47 | pStr' n 48 | pStr " -> " 49 | incrIndent $ length n + 4 50 | pListLines ps 51 | incrIndent $ 0 - (length n + 4) 52 | prettify (Lex annot ln (LRHS regex dir)) = do 53 | pStr' ln 54 | pStr " -> " 55 | incrIndent $ length ln + 4 56 | prettify regex 57 | incrIndent $ 0 - (length ln + 4) 58 | pStr "(" 59 | prettify dir 60 | pStr ")" 61 | 62 | 63 | instance Lift Exp 64 | 65 | -- | The right-hand side of a G4 production rule. 66 | data PRHS = PRHS 67 | { alphas :: [ProdElem] -- ^ In-order list of elements defining this rule 68 | , pred :: Maybe Exp -- ^ Arbitrary boolean predicate to test whether or not this rule should fire 69 | , mutator :: Maybe Exp -- ^ Arbitrary mutator to run when this rule fires 70 | , pDirective :: Maybe Directive -- ^ How to construct a Haskell type when this rules fires 71 | } deriving (Show, Eq, Lift, Generic) 72 | 73 | instance Prettify PRHS where 74 | prettify (PRHS as pred mut pDir) = do 75 | prettify as 76 | pStr "(" 77 | pStr' $ show pred; pStr "," 78 | pStr' $ show mut; pStr "," 79 | prettify pDir; pStr ")" 80 | 81 | -- | Antiquoted (or g4-embedded) string that goes to the right of an arrow in 82 | -- a g4 production rule. This specifies how to construct a Haskell type. 83 | data Directive = 84 | UpperD String -- ^ Probably a Haskell data constructor 85 | | LowerD String -- ^ Probably just a Haskell function to call 86 | | HaskellD String -- ^ Arbitrary antiquoted Haskell code embedded in the G4 grammar 87 | deriving (Show, Eq, Ord, Lift, Generic, Hashable) 88 | 89 | instance Prettify Directive where prettify = rshow 90 | 91 | instance Hashable PRHS where 92 | hashWithSalt salt prhs = salt `hashWithSalt` alphas prhs 93 | 94 | -- | Annotations on a term (nonterminal or terminal) for extending our G4 95 | -- BNF-like syntax with regular expression modifiers. 96 | data TermAnnot = 97 | Regular Char -- ^ Regular expression modifier (e.g. +, ?, *) 98 | | NoAnnot -- ^ Term is not annotated with anything 99 | deriving (Show, Eq, Ord, Lift, Generic, Hashable) 100 | 101 | instance Prettify TermAnnot where 102 | prettify NoAnnot = return () 103 | prettify (Regular c) = pStr' [c] 104 | 105 | -- | Get the annotation from a 'ProdElem' 106 | annot :: ProdElem -> TermAnnot 107 | annot (GTerm a _) = a 108 | annot (GNonTerm a _) = a 109 | 110 | -- | Is this 'TermAnnot' a maybe? 111 | isMaybeAnnot :: TermAnnot -> Bool 112 | isMaybeAnnot (Regular '?') = True 113 | isMaybeAnnot _ = False 114 | 115 | -- | Does this 'TermAnnot' have no annotation? 
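--
-- For example, @isNoAnnot NoAnnot == True@ while
-- @isNoAnnot (Regular '*') == False@.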
116 | isNoAnnot :: TermAnnot -> Bool 117 | isNoAnnot NoAnnot = True 118 | isNoAnnot _ = False 119 | 120 | -- | A single production element with any accompanying regex annotation 121 | data ProdElem = 122 | GTerm TermAnnot String -- ^ G4 terminal 123 | | GNonTerm TermAnnot String -- ^ G4 nonterminal 124 | deriving (Show, Eq, Ord, Lift, Generic, Hashable) 125 | 126 | instance Prettify ProdElem where 127 | prettify (GTerm annot s) = do 128 | pStr' s 129 | prettify annot 130 | prettify (GNonTerm annot s) = do 131 | pStr' s 132 | prettify annot 133 | 134 | prodElemSymbol (GTerm _ s) = s 135 | prodElemSymbol (GNonTerm _ s) = s 136 | 137 | -- | Is this a terminal G4 element? 138 | isGTerm (GTerm _ _) = True 139 | isGTerm _ = False 140 | 141 | -- | Is this a nonterminal G4 element? 142 | isGNonTerm (GNonTerm _ _) = True 143 | isGNonTerm _ = False 144 | 145 | -- | Allowable annotations on a lexical production rule 146 | data GAnnot = Fragment -- ^ For now the only annotation is @fragment@. 147 | deriving (Show, Eq, Lift, Generic, Hashable) 148 | 149 | -- | Right-hand side of a lexical G4 rule 150 | data LRHS = LRHS 151 | { regex :: Regex Char -- ^ A regular expression over characters as tokens. 152 | , directive :: Maybe Directive -- ^ Optional directive: @Nothing@ is equivalent to @(Just "String")@. 153 | } 154 | deriving (Show, Eq, Lift, Generic, Hashable) 155 | 156 | -- | G4 representation of a regex (G4 regex syntax, not regexs used by tokenizer) 157 | data Regex s = 158 | Epsilon -- ^ Consume no input 159 | | Literal [s] -- ^ Match on a literal string (sequence of characters) 160 | | Union [Regex s] -- ^ Match on any 161 | | Concat [Regex s] -- ^ Match in sequence 162 | | Kleene (Regex s) -- ^ Match zero or more times 163 | | PosClos (Regex s) -- ^ Match one or more times 164 | | Question (Regex s) -- ^ Match zero or one time. 165 | | CharSet [s] -- ^ Match once on any of the characters 166 | | Negation (Regex s) -- ^ Match anything that doesn't match this 167 | | Named String -- ^ A reference to some other regex (need to track an environment) 168 | deriving (Lift, Eq, Show, Generic, Hashable) 169 | -- TODO: Lex regexs (e.g. complement sets, escape chars, ...) 170 | -- TODO: Set s, and ranges of characters 171 | 172 | instance (Show s, Prettify s) => Prettify (Regex s) where 173 | prettify Epsilon = pStr "ε" 174 | prettify (Literal ss) = do 175 | pStr "\"" 176 | mapM prettify ss 177 | pStr "\"" 178 | prettify rest = pStr' $ show rest 179 | 180 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/FileOpener.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, ScopedTypeVariables, 2 | OverloadedStrings #-} 3 | {-| 4 | Module : Language.ANTLR4.FileOpener 5 | Description : Quasiquoter for reading files by name at compile time 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | Just do the following. 
It'll make sense: 13 | 14 | @ 15 | foo = id 16 | file_contents = [open| test/file.foo |] 17 | @ 18 | -} 19 | module Language.ANTLR4.FileOpener ( 20 | -- * File opening quasiquoter 21 | open 22 | ) where 23 | import qualified Language.Haskell.TH as TH 24 | import Language.Haskell.TH.Syntax (Exp(..), addDependentFile) 25 | import Language.Haskell.TH.Quote (QuasiQuoter(..)) 26 | 27 | import Data.Text (strip, splitOn, pack, unpack) 28 | 29 | -- | A quasiquoter for opening a file on disk, reading its contents, and running 30 | -- a function by the same name as the file extension. e.g.: 31 | -- 32 | -- @ 33 | -- foo = id 34 | -- file_contents = [open| test/file.foo |] 35 | -- @ 36 | -- 37 | -- @foo@ gets called on the contents of files with the extension @.foo@. 38 | open :: QuasiQuoter 39 | open = QuasiQuoter 40 | { quoteExp = openExp 41 | , quotePat = error "parse pattern" 42 | , quoteType = error "parse type" 43 | , quoteDec = error "parse decl?" 44 | } 45 | 46 | -- | Reads a file and runs a function with the name of the file extension, 47 | -- returning the result for use by a quasiquoter. 48 | openExp :: String -> TH.Q TH.Exp 49 | openExp s = let 50 | fn = unpack $ strip $ pack s 51 | ext = unpack $ last $ splitOn "." $ pack fn 52 | in do 53 | file_contents <- TH.runIO (readFile fn) 54 | addDependentFile fn 55 | [| $(return $ TH.VarE $ TH.mkName ext) file_contents |] 56 | 57 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/G4.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable 4 | , TemplateHaskell #-} 5 | {-| 6 | Module : Language.ANTLR4.G4 7 | Description : Core G4 quasiquoter for antlr-haskell 8 | Copyright : (c) Karl Cronburg, 2018 9 | License : BSD3 10 | Maintainer : karl@cs.tufts.edu 11 | Stability : experimental 12 | Portability : POSIX 13 | 14 | Until better haddock integration is developed, you'll need to look 15 | at the source for this module to see the G4 grammar for G4. 
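
A minimal, illustrative quasiquote (this toy grammar is made up for the sake
of example and is not part of the library):

@
[g4| grammar Hello;
     greeting : 'hello' World -> Greeting;
     World : [a-zA-Z]+ -> String;
|]
@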
16 | -} 17 | module Language.ANTLR4.G4 where 18 | 19 | import Control.Arrow ( (&&&) ) 20 | import Data.Char (isUpper) 21 | 22 | import Text.ANTLR.Common 23 | import Text.ANTLR.Grammar 24 | import Text.ANTLR.Parser 25 | import qualified Text.ANTLR.LR as LR 26 | import Text.ANTLR.Lex.Tokenizer as T 27 | import qualified Text.ANTLR.Set as S 28 | import Text.ANTLR.Set (Hashable(..), Generic(..)) 29 | import Text.ANTLR.Pretty 30 | import Text.ANTLR.Lex.Regex (regex2dfa) 31 | import Data.Data (Data(..)) 32 | import Language.Haskell.TH.Lift (Lift(..)) 33 | 34 | import Language.Haskell.TH.Quote (QuasiQuoter(..)) 35 | import qualified Language.Haskell.TH as TH 36 | import Language.ANTLR4.Boot.Quote (antlr4) 37 | import Language.ANTLR4.Syntax 38 | import qualified Language.ANTLR4.Boot.Syntax as G4S 39 | import qualified Language.ANTLR4.Boot.Quote as G4Q 40 | 41 | import Debug.Trace as D 42 | 43 | char :: String -> Char 44 | char = head 45 | 46 | append :: String -> String -> String 47 | append = (++) 48 | 49 | list a = [a] 50 | cons = (:) 51 | lexemeDirective r d = G4S.LRHS r (Just d) 52 | lexemeNoDir r = G4S.LRHS r Nothing 53 | lexDecl = G4S.Lex Nothing 54 | lexFragment = G4S.Lex (Just G4S.Fragment) 55 | 56 | literalRegex :: String -> G4S.Regex Char 57 | literalRegex = G4S.Literal 58 | 59 | prodDirective as d = G4S.PRHS as Nothing Nothing (Just d) 60 | prodNoDir as = G4S.PRHS as Nothing Nothing Nothing 61 | prodNoAlphas d = G4S.PRHS [] Nothing Nothing (Just d) 62 | prodNothing = G4S.PRHS [] Nothing Nothing Nothing 63 | 64 | list2 a b = [a,b] 65 | range a b = [a .. b] 66 | 67 | gterm = G4S.GTerm G4S.NoAnnot 68 | gnonTerm = G4S.GNonTerm G4S.NoAnnot 69 | 70 | maybeGTerm = G4S.GTerm (G4S.Regular '?') 71 | maybeGNonTerm = G4S.GNonTerm (G4S.Regular '?') 72 | 73 | starGTerm = G4S.GTerm (G4S.Regular '*') 74 | starGNonTerm = G4S.GNonTerm (G4S.Regular '*') 75 | 76 | plusGTerm = G4S.GTerm (G4S.Regular '+') 77 | plusGNonTerm = G4S.GNonTerm (G4S.Regular '+') 78 | 79 | regexAnyChar = G4S.Negation (G4S.CharSet []) 80 | 81 | dQual [] = G4S.UpperD [] 82 | dQual xs = case last xs of 83 | [] -> G4S.UpperD $ concatWith "." xs 84 | (a:as) 85 | | isUpper a -> G4S.UpperD $ concatWith "." xs 86 | | otherwise -> G4S.LowerD $ concatWith "." xs 87 | 88 | qDir l u = [l,u] 89 | 90 | haskellD = G4S.HaskellD 91 | 92 | -- Force the above declarations (and their types) into scope: 93 | $( return [] ) 94 | 95 | [antlr4| 96 | grammar G4; 97 | 98 | decls : decl1 ';' -> list 99 | | decl1 ';' decls -> cons 100 | ; 101 | 102 | decl1 : 'grammar' UpperID -> G4S.Grammar 103 | | LowerID ':' prods -> G4S.Prod 104 | | UpperID ':' lexemeRHS -> lexDecl 105 | | 'fragment' UpperID ':' lexemeRHS -> lexFragment 106 | ; 107 | 108 | prods : prodRHS -> list 109 | | prodRHS '|' prods -> cons 110 | ; 111 | 112 | lexemeRHS : regexes1 '->' directive -> lexemeDirective 113 | | regexes1 -> lexemeNoDir 114 | ; 115 | 116 | prodRHS : alphas '->' directive -> prodDirective 117 | | alphas -> prodNoDir 118 | | '->' directive -> prodNoAlphas 119 | | -> prodNothing 120 | ; 121 | 122 | directive : qDirective -> dQual 123 | | UpperID -> G4S.UpperD 124 | | LowerID -> G4S.LowerD 125 | | '${' HaskellExp '}' -> haskellD 126 | ; 127 | 128 | qDirective : UpperID '.' qDot -> qDir 129 | ; 130 | 131 | qDot : UpperID 132 | | LowerID 133 | ; 134 | 135 | alphas : alpha -> list 136 | | alpha alphas -> cons 137 | | '(' alphas ')' 138 | | '(' alphas ')' '?' 139 | | '(' alphas ')' '*' 140 | | '(' alphas ')' '+' 141 | ; 142 | 143 | alpha : Literal '?' 
-> maybeGTerm 144 | | LowerID '?' -> maybeGNonTerm 145 | | UpperID '?' -> maybeGNonTerm 146 | | Literal '*' -> starGTerm 147 | | LowerID '*' -> starGNonTerm 148 | | UpperID '*' -> starGNonTerm 149 | | Literal '+' -> plusGTerm 150 | | LowerID '+' -> plusGNonTerm 151 | | UpperID '+' -> plusGNonTerm 152 | | Literal -> gterm 153 | | LowerID -> gnonTerm 154 | | UpperID -> gnonTerm 155 | ; 156 | 157 | // Regex Stuff: 158 | 159 | regexes1 : regexes -> G4S.Concat 160 | ; 161 | 162 | regexes : regex -> list 163 | | regex regexes -> cons 164 | ; 165 | 166 | regex : regex1 '?' -> G4S.Question 167 | | regex1 '*' -> G4S.Kleene 168 | | regex1 '+' -> G4S.PosClos 169 | | '~' regex1 -> G4S.Negation 170 | | regex1 -> id 171 | ; 172 | 173 | regex1 : '[' charSet ']' -> G4S.CharSet 174 | | Literal -> literalRegex 175 | | UpperID -> G4S.Named 176 | | '(' regexes1 ')' 177 | | unionR -> G4S.Union 178 | | '.' -> regexAnyChar 179 | ; 180 | 181 | unionR : regex '|' regex -> list2 182 | | regex '|' unionR -> cons 183 | ; 184 | 185 | charSet : charSet1 -> id 186 | | charSet1 charSet -> append 187 | ; 188 | 189 | charSet1 : SetChar '-' SetChar -> range 190 | | SetChar -> list 191 | | EscapedChar -> list 192 | ; 193 | 194 | UpperID : [A-Z][a-zA-Z0-9_]* -> String; 195 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 196 | Literal : '\'' ( ( '\\\'' ) | (~ ( '\'' ) ) )+ '\'' -> stripQuotesReadEscape; 197 | LineComment : '//' (~ '\n')* '\n' -> String; 198 | 199 | HaskellExp : ( ~ '}' )+ -> String; 200 | 201 | SetChar : ~ ']' -> char ; 202 | WS : [ \t\n\r\f\v]+ -> String; 203 | EscapedChar : '\\' [tnrfv] -> readEscape ; 204 | 205 | |] 206 | 207 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable 4 | , TemplateHaskell #-} 5 | {-| 6 | Module : Language.ANTLR4.Parser 7 | Description : Core G4 quasiquoter (with parsers) for antlr-haskell 8 | Copyright : (c) Karl Cronburg, 2019 9 | License : BSD3 10 | Maintainer : karl@cs.tufts.edu 11 | Stability : experimental 12 | Portability : POSIX 13 | 14 | -} 15 | module Language.ANTLR4.Parser (g4) where 16 | 17 | import Control.Arrow ( (&&&) ) 18 | import Data.Char (isUpper) 19 | 20 | import Text.ANTLR.Common 21 | import Text.ANTLR.Grammar 22 | import Text.ANTLR.Parser 23 | import qualified Text.ANTLR.LR as LR 24 | import Text.ANTLR.Lex.Tokenizer as T hiding (tokenize) 25 | import qualified Text.ANTLR.Set as S 26 | import Text.ANTLR.Set (Hashable(..), Generic(..)) 27 | import Text.ANTLR.Pretty 28 | import Text.ANTLR.Lex.Regex (regex2dfa) 29 | import Data.Data (Data(..)) 30 | import Language.Haskell.TH.Lift (Lift(..)) 31 | 32 | import Language.Haskell.TH.Quote (QuasiQuoter(..)) 33 | import qualified Language.Haskell.TH as TH 34 | import Language.ANTLR4.Boot.Quote (antlr4, g4_parsers) 35 | import Language.ANTLR4.Syntax 36 | import qualified Language.ANTLR4.Boot.Syntax as G4S 37 | import qualified Language.ANTLR4.Boot.Quote as G4Q 38 | import Language.ANTLR4.G4 39 | 40 | import Debug.Trace as D 41 | 42 | -- Splice the parsers for the grammar we defined in Language.ANTLR4.G4 43 | $(g4_parsers g4AST g4Grammar) 44 | 45 | {- isWhitespace (Just T_LineComment) = True 46 | isWhitespace (Just T_WS) = True 47 | isWhitespace Nothing = True 48 | 
isWhitespace _ = False -} 49 | 50 | isWhitespace T_LineComment = True 51 | isWhitespace T_WS = True 52 | isWhitespace _ = False 53 | 54 | g4_codeGen :: String -> TH.Q [TH.Dec] 55 | g4_codeGen input = do 56 | loc <- TH.location 57 | let fileName = TH.loc_filename loc 58 | let (line,column) = TH.loc_start loc 59 | 60 | {- case allstarParse (filter (not . isWhitespace . stripEOF . getSymbol) (tokenize input)) of 61 | Left err -> error err 62 | Right ast -> codeGen ast -} 63 | case glrParse isWhitespace input of 64 | (LR.ResultAccept ast) -> codeGen ast 65 | LR.ResultSet s -> 66 | if S.size s == 1 67 | then codeGen $ fromAccept (S.findMin s) 68 | else D.trace (pshow' s) $ codeGen $ fromAccept (S.findMin s) 69 | err -> error $ pshow' err 70 | 71 | fromAccept (LR.ResultAccept ast) = ast 72 | fromAccept err = error $ pshow' err 73 | 74 | codeGen ast = G4Q.g4_decls $ ast2decls ast 75 | 76 | -- | Entrypoint to the G4 quasiquoter. Currently only supports declaration-level 77 | -- Haskell generation of G4 grammars using a GLR parser. The output grammars 78 | -- need not use a GLR parser themselves. 79 | g4 :: QuasiQuoter 80 | g4 = QuasiQuoter 81 | (error "parse exp") 82 | (error "parse pattern") 83 | (error "parse type") 84 | g4_codeGen 85 | 86 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Regex.hs.boot: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveLift, DeriveGeneric, DeriveAnyClass #-} 2 | {-| 3 | Module : Language.ANTLR4.Regex 4 | Description : Parsec parser for G4 regular expressions 5 | Copyright : (c) Karl Cronburg, 2018 6 | License : BSD3 7 | Maintainer : karl@cs.tufts.edu 8 | Stability : experimental 9 | Portability : POSIX 10 | -} 11 | module Language.ANTLR4.Regex (parseRegex, regexP) where 12 | import Language.Haskell.TH.Lift (Lift(..)) 13 | import Text.ParserCombinators.Parsec 14 | import qualified Text.Parsec.String as PS 15 | import qualified Text.Parsec.Prim as PP 16 | import qualified Text.Parsec.Token as PT 17 | import qualified Text.Parsec.Expr as PE 18 | import qualified Text.Parsec.Combinator as PC 19 | import Data.Char (ord) 20 | import Text.ParserCombinators.Parsec.Language 21 | import qualified Debug.Trace as D -- trace, traceM 22 | 23 | import Text.ANTLR.Set ( Hashable(..), Generic(..) ) 24 | import Language.ANTLR4.Boot.Syntax ( Regex(..) ) 25 | 26 | --traceM s = D.traceM ("[Regex] " ++ s) 27 | traceM = return 28 | 29 | (<||>) a b = try a <|> try b 30 | 31 | rEOF' = do 32 | (eof >>= return . const True) 33 | <||> 34 | (return False) 35 | 36 | -- | Entrypoint for parsing a G4 regex. This does not get called 37 | -- by the spliced parser, and is here for posterity (and debugging / 38 | -- backwards compatibility). 39 | parseRegex :: String -> Either ParseError (Regex Char) 40 | parseRegex input = PP.parse (regexP rEOF') "" input 41 | 42 | type RegexC = Regex Char 43 | 44 | -- convert list of sequential regexes into a single regex 45 | list2regex [] = Epsilon 46 | list2regex [x] = x 47 | list2regex xs = Concat xs 48 | 49 | -- | G4 regex parser, as used exclusively by the boot parser. 
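--
-- For example, @parseRegex "[0-9]+"@ should yield
-- @PosClos (CharSet "0123456789")@ (illustrative, from reading the
-- combinators below).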
50 | --
51 | -- rEOF is a parser to indicate when it's okay to stop parsing the regex.
52 | regexP :: PS.Parser Bool -> PS.Parser RegexC
53 | regexP rEOF = let
54 | 
55 | regexP' :: PS.Parser [RegexC]
56 | regexP' = do
57 | traceM $ "regexP0"
58 | r <- extendedRegex regexElement
59 | traceM $ "regexP: " ++ show r
60 | whiteSpace
61 | b <- try rEOF
62 | traceM $ "regexP2: " ++ show b
63 | y <- getInput
64 | traceM $ "regexP3: " ++ show y
65 | if b
66 | then return [r]
67 | else do
68 | rs <- regexP'
69 | return $ r:rs
70 | <?> "regexP"
71 | 
72 | in do
73 | xs <- regexP'
74 | return $ list2regex xs
75 | 
76 | extendedRegex foo = do
77 | whiteSpace
78 | negation <- optionMaybe (symbol "~")
79 | whiteSpace
80 | r <- foo
81 | whiteSpace
82 | p <- optionMaybe (try $ satisfy (`elem` "+*?"))
83 | whiteSpace
84 | let fncn = (case negation of
85 | (Just _) -> Negation
86 | _ -> id)
87 | return $ fncn (case p of
88 | Nothing -> r
89 | Just '+' -> PosClos r
90 | Just '*' -> Kleene r
91 | Just '?' -> Question r
92 | Just _ -> undefined)
93 | 
94 | 
95 | regexElement :: PS.Parser RegexC
96 | regexElement = do
97 | whiteSpace
98 | r <- extendedRegex (charSet <||> literal <||> concatR <||> namedR <||> parens unionR)
99 | y <- getInput
100 | traceM $ show y
101 | return r
102 | 
103 | {-
104 | return (case rs of
105 | [] -> r
106 | _ -> Concat (r:rs))
107 | -}
108 | 
109 | many2 p = do { x <- p; xs <- many p; return (x:xs) }
110 | 
111 | -- Named regex (upper-case identifier)
112 | namedR :: PS.Parser RegexC
113 | namedR = do
114 | s <- identifier
115 | return $ Named s
116 | 
117 | unionR :: PS.Parser RegexC
118 | unionR = do
119 | traceM "<unionR>"
120 | u <- sepBy1 regexElement (traceM "OR" >> whiteSpace >> symbol "|" >> traceM "OR2")
121 | traceM "</unionR>"
122 | return $ Union u
123 | 
124 | concatR :: PS.Parser RegexC
125 | concatR = do
126 | traceM "<concatR>"
127 | c <- many1 (extendedRegex (charSet <||> literal <||> parens regexElement)) >>= return . Concat
128 | traceM "</concatR>"
129 | return c
130 | 
131 | parseEscape :: String -> String
132 | parseEscape s = (read $ "\"" ++ s ++ "\"") :: String
133 | 
134 | -- regex string literal uses single quotes
135 | literal :: PS.Parser RegexC
136 | literal = do
137 | r <- PC.between (char '\'') (char '\'') (many singleChar >>= (return . Literal . parseEscape))
138 | y <- getInput
139 | traceM $ "literal: " ++ show y
140 | return r
141 | 
142 | charSet :: PS.Parser RegexC
143 | charSet = do
144 | traceM "<charSet>"
145 | whiteSpace
146 | cset <- PC.between (char '[') (char ']') (charSetBody >>= (return . CharSet . 
parseEscape))
147 | whiteSpace
148 | traceM $ "</charSet>: " ++ show cset
149 | return cset
150 | 
151 | charSetBody :: PS.Parser [Char]
152 | charSetBody = do
153 | traceM $ "<charSetBody>"
154 | xs <- many $ charSetRange <||> (singleChar >>= (\c -> return [c]))
155 | traceM $ "charSetBody: " ++ (show $ concat xs)
156 | return $ concat xs
157 | 
158 | charSetRange :: PS.Parser [Char]
159 | charSetRange = do
160 | start <- singleChar
161 | char '-'
162 | end <- singleChar
163 | if ord end <= ord start
164 | then unexpected [end]
165 | else return [start..end]
166 | 
167 | singleChar =
168 | escapedChar
169 | <||>
170 | satisfy (\c -> not (c `elem` ['\'', ']']))
171 | 
172 | escapedChar :: PS.Parser Char
173 | escapedChar = (do
174 | char '\\'
175 | char '\''
176 | return '\'')
177 | <||> (do
178 | char '\\'
179 | char ']'
180 | return ']')
181 | <||> (do
182 | char '\\'
183 | char '['
184 | return '[')
185 | <||> (do
186 | char '\\'
187 | char '\\'
188 | return '\\')
189 | 
190 | regexLexer :: PT.TokenParser ()
191 | regexLexer = PT.makeTokenParser $ haskellStyle
192 | { reservedOpNames = ["[", "]", "//", "-", "+"] }
193 | 
194 | whiteSpace = PT.whiteSpace regexLexer
195 | identifier = PT.identifier regexLexer
196 | operator = PT.operator regexLexer
197 | reserved = PT.reserved regexLexer
198 | reservedOp = PT.reservedOp regexLexer
199 | charLiteral = PT.charLiteral regexLexer
200 | stringLiteral = PT.stringLiteral regexLexer
201 | integer = PT.integer regexLexer
202 | natural = PT.natural regexLexer
203 | commaSep1 = PT.commaSep1 regexLexer
204 | parens = PT.parens regexLexer
205 | braces = PT.braces regexLexer
206 | brackets = PT.brackets regexLexer
207 | symbol = PT.symbol regexLexer
208 | 
209 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Syntax.hs: -------------------------------------------------------------------------------- 1 | {-|
2 | Module : Language.ANTLR4.Syntax
3 | Description : Helper syntax functions used by core G4 parser
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 | -}
10 | module Language.ANTLR4.Syntax where
11 | import Language.ANTLR4.Boot.Syntax
12 | import Data.Char (readLitChar)
13 | 
14 | import qualified Debug.Trace as D
15 | 
16 | -- | Debugging support
17 | trace s = D.trace ("[Language.ANTLR4.Syntax] " ++ s)
18 | 
19 | -- | Parse the escape characters allowable in G4:
20 | readEscape :: String -> Char
21 | readEscape s = let
22 | eC ('\\':'n':xs) = '\n'
23 | eC ('\\':'r':xs) = '\r'
24 | eC ('\\':'t':xs) = '\t'
25 | eC ('\\':'b':xs) = '\b'
26 | eC ('\\':'f':xs) = '\f'
27 | eC ('\\':'v':xs) = '\v'
28 | eC ('\\':'"':xs) = '\"'
29 | eC ('\\':'\'':xs) = '\''
30 | eC ('\\':'\\':xs) = '\\'
31 | in eC s
32 | 
33 | -- | Parse a literal String by stripping the quotes at the beginning and end of
34 | -- the String, and replacing all escaped characters with the actual escape
35 | -- character code. 
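--
-- For example (illustrative):
--
-- > stripQuotesReadEscape "'a\\nb'" == "a\nb"
--
-- where the input is the raw source text of the G4 literal and the result
-- contains a real newline character.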
36 | stripQuotesReadEscape :: String -> String
37 | stripQuotesReadEscape s = let
38 | 
39 | eC [] = error $ "String ended in a single escape '\\': '" ++ s ++ "'"
40 | eC ('n':xs) = "\n" ++ sQRE xs
41 | eC ('r':xs) = "\r" ++ sQRE xs
42 | eC ('t':xs) = "\t" ++ sQRE xs
43 | eC ('b':xs) = "\b" ++ sQRE xs
44 | eC ('f':xs) = "\f" ++ sQRE xs
45 | eC ('v':xs) = "\v" ++ sQRE xs
46 | eC ('"':xs) = "\"" ++ sQRE xs
47 | eC ('\'':xs) = "\'" ++ sQRE xs
48 | eC ('\\':xs) = "\\" ++ sQRE xs
49 | eC ('u':a:b:c:d:xs) =
50 | case (readLitChar $ '\\' : 'x' : a : b : c : d : []) of
51 | ((hex, "") : []) -> hex : sQRE xs
52 | _ -> error $ "Invalid unicode character '\\u" ++ [a,b,c,d] ++ "' in string '" ++ s ++ "'"
53 | eC (x:xs) = error $ "Invalid escape character '" ++ [x] ++ "' in string '" ++ s ++ "'"
54 | 
55 | sQRE [] = []
56 | sQRE ('\\':xs) = eC xs
57 | sQRE (x:xs) = x : sQRE xs
58 | 
59 | --in trace s $ (sQRE . init . tail) s
60 | in (sQRE . init . tail) s
61 | --read $ "\"" ++ (init . tail) s ++ "\"" :: String
62 | 
-------------------------------------------------------------------------------- /src/Text/ANTLR/Allstar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, FlexibleContexts #-}
2 | {-|
3 | Module : Text.ANTLR.Allstar
4 | Description : Entrypoint for using the ALL(*) parsing algorithm
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 | 
11 | This module contains the glue code for hooking Sam's
12 | 'Text.ANTLR.Allstar.ParserGenerator' implementation into the rest of
13 | this package.
14 | -}
15 | module Text.ANTLR.Allstar
16 | ( parse, parse', atnOf
17 | , ALL.GrammarSymbol(..)
18 | , ALL.ATNEnv
19 | ) where
20 | 
21 | import qualified Text.ANTLR.Allstar.ParserGenerator as ALL
22 | 
23 | import qualified Text.ANTLR.Parser as P
24 | import qualified Text.ANTLR.Grammar as G
25 | import qualified Text.ANTLR.Allstar.ATN as ATN
26 | 
27 | import qualified Data.Set as DS
28 | import qualified Text.ANTLR.Set as S
29 | 
30 | import Text.ANTLR.Pretty (Prettify(..))
31 | 
32 | -- | Go from an Allstar AST to the AST type used internally in this package
33 | fromAllstarAST :: ALL.AST nts t -> P.AST nts t
34 | fromAllstarAST (ALL.Node nt ruleFired asts) = P.AST nt (map fromAllstarSymbol ruleFired) (map fromAllstarAST asts)
35 | fromAllstarAST (ALL.Leaf tok) = P.Leaf tok
36 | 
37 | -- TODO: Handle predicate and mutator state during the conversion
38 | -- | Go from an antlr-haskell Grammar to an Allstar ATNEnv. ALL(*) does not
39 | -- currently support predicates and mutators.
40 | atnOf :: (Ord nt, Ord t, S.Hashable nt, S.Hashable t) => G.Grammar s nt t dt -> ALL.ATNEnv nt t
41 | atnOf g = DS.fromList (map convTrans (S.toList (ATN._Δ (ATN.atnOf g))))
42 | 
43 | -- | ATN Transition to AllStar version
44 | convTrans (st0, e, st1) = (convState st0, convEdge e, convState st1)
45 | 
46 | -- | ATN State to AllStar version
47 | convState (ATN.Start nt) = ALL.Init nt
48 | convState (ATN.Middle nt i0 i1) = ALL.Middle nt i0 i1
49 | convState (ATN.Accept nt) = ALL.Final nt
50 | 
51 | -- | ATN Edge to AllStar version
52 | convEdge (ATN.NTE nt) = ALL.GS (ALL.NT nt)
53 | convEdge (ATN.TE t) = ALL.GS (ALL.T t)
54 | convEdge (ATN.PE p) = ALL.PRED True -- TODO
55 | convEdge (ATN.ME m) = ALL.PRED True -- TODO
56 | convEdge ATN.Epsilon = ALL.GS ALL.EPS
57 | 
58 | -- | Entrypoint to the ALL(*) parsing algorithm. 
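--
-- A typical invocation (a sketch; @g@, @toks@, and @S0@ are hypothetical
-- stand-ins for a grammar, a token stream, and a start nonterminal):
--
-- > parse toks (NT S0) (atnOf g) True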
59 | parse' 60 | :: ( P.CanParse nts tok, Prettify chr ) 61 | => ALL.Tokenizer chr tok 62 | -> [chr] 63 | -> ALL.GrammarSymbol nts (ALL.Label tok) 64 | -> ALL.ATNEnv nts (ALL.Label tok) 65 | -> Bool 66 | -> Either String (P.AST nts tok) 67 | parse' tokenizer inp s0 atns cache = fromAllstarAST <$> ALL.parse tokenizer inp s0 atns cache 68 | 69 | -- | No tokenizer required (chr == tok): 70 | parse 71 | :: ( P.CanParse nts tok ) 72 | => [tok] 73 | -> ALL.GrammarSymbol nts (ALL.Label tok) 74 | -> ALL.ATNEnv nts (ALL.Label tok) 75 | -> Bool 76 | -> Either String (P.AST nts tok) 77 | parse = let 78 | tokenizer [] = [] 79 | tokenizer (t:ts) = [(t,ts)] 80 | in parse' tokenizer 81 | 82 | convSymbol s = ALL.NT s 83 | 84 | toAllstarSymbol :: G.ProdElem nts ts -> ALL.GrammarSymbol nts ts 85 | toAllstarSymbol (G.NT nts) = ALL.NT nts 86 | toAllstarSymbol (G.T ts) = ALL.T ts 87 | toAllstarSymbol (G.Eps) = ALL.EPS 88 | 89 | fromAllstarSymbol :: ALL.GrammarSymbol nts ts -> G.ProdElem nts ts 90 | fromAllstarSymbol (ALL.NT nts) = (G.NT nts) 91 | fromAllstarSymbol (ALL.T ts) = (G.T ts) 92 | fromAllstarSymbol ALL.EPS = G.Eps 93 | 94 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Allstar/ATN.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveAnyClass, DeriveGeneric 2 | , FlexibleContexts, UndecidableInstances, StandaloneDeriving 3 | , OverloadedStrings #-} 4 | {-| 5 | Module : Text.ANTLR.Allstar.ATN 6 | Description : Augmented recursive transition network algorithms 7 | Copyright : (c) Karl Cronburg, 2018 8 | License : BSD3 9 | Maintainer : karl@cs.tufts.edu 10 | Stability : experimental 11 | Portability : POSIX 12 | 13 | -} 14 | module Text.ANTLR.Allstar.ATN where 15 | -- Augmented recursive Transition Network 16 | import Text.ANTLR.Grammar 17 | --import Text.ANTLR.Allstar.GSS hiding (Edge, Node) 18 | import Text.ANTLR.Allstar.Stacks 19 | import Text.ANTLR.Set (Set(..), empty, fromList, toList, Hashable, Generic) 20 | import Text.ANTLR.Pretty 21 | 22 | -- | Graph-structured stack over ATN states. 23 | type Gamma nt = Stacks (ATNState nt) 24 | 25 | -- | An ATN defining some language we wish to parse 26 | data ATN s nt t = ATN 27 | { _Δ :: Set (Transition s nt t) -- ^ The transition function 28 | } deriving (Eq, Ord, Show) 29 | 30 | instance (Prettify s, Prettify nt, Prettify t, Hashable nt, Hashable t, Eq nt, Eq t) => Prettify (ATN s nt t) where 31 | prettify atn = do 32 | pLine "_Δ:" 33 | incrIndent 4 34 | prettify $ _Δ atn 35 | incrIndent (-4) 36 | 37 | -- | Tuple corresponding to a distinct transition in the ATN: 38 | type Transition s nt t = (ATNState nt, Edge s nt t, ATNState nt) 39 | 40 | -- | The possible subscripts from Figure 8 of the ALL(*) paper 41 | data ATNState nt = Start nt 42 | | Middle nt Int Int 43 | | Accept nt 44 | deriving (Eq, Generic, Hashable, Ord, Show) 45 | 46 | -- | LaTeX style ATN states. TODO: check length of NT printed and put curly braces 47 | -- around it if more than one character. 48 | instance (Prettify nt) => Prettify (ATNState nt) where 49 | prettify (Start nt) = pStr "p_" >> prettify nt 50 | prettify (Accept nt) = pStr "p'_" >> prettify nt 51 | prettify (Middle nt i j) = do 52 | pStr "p_{" 53 | prettify i 54 | pStr "," 55 | prettify j 56 | pStr "}" 57 | 58 | -- | An edge in an ATN. 
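--
-- For intuition: 'atnOf' below encodes a production @A -> b C@ as a @TE b@
-- edge followed by an @NTE C@ edge, bracketed by 'Epsilon' edges out of
-- @Start A@ and into @Accept A@.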
59 | data Edge s nt t = 60 | NTE nt -- ^ Nonterminal edge 61 | | TE t -- ^ Terminal edge 62 | | PE (Predicate ()) -- ^ Predicated edge with no state 63 | | ME (Mutator ()) -- ^ Mutator edge with no state 64 | | Epsilon -- ^ Nondeterministic edge parsing nothing 65 | deriving (Eq, Generic, Hashable, Ord, Show) 66 | 67 | instance (Prettify s, Prettify nt, Prettify t) => Prettify (Edge s nt t) where 68 | prettify x = do 69 | pStr "--" 70 | case x of 71 | NTE nt -> prettify nt 72 | TE t -> prettify t 73 | PE p -> prettify p 74 | ME m -> prettify m 75 | Epsilon -> pStr "ε" 76 | pStr "-->" 77 | 78 | -- | Convert a G4 grammar into an ATN for parsing with ALL(*) 79 | atnOf 80 | :: forall nt t s dt. (Eq nt, Eq t, Hashable nt, Hashable t) 81 | => Grammar s nt t dt -> ATN s nt t 82 | atnOf g = let 83 | 84 | _Δ :: Int -> Production s nt t dt -> [Transition s nt t] 85 | _Δ i (Production lhs rhs _) = let 86 | --(Prod _α)) = let 87 | 88 | -- Construct an internal production state from the given ATN identifier 89 | st :: nt -> Int -> Int -> ATNState nt 90 | st = Middle 91 | 92 | -- Create the transition for the k^th production element in the i^th 93 | -- production: 94 | _Δ' :: Int -> ProdElem nt t -> Transition s nt t 95 | _Δ' k (NT nt) = (st lhs i (k - 1), NTE nt, st lhs i k) 96 | _Δ' k (T t) = (st lhs i (k - 1), TE t, st lhs i k) 97 | 98 | -- The epsilon (or mu) transition for the accepting / final state: 99 | sϵ = (Start lhs, Epsilon, Middle lhs i 0) 100 | fϵ _α = (Middle lhs i (length _α), Epsilon, Accept lhs) 101 | 102 | sem_state _α = Middle lhs i (length _α + 1) 103 | sϵ_sem _π _α = [(Start lhs, Epsilon, sem_state _α), (sem_state _α, PE _π, Middle lhs i 0)] 104 | fϵ_sem = fϵ 105 | 106 | sϵ_mut = sϵ 107 | fϵ_mut _μ = (Middle lhs i 0, ME _μ, Accept lhs) 108 | 109 | in (case rhs of 110 | (Prod Pass _α) -> [sϵ, fϵ _α] ++ zipWith _Δ' [1..(length _α)] _α 111 | (Prod (Sem _π) _α) -> sϵ_sem _π _α ++ [fϵ_sem _α] ++ zipWith _Δ' [1..(length _α)] _α 112 | (Prod (Action _μ) _) -> [sϵ_mut, fϵ_mut _μ] 113 | ) 114 | 115 | in ATN 116 | { _Δ = fromList $ concat $ zipWith _Δ [0..length (ps g)] $ ps g 117 | } 118 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Allstar/Stacks.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveAnyClass, DeriveGeneric, 2 | OverloadedStrings #-} 3 | {-| 4 | Module : Text.ANTLR.Allstar.Stacks 5 | Description : Graph-structured stack (GSS) for the ALL(*) algorithm 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Allstar.Stacks 14 | ( Stacks(..) 15 | , (#) 16 | , merge 17 | , push 18 | , pop 19 | ) where 20 | import qualified Prelude as P 21 | import Prelude hiding (map, foldr, filter) 22 | import Text.ANTLR.Set 23 | ( union, Set(..), foldr, map, filter 24 | , fromList, singleton, Hashable(..), Generic(..) 
25 | ) 26 | import qualified Text.ANTLR.Set as Set 27 | import Data.List (nub) 28 | import Text.ANTLR.Pretty 29 | 30 | -- | Graph-structured stack representation 31 | data Stacks a = 32 | Empty 33 | | Wildcard 34 | | Stacks (Set [a]) 35 | deriving (Eq, Ord, Generic, Hashable, Show) 36 | 37 | instance (Prettify a, Hashable a, Eq a) => Prettify (Stacks a) where 38 | prettify Empty = pStr "[]" 39 | prettify Wildcard = pStr "#" 40 | prettify (Stacks s) = prettify s 41 | 42 | -- | Represents the set of __all__ stacks 43 | (#) = Wildcard 44 | 45 | -- | Combine two GSSs 46 | merge :: (Eq a, Hashable a) => Stacks a -> Stacks a -> Stacks a 47 | merge Wildcard _ = Wildcard 48 | merge _ Wildcard = Wildcard 49 | merge Empty Empty = Empty 50 | merge (Stacks _Γ) Empty = Stacks $ _Γ `union` fromList [[]] 51 | merge Empty (Stacks _Γ) = Stacks $ _Γ `union` fromList [[]] 52 | merge (Stacks _Γ) (Stacks _Γ') = Stacks $ _Γ `union` _Γ' 53 | 54 | -- | Push a state onto all the leaves of the given GSS 55 | push :: (Eq a, Hashable a) => a -> Stacks a -> Stacks a 56 | push a Empty = Stacks $ singleton [a] 57 | push a Wildcard = Wildcard 58 | push a (Stacks _Γ) = Stacks $ map ((:) a) _Γ 59 | 60 | -- | Get heads of non-empty stacks / lists: 61 | heads :: (Eq a, Hashable a) => Set [a] -> [a] 62 | heads = let 63 | heads' :: [a] -> [a] -> [a] 64 | heads' [] bs = bs 65 | heads' (a:as) bs = a:bs 66 | in foldr heads' [] 67 | 68 | -- | Pop off all the current states from the given GSS 69 | pop :: (Eq a, Hashable a) => Stacks a -> [(a, Stacks a)] 70 | pop Empty = [] 71 | pop Wildcard = [] 72 | pop (Stacks _Γ) = let 73 | -- ss :: a -> Stacks a 74 | ss a = Stacks $ map tail $ filter (\as -> (not . null) as && ((== a) . head) as) _Γ 75 | in P.map (\a -> (a, ss a)) (nub . heads $ _Γ) 76 | 77 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Common.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Text.ANTLR.Common 3 | Description : Haskell-level helper functions used throughout Text.ANTLR 4 | Copyright : (c) Karl Cronburg, 2018 5 | License : BSD3 6 | Maintainer : karl@cs.tufts.edu 7 | Stability : experimental 8 | Portability : POSIX 9 | 10 | -} 11 | module Text.ANTLR.Common where 12 | 13 | concatWith cs [] = [] 14 | concatWith cs [x] = x 15 | concatWith cs (x:xs) = x ++ cs ++ concatWith cs xs 16 | 17 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Language.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Text.ANTLR.Language 3 | Description : Viewing a language as a set of words accepted 4 | Copyright : (c) Karl Cronburg, 2018 5 | License : BSD3 6 | Maintainer : karl@cs.tufts.edu 7 | Stability : experimental 8 | Portability : POSIX 9 | 10 | -} 11 | module Text.ANTLR.Language 12 | ( Alphabet(..), ascii, isASCII 13 | ) where 14 | import Prelude hiding (Word) 15 | import Data.Set.Monad (Set(..)) 16 | import qualified Data.Set.Monad as Set 17 | 18 | import Data.Char 19 | 20 | type Alphabet a = Set a 21 | 22 | ascii :: Alphabet Char 23 | ascii = Set.fromList $ map chr [0 .. 
127] 24 | 25 | isASCII :: Char -> Bool 26 | isASCII c = ord c < 128 27 | 28 | type Word a = [a] 29 | 30 | type Language a = Set (Word a) 31 | 32 | union :: (Ord a) => Set a -> Set a -> Set a 33 | union = Set.union 34 | 35 | concat :: (Ord a) => Language a -> Language a -> Language a 36 | concat a b = Set.fromList 37 | [ s ++ t 38 | | s <- Set.toList a 39 | , t <- Set.toList b 40 | ] 41 | 42 | kleene = undefined 43 | 44 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances, InstanceSigs, DeriveDataTypeable 2 | , ScopedTypeVariables #-} 3 | {-| 4 | Module : Text.ANTLR.Lex 5 | Description : Entrypoint for lexical and tokenization algorithms 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Lex 14 | ( tokenize 15 | , Token(..) 16 | , tokenName, tokenValue 17 | ) where 18 | 19 | import Text.ANTLR.Lex.Tokenizer 20 | 21 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/Automata.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, MonadComprehensions #-} 2 | {-| 3 | Module : Text.ANTLR.Lex.Automata 4 | Description : Automata and algorithms as used during tokenization 5 | Copyright : (c) Karl Cronburg, 2018 6 | License : BSD3 7 | Maintainer : karl@cs.tufts.edu 8 | Stability : experimental 9 | Portability : POSIX 10 | 11 | -} 12 | module Text.ANTLR.Lex.Automata where 13 | import Text.ANTLR.Set (Set(..), member, toList, union, notMember, Hashable(..), fromList) 14 | import qualified Text.ANTLR.Set as Set 15 | 16 | -- | An automaton with edges @e@, symbols @s@, and state indices @i@ 17 | data Automata e s i = Automata 18 | { _S :: Set i -- ^ Finite set of states. 19 | , _Σ :: Set s -- ^ Input (edge) alphabet 20 | , _Δ :: Set (Transition e i) -- ^ Transition function 21 | , s0 :: i -- ^ Start state 22 | , _F :: Set i -- ^ Accepting states 23 | } deriving (Eq) 24 | 25 | instance (Eq e, Eq s, Eq i, Hashable e, Hashable s, Hashable i, Show e, Show s, Show i) => Show (Automata e s i) where 26 | show (Automata s sigma delta s0 f) = 27 | show s 28 | ++ "\n Σ: " ++ show sigma 29 | ++ "\n Δ: " ++ show delta 30 | ++ "\n s0: " ++ show s0 31 | ++ "\n F: " ++ show f 32 | ++ "\n" 33 | 34 | -- | Edge label of an automaton, on which we traverse if we match 35 | -- on one of the tokens @t@ in the set. The boolean is for negation 36 | -- of the set. 37 | type AutomataEdge t = (Bool, Set t) 38 | 39 | -- | A triplet with an edge alphabet of @e@ and node states of @i@. 40 | type Transition e i = (i, AutomataEdge e, i) 41 | 42 | -- | The from-node component of a 'Transition' 43 | tFrom :: Transition e i -> i 44 | tFrom (a,b,c) = a 45 | 46 | -- | The to-node component of a 'Transition' 47 | tTo :: Transition e i -> i 48 | tTo (a,b,c) = c 49 | 50 | -- | The set of edge characters in @e@ of a 'Transition' 51 | tEdge :: Transition e i -> Set e 52 | tEdge (a,(comp, b),c) = b 53 | 54 | -- | Determine the edge-label alphabet of a set of transitions. 55 | transitionAlphabet __Δ = 56 | [ e 57 | | (_, (c, es), _) <- toList __Δ 58 | , e <- toList es 59 | ] 60 | 61 | -- | Compress a set of transitions such that each (start state, complement flag, 62 | -- end state) combination appears at most once in the set.
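--
-- A sketch of the idea on 'Int' states and 'Char' edges (illustrative only;
-- the printed result is shown up to the unordered nature of hash sets):
--
-- >>> compress (fromList [(0, (False, fromList "a"), 1), (0, (False, fromList "b"), 1)])
-- fromList [(0,(False,fromList "ab"),1)]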
63 | compress :: 64 | (Eq i, Eq e, Hashable i, Hashable e) 65 | => Set (Transition e i) -> Set (Transition e i) 66 | compress __Δ = fromList 67 | [ ( a, (c, fromList [ e 68 | | (a', (c', es'), b') <- toList __Δ 69 | , a' == a && b' == b && c' == c 70 | , e <- toList es' 71 | ]) 72 | , b) 73 | | (a, (c, es), b) <- toList __Δ 74 | ] 75 | 76 | -- | XOR helper function over booleans. 77 | xor a b = (not a && b) || (not b && a) 78 | 79 | -- | Is the given transition triplet (with a single @e@ character as the 80 | -- edge label) in some set of transitions? Note that we need to handle complement 81 | -- sets here, in case the given @e@ is in the complement of one of the 82 | -- transitions in the set. 83 | transitionMember :: 84 | (Eq i, Hashable e, Eq e) 85 | => (i, e, i) -> Set (Transition e i) -> Bool 86 | transitionMember (a, e, b) _Δ = 87 | or 88 | [ xor complement (e `member` es) 89 | | (a', (complement, es), b') <- toList _Δ 90 | , a' == a 91 | , b' == b 92 | ] 93 | 94 | -- | Is the given character @s@ accepted by the given edge label? 95 | edgeMember s (complement, es) = xor complement (s `member` es) 96 | 97 | -- | An automaton must either 'Accept' or 'Reject'. 98 | data Result = Accept | Reject 99 | 100 | -- | Is the start state valid? 101 | validStartState nfa = s0 nfa `member` _S nfa 102 | 103 | -- | Are all of the ending states valid? 104 | validFinalStates nfa = and [s `member` _S nfa | s <- toList $ _F nfa] 105 | 106 | -- | Can all of the nodes as defined by the set of transitions be found 107 | -- in the set of allowable states '_S'? 108 | validTransitions :: 109 | forall e s i. (Hashable e, Hashable i, Eq e, Eq i) 110 | => Automata e s i -> Bool 111 | validTransitions nfa = let 112 | vT :: [Transition e i] -> Bool 113 | vT [] = True 114 | vT ((s1, es, s2):rest) = 115 | s1 `member` _S nfa 116 | && s2 `member` _S nfa 117 | && vT rest 118 | in vT $ (toList . _Δ) nfa 119 | 120 | -- | An automaton configuration is the set of states (indices) you 121 | -- can currently be in. 122 | type Config i = Set i 123 | 124 | -- | Generic closure function so that *someone* never asks "what's a closure?" ever 125 | -- again. For an epsilon-closure the given @fncn@ needs to return 'True' when 126 | -- given an @e@ that is an epsilon, and 'False' in all other cases. 127 | closureWith 128 | :: forall e s i. (Hashable e, Hashable i, Eq e, Eq i) 129 | => (e -> Bool) -> Automata e s i -> Config i -> Config i 130 | closureWith fncn Automata{_S = _S, _Δ = _Δ'} states = let 131 | 132 | -- Check which edges are "epsilons" (or something else). 133 | _Δ = Set.map (\(a,(comp, b),c) -> (a, (comp, Set.map fncn b), c)) _Δ' 134 | 135 | cl :: Config i -> Config i -> Config i 136 | cl busy ss 137 | | Set.null ss = Set.empty 138 | | otherwise = let 139 | ret = fromList 140 | [ s' | s <- toList ss 141 | , s' <- toList _S 142 | , s' `notMember` busy 143 | , (s, True, s') `transitionMember` _Δ ] 144 | in ret `union` cl (ret `union` busy) ret 145 | in states `union` cl Set.empty states 146 | --in Set.foldr (\a b -> union (cl a) b) Set.empty states 147 | 148 | -- | Consume the @e@ character given, based on the fact that we are currently 149 | -- in some 'Config i' of states, resulting in a new config consisting of the 150 | -- states that we can get to by doing so. 151 | move 152 | :: forall e s i.
(Hashable e, Hashable i, Eq i, Eq e) 153 | => Automata e s i -> Config i -> e -> Config i 154 | move Automata{_S = _S, _Δ = _Δ} _T a = fromList 155 | [ s' | s <- toList _T 156 | , s' <- toList _S 157 | , (s, a, s') `transitionMember` _Δ ] 158 | 159 | -- | Whether or not (a, (True, _), b) is a transition in our set of transitions. 160 | complementMember 161 | :: (Hashable i, Eq i, Hashable e, Eq e) 162 | => (i, i) -> Set (Transition e i) -> Bool 163 | complementMember (a, b) = 164 | not . null . Set.filter (\(a', (c, _), b') -> a' == a && b' == b && c) 165 | 166 | -- | Set of states you can move to if you see a character not in the alphabet. 167 | moveComplement 168 | :: forall e s i. (Hashable e, Hashable i, Eq i, Eq e) 169 | => Automata e s i -> Config i -> Config i 170 | moveComplement Automata{_S = _S, _Δ = _Δ} _T = fromList 171 | [ s' | s <- toList _T 172 | , s' <- toList _S 173 | , (s, s') `complementMember` _Δ ] 174 | 175 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/DFA.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Text.ANTLR.Lex.DFA 3 | Description : Deterministic finite automaton types 4 | Copyright : (c) Karl Cronburg, 2018 5 | License : BSD3 6 | Maintainer : karl@cs.tufts.edu 7 | Stability : experimental 8 | Portability : POSIX 9 | 10 | -} 11 | module Text.ANTLR.Lex.DFA where 12 | import Text.ANTLR.Lex.Automata 13 | 14 | -- | DFA edges are just the symbols of our alphabet. 15 | type Edge s = s 16 | 17 | -- | DFA states are just some Eq-able value, likely integers @i@ 18 | type State i = i 19 | 20 | -- | A DFA is an automata with edges labeled by symbols @s@ and nodes representing 21 | -- states labeled by some type @i@. 22 | type DFA s i = Automata (Edge s) s (State i) 23 | 24 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/NFA.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, MonadComprehensions, DeriveAnyClass, 2 | DeriveGeneric #-} 3 | {-| 4 | Module : Text.ANTLR.Lex.NFA 5 | Description : Nondeterministic finite automatons and algorithms to compute DFAs 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Lex.NFA where 14 | import Text.ANTLR.Lex.Automata 15 | import Text.ANTLR.Lex.DFA (DFA(..)) 16 | import qualified Text.ANTLR.Lex.DFA as DFA 17 | 18 | import Text.ANTLR.Set (singleton, notMember, union, Set(..), member, Hashable) 19 | import qualified Text.ANTLR.Set as Set 20 | import Text.ANTLR.Set (fromList, toList) 21 | 22 | import Data.List (maximumBy) 23 | import GHC.Generics (Generic) 24 | 25 | -- | NFA edges can be labeled with either a symbol in symbol alphabet @s@, 26 | -- or an epsilon. 27 | data Edge s = Edge s | NFAEpsilon 28 | deriving (Ord, Eq, Hashable, Generic) 29 | 30 | instance (Show s) => Show (Edge s) where 31 | show NFAEpsilon = "ϵ" 32 | show (Edge s) = "E(" ++ show s ++ ")" 33 | 34 | -- | Is this an edge (not an epsilon)? 35 | isEdge :: Edge s -> Bool 36 | isEdge (Edge _) = True 37 | isEdge _ = False 38 | 39 | -- | An NFA is an automata with edges @'Edge' s@ and nodes @i@. 40 | type NFA s i = Automata (Edge s) s i 41 | 42 | -- | NFA states 43 | type State i = i 44 | 45 | -- | DFA states as constructed from an NFA is a set (config) of NFA states. 
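--
-- For intuition: if a hypothetical NFA has ϵ-edges @0 -> 1@ and @1 -> 2@, then
-- the constructed DFA starts in the ϵ-closure of NFA state 0, i.e. the
-- 'Config' @fromList [0, 1, 2]@ (computed by 'epsClosure' below).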
46 | type DFAState i = Config (State i) 47 | 48 | -- | Epsilon closure of an NFA is a closure where we can traverse epsilons. 49 | epsClosure :: 50 | (Ord i, Hashable i, Hashable s, Eq s) 51 | => Automata (Edge s) s i -> Config i -> Config i 52 | epsClosure = closureWith (NFAEpsilon ==) 53 | 54 | -- | Subset construction algorithm for constructing a DFA from an NFA. 55 | nfa2dfa_slow :: forall s i. (Hashable s, Eq s, Hashable i, Eq i, Ord i) 56 | => NFA s i -> DFA s (Set (State i)) 57 | nfa2dfa_slow nfa@Automata{s0 = s0, _Σ = _Σ, _F = _F0} = let 58 | 59 | epsCl = epsClosure nfa 60 | mv = move nfa 61 | 62 | dS :: Config (DFAState i) -> Config (DFAState i) -> Set (Transition (DFA.Edge s) (DFAState i)) 63 | dS marked ts 64 | | Set.null ts = Set.empty 65 | | otherwise = let 66 | 67 | _Δ = fromList 68 | [ (_T, (False, singleton a), epsCl (mv _T (Edge a))) 69 | | _T <- toList ts 70 | , _T `notMember` marked 71 | , a <- toList _Σ 72 | ] 73 | 74 | _Us = Set.map (\(a,b,c) -> c) _Δ 75 | fromStates = Set.map (\(a,b,c) -> a) _Δ 76 | 77 | in _Δ `union` dS (fromStates `union` marked) _Us 78 | 79 | _Δ' :: Set (Transition (DFA.Edge s) (DFAState i)) 80 | _Δ' = dS Set.empty (singleton s0') 81 | 82 | s0' = epsCl $ singleton s0 83 | 84 | in Automata 85 | { _S = fromList [ tFrom x | x <- toList _Δ' ] `union` fromList [ tTo x | x <- toList _Δ' ] 86 | , _Σ = _Σ 87 | , _Δ = _Δ' 88 | , s0 = s0' 89 | , _F = fromList [nfaState | (_,_,nfaState) <- toList _Δ', c <- toList nfaState, c `member` _F0] 90 | } 91 | 92 | -- | Subset construction but where we compress our sets of transitions along the way. 93 | nfa2dfa :: forall s i. (Hashable s, Eq s, Hashable i, Eq i, Ord i) 94 | => NFA s i -> DFA s (Set (State i)) 95 | nfa2dfa nfa@Automata{s0 = s0, _Σ = _Σ, _S = _S, _F = _F0} = let 96 | 97 | epsCl = epsClosure nfa 98 | mv = move nfa 99 | 100 | dS :: Config (DFAState i) -> Config (DFAState i) -> Set (Transition (DFA.Edge s) (DFAState i)) 101 | dS marked ts 102 | | Set.null ts = Set.empty 103 | | otherwise = let 104 | 105 | _Δ = 106 | Set.fromList 107 | [ (_T, (False, singleton a), epsCl (mv _T (Edge a))) 108 | | _T <- Set.toList ts 109 | , _T `notMember` marked 110 | , a <- Set.toList _Σ 111 | ] 112 | `union` 113 | Set.fromList 114 | [ (_T, (True, _Σ), epsCl $ moveComplement nfa _T) 115 | | _T <- Set.toList ts 116 | , _T `notMember` marked 117 | ] 118 | 119 | _Us = fromList [ c | (a,b,c) <- toList _Δ ] 120 | fromStates = fromList [ a | (a,b,c) <- toList _Δ ] 121 | 122 | in _Δ `union` dS (fromStates `union` marked) _Us 123 | 124 | _Δ' :: Set (Transition (DFA.Edge s) (DFAState i)) 125 | _Δ' = let run_dS = dS Set.empty (singleton s0') 126 | in Set.filter (\(_, _, b) -> not $ Set.null b) $ compress run_dS 127 | 128 | s0' = epsCl $ singleton s0 129 | 130 | in Automata 131 | { _S = fromList [ tFrom x | x <- toList _Δ' ] `union` fromList [ tTo x | x <- toList _Δ' ] 132 | , _Σ = _Σ 133 | , _Δ = _Δ' 134 | , s0 = s0' 135 | , _F = fromList [nfaState | (_,_,nfaState) <- toList _Δ', c <- toList nfaState, c `member` _F0] 136 | } 137 | 138 | -- | Compute all the states statically used in a particular set of transitions. 139 | allStates :: forall s i. 
(Hashable i, Eq i) => Set (Transition (Edge s) i) -> Set (State i) 140 | allStates ts = fromList [ n | (n, _, _) <- toList ts ] `union` fromList [ n | (_, _, n) <- toList ts ] 141 | 142 | -- | Converts the given list of transitions into a complete NFA / Automata 143 | -- structure, assuming two things: 144 | -- 145 | -- > The first node of the first edge is the start state 146 | -- > The last node of the last edge is the (only) final state 147 | -- 148 | list2nfa :: forall s i. (Hashable i, Eq i, Hashable s, Eq s) => [Transition (Edge s) i] -> NFA s i 149 | list2nfa [] = undefined 150 | list2nfa ((t@(n1,_,_)):ts) = Automata 151 | { _S = allStates $ Set.fromList (t:ts) 152 | , _Σ = Set.fromList [ e 153 | | (_, es, _) <- t:ts 154 | , Edge e <- filter isEdge (Set.toList $ snd es) 155 | ] 156 | , s0 = n1 157 | , _F = Set.fromList [ (\(_,_,c) -> c) $ last (t:ts) ] 158 | , _Δ = Set.fromList $ t:ts 159 | } 160 | 161 | -- | Rename the states in the second NFA such that they start at the index 162 | -- one greater than the maximum index of the first NFA. 163 | shiftAllStates :: 164 | forall s i. (Hashable i, Eq i, Ord i, Hashable s, Eq s) 165 | => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i 166 | shiftAllStates from to 167 | n1 (n2@Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0}) 168 | = n2 { _Δ = fromList [ (to $ from i0 + shift, e, to $ from i1 + shift) | (i0, e, i1) <- toList _Δ2 ] 169 | , _S = fromList [ to $ from i + shift | i <- toList _S2 ] 170 | , _F = fromList [ to $ from i + shift | i <- toList _F2 ] 171 | , s0 = to $ from s2_0 + shift 172 | } 173 | where 174 | shift = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 (_Δ n1) 175 | 176 | -- | Take the union of two NFAs, renaming states according to 'shiftAllStates'. 177 | nfaUnion :: 178 | forall s i. (Ord i, Hashable i, Eq i, Hashable s, Eq s) 179 | => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i 180 | nfaUnion from to 181 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0}) n2 182 | = let 183 | 184 | Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0} = shiftAllStates from to n1 n2 185 | mx2 = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 _Δ2 186 | 187 | _Δ' = _Δ1 188 | `union` _Δ2 189 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s1_0) 190 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s2_0) 191 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), f0') | f1_0 <- toList _F1 ] 192 | `union` fromList [ (f2_0, (False, singleton NFAEpsilon), f0') | f2_0 <- toList _F2 ] 193 | 194 | s0' = to mx2 195 | f0' = to $ mx2 + 1 196 | 197 | in Automata 198 | { _S = allStates _Δ' 199 | , _Σ = fromList [ e 200 | | (_, es, _) <- toList _Δ' 201 | , Edge e <- toList $ Set.filter isEdge $ snd es 202 | ] 203 | , s0 = s0' 204 | , _F = Set.fromList [f0'] 205 | , _Δ = _Δ' 206 | } 207 | 208 | -- | Concatenate two NFAs, renaming states in the second NFA according to 'shiftAllStates'. 209 | nfaConcat :: 210 | forall s i. 
(Hashable i, Eq i, Ord i, Hashable s, Eq s) => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i 211 | nfaConcat from to 212 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0}) n2 213 | = let 214 | Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0} = shiftAllStates from to n1 n2 215 | 216 | _Δ' = _Δ1 217 | `union` _Δ2 218 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), s2_0) | f1_0 <- toList _F1 ] 219 | 220 | in Automata 221 | { _S = allStates _Δ' 222 | , _Σ = fromList [ e 223 | | (_, es, _) <- toList _Δ' 224 | , Edge e <- toList $ Set.filter isEdge $ snd es 225 | ] 226 | , s0 = s1_0 227 | , _F = _F2 228 | , _Δ = _Δ' 229 | } 230 | 231 | -- | Take the Kleene-star of an NFA, adding epsilons as needed. 232 | nfaKleene :: forall s i. (Ord i, Hashable i, Eq i, Hashable s, Eq s) => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i 233 | nfaKleene from to 234 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0}) 235 | = let 236 | mx1 = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 _Δ1 237 | 238 | s0' = to mx1 239 | f0' = to $ mx1 + 1 240 | 241 | _Δ' = _Δ1 242 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s1_0) 243 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), f0') 244 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), s1_0) | f1_0 <- toList _F1 ] 245 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), f0') | f1_0 <- toList _F1 ] 246 | 247 | in Automata 248 | { _S = allStates _Δ' 249 | , _Σ = fromList [ e 250 | | (_, es, _) <- toList _Δ' 251 | , Edge e <- toList $ Set.filter isEdge $ snd es 252 | ] 253 | , s0 = s0' 254 | , _F = Set.fromList [f0'] 255 | , _Δ = _Δ' 256 | } 257 | 258 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/Regex.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveLift #-} 2 | {-| 3 | Module : Text.ANTLR.Lex.Regex 4 | Description : Regular expressions as used during tokenization 5 | Copyright : (c) Karl Cronburg, 2018 6 | License : BSD3 7 | Maintainer : karl@cs.tufts.edu 8 | Stability : experimental 9 | Portability : POSIX 10 | 11 | -} 12 | module Text.ANTLR.Lex.Regex where 13 | 14 | import Text.ANTLR.Set (Hashable, singleton, fromList) 15 | import Text.ANTLR.Lex.NFA 16 | import qualified Text.ANTLR.Lex.DFA as DFA 17 | import Language.Haskell.TH.Syntax (Lift(..)) 18 | 19 | -- | Regular expression data representation as used by the tokenizer. 
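--
-- For example, a lexer rule for C-style identifiers could be expressed as
-- (an illustrative sketch, not a rule shipped with this library):
--
-- > idRegex :: Regex Char
-- > idRegex = Concat
-- >   [ Class ('_' : ['a'..'z'] ++ ['A'..'Z'])
-- >   , Kleene (Class ('_' : ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9']))
-- >   ]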
20 | data Regex s = 21 | Epsilon -- ^ Regex accepting the empty string 22 | | Symbol s -- ^ An individual symbol in the alphabet 23 | | Literal [s] -- ^ A literal sequence of symbols (concatenated together) 24 | | Class [s] -- ^ A set of alternative symbols (unioned together) 25 | | Union (Regex s) (Regex s) -- ^ Union of two arbitrary regular expressions 26 | | Concat [Regex s] -- ^ Concatenation of two or more regular expressions 27 | | Kleene (Regex s) -- ^ Kleene closure of a regex 28 | | PosClos (Regex s) -- ^ Positive closure 29 | | Question (Regex s) -- ^ 0 or 1 instances 30 | | MultiUnion [Regex s] -- ^ Union of two or more arbitrary regexes 31 | | NotClass [s] -- ^ Complement of a character class 32 | deriving (Lift) 33 | 34 | instance (Show s) => Show (Regex s) where 35 | show Epsilon = "ϵ" 36 | show (Symbol s) = show s 37 | show (Literal s) = show s 38 | show (Class s) = "[" ++ show s ++ "]" 39 | show (Union r1 r2) = "(" ++ show r1 ++ "|" ++ show r2 ++ ")" 40 | show (Concat rs) = concatMap show rs 41 | show (Kleene r) = "(" ++ show r ++ ")*" 42 | show (PosClos r) = "(" ++ show r ++ ")+" 43 | show (Question r) = "(" ++ show r ++ ")?" 44 | show (MultiUnion rs) = tail $ concatMap (\r -> "|" ++ show r) rs 45 | show (NotClass rs) = "[^" ++ tail (concatMap show rs) ++ "]" 46 | 47 | -- | Translation code of a regular expression to an NFA. 48 | regex2nfa' :: 49 | forall s i. (Hashable i, Ord i, Hashable s, Eq s) 50 | => (i -> Int) -> (Int -> i) -> Regex s -> NFA s i 51 | regex2nfa' from to r = let 52 | r2n :: Regex s -> NFA s i 53 | r2n Epsilon = list2nfa [ (to 0, (False, singleton NFAEpsilon), to 1) ] 54 | r2n (Symbol s) = list2nfa [ (to 0, (False, singleton $ Edge s), to 1) ] 55 | r2n (Union r1 r2) = nfaUnion from to (r2n r1) (r2n r2) 56 | r2n (Concat []) = r2n Epsilon -- TODO: empty concat 57 | r2n (Concat (r:rs)) = foldl (nfaConcat from to) (r2n r) (map r2n rs) 58 | r2n (Kleene r1) = nfaKleene from to (r2n r1) 59 | r2n (PosClos r1) = r2n $ Concat [r1, Kleene r1] 60 | r2n (Question r1) = nfaUnion from to (r2n r1) (r2n Epsilon) 61 | r2n (Class []) = r2n Epsilon -- TODO: empty character class shouldn't accept empty string? 62 | r2n (Class (s:ss)) = list2nfa [ (to 0, (False, fromList $ map Edge $ s:ss), to 1) ] --r2n $ foldl Union (Symbol s) (map Symbol ss) 63 | r2n (MultiUnion []) = r2n Epsilon 64 | r2n (MultiUnion (r:rs)) = r2n $ foldl Union r rs 65 | r2n (Literal ss) = list2nfa $ map (\(s,i) -> (to i, (False, singleton $ Edge s), to $ i + 1)) (zip ss [0..length ss - 1]) 66 | r2n (NotClass []) = list2nfa $ [ (to 0, (True, fromList []), to 1) ] -- Not nothing = everything 67 | r2n (NotClass (s:ss)) = list2nfa $ [ (to 0, (True, fromList $ map Edge $ s:ss), to 1) ] 68 | in r2n r 69 | 70 | -- | Entrypoint for translating a regular expression into an 'NFA' with integer indices. 71 | regex2nfa :: (Hashable s, Ord s) => Regex s -> NFA s Int 72 | regex2nfa = regex2nfa' id id 73 | 74 | -- | Entrypoint for translating a regular expression into a 'DFA.DFA' with integer indices. 75 | regex2dfa :: (Hashable s, Ord s) => Regex s -> DFA.DFA s (DFAState Int) 76 | regex2dfa = nfa2dfa .
regex2nfa 77 | 78 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/Tokenizer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveGeneric, DeriveAnyClass 2 | , OverloadedStrings #-} 3 | {-| 4 | Module : Text.ANTLR.Lex.Tokenizer 5 | Description : Tokenization algorithms 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Lex.Tokenizer where 14 | import Text.ANTLR.Lex.Automata 15 | import Text.ANTLR.Lex.DFA 16 | 17 | import qualified Text.ANTLR.Set as Set 18 | import Text.ANTLR.Set (Hashable, member, Generic(..), Set(..)) 19 | 20 | import Text.ANTLR.Pretty 21 | import qualified Debug.Trace as D 22 | import Data.List (find) 23 | import qualified Data.Text as T 24 | 25 | -- | Token with names @n@, values @v@, and number of input symbols consumed to match 26 | -- it. 27 | data Token n v = 28 | Token n v Int -- ^ Tokenized a token 29 | | EOF -- ^ The end-of-file token 30 | | Error T.Text -- ^ Error encountered while tokenizing 31 | deriving (Show, Ord, Generic, Hashable) 32 | 33 | instance (Prettify n, Prettify v) => Prettify (Token n v) where 34 | prettify EOF = pStr "EOF" 35 | prettify (Error s) = pStr "Token Error: " >> pStr s 36 | prettify (Token n v i) = 37 | prettify v 38 | 39 | instance Eq n => Eq (Token n v) where 40 | Token s _ _ == Token s1 _ _ = s == s1 41 | EOF == EOF = True 42 | Error s == Error s1 = s == s1 43 | _ == _ = False 44 | 45 | -- | Token Names are Input Symbols to the parser. 46 | tokenName :: Token n v -> n 47 | tokenName (Token n v _) = n 48 | 49 | -- | Get the value of a token, ignoring its name. 50 | tokenValue :: Token n v -> v 51 | tokenValue (Token n v _) = v 52 | 53 | -- | Get the number of characters from the input that this token matched on. 54 | tokenSize :: Token n v -> Int 55 | tokenSize (Token _ _ i) = i 56 | tokenSize EOF = 0 57 | tokenSize (Error _) = 0 58 | -- | A Lexeme is a sequence of zero or more (matched) input symbols 59 | type Lexeme s = [s] 60 | 61 | -- | A named DFA over symbols @s@, indices @i@, and names @n@. 62 | type NDFA s i n = (n, DFA s i) 63 | 64 | -- | Entrypoint for tokenizing an input stream given a list of named DFAs that 65 | -- we can match on. 66 | -- 67 | -- > @dfaTuples@: associates each DFA with the name given to it in 68 | -- the specification of the lexer. 69 | -- 70 | -- > @fncn@: function for constructing the value of a token from the lexeme 71 | -- matched (e.g. @varName@) and the associated token name (e.g. @id@) 72 | -- 73 | tokenize :: 74 | forall s i n v. (Eq i, Ord s, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s) 75 | => [(n, DFA s i)] -- ^ Association list of named DFAs. 76 | -> (Lexeme s -> n -> v) -- ^ Constructs the value of a token from lexeme matched. 77 | -> [s] -- ^ The input string. 78 | -> [Token n v] -- ^ The tokenized tokens.
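-- For example (an illustrative sketch, not a test from this repository; the
-- token name @"num"@ is made up, and 'regex2dfa' comes from
-- "Text.ANTLR.Lex.Regex"):
--
-- > tokenize [("num", regex2dfa (PosClos (Class ['0'..'9'])))]
-- >          (\lexeme _name -> lexeme)
-- >          "42"
--
-- should evaluate to @[Token "num" "42" 2, EOF]@.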
79 | tokenize dfaTuples fncn input0 = let 80 | 81 | dfas0 = map snd dfaTuples 82 | 83 | allTok :: [(NDFA s i n, State i)] -> [s] -> [Token n v] 84 | allTok dfaSims0 currInput = let 85 | oneTok :: [(NDFA s i n, State i)] -> [s] -> Maybe (Lexeme s, NDFA s i n) 86 | oneTok dfaSims [] = Nothing 87 | oneTok [] ss = Nothing 88 | oneTok dfaSims (s:ss) = let 89 | dfaSims' = 90 | [ ((n, dfa), stop) 91 | | ((n, dfa), cursor) <- dfaSims 92 | , (start, es, stop) <- Set.toList $ _Δ dfa 93 | , start == cursor && s `edgeMember` es ] 94 | 95 | accepting = [ (n,dfa) | ((n, dfa), cursor) <- dfaSims', cursor `member` _F dfa ] 96 | 97 | in (case (oneTok dfaSims' ss, accepting) of 98 | (Nothing, []) -> Nothing 99 | (Nothing, d:ds) -> Just ([s], d) 100 | (Just (l,d), _) -> Just (s:l, d)) 101 | in case (currInput, oneTok dfaSims0 currInput) of 102 | ([], _) -> [EOF] 103 | (ss, Nothing) -> [Error $ T.pack $ show ss] 104 | (ss, Just (l, (name,d))) -> 105 | Token name (fncn l name) (length l) 106 | : allTok dfaSims0 (drop (length l) currInput) 107 | in allTok (zip dfaTuples (map s0 dfas0)) input0 108 | 109 | -- | Incremental tokenizer takes in the same list of DFAs and AST value 110 | -- constructor function, but instead returns an incremental tokenizer function 111 | -- that expects a set of names that we currently expect to tokenize on, 112 | -- the current input stream, and returns a single tokenized token along 113 | -- with the modified input stream to iteratively call 'tokenizeInc' on. 114 | tokenizeInc 115 | :: forall s i n v. (Eq i, Ord s, Eq n, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s, Hashable n) 116 | => (n -> Bool) -- ^ Function that returns True on DFA names we wish to filter __out__ of the results. 117 | -> [(n, DFA s i)] -- ^ Closure over association list of named DFAs. 118 | -> (Lexeme s -> n -> v) -- ^ Token value constructor from lexemes. 119 | -> (Set n -> [s] -> (Token n v, [s])) -- ^ The incremental tokenizer closure. 120 | tokenizeInc filterF dfaTuples fncn = let 121 | 122 | tI :: Set n -> [s] -> (Token n v, [s]) 123 | tI ns input = let 124 | 125 | dfaTuples' = filter (\(n,_) -> n `Set.member` ns || filterF n) dfaTuples 126 | tokenized = tokenize dfaTuples' fncn input 127 | 128 | filterF' (Token n _ _) = filterF n 129 | filterF' _ = False 130 | 131 | ignored = takeWhile filterF' tokenized 132 | nextTokens = dropWhile filterF' tokenized 133 | -- Yayy lazy function evaluation. 134 | next = case nextTokens of 135 | [] -> EOF 136 | (t:_) -> t --D.traceShowId t 137 | 138 | in (next, drop (sum $ map tokenSize $ next : ignored) input) 139 | in tI 140 | 141 | tokenizeIncAll 142 | :: forall s i n v. (Eq i, Ord s, Eq n, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s, Hashable n) 143 | => (n -> Bool) -- ^ Function that returns True on DFA names we wish to filter __out__ of the results. 144 | -> [(n, DFA s i)] 145 | -> (Lexeme s -> n -> v) 146 | -> (Set n -> [s] -> [(Token n v, [s])]) 147 | tokenizeIncAll filterF dfaTuples fncn = let 148 | 149 | tI :: Set n -> [s] -> [(Token n v, [s])] 150 | tI ns input = let 151 | 152 | dfaTuples' = filter (\(n,_) -> n `Set.member` ns || filterF n) dfaTuples 153 | tokenized = tokenize dfaTuples' fncn input 154 | 155 | filterF' (Token n _ _) = filterF n 156 | filterF' _ = False 157 | 158 | ignored = takeWhile filterF' tokenized 159 | nextTokens = dropWhile filterF' tokenized 160 | -- Yayy lazy function evaluation. 
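-- (Laziness is doing real work here: although 'tokenize' nominally returns
-- the token list for the entire remaining input, only the prefix demanded
-- by 'ignored' and 'next' is ever actually computed.)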
161 | next = case nextTokens of 162 | [] -> EOF 163 | (t:_) -> t --D.traceShowId t 164 | 165 | input' = drop (sum $ map tokenSize $ next : ignored) input 166 | 167 | ns' = case next of 168 | Token n _ _ -> n `Set.delete` ns 169 | _ -> Set.empty 170 | 171 | in (next, input') : tI ns' input' 172 | in tI 173 | 174 | -------------------------------------------------------------------------------- /src/Text/ANTLR/MultiMap.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, MonadComprehensions, DeriveLift, 2 | DeriveDataTypeable #-} 3 | {-| 4 | Module : Text.ANTLR.MultiMap 5 | Description : A one-to-many key value map 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.MultiMap where 14 | import qualified Data.Map.Strict as M 15 | import Data.Maybe (fromMaybe) 16 | import Text.ANTLR.Set (Generic(..), Hashable(..), Set(..)) 17 | import qualified Text.ANTLR.Set as S 18 | import Prelude hiding (lookup) 19 | import Text.ANTLR.Pretty 20 | 21 | import Data.Data (Data(..)) 22 | import Language.Haskell.TH.Syntax (Lift(..)) 23 | 24 | instance (Lift k, Lift v, Data k, Data v, Ord k, Ord v) => Lift (M.Map k v) 25 | 26 | -- | A multi 'Map' is a mapping from keys @k@ to sets of values @v@. A nice 27 | -- invariant to maintain while using a multi-map is to never have empty 28 | -- sets mapped to by some key. 29 | newtype Map k v = Map (M.Map k (Set v)) 30 | deriving (Generic, Hashable, Eq, Show, Lift) 31 | 32 | instance (Prettify k, Prettify v, Hashable v, Eq v) => Prettify (Map k v) where 33 | prettify (Map m) = prettify m 34 | 35 | -- | The singleton multimap, given a single key and a __single__ value. 36 | singleton :: (Hashable v, Eq v) => k -> v -> Map k v 37 | singleton k v = Map (M.singleton k (S.singleton v)) 38 | 39 | -- | Construct a multi 'Map' from a list of key-value pairs. 40 | fromList :: (Hashable v, Ord k, Eq k, Eq v) => [(k, v)] -> Map k v 41 | fromList kvs = Map (M.fromList 42 | [ (k1, S.fromList [v2 | (k2, v2) <- kvs, k1 == k2]) 43 | | (k1, _) <- kvs]) 44 | 45 | -- | Same as 'fromList' but where the values in the key-value tuples are already in sets. 46 | fromList' :: (Ord k, Eq k, Hashable v, Eq v) => [(k, Set v)] -> Map k v 47 | fromList' kvs = fromList [(k, v) | (k, vs) <- kvs, v <- S.toList vs] 48 | 49 | -- | Inverse of 'fromList\''. 50 | toList :: Map k v -> [(k, Set v)] 51 | toList (Map m) = M.toList m 52 | 53 | -- | Take the union of two maps. 54 | union :: (Ord k, Eq k, Hashable v, Eq v) => Map k v -> Map k v -> Map k v 55 | union m1 m2 = fromList' (toList m1 ++ toList m2) 56 | 57 | -- | The empty multi-map. 58 | empty :: Map k v 59 | empty = Map M.empty 60 | 61 | -- | Get the set of values mapped to by some key @k@. 62 | lookup :: (Ord k, Hashable v, Eq v) => k -> Map k v -> Set v 63 | lookup k (Map m) = fromMaybe S.empty (M.lookup k m) 64 | 65 | -- | Number of keys in the multi-map. 66 | size (Map m) = M.size m 67 | 68 | -- | Map difference of two multi-maps, deleting individual key-value pairs 69 | -- rather than deleting the entire key. Invariant maintained is that 70 | -- input maps with non-null value sets will result in an output with 71 | -- non-null value sets. 72 | difference (Map m1) m2 = Map $ M.fromList 73 | [ (k1, vs) 74 | | (k1, vs1) <- M.toList m1 75 | , let vs2 = k1 `lookup` m2 76 | , let vs = vs1 `S.difference` vs2 77 | , (not . 
S.null) vs 78 | ] 79 | 80 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, FlexibleContexts, InstanceSigs 2 | , UndecidableInstances, StandaloneDeriving, TypeFamilies 3 | , ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses 4 | , OverloadedStrings, DeriveDataTypeable, ConstraintKinds #-} 5 | {-| 6 | Module : Text.ANTLR.Parser 7 | Description : Parsing API for constructing Haskell data types from lists of tokens 8 | Copyright : (c) Karl Cronburg, 2018 9 | License : BSD3 10 | Maintainer : karl@cs.tufts.edu 11 | Stability : experimental 12 | Portability : POSIX 13 | 14 | -} 15 | module Text.ANTLR.Parser where 16 | import Text.ANTLR.Grammar hiding (Action) 17 | import Text.ANTLR.Pretty 18 | import Text.ANTLR.Set (Generic(..)) 19 | import Text.ANTLR.Lex.Tokenizer (Token(..)) 20 | import Data.Data (Data(..)) 21 | import Language.Haskell.TH.Lift (Lift(..)) 22 | import Text.ANTLR.Set (Hashable) 23 | 24 | -- | Nonterminals in a grammar are tabular, terminal symbols are tabular (as are 25 | -- the EOF-stripped version), terminals are referenceable (can be symbolized), 26 | -- and terminals are also tabular. 27 | type CanParse nts t = 28 | ( Tabular nts 29 | , Tabular (Sym t) 30 | , Tabular (StripEOF (Sym t)) 31 | , HasEOF (Sym t) 32 | , Ref t 33 | , Tabular t) 34 | 35 | -- | Same as 'CanParse' but with the second formal parameter representing (StripEOF (Sym t)), 36 | -- aka "sts" (stripped terminal symbol). 37 | type CanParse' nts sts = ( Tabular nts, Tabular sts ) 38 | 39 | type IsAST ast = ( Ord ast, Eq ast, Hashable ast ) 40 | 41 | type IsState st = ( Ord st, Hashable st, Prettify st ) 42 | type Tabular sym = ( Ord sym, Hashable sym, Prettify sym, Eq sym ) 43 | 44 | -- | Action functions triggered during parsing are given the nonterminal we just matched on, the 45 | -- corresponding list of production elements (grammar symbols) in the RHS of the matched production 46 | -- alternative, and the results of recursively parsing those production elements. 47 | -- 48 | -- A 'ParseEvent' may also be just a terminal matched on, or an epsilon event, 49 | -- depending heavily on which parsing algorithm is being run. 50 | -- 51 | -- __This__ data type is one of the data types that tie together terminal (token) types 52 | -- and terminal symbol types. When the parser produces a terminal event, you're 53 | -- seeing a __token__, but when the parser produces a nonterminal event, you're 54 | -- seeing a production in the grammar firing which contains terminal __symbols__, 55 | -- not tokens. 56 | data ParseEvent ast nts t = 57 | TermE t -- ^ A terminal was seen in the input 58 | | NonTE (nts, ProdElems nts (StripEOF (Sym t)), [ast]) -- ^ A non-terminal was seen in the input 59 | | EpsE -- ^ Epsilon event 60 | 61 | deriving instance (Show ast, Show nts, Show (StripEOF (Sym t)), Show t) => Show (ParseEvent ast nts t) 62 | 63 | instance (Prettify ast, Prettify nts, Prettify (StripEOF (Sym t)), Prettify t) => Prettify (ParseEvent ast nts t) where 64 | prettify (TermE t) = pStr "Terminal Event: " >> incrIndent 2 >> prettify t >> incrIndent (-2) 65 | prettify (NonTE (nts, _, asts)) = pStr "Nonterminal Event: " >> incrIndent 2 >> prettify nts >> pStr " " >> prettify asts >> incrIndent (-2) 66 | prettify EpsE = pStr "Epsilon Event" 67 | 68 | 69 | 70 | -- | An Action as seen by the host language (Haskell) is a function from parse 71 | -- events to an abstract-syntax tree that the function constructs based on which 72 | -- non-terminal or terminal symbol was seen.
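--
-- For example, an action that merely counts the terminal leaves of a parse
-- could be written as (an illustrative sketch, not part of this module):
--
-- > countTerms :: Action Int nts t
-- > countTerms (TermE _)          = 1
-- > countTerms (NonTE (_, _, ns)) = sum ns
-- > countTerms EpsE               = 0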
73 | type Action ast nts t = ParseEvent ast nts t -> ast 74 | 75 | -- | An Icon (as used in first and follow sets of the LL1 parser and the 76 | -- shift-reduce table of the LR1 parser) is just a terminal symbol taken from 77 | -- the grammar, or it's an epsilon or EOF. 78 | data Icon ts = 79 | Icon ts -- ^ Terminal symbol icon 80 | | IconEps -- ^ Epsilon icon 81 | | IconEOF -- ^ EOF (end of file / input) icon 82 | deriving (Generic, Hashable, Show, Eq, Ord, Data, Lift) 83 | 84 | -- | __This__ is the function defining the (n == Sym t == ts) relationship between 85 | -- the __name__ type of a token, the __symbol__ type of a terminal token (as 86 | -- constructed by the tokenizer), and the __terminal symbol__ type as used by the 87 | -- parser. When a parser wants to compare the symbol of an input token to a 88 | -- terminal symbol found in the grammar, it should convert the token to a symbol 89 | -- using this function and then compare symbols using Eq, because symbols throw away 90 | -- the value of a token, leaving only the Eq-able piece that we care about. 91 | token2symbol :: Token n v -> TokenSymbol n 92 | token2symbol (Token n v _) = TokenSymbol n 93 | token2symbol EOF = EOFSymbol 94 | token2symbol (Error s) = EOFSymbol 95 | 96 | -- | Tokens are symbolized by a 'TokenSymbol' containing their name. 97 | instance Ref (Token n v) where 98 | type Sym (Token n v) = TokenSymbol n 99 | getSymbol = token2symbol 100 | 101 | -- | The symbol for a token is either just its name @n@ or the special EOF symbol. 102 | data TokenSymbol n = 103 | TokenSymbol n -- ^ Named symbol 104 | | EOFSymbol -- ^ End-of-file symbol 105 | deriving (Eq, Ord, Show, Hashable, Generic) 106 | 107 | instance (Prettify n) => Prettify (TokenSymbol n) where 108 | prettify (TokenSymbol n) = do 109 | pStr "TokenSymbol " 110 | prettify n 111 | prettify EOFSymbol = pStr "EOFSymbol" 112 | 113 | -- | A data type with an EOF constructor. There are two things you can do with a 114 | -- data type that has an EOF: 115 | -- 116 | -- > Ask for the type *without* the EOF at compile time 117 | -- > Ask whether or not an instance is the EOF symbol at runtime 118 | -- 119 | class HasEOF t where 120 | -- | The unwrapped type (without the EOF data constructor alternative) 121 | type StripEOF t :: * 122 | -- | Whether or not the given value of type t is the EOF value 123 | isEOF :: t -> Bool 124 | -- | Take a token and try to unwrap its name (an EOF should result in Nothing) 125 | stripEOF :: t -> Maybe (StripEOF t) 126 | 127 | instance HasEOF (TokenSymbol n) where 128 | type StripEOF (TokenSymbol n) = n 129 | 130 | isEOF EOFSymbol = True 131 | isEOF _ = False 132 | 133 | stripEOF EOFSymbol = Nothing 134 | stripEOF (TokenSymbol n) = Just n 135 | 136 | instance HasEOF String where 137 | type StripEOF String = String 138 | 139 | isEOF "" = True 140 | isEOF _ = False 141 | 142 | stripEOF "" = Nothing 143 | stripEOF x = Just x 144 | 145 | instance (Prettify ts) => Prettify (Icon ts) where 146 | prettify IconEps = pStr "iϵ" 147 | prettify IconEOF = pStr "iEOF" 148 | prettify (Icon ts) = do 149 | pStr "Icon " 150 | prettify ts 151 | 152 | -- | Is this a terminal-symbol icon? 153 | isIcon Icon{} = True 154 | isIcon _ = False 155 | 156 | -- | Is this an epsilon icon? 157 | isIconEps IconEps = True 158 | isIconEps _ = False 159 | 160 | -- | Is this the EOF icon? 161 | isIconEOF IconEOF = True 162 | isIconEOF _ = False 163 | 164 | -- | Universal Abstract Syntax Tree data type.
All internal AST "nodes" have a 165 | -- nonterminal, the grammar production symbols it reduced from, and the 166 | -- resulting recursively defined AST nodes acquired from the parser. Leaf AST 167 | -- nodes can be either an epsilon (when explicit epsilons are used in the 168 | -- grammar) or more importantly a terminal symbol. 169 | -- __This__ is another type that defines the relationship between the terminal 170 | -- token type @t@ and the terminal symbol type @(ts == Sym t)@ where the AST tells 171 | -- you the production rule that fired containing @ts@ as well as the tokens @t@ 172 | -- contained in leaves of the AST. 173 | data AST nts t = 174 | LeafEps -- ^ Epsilon leaf AST node 175 | | Leaf t -- ^ Terminal token leaf in the AST 176 | | AST nts (ProdElems nts (StripEOF (Sym t))) [AST nts t] -- ^ Internal AST node 177 | deriving (Generic) 178 | 179 | deriving instance (Eq (StripEOF (Sym t)), Eq nts, Eq t) => Eq (AST nts t) 180 | deriving instance (Ord (StripEOF (Sym t)), Ord nts, Ord t) => Ord (AST nts t) 181 | deriving instance (Show (StripEOF (Sym t)), Show nts, Show t) => Show (AST nts t) 182 | deriving instance (Hashable (StripEOF (Sym t)), Hashable nts, Hashable t) => Hashable (AST nts t) 183 | 184 | instance (Prettify nts, Prettify t) => Prettify (AST nts t) where 185 | prettify LeafEps = pStr "ϵ" 186 | prettify (Leaf t) = prettify t 187 | prettify (AST nts ps asts) = do 188 | prettify nts 189 | pStr "{" 190 | prettify asts 191 | pStr "}" 192 | 193 | -- | Default AST-constructor function which just copies over the contents of 194 | -- some parse event into an 'AST'. 195 | event2ast :: ParseEvent (AST nts t) nts t -> AST nts t 196 | event2ast (TermE t) = Leaf t 197 | event2ast (NonTE (nts, ss, asts)) = AST nts ss asts 198 | event2ast EpsE = LeafEps 199 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Pretty.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances, DefaultSignatures, UndecidableInstances 2 | , OverloadedStrings #-} 3 | {-| 4 | Module : Text.ANTLR.Pretty 5 | Description : A pretty-printing type class to be used across antlr-haskell modules 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | I want to have something like Show whereby every time I add a new type to the 13 | system, I can implement a function that gets called by existing code which 14 | happens to have types that get parametrized by that type. I don't want to 15 | modify an existing file, and centralizing all of the types in my system into a 16 | single file makes little sense, because then that one file becomes a hub and a 17 | single point of failure. 18 | 19 | * I need a typeclass (no modifying existing files, but they need to call my 20 | new code without passing around a new show function) 21 | 22 | * The prettify function of that typeclass needs to return a state monad so 23 | that recursive calls keep the state 24 | 25 | * A pshow function needs to run execState on the prettify function with an 26 | initial indentation of zero (along with any other future state values...)
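For example, a minimal user-defined instance might look like this (an
illustrative sketch, not code from this library):

> data Coord = Coord Int Int
> instance Prettify Coord where
>   prettify (Coord x y) = do
>     pStr' "Coord "
>     pParens (prettify x >> pStr' ", " >> prettify y)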
27 | 28 | -} 29 | module Text.ANTLR.Pretty where 30 | import Control.Monad.Trans.State.Lazy 31 | import qualified Data.Map.Strict as M 32 | import Data.Data (toConstr, Data(..)) 33 | 34 | import qualified Data.Text as T 35 | 36 | -- | Pretty-printing state 37 | data PState = PState 38 | { indent :: Int -- ^ current indentation level 39 | , vis_chrs :: Int -- ^ number of visible characters consumed so far 40 | , str :: T.Text -- ^ the string, 'T.Text', that we've constructed so far 41 | , columns_soft :: Int -- ^ soft limit on number of columns to consume per row 42 | , columns_hard :: Int -- ^ hard limit on number of columns to consume per row 43 | , curr_col :: Int -- ^ column number we're on in the current row of 'str' 44 | , curr_row :: Int -- ^ number of rows (newlines) we've printed to 'str' 45 | } 46 | 47 | -- | The pretty state monad 48 | type PrettyM val = State PState val 49 | 50 | -- | No value being threaded through the monad (because result is in 'str') 51 | type Pretty = PrettyM () 52 | 53 | -- | Define the 'Prettify' type class for your pretty-printable type @t@. 54 | class Prettify t where 55 | {-# MINIMAL prettify #-} 56 | 57 | -- | Defines how to pretty-print some type. 58 | prettify :: t -> Pretty 59 | default prettify :: (Show t) => t -> Pretty 60 | prettify = rshow 61 | 62 | -- | Lists are pretty-printed specially. 63 | prettifyList :: [t] -> Pretty 64 | prettifyList = prettifyList_ 65 | 66 | -- | Initial Pretty state with safe soft and hard column defaults. 67 | initPState = PState 68 | { indent = 0 -- Indentation level 69 | , vis_chrs = 0 -- Number of visible characters consumed. 70 | , str = T.empty -- The string 71 | , columns_soft = 100 -- Soft limit on terminal width. 72 | , columns_hard = 120 -- Hard limit on terminal width. 73 | , curr_col = 0 -- Column position in the current row. 74 | , curr_row = 0 -- Number of newlines seen 75 | } 76 | 77 | -- | Prettify a string by putting it on the end of the current string state 78 | pLine :: T.Text -> Pretty 79 | pLine s = do 80 | pStr s 81 | _pNewLine 82 | 83 | -- | Pretty print a literal string by just printing the string. 84 | pStr' :: String -> Pretty 85 | pStr' = pStr . T.pack 86 | 87 | -- | This currently assumes all input strings contain no newlines, and that this is 88 | -- only called on relatively small strings because strings running over the end 89 | -- of the hard column limit get dumped onto the next line __no matter what__. 90 | -- T.Texts can run over the soft limit, but hitting the soft limit after a call 91 | -- to 'pStr' forces a newline. 92 | pStr :: T.Text -> Pretty 93 | pStr s = do 94 | pstate <- get 95 | _doIf _pNewLine (T.length s + curr_col pstate > columns_hard pstate && curr_col pstate /= 0) 96 | pstate <- get 97 | _doIf _pIndent (curr_col pstate == 0 && indent pstate > 0) 98 | pstate <- get 99 | put $ pstate 100 | { str = T.append (str pstate) s 101 | , curr_col = (curr_col pstate) + T.length s 102 | } 103 | pstate <- get 104 | _doIf _pNewLine (curr_col pstate > columns_soft pstate) 105 | 106 | -- | Print a single character to the output. 107 | pChr :: Char -> Pretty 108 | pChr c = pStr $ T.singleton c 109 | 110 | -- | Gets rid of if-then-else lines in the Pretty monad code: 111 | _doIf fncn True = fncn 112 | _doIf fncn False = return () 113 | 114 | -- | Indent by the number of spaces specified in the state. 
115 | _pIndent :: Pretty 116 | _pIndent = do 117 | pstate <- get 118 | put $ pstate 119 | { str = str pstate `T.append` T.replicate (indent pstate) (T.singleton ' ') 120 | , curr_col = curr_col pstate + indent pstate 121 | , vis_chrs = vis_chrs pstate + indent pstate 122 | } 123 | 124 | -- | Insert a newline 125 | _pNewLine :: Pretty 126 | _pNewLine = do 127 | pstate <- get 128 | put $ pstate 129 | { str = T.snoc (str pstate) '\n' 130 | , curr_col = 0 131 | , curr_row = curr_row pstate + 1 132 | } 133 | 134 | -- | Run the pretty-printer, returning a 'T.Text'. 135 | pshow :: (Prettify t) => t -> T.Text 136 | pshow t = str $ execState (prettify t) initPState 137 | 138 | -- | Run the pretty-printer, returning a 'String'. 139 | pshow' :: (Prettify t) => t -> String 140 | pshow' = T.unpack . pshow 141 | 142 | pshowList :: (Prettify t) => [t] -> T.Text 143 | pshowList t = str $ execState (prettifyList t) initPState 144 | 145 | pshowList' :: (Prettify t) => [t] -> String 146 | pshowList' = T.unpack . pshowList 147 | 148 | -- | Run the pretty-printer with a specific indentation level. 149 | pshowIndent :: (Prettify t) => Int -> t -> T.Text 150 | pshowIndent i t = str $ execState (prettify t) $ initPState { indent = i } 151 | 152 | -- | Plain-vanilla show of something in the 'Pretty' state monad. 153 | rshow :: (Show t) => t -> Pretty 154 | rshow t = do 155 | pstate <- get 156 | let s = show t 157 | put $ pstate 158 | { str = str pstate `T.append` T.pack s 159 | , curr_row = curr_row pstate + (T.length . T.filter (== '\n')) (T.pack s) 160 | , curr_col = curr_col pstate -- TODO 161 | } 162 | 163 | -- | Parenthesize something in 'Pretty'. 164 | pParens fncn = do 165 | pChr '(' 166 | fncn 167 | pChr ')' 168 | 169 | -- | Increment the indentation level by modifying the pretty-printer state. 170 | incrIndent :: Int -> Pretty 171 | incrIndent n = do 172 | pstate <- get 173 | put $ pstate { indent = indent pstate + n } 174 | 175 | -- | Like 'incrIndent' but set indentation level instead of incrementing. 176 | setIndent :: Int -> Pretty 177 | setIndent n = do 178 | pstate <- get 179 | put $ pstate { indent = n } 180 | 181 | -- | Prettify the given value and compute the number of characters consumed as a 182 | -- result. 183 | pCount :: (Prettify v) => v -> PrettyM Int 184 | pCount v = do 185 | n0 <- (T.length . str) <$> get 186 | prettify v 187 | n1 <- (T.length . str) <$> get 188 | return (n1 - n0) 189 | 190 | -- | Pretty-print a list with one entry per line. 191 | pListLines :: (Prettify v) => [v] -> Pretty 192 | pListLines vs = do 193 | pStr $ T.pack "[ " 194 | col0 <- curr_col <$> get 195 | i0 <- indent <$> get 196 | setIndent (col0 - 2) 197 | sepBy (pLine T.empty >> (pStr $ T.pack ", ")) (map prettify vs) 198 | pLine T.empty >> pChr ']' 199 | setIndent i0 -- Reset indentation back to what it was 200 | 201 | instance (Prettify k, Prettify v) => Prettify (M.Map k v) where 202 | prettify m = do 203 | -- (5 == length of "Map: ") ==> TODO: indentation "discipline" 204 | pStr "Map: "; incrIndent 5 205 | prettify $ M.toList m -- TODO: prettier map 206 | incrIndent (-5) 207 | 208 | instance (Prettify v) => Prettify (Maybe v) where 209 | prettify Nothing = pStr "Nope" 210 | prettify (Just v) = pStr "Yep" >> pParens (prettify v) 211 | 212 | -- | Prettify a list with possibly more than one entry per line.
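--
-- For example, via the default 'prettifyList' (illustrative):
--
-- >>> pshow' [1, 2, 3 :: Int]
-- "[1, 2, 3]"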
213 | prettifyList_ [] = pStr "[]" 214 | prettifyList_ vs = do 215 | pChr '[' 216 | sepBy (pStr ", ") (map prettify vs) 217 | pChr ']' 218 | 219 | instance (Prettify v) => Prettify [v] where 220 | prettify = prettifyList 221 | 222 | -- TODO: template haskell-ify for larger tuples 223 | instance (Prettify a, Prettify b) => Prettify (a,b) where 224 | prettify (a,b) = do 225 | pChr '(' 226 | prettify a 227 | pChr ',' 228 | prettify b 229 | pChr ')' 230 | 231 | instance (Prettify a, Prettify b, Prettify c) => Prettify (a,b,c) where 232 | prettify (a,b,c) = do 233 | pChr '(' 234 | prettify a 235 | pChr ',' 236 | prettify b 237 | pChr ',' 238 | prettify c 239 | pChr ')' 240 | 241 | instance (Prettify a, Prettify b, Prettify c, Prettify d) => Prettify (a,b,c,d) where 242 | prettify (a,b,c,d) = do 243 | pChr '(' 244 | prettify a 245 | pChr ',' 246 | prettify b 247 | pChr ',' 248 | prettify c 249 | pChr ',' 250 | prettify d 251 | pChr ')' 252 | 253 | -- | Pretty-print a list of values, separated by some other pretty-printer. 254 | sepBy s [] = return () 255 | sepBy s (v:vs) = foldl (_sepBy s) v vs 256 | 257 | -- | Reorder pretty-printer bind. 258 | _sepBy s ma mb = ma >> s >> mb 259 | 260 | instance Prettify Char where 261 | prettify = pChr 262 | prettifyList = pStr . T.pack 263 | 264 | instance Prettify () where prettify = rshow 265 | instance Prettify Bool where prettify = rshow 266 | instance Prettify Int where prettify = rshow 267 | 268 | instance Prettify Double where prettify = rshow 269 | 270 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Set.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs, DeriveAnyClass, DeriveGeneric, OverloadedStrings, DeriveLift 2 | , QuasiQuotes, TemplateHaskell, DeriveDataTypeable #-} 3 | {-| 4 | Module : Text.ANTLR.Set 5 | Description : Entrypoint for swapping out different underlying set representations 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Set 14 | ( Set, null, size, member, notMember 15 | , empty, singleton, insert, delete, union, unions 16 | , difference, intersection, filter, map, foldr, foldl', fold 17 | , toList, fromList, (\\), findMin, maybeMin 18 | , Hashable(..), Generic(..) 19 | ) where 20 | import Text.ANTLR.Pretty 21 | 22 | import GHC.Generics (Generic, Rep) 23 | import Data.Hashable (Hashable(..)) 24 | import Language.Haskell.TH.Syntax (Lift(..)) 25 | 26 | import qualified Data.Functor as F 27 | import qualified Control.Applicative as A 28 | import qualified Data.Foldable as Foldable 29 | 30 | import Data.Map ( Map(..) ) 31 | import qualified Data.Map as M 32 | 33 | import qualified Data.HashSet as S 34 | import Data.HashSet as S 35 | ( HashSet(..), member, toList, union 36 | , null, empty, map, size, singleton, insert 37 | , delete, unions, difference, intersection, foldl' 38 | , fromList 39 | ) 40 | 41 | import Prelude hiding (null, filter, map, foldr, foldl) 42 | 43 | -- | Use a hash-based set (hashable keys) for our internal set representation 44 | -- during parsing. 45 | type Set = S.HashSet 46 | 47 | -- | Is @e@ not a member of the set @s@. 48 | notMember e s = not $ member e s 49 | 50 | -- | Set fold 51 | fold = S.foldr 52 | 53 | -- | Set fold 54 | foldr = S.foldr 55 | 56 | -- | Find the minimum value of an orderable set. 
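--
-- For example (illustrative):
--
-- >>> findMin (fromList [3, 1, 2 :: Int])
-- 1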
57 | findMin :: (Ord a, Hashable a) => Set a -> a 58 | findMin = minimum . toList 59 | 60 | --maybeMin :: (Ord a, Hashable a) => Set a -> Maybe a 61 | -- | Get minimum of a set without erroring out on empty set. 62 | maybeMin as 63 | | S.size as == 0 = Nothing 64 | | otherwise = Just $ findMin as 65 | 66 | infixl 9 \\ 67 | 68 | -- | Set difference 69 | (\\) :: (Hashable a, Eq a) => Set a -> Set a -> Set a 70 | m1 \\ m2 = difference m1 m2 71 | 72 | instance (Hashable a, Eq a, Lift a) => Lift (S.HashSet a) where 73 | lift set = [| fromList $(lift $ toList set) |] 74 | 75 | instance (Hashable k, Hashable v) => Hashable (Map k v) where 76 | hashWithSalt salt mp = salt `hashWithSalt` M.toList mp 77 | 78 | instance (Prettify a, Hashable a, Eq a) => Prettify (S.HashSet a) where 79 | prettify s = do 80 | pStr "Set: "; incrIndent 5 81 | pListLines $ toList s 82 | incrIndent (-5) 83 | pLine "" 84 | 85 | --filter :: (Hashable a, Eq a) => (a -> Bool) -> Set a -> Set a 86 | -- | Set filter 87 | filter f s = S.filter f s 88 | 89 | --instance (Hashable a, Eq a) => Hashable (S.HashSet a) where 90 | -- hashWithSalt salt set = salt `hashWithSalt` S.toList (run set) 91 | 92 | 93 | {- 94 | 95 | --import Data.Set.Monad (Set(..), member, toList, union, notMember) 96 | --import qualified Data.Set.Monad as Set 97 | 98 | import Prelude hiding (null, filter, map, foldr, foldl) 99 | import qualified Data.List as L 100 | --import qualified Data.Set as S 101 | import qualified Data.Functor as F 102 | import qualified Control.Applicative as A 103 | import qualified Data.Foldable as Foldable 104 | 105 | import Data.Monoid 106 | import Data.Foldable (Foldable) 107 | import Control.Arrow 108 | import Control.Monad 109 | import Control.DeepSeq 110 | 111 | import Data.Hashable (Hashable(..)) 112 | import GHC.Generics (Generic, Rep) 113 | import Control.DeepSeq (NFData(..)) 114 | import Language.Haskell.TH.Syntax (Lift(..)) 115 | import Data.Data (Data(..)) 116 | 117 | import Data.Map ( Map(..) ) 118 | import qualified Data.Map as M 119 | 120 | import Text.ANTLR.Pretty 121 | 122 | instance (Hashable k, Hashable v) => Hashable (Map k v) where 123 | hashWithSalt salt mp = salt `hashWithSalt` M.toList mp 124 | 125 | instance (Hashable a, Eq a) => Hashable (Set a) where 126 | hashWithSalt salt set = salt `hashWithSalt` S.toList (run set) 127 | 128 | instance (Hashable a, Ord a) => Ord (Set a) where 129 | s1 <= s2 = S.toList (run s1) <= S.toList (run s2) 130 | 131 | data Set a where 132 | Prim :: (Hashable a, Eq a) => S.HashSet a -> Set a 133 | Return :: a -> Set a 134 | Bind :: Set a -> (a -> Set b) -> Set b 135 | Zero :: Set a 136 | Plus :: Set a -> Set a -> Set a 137 | 138 | instance (Data a) => Data (Set a) 139 | 140 | instance (Hashable a, Eq a, Lift a) => Lift (Set a) where 141 | lift set = [| fromList $(lift $ toList set) |] 142 | 143 | run :: (Hashable a, Eq a) => Set a -> S.HashSet a 144 | run (Prim s) = s 145 | run (Return a) = S.singleton a 146 | run (Zero) = S.empty 147 | run (Plus ma mb) = run ma `S.union` run mb 148 | run (Bind (Prim s) f) = S.foldl' S.union S.empty (S.map (run . 
f) s) 149 | run (Bind (Return a) f) = run (f a) 150 | run (Bind Zero _) = S.empty 151 | run (Bind (Plus (Prim s) ma) f) = run (Bind (Prim (s `S.union` run ma)) f) 152 | run (Bind (Plus ma (Prim s)) f) = run (Bind (Prim (run ma `S.union` s)) f) 153 | run (Bind (Plus (Return a) ma) f) = run (Plus (f a) (Bind ma f)) 154 | run (Bind (Plus ma (Return a)) f) = run (Plus (Bind ma f) (f a)) 155 | run (Bind (Plus Zero ma) f) = run (Bind ma f) 156 | run (Bind (Plus ma Zero) f) = run (Bind ma f) 157 | run (Bind (Plus (Plus ma mb) mc) f) = run (Bind (Plus ma (Plus mb mc)) f) 158 | run (Bind (Plus ma mb) f) = run (Plus (Bind ma f) (Bind mb f)) 159 | run (Bind (Bind ma f) g) = run (Bind ma (\a -> Bind (f a) g)) 160 | 161 | instance F.Functor Set where 162 | fmap = liftM 163 | 164 | instance A.Applicative Set where 165 | pure = return 166 | (<*>) = ap 167 | 168 | instance A.Alternative Set where 169 | empty = Zero 170 | (<|>) = Plus 171 | 172 | instance Monad Set where 173 | return = Return 174 | (>>=) = Bind 175 | 176 | instance MonadPlus Set where 177 | mzero = Zero 178 | mplus = Plus 179 | 180 | instance (Hashable a, Eq a) => Monoid (Set a) where 181 | mempty = empty 182 | mappend = union 183 | mconcat = unions 184 | 185 | instance Foldable Set where 186 | foldr f def m = 187 | case m of 188 | Prim s -> S.foldr f def s 189 | Return a -> f a def 190 | Zero -> def 191 | Plus ma mb -> Foldable.foldr f (Foldable.foldr f def ma) mb 192 | Bind s g -> Foldable.foldr f' def s 193 | where f' x b = Foldable.foldr f b (g x) 194 | 195 | instance (Hashable a, Eq a) => Eq (Set a) where 196 | s1 == s2 = run s1 == run s2 197 | 198 | --instance (Hashable a, Eq a, Ord a) => Ord (Set a) where 199 | -- compare s1 s2 = compare (run s1) (run s2) 200 | 201 | instance (Show a, Hashable a, Eq a) => Show (Set a) where 202 | show = show . run 203 | 204 | instance (Prettify a, Hashable a, Eq a) => Prettify (Set a) where 205 | prettify s = do 206 | pStr "Set: "; incrIndent 5 207 | pListLines $ toList s 208 | incrIndent (-5) 209 | pLine "" 210 | 211 | instance (Read a, Hashable a, Eq a) => Read (Set a) where 212 | readsPrec i s = L.map (first Prim) (readsPrec i s) 213 | 214 | instance (NFData a, Hashable a, Eq a) => NFData (Set a) where 215 | rnf = rnf . run 216 | 217 | infixl 9 \\ 218 | 219 | (\\) :: (Hashable a, Eq a) => Set a -> Set a -> Set a 220 | m1 \\ m2 = difference m1 m2 221 | 222 | null :: (Hashable a, Eq a) => Set a -> Bool 223 | null = S.null . run 224 | 225 | size :: (Hashable a, Eq a) => Set a -> Int 226 | size = S.size . 
run 227 | 228 | member :: (Hashable a, Eq a) => a -> Set a -> Bool 229 | member a s = S.member a (run s) 230 | 231 | notMember :: (Hashable a, Eq a) => a -> Set a -> Bool 232 | notMember a t = not (member a t) 233 | 234 | empty :: (Hashable a, Eq a) => Set a 235 | empty = Prim S.empty 236 | 237 | singleton :: (Hashable a, Eq a) => a -> Set a 238 | singleton a = Prim (S.singleton a) 239 | 240 | insert :: (Hashable a, Eq a) => a -> Set a -> Set a 241 | insert a s = Prim (S.insert a (run s)) 242 | 243 | delete :: (Hashable a, Eq a) => a -> Set a -> Set a 244 | delete a s = Prim (S.delete a (run s)) 245 | 246 | union :: (Hashable a, Eq a) => Set a -> Set a -> Set a 247 | union s1 s2 = Prim (run s1 `S.union` run s2) 248 | 249 | unions :: (Hashable a, Eq a) => [Set a] -> Set a 250 | unions ss = Prim (S.unions (L.map run ss)) 251 | 252 | difference :: (Hashable a, Eq a) => Set a -> Set a -> Set a 253 | difference s1 s2 = Prim (S.difference (run s1) (run s2)) 254 | 255 | intersection :: (Hashable a, Eq a) => Set a -> Set a -> Set a 256 | intersection s1 s2 = Prim (S.intersection (run s1) (run s2)) 257 | 258 | filter :: (Hashable a, Eq a) => (a -> Bool) -> Set a -> Set a 259 | filter f s = Prim (S.filter f (run s)) 260 | 261 | map :: (Hashable a, Eq a, Hashable b, Eq b) => (a -> b) -> Set a -> Set b 262 | map f s = Prim (S.map f (run s)) 263 | 264 | foldr :: (Hashable a, Eq a) => (a -> b -> b) -> b -> Set a -> b 265 | foldr f z s = S.foldr f z (run s) 266 | 267 | fold :: (Hashable a, Eq a) => (a -> b -> b) -> b -> Set a -> b 268 | fold f z s = S.foldr f z (run s) 269 | 270 | foldl' :: (Hashable a, Eq a) => (b -> a -> b) -> b -> Set a -> b 271 | foldl' f z s = S.foldl' f z (run s) 272 | 273 | toList :: (Hashable a, Eq a) => Set a -> [a] 274 | toList = S.toList . run 275 | 276 | fromList :: (Hashable a, Eq a) => [a] -> Set a 277 | fromList as = Prim (S.fromList as) 278 | 279 | findMin :: (Ord a, Hashable a) => Set a -> a 280 | findMin = minimum . toList 281 | 282 | maybeMin :: (Ord a, Hashable a) => Set a -> Maybe a 283 | maybeMin as 284 | | size as == 0 = Nothing 285 | | otherwise = Just $ findMin as 286 | 287 | -} 288 | 289 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | # resolver: nightly-2019-04-10 2 | resolver: lts-16.15 3 | 4 | packages: 5 | - . 
6 | 7 | -------------------------------------------------------------------------------- /test/GrammarReader/GrammarReader.hs: 1 | import Text.ParserCombinators.Parsec as Parsec 2 | import Data.Char 3 | import ParserGenerator.AllStar 4 | 5 | -- Functions for parsing a context-free grammar in BNF form 6 | 7 | grammarFile = endBy production eol 8 | 9 | production = 10 | do l <- nonterminal 11 | skipMany spacesAndTabs 12 | string "::=" 13 | skipMany spacesAndTabs 14 | r <- rightSide 15 | return (l, r) 16 | 17 | rightSide = sepBy expansion expansionSep 18 | 19 | expansion = many symbol 20 | 21 | expansionSep = 22 | do skipMany spacesAndTabs 23 | char '|' 24 | skipMany spacesAndTabs 25 | 26 | symbol = nonterminal <|> terminal 27 | 28 | nonterminal = upper 29 | 30 | terminal = lower 31 | 32 | spacesAndTabs = oneOf " \t" 33 | 34 | eol = char '\n' 35 | 36 | 37 | parseGrammar :: String -> Either ParseError [(Char, [String])] 38 | parseGrammar input = Parsec.parse grammarFile "(unknown)" input 39 | 40 | 41 | -- Functions for converting a parsed grammar to an ATN environment 42 | 43 | grammarToATNEnv g = 44 | case parseGrammar g of 45 | Left e -> error ("could not parse grammar: " ++ show e) 46 | Right productions -> 47 | let (nonterminals, _) = unzip productions 48 | in zip nonterminals (map productionToATN productions) 49 | 50 | 51 | productionToATN (nt, expansions) = 52 | map (\(n, exp) -> expansionToATNPath nt exp n) (zip [1..] expansions) 53 | 54 | expansionToATNPath nt exp choiceNum = 55 | let buildMiddleAndFinalStates n symbols = 56 | case symbols of 57 | [] -> [(MIDDLE n, EPS, FINAL nt)] 58 | s : symbols' -> 59 | let edgeLabel = if isUpper s then NT s else T s 60 | in (MIDDLE n, edgeLabel, MIDDLE (n + 1)) : buildMiddleAndFinalStates (n + 1) symbols' 61 | in (INIT nt, EPS, CHOICE nt choiceNum) : 62 | (CHOICE nt choiceNum, EPS, MIDDLE 1) : 63 | buildMiddleAndFinalStates 1 exp 64 | 65 | main = 66 | do g <- getContents 67 | print (grammarToATNEnv g) 68 | -------------------------------------------------------------------------------- /test/GrammarReader/output/generated_atn_lookup.txt: 1 | [('S',[[(INIT 'S',EPS,CHOICE 'S' 1),(CHOICE 'S' 1,EPS,MIDDLE 1),(MIDDLE 1,NT 'A',MIDDLE 2),(MIDDLE 2,T 'c',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'S')],[(INIT 'S',EPS,CHOICE 'S' 2),(CHOICE 'S' 2,EPS,MIDDLE 1),(MIDDLE 1,NT 'A',MIDDLE 2),(MIDDLE 2,T 'd',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'S')]]),('A',[[(INIT 'A',EPS,CHOICE 'A' 1),(CHOICE 'A' 1,EPS,MIDDLE 1),(MIDDLE 1,T 'a',MIDDLE 2),(MIDDLE 2,NT 'A',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'A')],[(INIT 'A',EPS,CHOICE 'A' 2),(CHOICE 'A' 2,EPS,MIDDLE 1),(MIDDLE 1,T 'b',MIDDLE 2),(MIDDLE 2,EPS,FINAL 'A')]])] 2 | -------------------------------------------------------------------------------- /test/GrammarReader/sample_grammars/sample_bnf_grammar.txt: 1 | S ::= Ac | Ad 2 | A ::= aA | b 3 | 4 | -------------------------------------------------------------------------------- /test/allstar/AllStarTests.hs: 1 | {-# LANGUAGE TypeFamilies #-} 2 | 3 | module AllStarTests where 4 | 5 | --import Test.HUnit 6 | import Text.ANTLR.Allstar.ParserGenerator 7 | import qualified Data.Set as DS 8 | import Text.ANTLR.Parser (HasEOF(..)) 9 | import Text.ANTLR.Grammar (Ref(..)) 10 | 11 | import 
Test.Framework 12 | import Test.Framework.Providers.HUnit 13 | import Text.ANTLR.HUnit 14 | import Text.ANTLR.Pretty 15 | 16 | --------------------------------TESTING----------------------------------------- 17 | 18 | {- instance Token Char where 19 | type Label Char = Char 20 | type Literal Char = Char 21 | getLabel c = c 22 | getLiteral c = c 23 | 24 | instance Token (a, b) where 25 | type Label (a, b) = a 26 | type Literal (a, b) = b 27 | getLabel (a, b) = a 28 | getLiteral (a, b) = b -} 29 | 30 | instance (Show a, Show b) => Prettify (Either a b) where prettify = rshow 31 | 32 | instance Ref Char where 33 | type Sym Char = Char 34 | getSymbol = id 35 | 36 | instance HasEOF Char where 37 | type StripEOF Char = Char 38 | isEOF c = False 39 | stripEOF c = Just c 40 | 41 | dumbTokenizer [] = [] 42 | dumbTokenizer (t:ts) = [(t,ts)] 43 | 44 | atnEnv = DS.fromList [ -- First path through the 'S' ATN 45 | (Init 'S', GS EPS, Middle 'S' 0 0), 46 | (Middle 'S' 0 0, GS (NT 'A'), Middle 'S' 0 1), 47 | (Middle 'S' 0 1, GS (T 'c'), Middle 'S' 0 2), 48 | (Middle 'S' 0 2, GS EPS, Final 'S'), 49 | 50 | -- Second path through the 'S' ATN 51 | (Init 'S', GS EPS, Middle 'S' 1 0), 52 | (Middle 'S' 1 0, GS (NT 'A'), Middle 'S' 1 1), 53 | (Middle 'S' 1 1, GS (T 'd'), Middle 'S' 1 2), 54 | (Middle 'S' 1 2, GS EPS, Final 'S'), 55 | 56 | -- First path through the 'A' ATN 57 | (Init 'A', GS EPS, Middle 'A' 0 0), 58 | (Middle 'A' 0 0, GS (T 'a'), Middle 'A' 0 1), 59 | (Middle 'A' 0 1, GS (NT 'A'), Middle 'A' 0 2), 60 | (Middle 'A' 0 2, GS EPS, Final 'A'), 61 | 62 | -- Second path through the 'A' ATN 63 | (Init 'A', GS EPS, Middle 'A' 1 0), 64 | (Middle 'A' 1 0, GS (T 'b'), Middle 'A' 1 1), 65 | (Middle 'A' 1 1, GS EPS, Final 'A')] 66 | 67 | 68 | -- For now, I'm only checking whether the input was accepted--not checking the derivation. 69 | 70 | -- Example from the manual trace of ALL(*)'s execution 71 | parseTest1 = ((@=?) --"for parse dumbTokenizer [a, b, c]," 72 | (Right (Node 'S' [NT 'A', T 'c'] 73 | [Node 'A' [T 'a', NT 'A'] 74 | [Leaf 'a', 75 | Node 'A' [T 'b'] 76 | [Leaf 'b']], 77 | Leaf 'c'])) 78 | (parse dumbTokenizer ['a', 'b', 'c'] (NT 'S') atnEnv True)) 79 | 80 | -- Example #1 from the ALL(*) paper 81 | parseTest2 = ((@=?) --"for parse dumbTokenizer [b, c]," 82 | (Right (Node 'S' [NT 'A', T 'c'] 83 | [Node 'A' [T 'b'] 84 | [Leaf 'b'], 85 | Leaf 'c'])) 86 | (parse dumbTokenizer ['b', 'c'] (NT 'S') atnEnv True)) 87 | 88 | -- Example #2 from the ALL(*) paper 89 | parseTest3 = ((@=?) --"for parse dumbTokenizer [b, d]," 90 | (Right (Node 'S' [NT 'A', T 'd'] 91 | [Node 'A' [T 'b'] 92 | [Leaf 'b'], 93 | Leaf 'd'])) 94 | (parse dumbTokenizer ['b', 'd'] (NT 'S') atnEnv True)) 95 | 96 | -- Input that requires more recursive traversals of the A ATN 97 | parseTest4 = ((@=?) --"for parse dumbTokenizer [a a a b c]," 98 | (Right (Node 'S' [NT 'A', T 'c'] 99 | [Node 'A' [T 'a', NT 'A'] 100 | [Leaf 'a', 101 | Node 'A' [T 'a', NT 'A'] 102 | [Leaf 'a', 103 | Node 'A' [T 'a', NT 'A'] 104 | [Leaf 'a', 105 | Node 'A' [T 'b'] 106 | [Leaf 'b']]]], 107 | Leaf 'c'])) 108 | (parse dumbTokenizer ['a', 'a', 'a', 'b', 'c'] (NT 'S') atnEnv True)) 109 | 110 | -- Make sure that the result of parsing an out-of-language string has a Left tag. 111 | parseTest5 = ((@=?) 
--"for parse dumbTokenizer [a b a c]," 112 | True 113 | (let parseResult = parse dumbTokenizer ['a', 'b', 'a', 'c'] (NT 'S') atnEnv True 114 | isLeft pr = case pr of 115 | Left _ -> True 116 | _ -> False 117 | in isLeft parseResult)) 118 | 119 | -- To do: Update these tests so that they use the new ATN state representation. 120 | {- 121 | 122 | conflictsTest = ((@=?) --"for getConflictSetsPerLoc()" 123 | 124 | ([[(MIDDLE 5, 1, []), (MIDDLE 5, 2, []),(MIDDLE 5, 3, [])], 125 | [(MIDDLE 5, 1, [MIDDLE 1]), (MIDDLE 5, 2, [MIDDLE 1])], 126 | [(MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]] :: [[ATNConfig Char]]) 127 | 128 | (getConflictSetsPerLoc (D [(MIDDLE 5, 1, []), 129 | (MIDDLE 5, 2, []), 130 | (MIDDLE 5, 3, []), 131 | (MIDDLE 5, 1, [MIDDLE 1]), 132 | (MIDDLE 5, 2, [MIDDLE 1]), 133 | (MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]))) 134 | 135 | prodsTest = ((@=?) --"for getProdSetsPerState()" 136 | 137 | ([[(MIDDLE 5, 1, []), 138 | (MIDDLE 5, 2, []), 139 | (MIDDLE 5, 3, []), 140 | (MIDDLE 5, 1, [MIDDLE 1]), 141 | (MIDDLE 5, 2, [MIDDLE 1])], 142 | 143 | [(MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]] :: [[ATNConfig Char]]) 144 | 145 | (getProdSetsPerState (D [(MIDDLE 5, 1, []), 146 | (MIDDLE 5, 2, []), 147 | (MIDDLE 5, 3, []), 148 | (MIDDLE 5, 1, [MIDDLE 1]), 149 | (MIDDLE 5, 2, [MIDDLE 1]), 150 | (MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]))) 151 | 152 | -} 153 | 154 | 155 | ambigATNEnv = DS.fromList [(Init 'S', GS EPS, Middle 'S' 0 0), 156 | (Middle 'S' 0 0, GS (T 'a'), Middle 'S' 0 1), 157 | (Middle 'S' 0 1, GS EPS, Final 'S'), 158 | 159 | (Init 'S', GS EPS, Middle 'S' 1 0), 160 | (Middle 'S' 1 0, GS (T 'a'), Middle 'S' 1 1), 161 | (Middle 'S' 1 1, GS EPS, Final 'S'), 162 | 163 | (Init 'S', GS EPS, Middle 'S' 2 0), 164 | (Middle 'S' 2 0, GS (T 'a'), Middle 'S' 2 1), 165 | (Middle 'S' 2 1, GS (T 'b'), Middle 'S' 2 2), 166 | (Middle 'S' 2 2, GS EPS, Final 'S')] 167 | 168 | ambigParseTest1 = ((@=?) --"for parse dumbTokenizer [a]," 169 | True 170 | (let parseResult = parse dumbTokenizer ['a'] (NT 'S') ambigATNEnv True 171 | isLeft pr = case pr of 172 | Left _ -> True 173 | _ -> False 174 | in isLeft parseResult)) 175 | 176 | ambigParseTest2 = ((@=?) 
--"for parse dumbTokenizer [a b]," 177 | (Right (Node 'S' [T 'a', T 'b'] 178 | [Leaf 'a', 179 | Leaf 'b'])) 180 | (parse dumbTokenizer ['a', 'b'] (NT 'S') ambigATNEnv True)) 181 | 182 | 183 | tests = [testCase "parseTest1" parseTest1, 184 | testCase "parseTest2" parseTest2, 185 | testCase "parseTest3" parseTest3, 186 | testCase "parseTest4" parseTest4, 187 | testCase "parseTest5" parseTest5, 188 | 189 | --testCase "conflictsTest" conflictsTest, 190 | --testCase "prodsTest" prodsTest, 191 | 192 | testCase "ambigParseTest1" ambigParseTest1, 193 | testCase "ambigParseTest2" ambigParseTest2] 194 | 195 | --main = runTestTT (TestList tests) 196 | -------------------------------------------------------------------------------- /test/allstar/ConvertDFA.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-} 4 | module ConvertDFA where 5 | import Language.ANTLR4 6 | 7 | mkInt :: String -> Int 8 | mkInt _ = 3 9 | 10 | data ConvertIt = 11 | Start String 12 | | End Int 13 | deriving (Eq, Ord, Show) 14 | 15 | $( return [] ) 16 | 17 | [g4| 18 | grammar Convert; 19 | 20 | root : 'START' LexemeA -> Start 21 | | 'END' LexemeB -> End 22 | ; 23 | 24 | LexemeA : 'abc' -> String ; 25 | LexemeB : 'efg' -> mkInt ; 26 | |] 27 | 28 | -------------------------------------------------------------------------------- /test/allstar/ConvertP.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module ConvertP 5 | ( module ConvertDFA 6 | , module ConvertP 7 | ) where 8 | import Language.ANTLR4 9 | import ConvertDFA 10 | 11 | $(g4_parsers convertAST convertGrammar) 12 | 13 | -------------------------------------------------------------------------------- /test/allstar/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO.Unsafe (unsafePerformIO) 4 | import Data.Monoid 5 | import Test.Framework 6 | import Test.Framework.Providers.HUnit 7 | import Test.Framework.Providers.QuickCheck2 8 | import Test.HUnit 9 | import Test.QuickCheck (Property, quickCheck, (==>)) 10 | import qualified Test.QuickCheck.Monadic as TQM 11 | 12 | import qualified AllStarTests 13 | import qualified ConvertP 14 | import ConvertP (ConvertIt(..)) 15 | 16 | convertP_simple = 17 | case ConvertP.allstarParse (const False) "STARTabc" of 18 | (Right ast) -> ConvertP.ast2root ast @=? (Start "abc") 19 | (Left err) -> assertFailure err 20 | 21 | convertP_simple2 = 22 | case ConvertP.allstarParse (const False) "ENDefg" of 23 | (Right ast) -> ConvertP.ast2root ast @=? 
(End 3) 24 | (Left err) -> assertFailure err 25 | 26 | tests = 27 | [ testCase "convertP_simple" convertP_simple 28 | , testCase "convertP_simple2" convertP_simple2 29 | ] 30 | 31 | main :: IO () 32 | main = defaultMainWithOpts 33 | (AllStarTests.tests ++ tests) 34 | mempty 35 | 36 | -------------------------------------------------------------------------------- /test/allstar/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Sample grammar for ALL(\*) 3 | 4 | ``` 5 | S -> Ac | Ad 6 | 7 | A -> aA | b 8 | ``` 9 | 10 | ### ALL(\*) Input/output examples 11 | 12 | ```haskell 13 | *Test.AllStarTests> parse ['a', 'b', 'c'] (NT 'S') atnEnv 14 | (Just True, Node 'S' [Node 'A' [Leaf 'a', Node 'A' [Leaf 'b']], Leaf 'c']) 15 | ``` 16 | 17 | ```haskell 18 | *Test.AllStarTests> parse ['b', 'd'] (NT 'S') atnEnv 19 | (Just True, Node 'S' [Node 'A' [Leaf 'b'], Leaf 'd']) 20 | ``` 21 | 22 | ```haskell 23 | *Test.AllStarTests> parse ['a', 'a', 'a', 'a', 'b', 'c'] (NT 'S') atnEnv 24 | (Just True, Node 'S' [Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Nod 25 | ``` 26 | 27 | -------------------------------------------------------------------------------- /test/atn/ATN.hs: -------------------------------------------------------------------------------- 1 | module ATN where 2 | 3 | import Text.ANTLR.Grammar 4 | import Text.ANTLR.Allstar.ATN 5 | import Example.Grammar 6 | import Text.ANTLR.Set (fromList, union) 7 | import System.IO.Unsafe (unsafePerformIO) 8 | 9 | -- This is the Grammar from page 6 of the 10 | -- 'Adaptive LL(*) Parsing: The Power of Dynamic Analysis' 11 | -- paper, namely the expected transitions based on Figures 12 | -- 5 through 8: 13 | paperATNGrammar = (defaultGrammar "C" :: Grammar () String String ()) 14 | { ns = fromList ["S", "A"] 15 | , ts = fromList ["a", "b", "c", "d"] 16 | , s0 = "C" 17 | , ps = 18 | [ production "S" $ Prod Pass [NT "A", T "c"] 19 | , production "S" $ Prod Pass [NT "A", T "d"] 20 | , production "A" $ Prod Pass [T "a", NT "A"] 21 | , production "A" $ Prod Pass [T "b"] 22 | ] 23 | } 24 | 25 | s i = ("S", i) 26 | 27 | -- Names as shown in paper: 28 | pS = Start "S" 29 | pS1 = Middle "S" 0 0 30 | p1 = Middle "S" 0 1 31 | p2 = Middle "S" 0 2 32 | pS2 = Middle "S" 1 0 33 | p3 = Middle "S" 1 1 34 | p4 = Middle "S" 1 2 35 | pS' = Accept "S" 36 | 37 | pA = Start "A" 38 | pA1 = Middle "A" 2 0 39 | p5 = Middle "A" 2 1 40 | p6 = Middle "A" 2 2 41 | pA2 = Middle "A" 3 0 42 | p7 = Middle "A" 3 1 43 | pA' = Accept "A" 44 | 45 | exp_paperATN = ATN 46 | { _Δ = fromList 47 | -- Submachine for S: 48 | [ (pS, Epsilon, pS1) 49 | , (pS1, NTE "A", p1) 50 | , (p1, TE "c", p2) 51 | , (p2, Epsilon, pS') 52 | , (pS, Epsilon, pS2) 53 | , (pS2, NTE "A", p3) 54 | , (p3, TE "d", p4) 55 | , (p4, Epsilon, pS') 56 | -- Submachine for A: 57 | , (pA, Epsilon, pA1) 58 | , (pA1, TE "a", p5) 59 | , (p5, NTE "A", p6) 60 | , (p6, Epsilon, pA') 61 | , (pA, Epsilon, pA2) 62 | , (pA2, TE "b", p7) 63 | , (p7, Epsilon, pA') 64 | ] 65 | } 66 | 67 | --always _ = True 68 | --never _ = False 69 | 70 | addPredicates = paperATNGrammar 71 | { ps = 72 | ps paperATNGrammar ++ 73 | [ production "A" $ Prod (Sem (Predicate "always" ())) [T "a"] 74 | , production "A" $ Prod (Sem (Predicate "never" ())) [] 75 | , production "A" $ Prod (Sem (Predicate "always2" ())) [NT "A", T "a"] 76 | ] 77 | } 78 | 79 | (pX,pY,pZ) = (Start "A", Start "A", Start "A") 80 | pX1 = Middle "A" 4 2 81 | pX2 = Middle "A" 4 0 82 | pX3 = Middle "A" 4 1 83 | pX4 = 
Accept "A" 84 | 85 | pY1 = Middle "A" 5 1 86 | pY2 = Middle "A" 5 0 87 | pY3 = Accept "A" 88 | 89 | pZ1 = Middle "A" 6 3 90 | pZ2 = Middle "A" 6 0 91 | pZ3 = Middle "A" 6 1 92 | pZ4 = Middle "A" 6 2 93 | pZ5 = Accept "A" 94 | 95 | exp_addPredicates = ATN 96 | { _Δ = union (_Δ exp_paperATN) $ fromList 97 | [ (pX, Epsilon, pX1) 98 | , (pX1, PE $ Predicate "always" (), pX2) 99 | , (pX2, TE "a", pX3) 100 | , (pX3, Epsilon, pX4) 101 | 102 | , (pY, Epsilon, pY1) 103 | , (pY1, PE $ Predicate "never" (), pY2) 104 | , (pY2, Epsilon, pY3) 105 | 106 | , (pZ, Epsilon, pZ1) 107 | , (pZ1, PE $ Predicate "always2" (), pZ2) 108 | , (pZ2, NTE "A", pZ3) 109 | , (pZ3, TE "a", pZ4) 110 | , (pZ4, Epsilon, pZ5) 111 | ] 112 | } 113 | 114 | fireZeMissiles state = seq 115 | (unsafePerformIO $ putStrLn "Missiles fired.") 116 | undefined 117 | 118 | addMutators = addPredicates 119 | { ps = ps addPredicates ++ 120 | [ production "A" $ Prod (Action (Mutator "fireZeMissiles" ())) [] 121 | , production "S" $ Prod (Action (Mutator "identity" ())) [] 122 | ] 123 | } 124 | 125 | exp_addMutators = ATN 126 | { _Δ = union (_Δ exp_addPredicates) $ fromList 127 | [ (Start "A", Epsilon, Middle "A" 7 0) 128 | , (Middle "A" 7 0, ME $ Mutator "fireZeMissiles" (), Accept "A") 129 | , (Start "S", Epsilon, Middle "S" 8 0) 130 | , (Middle "S" 8 0, ME $ Mutator "identity" (), Accept "S") 131 | ] 132 | } 133 | 134 | 135 | -------------------------------------------------------------------------------- /test/atn/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Text.ANTLR.Allstar.ATN 3 | import ATN 4 | import Text.ANTLR.Set (fromList, (\\)) 5 | 6 | import System.IO.Unsafe (unsafePerformIO) 7 | import Data.Monoid 8 | import Test.Framework 9 | import Test.Framework.Providers.HUnit 10 | import Test.Framework.Providers.QuickCheck2 11 | import Test.HUnit 12 | import Test.QuickCheck (Property, quickCheck, (==>)) 13 | import qualified Test.QuickCheck.Monadic as TQM 14 | 15 | -- ATNs should be same: 16 | test_paperATNGrammar = 17 | atnOf paperATNGrammar 18 | @?= 19 | exp_paperATN 20 | 21 | -- Set difference to make debugging easier: 22 | test_paperATNGrammar2 = 23 | ((_Δ . atnOf) paperATNGrammar \\ _Δ exp_paperATN) 24 | @?= 25 | fromList [] 26 | 27 | test_addPredicates = 28 | atnOf addPredicates 29 | @?= 30 | exp_addPredicates 31 | 32 | test_addPredicates2 = 33 | ((_Δ . 
atnOf) addPredicates \\ _Δ exp_addPredicates) 34 | @?= 35 | fromList [] 36 | 37 | test_addMutators = 38 | atnOf addMutators 39 | @?= 40 | exp_addMutators 41 | 42 | main :: IO () 43 | main = defaultMainWithOpts 44 | [ testCase "paper_ATN_Grammar" test_paperATNGrammar 45 | , testCase "paper_ATN_Grammar2" test_paperATNGrammar2 46 | , testCase "paper_ATN_Predicates2" test_addPredicates2 47 | , testCase "paper_ATN_Predicates" test_addPredicates 48 | , testCase "paper_ATN_Mutators" test_addMutators 49 | ] mempty 50 | 51 | -------------------------------------------------------------------------------- /test/c/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Language.ANTLR4 3 | import CParser 4 | 5 | main :: IO () 6 | main = print "done" 7 | 8 | -------------------------------------------------------------------------------- /test/chisel/Language/Chisel/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances #-} 4 | module Language.Chisel.Grammar 5 | ( ChiselNTSymbol(..), ChiselTSymbol(..), ChiselAST 6 | , lowerID, upperID, prim, int, arrow, lparen, rparen, pound 7 | , vertbar, colon, comma, atsymbol, carrot, dot, linecomm, ws 8 | , Primitive(..), chiselGrammar, TokenValue(..) 9 | , chiselAST, TokenName(..), chiselDFAs, lexeme2value, isWhitespace 10 | , ChiselToken(..), list, cons, append 11 | ) where 12 | import Language.ANTLR4 13 | import Language.Chisel.Syntax as S 14 | 15 | list a = [a] 16 | cons = (:) 17 | append = (++) 18 | 19 | [g4| 20 | grammar Chisel; 21 | chiselProd : prodSimple 22 | | '(' prodSimple ')' 23 | ; 24 | 25 | prodSimple : prodID formals magnitude alignment '->' group -> S.prodFMA 26 | | prodID formals '->' group -> S.prodF 27 | | prodID magnitude alignment '->' group -> S.prodMA 28 | | prodID magnitude '->' group -> S.prodM 29 | | LowerID prodID magnitude alignment '->' group -> S.prodNMA 30 | ; 31 | 32 | formals : LowerID formals -> cons 33 | | LowerID -> list 34 | ; 35 | 36 | magnitude : '|' '#' sizeArith '|' -> magWild 37 | | '|' sizeArith '|' -> magNorm 38 | | '|' prodID '|' -> magID 39 | ; 40 | 41 | alignment : '@' '(' sizeArith ')'; 42 | 43 | group : groupExp1 -> list 44 | | '(' groupExp ')' 45 | ; 46 | 47 | groupExp : groupExp1 -> list 48 | | groupExp1 ',' groupExp -> cons 49 | ; 50 | 51 | groupExp1 : '#' chiselProd -> gProdWild 52 | | '#' sizeArith -> gSizeWild 53 | | '(' flags ')' -> GFlags 54 | | chiselProd -> gProdNorm 55 | | sizeArith -> gSizeNorm 56 | | label -> GLabel 57 | | arith chiselProd -> gProdArith 58 | | arith prodApp -> GProdApp 59 | | '(' labels ')' -> GLabels 60 | ; 61 | 62 | flags : prodID -> list 63 | | prodID '|' flags -> cons 64 | ; 65 | 66 | labels : label -> list 67 | | label '|' labels -> cons 68 | ; 69 | 70 | label : LowerID ':' labelExp -> Label 71 | ; 72 | 73 | labelExp : '#' chiselProd -> lProdWild 74 | | '#' prodApp -> lProdAppWild 75 | | '#' sizeArith -> lSizeWild 76 | | chiselProd -> lProd 77 | | prodApp -> lProdApp 78 | | sizeArith -> lSize 79 | ; 80 | 81 | prodApp : prodID prodApp -> cons 82 | | prodID -> list 83 | ; 84 | 85 | sizeArith : arith Prim -> SizeArith 86 | | Prim -> singleArith 87 | ; 88 | 89 | arith : INT -> SizeInt 90 | | LowerID -> SizeID 91 | | arith '^' arith -> SizeExp 92 | ; 93 | 94 | prodID : UpperID -> id 95 | | 
UpperID '.' prodID -> append 96 | ; 97 | 98 | Prim : ( 'bit' | 'byte' ) 's'? -> Primitive; 99 | ArchPrim : ( 'page' | 'word' ) 's'? -> Primitive; 100 | UpperID : [A-Z][a-zA-Z0-9_]* -> String; 101 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 102 | INT : [0-9]+ -> Int; 103 | LineComment : '//' (~ '\n')* '\n' -> String; 104 | WS : [ \t\n\r\f\v]+ -> String; 105 | |] 106 | 107 | -- Types used to the right of the '->' directive must instance Read 108 | 109 | isWhitespace T_LineComment = True 110 | isWhitespace T_WS = True 111 | isWhitespace _ = False 112 | 113 | {- Helper functions to construct all the various Tokens from either the desired 114 | - (arbitrary) lexeme or by looking it up based on the static lexeme it always 115 | - matches. -} 116 | lowerID x = Token T_LowerID (V_LowerID x) (length x) 117 | upperID x = Token T_UpperID (V_UpperID x) (length x) 118 | prim x = Token T_Prim (V_Prim x) (length $ show x) 119 | int x = Token T_INT (V_INT x) (length $ show x) 120 | arrow = lookupToken "->" 121 | lparen = lookupToken "(" 122 | rparen = lookupToken ")" 123 | pound = lookupToken "#" 124 | vertbar = lookupToken "|" 125 | colon = lookupToken ":" 126 | comma = lookupToken "," 127 | atsymbol = lookupToken "@" 128 | carrot = lookupToken "^" 129 | dot = lookupToken "." 130 | linecomm x = Token T_LineComment (V_LineComment x) (length x) 131 | ws x = Token T_WS (V_WS x) (length x) 132 | 133 | -------------------------------------------------------------------------------- /test/chisel/Language/Chisel/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-} 4 | module Language.Chisel.Parser 5 | ( module Language.Chisel.Grammar 6 | , Language.Chisel.Parser.tokenize, glrParseFast 7 | , parse, glrParse 8 | ) where 9 | import Language.ANTLR4 10 | import Language.Chisel.Syntax as S 11 | import Language.Chisel.Grammar 12 | 13 | $(g4_parsers chiselAST chiselGrammar) 14 | 15 | $(mkLRParser chiselAST chiselGrammar) 16 | 17 | parse = glrParse isWhitespace 18 | 19 | 20 | -------------------------------------------------------------------------------- /test/chisel/Language/Chisel/Syntax.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric #-} 2 | module Language.Chisel.Syntax where 3 | import Language.ANTLR4 (Data(..)) 4 | import Text.ANTLR.Pretty 5 | import Text.ANTLR.Set (Hashable(..), Generic(..)) 6 | 7 | data ChiselProd = ChiselProd 8 | { prodID :: UpperID 9 | , count :: Maybe LowerID 10 | , formals :: Maybe [Formal] 11 | , magnitude :: Maybe Magnitude 12 | , alignment :: Maybe SizeArith 13 | , rhs :: Group 14 | } 15 | 16 | prodFMA s f m a = ChiselProd s Nothing (Just f) (Just m) (Just a) 17 | prodF s f = ChiselProd s Nothing (Just f) Nothing Nothing 18 | prodMA s m a = ChiselProd s Nothing Nothing (Just m) (Just a) 19 | prodM s m = ChiselProd s Nothing Nothing (Just m) Nothing 20 | prodNMA s n m a = ChiselProd s (Just n) Nothing (Just m) (Just a) 21 | 22 | type Formal = String 23 | 24 | -- Whether or not '#' was used: 25 | type Wild = Bool 26 | 27 | data Magnitude = Mag Wild SizeArith 28 | 29 | magWild = Mag True -- Variable magnitude 30 | magNorm = Mag False -- Fixed magnitude 31 | magID = Mag False . 
SizeID 32 | 33 | type Alignment = SizeArith 34 | 35 | type Group = [GroupExp] 36 | 37 | -- Left False == no size annotation 38 | -- Left True == '#' annotation 39 | -- Right sz == fixed size annotation 40 | type Count = Either Wild SizeArith 41 | 42 | data GroupExp = 43 | GProd Count ChiselProd 44 | | GSize Wild SizeArith 45 | | GFlags [Flag] 46 | | GLabel Label 47 | | GLabels [Label] 48 | | GProdApp SizeArith ProdApp 49 | 50 | gProdWild = GProd (Left True) 51 | gProdNorm = GProd (Left False) 52 | gProdArith a = GProd (Right a) 53 | 54 | gSizeWild = GSize True 55 | gSizeNorm = GSize False 56 | 57 | type Flag = ProdID 58 | 59 | type ProdApp = [ProdID] 60 | 61 | type UpperID = String 62 | type LowerID = String 63 | type ProdID = String 64 | type LabelID = String 65 | 66 | data Label = Label LabelID LabelExp 67 | 68 | data LabelExp = 69 | LProd Wild ChiselProd 70 | | LProdApp Wild ProdApp 71 | | LSize Wild SizeArith 72 | 73 | lProdWild = LProd True 74 | lProd = LProd False 75 | 76 | lProdAppWild = LProdApp True 77 | lProdApp = LProdApp False 78 | 79 | lSizeWild = LSize True 80 | lSize = LSize False 81 | 82 | data SizeArith = 83 | SizeInt Int 84 | | SizeID LowerID 85 | | SizeExp SizeArith SizeArith 86 | | SizeArith SizeArith Primitive 87 | 88 | singleArith = SizeArith (SizeInt 1) 89 | 90 | data Primitive = Page | Word | Byte | Bit 91 | deriving (Show, Eq, Ord, Generic, Hashable, Data) 92 | 93 | lexeme2prim "page" = Just Page 94 | lexeme2prim "pages" = Just Page 95 | lexeme2prim "word" = Just Word 96 | lexeme2prim "words" = Just Word 97 | lexeme2prim "byte" = Just Byte 98 | lexeme2prim "bytes" = Just Byte 99 | lexeme2prim "bit" = Just Bit 100 | lexeme2prim "bits" = Just Bit 101 | lexeme2prim _ = Nothing 102 | 103 | instance Read Primitive where 104 | readsPrec _ input = case lexeme2prim input of 105 | Just x -> [(x,"")] 106 | Nothing -> [] 107 | 108 | instance Prettify Primitive where prettify = rshow 109 | 110 | -------------------------------------------------------------------------------- /test/chisel/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, ScopedTypeVariables #-} 2 | module Main where 3 | -- Project imports go here, e.g.: 4 | --import Language.Chisel.Tokenizer 5 | import Text.ANTLR.Lex.Tokenizer (Token(..)) 6 | import Text.ANTLR.Parser (AST(..)) 7 | import Language.Chisel.Parser 8 | import Language.Chisel.Syntax 9 | import Text.ANTLR.Grammar (Grammar(..), ProdElem(..)) 10 | import Language.ANTLR4.FileOpener (open) 11 | import Language.ANTLR4.Boot.Syntax (Directive(..)) 12 | 13 | import System.IO.Unsafe (unsafePerformIO) 14 | import Data.Monoid 15 | import Test.Framework 16 | import Test.Framework.Providers.HUnit 17 | import Test.Framework.Providers.QuickCheck2 18 | import Test.HUnit hiding ((@?=), assertEqual) 19 | import Test.QuickCheck (Property, quickCheck, (==>)) 20 | import qualified Test.QuickCheck.Monadic as TQM 21 | 22 | import Text.ANTLR.HUnit 23 | import Debug.Trace as D 24 | import qualified Text.ANTLR.LR as LR 25 | import Text.ANTLR.Pretty (pshow) 26 | import qualified Data.Text as T 27 | 28 | chi = id 29 | 30 | ghc_val = [open| test/chisel/Language/Chisel/Examples/GHC.chi |] 31 | tokenizeGHC_val = tokenize ghc_val 32 | 33 | tokenizeGHC_exp = 34 | [ upperID "Heap", ws " ", lowerID "m", ws " ", lowerID "k", ws " ", arrow 35 | , ws "\n ", pound, ws " ", upperID "MegaBlock", ws " " 36 | , vertbar, int 2, carrot, lowerID "m", ws " ", prim Byte, vertbar, ws " " 37 | , atsymbol, lparen, 
int 2, carrot, lowerID "m", ws " ", prim Byte, rparen 38 | , ws " ", arrow, ws "\n ", linecomm "// -------Megablock \"Header\"------------------" 39 | , ws "\n ", lparen, ws " ", upperID "Descrs", ws " " 40 | , vertbar, int 2, carrot, lowerID "k", ws " ", prim Byte, vertbar, ws " " 41 | , arrow, ws "\n ", lparen, ws " ", lowerID "padMB", ws " ", prim Byte 42 | , ws "\n ", linecomm "// +++++++++++++++++++++++++++++++++++++++++\n" 43 | , ws " ", comma, ws " ", lowerID "bds", ws " " 44 | , colon, ws " ", lowerID "n", ws " ", upperID "BlockDescr", ws " " 45 | , vertbar, int 2, carrot, lowerID "d", ws " ", prim Byte, vertbar 46 | , ws " ", atsymbol, lparen, int 2, carrot, lowerID "k", ws " ", prim Byte, rparen 47 | , ws " ", arrow, ws "\n " 48 | , lparen, ws " ", lowerID "start", ws " ", colon, ws " " 49 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n " 50 | , comma, ws " ", lowerID "free", ws " ", colon, ws " " 51 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n " 52 | , comma, ws " ", lowerID "link", ws " ", colon, ws " " 53 | , upperID "Ptr", ws " ", upperID "BlockDescr", ws "\n " 54 | , comma, ws " ", lparen, ws " ", lowerID "back", ws " ", colon, ws " " 55 | , upperID "Ptr", ws " ", upperID "BlockDescr", ws "\n " 56 | , vertbar, ws " ", lowerID "bitmap", ws " ", colon, ws " " 57 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n " 58 | , vertbar, ws " ", lowerID "scan", ws " ", colon, ws " " 59 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", rparen, ws "\n " 60 | , comma, ws " ", lowerID "gen", ws " ", colon, ws " " 61 | , upperID "Ptr", ws " ", upperID "Generation", ws "\n ", comma, ws " " 62 | , lowerID "gen_no", ws " ", colon, ws " " 63 | , upperID "Stg", dot, upperID "Word16", ws "\n " 64 | , comma, ws " ", lowerID "dest_no", ws " ", colon, ws " " 65 | , upperID "Stg", dot, upperID "Word16", ws "\n " 66 | , comma, ws " ", lowerID "node", ws " ", colon, ws " " 67 | , upperID "Stg", dot, upperID "Word16", ws "\n " 68 | , comma, ws " ", upperID "Flags", ws " " 69 | , vertbar, upperID "Stg", dot, upperID "Word16", vertbar, ws " ", arrow, ws "\n " 70 | , lparen, ws " ", upperID "LARGE", ws " ", vertbar, ws " " 71 | , upperID "EVACUATED", ws "", vertbar, ws " " 72 | , upperID "FREE", ws "\n ", vertbar, ws " " 73 | , upperID "PINNED", ws " ", vertbar, ws " " 74 | , upperID "MARKED", ws " ", vertbar, ws " " 75 | , upperID "KNOWN", ws "\n ", vertbar, ws " " 76 | , upperID "EXEC", ws " ", vertbar, ws " " 77 | , upperID "FRAGMENTED", ws " ", vertbar, ws " " 78 | , upperID "SWEPT", ws "\n ", vertbar, ws " " 79 | , upperID "COMPACT", ws " ", rparen, ws "\n ", comma, ws " " 80 | , lowerID "n_blocks", ws " ", colon, ws " " 81 | , upperID "Stg", dot, upperID "Word32", ws "\n " 82 | , comma, ws " ", lowerID "padD", ws " ", prim Byte, rparen, rparen, ws "\n " 83 | , linecomm "// +++++++++++++++++++++++++++++++++++++++++\n", ws " " 84 | , linecomm "// -------Megablock payload-------------------\n", ws " ", comma, ws " " 85 | , lowerID "blocks", ws " ", colon, ws " ", lowerID "n", ws " " 86 | , upperID "Block", ws " ", vertbar, int 2, carrot 87 | , lowerID "k", ws " ", prim Byte, vertbar, ws " " 88 | , atsymbol, lparen, int 2, carrot, lowerID "k", ws " ", prim Byte, rparen 89 | , ws " ", arrow, ws "\n ", lparen, ws " " 90 | , lowerID "closures", ws " ", colon, ws " ", pound, ws " " 91 | , upperID "Stg", dot, upperID "Closures", ws "\n ", comma, ws " " 92 | , lowerID "free", ws " ", colon, ws " ", pound, ws " ", prim Byte, rparen 
93 | , ws "\n ", rparen, ws "\n", EOF 94 | ] 95 | 96 | tokenizeGHC = 97 | tokenizeGHC_val 98 | @?= 99 | tokenizeGHC_exp 100 | 101 | tokenizeGHC2 = 102 | dropWhile (\(a,b) -> a == b) (zip tokenizeGHC_val tokenizeGHC_exp) 103 | @?= 104 | [] 105 | 106 | tokenizeSmall = tokenize "Foo x -> x Bar" 107 | 108 | parseTestSmall = 109 | D.traceShowId (parse "Foo x -> x Bar") 110 | @?= 111 | LR.ResultAccept 112 | ( AST NT_chiselProd [NT NT_prodSimple] 113 | [ AST NT_prodSimple [NT NT_prodID, NT NT_formals, T T_2, NT NT_group] 114 | [ AST NT_prodID [T T_UpperID] [Leaf $ upperID "Foo"] 115 | , AST NT_formals [T T_LowerID] 116 | [ Leaf $ lowerID "x"] 117 | , Leaf arrow 118 | , AST NT_group [NT NT_groupExp1] 119 | [ AST NT_groupExp1 [NT NT_arith, NT NT_prodApp] 120 | [ AST NT_arith [T T_LowerID] [Leaf $ lowerID "x"] 121 | , AST NT_prodApp [NT NT_prodID] 122 | [ AST NT_prodID [T T_UpperID] [Leaf $ upperID "Bar"] ] 123 | ]]]]) 124 | 125 | tokenizeSmallTest = 126 | tokenizeSmall 127 | @?= 128 | [upperID "Foo", ws " ", lowerID "x", ws " ", arrow, ws " ", lowerID "x", ws " ", upperID "Bar", EOF] 129 | 130 | parseGHCTestBig = 131 | case parse ghc_val of 132 | (LR.ResultAccept _) -> (1 :: Int) @?= 1 133 | e -> e @?= LR.ResultAccept LeafEps 134 | 135 | testPrettify = 136 | unsafePerformIO (putStr $ T.unpack $ pshow (chiselGrammar :: Grammar () ChiselNTSymbol ChiselTSymbol Directive)) 137 | @?= () 138 | 139 | testFast = 140 | glrParseFast isWhitespace "Foo x -> x Bar" 141 | @?= 142 | glrParse isWhitespace "Foo x -> x Bar" 143 | 144 | main :: IO () 145 | main = defaultMainWithOpts 146 | [ testCase "Tokenize GHC" tokenizeGHC 147 | , testCase "Tokenize GHC2" tokenizeGHC2 148 | , testCase "Tokenize Small" tokenizeSmallTest 149 | , testCase "Parse Test (Small)" parseTestSmall 150 | , testCase "Parse GHC" parseGHCTestBig 151 | , testCase "Prettify speed" testPrettify 152 | , testCase "Run glrParseFast build" testFast 153 | ] mempty 154 | 155 | -------------------------------------------------------------------------------- /test/coreg4/G4.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, DeriveDataTypeable #-} 4 | module G4 where 5 | import Language.ANTLR4 6 | 7 | [g4| 8 | grammar G4Basic; 9 | exp : '1' 10 | | '2' 11 | | '3' 12 | ; 13 | |] 14 | 15 | -------------------------------------------------------------------------------- /test/coreg4/G4Fast.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module G4Fast where 3 | import Language.ANTLR4 4 | import G4 5 | import G4Parser 6 | 7 | $(mkLRParser g4BasicAST g4BasicGrammar) 8 | 9 | {- 10 | import System.IO.Unsafe (unsafePerformIO) 11 | import Data.Monoid 12 | import Test.Framework 13 | import Test.Framework.Providers.HUnit 14 | import Test.Framework.Providers.QuickCheck2 15 | import Test.HUnit 16 | import Test.QuickCheck (Property, quickCheck, (==>)) 17 | import qualified Test.QuickCheck.Monadic as TQM 18 | -} 19 | 20 | {- 21 | import Language.ANTLR4 hiding (tokenize) 22 | import Text.ANTLR.Grammar 23 | import qualified Language.ANTLR4.Example.Optionals as Opt 24 | import Language.ANTLR4.Example.G4 as G4 25 | --import Language.ANTLR4.Example.G4 (g4BasicGrammar, G4BasicNTSymbol, G4BasicTSymbol, G4BasicAST) 26 | import Text.ANTLR.Parser (AST(..)) 27 
| import qualified Text.ANTLR.LR as LR 28 | import qualified Text.ANTLR.Lex.Tokenizer as T 29 | -} 30 | 31 | {- 32 | import Data.Map.Internal (fromList) 33 | import qualified Text.ANTLR.MultiMap as M 34 | import Text.ANTLR.MultiMap (Map(..)) 35 | import qualified Text.ANTLR.Set as S 36 | import qualified Data.HashSet as H 37 | -} 38 | 39 | -------------------------------------------------------------------------------- /test/coreg4/G4Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module G4Parser where 5 | import Language.ANTLR4 6 | import G4 7 | 8 | $(g4_parsers g4BasicAST g4BasicGrammar) 9 | 10 | -------------------------------------------------------------------------------- /test/coreg4/Hello.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable #-} 4 | module Hello where 5 | import Language.ANTLR4 6 | 7 | [g4| 8 | // Hello World grammar 9 | // https://github.com/antlr/grammars-v4/blob/master/antlr4/examples/Hello.g4 10 | grammar Hello; 11 | r : 'hello' WS ID; 12 | ID : [a-zA-Z]+ -> String; 13 | WS : [ \t\r\n]+ -> String; 14 | |] 15 | 16 | -------------------------------------------------------------------------------- /test/coreg4/HelloParser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable, FlexibleContexts #-} 4 | module HelloParser where 5 | import Language.ANTLR4 6 | import Hello 7 | 8 | $(g4_parsers helloAST helloGrammar) 9 | 10 | -------------------------------------------------------------------------------- /test/coreg4/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO.Unsafe (unsafePerformIO) 4 | import Data.Monoid 5 | import Test.Framework 6 | import Test.Framework.Providers.HUnit 7 | import Test.Framework.Providers.QuickCheck2 8 | import Test.HUnit 9 | import Test.QuickCheck (Property, quickCheck, (==>)) 10 | import qualified Test.QuickCheck.Monadic as TQM 11 | 12 | import Language.ANTLR4 hiding (tokenize, Regex(..)) 13 | import qualified G4Parser as G4P 14 | import qualified G4 as G4 15 | import Hello 16 | import HelloParser 17 | import Text.ANTLR.Parser (AST(..)) 18 | import qualified Text.ANTLR.LR as LR 19 | import Language.ANTLR4.Boot.Syntax (Regex(..)) 20 | --import Language.ANTLR4.Regex (parseRegex) 21 | 22 | import qualified Language.ANTLR4.G4 as P -- Parser 23 | 24 | import qualified G4Fast as Fast 25 | 26 | test_g4_basic_type_check = do 27 | let _ = G4.g4BasicGrammar 28 | 1 @?= 1 29 | 30 | hello_g4_test_type_check = do 31 | let _ = helloGrammar 32 | 1 @?= 1 33 | 34 | {- 35 | regex_test = do 36 | parseRegex "[ab]* 'a' 'b' 'b'" 37 | @?= Right 38 | (Concat 39 | [ Kleene $ CharSet "ab" 40 | , Literal "a" 41 | , Literal "b" 42 | , Literal "b" 43 | ]) 44 | -} 45 | 46 | _1 = G4.lookupToken "1" 47 
| 48 | -- TODO: implement 'read' instance for TokenValue type so that I don't have to 49 | -- hardcode the name for literal terminals (e.g. '1' == T_0 below) 50 | test_g4 = 51 | G4P.slrParse (G4P.tokenize "1") 52 | @?= 53 | LR.ResultAccept (AST G4.NT_exp [T G4.T_0] [Leaf _1]) 54 | 55 | test_hello = 56 | slrParse (tokenize "hello Matt") 57 | @?= 58 | (LR.ResultAccept $ 59 | AST NT_r [T T_0, T T_WS, T T_ID] 60 | [ Leaf (Token T_0 V_0 1) 61 | , Leaf (Token T_WS (V_WS " ") 1) 62 | , Leaf (Token T_ID (V_ID "Matt") 4) 63 | ] 64 | ) 65 | 66 | test_hello_allstar = 67 | allstarParse (const False) ("hello Matt") 68 | @?= 69 | Right (AST NT_r [T T_0, T T_WS, T T_ID] [Leaf (Token T_0 V_0 5),Leaf (Token T_WS (V_WS " ") 1),Leaf 70 | (Token T_ID (V_ID "Matt") 4)]) 71 | --Right (AST NT_r [] []) 72 | 73 | testFastGLR = 74 | Fast.glrParseFast (const False) "3" 75 | @?= 76 | G4P.glrParse (const False) "3" 77 | 78 | main :: IO () 79 | main = defaultMainWithOpts 80 | [ testCase "g4_basic_compilation_type_check" test_g4_basic_type_check 81 | , testCase "hello_parse_type_check" hello_g4_test_type_check 82 | -- , testCase "regex_test" regex_test 83 | , testCase "test_g4" test_g4 84 | , testCase "test_hello" test_hello 85 | , testCase "test_hello_allstar" test_hello_allstar 86 | , testCase "testFastGLR" testFastGLR 87 | ] mempty 88 | 89 | -------------------------------------------------------------------------------- /test/g4/DoubleSemi.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-} 4 | module DoubleSemi where 5 | import Language.ANTLR4 6 | 7 | $( return [] ) 8 | 9 | [g4| 10 | grammar Dbl; 11 | dbl : 'a' | 'f' ; // ; 12 | WS : [ \t\r\n]+ -> String; 13 | |] 14 | 15 | isWS T_WS = True 16 | isWS _ = False 17 | 18 | -------------------------------------------------------------------------------- /test/g4/DoubleSemiP.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module DoubleSemiP 5 | ( module DoubleSemiP 6 | , module DoubleSemi 7 | ) where 8 | import Language.ANTLR4 9 | import DoubleSemi 10 | 11 | $(g4_parsers dblAST dblGrammar) 12 | 13 | -------------------------------------------------------------------------------- /test/g4/Empty.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-} 4 | module Empty where 5 | import Language.ANTLR4 6 | 7 | $( return [] ) 8 | 9 | [g4| 10 | grammar Empty; 11 | emp : 'a' b | 'f' d ; 12 | b : 'c' 13 | | // empty! 14 | ; 15 | 16 | d : // empty! 
17 | | 'd' 18 | ; 19 | 20 | WS : [ \t\r\n]+ -> String; 21 | |] 22 | 23 | isWS T_WS = True 24 | isWS _ = False 25 | 26 | -------------------------------------------------------------------------------- /test/g4/EmptyP.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module EmptyP 5 | ( module Empty 6 | , module EmptyP 7 | ) where 8 | import Language.ANTLR4 9 | import Empty 10 | 11 | $(g4_parsers emptyAST emptyGrammar) 12 | 13 | -------------------------------------------------------------------------------- /test/g4/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell, FlexibleContexts, TypeFamilies #-} 2 | module Main where 3 | 4 | import System.IO.Unsafe (unsafePerformIO) 5 | import Data.Monoid 6 | import Test.Framework 7 | import Test.Framework.Providers.HUnit 8 | import Test.Framework.Providers.QuickCheck2 9 | import Test.HUnit 10 | import Test.QuickCheck (Property, quickCheck, (==>)) 11 | import qualified Test.QuickCheck.Monadic as TQM 12 | 13 | import Language.ANTLR4 hiding (tokenize, Regex(..)) 14 | import Text.ANTLR.Grammar 15 | --import Language.ANTLR4.Regex 16 | import Text.ANTLR.Parser (AST(..)) 17 | import qualified Text.ANTLR.LR as LR 18 | import qualified Text.ANTLR.Lex.Tokenizer as T 19 | 20 | import qualified Language.ANTLR4.G4 as P -- Parser 21 | 22 | import qualified Optional as Opt 23 | import qualified OptionalParser as Opt 24 | 25 | import qualified EmptyP as E 26 | import qualified Empty as E 27 | 28 | test_optional = 29 | Opt.glrParse Opt.isWS "a" 30 | @?= 31 | (LR.ResultAccept $ AST Opt.NT_r [NT Opt.NT_a] [AST Opt.NT_a [T Opt.T_1] [Leaf (Token Opt.T_1 Opt.V_1 1)]]) 32 | 33 | test_optional2 = 34 | case Opt.glrParse Opt.isWS "a a e b c d" of 35 | LR.ResultAccept ast -> Opt.ast2r ast @?= "accept" 36 | err -> error $ show err 37 | 38 | test_optional3 = 39 | case Opt.glrParse Opt.isWS "a e b b b c c d" of 40 | LR.ResultAccept ast -> Opt.ast2r ast @?= "accept" 41 | err -> error $ show err 42 | 43 | test_optional4 = 44 | case Opt.glrParse Opt.isWS "a" of 45 | LR.ResultAccept ast -> Opt.ast2r ast @?= "reject" 46 | err -> error $ show err 47 | 48 | test_e v = 49 | case v of 50 | LR.ResultAccept ast -> E.ast2emp ast @?= () 51 | err -> error $ show err 52 | test_e_fail v = 53 | case v of 54 | LR.ResultAccept ast -> assertFailure $ show ast 55 | err -> () @?= () 56 | 57 | test_empty = test_e (E.slrParse (E.tokenize "a")) 58 | test_empty2 = test_e (E.slrParse (E.tokenize "f")) 59 | test_empty3 = test_e (E.slrParse (E.tokenize "ac")) 60 | test_empty4 = test_e (E.slrParse (E.tokenize "fd")) 61 | test_empty5 = test_e_fail (E.slrParse (E.tokenize "fc")) 62 | test_empty6 = test_e_fail (E.slrParse (E.tokenize "ad")) 63 | 64 | main :: IO () 65 | main = defaultMainWithOpts 66 | [ testCase "test_optional" test_optional 67 | , testCase "test_optional2" test_optional2 68 | , testCase "test_optional3" test_optional3 69 | , testCase "test_optional4" test_optional4 70 | , testCase "test_empty" test_empty 71 | , testCase "test_empty2" test_empty2 72 | , testCase "test_empty3" test_empty3 73 | , testCase "test_empty4" test_empty4 74 | , testCase "test_empty5" test_empty5 75 | , testCase "test_empty6" test_empty6 76 | ] mempty 77 | 78 | 
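-- A minimal usage sketch (our addition, not one of the test cases above):
-- driving the generated GLR parser by hand and printing the outcome. The name
-- 'demo' is hypothetical; 'Opt.glrParse', 'Opt.isWS', and 'Opt.ast2r' are the
-- same generated/imported functions the tests above exercise.
demo :: String -> IO ()
demo input =
  case Opt.glrParse Opt.isWS input of
    LR.ResultAccept ast -> putStrLn ("parsed: " ++ Opt.ast2r ast)
    err                 -> putStrLn ("parse failed: " ++ show err)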
-------------------------------------------------------------------------------- /test/g4/Optional.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-} 4 | module Optional where 5 | import Language.ANTLR4 6 | 7 | opt a b c d = (1, 'b', 2.0, [1,2,3]) 8 | 9 | foo :: () -> Maybe (Int, Char, Double, [Int]) -> String 10 | foo a1 (Just (a,b,c,d)) = "accept" 11 | foo a1 Nothing = "reject" 12 | 13 | $( return [] ) 14 | 15 | [g4| 16 | grammar Optional; 17 | r : a s? -> foo; 18 | s : a? 'e' b* c+ d -> opt; 19 | a : 'a'; 20 | b : 'b'; 21 | c : 'c'; 22 | d : 'd'; 23 | 24 | ID : [a-zA-Z]+ -> String; 25 | WS : [ \t\r\n]+ -> String; 26 | |] 27 | 28 | isWS T_WS = True 29 | isWS _ = False 30 | 31 | -------------------------------------------------------------------------------- /test/g4/OptionalParser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module OptionalParser where 5 | import Language.ANTLR4 6 | import Optional 7 | 8 | $(g4_parsers optionalAST optionalGrammar) 9 | 10 | -------------------------------------------------------------------------------- /test/lexer/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Text.ANTLR.Lex 4 | import Text.ANTLR.Lex.Automata 5 | import Text.ANTLR.Lex.NFA as NFA 6 | import qualified Text.ANTLR.Lex.DFA as DFA 7 | 8 | import Text.ANTLR.Lex.Regex 9 | 10 | import System.IO.Unsafe (unsafePerformIO) 11 | import Data.Monoid 12 | import Test.Framework 13 | import Test.Framework.Providers.HUnit 14 | import Test.Framework.Providers.QuickCheck2 15 | import Test.HUnit 16 | import Test.QuickCheck (Property, quickCheck, (==>)) 17 | import qualified Test.QuickCheck.Monadic as TQM 18 | 19 | import Text.ANTLR.Set (fromList, Set(..), singleton, Hashable) 20 | 21 | singleEdge s = (False, singleton s) 22 | 23 | fL :: (Hashable a, Eq a) => [a] -> Set a 24 | fL = fromList 25 | 26 | nfa0 :: NFA Char Int 27 | nfa0 = Automata 28 | { _S = fL [0, 1, 2, 3] 29 | , _Σ = fL "ab" 30 | , s0 = 0 31 | , _F = fL [3] 32 | , _Δ = fL 33 | [ (0, singleEdge $ Edge 'a', 0) 34 | , (0, singleEdge $ Edge 'a', 1) 35 | , (0, singleEdge $ Edge 'b', 0) 36 | , (1, singleEdge $ Edge 'b', 2) 37 | , (2, singleEdge $ Edge 'b', 3) 38 | ] 39 | } 40 | 41 | testValid0 = 42 | ( validStartState nfa0 43 | && validFinalStates nfa0 44 | && validTransitions nfa0 45 | ) 46 | @?= 47 | True 48 | 49 | testClosureWith0 = 50 | closureWith (Edge 'a' ==) nfa0 (singleton 0) 51 | @?= 52 | fromList [0, 1] 53 | 54 | testClosureWith1 = 55 | closureWith (NFAEpsilon ==) nfa0 (singleton 0) 56 | @?= 57 | fromList [0] 58 | 59 | testClosureWith2 = 60 | closureWith (const True) nfa0 (singleton 0) 61 | @?= 62 | fromList [0, 1, 2, 3] 63 | 64 | testClosureWith3 = 65 | closureWith (Edge 'b' ==) nfa0 (singleton 0) 66 | @?= 67 | fromList [0] 68 | 69 | testMove0 = 70 | move nfa0 (fromList [0,1,2]) (Edge 'a') 71 | @?= 72 | fromList [0,1] 73 | 74 | nfa334 :: NFA Char Int 75 | nfa334 = Automata 76 | { _S = fL [0 .. 
10] 77 | , _Σ = fL "ab" 78 | , s0 = 0 79 | , _F = fL [10] 80 | , _Δ = fL 81 | [ (0, singleEdge NFAEpsilon, 1) 82 | , (0, singleEdge NFAEpsilon, 7) 83 | , (1, singleEdge NFAEpsilon, 2) 84 | , (1, singleEdge NFAEpsilon, 4) 85 | , (2, singleEdge $ Edge 'a', 3) 86 | , (3, singleEdge NFAEpsilon, 6) 87 | , (4, singleEdge $ Edge 'b', 5) 88 | , (5, singleEdge NFAEpsilon, 6) 89 | , (6, singleEdge NFAEpsilon, 1) 90 | , (6, singleEdge NFAEpsilon, 7) 91 | , (7, singleEdge $ Edge 'a', 8) 92 | , (8, singleEdge $ Edge 'b', 9) 93 | , (9, singleEdge $ Edge 'b', 10) 94 | ] 95 | } 96 | 97 | _A = fromList [0,1,2,4,7] 98 | _B = fromList [1,2,3,4,6,7,8] 99 | _C = fromList [1,2,4,5,6,7] 100 | _D = fromList [1,2,4,5,6,7,9] 101 | _E = fromList [1,2,4,5,6,7,10] 102 | 103 | a = singleEdge 'a' 104 | b = singleEdge 'b' 105 | 106 | dfa336 :: DFA.DFA Char (Set Int) 107 | dfa336 = Automata 108 | { _S = fL [_A, _B, _C, _D, _E] 109 | , _Σ = fL "ab" 110 | , s0 = _A 111 | , _F = fL [_E] 112 | , _Δ = fL 113 | [ (_A, a, _B), (_A, b, _C) 114 | , (_B, a, _B), (_B, b, _D) 115 | , (_C, a, _B), (_C, b, _C) 116 | , (_D, a, _B), (_D, b, _E) 117 | , (_E, a, _B), (_E, b, _C) 118 | ] 119 | } 120 | 121 | nfa2dfa0 = 122 | nfa2dfa nfa334 123 | @?= 124 | dfa336 125 | 126 | nfa334Eps0 = 127 | NFA.epsClosure nfa334 128 | 129 | epsilonNFA = 130 | Automata 131 | { _S = fL [0, 1] 132 | , _Σ = fL "" 133 | , s0 = 0 134 | , _F = fL [1] 135 | , _Δ = fL [ (0, singleEdge NFAEpsilon, 1) ] 136 | } 137 | 138 | regexTest0 = 139 | regex2nfa Epsilon 140 | @?= 141 | epsilonNFA 142 | 143 | regexTest1 = 144 | regex2nfa (Symbol 'a') 145 | @?= 146 | epsilonNFA { _Σ = fL "a", _Δ = fL [ (0, singleEdge $ Edge 'a', 1) ] } 147 | 148 | regexTestUnion = 149 | regex2nfa (Union (Symbol 'a') (Symbol 'b')) 150 | @?= Automata 151 | { _S = fL [0..5] 152 | , _Σ = fL "ab" 153 | , s0 = 4 154 | , _F = fL [5] 155 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1) 156 | , (2, singleEdge $ Edge 'b', 3) 157 | , (4, singleEdge NFAEpsilon, 0) 158 | , (4, singleEdge NFAEpsilon, 2) 159 | , (1, singleEdge NFAEpsilon, 5) 160 | , (3, singleEdge NFAEpsilon, 5) ] 161 | } 162 | 163 | regexTestConcat = 164 | regex2nfa (Concat [Symbol 'a', Symbol 'b']) 165 | @?= Automata 166 | { _S = fL [0..3] 167 | , _Σ = fL "ab" 168 | , s0 = 0 169 | , _F = fL [3] 170 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1) 171 | , (1, singleEdge NFAEpsilon, 2) 172 | , (2, singleEdge $ Edge 'b', 3) ] 173 | } 174 | 175 | regexTestKleene = 176 | regex2nfa (Kleene (Concat [Symbol 'a', Symbol 'b'])) 177 | @?= Automata 178 | { _S = fL [0..5] 179 | , _Σ = fL "ab" 180 | , s0 = 4 181 | , _F = fL [5] 182 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1) 183 | , (1, singleEdge NFAEpsilon, 2) 184 | , (2, singleEdge $ Edge 'b', 3) 185 | , (4, singleEdge NFAEpsilon, 0) 186 | , (4, singleEdge NFAEpsilon, 5) 187 | , (3, singleEdge NFAEpsilon, 0) 188 | , (3, singleEdge NFAEpsilon, 5)] 189 | } 190 | 191 | regexTestPosclos = 192 | regex2nfa (PosClos (Concat [Symbol 'a', Symbol 'b'])) 193 | @?= Automata 194 | { _S = fL [0..9] 195 | , _Σ = fL "ab" 196 | , s0 = 0 197 | , _F = fL [9] 198 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1) 199 | , (1, singleEdge NFAEpsilon, 2) 200 | , (2, singleEdge $ Edge 'b', 3) 201 | , (3, singleEdge NFAEpsilon, 8) 202 | , (4, singleEdge $ Edge 'a', 5) 203 | , (5, singleEdge NFAEpsilon, 6) 204 | , (6, singleEdge $ Edge 'b', 7) 205 | , (8, singleEdge NFAEpsilon, 4) 206 | , (8, singleEdge NFAEpsilon, 9) 207 | , (7, singleEdge NFAEpsilon, 4) 208 | , (7, singleEdge NFAEpsilon, 9)] 209 | } 210 | 211 | dfaABPlus = regex2dfa (PosClos 
(Concat [Symbol 'a', Symbol 'b'])) 212 | dfaWS = regex2dfa (PosClos (Symbol ' ')) 213 | 214 | {- 215 | dfaGetName x 216 | | x == dfaWS = "ws" 217 | | x == dfaABPlus = "ab+" 218 | | otherwise = "Error" 219 | -} 220 | 221 | tokenizeTest0 = 222 | tokenize [("ab+", dfaABPlus), ("ws", dfaWS)] const "abab ab ababab" 223 | @?= 224 | [ Token "ab+" "abab" 4 225 | , Token "ws" " " 1 226 | , Token "ab+" "ab" 2 227 | , Token "ws" " " 1 228 | , Token "ab+" "ababab" 6 229 | , EOF 230 | ] 231 | 232 | {- 233 | dfaID = regex2dfa 234 | (PosClos $ MultiUnion 235 | [ Class ['a' .. 'z'] 236 | , Class ['A' .. 'Z'] 237 | , Symbol '_' 238 | ]) 239 | -} 240 | 241 | dfaID = regex2dfa 242 | (PosClos $ Class $ '_' : ['a' .. 'z'] ++ ['A' .. 'Z']) 243 | 244 | -- For profiling runtime of DFA subset construction (nfa2dfa): 245 | dfaIDTest = 246 | dfaID 247 | @?= 248 | dfaID 249 | 250 | dfaEQ = regex2dfa (Symbol '=') 251 | dfaSEMI = regex2dfa (Symbol ';') 252 | 253 | dfaINT = regex2dfa (PosClos $ Class [ '0' .. '9' ]) 254 | 255 | data TermSymbol = T_ID | T_INT | T_WS | T_EQ | T_SEMI 256 | deriving (Eq, Ord, Show) 257 | 258 | data TermValue = 259 | ID String 260 | | INT Int 261 | | WS String 262 | | EQSIGN 263 | | SEMI 264 | deriving (Eq, Ord, Show) 265 | 266 | lexeme2value lexeme T_WS = WS lexeme 267 | lexeme2value lexeme T_ID = ID lexeme 268 | lexeme2value lexeme T_INT = INT $ read lexeme 269 | lexeme2value lexeme T_EQ = EQSIGN 270 | lexeme2value lexeme T_SEMI = SEMI 271 | 272 | tokenizeTest1 = 273 | tokenize 274 | [ (T_WS, dfaWS), (T_ID, dfaID), (T_INT, dfaINT) 275 | , (T_EQ, dfaEQ), (T_SEMI, dfaSEMI) ] 276 | lexeme2value "_matt = 0;" 277 | @?= 278 | [ Token T_ID (ID "_matt") 5 279 | , Token T_WS (WS " ") 1 280 | , Token T_EQ EQSIGN 1 281 | , Token T_WS (WS " ") 1 282 | , Token T_INT (INT 0) 1 283 | , Token T_SEMI SEMI 1 284 | , EOF 285 | ] 286 | 287 | lineCommentDFA = regex2dfa $ Concat [Literal "//", Kleene $ NotClass ['\n'], Symbol '\n'] 288 | 289 | lineCommentTest = 290 | tokenize [("LineComment", lineCommentDFA)] const 291 | "// This is a line comment.\n" 292 | @?= 293 | [ Token "LineComment" "// This is a line comment.\n" 27 294 | , EOF 295 | ] 296 | 297 | main :: IO () 298 | main = defaultMainWithOpts 299 | [ testCase "testValid0" testValid0 300 | , testCase "testClosureWith0" testClosureWith0 301 | , testCase "testClosureWith1" testClosureWith1 302 | , testCase "testClosureWith2" testClosureWith2 303 | , testCase "testClosureWith3" testClosureWith3 304 | , testCase "testMove0" testMove0 305 | , testCase "nfa2dfa0" nfa2dfa0 306 | , testCase "regexTest0" regexTest0 307 | , testCase "regexTest1" regexTest1 308 | , testCase "regexTestUnion" regexTestUnion 309 | , testCase "regexTestConcat" regexTestConcat 310 | , testCase "regexTestKleene" regexTestKleene 311 | , testCase "regexTestPosclos" regexTestPosclos 312 | , testCase "tokenizeTest0" tokenizeTest0 313 | , testCase "dfaIDTest" dfaIDTest 314 | , testCase "tokenizeTest1" tokenizeTest1 315 | , testCase "lineCommentTest" lineCommentTest 316 | ] mempty 317 | 318 | -------------------------------------------------------------------------------- /test/ll/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | module Main where 3 | import Example.Grammar 4 | import Text.ANTLR.Grammar 5 | import Text.ANTLR.Parser 6 | import Text.ANTLR.Pretty 7 | import qualified Data.Text as T 8 | import Text.ANTLR.LL1 9 | 10 | import Text.ANTLR.Set (fromList, union, empty, Set(..)) 11 | import qualified 
Text.ANTLR.Set as Set 12 | 13 | import qualified Data.Map.Strict as M 14 | 15 | import System.IO.Unsafe (unsafePerformIO) 16 | import Data.Monoid 17 | import Test.Framework 18 | import Test.Framework (defaultMainWithOpts) 19 | import Test.Framework.Providers.HUnit 20 | import Test.Framework.Providers.QuickCheck2 21 | import Test.HUnit hiding ((@?=), assertEqual) 22 | import Test.QuickCheck (Property, quickCheck, (==>)) 23 | import qualified Test.QuickCheck.Monadic as TQM 24 | 25 | import Text.ANTLR.HUnit 26 | 27 | type LL1NonTerminal = String 28 | type LL1Terminal = String 29 | 30 | uPIO = unsafePerformIO 31 | 32 | grm :: Grammar () LL1NonTerminal LL1Terminal () 33 | grm = dragonBook428 34 | 35 | termination = first grm [NT "E"] @?= first grm [NT "E"] 36 | 37 | firstF = first grm [NT "F"] @?= fromList [Icon "(", Icon "id"] 38 | 39 | noEps = first grm [NT "E"] @?= fromList [Icon "(", Icon "id"] 40 | 41 | firstT' = 42 | first grm [NT "T'"] 43 | @?= 44 | fromList [Icon "*", IconEps] 45 | 46 | foldEpsTest = foldWhileEpsilon union empty 47 | [ fromList [Icon "(", Icon "id"] 48 | , fromList [Icon ")"] 49 | ] 50 | @?= 51 | fromList [Icon "(", Icon "id"] 52 | 53 | firstAll = 54 | ( Set.map ((\nt -> (nt, first grm [nt])) . NT) (ns grm) 55 | `union` 56 | Set.map ((\t -> (t, first grm [t])) . T) (ts grm) 57 | ) 58 | @?= 59 | fromList 60 | [ (NT "E", fromList [Icon "(", Icon "id"]) 61 | , (NT "E'", fromList [Icon "+", IconEps]) 62 | , (NT "F", fromList [Icon "(", Icon "id"]) 63 | , (NT "T", fromList [Icon "(", Icon "id"]) 64 | , (NT "T'", fromList [Icon "*", IconEps]) 65 | , (T "(", fromList [Icon "("]) 66 | , (T ")", fromList [Icon ")"]) 67 | , (T "*", fromList [Icon "*"]) 68 | , (T "+", fromList [Icon "+"]) 69 | , (T "id", fromList [Icon "id"]) 70 | ] 71 | 72 | grm' :: Grammar () LL1NonTerminal LL1Terminal () 73 | grm' = grm 74 | 75 | followAll :: IO () 76 | followAll = let 77 | fncn :: LL1NonTerminal -> (ProdElem LL1NonTerminal LL1Terminal, Set (Icon LL1Terminal)) 78 | fncn nt = (NT nt, follow grm' nt) 79 | in Set.map fncn (ns grm') 80 | @?= 81 | fromList 82 | [ (NT "E", fromList [Icon ")", IconEOF]) 83 | , (NT "E'", fromList [Icon ")", IconEOF]) 84 | , (NT "T", fromList [Icon ")", Icon "+", IconEOF]) 85 | , (NT "T'", fromList [Icon ")", Icon "+", IconEOF]) 86 | , (NT "F", fromList [Icon ")", Icon "*", Icon "+", IconEOF]) 87 | ] 88 | 89 | parseTableTest = 90 | parseTable grm 91 | @?= 92 | M.fromList (map (\((a,b),c) -> ((a,b), Set.singleton c)) 93 | -- Figure 4.17 of dragon book: 94 | [ (("E", Icon "id"), [NT "T", NT "E'"]) 95 | , (("E", Icon "("), [NT "T", NT "E'"]) 96 | , (("E'", Icon "+"), [T "+", NT "T", NT "E'"]) 97 | , (("E'", Icon ")"), [Eps]) 98 | , (("E'", IconEOF), [Eps]) 99 | , (("T", Icon "id"), [NT "F", NT "T'"]) 100 | , (("T", Icon "("), [NT "F", NT "T'"]) 101 | , (("T'", Icon "+"), [Eps]) 102 | , (("T'", Icon "*"), [T "*", NT "F", NT "T'"]) 103 | , (("T'", Icon ")"), [Eps]) 104 | , (("T'", IconEOF), [Eps]) 105 | , (("F", Icon "id"), [T "id"]) 106 | , (("F", Icon "("), [T "(", NT "E", T ")"]) 107 | ]) 108 | 109 | type LLAST = AST LL1NonTerminal LL1Terminal 110 | 111 | action0 EpsE = LeafEps 112 | action0 (TermE t) = Leaf t 113 | action0 (NonTE (nt, ss, us)) = AST nt ss us 114 | 115 | action1 :: 116 | (Prettify t, Prettify (StripEOF (Sym t)), Prettify nts) 117 | => ParseEvent (AST nts t) nts t -> AST nts t 118 | action1 (NonTE (nt, ss, trees)) = uPIO (putStrLn $ T.unpack $ pshow ("Act:", nt, ss, trees)) `seq` action0 $ NonTE (nt,ss,trees) 119 | action1 (TermE x) = uPIO (putStrLn $ 
T.unpack $ pshow ("Act:", x)) `seq` action0 $ TermE x 120 | action1 EpsE = action0 EpsE 121 | 122 | dragonPredParse = 123 | predictiveParse grm action0 ["id", "+", "id", "*", "id", ""] 124 | @?= 125 | (Just $ AST "E" [NT "T", NT "E'"] 126 | [ AST "T" [NT "F", NT "T'"] 127 | [ AST "F" [T "id"] [Leaf "id"] 128 | , AST "T'" [Eps] [LeafEps] 129 | ] 130 | , AST "E'" [T "+", NT "T", NT "E'"] 131 | [ Leaf "+" 132 | , AST "T" [NT "F", NT "T'"] 133 | [ AST "F" [T "id"] [Leaf "id"] 134 | , AST "T'" [T "*", NT "F", NT "T'"] 135 | [ Leaf "*" 136 | , AST "F" [T "id"] [Leaf "id"] 137 | , AST "T'" [Eps] [LeafEps] 138 | ] 139 | ] 140 | , AST "E'" [Eps] [LeafEps] 141 | ] 142 | ]) 143 | 144 | singleLang :: Grammar () String Char () 145 | singleLang = (defaultGrammar "S" :: Grammar () String Char ()) 146 | { s0 = "S" 147 | , ns = fromList ["S", "X"] 148 | , ts = fromList ['a'] 149 | , ps = [ production "S" $ Prod Pass [NT "X", T 'a'] 150 | , production "X" $ Prod Pass [Eps] 151 | ] 152 | } 153 | 154 | testRemoveEpsilons = 155 | removeEpsilons singleLang 156 | @?= singleLang 157 | { ps = [ production "S" $ Prod Pass [NT "X", T 'a'] 158 | , production "S" $ Prod Pass [T 'a'] 159 | ] 160 | } 161 | 162 | singleLang2 :: Grammar () String Char () 163 | singleLang2 = singleLang 164 | { ts = fromList ['a', 'b'] 165 | , ps = [ production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b', NT "X"] 166 | , production "X" $ Prod Pass [Eps] 167 | ] 168 | } 169 | 170 | testRemoveEpsilons2 = 171 | (Set.fromList . ps . removeEpsilons) singleLang2 172 | @?= 173 | fromList 174 | [ production "S" $ Prod Pass [ T 'a', T 'b' ] 175 | , production "S" $ Prod Pass [ T 'a', T 'b', NT "X"] 176 | , production "S" $ Prod Pass [ T 'a', NT "X", T 'b' ] 177 | , production "S" $ Prod Pass [ T 'a', NT "X", T 'b', NT "X"] 178 | , production "S" $ Prod Pass [NT "X", T 'a', T 'b' ] 179 | , production "S" $ Prod Pass [NT "X", T 'a', T 'b', NT "X"] 180 | , production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b' ] 181 | , production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b', NT "X"] 182 | ] 183 | 184 | testRemoveEpsilons3 = 185 | removeEpsilons dragonBook428 186 | @?= (defaultGrammar "E" :: Grammar () String String ()) 187 | { ns = fromList ["E", "E'", "T", "T'", "F"] 188 | , ts = fromList ["+", "*", "(", ")", "id"] 189 | , s0 = "E" 190 | , ps = [ production "E" $ Prod Pass [NT "T", NT "E'"] 191 | , production "E'" $ Prod Pass [T "+", NT "T", NT "E'"] 192 | , production "E'" $ Prod Pass [Eps] -- Implicitly epsilon 193 | , production "T" $ Prod Pass [NT "F", NT "T'"] 194 | , production "T'" $ Prod Pass [T "*", NT "F", NT "T'"] 195 | , production "T'" $ Prod Pass [Eps] 196 | , production "F" $ Prod Pass [T "(", NT "E", T ")"] 197 | , production "F" $ Prod Pass [T "id"] 198 | ] 199 | } 200 | 201 | leftGrammar0 :: Grammar () Char Char () 202 | leftGrammar0 = (defaultGrammar 'S' :: Grammar () Char Char ()) 203 | { ns = fromList "SABC" 204 | , ts = fromList "defg" 205 | , s0 = 'S' 206 | , ps = [ production 'S' $ Prod Pass [NT 'A'] 207 | , production 'A' $ Prod Pass [T 'd', T 'e', NT 'B'] 208 | , production 'A' $ Prod Pass [T 'd', T 'e', NT 'C'] 209 | , production 'B' $ Prod Pass [T 'f'] 210 | , production 'C' $ Prod Pass [T 'g'] 211 | ] 212 | } 213 | 214 | testLeftFactor = 215 | leftFactor leftGrammar0 216 | @?= G 217 | { ns = fromList $ map Prime [('S', 0), ('A', 0), ('B', 0), ('C', 0)] 218 | , ts = fromList "defg" 219 | , s0 = Prime ('S', 0) 220 | , ps = [ production (Prime ('S', 0)) $ Prod Pass [NT $ Prime ('A', 0)] 221 | , production (Prime ('A', 
0)) $ Prod Pass [T 'd', T 'e', NT $ Prime ('A', 1)] 222 | , production (Prime ('A', 1)) $ Prod Pass [NT $ Prime ('B', 0)] 223 | , production (Prime ('A', 1)) $ Prod Pass [NT $ Prime ('C', 0)] 224 | , production (Prime ('B', 0)) $ Prod Pass [T 'f'] 225 | , production (Prime ('C', 0)) $ Prod Pass [T 'g'] 226 | ] 227 | , _πs = fromList [] 228 | , _μs = fromList [] 229 | } 230 | 231 | main :: IO () 232 | main = defaultMainWithOpts 233 | [ testCase "fold_epsilon" foldEpsTest 234 | , testCase "termination" termination 235 | , testCase "no_epsilon" noEps 236 | , testCase "firstF" firstF 237 | , testCase "firstT'" firstT' 238 | , testCase "firstAll" firstAll 239 | , testCase "followAll" followAll 240 | , testCase "dragonHasAllNonTerms" $ hasAllNonTerms grm @?= True 241 | , testCase "dragonHasAllTerms" $ hasAllTerms grm @?= True 242 | , testCase "dragonStartIsNonTerm" $ startIsNonTerm grm @?= True 243 | , testCase "dragonIsValid" $ validGrammar grm @?= True 244 | , testCase "dragonIsLL1" $ isLL1 grm @?= True 245 | , testCase "dragonParseTable" parseTableTest 246 | , testCase "dragonPredParse" dragonPredParse 247 | , testCase "testRemoveEpsilons" testRemoveEpsilons 248 | , testCase "testRemoveEpsilons2" testRemoveEpsilons2 249 | , testCase "testLeftFactor" testLeftFactor 250 | ] mempty 251 | 252 | -------------------------------------------------------------------------------- /test/lr/EOF.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module EOF where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import Language.ANTLR4.Syntax (stripQuotesReadEscape) 19 | 20 | import EOFGrammar 21 | 22 | $(g4_parsers eOFAST eOFGrammar) 23 | 24 | test_eof = case glrParse (== T_WS) "anything that is not a comma" of 25 | (ResultAccept ast) -> ast2words ast @?= ["anything", "that", "is", "not", "a", "comma"] 26 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs 27 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\"" 28 | 29 | -------------------------------------------------------------------------------- /test/lr/EOFGrammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module EOFGrammar where 6 | import Language.ANTLR4 7 | 8 | data Plus = Plus String String | Minus String String 9 | deriving (Eq, Show) 10 | 11 | [g4| 12 | grammar EOF; 13 | 14 | words : word* ; 15 | 16 | word : WORD ; 17 | 18 | WORD : (~ [, ])* -> String; 19 | 20 | WS : [ \t\n\r\f\v]+ -> String; 21 | |] 22 | 23 | -------------------------------------------------------------------------------- /test/lr/GLRInc.hs: 
-------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module GLRInc where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import Language.ANTLR4.Syntax (stripQuotesReadEscape) 19 | 20 | import GLRIncGrammar 21 | 22 | $(g4_parsers gLRIncAST gLRIncGrammar) 23 | 24 | test_GLRInc = case glrParse (== T_WS) "word - foo" of 25 | (ResultAccept ast) -> ast2plus ast @?= Minus "word" "foo" 26 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs 27 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\"" 28 | 29 | -------------------------------------------------------------------------------- /test/lr/GLRIncGrammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module GLRIncGrammar where 6 | import Language.ANTLR4 7 | 8 | data Plus = Plus String String | Minus String String 9 | deriving (Eq, Show) 10 | 11 | [g4| 12 | grammar GLRInc; 13 | 14 | plus : LowerID '+' Prim -> Plus 15 | | Prim '-' LowerID -> Minus 16 | ; 17 | 18 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 19 | Prim : 'word' -> String; 20 | 21 | WS : [ \t\n\r\f\v]+ -> String; 22 | |] 23 | 24 | -------------------------------------------------------------------------------- /test/lr/GLRPartial.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module GLRPartial where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | import Language.ANTLR4.Syntax (stripQuotesReadEscape) 20 | 21 | import GLRPartialGrammar 22 | 23 | $(g4_parsers gLRPartialAST gLRPartialGrammar) 24 | 25 | test_GLRPartial = case glrParse (== T_WS) "word\nword\nword\nw0rd" of 26 | (ResultAccept ast) -> assertFailure $ "Was not supposed to parse: " ++ pshow' (ast2words ast) 27 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs 28 | (ErrorNoAction cfg asts _) -> return () -- Correct, should not have parsed 29 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\"" 30 | 31 |
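32 | -- The three-way case split above recurs throughout these GLR tests: a parse
33 | -- either accepts, yields an ambiguous 'ResultSet', or halts with no viable
34 | -- action. A minimal sketch of a helper collapsing the result to a Maybe
35 | -- ('maybeAst' is hypothetical, not part of the library; compare 'getAST' in
36 | -- test/sexpression/sexpression.hs, which calls 'error' on non-accepts):
37 | maybeAst (ResultAccept ast) = Just ast
38 | maybeAst _                  = Nothing
39 |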
-------------------------------------------------------------------------------- /test/lr/GLRPartialGrammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module GLRPartialGrammar where 6 | import Language.ANTLR4 7 | 8 | [g4| 9 | grammar GLRPartial; 10 | 11 | words : word+ ; 12 | 13 | word : Prim ; 14 | 15 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 16 | Prim : 'word' -> String; 17 | 18 | WS : [ \t\n\r\f\v]+ -> String; 19 | |] 20 | 21 | -------------------------------------------------------------------------------- /test/sexpression/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-} 4 | module Grammar where 5 | import Language.ANTLR4 6 | 7 | {- 8 | The MIT License 9 | Copyright (c) 2008 Robert Stehwien 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | 26 | Port to Antlr4 by Tom Everett 27 | Subsequent port to antlr-haskell by Karl Cronburg 28 | -} 29 | 30 | data Atom 31 | = Str String 32 | | Symb String 33 | | Number Double 34 | deriving (Eq, Ord, Show) 35 | 36 | data Item 37 | = Atm Atom 38 | | List [Item] 39 | | Field Item Item 40 | deriving (Eq, Ord, Show) 41 | 42 | [g4| 43 | grammar Sexpression; 44 | 45 | sexpr 46 | : item* 47 | ; 48 | 49 | item 50 | : atom -> ${\a -> Atm a} 51 | | list -> List 52 | | '(' item '.' item ')' -> Field 53 | ; 54 | 55 | list 56 | : '(' item* ')' 57 | ; 58 | 59 | atom 60 | : STRING -> Str 61 | | SYMBOL -> Symb 62 | | NUMBER -> Number 63 | ; 64 | 65 | STRING : ["] ( ('\\' .) | ~ ["\\] )* ["] -> String; 66 | 67 | WHITESPACE : [ \n\t\r]+ -> String; 68 | 69 | NUMBER : [-+]? DIGIT+ ([\.] DIGIT+)? 
-> Double; 70 | 71 | SYMBOL : SYMBOL_START (SYMBOL_START | DIGIT)* -> String; 72 | 73 | fragment SYMBOL_START : [a-zA-Z+\-*/] ; 74 | 75 | fragment DIGIT : [0-9] ; 76 | 77 | |] 78 | 79 | isWS T_WHITESPACE = True 80 | isWS _ = False 81 | 82 | -------------------------------------------------------------------------------- /test/sexpression/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-} 4 | module Parser 5 | ( module Grammar 6 | , glrParse, ast2sexpr 7 | -- , glrParseFast 8 | ) where 9 | import Language.ANTLR4 10 | import Grammar 11 | 12 | --import qualified GHC.Types as G 13 | import qualified Text.ANTLR.LR as LR 14 | 15 | $(g4_parsers sexpressionAST sexpressionGrammar) 16 | 17 | -- $(mkLRParser the_ast sexpressionGrammar) 18 | 19 | -------------------------------------------------------------------------------- /test/sexpression/sexpression.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Language.ANTLR4 3 | import Parser 4 | import qualified Text.ANTLR.Set as S 5 | 6 | getAST (ResultAccept ast) = ast 7 | getAST _ = error "non-AST in ResultSet" 8 | 9 | main = 10 | case glrParse isWS "((m1lk ju1ce 3.1) . (h0ney marmalade \"jam\"))" of 11 | (ResultAccept ast) -> print $ ast2sexpr ast 12 | (ResultSet xs) -> mapM_ (print . ast2sexpr . getAST) (S.toList xs) 13 | 14 | -------------------------------------------------------------------------------- /test/shared-hunit/Text/ANTLR/HUnit.hs: -------------------------------------------------------------------------------- 1 | module Text.ANTLR.HUnit where 2 | import Control.DeepSeq 3 | import Control.Exception as E 4 | import Control.Monad 5 | import Data.List 6 | import Data.Typeable 7 | import Data.CallStack 8 | import Test.HUnit.Lang hiding (assertEqual, (@?=), (@=?)) 9 | 10 | import Text.ANTLR.Pretty 11 | import qualified Data.Text as T 12 | 13 | location :: HasCallStack => Maybe SrcLoc 14 | location = case reverse callStack of 15 | (_, loc) : _ -> Just loc 16 | [] -> Nothing 17 | 18 | -- | Asserts that the specified actual value is equal to the expected value. 19 | -- The output message will contain the prefix, the expected value, and the 20 | -- actual value. 21 | -- 22 | -- If the prefix is the empty string (i.e., @\"\"@), then the prefix is omitted 23 | -- and only the expected and actual values are output. 24 | assertEqual :: (HasCallStack, Eq a, Prettify a) 25 | => String -- ^ The message prefix 26 | -> a -- ^ The expected value 27 | -> a -- ^ The actual value 28 | -> Assertion 29 | assertEqual preface expected actual = 30 | unless (actual == expected) $ do 31 | (prefaceMsg `deepseq` expectedMsg `deepseq` actualMsg `deepseq` E.throwIO (HUnitFailure location $ ExpectedButGot prefaceMsg expectedMsg actualMsg)) 32 | where 33 | prefaceMsg 34 | | null preface = Nothing 35 | | otherwise = Just preface 36 | expectedMsg = '\n' : T.unpack (pshowIndent 4 expected) 37 | actualMsg = '\n' : T.unpack (pshowIndent 4 actual) 38 | 39 | -- | Asserts that the specified actual value is equal to the expected value 40 | -- (with the actual value on the left-hand side). 
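--
-- A usage sketch drawn from this test suite: 'firstF' in test/ll/Main.hs above
-- applies this operator to 'Icon' sets, and on failure both sides are rendered
-- with 'pshowIndent' rather than 'show', which is the point of this
-- Prettify-based variant of HUnit's operator:
--
-- > firstF = first grm [NT "F"] @?= fromList [Icon "(", Icon "id"]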
41 | (@?=) :: (HasCallStack, Eq a, Prettify a) 42 | => a -- ^ The actual value 43 | -> a -- ^ The expected value 44 | -> Assertion 45 | actual @?= expected = assertEqual "" expected actual 46 | 47 | (@=?) a b = (@?=) b a 48 | 49 | -------------------------------------------------------------------------------- /test/shared/Example/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ExplicitForAll, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DeriveDataTypeable #-} 3 | module Example.Grammar where 4 | import Text.ANTLR.Set (fromList, member, (\\), empty, Generic(..), Hashable(..)) 5 | import Text.ANTLR.Grammar 6 | import Text.ANTLR.Pretty 7 | import Data.Data (toConstr, Data(..)) 8 | 9 | data NS0 = A | B | C deriving (Eq, Ord, Generic, Hashable, Bounded, Enum, Show, Data) 10 | data TS0 = A_ | B_ | C_ deriving (Eq, Ord, Generic, Hashable, Bounded, Enum, Show, Data) 11 | a = A_ 12 | b = B_ 13 | c = C_ 14 | 15 | -- TODO: boilerplate identity type classes for bounded enums 16 | instance Ref NS0 where 17 | type Sym NS0 = NS0 18 | getSymbol = id 19 | instance Ref TS0 where 20 | type Sym TS0 = TS0 21 | getSymbol = id 22 | instance Prettify NS0 where prettify = rshow . toConstr 23 | instance Prettify TS0 where prettify = rshow . toConstr 24 | dG :: Grammar () NS0 TS0 () 25 | dG = defaultGrammar C 26 | 27 | production :: nts -> (ProdRHS s nts ts) -> Production s nts ts dt 28 | production lhs rhs = Production lhs rhs Nothing 29 | 30 | mattToolG :: Grammar () NS0 TS0 () 31 | mattToolG = dG 32 | { ns = fromList [A, B, C] 33 | , ts = fromList [a, b, c] 34 | , s0 = C 35 | , ps = 36 | [ production A $ Prod Pass [T a, T b] 37 | , production A $ Prod Pass [T a] 38 | , production B $ Prod Pass [NT A, T b] 39 | , production B $ Prod Pass [T b] 40 | , production C $ Prod Pass [NT A, NT B, NT C] 41 | ] 42 | } 43 | 44 | dG' :: Grammar () String String () 45 | dG' = defaultGrammar "A" 46 | 47 | dragonBook428 :: Grammar () String String () 48 | dragonBook428 = dG' 49 | { ns = fromList ["E", "E'", "T", "T'", "F"] 50 | , ts = fromList ["+", "*", "(", ")", "id"] 51 | , s0 = "E" 52 | , ps = [ production "E" $ Prod Pass [NT "T", NT "E'"] 53 | , production "E'" $ Prod Pass [T "+", NT "T", NT "E'"] 54 | , production "E'" $ Prod Pass [Eps] -- Implicitly epsilon 55 | , production "T" $ Prod Pass [NT "F", NT "T'"] 56 | , production "T'" $ Prod Pass [T "*", NT "F", NT "T'"] 57 | , production "T'" $ Prod Pass [Eps] 58 | , production "F" $ Prod Pass [T "(", NT "E", T ")"] 59 | , production "F" $ Prod Pass [T "id"] 60 | ] 61 | } 62 | 63 | dragonBook41 :: Grammar () String String () 64 | dragonBook41 = dG' 65 | { ns = fromList ["E'", "E", "T", "F"] 66 | , ts = fromList ["+", "*", "(", ")", "id"] 67 | , s0 = "E" 68 | , ps = [ production "E" $ Prod Pass [NT "E", T "+", NT "T"] 69 | , production "E" $ Prod Pass [NT "T"] 70 | , production "T" $ Prod Pass [NT "T", T "*", NT "F"] 71 | , production "T" $ Prod Pass [NT "F"] 72 | , production "F" $ Prod Pass [T "(", NT "E", T ")"] 73 | , production "F" $ Prod Pass [T "id"] 74 | ] 75 | } 76 | 77 | dragonBook455 :: Grammar () String String () 78 | dragonBook455 = dG' 79 | { ns = fromList ["S", "C"] 80 | , ts = fromList ["c", "d"] 81 | , s0 = "S" 82 | , ps = [ production "S" $ Prod Pass [NT "C", NT "C"] 83 | , production "C" $ Prod Pass [T "c", NT "C"] 84 | , production "C" $ Prod Pass [T "d"] 85 | ] 86 | } 87 | 88 | dumbGrammar :: Grammar () String String () 89 | dumbGrammar = dG' 90 | { ns = fromList ["S", "A", "B", "I", "D"] 
91 | , ts = fromList ["1","2","3","+","-","*"] 92 | , s0 = "S" 93 | , ps = [ production "S" $ Prod Pass [NT "A"] 94 | , production "S" $ Prod Pass [NT "B"] 95 | , production "S" $ Prod Pass [NT "D"] 96 | , production "A" $ Prod Pass [NT "I", T "+", NT "I"] 97 | , production "B" $ Prod Pass [NT "I", T "-", NT "I"] 98 | , production "I" $ Prod Pass [T "1"] 99 | , production "I" $ Prod Pass [T "2"] 100 | , production "I" $ Prod Pass [T "3"] 101 | , production "D" $ Prod Pass [NT "I", T "*", NT "I"] 102 | ] 103 | --, us = [(\_ -> True)] 104 | } 105 | 106 | -------------------------------------------------------------------------------- /test/simple/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Grammar where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | 19 | data Attr = A | B 20 | 21 | data Decl = Foo | Bar 22 | 23 | [g4| 24 | grammar Simple; 25 | 26 | attrDecl : attr* decl ; 27 | 28 | attrDecl2 : attr? decl ; 29 | 30 | attrDecl3 : attr+ decl ; 31 | 32 | attr : 'a' ';' -> A 33 | | 'b' ';' -> B 34 | ; 35 | 36 | decl : 'foo' -> Foo 37 | | 'bar' -> Bar 38 | ; 39 | 40 | UNICODE : '\u0008' -> String ; 41 | 42 | |] 43 | 44 | -------------------------------------------------------------------------------- /test/simple/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Main where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import Grammar 21 | 22 | --foo = [ $(lift $ LR.lr1Table simpleGrammar) ] 23 | 24 | --test_star = foo @?= [] 25 | test_star = () @?= () 26 | 27 | main :: IO () 28 | main = defaultMainWithOpts 29 | [ testCase "test_star" test_star 30 | ] mempty 31 | 32 | -------------------------------------------------------------------------------- /test/swift/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-} 4 | module Parser 5 | ( module Grammar 6 | , glrParse, ast2sexpr 7 | ) where 8 | import Language.ANTLR4 9 | import Grammar 10 | 11 | import qualified 
Text.ANTLR.LR as LR 12 | 13 | $(g4_parsers swiftAST swiftGrammar) 14 | -- $(mkLRParser the_ast sexpressionGrammar) 15 | 16 | -------------------------------------------------------------------------------- /test/swift/swift.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Language.ANTLR4 3 | import Parser 4 | import qualified Text.ANTLR.Set as S 5 | 6 | getAST (ResultAccept ast) = ast 7 | getAST _ = error "non-AST in ResultSet" 8 | 9 | main = 10 | case glrParse isWS "var i = 0;" of 11 | (ResultAccept ast) -> print $ ast2topLevel ast 12 | (ResultSet xs) -> mapM_ (print . ast2topLevel . getAST) (S.toList xs) 13 | 14 | -------------------------------------------------------------------------------- /test/template/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO.Unsafe (unsafePerformIO) 4 | import Data.Monoid 5 | import Test.Framework 6 | import Test.Framework.Providers.HUnit 7 | import Test.Framework.Providers.QuickCheck2 8 | import Test.HUnit 9 | import Test.QuickCheck (Property, quickCheck, (==>)) 10 | import qualified Test.QuickCheck.Monadic as TQM 11 | 12 | main :: IO () 13 | main = defaultMainWithOpts 14 | [ 15 | ] mempty 16 | 17 | -------------------------------------------------------------------------------- /test/unit/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Main where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import PlusBug0 21 | $(g4_parsers plusBug0AST plusBug0Grammar) 22 | 23 | test_plusBug0 = case glrParse (== T_WS) "foo bar baz" of 24 | (ResultAccept ast) -> ast2plus ast @?= Plus [ "foo", "bar", "baz" ] 25 | _ -> assertFailure "Ambiguous parse" 26 | 27 | main :: IO () 28 | main = defaultMainWithOpts 29 | [ testCase "test_plusBug0" test_plusBug0 30 | ] mempty 31 | 32 | -------------------------------------------------------------------------------- /test/unit/PlusBug0.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module PlusBug0 where 6 | import Language.ANTLR4 7 | 8 | data Plus = Plus [String] | NotPlus [String] 9 | deriving (Eq, Show) 10 | 11 | [g4| 12 | grammar PlusBug0; 13 | 14 | plus : LowerID+ -> Plus ; 15 | 16 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 17 | WS : [ \t\n\r\f\v]+ -> String; 18 | |] 19 | 20 | -------------------------------------------------------------------------------- /test/unit0/DupTerms.hs: -------------------------------------------------------------------------------- 1 | {-#
LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module DupTerms where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import DupTermsGrammar 21 | 22 | $(g4_parsers dupTermsAST dupTermsGrammar) 23 | 24 | test_dup_terms = case glrParse (== T_WS) "(" of 25 | (ResultAccept ast) -> ast2justParen ast @?= 3 26 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest 27 | 28 | -------------------------------------------------------------------------------- /test/unit0/DupTermsGrammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module DupTermsGrammar where 6 | import Language.ANTLR4 7 | 8 | doesThisFire :: Int 9 | doesThisFire = 3 10 | 11 | orDoesThisFire :: String -> Int 12 | orDoesThisFire _ = 4 13 | 14 | $( return [] ) 15 | 16 | [g4| 17 | grammar DupTerms; 18 | 19 | justParen : '(' -> doesThisFire ; 20 | 21 | LParen : '(' -> orDoesThisFire ; 22 | 23 | WS : [ \t\n\r\f\v]+ -> String; 24 | |] 25 | 26 | -------------------------------------------------------------------------------- /test/unit0/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Main where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import Star0 21 | import Star1 22 | import DupTerms 23 | 24 | main :: IO () 25 | main = defaultMainWithOpts 26 | [ testCase "test_star0" test_star0 27 | , testCase "test_star1" test_star1 28 | , testCase "duplicate_terminals" test_dup_terms 29 | ] mempty 30 | 31 | -------------------------------------------------------------------------------- /test/unit0/Star0.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Star0 where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe 
(unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import Star0Grammar 21 | 22 | $(g4_parsers star0AST star0Grammar) 23 | 24 | test_star0 = case glrParse (== T_WS) "page page" of 25 | (ResultAccept ast) -> ast2words ast @?= ["page", "page"] 26 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest 27 | 28 | -------------------------------------------------------------------------------- /test/unit0/Star0Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Star0Grammar where 6 | import Language.ANTLR4 7 | 8 | [g4| 9 | grammar Star0; 10 | 11 | words : page* -> ${\ps -> ps} ; 12 | 13 | page : Page ; 14 | 15 | Page : 'page' -> String ; 16 | 17 | WS : [ \t\n\r\f\v]+ -> String; 18 | |] 19 | 20 | -------------------------------------------------------------------------------- /test/unit0/Star1.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Star1 where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | import Debug.Trace as D 20 | 21 | import Star1Grammar 22 | 23 | $(g4_parsers star1AST star1Grammar) 24 | 25 | test_star1 = D.trace (pshow' star1Grammar) $ 26 | case glrParse (== T_WS) "me page you { byte, byte }" of 27 | (ResultAccept ast) -> ast2words ast @?= Frst (Yep ("me", "page")) [] [Byte, Byte] 28 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest 29 | 30 | -------------------------------------------------------------------------------- /test/unit0/Star1Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Star1Grammar where 6 | import Language.ANTLR4 7 | 8 | data MyMaybe x = Nope | Yep x 9 | deriving (Eq, Ord, Show) 10 | 11 | data Mem = Byte 12 | deriving (Eq, Ord, Show) 13 | 14 | data Words = 15 | Frst (MyMaybe (String, String)) [String] [Mem] 16 | | Snd 17 | | Thrd (MyMaybe (String, String)) 18 | deriving (Eq, Ord, Show) 19 | 20 | [g4| 21 | grammar Star1; 22 | 23 | words : me 'you' page* '{' bytes '}' -> Frst 24 | | me -> Thrd 
25 | | 'woops' -> Snd 26 | ; 27 | 28 | bytes : 'byte' -> ${ [Byte] } 29 | | bytes ',' 'byte' -> ${\bs -> Byte : bs} 30 | ; 31 | 32 | me : me2? -> ${\m -> case m of Nothing -> Nope ; Just x -> Yep x}; 33 | me2 : me3 ; 34 | me3 : Me page ; 35 | Me : 'me' -> String ; 36 | 37 | page : Page ; 38 | Page : 'page' -> String ; 39 | 40 | WS : [ \t\n\r\f\v]+ -> String; 41 | |] 42 | 43 | --------------------------------------------------------------------------------