├── Setup.hs
├── .gitignore
├── test
    ├── Spec.hs
    ├── Print_Unittests.hs
    ├── SExpr_Unittests.hs
    ├── Parse_Unittests.hs
    └── SchemeR5RS_Unittests.hs
├── LICENSE
├── src
    └── Data
    │   └── SExpresso
    │       ├── Parse.hs
    │       ├── Print.hs
    │       ├── Parse
    │           ├── Char.hs
    │           ├── Location.hs
    │           └── Generic.hs
    │       ├── Print
    │           └── Lazy.hs
    │       ├── SExpr.hs
    │       └── Language
    │           └── SchemeR5RS.hs
├── package.yaml
├── ChangeLog.md
├── stack.yaml
└── README.md


/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .stack-work/
2 | sexpresso.cabal
3 | *~
4 | .#*
5 | *.lock
6 | 


--------------------------------------------------------------------------------
/test/Spec.hs:
--------------------------------------------------------------------------------
 1 | 
 2 | module Main where
 3 | 
 4 | import Test.Tasty
 5 | import SExpr_Unittests
 6 | import Parse_Unittests
 7 | import Print_Unittests
 8 | import SchemeR5RS_Unittests
 9 | 
10 | main :: IO ()
11 | main = defaultMain tests
12 | 
13 | tests :: TestTree
14 | tests = testGroup "tests" [sexpTestTree, parseTestTree, printTestTree, r5rsTestTree]
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Zero-Clause BSD
 2 | 
 3 | Permission to use, copy, modify, and/or distribute this software for
 4 | any purpose with or without fee is hereby granted.
 5 | 
 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
 7 | WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
 8 | WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
 9 | AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
10 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
11 | PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
12 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
13 | PERFORMANCE OF THIS SOFTWARE.
14 | 


--------------------------------------------------------------------------------
/src/Data/SExpresso/Parse.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.SExpresso.Parse
 3 | -- Copyright   :  © 2019 Vincent Archambault
 4 | -- License     :  0BSD
 5 | --
 6 | -- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
 7 | -- Stability   :  experimental
 8 | --
 9 | -- This module re-exports everything from
10 | -- "Data.SExpresso.Parse.Generic", "Data.SExpresso.Parse.Char" and
11 | -- "Data.SExpresso.Parse.Location".
12 | 
13 | module Data.SExpresso.Parse
14 |   (
15 |     module Data.SExpresso.Parse.Generic,
16 |     module Data.SExpresso.Parse.Char,
17 |     module Data.SExpresso.Parse.Location
18 |     )
19 |   where
20 | 
21 | import Data.SExpresso.Parse.Generic
22 | import Data.SExpresso.Parse.Location
23 | import Data.SExpresso.Parse.Char
24 | 


--------------------------------------------------------------------------------
/src/Data/SExpresso/Print.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.SExpresso.Print
 3 | -- Copyright   :  © 2019 Vincent Archambault
 4 | -- License     :  0BSD
 5 | --
 6 | -- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
 7 | -- Stability   :  experimental
 8 | --
 9 | -- Printing 'SExpr' as 'Data.Text'. To print as lazy text
10 | -- ("Data.Text.Lazy") see "Data.Sexpresso.Print.Lazy"
11 | 
12 | module Data.SExpresso.Print (
13 |   PL.SExprPrinter(..),
14 |   PL.mkPrinter,
15 |   flatPrint
16 |   ) where
17 | 
18 | import qualified Data.Text as T
19 | import qualified Data.Text.Lazy as L
20 | import Data.SExpresso.SExpr
21 | import qualified Data.SExpresso.Print.Lazy as PL
22 | 
23 | -- | Prints an 'SExpr' on a single line
24 | flatPrint :: PL.SExprPrinter b a -> SExpr b a -> T.Text
25 | flatPrint p s = L.toStrict $ PL.flatPrint p s
26 | 


--------------------------------------------------------------------------------
/src/Data/SExpresso/Parse/Char.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.SExpresso.Parse.Char
 3 | -- Copyright   :  © 2019 Vincent Archambault
 4 | -- License     :  0BSD
 5 | --
 6 | -- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
 7 | -- Stability   :  experimental
 8 | --
 9 | -- The module "Data.SExpresso.Parse" re-exports the functions of this
10 | -- module.
11 | 
12 | {-# LANGUAGE OverloadedStrings #-}
13 | {-# LANGUAGE TypeFamilies #-}
14 | 
15 | module Data.SExpresso.Parse.Char
16 |   (
17 |     plainSExprParser
18 |    )
19 |   where
20 | 
21 | import Text.Megaparsec
22 | import Text.Megaparsec.Char
23 | import Data.SExpresso.Parse.Generic
24 | 
25 | 
26 | -- | The function 'plainSExprParser' accepts a parser for atoms and
27 | -- returns a 'SExprParser' for a stream of 'Char' with the following
28 | -- properties :
29 | --
30 | --  * The opening tag is (.
31 | --  * The closing tag is ).
32 | --  * The space parser is 'space1'.
33 | --  * Space is always mandatory between atoms.
34 | plainSExprParser :: (MonadParsec e s m, Token s ~ Char) =>
35 |                     m a -> SExprParser m () a
36 | plainSExprParser p = SExprParser
37 |                      (char '(' >> return ())
38 |                      (\_ -> char ')' >> return ())
39 |                      p
40 |                      space1
41 |                      spaceIsMandatory
42 | 


--------------------------------------------------------------------------------
/package.yaml:
--------------------------------------------------------------------------------
 1 | name:                sexpresso
 2 | version:             1.2.5.0
 3 | github:              "archambaultv/sexpresso"
 4 | license:             OtherLicense # 0BSD
 5 | author:              "Vincent Archambault-Bouffard"
 6 | maintainer:          "archambault.v@gmail.com"
 7 | copyright:           "Vincent Archambault-Bouffard"
 8 | 
 9 | extra-source-files:
10 | - README.md
11 | - ChangeLog.md
12 | 
13 | # Metadata used when publishing your package
14 | synopsis:            A flexible library for parsing and printing S-expression
15 | category:            Data
16 | 
17 | # To avoid duplicated efforts in documentation and dealing with the
18 | # complications of embedding Haddock markup inside cabal files, it is
19 | # common to point users to the README.md file.
20 | description:         Please see the README on GitHub at <https://github.com/archambaultv/sexpresso#readme>
21 | 
22 | dependencies:
23 | - base >= 4.7 && < 5
24 | - text >= 0.2 && < 2.2
25 | - megaparsec >= 7.0 && < 9.7
26 | - containers >= 0.5 && < 0.8
27 | - bifunctors >= 5.5 && < 5.7
28 | - recursion-schemes >= 5.1 && < 5.3
29 | 
30 | library:
31 |   source-dirs: src
32 | 
33 | tests:
34 |   sexpresso-test:
35 |     main:                Spec.hs
36 |     source-dirs:         test
37 |     ghc-options:
38 |     - -threaded
39 |     - -rtsopts
40 |     - -with-rtsopts=-N
41 |     dependencies:
42 |     - sexpresso
43 |     - tasty >= 0.8
44 |     - tasty-hunit >= 0.10.0.1
45 |     - tasty-smallcheck >= 0.8
46 |     - smallcheck >= 1.0
47 | 
48 | ghc-options:
49 | - -Wall
50 | 


--------------------------------------------------------------------------------
/ChangeLog.md:
--------------------------------------------------------------------------------
 1 | # Changelog for S-expresso
 2 | 
 3 | Version 1.2.5.0
 4 | ---------------
 5 | * Relax upperbounds.
 6 | 
 7 | Version 1.2.4.0
 8 | ---------------
 9 | * Build with `bifunctors-5.6`
10 | 
11 | Version 1.2.3.0
12 | ---------------
13 | * Build with `text-2.0`
14 | 
15 | Version 1.2.2.0
16 | ---------------
17 | * Build with `megaparsec-9.3.0`
18 | 
19 | Version 1.2.1.0
20 | ---------------
21 | * Add `Data` instances. [#12](https://github.com/archambaultv/sexpresso/pull/12).
22 | 
23 | Version 1.2.0.0
24 | ---------------
25 | * Update to stack lts 18.10
26 | * Thanks to Ollie Charles from asking for this update
27 | 
28 | Version 1.1.0.0
29 | ---------------
30 | 
31 | * Add startPosPretty and endPosPretty function
32 | * Add Bifunctor, Bifoldable and Bitraversable instances for SExpr
33 | * Add Base SExpr, Recursive, Corecursive instances (see package recursion-schemes)
34 | * Add Functor instance for Located
35 | * Fix SExprPrinter constructor name (SExprParser -> SExprPrinter)
36 | * Improve documentation
37 | * Merge [pull request \#6](https://github.com/archambaultv/sexpresso/pull/6) to prepare for MonadFail
38 | * Fix bug with R5RS negative number (issue \#7 on [github](https://github.com/archambaultv/sexpresso/issues/7))
39 | * Tested with stack version 14.27 and 15.3
40 | 
41 | Version 1.0.0.2
42 | ---------------
43 | 
44 | * Initial Hackage Release
45 | * SExpr datatype for representing S-expression
46 | * Generic SExpr parser
47 | * Specialized SExpr parser for character
48 | * SExpr flat printer
49 | * Scheme R5RS parser implementation
50 | 


--------------------------------------------------------------------------------
/src/Data/SExpresso/Parse/Location.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.SExpresso.Parse.Location
 3 | -- Copyright   :  © 2019 Vincent Archambault
 4 | -- License     :  0BSD
 5 | --
 6 | -- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
 7 | -- Stability   :  experimental
 8 | --
 9 | -- The module "Data.SExpresso.Parse" re-exports the functions and
10 | -- datatypes of this module.
11 | 
12 | {-# LANGUAGE DeriveFunctor #-}
13 | 
14 | module Data.SExpresso.Parse.Location
15 |   (
16 |     Location(..),
17 |     Located(..),
18 |     located,
19 |     startPosPretty,
20 |     endPosPretty
21 |    )
22 |   where
23 | 
24 | import Text.Megaparsec
25 | 
26 | -- Taken from https://www.reddit.com/r/haskell/comments/4x22f9/labelling_ast_nodes_with_locations/d6cmdy9/
27 | 
28 | -- | The 'Location' datatype represents a source span 
29 | data Location = Span SourcePos SourcePos
30 |               deriving (Eq, Ord, Show)
31 | 
32 | -- | Pretty prints @S1@ of a @'Span' S1 _@ object with 'sourcePosPretty'
33 | startPosPretty :: Location -> String
34 | startPosPretty (Span s _) = sourcePosPretty s
35 | 
36 | -- | Pretty prints @S2@ of a @'Span' _ S2@ object with 'sourcePosPretty'
37 | endPosPretty :: Location -> String
38 | endPosPretty (Span _ s) = sourcePosPretty s
39 | 
40 | -- | The 'Located' datatype adds a source span to the type @a@
41 | data Located a = At Location a
42 |                deriving (Eq, Ord, Show, Functor)
43 | 
44 | -- | The 'located' function adds a source span to a parser.
45 | located :: (MonadParsec e s m, TraversableStream s) => m a -> m (Located a)
46 | located parser = do
47 |   begin <- getSourcePos
48 |   result <- parser
49 |   end <- getSourcePos
50 |   return $ At (Span begin end) result
51 | 


--------------------------------------------------------------------------------
/src/Data/SExpresso/Print/Lazy.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.SExpresso.Print.Lazy
 3 | -- Copyright   :  © 2019 Vincent Archambault
 4 | -- License     :  0BSD
 5 | --
 6 | -- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
 7 | -- Stability   :  experimental
 8 | --
 9 | -- Printing 'SExpr' as 'Data.Text.Lazy'. To print as strict text
10 | -- ("Data.Text") see "Data.Sexpresso.Print"
11 | 
12 | {-# LANGUAGE OverloadedStrings #-}
13 | 
14 | module Data.SExpresso.Print.Lazy (
15 |   SExprPrinter(..),
16 |   mkPrinter,
17 |   flatPrint,
18 |   flatPrintBuilder
19 |   ) where
20 | 
21 | import qualified Data.Text as T
22 | import qualified Data.Text.Lazy as L
23 | import qualified Data.Text.Lazy.Builder as B
24 | import Data.SExpresso.SExpr
25 | 
26 | -- | The 'SExprPrinter' defines how to print an 'SExpr'.
27 | data SExprPrinter b a = SExprPrinter {
28 |   -- | The opening and closing tags based on the content of the 'SList'
29 |   printTags :: b -> [SExpr b a] -> (T.Text, T.Text),
30 |   -- | How to print an atom
31 |   printAtom :: a -> T.Text
32 |   }
33 | 
34 | -- | An 'SExprPrinter' with the opening tag defined as '(' and the
35 | -- closing tag defined as ')'
36 | mkPrinter :: (a -> T.Text) -> SExprPrinter b a
37 | mkPrinter p = SExprPrinter (\_ _ -> ("(", ")")) p
38 | 
39 | -- | Prints an 'SExpr' on a single line. Returns a 'B.Builder' instead of a lazy text 'L.Text'
40 | flatPrintBuilder :: SExprPrinter b a -> SExpr b a -> B.Builder
41 | flatPrintBuilder p (SAtom a) = B.fromText $ printAtom p a
42 | flatPrintBuilder p (SList b xs) =
43 |   let (sTag, eTag) = printTags p b xs
44 |   in B.fromText sTag <> flatPrintList xs <> B.fromText eTag
45 | 
46 |   where flatPrintList [] = B.fromText ""
47 |         flatPrintList [x] = flatPrintBuilder p x
48 |         flatPrintList (y : ys) = flatPrintBuilder p y <> B.fromText " " <> flatPrintList ys
49 | 
50 | -- | Prints an 'SExpr' on a single line
51 | flatPrint :: SExprPrinter b a -> SExpr b a -> L.Text
52 | flatPrint p s = B.toLazyText $ flatPrintBuilder p s
53 | 


--------------------------------------------------------------------------------
/test/Print_Unittests.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses #-}
 3 | 
 4 | module Print_Unittests (
 5 |   printTestTree
 6 |   )where
 7 | 
 8 | import Data.Void
 9 | import Test.Tasty
10 | import Test.Tasty.HUnit
11 | import Test.Tasty.SmallCheck as SC
12 | import Test.SmallCheck.Series
13 | import Text.Megaparsec
14 | import Text.Megaparsec.Char
15 | import qualified Data.Text as T
16 | import Data.SExpresso.SExpr
17 | import Data.SExpresso.Print
18 | import Data.SExpresso.Parse
19 | 
20 | type Parser = Parsec Void T.Text
21 | 
22 | instance (Serial m b, Serial m a) => Serial m (SExpr b a) where
23 |   series = cons1 SAtom \/ cons2 SList
24 | 
25 | printer :: SExprPrinter () Integer
26 | printer = mkPrinter (T.pack . show)
27 | 
28 | pDigit :: Parser Integer
29 | pDigit = do
30 |   sign <- optional (char '-')
31 |   n <- fmap read (some digitChar)
32 |   case sign of
33 |     Nothing -> return n
34 |     Just _ -> return (-1 * n)
35 | 
36 | sexpParser :: SExprParser Parser () Integer
37 | sexpParser = plainSExprParser pDigit
38 | 
39 | printTestTree :: TestTree
40 | printTestTree = testGroup "Print.hs unit tests" $
41 |   [testGroup "flatPrint" [
42 |       testCase "Empty SList" $ flatPrint printer (SList () [] :: Sexp Integer) @?= "()",
43 |       testCase "Singleton SList" $ flatPrint printer (SList () [SAtom 1] :: Sexp Integer) @?= "(1)",
44 |       testCase "SList 1/3" $ flatPrint printer (SList () [SAtom 1, SAtom 2, SAtom 3] :: Sexp Integer) @?= "(1 2 3)",
45 |       testCase "SList 2/3" $ flatPrint printer (SList () [SAtom 1, SList () [SAtom 2], SAtom 3] :: Sexp Integer) @?= "(1 (2) 3)",
46 |       testCase "SList 3/3" $ flatPrint printer (SList () [SList () [SAtom 1], SAtom 2, SList () [SAtom 3]] :: Sexp Integer) @?= "((1) 2 (3))",
47 |       testCase "SAtom" $ flatPrint printer (SAtom 3 :: Sexp Integer) @?= "3",
48 |       SC.testProperty "decodeOne inverse of flatPrint" $
49 |       \s -> parse (decodeOne sexpParser) "" (flatPrint printer (s :: Sexp Integer)) == Right s
50 |       ]
51 |   ]
52 | 


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
 1 | # This file was automatically generated by 'stack init'
 2 | #
 3 | # Some commonly used options have been documented as comments in this file.
 4 | # For advanced use and comprehensive documentation of the format, please see:
 5 | # https://docs.haskellstack.org/en/stable/yaml_configuration/
 6 | 
 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version.
 8 | # A snapshot resolver dictates the compiler version and the set of packages
 9 | # to be used for project dependencies. For example:
10 | #
11 | # resolver: lts-3.5
12 | # resolver: nightly-2015-09-21
13 | # resolver: ghc-7.10.2
14 | #
15 | # The location of a snapshot can be provided as a file or url. Stack assumes
16 | # a snapshot provided as a file might change, whereas a url resource does not.
17 | #
18 | # resolver: ./custom-snapshot.yaml
19 | # resolver: https://example.com/snapshots/2018-01-01.yaml
20 | resolver: lts-18.10
21 | 
22 | # User packages to be built.
23 | # Various formats can be used as shown in the example below.
24 | #
25 | # packages:
26 | # - some-directory
27 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz
28 | #   subdirs:
29 | #   - auto-update
30 | #   - wai
31 | packages:
32 | - .
33 | # Dependency packages to be pulled from upstream that are not in the resolver.
34 | # These entries can reference officially published versions as well as
35 | # forks / in-progress versions pinned to a git hash. For example:
36 | #
37 | # extra-deps:
38 | # - acme-missiles-0.3
39 | # - git: https://github.com/commercialhaskell/stack.git
40 | #   commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
41 | #
42 | # extra-deps: []
43 | 
44 | # Override default flag values for local packages and extra-deps
45 | # flags: {}
46 | 
47 | # Extra package databases containing global packages
48 | # extra-package-dbs: []
49 | 
50 | # Control whether we use the GHC we find on the path
51 | # system-ghc: true
52 | #
53 | # Require a specific version of stack, using version ranges
54 | # require-stack-version: -any # Default
55 | # require-stack-version: ">=2.1"
56 | #
57 | # Override the architecture used by stack, especially useful on Windows
58 | # arch: i386
59 | # arch: x86_64
60 | #
61 | # Extra directories used by stack for building
62 | # extra-include-dirs: [/path/to/dir]
63 | # extra-lib-dirs: [/path/to/dir]
64 | #
65 | # Allow a newer minor version of GHC than the snapshot specifies
66 | # compiler-check: newer-minor
67 | 


--------------------------------------------------------------------------------
/test/SExpr_Unittests.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE FlexibleInstances #-}
 2 | {-# LANGUAGE MultiParamTypeClasses #-}
 3 | 
 4 | module SExpr_Unittests (
 5 |   sexpTestTree
 6 |   )where
 7 | 
 8 | import Test.Tasty
 9 | import Test.Tasty.HUnit
10 | import Data.SExpresso.SExpr
11 | 
12 | sexpTestTree :: TestTree
13 | sexpTestTree = testGroup "Sexpr.hs unit tests"
14 | 
15 |   [testGroup "isList" [
16 |       testCase "Empty SList" $ isList (SList () [] :: Sexp Int) @?= True,
17 |       testCase "SList" $ isList (SList () [SAtom 1, SAtom 2] :: Sexp Int) @?= True,
18 |       testCase "SAtom" $ isList (SAtom 1 :: Sexp Int) @?= False],
19 | 
20 |    testGroup "sList" [
21 |       testCase "Empty SList" $ sList (SList () [] :: Sexp Int) @?= Just [],
22 |       testCase "SList" $ sList (SList () [SAtom 1, SAtom 2] :: Sexp Int) @?= Just [SAtom 1, SAtom 2],
23 |       testCase "SAtom" $ sList (SAtom 1 :: Sexp Int) @?= Nothing],
24 |     
25 |    testGroup "isAtom" [
26 |       testCase "Empty SList" $ isAtom (SList () [] :: Sexp Int) @?= False,
27 |       testCase "SList" $ isAtom (SList () [SAtom 1, SAtom 2] :: Sexp Int) @?= False,
28 |       testCase "SAtom" $ isAtom (SAtom 1 :: Sexp Int) @?= True],
29 | 
30 |    testGroup "sAtom" [
31 |       testCase "Empty SList" $ sAtom (SList () [] :: Sexp Int) @?= Nothing,
32 |       testCase "SList" $ sAtom (SList () [SAtom 1, SAtom 2] :: Sexp Int) @?= Nothing,
33 |       testCase "SAtom" $ sAtom (SAtom 1 :: Sexp Int) @?= Just 1],
34 | 
35 |    testGroup "Pattern synonyms" [
36 |       testCase "L - empty list (1/2)" $
37 |         (case (SList () []) of
38 |            L [] -> True
39 |            _ -> False) @?= True,
40 |       testCase "L - empty list (2/2)" $
41 |         (case (SList () [SAtom 1 :: Sexp Int]) of
42 |            L [] -> True
43 |            _ -> False) @?= False,
44 |       testCase "L - singleton list (1/2)" $
45 |         (case (SList () [SAtom 1 :: Sexp Int]) of
46 |            L [_] -> True
47 |            _ -> False) @?= True,
48 |       testCase "L - singleton list (2/2)" $
49 |         (case (SList () [SAtom 1 :: Sexp Int]) of
50 |            L [] -> True
51 |            _ -> False) @?= False,
52 |       testCase "L - atom" $
53 |         (case (SAtom 1 :: Sexp Int) of
54 |            L _ -> True
55 |            _ -> False) @?= False,
56 |       testCase "A - atom (1/2)" $
57 |         (case (SAtom 1 :: Sexp Int) of
58 |            A 1 -> True
59 |            _ -> False) @?= True,
60 |       testCase "A - atom (2/2)" $
61 |         (case (A 1 :: Sexp Int) of
62 |            SAtom 1 -> True
63 |            _ -> False) @?= True,
64 |       testCase "A - singleton list" $
65 |         (case (SList () [SAtom 1 :: Sexp Int]) of
66 |            A 1 -> True
67 |            _ -> False) @?= False,
68 |       testCase "Sexp - empty List" $
69 |         (Sexp [] :: Sexp Int) @?= SList () [],
70 |       testCase "Sexp - non empty List" $
71 |         Sexp [A 1 :: Sexp Int, A 2] @?= SList () [SAtom 1, SAtom 2],
72 |       testCase "Sexp and L" $
73 |         (case (SList () [SAtom 1 :: Sexp Int, SList () []]) of
74 |            Sexp [A 1, L []] -> True
75 |            _ -> False) @?= True,
76 |       testCase "::: (1/2)" $
77 |         (case (SList () [SAtom 1 :: Sexp Int, SAtom 2]) of
78 |            (A 1 ::: A 2 ::: L []) -> True
79 |            _ -> False) @?= True,
80 |       testCase "::: (2/2)" $
81 |         (case (SList () [SAtom 1 :: Sexp Int, SAtom 2, SAtom 3]) of
82 |            (A 1 ::: L xs) -> xs == [SAtom 2, SAtom 3]
83 |            _ -> False) @?= True,
84 |       testCase "Nil (1/2)" $
85 |         (case (SList () [SAtom 1 :: Sexp Int, SAtom 2]) of
86 |            (A 1 ::: A 2 ::: Nil) -> True
87 |            _ -> False) @?= True,
88 |       testCase "Nil (2/2)" $
89 |         (case (SList () [SAtom 1 :: Sexp Int, SAtom 2, SAtom 3]) of
90 |            (A 1 ::: A 2 ::: Nil) -> True
91 |            _ -> False) @?= False
92 |       ],
93 | 
94 |     testGroup "Functor" [
95 |       testCase "Empty SList" $ fmap (\x -> x + 1) (SList () [] :: Sexp Int) @?= (SList () []),
96 |       testCase "Singleton SList" $ fmap (\x -> x + 1) (SList () [SAtom 5] :: Sexp Int) @?= (SList () [SAtom 6])]
97 |   ]
98 | 


--------------------------------------------------------------------------------
/src/Data/SExpresso/SExpr.hs:
--------------------------------------------------------------------------------
  1 | -- |
  2 | -- Module      :  Data.SExpresso.SExpr
  3 | -- Copyright   :  © 2019 Vincent Archambault
  4 | -- License     :  0BSD
  5 | --
  6 | -- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
  7 | -- Stability   :  experimental
  8 | --
  9 | -- Definition of S-expression
 10 | 
 11 | {-# LANGUAGE PatternSynonyms #-}
 12 | {-# LANGUAGE ViewPatterns #-}
 13 | {-# LANGUAGE TemplateHaskell #-}
 14 | {-# LANGUAGE KindSignatures #-}
 15 | {-# LANGUAGE TypeFamilies #-}
 16 | {-# LANGUAGE DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveDataTypeable #-}
 17 | 
 18 | 
 19 | module Data.SExpresso.SExpr
 20 |   (
 21 |     SExpr(..),
 22 |     Sexp,
 23 |     pattern A,
 24 |     pattern L,
 25 |     pattern Sexp,
 26 |     pattern (:::),
 27 |     pattern Nil,
 28 |     isAtom,
 29 |     sAtom,
 30 |     isList,
 31 |     sList
 32 |   )
 33 |   where
 34 | 
 35 | import Data.Bifunctor.TH
 36 | import Data.Data
 37 | import Data.Functor.Foldable.TH
 38 | 
 39 | -- | The datatype 'SExpr' is the definition of an S-expression for the
 40 | -- library S-expresso.
 41 | --
 42 | -- The parameter @a@ allows you to specify the datatype of atoms and
 43 | -- the parameter @b@ is usefull for keeping metadata about S-expression
 44 | -- like source position for example.
 45 | data SExpr b a = SList b [SExpr b a]
 46 |                | SAtom a
 47 |                deriving (Eq, Show, Functor, Traversable, Foldable, Data)
 48 | 
 49 | $(deriveBifunctor ''SExpr)
 50 | $(deriveBifoldable ''SExpr)
 51 | $(deriveBitraversable ''SExpr)
 52 | $(makeBaseFunctor ''SExpr)
 53 | 
 54 | -- | The type synonym 'Sexp' is a variant of the more general 'SExpr'
 55 | -- datatype with no data for the 'SList' constructor.
 56 | type Sexp a = SExpr () a
 57 | 
 58 | -- | Bidirectional pattern synonym for the type synonym 'Sexp'. See
 59 | -- also the 'L' pattern synonym.
 60 | --
 61 | -- >foo (Sexp x) = x -- Equivalent to foo (SList () x) = x
 62 | -- >s = Sexp []      -- Equivalent to s = SList () []
 63 | pattern Sexp :: [Sexp a] -> Sexp a
 64 | pattern Sexp xs = SList () xs
 65 | 
 66 | -- | Pattern for matching only the sublist of the 'SList' constructor.
 67 | -- See also the Sexp pattern synonym.
 68 | --
 69 | -- >foo (L xs) = xs -- Equivalent to foo (SList _ xs) = xs
 70 | pattern L :: [SExpr b a] -> SExpr b a
 71 | pattern L xs <- SList _ xs
 72 | 
 73 | -- | Shorthand for 'SAtom'.
 74 | --
 75 | -- >foo (A x) = x -- Equivalent to foo (SAtom x) = x
 76 | -- > a = A 3      -- Equivalent to a = SAtom 3
 77 | pattern A :: a -> SExpr b a
 78 | pattern A x = SAtom x
 79 | 
 80 | uncons :: SExpr b a -> Maybe (SExpr b a, SExpr b a)
 81 | uncons (SAtom _) = Nothing
 82 | uncons (SList _ []) = Nothing
 83 | uncons (SList b (x:xs)) = Just (x, SList b xs)
 84 | 
 85 | -- | Pattern specifying the shape of the sublist of the 'SList' constructor.
 86 | -- See also 'Nil'.
 87 | --
 88 | -- Although it aims to mimic the behavior of the cons (:) constructor
 89 | -- for list, this pattern behavior is a little bit different. Indeed
 90 | -- its signature is @SExpr b a -> SExpr b a -> SExpr b a@ while the
 91 | -- cons (:) constructor signature is @a -> [a] -> [a]@. The first
 92 | -- argument type is different in the case of the cons constructor but all
 93 | -- the types are identical for the pattern `:::`.
 94 | --
 95 | -- This implies that the following code
 96 | --
 97 | -- >foo (x ::: xs) = ...
 98 | -- is equivalent to
 99 | --
100 | -- >foo (SList b (x : rest)) = let xs = SList b rest
101 | -- >                           in ...
102 | -- If you wish for the @xs@ above to match the remaining of the list,
103 | -- you need to use the 'L' pattern
104 | --
105 | -- >foo (A x ::: L xs)
106 | -- which is equivalent to
107 | -- 
108 | -- >foo (SList b (x : rest)) = let (SList _ xs) = SList b rest
109 | -- >                           in ...
110 | --
111 | -- Other examples :
112 | --
113 | -- >foo (A x1 ::: A x2 ::: Nil)   -- Equivalent to foo (SList _ [SAtom x1, SAtom x2])
114 | -- >foo (L ys ::: A x ::: L xs)   -- Equivalent to foo (SList _ (SList _ ys : SAtom x : xs))
115 | infixr 5 :::
116 | pattern (:::) :: SExpr b a -> SExpr b a -> SExpr b a
117 | pattern x ::: xs <- (uncons -> Just (x, xs))
118 | 
119 | -- | Pattern to mark the end of the list when using the pattern synonym ':::'
120 | pattern Nil :: SExpr b a
121 | pattern Nil <- SList _ []
122 | 
123 | -- | The 'isAtom' function returns 'True' iff its argument is of the
124 | -- form @SAtom _@.
125 | isAtom :: SExpr b a -> Bool
126 | isAtom (A _) = True
127 | isAtom _ = False
128 | 
129 | -- | The 'sAtom' function returns 'Nothing' if its argument is of the
130 | -- form @SList _ _@ and @'Just' a@ if its argument is of the form @SAtom _@..
131 | sAtom :: SExpr b a -> Maybe a
132 | sAtom (A x) = Just x
133 | sAtom _ = Nothing
134 | 
135 | -- | The 'isList' function returns 'True' iff its argument is of the
136 | -- form @SList _ _@.
137 | isList :: SExpr b a -> Bool
138 | isList (L _) = True
139 | isList _ = False
140 | 
141 | -- | The 'sList' function returns 'Nothing' if its argument is of the
142 | -- form @SAtom _@ and the sublist @xs@ if its argument is of the form
143 | -- @SList _ xs@.
144 | sList :: SExpr b a -> Maybe [SExpr b a]
145 | sList (L l) = Just l
146 | sList _ = Nothing
147 | 


--------------------------------------------------------------------------------
/test/Parse_Unittests.hs:
--------------------------------------------------------------------------------
  1 | module Parse_Unittests (
  2 |   parseTestTree
  3 |   )where
  4 | 
  5 | import Data.Void
  6 | import Data.Either
  7 | import Data.Bifunctor (first)
  8 | import Test.Tasty
  9 | import Test.Tasty.HUnit
 10 | import Text.Megaparsec as M
 11 | import Text.Megaparsec.Char
 12 | import Data.SExpresso.SExpr
 13 | import Data.SExpresso.Parse
 14 | 
 15 | type Parser = Parsec Void String
 16 | 
 17 | asciiLetter :: Parser Char
 18 | asciiLetter = oneOf (['a' .. 'z'] ++ ['A' .. 'Z'])
 19 | 
 20 | pIdent :: Parser String
 21 | pIdent = some asciiLetter
 22 | 
 23 | pDigit :: Parser String
 24 | pDigit = some digitChar
 25 | 
 26 | sexpParser :: SExprParser Parser () String
 27 | sexpParser = plainSExprParser (pIdent <|> pDigit)
 28 | 
 29 | pSExpr :: Parser (Sexp String)
 30 | pSExpr = parseSExpr sexpParser
 31 | 
 32 | pDecodeOne :: Parser (Sexp String)
 33 | pDecodeOne = decodeOne sexpParser
 34 | 
 35 | pDecode :: Parser [Sexp String]
 36 | pDecode = decode sexpParser
 37 | 
 38 | pOptionalSpace :: Parser (Sexp String)
 39 | pOptionalSpace = decodeOne $ setSpacingRule spaceIsOptional sexpParser
 40 | 
 41 | parseTestTree :: TestTree
 42 | parseTestTree = testGroup "Parse/Generic.hs & Parse/Char.hs unit tests" $
 43 |   let tparse :: Parser a -> String -> Either String a
 44 |       tparse p s = first M.errorBundlePretty $ M.parse p "" s
 45 | 
 46 |       sExprTests :: (Eq a, Show a) => Parser a -> (Sexp String -> a) -> [TestTree]
 47 |       sExprTests p f = [
 48 |         let s = "()" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])),
 49 |         let s = "(   )" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])),
 50 |         let s = "foo" in testCase (show s) $ tparse p s @?= (Right $ f (SAtom "foo")),
 51 |         let s = "1234" in testCase (show s) $ tparse p s @?= (Right $ f (SAtom "1234")),
 52 |         let s = "(foo)" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [SAtom "foo"])),
 53 |         let s = "( foo)" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [SAtom "foo"])),
 54 |         let s = "(foo )" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [SAtom "foo"])),
 55 |         let s = "(foo bar baz)"
 56 |         in testCase (show s) $ tparse p s @?= (Right $ f (SList () [SAtom "foo", SAtom "bar", SAtom "baz"])),
 57 |         let s = "(foo (bar baz))"
 58 |         in testCase (show s) $ tparse p s @?=
 59 |            (Right $ f (SList () [SAtom "foo", SList () [SAtom "bar", SAtom "baz"]])),
 60 |         let s = "(foo(bar baz))"
 61 |         in testCase (show s) $ tparse p s @?=
 62 |            (Right $ f (SList () [SAtom "foo", SList () [SAtom "bar", SAtom "baz"]])),
 63 |         let s = "((foo bar)baz)"
 64 |         in testCase (show s) $ tparse p s @?=
 65 |            (Right $ f (SList () [SList () [SAtom "foo", SAtom "bar"], SAtom "baz"])),
 66 |         let s = "(foo1234)"
 67 |         in testCase (show s) $ (isLeft $ tparse p "(foo1234)") @? "Parsing must fail. foo and 1234 are not separated by whitespace"
 68 |         ]
 69 |                          
 70 |       decodeCommon :: (Eq a, Show a) => Parser a -> (Sexp String -> a) -> [TestTree]
 71 |       decodeCommon p f = [
 72 |         let s = " () " in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])),
 73 |         let s = " ()" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])),
 74 |         let s = "() " in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])),
 75 |         let s = "   ()   " in testCase (show s) $ tparse p s @?= (Right $ f (SList () []))
 76 |         ]
 77 |   in
 78 |     [
 79 |       testGroup "parseSExpr" $ sExprTests pSExpr id ++
 80 |       [
 81 |         let s = " foo"
 82 |         in testCase (show s) $ (isLeft $ tparse pSExpr s) @? "Parsing must fail. parseSExpr should not parse whitespace"
 83 |       ],
 84 |       testGroup "decondeOne" $ sExprTests pDecodeOne id ++
 85 |       decodeCommon pDecodeOne id ++
 86 |       [
 87 |         let s = "() err" in testCase (show s) $ (isLeft $ tparse pDecodeOne s) @? "Parsing must fail. 2 SExpr",
 88 |         let s = "err ()" in testCase (show s) $ (isLeft $ tparse pDecodeOne s) @? "Parsing must fail. 2 SExpr",
 89 |         let s = "()err" in testCase (show s) $ (isLeft $ tparse pDecodeOne s) @? "Parsing must fail. 2 SExpr",
 90 |         let s = "err()" in testCase (show s) $ (isLeft $ tparse pDecodeOne s) @? "Parsing must fail. 2 SExpr"
 91 |       ],
 92 |      testGroup "decode" $ sExprTests pDecode (\x -> [x]) ++
 93 |      decodeCommon pDecode (\x -> [x]) ++
 94 |       [
 95 |         let s = "()()" in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []],
 96 |         let s = " ()()" in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []],
 97 |         let s = "() ()" in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []],
 98 |         let s = "()() " in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []],
 99 |         let s = "  ()  ()  " in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []],
100 |         let s = "(foo)(1234)" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]],
101 |         let s = " (foo)(1234)" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]],
102 |         let s = "(foo) (1234)" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]],
103 |         let s = "(foo)(1234) " in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]],
104 |         let s = "  (foo)  (1234)  " in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]],
105 |         let s = "(foo) 1234" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SAtom "1234"],
106 |         let s = "foo 1234" in testCase (show s) $ tparse pDecode s @?= Right [SAtom "foo", SAtom "1234"],
107 |         let s = "foo(1234)" in testCase (show s) $ tparse pDecode s @?= Right [SAtom "foo", SList () [SAtom "1234"]],
108 |         let s = "(foo)1234" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SAtom "1234"],
109 |         let s = "bar1234"
110 |         in testCase (show s) $ (isLeft $ tparse pDecode s) @? "Parsing must fail. bar and 1234 are not separated by whitespace"
111 |       ],
112 |      testGroup "spaceIsOptional" $ [
113 |         let s = "(foo1234)"
114 |         in testCase (show s) $ tparse pOptionalSpace s @?= (Right (SList () [SAtom "foo", SAtom "1234"])),
115 |         let s = "(foo 1234)"
116 |         in testCase (show s) $ tparse pOptionalSpace s @?= (Right (SList () [SAtom "foo", SAtom "1234"])),
117 |         let s = "(foo1234 bar)"
118 |         in testCase (show s) $ tparse pOptionalSpace s @?= (Right (SList () [SAtom "foo", SAtom "1234", SAtom "bar"])),
119 |         let s = "( foo1234 )"
120 |         in testCase (show s) $ tparse pOptionalSpace s @?= (Right (SList () [SAtom "foo", SAtom "1234"]))
121 |         ]
122 |   ]
123 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![License OBSD](https://img.shields.io/badge/license-0BSD-brightgreen.svg)](https://opensource.org/licenses/0BSD)
  2 | [![Hackage](https://img.shields.io/hackage/v/sexpresso.svg)](http://hackage.haskell.org/package/sexpresso)
  3 | 
  4 | # S-expresso
  5 | 
  6 | S-expresso is a Haskell library designed to help you parse and print
  7 | data or source code encoded as an S-expression. It provides a very
  8 | flexible parser and (for now) a flat printer.
  9 | 
 10 | # What is an S-expression
 11 | Basically, an S-expression is a special form of tree structured
 12 | data. An S-expression object is either an atom or a list of atoms and other S-expressions.
 13 | 
 14 | This datatype is the definition of an S-expression for
 15 | S-expresso. 
 16 | 
 17 | ~~~haskell
 18 | data SExpr b a = SList b [SExpr b a]
 19 |                | SAtom a
 20 | ~~~
 21 | 
 22 | The parameter `a` allows you to specify the datatype of atoms and the
 23 | parameter `b` is usefull for keeping metadata about S-expression like
 24 | source position for example.
 25 | 
 26 | `SExpr` is not equivalent to `[a]` because the later cannot
 27 | distinguish between an atom `(SAtom _)` and a tree containing only one
 28 | atom `(SList _ [SAtom _])`. `SExpr` is also not equivalent to `Tree a`
 29 | from `Data.Tree` because the later cannot encode the empty tree
 30 | `(SList _ [])` and does not enforce that atoms are at the leaves.
 31 | 
 32 | ## The Sexp type
 33 | If you are only interested by the atoms, you can use the type alias
 34 | `Sexp` that is a variant of the more general 'SExpr' data type with no
 35 | data for the 'SList' constructor.
 36 | ~~~haskell
 37 | type Sexp a = SExpr () a
 38 | ~~~
 39 | 
 40 | This type also comes with a bidirectional pattern synonym also named
 41 | `Sexp` for object of the form `SExpr () _`.
 42 | ~~~
 43 | x = Sexp [A 3]                   <-> x = SList () [SAtom 3]
 44 | foo (Sexp xs)                    <-> foo (SList () xs)
 45 | foo (Sexp (Sexp ys : A x : xs))  <-> foo (SList () (SList () ys : SAtom x : xs))
 46 | ~~~
 47 | 
 48 | ## Pattern synonyms
 49 | S-expresso defines four pattern synonyms to ease your programming with
 50 | `SExpr`. The patterns `L` helps you match the `SList` constructor and only
 51 | its sublist, disregarding the `b` field. The pattern `:::` and `Nil` helps
 52 | you specify the shape of the sublist of an `SList` constructor and
 53 | finally the pattern `A` is a shorthand for `SAtom`.
 54 | 
 55 | Together they make working with `SExpr` a little easier.
 56 | ~~~
 57 | a = A 3                      <-> a = SAtom 3
 58 | foo (A x)                    <-> foo (SAtom x)
 59 | foo (A x1 ::: A x2 ::: Nil)  <-> foo (SList _ [SAtom x1, SAtom x2])
 60 | foo (A x ::: L xs))          <-> foo (SList _ (SAtom x : xs))
 61 | foo (L ys ::: A x ::: L xs)) <-> foo (SList _ (SList _ ys : SAtom x : xs))
 62 | foo (L x)                    <-> foo (SList _ x)
 63 | ~~~
 64 | 
 65 | Notice that you need to end the pattern `:::` with `Nil` for the empty
 66 | list or `L xs` for matching the remainder of the list. Indeed, if you write
 67 | 
 68 | ~~~
 69 | foo (x ::: xs) = ...
 70 | ~~~
 71 | 
 72 | this is equivalent to :
 73 | 
 74 | ~~~
 75 | foo (SList b (x : rest)) = let xs = SList b rest
 76 |                            in ...
 77 | ~~~
 78 | 
 79 | You can refer to the documentation of the `:::` constructor for more information.
 80 | 
 81 | # Parsing S-expressions
 82 | The parsing is based on
 83 | [megaparsec](http://hackage.haskell.org/package/megaparsec). S-expresso
 84 | allows you to customize the following :
 85 | * The parser for atoms
 86 | * The opening tag (usually "("), the closing tag (usually ")") and a
 87 |   possible dependency of the closing tag on the opening one.
 88 | * If some space is required or optional between any pair of atoms.
 89 | * How to parse space (ex: treat comments as whitespace)
 90 | 
 91 | The library offers amoung others the `decodeOne` and `decode`
 92 | functions. The former only reads one S-expression while the other
 93 | parses many S-expressions.  Both functions creates a megaparsec
 94 | parser from a `SExprParser` argument.
 95 | 
 96 | The `SExprParser` is the data type that defines how to read an
 97 | S-expression.  The easiest way to create a `SExprParser` is to use the
 98 | function `plainSExprParser` with your own custom atom parser. This
 99 | will create a parser where S-expression starts with "(", ends with ")"
100 | and space is mandatory between atoms.
101 | 
102 | ~~~haskell
103 | import Data.Void
104 | import qualified Data.Text as T
105 | import Text.Megaparsec
106 | import Text.Megaparsec.Char
107 | import qualified Text.Megaparsec.Char.Lexer as L
108 | 
109 | atom = some letter
110 | 
111 | sexp = decode $ plainSExprParser atom
112 | 
113 | -- Returns (SList () [SAtom "hello", SAtom "world"])
114 | ex1 = parse sexp "" "(hello world)"
115 | 
116 | -- Returns (SList () [SAtom "hello", SAtom "world", SList () [SAtom "bonjour"]])
117 | ex2 = parse sexp "" "  (hello world(bonjour))  "
118 | 
119 | -- Returns SAtom "hola"
120 | ex2 = parse sexp "" "hola"
121 | ~~~
122 | 
123 | ## Customizing the SExprParser
124 | S-expresso provides many functions to modify the behavior of the
125 | parser. For example, you can use the functions `setTags`,
126 | `setTagsFromList`, `setSpace` and `setSpacingRule` to modify the
127 | behavior of the parser. Following on the preceding example:
128 | 
129 | ~~~haskell
130 | -- setTags
131 | data MyType = List | Vector
132 | 
133 | listOrVector =
134 |   let sTag = (char '(' >> return List) <|> (string "#(" >> return Vector)
135 |       eTag = \t -> char ')' >> return t
136 |       p = setTags sTag eTag $
137 |           plainSExprParser atom
138 |   in decode p
139 | 
140 | -- Returns (SList List [SList Vector [SAtom "a", SAtom "b"], SAtom "c"])
141 | ex3 = parse listOrVector "" "(#(a b) c)"
142 | 
143 | -- setTagsFromList
144 | listOrVector2 = decode $ 
145 |                 setTagsFromList [("(",")",List),("#(",")",Vector)] $
146 |                 plainSExprParser atom
147 | 
148 | 
149 | -- Returns (SList List [SList Vector [SAtom "a", SAtom "b"], SAtom "c"])
150 | ex4 = parse listOrVector2 "" "(#(a b) c)"
151 | 
152 | -- setSpace
153 | withComments = decode $
154 |                -- See megaparsec Space in Megaparsec.Char.Lexer
155 |                setSpace (L.Space Space1 (skipLineComment ";") empty) $
156 |                plainSExprParser atom
157 | 
158 | -- Returns (SList () [SAtom "hello", SList () [SAtom "bonjour"]])
159 | ex5 = parse withComments "" "(hello ;world\n (bonjour))"
160 | 
161 | -- setSpacingRule
162 | optionalSpace = decode $
163 |                 setSpacingRule spaceIsOptional $
164 |                 plainSExprParser (some letter <|> some digitChar)
165 | 
166 | -- Returns (SList () [SAtom "hello", SAtom "1234", SAtom "world"])
167 | ex5 = parse optionalSpace "" "(hello1234world)"
168 | ~~~
169 | 
170 | You can also directly build a custom SExprParser with the constructor `SExprParser`.
171 | 
172 | ## Adding Source Location
173 | If you need the source position of the atoms and s-expression, the
174 | function `withLocation` transforms an `SExprParser b a` into
175 | `SExprParser (Located b) (Located a)`. The `Located` datatype is
176 | defined
177 | [here](https://github.com/archambaultv/sexpresso/blob/master/src/Data/SExpresso/Parse/Location.hs).
178 | 


--------------------------------------------------------------------------------
/src/Data/SExpresso/Parse/Generic.hs:
--------------------------------------------------------------------------------
  1 | -- |
  2 | -- Module      :  Data.SExpresso.Parse.Generic
  3 | -- Copyright   :  © 2019 Vincent Archambault
  4 | -- License     :  0BSD
  5 | --
  6 | -- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
  7 | -- Stability   :  experimental
  8 | --
  9 | -- This module includes everything you need to write a parser for
 10 | -- S-expression ('SExpr'). It is based on the "Text.Megaparsec"
 11 | -- library and parsers can be defined for any kind of ('MonadParsec' e
 12 | -- s m) instance. This is quite generic, if you are working with
 13 | -- streams of 'Char', we suggest you also import
 14 | -- "Data.SExpresso.Parse.Char" or simply "Data.SExpresso.Parse" which
 15 | -- re-exports everything.
 16 | --
 17 | -- You can customize your 'SExpr' parser by specifying the following:
 18 | --
 19 | --   * The parser for atoms
 20 | --
 21 | --   * The opening tag, the closing tag, and a possible dependency of
 22 | --     the closing tag on the opening one.
 23 | --
 24 | --   * If some space is required or optional between any pair of
 25 | --     atoms.
 26 | --
 27 | --   * How to parse space (ex: treat comments as whitespace)
 28 | 
 29 | {-# LANGUAGE OverloadedStrings #-}
 30 | {-# LANGUAGE ExistentialQuantification #-}
 31 | 
 32 | module Data.SExpresso.Parse.Generic
 33 |   (
 34 |     SExprParser(..),
 35 |     
 36 |     getAtom,
 37 |     getSpace,
 38 |     getSpacingRule,
 39 | 
 40 |     setTags,
 41 |     setTagsFromList,
 42 |     setTagsFromMap,
 43 |     setSpace,
 44 |     setSpacingRule,
 45 |     setAtom,
 46 | 
 47 |     SpacingRule(..),
 48 |     spaceIsMandatory,
 49 |     spaceIsOptional,
 50 |     mkSpacingRule,
 51 |     
 52 |     withLocation,
 53 | 
 54 |     parseSExprList,
 55 |     parseSExpr,
 56 |     decodeOne,
 57 |     decode
 58 |    )
 59 |   where
 60 | 
 61 | import Data.Maybe
 62 | import qualified Data.Map as M
 63 | import Control.Applicative
 64 | import Control.Monad (mzero)
 65 | import Text.Megaparsec
 66 | import Data.SExpresso.SExpr
 67 | import Data.SExpresso.Parse.Location
 68 | 
 69 | -- | The 'SpacingRule' datatype is used to indicate if space is optional or mandatory between two consecutive @'SAtom' _@.
 70 | data SpacingRule =
 71 |   -- | Space is mandatory
 72 |   SMandatory
 73 |   -- | Space is optional
 74 |   | SOptional
 75 |    deriving (Show, Eq)
 76 | 
 77 | -- | The @'SExprParser' m b a@ datatype defines how to parse an
 78 | -- @'SExpr' b a@. Most parsing functions require the underlying monad
 79 | -- @m@ to be an instance of ('MonadParsec' e s m).
 80 | 
 81 | data SExprParser m b a
 82 |   -- | The @c@ parameter in the first two arguments is the type of the
 83 |   -- relation between the opening tag and the closing one.
 84 |   = forall c. SExprParser
 85 |   (m c) -- ^ The parser for the opening tag. Returns an object of an
 86 |         -- arbitrary type @c@ that will be used to create the closing
 87 |         -- tag parser.
 88 |   (c -> m b) -- ^ A function that takes the object returned by the
 89 |              -- opening tag parser and provide a parser for the
 90 |              -- closing tag.
 91 |   (m a) -- ^ The parser for atoms
 92 |   (m ()) -- ^ A parser for space tokens which does not accept empty
 93 |          -- input (e.g. 'Text.Megaparsec.Char.space1')
 94 |   (a -> a -> SpacingRule) -- ^ A function to tell if two consecutive
 95 |                           -- atoms must be separated by space or
 96 |                           -- not. See also 'mkSpacingRule' and
 97 |                           -- 'setSpacingRule'
 98 | 
 99 | -- | The 'getSpace' function returns the parser for whitespace of an 'SExprParser' object.
100 | getSpace :: SExprParser m b a -> m ()
101 | getSpace (SExprParser _ _ _ sp _) = sp
102 | 
103 | -- | The 'getSpacingRule' function returns spacing rule function of an 'SExprParser' object.
104 | getSpacingRule :: SExprParser m b a -> (a -> a -> SpacingRule)
105 | getSpacingRule (SExprParser _ _ _ _ sr) = sr
106 | 
107 | -- | The 'getAtom' function returns the parser for atoms of an 'SExprParser' object.
108 | getAtom :: SExprParser m b a -> m a
109 | getAtom (SExprParser _ _ a _ _) = a
110 | 
111 | -- | The 'withLocation' function adds source location to a @'SExprParser'@. See also 'Location'.
112 | withLocation :: (MonadParsec e s m, TraversableStream s) => SExprParser m b a -> SExprParser m (Located b) (Located a)
113 | withLocation (SExprParser pSTag pETag atom sp sr) =
114 |   let s = do
115 |         pos <- getSourcePos
116 |         c <- pSTag
117 |         return (pos, c)
118 |       e = \(pos, c) -> do
119 |         b <- pETag c
120 |         pos2 <- getSourcePos
121 |         return $ At (Span pos pos2) b
122 |   in SExprParser s e (located atom) sp (\(At _ a1) (At _ a2) -> sr a1 a2)
123 | 
124 | -- | The 'setAtom' function updates a parser with a new parser for atoms and and new spacing rule function.
125 | setAtom :: m a -> (a -> a -> SpacingRule) -> SExprParser m b a' -> SExprParser m b a
126 | setAtom a sr (SExprParser pSTag pETag _ sp _) = SExprParser pSTag pETag a sp sr
127 | 
128 | -- | The 'setTags' function updates a parser with a new parser for the opening and closing tags.
129 | setTags :: m c -> (c -> m b) -> SExprParser m b' a -> SExprParser m b a
130 | setTags s e (SExprParser _ _ a sp sr) = SExprParser s e a sp sr
131 | 
132 | -- | The 'setTagsFromList' function helps you build the opening and
133 | -- closing parsers from a list of triplets. Each triplet specifies a
134 | -- stream of tokens to parse as the opening tag, a stream of tokens to
135 | -- parse at the closing tag and what to return when this pair is
136 | -- encountered. The 'setTagsFromList' can handle multiple triplets
137 | -- with the same opening tags. See also 'setTagsFromMap'.
138 | --
139 | -- The example e1 parses "()" as @'SList' () []@.
140 | --
141 | -- > e1 = setTagsFromList [("(", ")", ()] p
142 | --
143 | -- The example e2 parses both "()" and "[]" as @'SList' () []@ but does
144 | -- not parse "(]" or "[)"
145 | --
146 | -- > e2 = setTagsFromList [("(", ")", ()), ("[", "]", ())] p 
147 | --
148 | -- The example e3 parses "()" as @'SList' List []@ and "#()" as
149 | -- @'SList' Vector []@, but does not parse "(]" or "[)"
150 | --
151 | -- > e3 = setTagsFromList [("(", ")", List), ("#(",")",Vector)] p
152 | --
153 | -- The example e4 parses "()" as @'SList' ')' []@ and "(]" as
154 | -- @'SList' ']' []@, but does not parse "])"
155 | --
156 | -- > e4 = setTagsFromList [("(", ")", ')'), ("(", "]", ']')] p 
157 | setTagsFromList ::  (MonadParsec e s m) =>
158 |                     [(Tokens s, Tokens s, b)] -> SExprParser m b' a -> SExprParser m b a
159 | setTagsFromList l p =
160 |   let m = M.fromListWith (++) $ map (\(s,e,b) -> (s, [(e,b)])) l
161 |   in setTagsFromMap m p
162 | 
163 | -- | The 'setTagsFromMap' function helps you build the opening and
164 | -- closing parsers from a map. Each key specifies a stream of tokens to
165 | -- parse as the opening tag and the value of the map specifies one or
166 | -- more streams of tokens to parse at the closing tag and what to
167 | -- return when this pair is encountered. See also 'setTagsFromList'.
168 | --
169 | -- The example e1 parses "()" as @'SList' () []@.
170 | --
171 | -- > e1 = setTagsFromList $ M.fromList [("(", [")", ()]] p
172 | --
173 | -- The example e2 parses both "()" and "[]" as @'SList' () []@ but does
174 | -- not parse "(]" or "[)"
175 | --
176 | -- > e2 = setTagsFromList $ M.fromList [("(", [")", ()]), ("[", ["]", ()])] p 
177 | --
178 | -- The example e3 parses "()" as @'SList' List []@ and "#()" as
179 | -- @'SList' Vector []@, but does not parse "(]" or "[)"
180 | --
181 | -- > e3 = setTagsFromList $ M.fromList [("(", [")", List]), ("#(", [")",Vector])] p
182 | --
183 | -- The example e4 parses "()" as @'SList' ')' []@ and "(]" as
184 | -- @'SList' ']' []@, but does not parse "])"
185 | --
186 | -- > e4 = setTagsFromList $ M.fromList [("(", [(")", ')'), ("]", ']')])] p 
187 | setTagsFromMap :: (MonadParsec e s m) =>
188 |                   M.Map (Tokens s) [(Tokens s, b)] -> SExprParser m b' a -> SExprParser m b a
189 | setTagsFromMap m p =
190 |   let l = M.toList m
191 | 
192 |       choose [] = empty
193 |       choose ((s, eb) : ts) = (chunk s >> return eb) <|> choose ts
194 |       
195 |       stag = choose l
196 |       
197 |       etag = \xs -> choice $ map (\(e, b) -> chunk e >> return b) xs
198 |   in setTags stag etag p
199 | 
200 | -- | The 'spaceIsMandatory' function is a spacing rule where space is always mandatory. See also 'getSpacingRule'.
201 | spaceIsMandatory :: a -> a -> SpacingRule
202 | spaceIsMandatory = \_ _ -> SMandatory
203 | 
204 | -- | The 'spaceIsOptional' function is a spacing rule where space is always optional. See also 'getSpacingRule'.
205 | spaceIsOptional :: a -> a -> SpacingRule
206 | spaceIsOptional = \_ _ -> SOptional
207 | 
208 | -- | The 'setSpacingRule' function modifies a 'SExprParser' by setting
209 | -- the function to tell if two consecutive atoms must be separated by
210 | -- space or not. See also 'mkSpacingRule'.
211 | setSpacingRule :: (a -> a -> SpacingRule) -> SExprParser m b a -> SExprParser m b a
212 | setSpacingRule r p@(SExprParser pSTag pETag _ _ _) = SExprParser pSTag pETag (getAtom p) (getSpace p) r
213 | 
214 | -- | The 'mkSpacingRule' function is a helper to create a valid
215 | -- spacing rule function for 'SExprParser' when some atoms have the
216 | -- same 'SpacingRule' both before and after no matter what the other
217 | -- atom is. It takes as argument a function @f@ that takes a single
218 | -- atom and returns the 'SpacingRule' that applies both before and
219 | -- after this atom.
220 | --
221 | -- For example, to create a spacing rule where space is optional both
222 | -- before and after the fictitious @MyString@ token:
223 | --
224 | -- > s (MyString _) = SOptional
225 | -- > s _ = Mandatory
226 | -- > spacingRule = mkSpacingRule s
227 | --
228 | -- The above is equivalent to :
229 | --
230 | -- > spacingRule (MyString _) _ = SOptional
231 | -- > spacingRule _ (MyString _) = SOptional
232 | -- > spacingRule _ _ = SMandatory
233 | 
234 | mkSpacingRule :: (a -> SpacingRule) -> (a -> a -> SpacingRule)
235 | mkSpacingRule f = \a1 a2 -> case f a1 of
236 |                               SOptional -> SOptional
237 |                               SMandatory -> f a2
238 | 
239 | -- | The 'setSpace' function modifies a 'SExprParser' by setting the
240 | -- parser to parse whitespace. The parser for whitespace must not
241 | -- accept the empty input (e.g. 'Text.Megaparsec.Char.space1')
242 | setSpace :: m () -> SExprParser m b a -> SExprParser m b a
243 | setSpace sp (SExprParser s e a _ sr) = SExprParser s e a sp sr
244 | 
245 | -- Tells if the space (or absence of) between two atoms is valid or not 
246 | spaceIsOK :: (a -> a -> SpacingRule) -> (SExpr b a) -> (SExpr b a) -> Bool -> Bool
247 | spaceIsOK getSpacingRule' sexp1 sexp2 spaceInBetween =
248 |   case (sexp1, sexp2, spaceInBetween) of
249 |     (_, _, True) -> True
250 |     (SList _ _, _, _) -> True
251 |     (_, SList _ _, _) -> True
252 |     (SAtom a1, SAtom a2, _) -> getSpacingRule' a1 a2 == SOptional
253 | 
254 | sepEndBy' :: (MonadParsec e s m, TraversableStream s) => m (SExpr b a) -> m () -> (a -> a -> SpacingRule) -> m [SExpr b a]
255 | sepEndBy' p sep f = sepEndBy1' p sep f <|> pure []
256 | 
257 | sepEndBy1' :: (MonadParsec e s m, TraversableStream s) => m (SExpr b a) -> m () -> (a -> a -> SpacingRule) -> m [SExpr b a]
258 | sepEndBy1' p sep f = do
259 |   x <- p
260 |   xs <- parseContent x
261 |   return $ x : xs
262 | 
263 |   where parseContent a1 = do
264 |           s <- maybe False (const True) <$> optional sep
265 |           mpos <- if not s then Just <$> getSourcePos else return Nothing 
266 |           mx <- optional p
267 |           case mx of
268 |             Nothing -> return []
269 |             Just a2 ->
270 |               if spaceIsOK f a1 a2 s
271 |               then do
272 |                 xs <- parseContent a2
273 |                 return $ a2 : xs
274 |               else label ("The previous two atoms are not separated by space.\n" <>
275 |                          "A space was expected at " <> sourcePosPretty (fromJust mpos)) mzero
276 | 
277 | -- | The 'parseSExprList' function return a parser for parsing S-expression of the form @'SList' _ _@.
278 | parseSExprList :: (MonadParsec e s m, TraversableStream s) =>
279 |                 SExprParser m b a -> m (SExpr b a)
280 | parseSExprList def@(SExprParser pSTag pETag _ sp sr)  = do
281 |           c <- pSTag
282 |           _ <- optional sp
283 |           xs <- sepEndBy' (parseSExpr def) sp sr
284 |           b <- pETag c
285 |           return $ SList b xs
286 | 
287 | -- | The 'parseSExpr' function return a parser for parsing
288 | -- S-expression ('SExpr'), that is either an atom (@'SAtom' _@) or a
289 | -- list @'SList' _ _@. See also 'decodeOne' and 'decode'.
290 | parseSExpr :: (MonadParsec e s m, TraversableStream s) =>
291 |               SExprParser m b a -> m (SExpr b a)
292 | parseSExpr def = (getAtom def >>= return . SAtom) <|> (parseSExprList def)
293 | 
294 | -- | The 'decodeOne' function return a parser for parsing a file
295 | -- containing only one S-expression ('SExpr'). It can parse extra
296 | -- whitespace at the beginning and at the end of the file. See also
297 | -- 'parseSExpr' and 'decode'.
298 | decodeOne :: (MonadParsec e s m, TraversableStream s) => SExprParser m b a -> m (SExpr b a)
299 | decodeOne def =
300 |   let ws = getSpace def
301 |   in optional ws *> parseSExpr def <* (optional ws >> eof)
302 | 
303 | -- | The 'decode' function return a parser for parsing a file
304 | -- containing many S-expression ('SExpr'). It can parse extra
305 | -- whitespace at the beginning and at the end of the file. See also
306 | -- 'parseSExpr' and 'decodeOne'.
307 | decode :: (MonadParsec e s m, TraversableStream s) => SExprParser m b a -> m [SExpr b a]
308 | decode def =
309 |   let ws = getSpace def
310 |   in optional ws *> sepEndBy' (parseSExpr def) ws (getSpacingRule def) <* eof
311 | 


--------------------------------------------------------------------------------
/test/SchemeR5RS_Unittests.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE TupleSections #-}
  3 | 
  4 | module SchemeR5RS_Unittests (
  5 |   r5rsTestTree
  6 |   )where
  7 | 
  8 | import Data.Void
  9 | import qualified Data.Text as T
 10 | import Data.Either
 11 | import Data.Bifunctor (first)
 12 | import Test.Tasty
 13 | import Test.Tasty.HUnit
 14 | import Text.Megaparsec
 15 | --import Text.Megaparsec.Char
 16 | import Data.SExpresso.SExpr
 17 | import Data.SExpresso.Parse
 18 | import Data.SExpresso.Language.SchemeR5RS as R5
 19 | 
 20 | type Parser = Parsec Void T.Text
 21 | 
 22 | pSExpr :: Parser [SExpr R5.SExprType R5.SchemeToken]
 23 | pSExpr = decode R5.sexpr
 24 | 
 25 | -- tparse parses the whole input
 26 | tparse :: Parser a -> T.Text -> Either String a
 27 | tparse p s = first errorBundlePretty $ parse (p <* eof) "" s
 28 | 
 29 | -- Runs a test that must succeed. Accepts the parser and
 30 | -- the string to parse and the expected value as tuple
 31 | okTest :: (Eq a, Show a) => Parser a -> (T.Text, a) -> TestTree
 32 | okTest p (str, expected) = testCase ("Should parse " ++ show str)
 33 |                          $ tparse p str @?= Right expected
 34 | 
 35 | okDatum :: (T.Text, [Datum]) -> TestTree
 36 | okDatum (t, d) = okTest (sexpr2Datum <$> pSExpr) (t, Right d)
 37 | 
 38 | -- Runs a test that must fail. Accepts the parser
 39 | -- and the string to parse
 40 | koTest :: Parser a -> T.Text -> TestTree
 41 | koTest p str = testCase ("Should not parse " ++ show str)
 42 |              $ (isLeft $ tparse p str) @?
 43 |                ("Parsing must fail on " ++ show str)
 44 | 
 45 | koDatum :: T.Text -> TestTree
 46 | koDatum str = testCase ("Should not parse " ++ show str)
 47 |              $ (isLeft $ (tparse pSExpr str >>= sexpr2Datum)) @?
 48 |                ("Parsing must fail on " ++ show str)
 49 | 
 50 | -- Tests about whitespace that must succeed (see okTest)
 51 | okWhitespace :: [(T.Text, ())]
 52 | okWhitespace = map (,()) [" ", "\t", "\n", "\r\n"]
 53 | 
 54 | -- Tests about comment that must succeed (see okTest)
 55 | okComment :: [(T.Text, ())]
 56 | okComment = map (,()) [";", ";hello world", ";hello\n", ";abcdef\r\n"]
 57 | 
 58 | okInterTokenSpace1 :: [(T.Text, ())]
 59 | okInterTokenSpace1 = okWhitespace
 60 |                   ++ okComment
 61 |                   ++ map (,()) [" ;comment\n    ",
 62 |                      "\t\n;comment   \n   ",
 63 |                      "\n\n\n\n\n",
 64 |                      "\r\n;Hello World",
 65 |                      "  ;comment\n   ;comment"]
 66 | 
 67 | okInterTokenSpace :: [(T.Text, ())]
 68 | okInterTokenSpace = okInterTokenSpace1 ++ map (,()) [""]
 69 | 
 70 | okChar :: [(T.Text, Char)]
 71 | okChar = [("#\\t", 't'),
 72 |           ("#\\a", 'a'),
 73 |           ("#\\space", ' '),
 74 |           ("#\\newline", '\n'),
 75 |           ("#\\\n", '\n'),
 76 |           ("#\\ ", ' '),
 77 |           ("#\\\t", '\t')]
 78 | 
 79 | okBool :: [(T.Text, Bool)]
 80 | okBool = [("#t", True),
 81 |           ("#f", False)]
 82 | 
 83 | okIdentifier :: [(T.Text, T.Text)]
 84 | okIdentifier = map (\x -> (x,x)) ["foo", "x2", "!hot!", "+", "-", "...",
 85 |                                   "helloWorld"]
 86 | 
 87 | okString :: [(T.Text, T.Text)]
 88 | okString = [("\"abc def ghi\"", "abc def ghi"),
 89 |             ("\"\"", ""),
 90 |             ("\"\n\"", "\n"),
 91 |             ("\" \"", " "),
 92 |             ("\"\t\"", "\t"),
 93 |             (T.pack ['"','\\','\\','"'], "\\"),
 94 |             (T.pack ['"','\\','"','"'], "\"")]
 95 | 
 96 | okNumber :: [(T.Text, SchemeNumber)]
 97 | okNumber =  [("-1", SchemeNumber Exact $
 98 |                     CReal (SInteger Minus (UInteger 1))),
 99 |     ("-0", SchemeNumber Exact $
100 |                     CReal (SInteger Minus (UInteger 0 ))),
101 |     ("0", SchemeNumber Exact $
102 |                     CReal (SInteger Plus (UInteger 0))),
103 |     ("1", SchemeNumber Exact $
104 |                     CReal (SInteger Plus (UInteger 1))),
105 | 
106 | 
107 |     ("#e1", SchemeNumber Exact $
108 |                     CReal (SInteger Plus (UInteger 1))),
109 | 
110 |     ("#i1", SchemeNumber Inexact $
111 |                     CReal (SInteger Plus (UInteger 1))),
112 | 
113 |     ("#b1", SchemeNumber Exact $
114 |                     CReal (SInteger Plus (UInteger 1))),
115 |     ("#o1", SchemeNumber Exact $
116 |                     CReal (SInteger Plus (UInteger 1))),
117 |     ("#d1", SchemeNumber Exact $
118 |                     CReal (SInteger Plus (UInteger 1))),
119 |     ("#x1", SchemeNumber Exact $
120 |                     CReal (SInteger Plus (UInteger 1))),
121 |     ("#xa", SchemeNumber Exact $ CReal (SInteger Plus (UInteger 10))),
122 |     ("#xb", SchemeNumber Exact $
123 |                     CReal (SInteger Plus (UInteger 11))),
124 |     ("#xc", SchemeNumber Exact $
125 |                     CReal (SInteger Plus (UInteger 12))),
126 |     ("#xd", SchemeNumber Exact $
127 |                     CReal (SInteger Plus (UInteger 13))),
128 |     ("#xe", SchemeNumber Exact $
129 |                     CReal (SInteger Plus (UInteger 14))),
130 |     ("#xf", SchemeNumber Exact $
131 |                     CReal (SInteger Plus (UInteger 15))),
132 |     ("-0001", SchemeNumber Exact $
133 |                     CReal (SInteger Minus (UInteger 1))),
134 |     ("-0000", SchemeNumber Exact $
135 |                     CReal (SInteger Minus (UInteger 0))),
136 |     ("0000", SchemeNumber Exact $
137 |                     CReal (SInteger Plus (UInteger 0))),
138 |     ("0001", SchemeNumber Exact $
139 |                     CReal (SInteger Plus (UInteger 1))),
140 | 
141 |     ("-1#", SchemeNumber Inexact $
142 |                     CReal (SInteger Minus (UIntPounds 1 1))),
143 |     ("-0#", SchemeNumber Inexact $
144 |                     CReal (SInteger Minus (UIntPounds 0 1))),
145 |     ("0#", SchemeNumber Inexact $
146 |                     CReal (SInteger Plus (UIntPounds 0 1))),
147 |     ("1#", SchemeNumber Inexact $
148 |                     CReal (SInteger Plus (UIntPounds 1 1))),
149 | 
150 |     ("-1###", SchemeNumber Inexact $
151 |                     CReal (SInteger Minus (UIntPounds 1 3))),
152 |     ("-0###", SchemeNumber Inexact $
153 |                     CReal (SInteger Minus (UIntPounds 0 3))),
154 |     ("0###", SchemeNumber Inexact $
155 |                     CReal (SInteger Plus (UIntPounds 0 3))),
156 |     ("1###", SchemeNumber Inexact $
157 |                     CReal (SInteger Plus (UIntPounds 1 3))),
158 | 
159 |     ("-12345", SchemeNumber Exact $
160 |                     CReal (SInteger Minus (UInteger 12345))),
161 |     ("12345", SchemeNumber Exact $
162 |                     CReal (SInteger Plus (UInteger 12345))),
163 | 
164 |     ("-12345/5", SchemeNumber Exact $
165 |                     CReal (SRational Minus (UInteger 12345) (UInteger 5))),
166 |     ("12345/5", SchemeNumber Exact $
167 |                     CReal (SRational Plus (UInteger 12345) (UInteger 5))),
168 |     ("-12345#/5", SchemeNumber Inexact $
169 |                     CReal (SRational Minus (UIntPounds 12345 1) (UInteger 5))),
170 |     ("12345/5##", SchemeNumber Inexact $
171 |                     CReal (SRational Plus (UInteger 12345) (UIntPounds 5 2))),
172 |     ("-12345##/5", SchemeNumber Inexact $
173 |                     CReal (SRational Minus (UIntPounds 12345 2) (UInteger 5))),
174 |     ("12345####/5#", SchemeNumber Inexact $
175 |                     CReal (SRational Plus (UIntPounds 12345 4) (UIntPounds 5 1))),
176 | 
177 | 
178 |     ("-12345.0", SchemeNumber Inexact $
179 |                     CReal (SDecimal Minus (UInteger 12345) (UInteger 0) Nothing)),
180 |     (".0", SchemeNumber Inexact $
181 |                     CReal (SDecimal Plus (UInteger 0) (UInteger 0) Nothing)),
182 |     ("0.", SchemeNumber Inexact $
183 |                     CReal (SDecimal Plus (UInteger 0) (UInteger 0) Nothing)),
184 | 
185 |     ("0.###", SchemeNumber Inexact $
186 |                     CReal (SDecimal Plus (UInteger 0) (UPounds 3) Nothing)),
187 |     ("-.569", SchemeNumber Inexact $
188 |                     CReal (SDecimal Minus (UInteger 0) (UInteger 569) Nothing)),
189 |     ("-245#.", SchemeNumber Inexact $
190 |                     CReal (SDecimal Minus (UIntPounds 245 1) (UPounds 0) Nothing)),
191 |     ("#e-.569", SchemeNumber Exact $
192 |                     CReal (SDecimal Minus (UInteger 0) (UInteger 569) Nothing)),
193 |     ("1e10", SchemeNumber Inexact $
194 |                     CReal (SDecimal Plus (UInteger 1)
195 |                                                        (UInteger 0)
196 |                                                        (Just $ Suffix PDefault Plus 10))),
197 |     ("1e-10", SchemeNumber Inexact $
198 |                     CReal (SDecimal Plus (UInteger 1)
199 |                                                        (UInteger 0)
200 |                                                        (Just $ Suffix PDefault Minus 10))),
201 |     ("1s10", SchemeNumber Inexact $
202 |                     CReal (SDecimal Plus (UInteger 1)
203 |                                                        (UInteger 0)
204 |                                                        (Just $ Suffix PShort Plus 10))),
205 |     ("1f10", SchemeNumber Inexact $
206 |                     CReal (SDecimal Plus (UInteger 1)
207 |                                                        (UInteger 0)
208 |                                                        (Just $ Suffix PSingle Plus 10))),
209 |     ("1d10", SchemeNumber Inexact $
210 |                     CReal (SDecimal Plus (UInteger 1)
211 |                                                        (UInteger 0)
212 |                                                        (Just $ Suffix PDouble Plus 10))),
213 |     ("1l10", SchemeNumber Inexact $
214 |                     CReal (SDecimal Plus (UInteger 1)
215 |                                                        (UInteger 0)
216 |                                                        (Just $ Suffix PLong Plus 10))),
217 | 
218 |     ("1+i", SchemeNumber Exact $
219 |                     CAbsolute (SInteger Plus (UInteger 1)) (SInteger Plus (UInteger 1))),
220 | 
221 |     ("1-i", SchemeNumber Exact $
222 |                     CAbsolute (SInteger Plus (UInteger 1)) (SInteger Minus (UInteger 1))),
223 | 
224 |     ("0.5+i", SchemeNumber Inexact $
225 |                     CAbsolute (SDecimal Plus (UInteger 0)
226 |                                                   (UInteger 5)
227 |                                                   Nothing) (SInteger Plus (UInteger 1))),
228 | 
229 |     ("-8i", SchemeNumber Exact $
230 |                     CAbsolute (SInteger Plus (UInteger 0)) (SInteger Minus (UInteger 8))),
231 | 
232 | 
233 |     ("-8.25i", SchemeNumber Inexact $
234 |                     CAbsolute (SInteger Plus (UInteger 0)) (SDecimal Minus (UInteger 8)
235 |                                                                                     (UInteger 25)
236 |                                                                                     Nothing)),
237 |     ("0@25", SchemeNumber Exact $
238 |                     CAngle (SInteger Plus (UInteger 0)) (SInteger Plus (UInteger 25))),
239 | 
240 |     ("1/4@-25", SchemeNumber Exact $
241 |                     CAngle (SRational Plus (UInteger 1) (UInteger 4)) (SInteger Minus (UInteger 25))),
242 | 
243 |     ("1#/4@-25##", SchemeNumber Inexact $
244 |                     CAngle (SRational Plus (UIntPounds 1 1) (UInteger 4)) (SInteger Minus (UIntPounds 25 2)))]
245 | 
246 | -- Returns all the "ok..." series of tests except the one provided
247 | -- as input. Since R5RS grammar is non ambiguous, a token parser should not be able
248 | -- to parse the valid input of other token parser.
249 | mkKoTest :: [T.Text] -> [T.Text]
250 | mkKoTest goodStr =
251 |   let allTests = map fst okInterTokenSpace ++
252 |                  map fst okChar ++
253 |                  map fst okBool ++
254 |                  map fst okIdentifier ++
255 |                  map fst okString ++
256 |                  map fst okNumber
257 |   in filter (not . (`elem` goodStr)) (alwaysBad ++ allTests)
258 | 
259 | -- Always bad string input
260 | alwaysBad :: [T.Text]
261 | alwaysBad = ["#", "#T", "#F", "#true", "#false"] ++
262 |           -- Ill formated number
263 |             ["#b3", "#o9", "#da", "#xA", "#b1.1", "#o1.1", "#x1.1", "#b.1",
264 |              "#o.1", "#x.1", "123##.12"]
265 | 
266 | r5rsTestTree :: TestTree
267 | r5rsTestTree = testGroup "Language/R5RS.hs" $ [
268 |   testGroup "whitespace" $
269 |       map (okTest R5.whitespace) okWhitespace ++
270 |       map (koTest R5.whitespace) (mkKoTest $ map fst okWhitespace),
271 |   testGroup "comment" $
272 |       map (okTest R5.comment) okComment ++
273 |       map (koTest R5.comment) (mkKoTest $ map fst okComment),
274 |   testGroup "interTokenSpace" $
275 |       map (okTest R5.interTokenSpace) okInterTokenSpace ++
276 |       map (koTest R5.interTokenSpace) (mkKoTest $ map fst okInterTokenSpace),
277 |   testGroup "interTokenSpace1" $
278 |       map (okTest R5.interTokenSpace1) okInterTokenSpace1 ++
279 |       map (koTest R5.interTokenSpace1) (mkKoTest $ map fst okInterTokenSpace1),
280 |   testGroup "character" $
281 |       map (okTest R5.character) okChar ++
282 |       map (koTest R5.character) (mkKoTest $ map fst okChar),
283 |   testGroup "boolean" $
284 |       map (okTest R5.boolean) okBool ++
285 |       map (koTest R5.boolean) (mkKoTest $ map fst okBool),
286 |   testGroup "identifier" $
287 |         map (okTest R5.identifier) okIdentifier ++
288 |         map (koTest R5.identifier) (mkKoTest $ map fst okIdentifier),
289 |   testGroup "string" $
290 |         map (okTest R5.stringParser) okString ++
291 |         map (koTest R5.stringParser) (mkKoTest $ map fst okString),
292 |   testGroup "number" $
293 |         map (okTest R5.number) okNumber ++
294 |         map (koTest R5.number) (mkKoTest $ map fst okNumber),
295 | 
296 |     testGroup "datum" $
297 |       map okDatum (map (fmap ( (:[]) . DChar)) okChar) ++
298 |       map okDatum (map (fmap ( (:[]) . DBoolean)) okBool) ++
299 |       map okDatum (map (fmap ( (:[]) . DIdentifier)) okIdentifier) ++
300 |       map okDatum (map (fmap ( (:[]) . DString)) okString) ++
301 |       map okDatum (map (fmap ( (:[]) . DNumber)) okNumber) ++
302 |       map okDatum
303 |       [("(foo #\\a)", [DList [DIdentifier "foo", DChar 'a']]),
304 |        ("(foo #\\a) \"hello\"", [DList [DIdentifier "foo", DChar 'a'], DString "hello"]),
305 |        ("'foo", [DQuote (DIdentifier "foo")]),
306 |        ("`foo", [DQuasiquote (DIdentifier "foo")]),
307 |        ("`(foo ,a)", [DQuasiquote (DList [DIdentifier "foo", DComma (DIdentifier "a")])]),
308 |        ("`(foo , a)", [DQuasiquote (DList [DIdentifier "foo", DComma (DIdentifier "a")])]),
309 |        ("`(foo, a)", [DQuasiquote (DList [DIdentifier "foo", DComma (DIdentifier "a")])]),
310 |        ("`(foo ,@a)", [DQuasiquote (DList [DIdentifier "foo", DCommaAt (DIdentifier "a")])]),
311 |        ("`(foo ,@ a)", [DQuasiquote (DList [DIdentifier "foo", DCommaAt (DIdentifier "a")])]),
312 |        ("`(foo,@ a)", [DQuasiquote (DList [DIdentifier "foo", DCommaAt (DIdentifier "a")])]),
313 |        ("(foo . a)", [DDotList [DIdentifier "foo"] (DIdentifier "a")]),
314 |        ("(foo a b c . d)",
315 |         [DDotList [DIdentifier "foo", DIdentifier "a", DIdentifier "b", DIdentifier "c"] (DIdentifier "d")])]
316 |       ++
317 |       map koDatum ["(foo .)", "(foo ')", "(foo `)", "(foo ,)", "(foo ,@)", "(foo a b . c d)"]
318 |   ]
319 | 


--------------------------------------------------------------------------------
/src/Data/SExpresso/Language/SchemeR5RS.hs:
--------------------------------------------------------------------------------
  1 | -- |
  2 | -- Module      :  Data.SExpresso.Language.SchemeR5RS
  3 | -- Copyright   :  © 2019 Vincent Archambault
  4 | -- License     :  0BSD
  5 | --
  6 | -- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
  7 | -- Stability   :  experimental
  8 | --
  9 | -- Module for parsing the Scheme R5RS language.
 10 | --
 11 | -- Scheme R5RS s-expressions are parsed as @'SExpr' 'SExprType'
 12 | -- 'SchemeToken'@.  Such s-expressions can be converted into a Scheme
 13 | -- R5RS datum (see 'Datum') by the function 'sexpr2Datum'.
 14 | 
 15 | 
 16 | {-# LANGUAGE DeriveDataTypeable #-}
 17 | {-# LANGUAGE OverloadedStrings #-}
 18 | {-# LANGUAGE TypeFamilies #-}
 19 | {-# LANGUAGE ScopedTypeVariables #-}
 20 | 
 21 | -- Parsing library for some parts of the Scheme R5RS language
 22 | -- as defined in section 7 of the report
 23 | -- The library does parse tab and \r\n and whitespace
 24 | module Data.SExpresso.Language.SchemeR5RS (
 25 |   -- * SchemeToken and Datum related data types and functions
 26 |   SExprType(..),
 27 |   SchemeToken(..),
 28 |   tokenParser,
 29 |   sexpr,
 30 | 
 31 |   Datum(..),
 32 |   sexpr2Datum,
 33 | 
 34 |   -- * Scheme R5RS whitespace parsers
 35 |   whitespace,
 36 |   comment,
 37 |   interTokenSpace,
 38 |   interTokenSpace1,
 39 | 
 40 |   -- * Individual parser for each of the constructors of SchemeToken
 41 |   identifier,
 42 |   boolean,
 43 |   character,
 44 |   stringParser,
 45 |   quote,
 46 |   quasiquote,
 47 |   comma,
 48 |   commaAt,
 49 |   dot,
 50 | 
 51 |   -- ** Scheme Number
 52 |   --
 53 |   -- | Scheme R5RS numbers are quite exotic. They can have exactness
 54 |   -- prefix, radix prefix and the pound sign (#) can replace a
 55 |   -- digit. On top of that, you can define integer, rational, decimal
 56 |   -- and complex numbers of arbitrary precision. Decimal numbers can
 57 |   -- also have a suffix indicating the machine precision.
 58 |   --
 59 |   -- Since Haskell does not have native types to express this
 60 |   -- complexity, this module defines the 'SchemeNumber' data type to
 61 |   -- encode the parsed number. User of this module can then convert a
 62 |   -- 'SchemeNumber' object to a more appropriate data type according
 63 |   -- to their needs.
 64 |   SchemeNumber(..),
 65 |   Exactness(..),
 66 |   Complex(..),
 67 |   SReal(..),
 68 |   Sign(..),
 69 |   UInteger(..),
 70 |   Pounds,
 71 |   Precision(..),
 72 |   Suffix(..),
 73 |   number,
 74 | 
 75 | 
 76 |   ) where
 77 | 
 78 | import Control.Monad (mzero)
 79 | import Data.Data
 80 | import Data.Maybe
 81 | import Data.Proxy
 82 | import Data.List
 83 | import qualified Data.Char as C
 84 | import qualified Data.Text as T
 85 | import qualified Data.Text.Lazy as L
 86 | import qualified Data.Text.Lazy.Builder as B
 87 | import Data.Foldable
 88 | import Text.Megaparsec
 89 | import Text.Megaparsec.Char
 90 | import qualified Text.Megaparsec.Char.Lexer as ML
 91 | import Data.SExpresso.SExpr
 92 | import Data.SExpresso.Parse
 93 | 
 94 | -- | The 'SchemeToken' data type defines the atoms of an Scheme R5RS
 95 | -- s-expression. An @'SExpr' 'SExprType' 'SchemeToken'@ object
 96 | -- containning the atoms 'TQuote', 'TQuasiquote', 'TComma', 'TCommaAt'
 97 | -- and 'TDot' need futher processing in order to get what the R5RS
 98 | -- report calls a datum. See also 'Datum'.
 99 | data SchemeToken =
100 |   -- | A boolean.
101 |   TBoolean Bool
102 |   -- | A number. See 'SchemeNumber'.
103 |   | TNumber SchemeNumber
104 |   -- | A unicode character.
105 |   | TChar Char
106 |   -- | A string.
107 |   | TString T.Text
108 |   -- | A valid R5RS identifier.
109 |   | TIdentifier T.Text
110 |   -- | The quote (') symbol.
111 |   | TQuote
112 |   -- | The quasiquote (`) symbol.
113 |   | TQuasiquote
114 |   -- | The comma (,) symbol.
115 |   | TComma
116 |   -- | The comma at (,\@) symbol.
117 |   | TCommaAt
118 |   -- | The dot (.) symbol.
119 |   | TDot
120 |   deriving (Eq, Show, Data)
121 | 
122 | -- | The 'tokenParser' parses a 'SchemeToken'
123 | tokenParser :: (MonadParsec e s m, Token s ~ Char) => m SchemeToken
124 | tokenParser = (boolean >>= return . TBoolean) <|>
125 |               -- character must come before number
126 |               (character >>= return . TChar) <|>
127 |               (stringParser >>= return . TString) <|>
128 |               -- We must try number because it can conflict with
129 |               -- the dot ex : .2 and (a . b)
130 |               -- and identifier ex : - and -1
131 |               (try number >>= return . TNumber) <|>
132 |               (identifier >>= return . TIdentifier) <|>
133 |               (quote >> return TQuote) <|>
134 |               (quasiquote >> return TQuasiquote) <|>
135 |               -- commaAt must come before comma
136 |               (commaAt >> return TCommaAt) <|>
137 |               (comma >> return TComma) <|>
138 |               (dot >> return TDot)
139 | 
140 | 
141 | spacingRule :: SchemeToken -> SpacingRule
142 | spacingRule (TString _) = SOptional
143 | spacingRule TQuote = SOptional
144 | spacingRule TQuasiquote  = SOptional
145 | spacingRule TComma = SOptional
146 | spacingRule TCommaAt = SOptional
147 | spacingRule _ = SMandatory
148 | 
149 | -- | Scheme R5RS defines two types of s-expressions. Standard list
150 | -- beginning with '(' and vector beginning with '#('. The 'SExprType'
151 | -- data type indicates which one was parsed.
152 | data SExprType =
153 |   -- | A standard list
154 |   STList
155 |   -- | A vector
156 |   | STVector
157 |   deriving (Eq, Show, Data)
158 | 
159 | -- | The 'sexpr' defines a 'SExprParser' to parse a Scheme R5RS
160 | -- s-expression as an @'SExpr' 'SExprType' 'SchemeToken'@. If you also
161 | -- want source position see the 'withLocation' function.
162 | --
163 | -- Space is optional before and after the following tokens:
164 | --
165 | -- * 'TString'
166 | -- * 'TQuote'
167 | -- * 'TQuasiquote'
168 | -- * 'TComma'
169 | -- * 'TCommaAt'
170 | sexpr :: forall e s m . (MonadParsec e s m, Token s ~ Char) => SExprParser m SExprType SchemeToken
171 | sexpr =
172 |   let sTag = (single '(' >> return STList) <|> (chunk (tokensToChunk (Proxy :: Proxy s) "#(") >> return STVector)
173 |       eTag = \t -> single ')' >> return t
174 |   in SExprParser sTag eTag tokenParser interTokenSpace1 (mkSpacingRule spacingRule)
175 | 
176 | -- | The 'Datum' data type implements the Scheme R5RS definition of a Datum. See also 'sexpr2Datum'.
177 | data Datum = DBoolean Bool
178 |            | DNumber SchemeNumber
179 |            | DChar Char
180 |            | DString T.Text
181 |            | DIdentifier T.Text
182 |            | DList [Datum]
183 |            | DDotList [Datum] Datum
184 |            | DQuote Datum
185 |            | DQuasiquote Datum
186 |            | DComma Datum
187 |            | DCommaAt Datum
188 |            | DVector [Datum]
189 |            deriving (Eq, Show, Data)
190 | 
191 | -- | The 'sexpr2Datum' function takes a list of 'SchemeToken' and
192 | -- returns a list of 'Datum'. In case of failure it will report an
193 | -- error, hence the 'Either' data type in the signature.
194 | --
195 | -- As defined in the Scheme R5RS report, the 'TQuote', 'TQuasiquote',
196 | -- 'TComma', 'TCommaAt' and 'TDot' tokens must be followed by another
197 | -- token.
198 | sexpr2Datum :: [SExpr SExprType SchemeToken] -> Either String [Datum]
199 | sexpr2Datum = foldrM vectorFold []
200 |   where vectorFold :: SExpr SExprType SchemeToken -> [Datum] -> Either String [Datum]
201 |         vectorFold (SAtom TQuote) [] = Left $ "Expecting a datum after a quote"
202 |         vectorFold (SAtom TQuote) (x : xs) = pure $ DQuote x : xs
203 |         vectorFold (SAtom TQuasiquote) [] = Left $ "Expecting a datum after a quasiquote"
204 |         vectorFold (SAtom TQuasiquote) (x : xs) = pure $ DQuasiquote x : xs
205 |         vectorFold (SAtom TComma) [] = Left $ "Expecting a datum after a comma"
206 |         vectorFold (SAtom TComma) (x : xs) = pure $ DComma x : xs
207 |         vectorFold (SAtom TCommaAt) [] = Left $ "Expecting a datum after a commaAt"
208 |         vectorFold (SAtom TCommaAt) (x : xs) = pure $ DCommaAt x : xs
209 |         vectorFold (SAtom TDot) _ = Left "Unexpected dot"
210 |         vectorFold (SList STVector xs) acc = ((:) . DVector) <$> sexpr2Datum xs <*> pure acc
211 |         vectorFold (SList STList xs) acc =
212 |           let chooseConstructor (isDotList, ls) = (:) (if isDotList
213 |                                                        then DDotList (init ls) (last ls)
214 |                                                        else DList ls)
215 |           in chooseConstructor <$> (foldrM listFold (False, []) xs) <*> pure acc
216 |         vectorFold (SAtom x) acc = pure $ simpleToken x : acc
217 | 
218 |         simpleToken :: SchemeToken -> Datum
219 |         simpleToken (TBoolean x) = DBoolean x
220 |         simpleToken (TNumber x) = DNumber x
221 |         simpleToken (TChar x) = DChar x
222 |         simpleToken (TString x) = DString x
223 |         simpleToken (TIdentifier x) = DIdentifier x
224 |         simpleToken _ = error "simpleToken only handles a subset of SchemeToken constructors"
225 | 
226 |         listFold :: SExpr SExprType SchemeToken -> (Bool, [Datum]) -> Either String (Bool, [Datum])
227 |         listFold (SAtom TDot) (_, [x]) = pure (True, [x])
228 |         listFold x (d, acc) = (,) d <$> vectorFold x acc
229 | 
230 | ------------------------- Whitespace and comments -------------------------
231 | -- | The 'whitespace' parser  parses one space, tab or end of line (\\n and \\r\\n).
232 | whitespace :: (MonadParsec e s m, Token s ~ Char) => m ()
233 | whitespace = (char ' ' >> return ()) <|>
234 |              (char '\t' >> return ()) <|>
235 |              (eol >> return ())
236 | 
237 | -- | The 'comment' parser parses a semi-colon (;) character and
238 | -- everything until the end of line included.
239 | comment :: (MonadParsec e s m, Token s ~ Char) => m ()
240 | comment = char ';' >>
241 |           takeWhileP Nothing (\c -> c /= '\n' && c /= '\r') >>
242 |           ((eol >> return ()) <|> eof)
243 | 
244 | atmosphere :: (MonadParsec e s m, Token s ~ Char) => m ()
245 | atmosphere = whitespace <|> comment
246 | 
247 | -- | The 'interTokenSpace' parser parses zero or more whitespace or comment.
248 | interTokenSpace :: (MonadParsec e s m, Token s ~ Char) => m ()
249 | interTokenSpace = many atmosphere >> return ()
250 | 
251 | -- | The 'interTokenSpace1' parser parses one or more whitespace or comment.
252 | interTokenSpace1 :: (MonadParsec e s m, Token s ~ Char) => m ()
253 | interTokenSpace1 = some atmosphere >> return ()
254 | 
255 | ------------------------- Identifier -------------------------
256 | 
257 | -- | The 'identifier' parser parses a Scheme R5RS identifier.
258 | identifier :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m T.Text
259 | identifier = standardIdentifier <|> peculiarIdentifier
260 |   where standardIdentifier = do
261 |           i <- oneOf initialList
262 |           is <- takeWhileP Nothing (\c -> c `elem` subsequentList)
263 |           return $ T.pack $ (i : chunkToTokens (Proxy :: Proxy s) is)
264 | 
265 | initialList :: String
266 | initialList = ['a'..'z'] ++ ['A'..'Z'] ++ "!$%&*/:<=>?^_~"
267 | 
268 | subsequentList :: String
269 | subsequentList = initialList ++ ['0'..'9'] ++ "+-.@"
270 | 
271 | peculiarIdentifier :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m T.Text
272 | peculiarIdentifier = (single '+' >> return "+") <|>
273 |                      (single '-' >> return "-") <|>
274 |                      (chunk (tokensToChunk (Proxy :: Proxy s) "...") >> return "...")
275 | 
276 | ------------------------- Booleans -------------------------
277 | -- | The 'boolean' parser parses a Scheme R5RS boolean (\#t or \#f).
278 | boolean :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m Bool
279 | boolean = (chunk (tokensToChunk (Proxy :: Proxy s) "#t") >> return True) <|>
280 |           (chunk (tokensToChunk (Proxy :: Proxy s) "#f") >> return False)
281 | 
282 | 
283 | ------------------------- Character -------------------------
284 | -- | The 'character' parser parses a Scheme R5RS character.
285 | character :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m Char
286 | character = do
287 |   _ <- chunk (tokensToChunk (Proxy :: Proxy s) "#\\")
288 |   (chunk (tokensToChunk (Proxy :: Proxy s) "newline") >> return '\n') <|>
289 |    (chunk (tokensToChunk (Proxy :: Proxy s) "space") >> return ' ') <|>
290 |    anySingle
291 | 
292 | ------------------------- String -------------------------
293 | -- | The 'stringParser' parser parses a Scheme R5RS character.
294 | stringParser :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m T.Text
295 | stringParser = do
296 |   _ <- char '"'
297 |   xs <- consume
298 |   return $ L.toStrict $ B.toLazyText xs
299 | 
300 |   where consume :: (MonadParsec e s m, Token s ~ Char) => m B.Builder
301 |         consume = do
302 |           x <- takeWhileP Nothing (\c -> c /= '\\' && c /= '"')
303 |           c <- char '\\' <|> char '"'
304 |           let xB = B.fromString $ chunkToTokens (Proxy :: Proxy s) x
305 |           case c of
306 |             '"' -> return $ xB
307 |             _ -> do
308 |                c1 <- char '\\' <|> char '"'
309 |                x2 <- consume
310 |                return $ xB <> B.fromString [c1] <> x2
311 | 
312 | 
313 | ------------------------- Numbers -------------------------
314 | data Radix = R2 | R8 | R10 | R16
315 |            deriving (Eq, Show, Data)
316 | 
317 | -- | A Scheme R5RS number is either exact or inexact. The paragraph
318 | -- 6.4.2 from the R5RS report should clarify the meaning of exact and
319 | -- inexact :
320 | --
321 | -- \"\"\"A numerical constant may be specified to be either
322 | -- exact or inexact by a prefix.  The prefixes are \#e for exact, and \#i
323 | -- for inexact.  An exactness prefix may appear before or after any
324 | -- radix prefix that is used.  If the written representation of a
325 | -- number has no exactness prefix, the constant may be either inexact
326 | -- or exact.  It is inexact if it contains a decimal point, an
327 | -- exponent, or a \“#\” character in the place of a digit, otherwise it
328 | -- is exact.\"\"\"
329 | data Exactness = Exact | Inexact
330 |                deriving (Eq, Show, Data)
331 | 
332 | -- | The 'Sign' datatype indicates if a number is positive ('Plus') or negative ('Minus')
333 | data Sign = Plus | Minus
334 |           deriving (Eq, Show, Data)
335 | 
336 | -- | A Scheme R5RS number can have many # signs at the end. This type alias
337 | -- indicates the number of # signs parsed.
338 | type Pounds = Integer
339 | 
340 | -- | A Scheme R5RS unsigned integer can be written in three ways.
341 | --
342 | -- * With digits only
343 | -- * With digits and # signs
344 | -- * With only # signs in some special context.
345 | data UInteger =
346 |   -- | Integer made only of digits
347 |   UInteger Integer
348 |   -- | Integer made of digits and #. The first argument is the number
349 |   -- that was parsed and the second the number of # signs. For
350 |   -- example, 123## is represented as @UIntPounds 123 2@. Do not take
351 |   -- the first argument as a good approximation of the number. It
352 |   -- needs to be shifted by the number of pounds.
353 |   | UIntPounds Integer Pounds
354 |   -- | Integer made only of #. It can only appear as the third argument in numbers of the form @'SDecimal' _ _ _ _@.
355 |   | UPounds Pounds
356 |   deriving (Eq, Show, Data)
357 | 
358 | hasPounds :: UInteger -> Bool
359 | hasPounds (UInteger _) = False
360 | hasPounds _ = True
361 | 
362 | isInexactI :: UInteger -> Bool
363 | isInexactI = hasPounds
364 | 
365 | -- | Scheme R5RS defines 5 types of machine precision for a decimal
366 | -- number. The machine precision is specified in the suffix (see
367 | -- 'Suffix').
368 | data Precision =
369 |   -- | Suffix starting with e.
370 |   PDefault |
371 |   -- | Suffix starting with s.
372 |   PShort |
373 |   -- | Suffix starting with f.
374 |   PSingle |
375 |   -- | Suffix starting with d.
376 |   PDouble |
377 |   -- | Suffix starting with l.
378 |   PLong
379 |   deriving (Eq, Show, Data)
380 | 
381 | -- | The 'Suffix' data type represents the suffix for a Scheme R5RS
382 | -- decimal number. It is a based 10 exponent.
383 | data Suffix = Suffix Precision Sign Integer
384 |             deriving (Eq, Show, Data)
385 | 
386 | -- | The 'SReal' data type represents a Scheme R5RS real number.
387 | data SReal =
388 |   -- | A signed integer.
389 |   SInteger Sign UInteger
390 |   -- | A signed rational. The first number is the numerator and the
391 |   -- second one the denominator.
392 |   | SRational Sign UInteger UInteger
393 |   -- | A signed decimal number. The first number appears before the
394 |   -- dot, the second one after the dot.
395 |   | SDecimal Sign UInteger UInteger (Maybe Suffix)
396 |   deriving (Eq, Show, Data)
397 | 
398 | isInexactR :: SReal -> Bool
399 | isInexactR (SInteger _ i) = isInexactI i
400 | isInexactR (SRational _ i1 i2) = isInexactI i1 || isInexactI i2
401 | isInexactR (SDecimal _ _ _ _) = True
402 | 
403 | -- | The 'Complex' data type represents a Scheme R5RS complex number.
404 | data Complex =
405 |   -- | A real number.
406 |   CReal SReal
407 |   -- | A complex number in angular notation.
408 |   | CAngle SReal SReal
409 |   -- | A complex number in absolute notation.
410 |   | CAbsolute SReal SReal
411 |   deriving (Eq, Show, Data)
412 | 
413 | isInexact :: Complex -> Bool
414 | isInexact (CReal s) = isInexactR s
415 | isInexact (CAngle s1 s2) = isInexactR s1 || isInexactR s2
416 | isInexact (CAbsolute s1 s2) = isInexactR s1 || isInexactR s2
417 | 
418 | -- | A Scheme R5RS number is an exact or inexact complex number.
419 | data SchemeNumber = SchemeNumber Exactness Complex
420 |                   deriving (Eq, Show, Data)
421 | 
422 | -- | The 'number' parser parses a Scheme R5RS number.
423 | number :: (MonadParsec e s m, Token s ~ Char) => m SchemeNumber
424 | number = do
425 |   (r, e) <- prefix
426 |   c <- complex (fromMaybe R10 r)
427 |   let e' = fromMaybe (if isInexact c then Inexact else Exact) e
428 |   return $ SchemeNumber e'  c
429 | 
430 | complex :: forall e s m . (MonadParsec e s m, Token s ~ Char) => Radix -> m Complex
431 | complex r = do
432 |   ms <- optional sign
433 |   case ms of
434 |     Nothing -> complex' Plus
435 |     Just s -> i s <|> complex' s
436 | 
437 |   where
438 |     -- Parser for +i and -i
439 |     i s = char 'i' >> (return $ CAbsolute (SInteger Plus (UInteger 0)) (SInteger s (UInteger 1)))
440 | 
441 |     -- Parser for complex except +i and -i
442 |     complex' sr = do
443 |       -- First parse a number
444 |       n1 <- ureal r sr
445 |       -- Check if the number is followed by any of these characters
446 |       c <- optional (char '@' <|> char '+' <|> char '-' <|> char 'i')
447 |       case c of
448 |           -- Plain real number
449 |           Nothing -> return $ CReal n1
450 |           -- Complex angular number
451 |           Just '@' -> do
452 |             n2 <- real r
453 |             return $ CAngle n1 n2
454 |           -- Pure imaginary number
455 |           Just 'i' -> return $ CAbsolute (SInteger Plus (UInteger 0)) n1
456 |           -- Real +/- Imaginary number
457 |           Just '+' -> imaginaryPart n1 Plus
458 |           Just _ -> imaginaryPart n1 Minus
459 | 
460 |     imaginaryPart realN si = do
461 |       u <- optional (ureal r si)
462 |       _ <- char 'i'
463 |       case u of
464 |         Nothing -> return $ CAbsolute realN (SInteger si (UInteger 1))
465 |         Just n2 -> return $ CAbsolute realN n2
466 | 
467 | real :: (MonadParsec e s m, Token s ~ Char) => Radix -> m SReal
468 | real r = do
469 |   s <- option Plus sign
470 |   ureal r s
471 | 
472 | ureal :: forall e s m . (MonadParsec e s m, Token s ~ Char) => Radix -> Sign -> m SReal
473 | ureal r s = dotN <|> ureal'
474 | 
475 |   where dotN =  do
476 |           _ <- char '.'
477 |           if r /= R10
478 |           then label "Numbers containing decimal point must be in decimal radix" mzero
479 |           else do
480 |              n <- uinteger R10
481 |              sf <- optional suffix
482 |              return $ SDecimal s (UInteger 0) n sf
483 | 
484 |         ureal' = do
485 |           -- First parse an integer
486 |           u1 <- uinteger r
487 |           -- Check if the integer is followed by these characters
488 |           mc <- optional (char '/' <|> char '.')
489 |           case mc of
490 |             -- Integer with or without suffix
491 |             Nothing -> plainInteger u1
492 |             -- Rational
493 |             Just '/' -> rational u1
494 |             -- Decimal
495 |             Just _ -> decimal u1
496 | 
497 |         plainInteger u1 = do
498 |             sf <- optional suffix
499 |             case sf of
500 |               Just _ -> return $ SDecimal s u1 (UInteger 0) sf
501 |               Nothing -> return $ SInteger s u1
502 | 
503 |         rational u1 = do
504 |           u2 <- uinteger r
505 |           return $ SRational s u1 u2
506 | 
507 |         decimal u1 = do
508 |           if r /= R10
509 |           then label "Numbers containing decimal point must be in decimal radix" mzero
510 |           else do
511 |              -- If u1 has # character, only other # are
512 |              -- allowed. Otherwise a number may be present
513 |              n <- if hasPounds u1 then return Nothing else optional (udigit R10) :: m (Maybe Integer)
514 |              pounds <- takeWhileP Nothing (== '#')
515 |              sf <- optional suffix
516 |              let nbPounds = toInteger $ chunkLength (Proxy :: Proxy s) pounds
517 |              let u2 = case (hasPounds u1, nbPounds, n) of
518 |                          (True, p, _) -> UPounds p
519 |                          (False, 0, Nothing) -> UInteger 0
520 |                          (False, 0, (Just x)) -> UInteger x
521 |                          (False, p, Nothing) -> UPounds p
522 |                          (False, p, (Just x)) -> UIntPounds x p
523 |              return $ SDecimal s u1 u2 sf
524 | 
525 | uinteger :: forall e s m . (MonadParsec e s m, Token s ~ Char) => Radix -> m UInteger
526 | uinteger r = do
527 |   n <- udigit r
528 |   pounds <- takeWhileP Nothing (== '#')
529 |   let nbPounds = toInteger $ chunkLength (Proxy :: Proxy s) pounds
530 |   if nbPounds <= 0
531 |   then return $ UInteger n
532 |   else return $ UIntPounds n nbPounds
533 | 
534 | 
535 | prefix :: (MonadParsec e s m, Token s ~ Char) => m (Maybe Radix, Maybe Exactness)
536 | prefix = do
537 |   x <- optional $ char '#'
538 |   case x of
539 |     Nothing -> return (Nothing, Nothing)
540 |     _ -> do
541 |       c <- char 'i' <|> char 'e' <|> char 'b' <|>
542 |            char 'o' <|> char 'd' <|> char 'x'
543 |       case c of
544 |         'i' -> optional radix >>= \r -> return (r, Just Inexact)
545 |         'e' -> optional radix >>= \r -> return (r, Just Exact)
546 |         'b' -> optional exactness >>= \e -> return (Just R2, e)
547 |         'o' -> optional exactness >>= \e -> return (Just R8, e)
548 |         'd' -> optional exactness >>= \e -> return (Just R10, e)
549 |         _ -> optional exactness >>= \e -> return (Just R16, e)
550 | 
551 | exactness :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m Exactness
552 | exactness = (chunk (tokensToChunk (Proxy :: Proxy s) "#e") >> return Exact) <|>
553 |             (chunk (tokensToChunk (Proxy :: Proxy s) "#i") >> return Inexact)
554 | 
555 | radix :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m  Radix
556 | radix =
557 |   (chunk (tokensToChunk (Proxy :: Proxy s) "#b") >> return R2) <|>
558 |   (chunk (tokensToChunk (Proxy :: Proxy s) "#o") >> return R8) <|>
559 |   (chunk (tokensToChunk (Proxy :: Proxy s) "#d") >> return R10) <|>
560 |   (chunk (tokensToChunk (Proxy :: Proxy s) "#x") >> return R16)
561 | 
562 | udigit :: forall e s m a . (MonadParsec e s m, Token s ~ Char, Integral a) => Radix -> m a
563 | udigit r = do
564 |   case r of
565 |     R2 -> ML.binary
566 |     R8 -> ML.octal
567 |     R10 -> ML.decimal
568 |     R16 -> hexadecimal -- ML.hexadecimal also parses uppercase "ABCDEF"
569 |   where hexadecimal = mkNum
570 |                       <$> takeWhile1P Nothing (\c -> c `elem` ("0123456789abcdef" :: String))
571 |                       <?> "hexadecimal integer"
572 | 
573 |         mkNum    = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
574 |         step a c = a * 16 + fromIntegral (C.digitToInt c)
575 | 
576 | sign :: (MonadParsec e s m, Token s ~ Char) => m  Sign
577 | sign = (char '-' >> return Minus) <|> (char '+' >> return Plus)
578 | 
579 | suffix :: (MonadParsec e s m, Token s ~ Char) => m Suffix
580 | suffix = do
581 |   p <- (char 'e' >> return PDefault)  <|>
582 |        (char 's' >> return PShort)  <|>
583 |        (char 'f' >> return PSingle) <|>
584 |        (char 'd' >> return PDouble) <|>
585 |        (char 'l' >> return PLong)
586 |   s <- option Plus sign
587 |   n <- udigit R10
588 |   return $ Suffix p s n
589 | 
590 | ------------------------- Other tokens -------------------------
591 | -- | The 'quote' parser parses a quote character (').
592 | quote :: (MonadParsec e s m, Token s ~ Char) => m Char
593 | quote = char '\''
594 | 
595 | -- | The 'quasiquote' parser parses a quasiquote character (`).
596 | quasiquote :: (MonadParsec e s m, Token s ~ Char) => m Char
597 | quasiquote = char '`'
598 | 
599 | -- | The 'comma' parser parses a comma (,).
600 | comma :: (MonadParsec e s m, Token s ~ Char) => m Char
601 | comma = char ','
602 | 
603 | -- | The 'commaAt' parser parses a comma followed by \@ (,\@).
604 | commaAt :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m T.Text
605 | commaAt = chunk (tokensToChunk (Proxy :: Proxy s) ",@") >> return ",@"
606 | 
607 | -- | The 'dot' parser parses a single dot character (.).
608 | dot :: (MonadParsec e s m, Token s ~ Char) => m Char
609 | dot = char '.'
610 | 


--------------------------------------------------------------------------------