├── Setup.hs ├── .gitignore ├── test ├── Spec.hs ├── Print_Unittests.hs ├── SExpr_Unittests.hs ├── Parse_Unittests.hs └── SchemeR5RS_Unittests.hs ├── LICENSE ├── src └── Data │ └── SExpresso │ ├── Parse.hs │ ├── Print.hs │ ├── Parse │ ├── Char.hs │ ├── Location.hs │ └── Generic.hs │ ├── Print │ └── Lazy.hs │ ├── SExpr.hs │ └── Language │ └── SchemeR5RS.hs ├── package.yaml ├── ChangeLog.md ├── stack.yaml └── README.md /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .stack-work/ 2 | sexpresso.cabal 3 | *~ 4 | .#* 5 | *.lock 6 | -------------------------------------------------------------------------------- /test/Spec.hs: -------------------------------------------------------------------------------- 1 | 2 | module Main where 3 | 4 | import Test.Tasty 5 | import SExpr_Unittests 6 | import Parse_Unittests 7 | import Print_Unittests 8 | import SchemeR5RS_Unittests 9 | 10 | main :: IO () 11 | main = defaultMain tests 12 | 13 | tests :: TestTree 14 | tests = testGroup "tests" [sexpTestTree, parseTestTree, printTestTree, r5rsTestTree] 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Zero-Clause BSD 2 | 3 | Permission to use, copy, modify, and/or distribute this software for 4 | any purpose with or without fee is hereby granted. 5 | 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 7 | WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 8 | WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE 9 | AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 10 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 11 | PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 12 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 13 | PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /src/Data/SExpresso/Parse.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.SExpresso.Parse 3 | -- Copyright : © 2019 Vincent Archambault 4 | -- License : 0BSD 5 | -- 6 | -- Maintainer : Vincent Archambault 7 | -- Stability : experimental 8 | -- 9 | -- This module re-exports everything from 10 | -- "Data.SExpresso.Parse.Generic", "Data.SExpresso.Parse.Char" and 11 | -- "Data.SExpresso.Parse.Location". 12 | 13 | module Data.SExpresso.Parse 14 | ( 15 | module Data.SExpresso.Parse.Generic, 16 | module Data.SExpresso.Parse.Char, 17 | module Data.SExpresso.Parse.Location 18 | ) 19 | where 20 | 21 | import Data.SExpresso.Parse.Generic 22 | import Data.SExpresso.Parse.Location 23 | import Data.SExpresso.Parse.Char 24 | -------------------------------------------------------------------------------- /src/Data/SExpresso/Print.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.SExpresso.Print 3 | -- Copyright : © 2019 Vincent Archambault 4 | -- License : 0BSD 5 | -- 6 | -- Maintainer : Vincent Archambault 7 | -- Stability : experimental 8 | -- 9 | -- Printing 'SExpr' as 'Data.Text'. To print as lazy text 10 | -- ("Data.Text.Lazy") see "Data.Sexpresso.Print.Lazy" 11 | 12 | module Data.SExpresso.Print ( 13 | PL.SExprPrinter(..), 14 | PL.mkPrinter, 15 | flatPrint 16 | ) where 17 | 18 | import qualified Data.Text as T 19 | import qualified Data.Text.Lazy as L 20 | import Data.SExpresso.SExpr 21 | import qualified Data.SExpresso.Print.Lazy as PL 22 | 23 | -- | Prints an 'SExpr' on a single line 24 | flatPrint :: PL.SExprPrinter b a -> SExpr b a -> T.Text 25 | flatPrint p s = L.toStrict $ PL.flatPrint p s 26 | -------------------------------------------------------------------------------- /src/Data/SExpresso/Parse/Char.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.SExpresso.Parse.Char 3 | -- Copyright : © 2019 Vincent Archambault 4 | -- License : 0BSD 5 | -- 6 | -- Maintainer : Vincent Archambault 7 | -- Stability : experimental 8 | -- 9 | -- The module "Data.SExpresso.Parse" re-exports the functions of this 10 | -- module. 11 | 12 | {-# LANGUAGE OverloadedStrings #-} 13 | {-# LANGUAGE TypeFamilies #-} 14 | 15 | module Data.SExpresso.Parse.Char 16 | ( 17 | plainSExprParser 18 | ) 19 | where 20 | 21 | import Text.Megaparsec 22 | import Text.Megaparsec.Char 23 | import Data.SExpresso.Parse.Generic 24 | 25 | 26 | -- | The function 'plainSExprParser' accepts a parser for atoms and 27 | -- returns a 'SExprParser' for a stream of 'Char' with the following 28 | -- properties : 29 | -- 30 | -- * The opening tag is (. 31 | -- * The closing tag is ). 32 | -- * The space parser is 'space1'. 33 | -- * Space is always mandatory between atoms. 34 | plainSExprParser :: (MonadParsec e s m, Token s ~ Char) => 35 | m a -> SExprParser m () a 36 | plainSExprParser p = SExprParser 37 | (char '(' >> return ()) 38 | (\_ -> char ')' >> return ()) 39 | p 40 | space1 41 | spaceIsMandatory 42 | -------------------------------------------------------------------------------- /package.yaml: -------------------------------------------------------------------------------- 1 | name: sexpresso 2 | version: 1.2.5.0 3 | github: "archambaultv/sexpresso" 4 | license: OtherLicense # 0BSD 5 | author: "Vincent Archambault-Bouffard" 6 | maintainer: "archambault.v@gmail.com" 7 | copyright: "Vincent Archambault-Bouffard" 8 | 9 | extra-source-files: 10 | - README.md 11 | - ChangeLog.md 12 | 13 | # Metadata used when publishing your package 14 | synopsis: A flexible library for parsing and printing S-expression 15 | category: Data 16 | 17 | # To avoid duplicated efforts in documentation and dealing with the 18 | # complications of embedding Haddock markup inside cabal files, it is 19 | # common to point users to the README.md file. 20 | description: Please see the README on GitHub at 21 | 22 | dependencies: 23 | - base >= 4.7 && < 5 24 | - text >= 0.2 && < 2.2 25 | - megaparsec >= 7.0 && < 9.7 26 | - containers >= 0.5 && < 0.8 27 | - bifunctors >= 5.5 && < 5.7 28 | - recursion-schemes >= 5.1 && < 5.3 29 | 30 | library: 31 | source-dirs: src 32 | 33 | tests: 34 | sexpresso-test: 35 | main: Spec.hs 36 | source-dirs: test 37 | ghc-options: 38 | - -threaded 39 | - -rtsopts 40 | - -with-rtsopts=-N 41 | dependencies: 42 | - sexpresso 43 | - tasty >= 0.8 44 | - tasty-hunit >= 0.10.0.1 45 | - tasty-smallcheck >= 0.8 46 | - smallcheck >= 1.0 47 | 48 | ghc-options: 49 | - -Wall 50 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Changelog for S-expresso 2 | 3 | Version 1.2.5.0 4 | --------------- 5 | * Relax upperbounds. 6 | 7 | Version 1.2.4.0 8 | --------------- 9 | * Build with `bifunctors-5.6` 10 | 11 | Version 1.2.3.0 12 | --------------- 13 | * Build with `text-2.0` 14 | 15 | Version 1.2.2.0 16 | --------------- 17 | * Build with `megaparsec-9.3.0` 18 | 19 | Version 1.2.1.0 20 | --------------- 21 | * Add `Data` instances. [#12](https://github.com/archambaultv/sexpresso/pull/12). 22 | 23 | Version 1.2.0.0 24 | --------------- 25 | * Update to stack lts 18.10 26 | * Thanks to Ollie Charles from asking for this update 27 | 28 | Version 1.1.0.0 29 | --------------- 30 | 31 | * Add startPosPretty and endPosPretty function 32 | * Add Bifunctor, Bifoldable and Bitraversable instances for SExpr 33 | * Add Base SExpr, Recursive, Corecursive instances (see package recursion-schemes) 34 | * Add Functor instance for Located 35 | * Fix SExprPrinter constructor name (SExprParser -> SExprPrinter) 36 | * Improve documentation 37 | * Merge [pull request \#6](https://github.com/archambaultv/sexpresso/pull/6) to prepare for MonadFail 38 | * Fix bug with R5RS negative number (issue \#7 on [github](https://github.com/archambaultv/sexpresso/issues/7)) 39 | * Tested with stack version 14.27 and 15.3 40 | 41 | Version 1.0.0.2 42 | --------------- 43 | 44 | * Initial Hackage Release 45 | * SExpr datatype for representing S-expression 46 | * Generic SExpr parser 47 | * Specialized SExpr parser for character 48 | * SExpr flat printer 49 | * Scheme R5RS parser implementation 50 | -------------------------------------------------------------------------------- /src/Data/SExpresso/Parse/Location.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.SExpresso.Parse.Location 3 | -- Copyright : © 2019 Vincent Archambault 4 | -- License : 0BSD 5 | -- 6 | -- Maintainer : Vincent Archambault 7 | -- Stability : experimental 8 | -- 9 | -- The module "Data.SExpresso.Parse" re-exports the functions and 10 | -- datatypes of this module. 11 | 12 | {-# LANGUAGE DeriveFunctor #-} 13 | 14 | module Data.SExpresso.Parse.Location 15 | ( 16 | Location(..), 17 | Located(..), 18 | located, 19 | startPosPretty, 20 | endPosPretty 21 | ) 22 | where 23 | 24 | import Text.Megaparsec 25 | 26 | -- Taken from https://www.reddit.com/r/haskell/comments/4x22f9/labelling_ast_nodes_with_locations/d6cmdy9/ 27 | 28 | -- | The 'Location' datatype represents a source span 29 | data Location = Span SourcePos SourcePos 30 | deriving (Eq, Ord, Show) 31 | 32 | -- | Pretty prints @S1@ of a @'Span' S1 _@ object with 'sourcePosPretty' 33 | startPosPretty :: Location -> String 34 | startPosPretty (Span s _) = sourcePosPretty s 35 | 36 | -- | Pretty prints @S2@ of a @'Span' _ S2@ object with 'sourcePosPretty' 37 | endPosPretty :: Location -> String 38 | endPosPretty (Span _ s) = sourcePosPretty s 39 | 40 | -- | The 'Located' datatype adds a source span to the type @a@ 41 | data Located a = At Location a 42 | deriving (Eq, Ord, Show, Functor) 43 | 44 | -- | The 'located' function adds a source span to a parser. 45 | located :: (MonadParsec e s m, TraversableStream s) => m a -> m (Located a) 46 | located parser = do 47 | begin <- getSourcePos 48 | result <- parser 49 | end <- getSourcePos 50 | return $ At (Span begin end) result 51 | -------------------------------------------------------------------------------- /src/Data/SExpresso/Print/Lazy.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.SExpresso.Print.Lazy 3 | -- Copyright : © 2019 Vincent Archambault 4 | -- License : 0BSD 5 | -- 6 | -- Maintainer : Vincent Archambault 7 | -- Stability : experimental 8 | -- 9 | -- Printing 'SExpr' as 'Data.Text.Lazy'. To print as strict text 10 | -- ("Data.Text") see "Data.Sexpresso.Print" 11 | 12 | {-# LANGUAGE OverloadedStrings #-} 13 | 14 | module Data.SExpresso.Print.Lazy ( 15 | SExprPrinter(..), 16 | mkPrinter, 17 | flatPrint, 18 | flatPrintBuilder 19 | ) where 20 | 21 | import qualified Data.Text as T 22 | import qualified Data.Text.Lazy as L 23 | import qualified Data.Text.Lazy.Builder as B 24 | import Data.SExpresso.SExpr 25 | 26 | -- | The 'SExprPrinter' defines how to print an 'SExpr'. 27 | data SExprPrinter b a = SExprPrinter { 28 | -- | The opening and closing tags based on the content of the 'SList' 29 | printTags :: b -> [SExpr b a] -> (T.Text, T.Text), 30 | -- | How to print an atom 31 | printAtom :: a -> T.Text 32 | } 33 | 34 | -- | An 'SExprPrinter' with the opening tag defined as '(' and the 35 | -- closing tag defined as ')' 36 | mkPrinter :: (a -> T.Text) -> SExprPrinter b a 37 | mkPrinter p = SExprPrinter (\_ _ -> ("(", ")")) p 38 | 39 | -- | Prints an 'SExpr' on a single line. Returns a 'B.Builder' instead of a lazy text 'L.Text' 40 | flatPrintBuilder :: SExprPrinter b a -> SExpr b a -> B.Builder 41 | flatPrintBuilder p (SAtom a) = B.fromText $ printAtom p a 42 | flatPrintBuilder p (SList b xs) = 43 | let (sTag, eTag) = printTags p b xs 44 | in B.fromText sTag <> flatPrintList xs <> B.fromText eTag 45 | 46 | where flatPrintList [] = B.fromText "" 47 | flatPrintList [x] = flatPrintBuilder p x 48 | flatPrintList (y : ys) = flatPrintBuilder p y <> B.fromText " " <> flatPrintList ys 49 | 50 | -- | Prints an 'SExpr' on a single line 51 | flatPrint :: SExprPrinter b a -> SExpr b a -> L.Text 52 | flatPrint p s = B.toLazyText $ flatPrintBuilder p s 53 | -------------------------------------------------------------------------------- /test/Print_Unittests.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses #-} 3 | 4 | module Print_Unittests ( 5 | printTestTree 6 | )where 7 | 8 | import Data.Void 9 | import Test.Tasty 10 | import Test.Tasty.HUnit 11 | import Test.Tasty.SmallCheck as SC 12 | import Test.SmallCheck.Series 13 | import Text.Megaparsec 14 | import Text.Megaparsec.Char 15 | import qualified Data.Text as T 16 | import Data.SExpresso.SExpr 17 | import Data.SExpresso.Print 18 | import Data.SExpresso.Parse 19 | 20 | type Parser = Parsec Void T.Text 21 | 22 | instance (Serial m b, Serial m a) => Serial m (SExpr b a) where 23 | series = cons1 SAtom \/ cons2 SList 24 | 25 | printer :: SExprPrinter () Integer 26 | printer = mkPrinter (T.pack . show) 27 | 28 | pDigit :: Parser Integer 29 | pDigit = do 30 | sign <- optional (char '-') 31 | n <- fmap read (some digitChar) 32 | case sign of 33 | Nothing -> return n 34 | Just _ -> return (-1 * n) 35 | 36 | sexpParser :: SExprParser Parser () Integer 37 | sexpParser = plainSExprParser pDigit 38 | 39 | printTestTree :: TestTree 40 | printTestTree = testGroup "Print.hs unit tests" $ 41 | [testGroup "flatPrint" [ 42 | testCase "Empty SList" $ flatPrint printer (SList () [] :: Sexp Integer) @?= "()", 43 | testCase "Singleton SList" $ flatPrint printer (SList () [SAtom 1] :: Sexp Integer) @?= "(1)", 44 | testCase "SList 1/3" $ flatPrint printer (SList () [SAtom 1, SAtom 2, SAtom 3] :: Sexp Integer) @?= "(1 2 3)", 45 | testCase "SList 2/3" $ flatPrint printer (SList () [SAtom 1, SList () [SAtom 2], SAtom 3] :: Sexp Integer) @?= "(1 (2) 3)", 46 | testCase "SList 3/3" $ flatPrint printer (SList () [SList () [SAtom 1], SAtom 2, SList () [SAtom 3]] :: Sexp Integer) @?= "((1) 2 (3))", 47 | testCase "SAtom" $ flatPrint printer (SAtom 3 :: Sexp Integer) @?= "3", 48 | SC.testProperty "decodeOne inverse of flatPrint" $ 49 | \s -> parse (decodeOne sexpParser) "" (flatPrint printer (s :: Sexp Integer)) == Right s 50 | ] 51 | ] 52 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by 'stack init' 2 | # 3 | # Some commonly used options have been documented as comments in this file. 4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # https://docs.haskellstack.org/en/stable/yaml_configuration/ 6 | 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | # resolver: lts-3.5 12 | # resolver: nightly-2015-09-21 13 | # resolver: ghc-7.10.2 14 | # 15 | # The location of a snapshot can be provided as a file or url. Stack assumes 16 | # a snapshot provided as a file might change, whereas a url resource does not. 17 | # 18 | # resolver: ./custom-snapshot.yaml 19 | # resolver: https://example.com/snapshots/2018-01-01.yaml 20 | resolver: lts-18.10 21 | 22 | # User packages to be built. 23 | # Various formats can be used as shown in the example below. 24 | # 25 | # packages: 26 | # - some-directory 27 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 28 | # subdirs: 29 | # - auto-update 30 | # - wai 31 | packages: 32 | - . 33 | # Dependency packages to be pulled from upstream that are not in the resolver. 34 | # These entries can reference officially published versions as well as 35 | # forks / in-progress versions pinned to a git hash. For example: 36 | # 37 | # extra-deps: 38 | # - acme-missiles-0.3 39 | # - git: https://github.com/commercialhaskell/stack.git 40 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 41 | # 42 | # extra-deps: [] 43 | 44 | # Override default flag values for local packages and extra-deps 45 | # flags: {} 46 | 47 | # Extra package databases containing global packages 48 | # extra-package-dbs: [] 49 | 50 | # Control whether we use the GHC we find on the path 51 | # system-ghc: true 52 | # 53 | # Require a specific version of stack, using version ranges 54 | # require-stack-version: -any # Default 55 | # require-stack-version: ">=2.1" 56 | # 57 | # Override the architecture used by stack, especially useful on Windows 58 | # arch: i386 59 | # arch: x86_64 60 | # 61 | # Extra directories used by stack for building 62 | # extra-include-dirs: [/path/to/dir] 63 | # extra-lib-dirs: [/path/to/dir] 64 | # 65 | # Allow a newer minor version of GHC than the snapshot specifies 66 | # compiler-check: newer-minor 67 | -------------------------------------------------------------------------------- /test/SExpr_Unittests.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances #-} 2 | {-# LANGUAGE MultiParamTypeClasses #-} 3 | 4 | module SExpr_Unittests ( 5 | sexpTestTree 6 | )where 7 | 8 | import Test.Tasty 9 | import Test.Tasty.HUnit 10 | import Data.SExpresso.SExpr 11 | 12 | sexpTestTree :: TestTree 13 | sexpTestTree = testGroup "Sexpr.hs unit tests" 14 | 15 | [testGroup "isList" [ 16 | testCase "Empty SList" $ isList (SList () [] :: Sexp Int) @?= True, 17 | testCase "SList" $ isList (SList () [SAtom 1, SAtom 2] :: Sexp Int) @?= True, 18 | testCase "SAtom" $ isList (SAtom 1 :: Sexp Int) @?= False], 19 | 20 | testGroup "sList" [ 21 | testCase "Empty SList" $ sList (SList () [] :: Sexp Int) @?= Just [], 22 | testCase "SList" $ sList (SList () [SAtom 1, SAtom 2] :: Sexp Int) @?= Just [SAtom 1, SAtom 2], 23 | testCase "SAtom" $ sList (SAtom 1 :: Sexp Int) @?= Nothing], 24 | 25 | testGroup "isAtom" [ 26 | testCase "Empty SList" $ isAtom (SList () [] :: Sexp Int) @?= False, 27 | testCase "SList" $ isAtom (SList () [SAtom 1, SAtom 2] :: Sexp Int) @?= False, 28 | testCase "SAtom" $ isAtom (SAtom 1 :: Sexp Int) @?= True], 29 | 30 | testGroup "sAtom" [ 31 | testCase "Empty SList" $ sAtom (SList () [] :: Sexp Int) @?= Nothing, 32 | testCase "SList" $ sAtom (SList () [SAtom 1, SAtom 2] :: Sexp Int) @?= Nothing, 33 | testCase "SAtom" $ sAtom (SAtom 1 :: Sexp Int) @?= Just 1], 34 | 35 | testGroup "Pattern synonyms" [ 36 | testCase "L - empty list (1/2)" $ 37 | (case (SList () []) of 38 | L [] -> True 39 | _ -> False) @?= True, 40 | testCase "L - empty list (2/2)" $ 41 | (case (SList () [SAtom 1 :: Sexp Int]) of 42 | L [] -> True 43 | _ -> False) @?= False, 44 | testCase "L - singleton list (1/2)" $ 45 | (case (SList () [SAtom 1 :: Sexp Int]) of 46 | L [_] -> True 47 | _ -> False) @?= True, 48 | testCase "L - singleton list (2/2)" $ 49 | (case (SList () [SAtom 1 :: Sexp Int]) of 50 | L [] -> True 51 | _ -> False) @?= False, 52 | testCase "L - atom" $ 53 | (case (SAtom 1 :: Sexp Int) of 54 | L _ -> True 55 | _ -> False) @?= False, 56 | testCase "A - atom (1/2)" $ 57 | (case (SAtom 1 :: Sexp Int) of 58 | A 1 -> True 59 | _ -> False) @?= True, 60 | testCase "A - atom (2/2)" $ 61 | (case (A 1 :: Sexp Int) of 62 | SAtom 1 -> True 63 | _ -> False) @?= True, 64 | testCase "A - singleton list" $ 65 | (case (SList () [SAtom 1 :: Sexp Int]) of 66 | A 1 -> True 67 | _ -> False) @?= False, 68 | testCase "Sexp - empty List" $ 69 | (Sexp [] :: Sexp Int) @?= SList () [], 70 | testCase "Sexp - non empty List" $ 71 | Sexp [A 1 :: Sexp Int, A 2] @?= SList () [SAtom 1, SAtom 2], 72 | testCase "Sexp and L" $ 73 | (case (SList () [SAtom 1 :: Sexp Int, SList () []]) of 74 | Sexp [A 1, L []] -> True 75 | _ -> False) @?= True, 76 | testCase "::: (1/2)" $ 77 | (case (SList () [SAtom 1 :: Sexp Int, SAtom 2]) of 78 | (A 1 ::: A 2 ::: L []) -> True 79 | _ -> False) @?= True, 80 | testCase "::: (2/2)" $ 81 | (case (SList () [SAtom 1 :: Sexp Int, SAtom 2, SAtom 3]) of 82 | (A 1 ::: L xs) -> xs == [SAtom 2, SAtom 3] 83 | _ -> False) @?= True, 84 | testCase "Nil (1/2)" $ 85 | (case (SList () [SAtom 1 :: Sexp Int, SAtom 2]) of 86 | (A 1 ::: A 2 ::: Nil) -> True 87 | _ -> False) @?= True, 88 | testCase "Nil (2/2)" $ 89 | (case (SList () [SAtom 1 :: Sexp Int, SAtom 2, SAtom 3]) of 90 | (A 1 ::: A 2 ::: Nil) -> True 91 | _ -> False) @?= False 92 | ], 93 | 94 | testGroup "Functor" [ 95 | testCase "Empty SList" $ fmap (\x -> x + 1) (SList () [] :: Sexp Int) @?= (SList () []), 96 | testCase "Singleton SList" $ fmap (\x -> x + 1) (SList () [SAtom 5] :: Sexp Int) @?= (SList () [SAtom 6])] 97 | ] 98 | -------------------------------------------------------------------------------- /src/Data/SExpresso/SExpr.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.SExpresso.SExpr 3 | -- Copyright : © 2019 Vincent Archambault 4 | -- License : 0BSD 5 | -- 6 | -- Maintainer : Vincent Archambault 7 | -- Stability : experimental 8 | -- 9 | -- Definition of S-expression 10 | 11 | {-# LANGUAGE PatternSynonyms #-} 12 | {-# LANGUAGE ViewPatterns #-} 13 | {-# LANGUAGE TemplateHaskell #-} 14 | {-# LANGUAGE KindSignatures #-} 15 | {-# LANGUAGE TypeFamilies #-} 16 | {-# LANGUAGE DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveDataTypeable #-} 17 | 18 | 19 | module Data.SExpresso.SExpr 20 | ( 21 | SExpr(..), 22 | Sexp, 23 | pattern A, 24 | pattern L, 25 | pattern Sexp, 26 | pattern (:::), 27 | pattern Nil, 28 | isAtom, 29 | sAtom, 30 | isList, 31 | sList 32 | ) 33 | where 34 | 35 | import Data.Bifunctor.TH 36 | import Data.Data 37 | import Data.Functor.Foldable.TH 38 | 39 | -- | The datatype 'SExpr' is the definition of an S-expression for the 40 | -- library S-expresso. 41 | -- 42 | -- The parameter @a@ allows you to specify the datatype of atoms and 43 | -- the parameter @b@ is usefull for keeping metadata about S-expression 44 | -- like source position for example. 45 | data SExpr b a = SList b [SExpr b a] 46 | | SAtom a 47 | deriving (Eq, Show, Functor, Traversable, Foldable, Data) 48 | 49 | $(deriveBifunctor ''SExpr) 50 | $(deriveBifoldable ''SExpr) 51 | $(deriveBitraversable ''SExpr) 52 | $(makeBaseFunctor ''SExpr) 53 | 54 | -- | The type synonym 'Sexp' is a variant of the more general 'SExpr' 55 | -- datatype with no data for the 'SList' constructor. 56 | type Sexp a = SExpr () a 57 | 58 | -- | Bidirectional pattern synonym for the type synonym 'Sexp'. See 59 | -- also the 'L' pattern synonym. 60 | -- 61 | -- >foo (Sexp x) = x -- Equivalent to foo (SList () x) = x 62 | -- >s = Sexp [] -- Equivalent to s = SList () [] 63 | pattern Sexp :: [Sexp a] -> Sexp a 64 | pattern Sexp xs = SList () xs 65 | 66 | -- | Pattern for matching only the sublist of the 'SList' constructor. 67 | -- See also the Sexp pattern synonym. 68 | -- 69 | -- >foo (L xs) = xs -- Equivalent to foo (SList _ xs) = xs 70 | pattern L :: [SExpr b a] -> SExpr b a 71 | pattern L xs <- SList _ xs 72 | 73 | -- | Shorthand for 'SAtom'. 74 | -- 75 | -- >foo (A x) = x -- Equivalent to foo (SAtom x) = x 76 | -- > a = A 3 -- Equivalent to a = SAtom 3 77 | pattern A :: a -> SExpr b a 78 | pattern A x = SAtom x 79 | 80 | uncons :: SExpr b a -> Maybe (SExpr b a, SExpr b a) 81 | uncons (SAtom _) = Nothing 82 | uncons (SList _ []) = Nothing 83 | uncons (SList b (x:xs)) = Just (x, SList b xs) 84 | 85 | -- | Pattern specifying the shape of the sublist of the 'SList' constructor. 86 | -- See also 'Nil'. 87 | -- 88 | -- Although it aims to mimic the behavior of the cons (:) constructor 89 | -- for list, this pattern behavior is a little bit different. Indeed 90 | -- its signature is @SExpr b a -> SExpr b a -> SExpr b a@ while the 91 | -- cons (:) constructor signature is @a -> [a] -> [a]@. The first 92 | -- argument type is different in the case of the cons constructor but all 93 | -- the types are identical for the pattern `:::`. 94 | -- 95 | -- This implies that the following code 96 | -- 97 | -- >foo (x ::: xs) = ... 98 | -- is equivalent to 99 | -- 100 | -- >foo (SList b (x : rest)) = let xs = SList b rest 101 | -- > in ... 102 | -- If you wish for the @xs@ above to match the remaining of the list, 103 | -- you need to use the 'L' pattern 104 | -- 105 | -- >foo (A x ::: L xs) 106 | -- which is equivalent to 107 | -- 108 | -- >foo (SList b (x : rest)) = let (SList _ xs) = SList b rest 109 | -- > in ... 110 | -- 111 | -- Other examples : 112 | -- 113 | -- >foo (A x1 ::: A x2 ::: Nil) -- Equivalent to foo (SList _ [SAtom x1, SAtom x2]) 114 | -- >foo (L ys ::: A x ::: L xs) -- Equivalent to foo (SList _ (SList _ ys : SAtom x : xs)) 115 | infixr 5 ::: 116 | pattern (:::) :: SExpr b a -> SExpr b a -> SExpr b a 117 | pattern x ::: xs <- (uncons -> Just (x, xs)) 118 | 119 | -- | Pattern to mark the end of the list when using the pattern synonym ':::' 120 | pattern Nil :: SExpr b a 121 | pattern Nil <- SList _ [] 122 | 123 | -- | The 'isAtom' function returns 'True' iff its argument is of the 124 | -- form @SAtom _@. 125 | isAtom :: SExpr b a -> Bool 126 | isAtom (A _) = True 127 | isAtom _ = False 128 | 129 | -- | The 'sAtom' function returns 'Nothing' if its argument is of the 130 | -- form @SList _ _@ and @'Just' a@ if its argument is of the form @SAtom _@.. 131 | sAtom :: SExpr b a -> Maybe a 132 | sAtom (A x) = Just x 133 | sAtom _ = Nothing 134 | 135 | -- | The 'isList' function returns 'True' iff its argument is of the 136 | -- form @SList _ _@. 137 | isList :: SExpr b a -> Bool 138 | isList (L _) = True 139 | isList _ = False 140 | 141 | -- | The 'sList' function returns 'Nothing' if its argument is of the 142 | -- form @SAtom _@ and the sublist @xs@ if its argument is of the form 143 | -- @SList _ xs@. 144 | sList :: SExpr b a -> Maybe [SExpr b a] 145 | sList (L l) = Just l 146 | sList _ = Nothing 147 | -------------------------------------------------------------------------------- /test/Parse_Unittests.hs: -------------------------------------------------------------------------------- 1 | module Parse_Unittests ( 2 | parseTestTree 3 | )where 4 | 5 | import Data.Void 6 | import Data.Either 7 | import Data.Bifunctor (first) 8 | import Test.Tasty 9 | import Test.Tasty.HUnit 10 | import Text.Megaparsec as M 11 | import Text.Megaparsec.Char 12 | import Data.SExpresso.SExpr 13 | import Data.SExpresso.Parse 14 | 15 | type Parser = Parsec Void String 16 | 17 | asciiLetter :: Parser Char 18 | asciiLetter = oneOf (['a' .. 'z'] ++ ['A' .. 'Z']) 19 | 20 | pIdent :: Parser String 21 | pIdent = some asciiLetter 22 | 23 | pDigit :: Parser String 24 | pDigit = some digitChar 25 | 26 | sexpParser :: SExprParser Parser () String 27 | sexpParser = plainSExprParser (pIdent <|> pDigit) 28 | 29 | pSExpr :: Parser (Sexp String) 30 | pSExpr = parseSExpr sexpParser 31 | 32 | pDecodeOne :: Parser (Sexp String) 33 | pDecodeOne = decodeOne sexpParser 34 | 35 | pDecode :: Parser [Sexp String] 36 | pDecode = decode sexpParser 37 | 38 | pOptionalSpace :: Parser (Sexp String) 39 | pOptionalSpace = decodeOne $ setSpacingRule spaceIsOptional sexpParser 40 | 41 | parseTestTree :: TestTree 42 | parseTestTree = testGroup "Parse/Generic.hs & Parse/Char.hs unit tests" $ 43 | let tparse :: Parser a -> String -> Either String a 44 | tparse p s = first M.errorBundlePretty $ M.parse p "" s 45 | 46 | sExprTests :: (Eq a, Show a) => Parser a -> (Sexp String -> a) -> [TestTree] 47 | sExprTests p f = [ 48 | let s = "()" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])), 49 | let s = "( )" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])), 50 | let s = "foo" in testCase (show s) $ tparse p s @?= (Right $ f (SAtom "foo")), 51 | let s = "1234" in testCase (show s) $ tparse p s @?= (Right $ f (SAtom "1234")), 52 | let s = "(foo)" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [SAtom "foo"])), 53 | let s = "( foo)" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [SAtom "foo"])), 54 | let s = "(foo )" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [SAtom "foo"])), 55 | let s = "(foo bar baz)" 56 | in testCase (show s) $ tparse p s @?= (Right $ f (SList () [SAtom "foo", SAtom "bar", SAtom "baz"])), 57 | let s = "(foo (bar baz))" 58 | in testCase (show s) $ tparse p s @?= 59 | (Right $ f (SList () [SAtom "foo", SList () [SAtom "bar", SAtom "baz"]])), 60 | let s = "(foo(bar baz))" 61 | in testCase (show s) $ tparse p s @?= 62 | (Right $ f (SList () [SAtom "foo", SList () [SAtom "bar", SAtom "baz"]])), 63 | let s = "((foo bar)baz)" 64 | in testCase (show s) $ tparse p s @?= 65 | (Right $ f (SList () [SList () [SAtom "foo", SAtom "bar"], SAtom "baz"])), 66 | let s = "(foo1234)" 67 | in testCase (show s) $ (isLeft $ tparse p "(foo1234)") @? "Parsing must fail. foo and 1234 are not separated by whitespace" 68 | ] 69 | 70 | decodeCommon :: (Eq a, Show a) => Parser a -> (Sexp String -> a) -> [TestTree] 71 | decodeCommon p f = [ 72 | let s = " () " in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])), 73 | let s = " ()" in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])), 74 | let s = "() " in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])), 75 | let s = " () " in testCase (show s) $ tparse p s @?= (Right $ f (SList () [])) 76 | ] 77 | in 78 | [ 79 | testGroup "parseSExpr" $ sExprTests pSExpr id ++ 80 | [ 81 | let s = " foo" 82 | in testCase (show s) $ (isLeft $ tparse pSExpr s) @? "Parsing must fail. parseSExpr should not parse whitespace" 83 | ], 84 | testGroup "decondeOne" $ sExprTests pDecodeOne id ++ 85 | decodeCommon pDecodeOne id ++ 86 | [ 87 | let s = "() err" in testCase (show s) $ (isLeft $ tparse pDecodeOne s) @? "Parsing must fail. 2 SExpr", 88 | let s = "err ()" in testCase (show s) $ (isLeft $ tparse pDecodeOne s) @? "Parsing must fail. 2 SExpr", 89 | let s = "()err" in testCase (show s) $ (isLeft $ tparse pDecodeOne s) @? "Parsing must fail. 2 SExpr", 90 | let s = "err()" in testCase (show s) $ (isLeft $ tparse pDecodeOne s) @? "Parsing must fail. 2 SExpr" 91 | ], 92 | testGroup "decode" $ sExprTests pDecode (\x -> [x]) ++ 93 | decodeCommon pDecode (\x -> [x]) ++ 94 | [ 95 | let s = "()()" in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []], 96 | let s = " ()()" in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []], 97 | let s = "() ()" in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []], 98 | let s = "()() " in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []], 99 | let s = " () () " in testCase (show s) $ tparse pDecode s @?= Right [SList () [], SList () []], 100 | let s = "(foo)(1234)" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]], 101 | let s = " (foo)(1234)" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]], 102 | let s = "(foo) (1234)" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]], 103 | let s = "(foo)(1234) " in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]], 104 | let s = " (foo) (1234) " in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SList () [SAtom "1234"]], 105 | let s = "(foo) 1234" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SAtom "1234"], 106 | let s = "foo 1234" in testCase (show s) $ tparse pDecode s @?= Right [SAtom "foo", SAtom "1234"], 107 | let s = "foo(1234)" in testCase (show s) $ tparse pDecode s @?= Right [SAtom "foo", SList () [SAtom "1234"]], 108 | let s = "(foo)1234" in testCase (show s) $ tparse pDecode s @?= Right [SList () [SAtom "foo"], SAtom "1234"], 109 | let s = "bar1234" 110 | in testCase (show s) $ (isLeft $ tparse pDecode s) @? "Parsing must fail. bar and 1234 are not separated by whitespace" 111 | ], 112 | testGroup "spaceIsOptional" $ [ 113 | let s = "(foo1234)" 114 | in testCase (show s) $ tparse pOptionalSpace s @?= (Right (SList () [SAtom "foo", SAtom "1234"])), 115 | let s = "(foo 1234)" 116 | in testCase (show s) $ tparse pOptionalSpace s @?= (Right (SList () [SAtom "foo", SAtom "1234"])), 117 | let s = "(foo1234 bar)" 118 | in testCase (show s) $ tparse pOptionalSpace s @?= (Right (SList () [SAtom "foo", SAtom "1234", SAtom "bar"])), 119 | let s = "( foo1234 )" 120 | in testCase (show s) $ tparse pOptionalSpace s @?= (Right (SList () [SAtom "foo", SAtom "1234"])) 121 | ] 122 | ] 123 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License OBSD](https://img.shields.io/badge/license-0BSD-brightgreen.svg)](https://opensource.org/licenses/0BSD) 2 | [![Hackage](https://img.shields.io/hackage/v/sexpresso.svg)](http://hackage.haskell.org/package/sexpresso) 3 | 4 | # S-expresso 5 | 6 | S-expresso is a Haskell library designed to help you parse and print 7 | data or source code encoded as an S-expression. It provides a very 8 | flexible parser and (for now) a flat printer. 9 | 10 | # What is an S-expression 11 | Basically, an S-expression is a special form of tree structured 12 | data. An S-expression object is either an atom or a list of atoms and other S-expressions. 13 | 14 | This datatype is the definition of an S-expression for 15 | S-expresso. 16 | 17 | ~~~haskell 18 | data SExpr b a = SList b [SExpr b a] 19 | | SAtom a 20 | ~~~ 21 | 22 | The parameter `a` allows you to specify the datatype of atoms and the 23 | parameter `b` is usefull for keeping metadata about S-expression like 24 | source position for example. 25 | 26 | `SExpr` is not equivalent to `[a]` because the later cannot 27 | distinguish between an atom `(SAtom _)` and a tree containing only one 28 | atom `(SList _ [SAtom _])`. `SExpr` is also not equivalent to `Tree a` 29 | from `Data.Tree` because the later cannot encode the empty tree 30 | `(SList _ [])` and does not enforce that atoms are at the leaves. 31 | 32 | ## The Sexp type 33 | If you are only interested by the atoms, you can use the type alias 34 | `Sexp` that is a variant of the more general 'SExpr' data type with no 35 | data for the 'SList' constructor. 36 | ~~~haskell 37 | type Sexp a = SExpr () a 38 | ~~~ 39 | 40 | This type also comes with a bidirectional pattern synonym also named 41 | `Sexp` for object of the form `SExpr () _`. 42 | ~~~ 43 | x = Sexp [A 3] <-> x = SList () [SAtom 3] 44 | foo (Sexp xs) <-> foo (SList () xs) 45 | foo (Sexp (Sexp ys : A x : xs)) <-> foo (SList () (SList () ys : SAtom x : xs)) 46 | ~~~ 47 | 48 | ## Pattern synonyms 49 | S-expresso defines four pattern synonyms to ease your programming with 50 | `SExpr`. The patterns `L` helps you match the `SList` constructor and only 51 | its sublist, disregarding the `b` field. The pattern `:::` and `Nil` helps 52 | you specify the shape of the sublist of an `SList` constructor and 53 | finally the pattern `A` is a shorthand for `SAtom`. 54 | 55 | Together they make working with `SExpr` a little easier. 56 | ~~~ 57 | a = A 3 <-> a = SAtom 3 58 | foo (A x) <-> foo (SAtom x) 59 | foo (A x1 ::: A x2 ::: Nil) <-> foo (SList _ [SAtom x1, SAtom x2]) 60 | foo (A x ::: L xs)) <-> foo (SList _ (SAtom x : xs)) 61 | foo (L ys ::: A x ::: L xs)) <-> foo (SList _ (SList _ ys : SAtom x : xs)) 62 | foo (L x) <-> foo (SList _ x) 63 | ~~~ 64 | 65 | Notice that you need to end the pattern `:::` with `Nil` for the empty 66 | list or `L xs` for matching the remainder of the list. Indeed, if you write 67 | 68 | ~~~ 69 | foo (x ::: xs) = ... 70 | ~~~ 71 | 72 | this is equivalent to : 73 | 74 | ~~~ 75 | foo (SList b (x : rest)) = let xs = SList b rest 76 | in ... 77 | ~~~ 78 | 79 | You can refer to the documentation of the `:::` constructor for more information. 80 | 81 | # Parsing S-expressions 82 | The parsing is based on 83 | [megaparsec](http://hackage.haskell.org/package/megaparsec). S-expresso 84 | allows you to customize the following : 85 | * The parser for atoms 86 | * The opening tag (usually "("), the closing tag (usually ")") and a 87 | possible dependency of the closing tag on the opening one. 88 | * If some space is required or optional between any pair of atoms. 89 | * How to parse space (ex: treat comments as whitespace) 90 | 91 | The library offers amoung others the `decodeOne` and `decode` 92 | functions. The former only reads one S-expression while the other 93 | parses many S-expressions. Both functions creates a megaparsec 94 | parser from a `SExprParser` argument. 95 | 96 | The `SExprParser` is the data type that defines how to read an 97 | S-expression. The easiest way to create a `SExprParser` is to use the 98 | function `plainSExprParser` with your own custom atom parser. This 99 | will create a parser where S-expression starts with "(", ends with ")" 100 | and space is mandatory between atoms. 101 | 102 | ~~~haskell 103 | import Data.Void 104 | import qualified Data.Text as T 105 | import Text.Megaparsec 106 | import Text.Megaparsec.Char 107 | import qualified Text.Megaparsec.Char.Lexer as L 108 | 109 | atom = some letter 110 | 111 | sexp = decode $ plainSExprParser atom 112 | 113 | -- Returns (SList () [SAtom "hello", SAtom "world"]) 114 | ex1 = parse sexp "" "(hello world)" 115 | 116 | -- Returns (SList () [SAtom "hello", SAtom "world", SList () [SAtom "bonjour"]]) 117 | ex2 = parse sexp "" " (hello world(bonjour)) " 118 | 119 | -- Returns SAtom "hola" 120 | ex2 = parse sexp "" "hola" 121 | ~~~ 122 | 123 | ## Customizing the SExprParser 124 | S-expresso provides many functions to modify the behavior of the 125 | parser. For example, you can use the functions `setTags`, 126 | `setTagsFromList`, `setSpace` and `setSpacingRule` to modify the 127 | behavior of the parser. Following on the preceding example: 128 | 129 | ~~~haskell 130 | -- setTags 131 | data MyType = List | Vector 132 | 133 | listOrVector = 134 | let sTag = (char '(' >> return List) <|> (string "#(" >> return Vector) 135 | eTag = \t -> char ')' >> return t 136 | p = setTags sTag eTag $ 137 | plainSExprParser atom 138 | in decode p 139 | 140 | -- Returns (SList List [SList Vector [SAtom "a", SAtom "b"], SAtom "c"]) 141 | ex3 = parse listOrVector "" "(#(a b) c)" 142 | 143 | -- setTagsFromList 144 | listOrVector2 = decode $ 145 | setTagsFromList [("(",")",List),("#(",")",Vector)] $ 146 | plainSExprParser atom 147 | 148 | 149 | -- Returns (SList List [SList Vector [SAtom "a", SAtom "b"], SAtom "c"]) 150 | ex4 = parse listOrVector2 "" "(#(a b) c)" 151 | 152 | -- setSpace 153 | withComments = decode $ 154 | -- See megaparsec Space in Megaparsec.Char.Lexer 155 | setSpace (L.Space Space1 (skipLineComment ";") empty) $ 156 | plainSExprParser atom 157 | 158 | -- Returns (SList () [SAtom "hello", SList () [SAtom "bonjour"]]) 159 | ex5 = parse withComments "" "(hello ;world\n (bonjour))" 160 | 161 | -- setSpacingRule 162 | optionalSpace = decode $ 163 | setSpacingRule spaceIsOptional $ 164 | plainSExprParser (some letter <|> some digitChar) 165 | 166 | -- Returns (SList () [SAtom "hello", SAtom "1234", SAtom "world"]) 167 | ex5 = parse optionalSpace "" "(hello1234world)" 168 | ~~~ 169 | 170 | You can also directly build a custom SExprParser with the constructor `SExprParser`. 171 | 172 | ## Adding Source Location 173 | If you need the source position of the atoms and s-expression, the 174 | function `withLocation` transforms an `SExprParser b a` into 175 | `SExprParser (Located b) (Located a)`. The `Located` datatype is 176 | defined 177 | [here](https://github.com/archambaultv/sexpresso/blob/master/src/Data/SExpresso/Parse/Location.hs). 178 | -------------------------------------------------------------------------------- /src/Data/SExpresso/Parse/Generic.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.SExpresso.Parse.Generic 3 | -- Copyright : © 2019 Vincent Archambault 4 | -- License : 0BSD 5 | -- 6 | -- Maintainer : Vincent Archambault 7 | -- Stability : experimental 8 | -- 9 | -- This module includes everything you need to write a parser for 10 | -- S-expression ('SExpr'). It is based on the "Text.Megaparsec" 11 | -- library and parsers can be defined for any kind of ('MonadParsec' e 12 | -- s m) instance. This is quite generic, if you are working with 13 | -- streams of 'Char', we suggest you also import 14 | -- "Data.SExpresso.Parse.Char" or simply "Data.SExpresso.Parse" which 15 | -- re-exports everything. 16 | -- 17 | -- You can customize your 'SExpr' parser by specifying the following: 18 | -- 19 | -- * The parser for atoms 20 | -- 21 | -- * The opening tag, the closing tag, and a possible dependency of 22 | -- the closing tag on the opening one. 23 | -- 24 | -- * If some space is required or optional between any pair of 25 | -- atoms. 26 | -- 27 | -- * How to parse space (ex: treat comments as whitespace) 28 | 29 | {-# LANGUAGE OverloadedStrings #-} 30 | {-# LANGUAGE ExistentialQuantification #-} 31 | 32 | module Data.SExpresso.Parse.Generic 33 | ( 34 | SExprParser(..), 35 | 36 | getAtom, 37 | getSpace, 38 | getSpacingRule, 39 | 40 | setTags, 41 | setTagsFromList, 42 | setTagsFromMap, 43 | setSpace, 44 | setSpacingRule, 45 | setAtom, 46 | 47 | SpacingRule(..), 48 | spaceIsMandatory, 49 | spaceIsOptional, 50 | mkSpacingRule, 51 | 52 | withLocation, 53 | 54 | parseSExprList, 55 | parseSExpr, 56 | decodeOne, 57 | decode 58 | ) 59 | where 60 | 61 | import Data.Maybe 62 | import qualified Data.Map as M 63 | import Control.Applicative 64 | import Control.Monad (mzero) 65 | import Text.Megaparsec 66 | import Data.SExpresso.SExpr 67 | import Data.SExpresso.Parse.Location 68 | 69 | -- | The 'SpacingRule' datatype is used to indicate if space is optional or mandatory between two consecutive @'SAtom' _@. 70 | data SpacingRule = 71 | -- | Space is mandatory 72 | SMandatory 73 | -- | Space is optional 74 | | SOptional 75 | deriving (Show, Eq) 76 | 77 | -- | The @'SExprParser' m b a@ datatype defines how to parse an 78 | -- @'SExpr' b a@. Most parsing functions require the underlying monad 79 | -- @m@ to be an instance of ('MonadParsec' e s m). 80 | 81 | data SExprParser m b a 82 | -- | The @c@ parameter in the first two arguments is the type of the 83 | -- relation between the opening tag and the closing one. 84 | = forall c. SExprParser 85 | (m c) -- ^ The parser for the opening tag. Returns an object of an 86 | -- arbitrary type @c@ that will be used to create the closing 87 | -- tag parser. 88 | (c -> m b) -- ^ A function that takes the object returned by the 89 | -- opening tag parser and provide a parser for the 90 | -- closing tag. 91 | (m a) -- ^ The parser for atoms 92 | (m ()) -- ^ A parser for space tokens which does not accept empty 93 | -- input (e.g. 'Text.Megaparsec.Char.space1') 94 | (a -> a -> SpacingRule) -- ^ A function to tell if two consecutive 95 | -- atoms must be separated by space or 96 | -- not. See also 'mkSpacingRule' and 97 | -- 'setSpacingRule' 98 | 99 | -- | The 'getSpace' function returns the parser for whitespace of an 'SExprParser' object. 100 | getSpace :: SExprParser m b a -> m () 101 | getSpace (SExprParser _ _ _ sp _) = sp 102 | 103 | -- | The 'getSpacingRule' function returns spacing rule function of an 'SExprParser' object. 104 | getSpacingRule :: SExprParser m b a -> (a -> a -> SpacingRule) 105 | getSpacingRule (SExprParser _ _ _ _ sr) = sr 106 | 107 | -- | The 'getAtom' function returns the parser for atoms of an 'SExprParser' object. 108 | getAtom :: SExprParser m b a -> m a 109 | getAtom (SExprParser _ _ a _ _) = a 110 | 111 | -- | The 'withLocation' function adds source location to a @'SExprParser'@. See also 'Location'. 112 | withLocation :: (MonadParsec e s m, TraversableStream s) => SExprParser m b a -> SExprParser m (Located b) (Located a) 113 | withLocation (SExprParser pSTag pETag atom sp sr) = 114 | let s = do 115 | pos <- getSourcePos 116 | c <- pSTag 117 | return (pos, c) 118 | e = \(pos, c) -> do 119 | b <- pETag c 120 | pos2 <- getSourcePos 121 | return $ At (Span pos pos2) b 122 | in SExprParser s e (located atom) sp (\(At _ a1) (At _ a2) -> sr a1 a2) 123 | 124 | -- | The 'setAtom' function updates a parser with a new parser for atoms and and new spacing rule function. 125 | setAtom :: m a -> (a -> a -> SpacingRule) -> SExprParser m b a' -> SExprParser m b a 126 | setAtom a sr (SExprParser pSTag pETag _ sp _) = SExprParser pSTag pETag a sp sr 127 | 128 | -- | The 'setTags' function updates a parser with a new parser for the opening and closing tags. 129 | setTags :: m c -> (c -> m b) -> SExprParser m b' a -> SExprParser m b a 130 | setTags s e (SExprParser _ _ a sp sr) = SExprParser s e a sp sr 131 | 132 | -- | The 'setTagsFromList' function helps you build the opening and 133 | -- closing parsers from a list of triplets. Each triplet specifies a 134 | -- stream of tokens to parse as the opening tag, a stream of tokens to 135 | -- parse at the closing tag and what to return when this pair is 136 | -- encountered. The 'setTagsFromList' can handle multiple triplets 137 | -- with the same opening tags. See also 'setTagsFromMap'. 138 | -- 139 | -- The example e1 parses "()" as @'SList' () []@. 140 | -- 141 | -- > e1 = setTagsFromList [("(", ")", ()] p 142 | -- 143 | -- The example e2 parses both "()" and "[]" as @'SList' () []@ but does 144 | -- not parse "(]" or "[)" 145 | -- 146 | -- > e2 = setTagsFromList [("(", ")", ()), ("[", "]", ())] p 147 | -- 148 | -- The example e3 parses "()" as @'SList' List []@ and "#()" as 149 | -- @'SList' Vector []@, but does not parse "(]" or "[)" 150 | -- 151 | -- > e3 = setTagsFromList [("(", ")", List), ("#(",")",Vector)] p 152 | -- 153 | -- The example e4 parses "()" as @'SList' ')' []@ and "(]" as 154 | -- @'SList' ']' []@, but does not parse "])" 155 | -- 156 | -- > e4 = setTagsFromList [("(", ")", ')'), ("(", "]", ']')] p 157 | setTagsFromList :: (MonadParsec e s m) => 158 | [(Tokens s, Tokens s, b)] -> SExprParser m b' a -> SExprParser m b a 159 | setTagsFromList l p = 160 | let m = M.fromListWith (++) $ map (\(s,e,b) -> (s, [(e,b)])) l 161 | in setTagsFromMap m p 162 | 163 | -- | The 'setTagsFromMap' function helps you build the opening and 164 | -- closing parsers from a map. Each key specifies a stream of tokens to 165 | -- parse as the opening tag and the value of the map specifies one or 166 | -- more streams of tokens to parse at the closing tag and what to 167 | -- return when this pair is encountered. See also 'setTagsFromList'. 168 | -- 169 | -- The example e1 parses "()" as @'SList' () []@. 170 | -- 171 | -- > e1 = setTagsFromList $ M.fromList [("(", [")", ()]] p 172 | -- 173 | -- The example e2 parses both "()" and "[]" as @'SList' () []@ but does 174 | -- not parse "(]" or "[)" 175 | -- 176 | -- > e2 = setTagsFromList $ M.fromList [("(", [")", ()]), ("[", ["]", ()])] p 177 | -- 178 | -- The example e3 parses "()" as @'SList' List []@ and "#()" as 179 | -- @'SList' Vector []@, but does not parse "(]" or "[)" 180 | -- 181 | -- > e3 = setTagsFromList $ M.fromList [("(", [")", List]), ("#(", [")",Vector])] p 182 | -- 183 | -- The example e4 parses "()" as @'SList' ')' []@ and "(]" as 184 | -- @'SList' ']' []@, but does not parse "])" 185 | -- 186 | -- > e4 = setTagsFromList $ M.fromList [("(", [(")", ')'), ("]", ']')])] p 187 | setTagsFromMap :: (MonadParsec e s m) => 188 | M.Map (Tokens s) [(Tokens s, b)] -> SExprParser m b' a -> SExprParser m b a 189 | setTagsFromMap m p = 190 | let l = M.toList m 191 | 192 | choose [] = empty 193 | choose ((s, eb) : ts) = (chunk s >> return eb) <|> choose ts 194 | 195 | stag = choose l 196 | 197 | etag = \xs -> choice $ map (\(e, b) -> chunk e >> return b) xs 198 | in setTags stag etag p 199 | 200 | -- | The 'spaceIsMandatory' function is a spacing rule where space is always mandatory. See also 'getSpacingRule'. 201 | spaceIsMandatory :: a -> a -> SpacingRule 202 | spaceIsMandatory = \_ _ -> SMandatory 203 | 204 | -- | The 'spaceIsOptional' function is a spacing rule where space is always optional. See also 'getSpacingRule'. 205 | spaceIsOptional :: a -> a -> SpacingRule 206 | spaceIsOptional = \_ _ -> SOptional 207 | 208 | -- | The 'setSpacingRule' function modifies a 'SExprParser' by setting 209 | -- the function to tell if two consecutive atoms must be separated by 210 | -- space or not. See also 'mkSpacingRule'. 211 | setSpacingRule :: (a -> a -> SpacingRule) -> SExprParser m b a -> SExprParser m b a 212 | setSpacingRule r p@(SExprParser pSTag pETag _ _ _) = SExprParser pSTag pETag (getAtom p) (getSpace p) r 213 | 214 | -- | The 'mkSpacingRule' function is a helper to create a valid 215 | -- spacing rule function for 'SExprParser' when some atoms have the 216 | -- same 'SpacingRule' both before and after no matter what the other 217 | -- atom is. It takes as argument a function @f@ that takes a single 218 | -- atom and returns the 'SpacingRule' that applies both before and 219 | -- after this atom. 220 | -- 221 | -- For example, to create a spacing rule where space is optional both 222 | -- before and after the fictitious @MyString@ token: 223 | -- 224 | -- > s (MyString _) = SOptional 225 | -- > s _ = Mandatory 226 | -- > spacingRule = mkSpacingRule s 227 | -- 228 | -- The above is equivalent to : 229 | -- 230 | -- > spacingRule (MyString _) _ = SOptional 231 | -- > spacingRule _ (MyString _) = SOptional 232 | -- > spacingRule _ _ = SMandatory 233 | 234 | mkSpacingRule :: (a -> SpacingRule) -> (a -> a -> SpacingRule) 235 | mkSpacingRule f = \a1 a2 -> case f a1 of 236 | SOptional -> SOptional 237 | SMandatory -> f a2 238 | 239 | -- | The 'setSpace' function modifies a 'SExprParser' by setting the 240 | -- parser to parse whitespace. The parser for whitespace must not 241 | -- accept the empty input (e.g. 'Text.Megaparsec.Char.space1') 242 | setSpace :: m () -> SExprParser m b a -> SExprParser m b a 243 | setSpace sp (SExprParser s e a _ sr) = SExprParser s e a sp sr 244 | 245 | -- Tells if the space (or absence of) between two atoms is valid or not 246 | spaceIsOK :: (a -> a -> SpacingRule) -> (SExpr b a) -> (SExpr b a) -> Bool -> Bool 247 | spaceIsOK getSpacingRule' sexp1 sexp2 spaceInBetween = 248 | case (sexp1, sexp2, spaceInBetween) of 249 | (_, _, True) -> True 250 | (SList _ _, _, _) -> True 251 | (_, SList _ _, _) -> True 252 | (SAtom a1, SAtom a2, _) -> getSpacingRule' a1 a2 == SOptional 253 | 254 | sepEndBy' :: (MonadParsec e s m, TraversableStream s) => m (SExpr b a) -> m () -> (a -> a -> SpacingRule) -> m [SExpr b a] 255 | sepEndBy' p sep f = sepEndBy1' p sep f <|> pure [] 256 | 257 | sepEndBy1' :: (MonadParsec e s m, TraversableStream s) => m (SExpr b a) -> m () -> (a -> a -> SpacingRule) -> m [SExpr b a] 258 | sepEndBy1' p sep f = do 259 | x <- p 260 | xs <- parseContent x 261 | return $ x : xs 262 | 263 | where parseContent a1 = do 264 | s <- maybe False (const True) <$> optional sep 265 | mpos <- if not s then Just <$> getSourcePos else return Nothing 266 | mx <- optional p 267 | case mx of 268 | Nothing -> return [] 269 | Just a2 -> 270 | if spaceIsOK f a1 a2 s 271 | then do 272 | xs <- parseContent a2 273 | return $ a2 : xs 274 | else label ("The previous two atoms are not separated by space.\n" <> 275 | "A space was expected at " <> sourcePosPretty (fromJust mpos)) mzero 276 | 277 | -- | The 'parseSExprList' function return a parser for parsing S-expression of the form @'SList' _ _@. 278 | parseSExprList :: (MonadParsec e s m, TraversableStream s) => 279 | SExprParser m b a -> m (SExpr b a) 280 | parseSExprList def@(SExprParser pSTag pETag _ sp sr) = do 281 | c <- pSTag 282 | _ <- optional sp 283 | xs <- sepEndBy' (parseSExpr def) sp sr 284 | b <- pETag c 285 | return $ SList b xs 286 | 287 | -- | The 'parseSExpr' function return a parser for parsing 288 | -- S-expression ('SExpr'), that is either an atom (@'SAtom' _@) or a 289 | -- list @'SList' _ _@. See also 'decodeOne' and 'decode'. 290 | parseSExpr :: (MonadParsec e s m, TraversableStream s) => 291 | SExprParser m b a -> m (SExpr b a) 292 | parseSExpr def = (getAtom def >>= return . SAtom) <|> (parseSExprList def) 293 | 294 | -- | The 'decodeOne' function return a parser for parsing a file 295 | -- containing only one S-expression ('SExpr'). It can parse extra 296 | -- whitespace at the beginning and at the end of the file. See also 297 | -- 'parseSExpr' and 'decode'. 298 | decodeOne :: (MonadParsec e s m, TraversableStream s) => SExprParser m b a -> m (SExpr b a) 299 | decodeOne def = 300 | let ws = getSpace def 301 | in optional ws *> parseSExpr def <* (optional ws >> eof) 302 | 303 | -- | The 'decode' function return a parser for parsing a file 304 | -- containing many S-expression ('SExpr'). It can parse extra 305 | -- whitespace at the beginning and at the end of the file. See also 306 | -- 'parseSExpr' and 'decodeOne'. 307 | decode :: (MonadParsec e s m, TraversableStream s) => SExprParser m b a -> m [SExpr b a] 308 | decode def = 309 | let ws = getSpace def 310 | in optional ws *> sepEndBy' (parseSExpr def) ws (getSpacingRule def) <* eof 311 | -------------------------------------------------------------------------------- /test/SchemeR5RS_Unittests.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TupleSections #-} 3 | 4 | module SchemeR5RS_Unittests ( 5 | r5rsTestTree 6 | )where 7 | 8 | import Data.Void 9 | import qualified Data.Text as T 10 | import Data.Either 11 | import Data.Bifunctor (first) 12 | import Test.Tasty 13 | import Test.Tasty.HUnit 14 | import Text.Megaparsec 15 | --import Text.Megaparsec.Char 16 | import Data.SExpresso.SExpr 17 | import Data.SExpresso.Parse 18 | import Data.SExpresso.Language.SchemeR5RS as R5 19 | 20 | type Parser = Parsec Void T.Text 21 | 22 | pSExpr :: Parser [SExpr R5.SExprType R5.SchemeToken] 23 | pSExpr = decode R5.sexpr 24 | 25 | -- tparse parses the whole input 26 | tparse :: Parser a -> T.Text -> Either String a 27 | tparse p s = first errorBundlePretty $ parse (p <* eof) "" s 28 | 29 | -- Runs a test that must succeed. Accepts the parser and 30 | -- the string to parse and the expected value as tuple 31 | okTest :: (Eq a, Show a) => Parser a -> (T.Text, a) -> TestTree 32 | okTest p (str, expected) = testCase ("Should parse " ++ show str) 33 | $ tparse p str @?= Right expected 34 | 35 | okDatum :: (T.Text, [Datum]) -> TestTree 36 | okDatum (t, d) = okTest (sexpr2Datum <$> pSExpr) (t, Right d) 37 | 38 | -- Runs a test that must fail. Accepts the parser 39 | -- and the string to parse 40 | koTest :: Parser a -> T.Text -> TestTree 41 | koTest p str = testCase ("Should not parse " ++ show str) 42 | $ (isLeft $ tparse p str) @? 43 | ("Parsing must fail on " ++ show str) 44 | 45 | koDatum :: T.Text -> TestTree 46 | koDatum str = testCase ("Should not parse " ++ show str) 47 | $ (isLeft $ (tparse pSExpr str >>= sexpr2Datum)) @? 48 | ("Parsing must fail on " ++ show str) 49 | 50 | -- Tests about whitespace that must succeed (see okTest) 51 | okWhitespace :: [(T.Text, ())] 52 | okWhitespace = map (,()) [" ", "\t", "\n", "\r\n"] 53 | 54 | -- Tests about comment that must succeed (see okTest) 55 | okComment :: [(T.Text, ())] 56 | okComment = map (,()) [";", ";hello world", ";hello\n", ";abcdef\r\n"] 57 | 58 | okInterTokenSpace1 :: [(T.Text, ())] 59 | okInterTokenSpace1 = okWhitespace 60 | ++ okComment 61 | ++ map (,()) [" ;comment\n ", 62 | "\t\n;comment \n ", 63 | "\n\n\n\n\n", 64 | "\r\n;Hello World", 65 | " ;comment\n ;comment"] 66 | 67 | okInterTokenSpace :: [(T.Text, ())] 68 | okInterTokenSpace = okInterTokenSpace1 ++ map (,()) [""] 69 | 70 | okChar :: [(T.Text, Char)] 71 | okChar = [("#\\t", 't'), 72 | ("#\\a", 'a'), 73 | ("#\\space", ' '), 74 | ("#\\newline", '\n'), 75 | ("#\\\n", '\n'), 76 | ("#\\ ", ' '), 77 | ("#\\\t", '\t')] 78 | 79 | okBool :: [(T.Text, Bool)] 80 | okBool = [("#t", True), 81 | ("#f", False)] 82 | 83 | okIdentifier :: [(T.Text, T.Text)] 84 | okIdentifier = map (\x -> (x,x)) ["foo", "x2", "!hot!", "+", "-", "...", 85 | "helloWorld"] 86 | 87 | okString :: [(T.Text, T.Text)] 88 | okString = [("\"abc def ghi\"", "abc def ghi"), 89 | ("\"\"", ""), 90 | ("\"\n\"", "\n"), 91 | ("\" \"", " "), 92 | ("\"\t\"", "\t"), 93 | (T.pack ['"','\\','\\','"'], "\\"), 94 | (T.pack ['"','\\','"','"'], "\"")] 95 | 96 | okNumber :: [(T.Text, SchemeNumber)] 97 | okNumber = [("-1", SchemeNumber Exact $ 98 | CReal (SInteger Minus (UInteger 1))), 99 | ("-0", SchemeNumber Exact $ 100 | CReal (SInteger Minus (UInteger 0 ))), 101 | ("0", SchemeNumber Exact $ 102 | CReal (SInteger Plus (UInteger 0))), 103 | ("1", SchemeNumber Exact $ 104 | CReal (SInteger Plus (UInteger 1))), 105 | 106 | 107 | ("#e1", SchemeNumber Exact $ 108 | CReal (SInteger Plus (UInteger 1))), 109 | 110 | ("#i1", SchemeNumber Inexact $ 111 | CReal (SInteger Plus (UInteger 1))), 112 | 113 | ("#b1", SchemeNumber Exact $ 114 | CReal (SInteger Plus (UInteger 1))), 115 | ("#o1", SchemeNumber Exact $ 116 | CReal (SInteger Plus (UInteger 1))), 117 | ("#d1", SchemeNumber Exact $ 118 | CReal (SInteger Plus (UInteger 1))), 119 | ("#x1", SchemeNumber Exact $ 120 | CReal (SInteger Plus (UInteger 1))), 121 | ("#xa", SchemeNumber Exact $ CReal (SInteger Plus (UInteger 10))), 122 | ("#xb", SchemeNumber Exact $ 123 | CReal (SInteger Plus (UInteger 11))), 124 | ("#xc", SchemeNumber Exact $ 125 | CReal (SInteger Plus (UInteger 12))), 126 | ("#xd", SchemeNumber Exact $ 127 | CReal (SInteger Plus (UInteger 13))), 128 | ("#xe", SchemeNumber Exact $ 129 | CReal (SInteger Plus (UInteger 14))), 130 | ("#xf", SchemeNumber Exact $ 131 | CReal (SInteger Plus (UInteger 15))), 132 | ("-0001", SchemeNumber Exact $ 133 | CReal (SInteger Minus (UInteger 1))), 134 | ("-0000", SchemeNumber Exact $ 135 | CReal (SInteger Minus (UInteger 0))), 136 | ("0000", SchemeNumber Exact $ 137 | CReal (SInteger Plus (UInteger 0))), 138 | ("0001", SchemeNumber Exact $ 139 | CReal (SInteger Plus (UInteger 1))), 140 | 141 | ("-1#", SchemeNumber Inexact $ 142 | CReal (SInteger Minus (UIntPounds 1 1))), 143 | ("-0#", SchemeNumber Inexact $ 144 | CReal (SInteger Minus (UIntPounds 0 1))), 145 | ("0#", SchemeNumber Inexact $ 146 | CReal (SInteger Plus (UIntPounds 0 1))), 147 | ("1#", SchemeNumber Inexact $ 148 | CReal (SInteger Plus (UIntPounds 1 1))), 149 | 150 | ("-1###", SchemeNumber Inexact $ 151 | CReal (SInteger Minus (UIntPounds 1 3))), 152 | ("-0###", SchemeNumber Inexact $ 153 | CReal (SInteger Minus (UIntPounds 0 3))), 154 | ("0###", SchemeNumber Inexact $ 155 | CReal (SInteger Plus (UIntPounds 0 3))), 156 | ("1###", SchemeNumber Inexact $ 157 | CReal (SInteger Plus (UIntPounds 1 3))), 158 | 159 | ("-12345", SchemeNumber Exact $ 160 | CReal (SInteger Minus (UInteger 12345))), 161 | ("12345", SchemeNumber Exact $ 162 | CReal (SInteger Plus (UInteger 12345))), 163 | 164 | ("-12345/5", SchemeNumber Exact $ 165 | CReal (SRational Minus (UInteger 12345) (UInteger 5))), 166 | ("12345/5", SchemeNumber Exact $ 167 | CReal (SRational Plus (UInteger 12345) (UInteger 5))), 168 | ("-12345#/5", SchemeNumber Inexact $ 169 | CReal (SRational Minus (UIntPounds 12345 1) (UInteger 5))), 170 | ("12345/5##", SchemeNumber Inexact $ 171 | CReal (SRational Plus (UInteger 12345) (UIntPounds 5 2))), 172 | ("-12345##/5", SchemeNumber Inexact $ 173 | CReal (SRational Minus (UIntPounds 12345 2) (UInteger 5))), 174 | ("12345####/5#", SchemeNumber Inexact $ 175 | CReal (SRational Plus (UIntPounds 12345 4) (UIntPounds 5 1))), 176 | 177 | 178 | ("-12345.0", SchemeNumber Inexact $ 179 | CReal (SDecimal Minus (UInteger 12345) (UInteger 0) Nothing)), 180 | (".0", SchemeNumber Inexact $ 181 | CReal (SDecimal Plus (UInteger 0) (UInteger 0) Nothing)), 182 | ("0.", SchemeNumber Inexact $ 183 | CReal (SDecimal Plus (UInteger 0) (UInteger 0) Nothing)), 184 | 185 | ("0.###", SchemeNumber Inexact $ 186 | CReal (SDecimal Plus (UInteger 0) (UPounds 3) Nothing)), 187 | ("-.569", SchemeNumber Inexact $ 188 | CReal (SDecimal Minus (UInteger 0) (UInteger 569) Nothing)), 189 | ("-245#.", SchemeNumber Inexact $ 190 | CReal (SDecimal Minus (UIntPounds 245 1) (UPounds 0) Nothing)), 191 | ("#e-.569", SchemeNumber Exact $ 192 | CReal (SDecimal Minus (UInteger 0) (UInteger 569) Nothing)), 193 | ("1e10", SchemeNumber Inexact $ 194 | CReal (SDecimal Plus (UInteger 1) 195 | (UInteger 0) 196 | (Just $ Suffix PDefault Plus 10))), 197 | ("1e-10", SchemeNumber Inexact $ 198 | CReal (SDecimal Plus (UInteger 1) 199 | (UInteger 0) 200 | (Just $ Suffix PDefault Minus 10))), 201 | ("1s10", SchemeNumber Inexact $ 202 | CReal (SDecimal Plus (UInteger 1) 203 | (UInteger 0) 204 | (Just $ Suffix PShort Plus 10))), 205 | ("1f10", SchemeNumber Inexact $ 206 | CReal (SDecimal Plus (UInteger 1) 207 | (UInteger 0) 208 | (Just $ Suffix PSingle Plus 10))), 209 | ("1d10", SchemeNumber Inexact $ 210 | CReal (SDecimal Plus (UInteger 1) 211 | (UInteger 0) 212 | (Just $ Suffix PDouble Plus 10))), 213 | ("1l10", SchemeNumber Inexact $ 214 | CReal (SDecimal Plus (UInteger 1) 215 | (UInteger 0) 216 | (Just $ Suffix PLong Plus 10))), 217 | 218 | ("1+i", SchemeNumber Exact $ 219 | CAbsolute (SInteger Plus (UInteger 1)) (SInteger Plus (UInteger 1))), 220 | 221 | ("1-i", SchemeNumber Exact $ 222 | CAbsolute (SInteger Plus (UInteger 1)) (SInteger Minus (UInteger 1))), 223 | 224 | ("0.5+i", SchemeNumber Inexact $ 225 | CAbsolute (SDecimal Plus (UInteger 0) 226 | (UInteger 5) 227 | Nothing) (SInteger Plus (UInteger 1))), 228 | 229 | ("-8i", SchemeNumber Exact $ 230 | CAbsolute (SInteger Plus (UInteger 0)) (SInteger Minus (UInteger 8))), 231 | 232 | 233 | ("-8.25i", SchemeNumber Inexact $ 234 | CAbsolute (SInteger Plus (UInteger 0)) (SDecimal Minus (UInteger 8) 235 | (UInteger 25) 236 | Nothing)), 237 | ("0@25", SchemeNumber Exact $ 238 | CAngle (SInteger Plus (UInteger 0)) (SInteger Plus (UInteger 25))), 239 | 240 | ("1/4@-25", SchemeNumber Exact $ 241 | CAngle (SRational Plus (UInteger 1) (UInteger 4)) (SInteger Minus (UInteger 25))), 242 | 243 | ("1#/4@-25##", SchemeNumber Inexact $ 244 | CAngle (SRational Plus (UIntPounds 1 1) (UInteger 4)) (SInteger Minus (UIntPounds 25 2)))] 245 | 246 | -- Returns all the "ok..." series of tests except the one provided 247 | -- as input. Since R5RS grammar is non ambiguous, a token parser should not be able 248 | -- to parse the valid input of other token parser. 249 | mkKoTest :: [T.Text] -> [T.Text] 250 | mkKoTest goodStr = 251 | let allTests = map fst okInterTokenSpace ++ 252 | map fst okChar ++ 253 | map fst okBool ++ 254 | map fst okIdentifier ++ 255 | map fst okString ++ 256 | map fst okNumber 257 | in filter (not . (`elem` goodStr)) (alwaysBad ++ allTests) 258 | 259 | -- Always bad string input 260 | alwaysBad :: [T.Text] 261 | alwaysBad = ["#", "#T", "#F", "#true", "#false"] ++ 262 | -- Ill formated number 263 | ["#b3", "#o9", "#da", "#xA", "#b1.1", "#o1.1", "#x1.1", "#b.1", 264 | "#o.1", "#x.1", "123##.12"] 265 | 266 | r5rsTestTree :: TestTree 267 | r5rsTestTree = testGroup "Language/R5RS.hs" $ [ 268 | testGroup "whitespace" $ 269 | map (okTest R5.whitespace) okWhitespace ++ 270 | map (koTest R5.whitespace) (mkKoTest $ map fst okWhitespace), 271 | testGroup "comment" $ 272 | map (okTest R5.comment) okComment ++ 273 | map (koTest R5.comment) (mkKoTest $ map fst okComment), 274 | testGroup "interTokenSpace" $ 275 | map (okTest R5.interTokenSpace) okInterTokenSpace ++ 276 | map (koTest R5.interTokenSpace) (mkKoTest $ map fst okInterTokenSpace), 277 | testGroup "interTokenSpace1" $ 278 | map (okTest R5.interTokenSpace1) okInterTokenSpace1 ++ 279 | map (koTest R5.interTokenSpace1) (mkKoTest $ map fst okInterTokenSpace1), 280 | testGroup "character" $ 281 | map (okTest R5.character) okChar ++ 282 | map (koTest R5.character) (mkKoTest $ map fst okChar), 283 | testGroup "boolean" $ 284 | map (okTest R5.boolean) okBool ++ 285 | map (koTest R5.boolean) (mkKoTest $ map fst okBool), 286 | testGroup "identifier" $ 287 | map (okTest R5.identifier) okIdentifier ++ 288 | map (koTest R5.identifier) (mkKoTest $ map fst okIdentifier), 289 | testGroup "string" $ 290 | map (okTest R5.stringParser) okString ++ 291 | map (koTest R5.stringParser) (mkKoTest $ map fst okString), 292 | testGroup "number" $ 293 | map (okTest R5.number) okNumber ++ 294 | map (koTest R5.number) (mkKoTest $ map fst okNumber), 295 | 296 | testGroup "datum" $ 297 | map okDatum (map (fmap ( (:[]) . DChar)) okChar) ++ 298 | map okDatum (map (fmap ( (:[]) . DBoolean)) okBool) ++ 299 | map okDatum (map (fmap ( (:[]) . DIdentifier)) okIdentifier) ++ 300 | map okDatum (map (fmap ( (:[]) . DString)) okString) ++ 301 | map okDatum (map (fmap ( (:[]) . DNumber)) okNumber) ++ 302 | map okDatum 303 | [("(foo #\\a)", [DList [DIdentifier "foo", DChar 'a']]), 304 | ("(foo #\\a) \"hello\"", [DList [DIdentifier "foo", DChar 'a'], DString "hello"]), 305 | ("'foo", [DQuote (DIdentifier "foo")]), 306 | ("`foo", [DQuasiquote (DIdentifier "foo")]), 307 | ("`(foo ,a)", [DQuasiquote (DList [DIdentifier "foo", DComma (DIdentifier "a")])]), 308 | ("`(foo , a)", [DQuasiquote (DList [DIdentifier "foo", DComma (DIdentifier "a")])]), 309 | ("`(foo, a)", [DQuasiquote (DList [DIdentifier "foo", DComma (DIdentifier "a")])]), 310 | ("`(foo ,@a)", [DQuasiquote (DList [DIdentifier "foo", DCommaAt (DIdentifier "a")])]), 311 | ("`(foo ,@ a)", [DQuasiquote (DList [DIdentifier "foo", DCommaAt (DIdentifier "a")])]), 312 | ("`(foo,@ a)", [DQuasiquote (DList [DIdentifier "foo", DCommaAt (DIdentifier "a")])]), 313 | ("(foo . a)", [DDotList [DIdentifier "foo"] (DIdentifier "a")]), 314 | ("(foo a b c . d)", 315 | [DDotList [DIdentifier "foo", DIdentifier "a", DIdentifier "b", DIdentifier "c"] (DIdentifier "d")])] 316 | ++ 317 | map koDatum ["(foo .)", "(foo ')", "(foo `)", "(foo ,)", "(foo ,@)", "(foo a b . c d)"] 318 | ] 319 | -------------------------------------------------------------------------------- /src/Data/SExpresso/Language/SchemeR5RS.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.SExpresso.Language.SchemeR5RS 3 | -- Copyright : © 2019 Vincent Archambault 4 | -- License : 0BSD 5 | -- 6 | -- Maintainer : Vincent Archambault 7 | -- Stability : experimental 8 | -- 9 | -- Module for parsing the Scheme R5RS language. 10 | -- 11 | -- Scheme R5RS s-expressions are parsed as @'SExpr' 'SExprType' 12 | -- 'SchemeToken'@. Such s-expressions can be converted into a Scheme 13 | -- R5RS datum (see 'Datum') by the function 'sexpr2Datum'. 14 | 15 | 16 | {-# LANGUAGE DeriveDataTypeable #-} 17 | {-# LANGUAGE OverloadedStrings #-} 18 | {-# LANGUAGE TypeFamilies #-} 19 | {-# LANGUAGE ScopedTypeVariables #-} 20 | 21 | -- Parsing library for some parts of the Scheme R5RS language 22 | -- as defined in section 7 of the report 23 | -- The library does parse tab and \r\n and whitespace 24 | module Data.SExpresso.Language.SchemeR5RS ( 25 | -- * SchemeToken and Datum related data types and functions 26 | SExprType(..), 27 | SchemeToken(..), 28 | tokenParser, 29 | sexpr, 30 | 31 | Datum(..), 32 | sexpr2Datum, 33 | 34 | -- * Scheme R5RS whitespace parsers 35 | whitespace, 36 | comment, 37 | interTokenSpace, 38 | interTokenSpace1, 39 | 40 | -- * Individual parser for each of the constructors of SchemeToken 41 | identifier, 42 | boolean, 43 | character, 44 | stringParser, 45 | quote, 46 | quasiquote, 47 | comma, 48 | commaAt, 49 | dot, 50 | 51 | -- ** Scheme Number 52 | -- 53 | -- | Scheme R5RS numbers are quite exotic. They can have exactness 54 | -- prefix, radix prefix and the pound sign (#) can replace a 55 | -- digit. On top of that, you can define integer, rational, decimal 56 | -- and complex numbers of arbitrary precision. Decimal numbers can 57 | -- also have a suffix indicating the machine precision. 58 | -- 59 | -- Since Haskell does not have native types to express this 60 | -- complexity, this module defines the 'SchemeNumber' data type to 61 | -- encode the parsed number. User of this module can then convert a 62 | -- 'SchemeNumber' object to a more appropriate data type according 63 | -- to their needs. 64 | SchemeNumber(..), 65 | Exactness(..), 66 | Complex(..), 67 | SReal(..), 68 | Sign(..), 69 | UInteger(..), 70 | Pounds, 71 | Precision(..), 72 | Suffix(..), 73 | number, 74 | 75 | 76 | ) where 77 | 78 | import Control.Monad (mzero) 79 | import Data.Data 80 | import Data.Maybe 81 | import Data.Proxy 82 | import Data.List 83 | import qualified Data.Char as C 84 | import qualified Data.Text as T 85 | import qualified Data.Text.Lazy as L 86 | import qualified Data.Text.Lazy.Builder as B 87 | import Data.Foldable 88 | import Text.Megaparsec 89 | import Text.Megaparsec.Char 90 | import qualified Text.Megaparsec.Char.Lexer as ML 91 | import Data.SExpresso.SExpr 92 | import Data.SExpresso.Parse 93 | 94 | -- | The 'SchemeToken' data type defines the atoms of an Scheme R5RS 95 | -- s-expression. An @'SExpr' 'SExprType' 'SchemeToken'@ object 96 | -- containning the atoms 'TQuote', 'TQuasiquote', 'TComma', 'TCommaAt' 97 | -- and 'TDot' need futher processing in order to get what the R5RS 98 | -- report calls a datum. See also 'Datum'. 99 | data SchemeToken = 100 | -- | A boolean. 101 | TBoolean Bool 102 | -- | A number. See 'SchemeNumber'. 103 | | TNumber SchemeNumber 104 | -- | A unicode character. 105 | | TChar Char 106 | -- | A string. 107 | | TString T.Text 108 | -- | A valid R5RS identifier. 109 | | TIdentifier T.Text 110 | -- | The quote (') symbol. 111 | | TQuote 112 | -- | The quasiquote (`) symbol. 113 | | TQuasiquote 114 | -- | The comma (,) symbol. 115 | | TComma 116 | -- | The comma at (,\@) symbol. 117 | | TCommaAt 118 | -- | The dot (.) symbol. 119 | | TDot 120 | deriving (Eq, Show, Data) 121 | 122 | -- | The 'tokenParser' parses a 'SchemeToken' 123 | tokenParser :: (MonadParsec e s m, Token s ~ Char) => m SchemeToken 124 | tokenParser = (boolean >>= return . TBoolean) <|> 125 | -- character must come before number 126 | (character >>= return . TChar) <|> 127 | (stringParser >>= return . TString) <|> 128 | -- We must try number because it can conflict with 129 | -- the dot ex : .2 and (a . b) 130 | -- and identifier ex : - and -1 131 | (try number >>= return . TNumber) <|> 132 | (identifier >>= return . TIdentifier) <|> 133 | (quote >> return TQuote) <|> 134 | (quasiquote >> return TQuasiquote) <|> 135 | -- commaAt must come before comma 136 | (commaAt >> return TCommaAt) <|> 137 | (comma >> return TComma) <|> 138 | (dot >> return TDot) 139 | 140 | 141 | spacingRule :: SchemeToken -> SpacingRule 142 | spacingRule (TString _) = SOptional 143 | spacingRule TQuote = SOptional 144 | spacingRule TQuasiquote = SOptional 145 | spacingRule TComma = SOptional 146 | spacingRule TCommaAt = SOptional 147 | spacingRule _ = SMandatory 148 | 149 | -- | Scheme R5RS defines two types of s-expressions. Standard list 150 | -- beginning with '(' and vector beginning with '#('. The 'SExprType' 151 | -- data type indicates which one was parsed. 152 | data SExprType = 153 | -- | A standard list 154 | STList 155 | -- | A vector 156 | | STVector 157 | deriving (Eq, Show, Data) 158 | 159 | -- | The 'sexpr' defines a 'SExprParser' to parse a Scheme R5RS 160 | -- s-expression as an @'SExpr' 'SExprType' 'SchemeToken'@. If you also 161 | -- want source position see the 'withLocation' function. 162 | -- 163 | -- Space is optional before and after the following tokens: 164 | -- 165 | -- * 'TString' 166 | -- * 'TQuote' 167 | -- * 'TQuasiquote' 168 | -- * 'TComma' 169 | -- * 'TCommaAt' 170 | sexpr :: forall e s m . (MonadParsec e s m, Token s ~ Char) => SExprParser m SExprType SchemeToken 171 | sexpr = 172 | let sTag = (single '(' >> return STList) <|> (chunk (tokensToChunk (Proxy :: Proxy s) "#(") >> return STVector) 173 | eTag = \t -> single ')' >> return t 174 | in SExprParser sTag eTag tokenParser interTokenSpace1 (mkSpacingRule spacingRule) 175 | 176 | -- | The 'Datum' data type implements the Scheme R5RS definition of a Datum. See also 'sexpr2Datum'. 177 | data Datum = DBoolean Bool 178 | | DNumber SchemeNumber 179 | | DChar Char 180 | | DString T.Text 181 | | DIdentifier T.Text 182 | | DList [Datum] 183 | | DDotList [Datum] Datum 184 | | DQuote Datum 185 | | DQuasiquote Datum 186 | | DComma Datum 187 | | DCommaAt Datum 188 | | DVector [Datum] 189 | deriving (Eq, Show, Data) 190 | 191 | -- | The 'sexpr2Datum' function takes a list of 'SchemeToken' and 192 | -- returns a list of 'Datum'. In case of failure it will report an 193 | -- error, hence the 'Either' data type in the signature. 194 | -- 195 | -- As defined in the Scheme R5RS report, the 'TQuote', 'TQuasiquote', 196 | -- 'TComma', 'TCommaAt' and 'TDot' tokens must be followed by another 197 | -- token. 198 | sexpr2Datum :: [SExpr SExprType SchemeToken] -> Either String [Datum] 199 | sexpr2Datum = foldrM vectorFold [] 200 | where vectorFold :: SExpr SExprType SchemeToken -> [Datum] -> Either String [Datum] 201 | vectorFold (SAtom TQuote) [] = Left $ "Expecting a datum after a quote" 202 | vectorFold (SAtom TQuote) (x : xs) = pure $ DQuote x : xs 203 | vectorFold (SAtom TQuasiquote) [] = Left $ "Expecting a datum after a quasiquote" 204 | vectorFold (SAtom TQuasiquote) (x : xs) = pure $ DQuasiquote x : xs 205 | vectorFold (SAtom TComma) [] = Left $ "Expecting a datum after a comma" 206 | vectorFold (SAtom TComma) (x : xs) = pure $ DComma x : xs 207 | vectorFold (SAtom TCommaAt) [] = Left $ "Expecting a datum after a commaAt" 208 | vectorFold (SAtom TCommaAt) (x : xs) = pure $ DCommaAt x : xs 209 | vectorFold (SAtom TDot) _ = Left "Unexpected dot" 210 | vectorFold (SList STVector xs) acc = ((:) . DVector) <$> sexpr2Datum xs <*> pure acc 211 | vectorFold (SList STList xs) acc = 212 | let chooseConstructor (isDotList, ls) = (:) (if isDotList 213 | then DDotList (init ls) (last ls) 214 | else DList ls) 215 | in chooseConstructor <$> (foldrM listFold (False, []) xs) <*> pure acc 216 | vectorFold (SAtom x) acc = pure $ simpleToken x : acc 217 | 218 | simpleToken :: SchemeToken -> Datum 219 | simpleToken (TBoolean x) = DBoolean x 220 | simpleToken (TNumber x) = DNumber x 221 | simpleToken (TChar x) = DChar x 222 | simpleToken (TString x) = DString x 223 | simpleToken (TIdentifier x) = DIdentifier x 224 | simpleToken _ = error "simpleToken only handles a subset of SchemeToken constructors" 225 | 226 | listFold :: SExpr SExprType SchemeToken -> (Bool, [Datum]) -> Either String (Bool, [Datum]) 227 | listFold (SAtom TDot) (_, [x]) = pure (True, [x]) 228 | listFold x (d, acc) = (,) d <$> vectorFold x acc 229 | 230 | ------------------------- Whitespace and comments ------------------------- 231 | -- | The 'whitespace' parser parses one space, tab or end of line (\\n and \\r\\n). 232 | whitespace :: (MonadParsec e s m, Token s ~ Char) => m () 233 | whitespace = (char ' ' >> return ()) <|> 234 | (char '\t' >> return ()) <|> 235 | (eol >> return ()) 236 | 237 | -- | The 'comment' parser parses a semi-colon (;) character and 238 | -- everything until the end of line included. 239 | comment :: (MonadParsec e s m, Token s ~ Char) => m () 240 | comment = char ';' >> 241 | takeWhileP Nothing (\c -> c /= '\n' && c /= '\r') >> 242 | ((eol >> return ()) <|> eof) 243 | 244 | atmosphere :: (MonadParsec e s m, Token s ~ Char) => m () 245 | atmosphere = whitespace <|> comment 246 | 247 | -- | The 'interTokenSpace' parser parses zero or more whitespace or comment. 248 | interTokenSpace :: (MonadParsec e s m, Token s ~ Char) => m () 249 | interTokenSpace = many atmosphere >> return () 250 | 251 | -- | The 'interTokenSpace1' parser parses one or more whitespace or comment. 252 | interTokenSpace1 :: (MonadParsec e s m, Token s ~ Char) => m () 253 | interTokenSpace1 = some atmosphere >> return () 254 | 255 | ------------------------- Identifier ------------------------- 256 | 257 | -- | The 'identifier' parser parses a Scheme R5RS identifier. 258 | identifier :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m T.Text 259 | identifier = standardIdentifier <|> peculiarIdentifier 260 | where standardIdentifier = do 261 | i <- oneOf initialList 262 | is <- takeWhileP Nothing (\c -> c `elem` subsequentList) 263 | return $ T.pack $ (i : chunkToTokens (Proxy :: Proxy s) is) 264 | 265 | initialList :: String 266 | initialList = ['a'..'z'] ++ ['A'..'Z'] ++ "!$%&*/:<=>?^_~" 267 | 268 | subsequentList :: String 269 | subsequentList = initialList ++ ['0'..'9'] ++ "+-.@" 270 | 271 | peculiarIdentifier :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m T.Text 272 | peculiarIdentifier = (single '+' >> return "+") <|> 273 | (single '-' >> return "-") <|> 274 | (chunk (tokensToChunk (Proxy :: Proxy s) "...") >> return "...") 275 | 276 | ------------------------- Booleans ------------------------- 277 | -- | The 'boolean' parser parses a Scheme R5RS boolean (\#t or \#f). 278 | boolean :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m Bool 279 | boolean = (chunk (tokensToChunk (Proxy :: Proxy s) "#t") >> return True) <|> 280 | (chunk (tokensToChunk (Proxy :: Proxy s) "#f") >> return False) 281 | 282 | 283 | ------------------------- Character ------------------------- 284 | -- | The 'character' parser parses a Scheme R5RS character. 285 | character :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m Char 286 | character = do 287 | _ <- chunk (tokensToChunk (Proxy :: Proxy s) "#\\") 288 | (chunk (tokensToChunk (Proxy :: Proxy s) "newline") >> return '\n') <|> 289 | (chunk (tokensToChunk (Proxy :: Proxy s) "space") >> return ' ') <|> 290 | anySingle 291 | 292 | ------------------------- String ------------------------- 293 | -- | The 'stringParser' parser parses a Scheme R5RS character. 294 | stringParser :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m T.Text 295 | stringParser = do 296 | _ <- char '"' 297 | xs <- consume 298 | return $ L.toStrict $ B.toLazyText xs 299 | 300 | where consume :: (MonadParsec e s m, Token s ~ Char) => m B.Builder 301 | consume = do 302 | x <- takeWhileP Nothing (\c -> c /= '\\' && c /= '"') 303 | c <- char '\\' <|> char '"' 304 | let xB = B.fromString $ chunkToTokens (Proxy :: Proxy s) x 305 | case c of 306 | '"' -> return $ xB 307 | _ -> do 308 | c1 <- char '\\' <|> char '"' 309 | x2 <- consume 310 | return $ xB <> B.fromString [c1] <> x2 311 | 312 | 313 | ------------------------- Numbers ------------------------- 314 | data Radix = R2 | R8 | R10 | R16 315 | deriving (Eq, Show, Data) 316 | 317 | -- | A Scheme R5RS number is either exact or inexact. The paragraph 318 | -- 6.4.2 from the R5RS report should clarify the meaning of exact and 319 | -- inexact : 320 | -- 321 | -- \"\"\"A numerical constant may be specified to be either 322 | -- exact or inexact by a prefix. The prefixes are \#e for exact, and \#i 323 | -- for inexact. An exactness prefix may appear before or after any 324 | -- radix prefix that is used. If the written representation of a 325 | -- number has no exactness prefix, the constant may be either inexact 326 | -- or exact. It is inexact if it contains a decimal point, an 327 | -- exponent, or a \“#\” character in the place of a digit, otherwise it 328 | -- is exact.\"\"\" 329 | data Exactness = Exact | Inexact 330 | deriving (Eq, Show, Data) 331 | 332 | -- | The 'Sign' datatype indicates if a number is positive ('Plus') or negative ('Minus') 333 | data Sign = Plus | Minus 334 | deriving (Eq, Show, Data) 335 | 336 | -- | A Scheme R5RS number can have many # signs at the end. This type alias 337 | -- indicates the number of # signs parsed. 338 | type Pounds = Integer 339 | 340 | -- | A Scheme R5RS unsigned integer can be written in three ways. 341 | -- 342 | -- * With digits only 343 | -- * With digits and # signs 344 | -- * With only # signs in some special context. 345 | data UInteger = 346 | -- | Integer made only of digits 347 | UInteger Integer 348 | -- | Integer made of digits and #. The first argument is the number 349 | -- that was parsed and the second the number of # signs. For 350 | -- example, 123## is represented as @UIntPounds 123 2@. Do not take 351 | -- the first argument as a good approximation of the number. It 352 | -- needs to be shifted by the number of pounds. 353 | | UIntPounds Integer Pounds 354 | -- | Integer made only of #. It can only appear as the third argument in numbers of the form @'SDecimal' _ _ _ _@. 355 | | UPounds Pounds 356 | deriving (Eq, Show, Data) 357 | 358 | hasPounds :: UInteger -> Bool 359 | hasPounds (UInteger _) = False 360 | hasPounds _ = True 361 | 362 | isInexactI :: UInteger -> Bool 363 | isInexactI = hasPounds 364 | 365 | -- | Scheme R5RS defines 5 types of machine precision for a decimal 366 | -- number. The machine precision is specified in the suffix (see 367 | -- 'Suffix'). 368 | data Precision = 369 | -- | Suffix starting with e. 370 | PDefault | 371 | -- | Suffix starting with s. 372 | PShort | 373 | -- | Suffix starting with f. 374 | PSingle | 375 | -- | Suffix starting with d. 376 | PDouble | 377 | -- | Suffix starting with l. 378 | PLong 379 | deriving (Eq, Show, Data) 380 | 381 | -- | The 'Suffix' data type represents the suffix for a Scheme R5RS 382 | -- decimal number. It is a based 10 exponent. 383 | data Suffix = Suffix Precision Sign Integer 384 | deriving (Eq, Show, Data) 385 | 386 | -- | The 'SReal' data type represents a Scheme R5RS real number. 387 | data SReal = 388 | -- | A signed integer. 389 | SInteger Sign UInteger 390 | -- | A signed rational. The first number is the numerator and the 391 | -- second one the denominator. 392 | | SRational Sign UInteger UInteger 393 | -- | A signed decimal number. The first number appears before the 394 | -- dot, the second one after the dot. 395 | | SDecimal Sign UInteger UInteger (Maybe Suffix) 396 | deriving (Eq, Show, Data) 397 | 398 | isInexactR :: SReal -> Bool 399 | isInexactR (SInteger _ i) = isInexactI i 400 | isInexactR (SRational _ i1 i2) = isInexactI i1 || isInexactI i2 401 | isInexactR (SDecimal _ _ _ _) = True 402 | 403 | -- | The 'Complex' data type represents a Scheme R5RS complex number. 404 | data Complex = 405 | -- | A real number. 406 | CReal SReal 407 | -- | A complex number in angular notation. 408 | | CAngle SReal SReal 409 | -- | A complex number in absolute notation. 410 | | CAbsolute SReal SReal 411 | deriving (Eq, Show, Data) 412 | 413 | isInexact :: Complex -> Bool 414 | isInexact (CReal s) = isInexactR s 415 | isInexact (CAngle s1 s2) = isInexactR s1 || isInexactR s2 416 | isInexact (CAbsolute s1 s2) = isInexactR s1 || isInexactR s2 417 | 418 | -- | A Scheme R5RS number is an exact or inexact complex number. 419 | data SchemeNumber = SchemeNumber Exactness Complex 420 | deriving (Eq, Show, Data) 421 | 422 | -- | The 'number' parser parses a Scheme R5RS number. 423 | number :: (MonadParsec e s m, Token s ~ Char) => m SchemeNumber 424 | number = do 425 | (r, e) <- prefix 426 | c <- complex (fromMaybe R10 r) 427 | let e' = fromMaybe (if isInexact c then Inexact else Exact) e 428 | return $ SchemeNumber e' c 429 | 430 | complex :: forall e s m . (MonadParsec e s m, Token s ~ Char) => Radix -> m Complex 431 | complex r = do 432 | ms <- optional sign 433 | case ms of 434 | Nothing -> complex' Plus 435 | Just s -> i s <|> complex' s 436 | 437 | where 438 | -- Parser for +i and -i 439 | i s = char 'i' >> (return $ CAbsolute (SInteger Plus (UInteger 0)) (SInteger s (UInteger 1))) 440 | 441 | -- Parser for complex except +i and -i 442 | complex' sr = do 443 | -- First parse a number 444 | n1 <- ureal r sr 445 | -- Check if the number is followed by any of these characters 446 | c <- optional (char '@' <|> char '+' <|> char '-' <|> char 'i') 447 | case c of 448 | -- Plain real number 449 | Nothing -> return $ CReal n1 450 | -- Complex angular number 451 | Just '@' -> do 452 | n2 <- real r 453 | return $ CAngle n1 n2 454 | -- Pure imaginary number 455 | Just 'i' -> return $ CAbsolute (SInteger Plus (UInteger 0)) n1 456 | -- Real +/- Imaginary number 457 | Just '+' -> imaginaryPart n1 Plus 458 | Just _ -> imaginaryPart n1 Minus 459 | 460 | imaginaryPart realN si = do 461 | u <- optional (ureal r si) 462 | _ <- char 'i' 463 | case u of 464 | Nothing -> return $ CAbsolute realN (SInteger si (UInteger 1)) 465 | Just n2 -> return $ CAbsolute realN n2 466 | 467 | real :: (MonadParsec e s m, Token s ~ Char) => Radix -> m SReal 468 | real r = do 469 | s <- option Plus sign 470 | ureal r s 471 | 472 | ureal :: forall e s m . (MonadParsec e s m, Token s ~ Char) => Radix -> Sign -> m SReal 473 | ureal r s = dotN <|> ureal' 474 | 475 | where dotN = do 476 | _ <- char '.' 477 | if r /= R10 478 | then label "Numbers containing decimal point must be in decimal radix" mzero 479 | else do 480 | n <- uinteger R10 481 | sf <- optional suffix 482 | return $ SDecimal s (UInteger 0) n sf 483 | 484 | ureal' = do 485 | -- First parse an integer 486 | u1 <- uinteger r 487 | -- Check if the integer is followed by these characters 488 | mc <- optional (char '/' <|> char '.') 489 | case mc of 490 | -- Integer with or without suffix 491 | Nothing -> plainInteger u1 492 | -- Rational 493 | Just '/' -> rational u1 494 | -- Decimal 495 | Just _ -> decimal u1 496 | 497 | plainInteger u1 = do 498 | sf <- optional suffix 499 | case sf of 500 | Just _ -> return $ SDecimal s u1 (UInteger 0) sf 501 | Nothing -> return $ SInteger s u1 502 | 503 | rational u1 = do 504 | u2 <- uinteger r 505 | return $ SRational s u1 u2 506 | 507 | decimal u1 = do 508 | if r /= R10 509 | then label "Numbers containing decimal point must be in decimal radix" mzero 510 | else do 511 | -- If u1 has # character, only other # are 512 | -- allowed. Otherwise a number may be present 513 | n <- if hasPounds u1 then return Nothing else optional (udigit R10) :: m (Maybe Integer) 514 | pounds <- takeWhileP Nothing (== '#') 515 | sf <- optional suffix 516 | let nbPounds = toInteger $ chunkLength (Proxy :: Proxy s) pounds 517 | let u2 = case (hasPounds u1, nbPounds, n) of 518 | (True, p, _) -> UPounds p 519 | (False, 0, Nothing) -> UInteger 0 520 | (False, 0, (Just x)) -> UInteger x 521 | (False, p, Nothing) -> UPounds p 522 | (False, p, (Just x)) -> UIntPounds x p 523 | return $ SDecimal s u1 u2 sf 524 | 525 | uinteger :: forall e s m . (MonadParsec e s m, Token s ~ Char) => Radix -> m UInteger 526 | uinteger r = do 527 | n <- udigit r 528 | pounds <- takeWhileP Nothing (== '#') 529 | let nbPounds = toInteger $ chunkLength (Proxy :: Proxy s) pounds 530 | if nbPounds <= 0 531 | then return $ UInteger n 532 | else return $ UIntPounds n nbPounds 533 | 534 | 535 | prefix :: (MonadParsec e s m, Token s ~ Char) => m (Maybe Radix, Maybe Exactness) 536 | prefix = do 537 | x <- optional $ char '#' 538 | case x of 539 | Nothing -> return (Nothing, Nothing) 540 | _ -> do 541 | c <- char 'i' <|> char 'e' <|> char 'b' <|> 542 | char 'o' <|> char 'd' <|> char 'x' 543 | case c of 544 | 'i' -> optional radix >>= \r -> return (r, Just Inexact) 545 | 'e' -> optional radix >>= \r -> return (r, Just Exact) 546 | 'b' -> optional exactness >>= \e -> return (Just R2, e) 547 | 'o' -> optional exactness >>= \e -> return (Just R8, e) 548 | 'd' -> optional exactness >>= \e -> return (Just R10, e) 549 | _ -> optional exactness >>= \e -> return (Just R16, e) 550 | 551 | exactness :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m Exactness 552 | exactness = (chunk (tokensToChunk (Proxy :: Proxy s) "#e") >> return Exact) <|> 553 | (chunk (tokensToChunk (Proxy :: Proxy s) "#i") >> return Inexact) 554 | 555 | radix :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m Radix 556 | radix = 557 | (chunk (tokensToChunk (Proxy :: Proxy s) "#b") >> return R2) <|> 558 | (chunk (tokensToChunk (Proxy :: Proxy s) "#o") >> return R8) <|> 559 | (chunk (tokensToChunk (Proxy :: Proxy s) "#d") >> return R10) <|> 560 | (chunk (tokensToChunk (Proxy :: Proxy s) "#x") >> return R16) 561 | 562 | udigit :: forall e s m a . (MonadParsec e s m, Token s ~ Char, Integral a) => Radix -> m a 563 | udigit r = do 564 | case r of 565 | R2 -> ML.binary 566 | R8 -> ML.octal 567 | R10 -> ML.decimal 568 | R16 -> hexadecimal -- ML.hexadecimal also parses uppercase "ABCDEF" 569 | where hexadecimal = mkNum 570 | <$> takeWhile1P Nothing (\c -> c `elem` ("0123456789abcdef" :: String)) 571 | "hexadecimal integer" 572 | 573 | mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s) 574 | step a c = a * 16 + fromIntegral (C.digitToInt c) 575 | 576 | sign :: (MonadParsec e s m, Token s ~ Char) => m Sign 577 | sign = (char '-' >> return Minus) <|> (char '+' >> return Plus) 578 | 579 | suffix :: (MonadParsec e s m, Token s ~ Char) => m Suffix 580 | suffix = do 581 | p <- (char 'e' >> return PDefault) <|> 582 | (char 's' >> return PShort) <|> 583 | (char 'f' >> return PSingle) <|> 584 | (char 'd' >> return PDouble) <|> 585 | (char 'l' >> return PLong) 586 | s <- option Plus sign 587 | n <- udigit R10 588 | return $ Suffix p s n 589 | 590 | ------------------------- Other tokens ------------------------- 591 | -- | The 'quote' parser parses a quote character ('). 592 | quote :: (MonadParsec e s m, Token s ~ Char) => m Char 593 | quote = char '\'' 594 | 595 | -- | The 'quasiquote' parser parses a quasiquote character (`). 596 | quasiquote :: (MonadParsec e s m, Token s ~ Char) => m Char 597 | quasiquote = char '`' 598 | 599 | -- | The 'comma' parser parses a comma (,). 600 | comma :: (MonadParsec e s m, Token s ~ Char) => m Char 601 | comma = char ',' 602 | 603 | -- | The 'commaAt' parser parses a comma followed by \@ (,\@). 604 | commaAt :: forall e s m . (MonadParsec e s m, Token s ~ Char) => m T.Text 605 | commaAt = chunk (tokensToChunk (Proxy :: Proxy s) ",@") >> return ",@" 606 | 607 | -- | The 'dot' parser parses a single dot character (.). 608 | dot :: (MonadParsec e s m, Token s ~ Char) => m Char 609 | dot = char '.' 610 | --------------------------------------------------------------------------------