├── .github └── workflows │ └── main.yml ├── LICENSE ├── Makefile ├── README.md ├── lib └── github.com │ └── diku-dk │ └── sml-parse │ ├── .gitignore │ ├── CHAR_TOKEN.sig │ ├── CharToken.sml │ ├── Makefile │ ├── PARSE.sig │ ├── Parse.sml │ ├── REGION.sig │ ├── Region.sml │ ├── SCAN_UTIL.sig │ ├── SIMPLE_TOKEN.sig │ ├── ScanUtil.sml │ ├── SimpleToken.sml │ ├── char_token.mlb │ ├── parse.mlb │ ├── scan-util.mlb │ ├── simple_token.mlb │ └── test │ ├── .gitignore │ ├── Makefile │ ├── test1.mlb │ ├── test1.out.ok │ ├── test1.sml │ ├── test2.mlb │ ├── test2.out.ok │ ├── test2.sml │ ├── test3.mlb │ ├── test3.out.ok │ └── test3.sml └── sml.pkg /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | 5 | push: 6 | branches: [ main ] 7 | 8 | pull_request: 9 | branches: [ main ] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | jobs: 15 | 16 | build-test: 17 | 18 | strategy: 19 | matrix: 20 | os: [ubuntu-20.04, macos-12] 21 | mlcomp: [mlkit, mlton] 22 | 23 | runs-on: ${{ matrix.os }} 24 | 25 | steps: 26 | 27 | - uses: actions/checkout@v2 28 | 29 | - name: Setup environment 30 | run: | 31 | echo "OS=$(uname -s | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV 32 | echo "RUNHOME=$(echo $HOME)" >> $GITHUB_ENV 33 | 34 | - name: Install MLKit and smlpkg 35 | working-directory: ${{ env.RUNHOME }} 36 | run: | 37 | echo "[OS: $OS, HOME: $RUNHOME]" 38 | wget https://github.com/diku-dk/smlpkg/releases/download/v0.1.4/smlpkg-bin-dist-${{env.OS}}.tgz 39 | tar xzf smlpkg-bin-dist-${{env.OS}}.tgz 40 | echo "$HOME/smlpkg-bin-dist-${{env.OS}}/bin" >> $GITHUB_PATH 41 | wget https://github.com/melsman/mlkit/releases/download/v4.5.4/mlkit-bin-dist-${{env.OS}}.tgz 42 | tar xzf mlkit-bin-dist-${{env.OS}}.tgz 43 | echo "$HOME/mlkit-bin-dist-${{env.OS}}/bin" >> $GITHUB_PATH 44 | mkdir -p .mlkit 45 | echo "SML_LIB $HOME/mlkit-bin-dist-${{env.OS}}/lib/mlkit" > .mlkit/mlb-path-map 46 | 47 | - name: Check 48 | run: | 49 | mlkit --version 50 | smlpkg --version 51 | 52 | - name: Install MLton (linux) 53 | if: ${{ env.OS == 'linux' && matrix.mlcomp == 'mlton' }} 54 | run: | 55 | sudo apt-get install -y mlton 56 | mlton 57 | 58 | - name: Install MLton (macos) 59 | if: ${{ env.OS == 'darwin' && matrix.mlcomp == 'mlton' }} 60 | run: | 61 | brew install mlton 62 | mlton 63 | 64 | - name: Build 65 | run: MLCOMP=${{ matrix.mlcomp }} make clean all 66 | 67 | - name: Run tests 68 | run: MLCOMP=${{ matrix.mlcomp }} make test 69 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 DIKU 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: 3 | $(MAKE) -C lib/github.com/diku-dk/sml-parse all 4 | 5 | .PHONY: test 6 | test: 7 | $(MAKE) -C lib/github.com/diku-dk/sml-parse test 8 | 9 | .PHONY: clean 10 | clean: 11 | $(MAKE) -C lib/github.com/diku-dk/sml-parse clean 12 | rm -rf MLB *~ .*~ 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sml-parse [![CI](https://github.com/diku-dk/sml-parse/workflows/CI/badge.svg)](https://github.com/diku-dk/sml-parse/actions) 2 | 3 | Standard ML Parser Combinator Library and Scan Utilities. 4 | 5 | ## Overview of MLB files 6 | 7 | - `lib/github.com/diku-dk/sml-parse/scan-util.mlb`: 8 | 9 | - **signature** [`SCAN_UTIL`](lib/github.com/diku-dk/sml-parse/SCAN_UTIL.sig) 10 | - **structure** `ScanUtil` :> `SCAN_UTIL` 11 | 12 | - `lib/github.com/diku-dk/sml-parse/parse.mlb`: 13 | 14 | - **signature** [`REGION`](lib/github.com/diku-dk/sml-parse/REGION.sig) 15 | - **signature** [`PARSE`](lib/github.com/diku-dk/sml-parse/PARSE.sig) 16 | - **structure** `Region` :> `REGION` 17 | - **structure** `Parse` :> `PARSE` 18 | 19 | - `lib/github.com/diku-dk/sml-parse/char_token.mlb`: 20 | 21 | - **signature** [`CHAR_TOKEN`](lib/github.com/diku-dk/sml-parse/CHAR_TOKEN.sig) 22 | - **structure** `CharToken` :> `CHAR_TOKEN` 23 | 24 | - `lib/github.com/diku-dk/sml-parse/simple_token.mlb`: 25 | 26 | - **signature** [`SIMPLE_TOKEN`](lib/github.com/diku-dk/sml-parse/SIMPLE_TOKEN.sig) 27 | - **structure** `SimpleToken` :> `SIMPLE_TOKEN` 28 | 29 | ## Use of the package 30 | 31 | This library is set up to work well with the SML package manager 32 | [smlpkg](https://github.com/diku-dk/smlpkg). To use the package, in 33 | the root of your project directory, execute the command: 34 | 35 | ``` 36 | $ smlpkg add github.com/diku-dk/sml-parse 37 | ``` 38 | 39 | This command will add a _requirement_ (a line) to the `sml.pkg` file in your 40 | project directory (and create the file, if there is no file `sml.pkg` 41 | already). 42 | 43 | To download the library into the directory 44 | `lib/github.com/diku-dk/sml-parse` (along with other necessary 45 | libraries), execute the command: 46 | 47 | ``` 48 | $ smlpkg sync 49 | ``` 50 | 51 | You can now reference the `mlb`-file using relative paths from within 52 | your project's `mlb`-files. 53 | 54 | Notice that you can choose either to treat the downloaded package as 55 | part of your own project sources (vendoring) or you can add the 56 | `sml.pkg` file to your project sources and make the `smlpkg sync` 57 | command part of your build process. 58 | 59 | ## Try it! 60 | 61 | The parser combinator library works with either 62 | [MLton](http://mlton.org) or [MLKit](http://elsman.com/mlkit/). 63 | 64 | Now write 65 | 66 | $ smlpkg sync 67 | 68 | Then simply write `make test` in your shell. 69 | 70 | To use the MLKit as a compiler, write instead: 71 | 72 | $ MLCOMP=mlkit make clean test 73 | 74 | ## Authors 75 | 76 | Copyright (c) 2015-2021 Martin Elsman, Martin Dybdal, University of 77 | Copenhagen. 78 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/.gitignore: -------------------------------------------------------------------------------- 1 | MLB 2 | run 3 | *~ -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/CHAR_TOKEN.sig: -------------------------------------------------------------------------------- 1 | (** Char tokenisation library. 2 | 3 | The tokenisation library is very basic and considers every char a token. The 4 | library does not treat white space except that it returns region information 5 | that respects the location in the input (e.g., newline characters has influence 6 | on the returned region information). 7 | 8 | *) 9 | 10 | signature CHAR_TOKEN = sig 11 | 12 | type reg = Region.reg 13 | 14 | type token = char 15 | 16 | val pp_token : token -> string 17 | 18 | val tokenise : {srcname:string,input:string} 19 | -> (token*reg) list 20 | end 21 | 22 | (** 23 | 24 | [type reg] The region type specifying location information in the source. 25 | 26 | [type token] The token type. 27 | 28 | [pp_token t] returns a string representing the token t. 29 | 30 | [tokenise src] returns a list of tokens paired with region information. The 31 | argument src specifies the input source. 32 | 33 | *) 34 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/CharToken.sml: -------------------------------------------------------------------------------- 1 | (** Char tokenisation library. 2 | 3 | The tokenisation library is very basic and considers every char a token. The 4 | library does not treat white space except that it returns region information 5 | that respects the location in the input (e.g., newline characters has influence 6 | on the returned region information). 7 | 8 | *) 9 | 10 | structure CharToken :> CHAR_TOKEN = struct 11 | 12 | type reg = Region.reg 13 | type loc = Region.loc 14 | 15 | type token = char 16 | 17 | fun pp_token t = Char.toString t 18 | 19 | fun tokenise {srcname:string, input:string} : (token*reg) list = 20 | let fun next (c, (l:loc,ts:(token*reg)list)) : loc * (token*reg)list = 21 | let val l' = if c = #"\n" then Region.newline l 22 | else if Char.isPrint c then Region.next l 23 | else l 24 | in (l', (c,Region.mkReg(l,l))::ts) 25 | end 26 | val (_,ts) = CharVector.foldl next (Region.loc0 srcname, nil) input 27 | in rev ts 28 | end 29 | 30 | end 31 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/Makefile: -------------------------------------------------------------------------------- 1 | MLCOMP ?= mlkit 2 | 3 | .PHONY: all 4 | all: 5 | $(MLCOMP) -output parse.exe parse.mlb 6 | 7 | .PHONY: test 8 | test: 9 | $(MAKE) -C test test 10 | 11 | .PHONY: clean 12 | clean: 13 | $(MAKE) -C test clean 14 | rm -rf MLB *~ parse.exe 15 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/PARSE.sig: -------------------------------------------------------------------------------- 1 | (** Simple parser combinator library. 2 | 3 | This monadic parser combinator library provides a series of combinators for 4 | assembling parsers. The library also keeps track of position information, which 5 | makes it ideal for providing good error messages. 6 | 7 | *) 8 | 9 | signature PARSE = sig 10 | type token 11 | datatype ('a,'b) either = OK of 'a | NO of 'b 12 | type locerr = Region.loc * (unit -> string) 13 | type tokenstream = (token*Region.reg) list 14 | type 'a p 15 | 16 | val parse : 'a p -> tokenstream -> ('a, locerr) either 17 | val parse' : 'a p -> tokenstream -> ('a * tokenstream, locerr) either 18 | val accept : 'a -> 'a p 19 | val next : token p 20 | val reject : string -> 'a p 21 | val delay : ('a -> 'b p) -> 'a -> 'b p 22 | val eof : unit p 23 | val ign : 'a p -> unit p 24 | val eat : token -> unit p 25 | val satisfy : (token -> bool) -> token p 26 | val many : 'a p -> 'a list p 27 | val some : 'a p -> 'a list p 28 | val enclose : 'a p -> 'b p -> 'c p -> 'c p 29 | val choice : 'a p list -> 'a p 30 | 31 | val >>= : 'a p * ('a -> 'b p) -> 'b p 32 | val <|> : 'a p * 'a p -> 'a p 33 | val <*> : ('a -> 'b) p * 'a p -> 'b p 34 | val <* : 'a p * 'b p -> 'a p 35 | val *> : 'a p * 'b p -> 'b p 36 | val <$> : ('a -> 'b) * 'a p -> 'b p 37 | val <$$> : ('a * Region.reg -> 'b) * 'a p -> 'b p 38 | 39 | val >>> : 'a p * 'b p -> ('a*'b) p 40 | val ?? : 'a p * 'b p -> ('a * 'b -> 'a) -> 'a p 41 | val ??? : 'a p * 'b p -> ('a * 'b * Region.reg -> 'a) -> 'a p 42 | val ??* : 'a p * 'b p -> ('a * 'b -> 'a) -> 'a p 43 | end 44 | 45 | (** 46 | 47 | [token] type of tokens. 48 | 49 | ['a p] type of parsers that parse values of type `'a`. 50 | 51 | [p >>= f] returns a parser that first parses using `p` and then uses the result 52 | of the parse as an argument to `f`, thereby producing a new parser. 53 | 54 | [accept v] returns a parser that returns the value `v`. 55 | 56 | [reject s] returns a parser that always fails with the error message `s`. 57 | 58 | [a >>> b] returns a parser that first parses using `a`, then parses 59 | using `b`, and finally returns the pair of the two parse results. 60 | 61 | [a <*> b] returns a parser that first parses using `a`, then parses using `b`, 62 | and finally returns the result obtained by applying the function returned by the 63 | first parser to the value obtained by the second parser. 64 | 65 | [a *> b] returns a parser that first parses using `a`, then parses using `b`, 66 | and finally returns the second parse result. 67 | 68 | [a <* b] returns a parser that first parses using `a`, then parses using `b`, 69 | and finally returns the first parse result. 70 | 71 | [(a ?? b) f] first parses using `a` and maybe continues using `b`, if both 72 | succeeds, combines the parse results using `f`. 73 | 74 | [(a ??? b) f] is defined as `??`, but allows for passing region information to 75 | `f`. 76 | 77 | [(a ??* b) f] first parses using `a` followed by parsing zero or more times 78 | using `b`, while iteratively combining successful parse results using `f`. 79 | 80 | [a <|> b] returns a parser that first attempts to parse using `a`. On success, 81 | the result is returned. Otherwise, returns the result of parsing using `b`. 82 | 83 | [f <$> p] returns a parser that parses using `p` and applies `f` to the result 84 | on success. 85 | 86 | [ign p] discard the result of a parser `p`. 87 | 88 | [eat t ts] "eat" one token `t` from list `ts`. 89 | 90 | [f <$$> p] is defined as `<$>`, but allows for passing region information to 91 | `f`. 92 | 93 | *) 94 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/Parse.sml: -------------------------------------------------------------------------------- 1 | functor Parse (eqtype token 2 | val pp_token : token -> string) 3 | :> PARSE where type token = token = 4 | struct 5 | 6 | type loc = Region.loc 7 | type reg = Region.reg 8 | 9 | type token = token 10 | 11 | (* keep track of the max location - the longest parse *) 12 | datatype ('a,'b) either = OK of 'a | NO of 'b 13 | 14 | type locerr = loc * (unit -> string) 15 | 16 | type tokenstream = (token*reg)list 17 | 18 | type 'a p = tokenstream -> ('a * reg * tokenstream, locerr) either 19 | 20 | fun parse' f ts = 21 | case f ts of 22 | OK(x,_,ts') => OK (x, ts') 23 | | NO l => NO l 24 | 25 | fun parse f ts = 26 | case f ts of 27 | OK(x,_,_) => OK x 28 | | NO l => NO l 29 | 30 | infixr >>= <|> *> <* 31 | infix >>> ?? ??? ??* <*> <$> <$$> 32 | 33 | fun p >>= f = 34 | fn ts => 35 | case p ts of 36 | NO err => NO err 37 | | OK (v1,r1,ts1) => 38 | case f v1 ts1 of 39 | NO err => NO err 40 | | OK (v2,r2,ts2) => OK(v2,Region.plus "bind" r1 r2,ts2) 41 | 42 | val next : token p = 43 | fn ts => 44 | case ts of 45 | (t,r)::ts' => OK (t,r,ts') 46 | | nil => NO (Region.botloc, fn () => "eos") 47 | 48 | fun peek ((_,r)::_) = (case Region.unReg r of 49 | SOME (l,_) => l 50 | | NONE => Region.botloc) 51 | | peek nil = Region.botloc 52 | 53 | fun accept x = 54 | fn ts => OK(x,Region.emp,ts) 55 | 56 | fun reject (s:string) : 'a p = 57 | fn ts => NO(peek ts, fn () => s) 58 | 59 | fun delay (fp: 'a -> 'b p) (x: 'a) : 'b p = 60 | fn ts => fp x ts 61 | 62 | val eof : unit p = 63 | fn ts => if null ts then OK((),Region.emp,ts) 64 | else NO(peek ts, fn () => "expecting end of stream") 65 | 66 | fun satisfy p = 67 | next >>= (fn c => if p c then accept c else reject "satisfy") 68 | 69 | fun maxLocerr (l1:locerr) l2 = 70 | if Region.lt (#1 l1) (#1 l2) then l2 71 | else l1 72 | 73 | fun p1 <*> p2 = 74 | p1 >>= (fn f => p2 >>= (fn x => accept (f x))) 75 | 76 | fun p1 <* p2 = 77 | accept (fn x => fn _ => x) <*> p1 <*> p2 78 | 79 | fun p1 *> p2 = 80 | accept (fn _ => fn x => x) <*> p1 <*> p2 81 | 82 | fun p1 >>> p2 = 83 | p1 >>= (fn v1 => p2 >>= (fn v2 => accept (v1,v2))) 84 | 85 | fun f <$> p = 86 | p >>= (fn x => accept (f x)) 87 | 88 | fun p1 ?? p2 = fn f => fn ts => 89 | case p1 ts of 90 | OK(v1,r1,ts) => 91 | (case p2 ts of 92 | OK(v2,r2,ts) => OK(f(v1,v2), Region.plus "??" r1 r2, ts) 93 | | _ => OK(v1,r1,ts)) 94 | | NO l => NO l 95 | 96 | fun p1 ??? p2 = fn f => fn ts => 97 | case p1 ts of 98 | OK(v1,r1,ts) => 99 | (case p2 ts of 100 | OK(v2,r2,ts) => 101 | let val r = Region.plus "???" r1 r2 102 | in OK(f(v1,v2,r), r, ts) 103 | end 104 | | NO l => OK(v1,r1,ts)) 105 | | NO l => NO l 106 | 107 | fun p1 <|> p2 = fn ts => 108 | case p1 ts of 109 | OK(v,r,ts) => OK(v,r,ts) 110 | | NO l1 => case p2 ts of 111 | OK(v,r,ts) => OK(v,r,ts) 112 | | NO l2 => NO (maxLocerr l1 l2) 113 | 114 | fun p1 ??* p2 = fn f => fn ts => 115 | case p1 ts of 116 | OK(v1,r1,ts) => 117 | let fun repeat (v1,r1,ts) = 118 | case p2 ts of 119 | OK(v2,r2,ts) => repeat (f(v1,v2), Region.plus "??*" r1 r2, ts) 120 | | _ => OK(v1,r1,ts) 121 | in repeat (v1,r1,ts) 122 | end 123 | | NO l => NO l 124 | 125 | fun f <$$> p = fn ts => 126 | case p ts of 127 | OK(v,r,ts) => OK(f(v,r),r,ts) 128 | | NO l => NO l 129 | 130 | fun ign (p:'a p) : unit p = 131 | (fn _ => ()) <$> p 132 | 133 | fun many p = 134 | (p >>= (fn x => many p >>= (fn xs => accept (x :: xs)))) <|> delay accept [] 135 | 136 | fun some p = 137 | p >>= (fn x => many p >>= (fn xs => accept (x :: xs))) 138 | 139 | fun enclose (lb:'a p) (rb:'b p) (p:'c p) : 'c p = lb *> p <* rb 140 | 141 | fun choice [] = reject "choice" 142 | | choice (p :: ps) = p <|> delay choice ps 143 | 144 | fun eat t ts = 145 | case ts of 146 | nil => NO (Region.botloc,fn() => ("expecting token " ^ pp_token t ^ 147 | " but reached the end")) 148 | | (t',r:Region.reg)::ts' => 149 | if t=t' then OK ((),r,ts') 150 | else NO (case Region.unReg r of 151 | SOME (l,_) => l 152 | | NONE => Region.botloc, 153 | fn() => ("expecting token " ^ pp_token t ^ 154 | " but found token " ^ pp_token t')) 155 | end 156 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/REGION.sig: -------------------------------------------------------------------------------- 1 | 2 | (** Source region information. *) 3 | 4 | signature REGION = sig 5 | type srcname = string 6 | type loc = int * int * srcname 7 | val botloc : loc 8 | val loc0 : srcname -> loc (* line 1, char 1 *) 9 | val newline : loc -> loc 10 | val next : loc -> loc 11 | val lt : loc -> loc -> bool 12 | val ppLoc : loc -> string 13 | 14 | (* Regions *) 15 | type reg 16 | val emp : reg (* the empty region *) 17 | val wf : reg -> bool 18 | val pp : reg -> string 19 | val plus : string -> reg -> reg -> reg 20 | 21 | val mkReg : loc * loc -> reg 22 | val unReg : reg -> (loc * loc) option 23 | end 24 | 25 | (** 26 | 27 | [botloc] represents "end of source". 28 | 29 | [loc0 s] returns the first location of a source (line 1, char 1 of 30 | source s). 31 | 32 | [newline l] returns the location of the first position in the line 33 | following that of `l`. 34 | 35 | [next l] returns the location following `l` on the same line as `l`. 36 | 37 | [lt a b] returns true iff location `a` is strictly before location 38 | `b`. 39 | 40 | [wf r] returns true if and only if the region `r` is well-formed. 41 | 42 | [ppLoc l] returns a pretty-printed version of the location `l`. 43 | 44 | [pp r] returns a pretty-printed version of the region `r`. 45 | 46 | [plus s r1 r2] returns the region that is the result of merging region 47 | `r1` and `r2`. The string `s` is used for error reporting. 48 | 49 | *) 50 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/Region.sml: -------------------------------------------------------------------------------- 1 | structure Region :> REGION = struct 2 | type srcname = string 3 | type loc = int * int * srcname 4 | val botloc = (0,0,"") 5 | fun loc0 f = (1,0,f) 6 | fun newline l = 7 | if l = botloc then 8 | raise Fail "Region.newline: botloc is not a real location" 9 | else (#1 l + 1,0,#3 l) 10 | fun next l = 11 | if l = botloc then 12 | raise Fail "Region.next: botloc is not a real location" 13 | else (#1 l, #2 l + 1, #3 l) 14 | fun lt (l1:loc) (l2:loc) = 15 | if l2 = botloc then false 16 | else l1 = botloc orelse 17 | #1 l1 < #1 l2 orelse (#1 l1 = #1 l2 andalso #2 l1 < #2 l2) 18 | fun ppLoc0 (a,b,_) = Int.toString a ^ "." ^ Int.toString b 19 | fun ppLoc (l:loc) = #3 l ^ ":" ^ ppLoc0 l 20 | 21 | type reg = (loc * loc) option 22 | 23 | val emp : reg = NONE 24 | 25 | fun wf NONE = true 26 | | wf (SOME (l1,l2)) = #3 l1 <> #3 l2 orelse l1 = l2 orelse lt l1 l2 27 | 28 | fun pp NONE = "nowhere" 29 | | pp (SOME(a,b)) = 30 | if a = b then ppLoc a 31 | else if #3 a = #3 b then #3 a ^ ":" ^ ppLoc0 a ^ "-" ^ ppLoc0 b 32 | else ppLoc a ^ "-" ^ ppLoc b 33 | 34 | fun plus s r1 r2 = 35 | case (r1,r2) of 36 | (NONE,_) => r2 37 | | (_,NONE) => r1 38 | | (SOME (l1,l1'), SOME(l2,l2')) => 39 | if wf r1 andalso wf r2 andalso (lt l1' l2 orelse #3 l1' <> #3 l2) then 40 | SOME(l1, l2') 41 | else raise Fail ("Region " ^ pp r1 ^ " cannot be merged with region " ^ pp r2 ^ " at " ^ s) 42 | 43 | fun mkReg (l1,l2) = SOME (l1,l2) 44 | fun unReg x = x 45 | end 46 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/SCAN_UTIL.sig: -------------------------------------------------------------------------------- 1 | (** Utilities for basis library scan functionality. 2 | 3 | This utility library features combinators for assembling scanners 4 | (parsers) from more basic scanners. The library is designed to work 5 | well together with the scanner functionality provided in the Standard 6 | ML Basis Library (i.e., StringCvt.reader). 7 | 8 | A scanner (or parser) of type `('a,'st) p` parses values of type 9 | `'a`. Such a parser is a function that takes a char-reader (of type 10 | `(char,'st) reader`, and returns a reader that reads values of type 11 | `'a`. 12 | 13 | When opening ScanUtil, include the following infix-declaration: 14 | 15 | infix >>> ->> >>- >>? || >>@ >>* ?? 16 | 17 | *) 18 | 19 | signature SCAN_UTIL = 20 | sig 21 | type ('a,'st) reader = ('a,'st) StringCvt.reader 22 | type ('a,'st) p = (char,'st) reader -> ('a,'st) reader 23 | 24 | val >>> : ('a,'st) p * ('b,'st) p -> ('a * 'b,'st) p 25 | val >>@ : ('a,'st) p * ('a -> 'b) -> ('b,'st) p 26 | val || : ('a,'st) p * ('a,'st) p -> ('a,'st) p 27 | val ->> : ('a,'st) p * ('b,'st) p -> ('b,'st) p 28 | val >>- : ('a,'st) p * ('b,'st) p -> ('a,'st) p 29 | val >>? : ('a,'st) p * ('b,'st) p -> ('a * 'b -> 'a) -> ('a,'st) p 30 | val >>* : ('a,'st) p * ('b,'st) p -> ('a * 'b -> 'a) -> ('a,'st) p 31 | val ?? : ('a,'st) p * ('a -> 'b option) -> ('b,'st) p 32 | 33 | val ign : ('a,'st) p -> (unit,'st) p 34 | val con : string * 'a -> ('a,'st) p 35 | val str : string -> (string,'st) p 36 | val eos : (unit,'st) p 37 | 38 | val scanChar : (char -> bool) -> (char,'st) p 39 | val scanChars : (char -> bool) -> (string,'st) p 40 | 41 | val list : ('a,'st) p -> ('a list,'st) p 42 | val option : ('a,'st) p -> ('a option,'st) p 43 | 44 | val skipChars : (char -> bool) -> ('a,'st) p -> ('a,'st) p 45 | val skipWS : ('a,'st) p -> ('a,'st) p 46 | val noSkipWS : ('a,'st) p -> ('a,'st) p 47 | 48 | val remainder : (string,'st) p 49 | 50 | val scanId : (string,'st) p 51 | end 52 | 53 | (** 54 | 55 | [type ('a,'st) reader] The generic reader type, which is a type 56 | abbreviation for the type `'st -> ('a * 'st) option`. 57 | 58 | [a >>> b] returns a scanner that first scans using `a`, then scans 59 | using `b`, and finally returns the pair of the two scan results. 60 | 61 | [a ->> b] returns a scanner that first scans using `a`, then scans 62 | using `b`, and finally returns the second scan result. 63 | 64 | [a >>- b] returns a scanner that first scans using `a`, then scans 65 | using `b`, and finally returns the first scan result. 66 | 67 | [(a >>? b) f] first scans using `a` and maybe continues using `b`, if 68 | both succeeds, combines the scan results using `f`. 69 | 70 | [(a >>* b) f] first scans using `a` followed by scanning zero or more 71 | times using `b`, while iteratively combining successful scan results 72 | using `f`. 73 | 74 | [a || b] returns a scanner that first attempts to scan using `a`. On 75 | success, the result is returned. Otherwise, returns the result of 76 | scanning using `b`. 77 | 78 | [p >>@ f] returns a scanner that scans using `p` and applies `f` to the 79 | result on success. 80 | 81 | [ign p] returns a scanner that discards the result of scanning using 82 | `p`. 83 | 84 | [p ?? f] returns a scanner that scans using `p` and applies `f` on the 85 | result, which will either succeed or fail. 86 | 87 | [eos] a scanner that accepts only the empty stream. 88 | 89 | [skipWS p] returns a scanner that will skip initial white space before 90 | scanning using `p`. 91 | 92 | [noSkipWS p] returns a scanner that does not skip initial white space 93 | when scanning using `p`. 94 | 95 | [str s] returns a scanner that will accept only the initial string `s` 96 | and returns the string value `s`. 97 | 98 | [con (s,v)] returns a scanner that will accept only the initial string 99 | `s` and which returns the value `v`. 100 | 101 | [scanChar P] returns a scanner that accepts a character if it 102 | satisfies the predicate `P`. 103 | 104 | [scanChars P] returns a scanner that accepts a string of characters 105 | for which each character satisfies the predicate `P`. 106 | 107 | [skipChars P p] returns a scanner identical to `p` but that skips 108 | initial characters that satisfy the predicate `P`. 109 | 110 | [list p] returns a scanner that constructs a list of values by 111 | repeatedly scanning using p until p does not succeed. 112 | 113 | [option p] returns a scanner that constructs an option value by trying 114 | to scan using p. 115 | 116 | [remainder] scans all remaining characters (until end-of-stream). For 117 | large input streams, it may be more efficient to apply a 118 | state-specific extractor. 119 | 120 | [scanId] returns a scanner that scans identifiers consisting of an 121 | initial alphabetic character followed by zero or more alpha-numerical 122 | characters. 123 | 124 | *) 125 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/SIMPLE_TOKEN.sig: -------------------------------------------------------------------------------- 1 | (** Simple tokenisation library. 2 | 3 | The tokenisation library has basic support for symbols, identifiers, 4 | and numbers. It also associates region information to each individual 5 | token and thereby provides good support for error handling. 6 | 7 | *) 8 | 9 | signature SIMPLE_TOKEN = sig 10 | 11 | type reg = Region.reg 12 | 13 | datatype token = Symb of string 14 | | Id of string 15 | | Num of string 16 | 17 | val pp_token : token -> string 18 | 19 | val tokenise : {sep_chars : string, (* single-char symbols *) 20 | symb_chars : string, (* multi-char symbols *) 21 | is_id : string -> bool, (* is a string an id? *) 22 | is_num : string -> bool} (* is a string a number? *) 23 | -> {srcname:string,input:string} 24 | -> (token*reg) list 25 | end 26 | 27 | (** 28 | 29 | [type reg] The region type specifying location information in the source. 30 | 31 | [type token] The token type. 32 | 33 | [pp_token t] returns a string representing the token t. 34 | 35 | [tokenise {sep_chars,symb_chars,is_id,is_num} src] returns a list of 36 | tokens paired with region information. Characters in sep_chars are 37 | those that should be considered single symbols (e.g., parentheses), 38 | whereas those in symb_chars are those that may be used to form 39 | combined symbols (e.g., & for &&). The functions is_id and is_num are 40 | meant to identify whether a string is an identifier or a number, 41 | respectively. The argument src specifies the input source. 42 | 43 | *) 44 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/ScanUtil.sml: -------------------------------------------------------------------------------- 1 | structure ScanUtil : SCAN_UTIL = struct 2 | 3 | type ('a,'st) reader = 'st -> ('a * 'st) option 4 | type ('a,'st) p = (char,'st) reader -> ('a,'st) reader 5 | 6 | infix >>> ->> >>- >>? || >>@ >>* ?? 7 | 8 | fun (p1 >>> p2) get s = 9 | case p1 get s of 10 | SOME (a,s) => (case p2 get s of 11 | SOME (b,s) => SOME((a,b),s) 12 | | NONE => NONE) 13 | | NONE => NONE 14 | 15 | fun (p >>@ f) get s = 16 | case p get s of 17 | SOME (a,s) => SOME (f a,s) 18 | | NONE => NONE 19 | 20 | fun (p1 >>- p2) = (p1 >>> p2) >>@ (#1) 21 | fun (p1 ->> p2) = (p1 >>> p2) >>@ (#2) 22 | 23 | fun (p1 || p2) get s = 24 | case p1 get s of 25 | NONE => p2 get s 26 | | r => r 27 | 28 | fun (p1 >>? p2) f get s = 29 | case p1 get s of 30 | NONE => NONE 31 | | SOME (a,s) => case p2 get s of 32 | SOME (b,s) => SOME(f(a,b),s) 33 | | NONE => SOME(a,s) 34 | 35 | fun (p1 >>* p2) f get s = 36 | case p1 get s of 37 | SOME (a,s) => 38 | let fun repeat (a,s) = 39 | case p2 get s of 40 | SOME (b,s) => repeat (f(a,b), s) 41 | | _ => SOME (a,s) 42 | in repeat (a,s) 43 | end 44 | | NONE => NONE 45 | 46 | fun ign scan get s = 47 | case scan get s of 48 | SOME (_,s) => SOME((),s) 49 | | NONE => NONE 50 | 51 | fun (p ?? f) get s = 52 | case p get s of 53 | NONE => NONE 54 | | SOME (v,s) => case f v of 55 | SOME v' => SOME(v',s) 56 | | NONE => NONE 57 | 58 | fun con (str,con) get s0 = 59 | let fun loop (i,s) = 60 | if i >= size str then SOME(con,s) 61 | else case get s of 62 | SOME (c,s) => 63 | if c = String.sub(str,i) then loop (i+1,s) 64 | else NONE 65 | | NONE => NONE 66 | in loop (0,s0) 67 | end 68 | 69 | fun str s = con (s,s) 70 | 71 | fun eos get s = 72 | case get s of 73 | NONE => SOME((),s) 74 | | SOME _ => NONE 75 | 76 | fun skipChars P p get s0 = 77 | case get s0 of 78 | SOME(c,s) => if P c then skipChars P p get s 79 | else p get s0 80 | | NONE => p get s0 81 | 82 | fun scanChar P get s = 83 | case get s of 84 | SOME(c,s) => if P c then SOME(c,s) 85 | else NONE 86 | | NONE => NONE 87 | 88 | fun scanChars P = 89 | (scanChar P >>@ (fn c => [c]) >>* scanChar P) (fn (a,b) => b::a) >>@ (implode o rev) 90 | 91 | fun noSkipWS p get s = 92 | case get s of 93 | SOME(c,_) => if Char.isSpace c then NONE 94 | else p get s 95 | | NONE => NONE 96 | 97 | fun skipWS p = skipChars Char.isSpace p 98 | 99 | fun option p g s = 100 | case p g s of 101 | SOME(v,s) => SOME(SOME v,s) 102 | | NONE => SOME(NONE,s) 103 | 104 | fun list p g s = 105 | let fun loop s acc = 106 | case p g s of 107 | NONE => SOME(rev acc,s) 108 | | SOME(e,s) => loop s (e::acc) 109 | in loop s nil 110 | end 111 | 112 | fun scanAnyChar get s = get s 113 | 114 | fun remainder get = 115 | (list scanAnyChar >>@ implode >>- eos) get 116 | 117 | fun scanId get = 118 | (scanChar Char.isAlpha >>@ String.str >>? 119 | scanChars Char.isAlphaNum 120 | ) (op ^) get 121 | 122 | end 123 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/SimpleToken.sml: -------------------------------------------------------------------------------- 1 | structure SimpleToken :> SIMPLE_TOKEN = struct 2 | 3 | type reg = Region.reg 4 | type loc = Region.loc 5 | 6 | datatype token = Symb of string 7 | | Id of string 8 | | Num of string 9 | 10 | fun pp_token t = 11 | case t of 12 | Symb s => s 13 | | Id s => s 14 | | Num s => s 15 | 16 | fun iter (i:int) (f : 'a -> 'a) (a:'a) : 'a = 17 | if i <= 0 then a 18 | else iter (i-1) f (f a) 19 | 20 | fun close (con:string -> token) (l:loc) (s:string) : token * reg = 21 | let val l' = iter (size s - 1) Region.next l 22 | in (con s, Region.mkReg(l,l')) 23 | end 24 | 25 | datatype state = IdS of loc * string 26 | | SymbS of loc * string 27 | | NumS of loc * string 28 | | CommentS of loc * string 29 | | BeginS 30 | 31 | fun tokenise {sep_chars : string, (* single-char symbols *) 32 | symb_chars : string, (* multi-char symbols *) 33 | is_id : string -> bool, (* is a string an id? *) 34 | is_num : string -> bool} (* is a string a number? *) 35 | {srcname:string, input:string} : (token*reg) list = 36 | let 37 | fun isSymbChar c = CharVector.exists (fn c' => c=c') symb_chars 38 | fun isIdChar0 c = is_id(String.str c) 39 | fun isSepChar c = CharVector.exists (fn c' => c=c') sep_chars 40 | fun next (l:loc,l':loc,c:char,s:state,ts:(token*reg)list) 41 | : loc * state * (token*reg)list = 42 | case s of 43 | CommentS (l0,"*") => 44 | if c = #")" then (l',BeginS,ts) 45 | else (l',CommentS(l0, ""),ts) 46 | | CommentS(l0,"(") => 47 | if c = #"*" then (l',CommentS(l0,""),ts) 48 | else next (l,l',c,BeginS,close Symb l0 "(" :: ts) 49 | | CommentS (l0,"") => 50 | if c = #"*" then (l',CommentS(l0,"*"),ts) 51 | else (l',CommentS(l0,""),ts) 52 | | BeginS => 53 | if c = #"(" then (l',CommentS (l,"("),ts) 54 | else if isSepChar c then (l',BeginS,close Symb l (String.str c)::ts) 55 | else if isSymbChar c then (l',SymbS(l,String.str c),ts) 56 | else if isIdChar0 c then (l',IdS(l,String.str c),ts) 57 | else if Char.isDigit c then (l',NumS(l,String.str c),ts) 58 | else if Char.isSpace c then (l',BeginS,ts) 59 | else raise Fail ("lex error at location " ^ Region.ppLoc l') 60 | | SymbS (l0,s) => 61 | if c = #"(" then (l',CommentS (l,"("), close Symb l0 s :: ts) 62 | else if isSepChar c then (l',BeginS,close Symb l (String.str c) :: close Symb l0 s :: ts) 63 | else if isSymbChar c then (l',SymbS(l0,s ^ String.str c),ts) 64 | else if Char.isDigit c then 65 | (if is_num (s ^ String.str c) then 66 | (l',NumS(l0, s ^ String.str c), ts) 67 | else (l',NumS(l,String.str c), close Symb l0 s :: ts)) 68 | else if isIdChar0 c then (l',IdS(l,String.str c), close Symb l0 s :: ts) 69 | else if Char.isSpace c then (l',BeginS, close Symb l0 s :: ts) 70 | else raise Fail ("lex error at location " ^ Region.ppLoc l') 71 | | NumS (l0,s) => 72 | if c = #"(" then (l',CommentS (l,"("), close Num l0 s ::ts) 73 | else if isSepChar c then (l',BeginS, close Symb l (String.str c) :: close Num l0 s :: ts) 74 | else if isSymbChar c then (l',SymbS(l,String.str c), close Num l0 s :: ts) 75 | else if is_num(s ^ String.str c) then (l',NumS(l0,s ^ String.str c),ts) 76 | else if Char.isSpace c then (l',BeginS, close Num l0 s :: ts) 77 | else raise Fail ("lex error at location " ^ Region.ppLoc l') 78 | | IdS (l0,s) => 79 | if c = #"(" then (l',CommentS (l,"("), close Id l0 s :: ts) 80 | else if isSepChar c then (l',BeginS, close Symb l (String.str c) :: close Id l0 s :: ts) 81 | else if isSymbChar c then (l',SymbS(l,String.str c), close Id l0 s :: ts) 82 | else if is_id (s ^ String.str c) then (l',IdS(l0,s ^ String.str c),ts) 83 | else if Char.isSpace c then (l',BeginS, close Id l0 s :: ts) 84 | else raise Fail ("lex error at location " ^ Region.ppLoc l') 85 | | CommentS _ => raise Fail "lex: impossible" 86 | 87 | val s0 = (Region.loc0 srcname, BeginS, nil) 88 | val (l',s,ts) = 89 | CharVector.foldl (fn (c, (l,s,ts)) => 90 | let val l' = if c = #"\n" then Region.newline l 91 | else Region.next l 92 | in next (l,l',c,s,ts) 93 | end) s0 input 94 | val ts = 95 | case s of 96 | SymbS(l,s) => close Symb l s :: ts 97 | | NumS(l,s) => close Num l s :: ts 98 | | IdS(l,s) => close Id l s :: ts 99 | | CommentS _ => raise Fail "lex error: non-closed comment" 100 | | BeginS => ts 101 | in rev ts 102 | end 103 | 104 | end 105 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/char_token.mlb: -------------------------------------------------------------------------------- 1 | local 2 | $(SML_LIB)/basis/basis.mlb 3 | parse.mlb 4 | in 5 | CHAR_TOKEN.sig 6 | CharToken.sml 7 | end 8 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/parse.mlb: -------------------------------------------------------------------------------- 1 | local $(SML_LIB)/basis/basis.mlb 2 | in REGION.sig 3 | Region.sml 4 | PARSE.sig 5 | Parse.sml 6 | end 7 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/scan-util.mlb: -------------------------------------------------------------------------------- 1 | local $(SML_LIB)/basis/basis.mlb 2 | in SCAN_UTIL.sig 3 | ScanUtil.sml 4 | end 5 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/simple_token.mlb: -------------------------------------------------------------------------------- 1 | local 2 | $(SML_LIB)/basis/basis.mlb 3 | parse.mlb 4 | in 5 | SIMPLE_TOKEN.sig 6 | SimpleToken.sml 7 | end 8 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/.gitignore: -------------------------------------------------------------------------------- 1 | *.res 2 | *.exe 3 | *~ 4 | *.out 5 | MLB -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/Makefile: -------------------------------------------------------------------------------- 1 | 2 | MLCOMP ?= mlkit 3 | 4 | .PHONY: test 5 | test: test1.res test2.res test3.res 6 | cat $^ 7 | 8 | %.res: %.out 9 | @(diff -aq $< $<.ok > /dev/null 2>&1; \ 10 | if [ $$? -eq 0 ]; then \ 11 | echo "OK: $*" > $@ \ 12 | ; else \ 13 | if [ -e $<.ok ]; then \ 14 | echo "ERR: $* - file $< differs from $<.ok"; \ 15 | echo "ERR: $* - file $< differs from $<.ok" > $@ \ 16 | ; else \ 17 | echo "ERR: $* - file $<.ok does not exist"; \ 18 | echo "ERR: $* - file $<.ok does not exist" > $@ \ 19 | ; fi \ 20 | ; exit 1 \ 21 | ;fi) 22 | 23 | %.out: %.exe 24 | ./$< > $@ 25 | 26 | %.exe: %.mlb %.sml 27 | $(MLCOMP) -output $@ $< 28 | 29 | .PHONY: clean 30 | clean: 31 | rm -rf MLB *.out *~ *.exe *.res run 32 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test1.mlb: -------------------------------------------------------------------------------- 1 | 2 | local $(SML_LIB)/basis/basis.mlb 3 | ../parse.mlb 4 | ../simple_token.mlb 5 | in 6 | test1.sml 7 | end 8 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test1.out.ok: -------------------------------------------------------------------------------- 1 | Tokens: 2 | stdin:1.0-1.2:let, stdin:1.4:y, stdin:1.6:=, stdin:1.8-1.9:82, stdin:1.11:+, stdin:1.13:2, stdin:1.15-1.16:in, stdin:1.18-1.20:let, stdin:1.22:x, stdin:1.24:=, stdin:1.26-1.28:454, stdin:2.0-2.1:in, stdin:2.3:y, stdin:2.5:+, stdin:2.7:x, stdin:2.9:+, stdin:2.11:y, stdin:3.0:+, stdin:3.2-3.3:23, 3 | 4 | Eval = 645 5 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test1.sml: -------------------------------------------------------------------------------- 1 | 2 | structure T = SimpleToken 3 | 4 | fun is_id s = 5 | size s > 0 andalso 6 | let val c0 = String.sub(s,0) 7 | in Char.isAlpha c0 orelse c0 = #"_" 8 | end andalso CharVector.all (fn c => Char.isAlphaNum c orelse c = #"_") s 9 | 10 | fun is_num s = 11 | size s > 0 andalso 12 | (s = "0" orelse 13 | let val c0 = String.sub(s,0) 14 | in (Char.isDigit c0 andalso c0 <> #"0") 15 | end andalso CharVector.all (fn c => Char.isDigit c) s) 16 | 17 | fun tokens s = 18 | T.tokenise {sep_chars="(){}[],.;", 19 | symb_chars="#&|+-~^?*:!%/='<>@", 20 | is_id=is_id, 21 | is_num=is_num} 22 | {srcname="stdin",input=s} 23 | 24 | fun lexing () = 25 | let val s = "let y = 82 + 2 in let x = 454\nin y + x + y (* ok *)\n+ 23" 26 | val ts = tokens s 27 | val () = print "Tokens:\n" 28 | val () = app (fn (t,r) => print (Region.pp r ^ ":" ^ T.pp_token t ^ ", ")) ts 29 | val () = print "\n\n" 30 | in ts 31 | end 32 | 33 | val ts = lexing () 34 | 35 | structure P = Parse(type token = T.token 36 | val pp_token = T.pp_token) 37 | 38 | open P 39 | 40 | datatype e = Int of int | Let of string * e * e | Add of e * e | Var of string 41 | 42 | fun look nil x = raise Fail ("eval: non-bound variable: " ^ x) 43 | | look ((k,v)::E) x = if k = x then v else look E x 44 | 45 | type env = (string*int)list 46 | 47 | fun eval (E:env) (e:e) : int = 48 | case e of 49 | Int d => d 50 | | Let(x,e1,e2) => eval ((x,eval E e1)::E) e2 51 | | Var x => look E x 52 | | Add(e1,e2) => eval E e1 + eval E e2 53 | 54 | infix >>= 55 | 56 | val p_int : int p = 57 | next >>= (fn T.Num n => (case Int.fromString n of 58 | SOME n => accept n 59 | | NONE => reject "expecting int") 60 | | _ => reject "expecting int") 61 | 62 | fun qq s = "'" ^ s ^ "'" 63 | 64 | fun p_kw (s:string) : unit p = 65 | next >>= (fn T.Id k => 66 | if k = s then accept () 67 | else reject ("expecting keyword " ^ qq s) 68 | | _ => reject ("expecting keyword " ^ qq s)) 69 | 70 | val p_var : string p = 71 | next >>= (fn T.Id k => accept k | _ => reject "expecting variable") 72 | 73 | fun p_symb (s:string) : unit p = 74 | next >>= (fn T.Symb k => 75 | if k = s then accept () 76 | else reject ("expecting symbol " ^ qq s) 77 | | _ => reject ("expecting symbol " ^ qq s)) 78 | 79 | infixr <|> *> <* 80 | infix >>> ?? <*> <$> <$$> 81 | 82 | fun delay0 p = delay p () 83 | 84 | fun p_e () : e p = 85 | (choice [(fn (v,(e1,e2)) => Let(v,e1,e2)) <$> ((p_kw "let" *> p_var) >>> ((p_symb "=" *> delay p_e ()) >>> (p_kw "in" *> delay0 p_e))), 86 | Var <$> p_var, 87 | Int <$> p_int] ?? delay0 p_next) (fn (e,f) => f e) 88 | 89 | and p_next () : (e -> e) p = 90 | (fn e2 => fn e1 => Add(e1,e2)) <$> (p_symb "+" *> delay0 p_e) 91 | 92 | val res = case parse (delay p_e ()) ts of 93 | OK e => Int.toString(eval nil e) 94 | | NO(r,f) => f() 95 | 96 | val i = print ("Eval = " ^ res ^ "\n") 97 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test2.mlb: -------------------------------------------------------------------------------- 1 | local 2 | $(SML_LIB)/basis/basis.mlb 3 | ../scan-util.mlb 4 | in 5 | test2.sml 6 | end 7 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test2.out.ok: -------------------------------------------------------------------------------- 1 | Lexing string: 2 | "let y = 82 + 2 in let x = 454 in y + x + y (* ok *) + 23" 3 | Tokens: 4 | [let, y, =, 82, +, 2, in, let, x, =, 454, in, y, +, x, +, y, (, *, ok, *, ), +, 23] 5 | Lexing string: 6 | "let y = 82 + 2 in let x = & 454 in y + x + y (* ok *) + 23" 7 | ERROR: Error at position 26: expecting token 8 | Lexing string: 9 | "let y = 82 + 2 in let x = 454 in y + x + y (* ok *) + 23" 10 | Value: 645 11 | Lexing string: 12 | "let y = 82 + 2 in let x = & 454 in y + x + y (* ok *) + 23" 13 | ERROR: Error at position 26: expecting token 14 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test2.sml: -------------------------------------------------------------------------------- 1 | 2 | open ScanUtil 3 | 4 | infix >>> ->> >>- >>? || >>@ >>* ?? 5 | 6 | fun curry f a b = f (a,b) 7 | 8 | type st = CharVectorSlice.slice 9 | 10 | val scanNum = scanChars Char.isDigit 11 | 12 | fun isSymb c = CharVector.exists (curry (op =) c) "#|+-~^?*:!%/='<>@" 13 | 14 | fun isSep c = CharVector.exists (curry (op =) c) "(){}[],.;" 15 | 16 | val scanSymb = scanChar isSymb >>@ String.str 17 | 18 | val scanSep = scanChar isSep >>@ String.str 19 | 20 | fun scanErr get s = 21 | case get s of 22 | SOME _ => raise Fail ("Error at position " ^ 23 | Int.toString (#2 (CharVectorSlice.base s)) ^ 24 | ": expecting token") 25 | 26 | | NONE => NONE 27 | 28 | (* Example: Lexing only *) 29 | 30 | fun lexing s = 31 | let 32 | val scanIdOrSymbOrSep = 33 | scanId || scanSymb || scanSep || scanNum || scanErr 34 | 35 | val scanToken = skipChars Char.isSpace scanIdOrSymbOrSep 36 | 37 | val scanTokens : (string list,st) p = 38 | (scanToken >>@ (fn s => [s]) >>* 39 | scanToken 40 | ) (fn (a,b) => b :: a) 41 | >>@ List.rev 42 | >>- eos 43 | 44 | val sl = CharVectorSlice.full s 45 | 46 | val () = print ("Lexing string:\n \"" ^ s ^ "\"\n") 47 | 48 | in 49 | (case scanTokens CharVectorSlice.getItem sl of 50 | SOME(ts,_) => 51 | print ("Tokens: \n [" ^ 52 | String.concatWith ", " ts ^ 53 | "]\n") 54 | | NONE => raise Fail "lex failure" 55 | ) handle Fail msg => print ("ERROR: " ^ msg ^ "\n") 56 | end 57 | 58 | val ex1 = "let y = 82 + 2 in let x = 454 in y + x + y (* ok *) + 23" 59 | 60 | val () = lexing ex1 61 | 62 | val ex2 = "let y = 82 + 2 in let x = & 454 in y + x + y (* ok *) + 23" 63 | 64 | val () = lexing ex2 65 | 66 | (* Example: Lexing and parsing *) 67 | 68 | datatype e = Int of int | Let of string * e * e | Add of e * e | Var of string 69 | 70 | fun look nil x = raise Fail ("eval: non-bound variable: " ^ x) 71 | | look ((k,v)::E) x = if k = x then v else look E x 72 | 73 | type env = (string*int)list 74 | 75 | fun eval (E:env) (e:e) : int = 76 | case e of 77 | Int d => d 78 | | Let(x,e1,e2) => eval ((x,eval E e1)::E) e2 79 | | Var x => look E x 80 | | Add(e1,e2) => eval E e1 + eval E e2 81 | 82 | (* some utilities *) 83 | 84 | val scanComment = 85 | ign (str "(*") ->> 86 | skipChars (fn c => c <> #"*") (ign (str "*)")) 87 | 88 | val skipWS = fn p => 89 | skipWS scanComment ->> skipWS p 90 | || skipWS p 91 | 92 | fun p_kw s : (unit,st) p = (* parse a keyword *) 93 | skipWS(ign (str s)) 94 | 95 | val p_int : (int,st) p = 96 | skipWS scanNum ?? Int.fromString 97 | 98 | val p_comment : (unit,st) p = 99 | skipWS scanComment 100 | 101 | val keywords = ["in", "let"] 102 | val p_var : (string, st) p = 103 | skipWS scanId ?? (fn id => if List.exists (fn s => id=s) keywords then NONE 104 | else SOME id) 105 | fun parse s = 106 | let 107 | val rec p_e : (e,st) p = 108 | fn g => 109 | ( (((((p_kw "let" ->> p_var) >>> ((p_kw "=" ->> p_e) >>> (p_kw "in" ->> p_e))) >>@ (fn (v,(e1,e2)) => Let(v,e1,e2))) >>? p_next) (fn (e,f) => f e)) 110 | || (((p_var >>@ Var) >>? p_next) (fn (e,f) => f e)) 111 | || (((p_int >>@ Int) >>? p_next) (fn (e,f) => f e)) 112 | || skipWS scanErr 113 | ) g 114 | 115 | and p_next : (e -> e,st) p = 116 | fn g => 117 | ( ((p_kw "+" ->> p_e) >>@ (fn e2 => fn e1 => Add(e1,e2))) 118 | ) g 119 | 120 | val sl = CharVectorSlice.full s 121 | 122 | val () = print ("Lexing string:\n \"" ^ s ^ "\"\n") 123 | in 124 | (case (p_e >>- skipWS eos) CharVectorSlice.getItem sl of 125 | SOME(e,_) => 126 | print ("Value: " ^ Int.toString(eval nil e) ^ "\n") 127 | | NONE => raise Fail "parse failure" 128 | ) handle Fail msg => print ("ERROR: " ^ msg ^ "\n") 129 | end 130 | 131 | val () = parse ex1 132 | 133 | val () = parse ex2 134 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test3.mlb: -------------------------------------------------------------------------------- 1 | 2 | local $(SML_LIB)/basis/basis.mlb 3 | ../parse.mlb 4 | ../char_token.mlb 5 | in 6 | test3.sml 7 | end 8 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test3.out.ok: -------------------------------------------------------------------------------- 1 | OK: arr_arr_tup2_i64_f32 2 | OK: arr_arr_i64 3 | -------------------------------------------------------------------------------- /lib/github.com/diku-dk/sml-parse/test/test3.sml: -------------------------------------------------------------------------------- 1 | 2 | structure T = CharToken 3 | 4 | fun constituent c = 5 | Char.isAlphaNum c orelse c = #"'" orelse c = #"_" 6 | 7 | fun isValidName s = 8 | List.all constituent (explode s) 9 | 10 | fun checkValidName s = 11 | if isValidName s then () 12 | else raise Fail ("\"" ^ s ^ "\" is an invalid SML identifier.") 13 | 14 | fun intersperse y [] = [] 15 | | intersperse y [x] = [x] 16 | | intersperse y (x :: xs) = 17 | x :: y :: intersperse y xs 18 | 19 | fun punctuate c = concat o intersperse c 20 | 21 | structure Parser = Parse(type token = T.token 22 | val pp_token = T.pp_token) 23 | 24 | local 25 | open Parser 26 | 27 | infixr >>= <|> *> <* 28 | infix <*> <$> 29 | 30 | fun char c = 31 | satisfy (fn c' => c = c') 32 | 33 | val space = many (satisfy (fn c => c = #" ")) 34 | 35 | fun lexeme p = p <* space 36 | 37 | fun lChar c = lexeme (char c) 38 | 39 | fun lString s = 40 | let fun loop [] = accept () 41 | | loop (c :: cs) = char c *> loop cs 42 | in lexeme (loop (explode s)) 43 | end 44 | 45 | fun sepBy1 p sep = 46 | p >>= 47 | (fn x => 48 | choice 49 | [ sep *> delay (sepBy1 p) sep >>= (fn xs => accept (x :: xs)) 50 | , delay accept [x] 51 | ]) 52 | 53 | fun sepBy p sep = 54 | choice [delay (sepBy1 p) sep, delay accept []] 55 | 56 | fun parens p = 57 | enclose (lChar #"(") (lChar #")") p 58 | 59 | fun braces p = 60 | enclose (lChar #"{") (lChar #"}") p 61 | 62 | fun constituent c = Char.isAlpha c 63 | 64 | local 65 | val firstChar = satisfy Char.isAlpha 66 | val secondChar = satisfy (fn c => Char.isAlphaNum c orelse c = #"'") 67 | in 68 | val lName = lexeme ((fn c => fn cs => 69 | implode (c :: cs)) <$> firstChar <*> many secondChar) 70 | end 71 | 72 | fun delay0 f = delay f () 73 | 74 | (* Futhark type. *) 75 | datatype T = 76 | TVar of string 77 | | TArray of T 78 | | TTuple of T list 79 | | TRecord of (string * T) list 80 | 81 | fun pT () = 82 | choice 83 | [ TVar <$> lName 84 | , parens (delay0 pT) 85 | , TArray <$> (lString "[]" *> delay0 pT) 86 | , TTuple <$> parens (sepBy (delay0 pT) (lChar #",")) 87 | , TRecord <$> braces (sepBy (delay0 pField) (lChar #",")) 88 | ] 89 | and pField () = 90 | (fn v => fn t => (v, t)) <$> (lName <* lChar #":") <*> pT () 91 | 92 | fun showT (TVar s) = s 93 | | showT (TArray t) = "[]" ^ showT t 94 | | showT (TTuple ts) = 95 | "(" ^ punctuate "," (map showT ts) ^ ")" 96 | | showT (TRecord ts) = 97 | "{" ^ punctuate "," (map (fn (v, t) => v ^ ":" ^ showT t) ts) ^ "}" 98 | 99 | (* Make a string a valid SML identifier, whatever it may presently be. *) 100 | fun escapeName name = 101 | let 102 | fun escape c = 103 | if constituent c then 104 | str c 105 | else 106 | case c of 107 | #"[" => "_LB_" 108 | | #"]" => "_RB_" 109 | | _ => "_" 110 | val name' = concat (map escape (explode name)) 111 | in 112 | if name <> name' then "unrep_" ^ name' else name 113 | end 114 | 115 | fun showTSML (TVar s) = s 116 | | showTSML (TArray t) = "arr_" ^ showTSML t 117 | | showTSML (TTuple ts) = 118 | "tup" ^ Int.toString (length ts) ^ "_" ^ punctuate "_" (map showTSML ts) 119 | | showTSML (TRecord ts) = 120 | "rec" ^ Int.toString (length ts) ^ "_" ^ punctuate "_" (map #1 ts) 121 | 122 | fun println s = print (s ^ "\n") 123 | 124 | fun prTypeName s = 125 | let val ts = T.tokenise {srcname="",input=s} 126 | in case parse ((space *> pT ()) <* eof) ts of 127 | OK x => println ("OK: " ^ showTSML x) 128 | | NO (loc,err) => println ("ERR at " ^ Region.ppLoc loc ^ ": " ^ err()) 129 | end 130 | in 131 | 132 | val () = prTypeName "[][](i64,f32)" 133 | 134 | val () = prTypeName "[][]i64" 135 | 136 | (*val () = prTypeName (List.nth (CommandLine.arguments (), 0)) *) 137 | end 138 | -------------------------------------------------------------------------------- /sml.pkg: -------------------------------------------------------------------------------- 1 | package github.com/diku-dk/sml-parse 2 | 3 | require { 4 | } 5 | --------------------------------------------------------------------------------