├── .gitignore ├── tests ├── testthat.R └── testthat │ ├── test_repetition.R │ ├── test_functions2.R │ ├── test_word2num.R │ └── test_functions.R ├── docs └── ramble-introduction.pdf ├── .Rbuildignore ├── .travis.yml ├── R ├── Ramble.R ├── utils.R └── parser.R ├── man ├── Ramble.Rd ├── satisfy.Rd ├── Unlist.Rd ├── grapes-using-grapes.Rd ├── literal.Rd ├── grapes-alt-grapes.Rd ├── grapes-then-grapes.Rd ├── maybe.Rd ├── Space.Rd ├── identifier.Rd ├── nat.Rd ├── Digit.Rd ├── natural.Rd ├── symbol.Rd ├── ident.Rd ├── Alpha.Rd ├── Lower.Rd ├── Upper.Rd ├── item.Rd ├── SpaceCheck.Rd ├── AlphaNum.Rd ├── thentree.Rd ├── then.Rd ├── grapes-thentree-grapes.Rd ├── token.Rd ├── String.Rd ├── succeed.Rd ├── alt.Rd ├── using.Rd ├── many.Rd └── some.Rd ├── Ramble.Rproj ├── DESCRIPTION ├── .gitattributes ├── NAMESPACE ├── cran-comments.md ├── joss-paper ├── paper.bib └── paper.md ├── LICENSE ├── example ├── example.R ├── example-xml.R └── example-word2num.R ├── readme.md └── vignettes └── Higher-Order_Functions_for_Parsing_in_R.Rmd /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | inst/doc 5 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(Ramble) 3 | 4 | test_check("Ramble") 5 | -------------------------------------------------------------------------------- /docs/ramble-introduction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NoRaincheck/Ramble/HEAD/docs/ramble-introduction.pdf -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | example.*?.R 4 | readme.md 5 | ^\.travis\.yml$ 6 | 
cran-comments.md 7 | docs/* 8 | joss-paper/* 9 | example/* 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | after_success: 7 | - Rscript -e 'install.packages("covr"); covr::codecov()' 8 | -------------------------------------------------------------------------------- /R/Ramble.R: -------------------------------------------------------------------------------- 1 | #' Ramble is a parser generator using combinatory parsers. 2 | #' 3 | #' Ramble allows you to write parsers in a functional manner, inspired by 4 | #' Haskell's Parsec library. 5 | #' 6 | #' @docType package 7 | #' @name Ramble 8 | #' @aliases Ramble ramble package-ramble 9 | NULL -------------------------------------------------------------------------------- /man/Ramble.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Ramble.R 3 | \docType{package} 4 | \name{Ramble} 5 | \alias{Ramble} 6 | \alias{ramble} 7 | \alias{package-ramble} 8 | \alias{Ramble-package} 9 | \title{Ramble is a parser generator using combinatory parsers.} 10 | \description{ 11 | Ramble allows you to write parsers in a functional manner, inspired by 12 | Haskell's Parsec library. 
13 | } 14 | -------------------------------------------------------------------------------- /Ramble.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageCheckArgs: --as-cran 19 | PackageRoxygenize: rd,collate,namespace 20 | -------------------------------------------------------------------------------- /man/satisfy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{satisfy} 4 | \alias{satisfy} 5 | \title{\code{satisfy} is a function which allows us to make parsers that recognise single symbols.} 6 | \usage{ 7 | satisfy(p) 8 | } 9 | \arguments{ 10 | \item{p}{is the predicate to determine if the arbitrary symbol is a member.} 11 | } 12 | \description{ 13 | \code{satisfy} is a function which allows us to make parsers that recognise single symbols. 14 | } 15 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' Unlist is the same as unlist, but doesn't recurse all the way to 2 | #' preserve the type. This function is not well optimised. 
3 | #' 4 | #' @param obj is a list to be flattened; unnamed nested lists are spliced in recursively, while named lists and non-list elements are kept as single elements 5 | #' @importFrom methods is 6 | Unlist <- function(obj) { 7 | ret <- list() 8 | for (i in seq_along(obj)) { 9 | if (is(obj[[i]], "list") && 10 | is.null(names(obj[[i]]))) { 11 | ret <- append(ret, Unlist(obj[[i]])) 12 | } else { 13 | ret <- append(ret, obj[i]) 14 | } 15 | } 16 | ret 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/Unlist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{Unlist} 4 | \alias{Unlist} 5 | \title{Unlist is the same as unlist, but doesn't recurse all the way to 6 | preserve the type. This function is not well optimised.} 7 | \usage{ 8 | Unlist(obj) 9 | } 10 | \arguments{ 11 | \item{obj}{is a list to be flattened; unnamed nested lists are spliced in recursively, while named lists and non-list elements are kept as single elements} 12 | } 13 | \description{ 14 | Unlist is the same as unlist, but doesn't recurse all the way to 15 | preserve the type. This function is not well optimised. 
16 | } 17 | -------------------------------------------------------------------------------- /man/grapes-using-grapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{\%using\%} 4 | \alias{\%using\%} 5 | \title{\code{\%using\%} is the infix operator for using} 6 | \usage{ 7 | p \%using\% f 8 | } 9 | \arguments{ 10 | \item{p}{is the parser to be applied} 11 | 12 | \item{f}{is the function to be applied to each result of \code{p}.} 13 | } 14 | \description{ 15 | \code{\%using\%} is the infix operator for using 16 | } 17 | \examples{ 18 | (item() \%using\% as.numeric) ("1abc") 19 | } 20 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: Ramble 2 | Type: Package 3 | Title: Parser Combinator for R 4 | Version: 0.1.1 5 | Date: 2016-10-23 6 | Author: Chapman Siu 7 | Maintainer: Chapman Siu 8 | Description: Parser generator for R using combinatory parsers. It 9 | is inspired by combinatory parsers developed in Haskell. 
10 | License: MIT + file LICENSE 11 | Imports: 12 | methods 13 | Suggests: 14 | testthat, 15 | knitr, 16 | rmarkdown 17 | VignetteBuilder: knitr 18 | LazyData: true 19 | RoxygenNote: 6.0.1 20 | URL: https://github.com/chappers/Ramble 21 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%alt%") 4 | export("%then%") 5 | export("%thentree%") 6 | export("%using%") 7 | export(Alpha) 8 | export(AlphaNum) 9 | export(Digit) 10 | export(Lower) 11 | export(SpaceCheck) 12 | export(String) 13 | export(Upper) 14 | export(ident) 15 | export(identifier) 16 | export(item) 17 | export(literal) 18 | export(many) 19 | export(maybe) 20 | export(nat) 21 | export(natural) 22 | export(satisfy) 23 | export(some) 24 | export(space) 25 | export(succeed) 26 | export(symbol) 27 | export(thentree) 28 | export(token) 29 | importFrom(methods,is) 30 | -------------------------------------------------------------------------------- /man/literal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by 
hand 2 | % Please edit documentation in R/parser.R 3 | \name{literal} 4 | \alias{literal} 5 | \title{\code{literal} is a parser for single symbols. It will attempt to match the 6 | single symbol with the first character in the string.} 7 | \usage{ 8 | literal(char) 9 | } 10 | \arguments{ 11 | \item{char}{is the character to be matched} 12 | } 13 | \description{ 14 | \code{literal} is a parser for single symbols. It will attempt to match the 15 | single symbol with the first character in the string. 16 | } 17 | \examples{ 18 | literal("a") ("abc") 19 | } 20 | -------------------------------------------------------------------------------- /man/grapes-alt-grapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{\%alt\%} 4 | \alias{\%alt\%} 5 | \title{\code{\%alt\%} is the infix notation for the \code{alt} function.} 6 | \usage{ 7 | p1 \%alt\% p2 8 | } 9 | \arguments{ 10 | \item{p1}{the first parser} 11 | 12 | \item{p2}{the second parser} 13 | } 14 | \value{ 15 | Returns the first parser if it succeeds, otherwise the second parser 16 | } 17 | \description{ 18 | \code{\%alt\%} is the infix notation for the \code{alt} function. 19 | } 20 | \examples{ 21 | (item() \%alt\% succeed("2")) ("abcdef") 22 | } 23 | -------------------------------------------------------------------------------- /man/grapes-then-grapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{\%then\%} 4 | \alias{\%then\%} 5 | \title{\code{\%then\%} is the infix operator for the then combinator.} 6 | \usage{ 7 | p1 \%then\% p2 8 | } 9 | \arguments{ 10 | \item{p1}{the first parser} 11 | 12 | \item{p2}{the second parser} 13 | } 14 | \value{ 15 | recognises anything that \code{p1} and \code{p2} would if placed in succession. 
16 | } 17 | \description{ 18 | \code{\%then\%} is the infix operator for the then combinator. 19 | } 20 | \examples{ 21 | (item() \%then\% succeed("123")) ("abc") 22 | } 23 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Release Summary 2 | * Minor fixes for Unlist utility function 3 | * Added helper function for thentree to help visualize the parse tree 4 | 5 | ## Test environments 6 | * local Windows 10 install, R 3.3.1 7 | * ubuntu 12.04 (on travis-ci), R 3.3.1 8 | * win-builder (r-devel, r-release) 9 | 10 | ## R CMD check results 11 | There were no ERRORs or WARNINGs. 12 | 13 | Notes include "possible mis-spelled words", which were spelt correctly: 14 | * Combinator (3:15) 15 | * Haskell (9:53) 16 | * combinatory (8:43, 9:20) 17 | * parsers (8:55, 9:32) 18 | 19 | 20 | ## Downstream dependencies 21 | * Did not check downstream dependencies -------------------------------------------------------------------------------- /man/maybe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{maybe} 4 | \alias{maybe} 5 | \title{\code{maybe} matches 0 or 1 of pattern \code{p}. In EBNF notation, this 6 | corresponds to a question mark ('?').} 7 | \usage{ 8 | maybe(p) 9 | } 10 | \arguments{ 11 | \item{p}{is the parser to be matched 0 or 1 times.} 12 | } 13 | \description{ 14 | \code{maybe} matches 0 or 1 of pattern \code{p}. In EBNF notation, this 15 | corresponds to a question mark ('?'). 
16 | } 17 | \examples{ 18 | maybe(Digit())("123abc") 19 | maybe(Digit())("abc123") 20 | } 21 | \seealso{ 22 | \code{\link{many}}, \code{\link{some}} 23 | } 24 | -------------------------------------------------------------------------------- /man/Space.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{space} 4 | \alias{space} 5 | \title{\code{space} matches zero or more space characters.} 6 | \usage{ 7 | space() 8 | } 9 | \description{ 10 | \code{space} matches zero or more space characters. 11 | } 12 | \examples{ 13 | space() (" abc") 14 | } 15 | \seealso{ 16 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 17 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 18 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 19 | \code{\link{token}}, \code{\link{identifier}}, 20 | \code{\link{natural}}, \code{\link{symbol}} 21 | } 22 | -------------------------------------------------------------------------------- /man/identifier.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{identifier} 4 | \alias{identifier} 5 | \title{\code{identifier} creates an identifier} 6 | \usage{ 7 | identifier(...) 
8 | } 9 | \arguments{ 10 | \item{...}{takes in token primitives} 11 | } 12 | \description{ 13 | \code{identifier} creates an identifier 14 | } 15 | \seealso{ 16 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 17 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 18 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 19 | \code{\link{space}}, \code{\link{token}}, 20 | \code{\link{natural}}, \code{\link{symbol}} 21 | } 22 | -------------------------------------------------------------------------------- /man/nat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{nat} 4 | \alias{nat} 5 | \title{\code{nat} is a parser which matches one or more numeric characters.} 6 | \usage{ 7 | nat() 8 | } 9 | \description{ 10 | \code{nat} is a parser which matches one or more numeric characters. 11 | } 12 | \examples{ 13 | nat() ("123 + 456") 14 | } 15 | \seealso{ 16 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 17 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 18 | \code{\link{String}}, \code{\link{ident}}, 19 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 20 | \code{\link{natural}}, \code{\link{symbol}} 21 | } 22 | -------------------------------------------------------------------------------- /man/Digit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{Digit} 4 | \alias{Digit} 5 | \title{Digit checks for single digit} 6 | \usage{ 7 | Digit(...) 
8 | } 9 | \arguments{ 10 | \item{...}{additional arguments for the primitives to be parsed} 11 | } 12 | \description{ 13 | Digit checks for single digit 14 | } 15 | \examples{ 16 | Digit()("123") 17 | } 18 | \seealso{ 19 | \code{\link{Lower}}, \code{\link{Upper}}, 20 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 21 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 22 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 23 | \code{\link{natural}}, \code{\link{symbol}} 24 | } 25 | -------------------------------------------------------------------------------- /man/natural.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{natural} 4 | \alias{natural} 5 | \title{\code{natural} creates a token parser for natural numbers} 6 | \usage{ 7 | natural(...) 8 | } 9 | \arguments{ 10 | \item{...}{additional arguments for the parser} 11 | } 12 | \description{ 13 | \code{natural} creates a token parser for natural numbers 14 | } 15 | \seealso{ 16 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 17 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 18 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 19 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 20 | \code{\link{symbol}} 21 | } 22 | -------------------------------------------------------------------------------- /joss-paper/paper.bib: -------------------------------------------------------------------------------- 1 | @article{Hutton1992, 2 | doi = {10.1017/s0956796800000411}, 3 | url = {http://dx.doi.org/10.1017/S0956796800000411}, 4 | year = {1992}, 5 | month = {jul}, 6 | publisher = {Cambridge University Press ({CUP})}, 7 | volume = {2}, 8 | number = {03}, 9 | pages = {323--343}, 10 | author = {Graham Hutton}, 11 | title = {Higher-order functions for 
parsing}, 12 | journal = {Journal of Functional Programming} 13 | } 14 | 15 | @Manual{R, 16 | title = {R: A Language and Environment for Statistical Computing}, 17 | author = {{R Core Team}}, 18 | organization = {R Foundation for Statistical Computing}, 19 | address = {Vienna, Austria}, 20 | year = {2016}, 21 | url = {https://www.R-project.org/}, 22 | } 23 | -------------------------------------------------------------------------------- /man/symbol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{symbol} 4 | \alias{symbol} 5 | \title{\code{symbol} creates a token for a symbol} 6 | \usage{ 7 | symbol(xs) 8 | } 9 | \arguments{ 10 | \item{xs}{takes in a string to create a token} 11 | } 12 | \description{ 13 | \code{symbol} creates a token for a symbol 14 | } 15 | \examples{ 16 | symbol("[") (" [123]") 17 | } 18 | \seealso{ 19 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 20 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 21 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 22 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 23 | \code{\link{natural}} 24 | } 25 | -------------------------------------------------------------------------------- /man/ident.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{ident} 4 | \alias{ident} 5 | \title{\code{ident} is a parser which matches zero or more alphanumeric 6 | characters.} 7 | \usage{ 8 | ident() 9 | } 10 | \description{ 11 | \code{ident} is a parser which matches zero or more alphanumeric 12 | characters. 
13 | } 14 | \examples{ 15 | ident() ("variable1 = 123") 16 | } 17 | \seealso{ 18 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 19 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 20 | \code{\link{String}}, \code{\link{nat}}, 21 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 22 | \code{\link{natural}}, \code{\link{symbol}} 23 | } 24 | -------------------------------------------------------------------------------- /man/Alpha.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{Alpha} 4 | \alias{Alpha} 5 | \title{Alpha checks for single alphabet character} 6 | \usage{ 7 | Alpha(...) 8 | } 9 | \arguments{ 10 | \item{...}{additional arguments for the primitives to be parsed} 11 | } 12 | \description{ 13 | Alpha checks for single alphabet character 14 | } 15 | \examples{ 16 | Alpha()("abc") 17 | } 18 | \seealso{ 19 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 20 | \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 21 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 22 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 23 | \code{\link{natural}}, \code{\link{symbol}} 24 | } 25 | -------------------------------------------------------------------------------- /man/Lower.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{Lower} 4 | \alias{Lower} 5 | \title{Lower checks for single lower case character} 6 | \usage{ 7 | Lower(...) 
8 | } 9 | \arguments{ 10 | \item{...}{additional arguments for the primitives to be parsed} 11 | } 12 | \description{ 13 | Lower checks for single lower case character 14 | } 15 | \examples{ 16 | Lower() ("abc") 17 | } 18 | \seealso{ 19 | \code{\link{Digit}}, \code{\link{Upper}}, 20 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 21 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 22 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 23 | \code{\link{natural}}, \code{\link{symbol}} 24 | } 25 | -------------------------------------------------------------------------------- /man/Upper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{Upper} 4 | \alias{Upper} 5 | \title{Upper checks for a single upper case character} 6 | \usage{ 7 | Upper(...) 8 | } 9 | \arguments{ 10 | \item{...}{additional arguments for the primitives to be parsed} 11 | } 12 | \description{ 13 | Upper checks for a single upper case character 14 | } 15 | \examples{ 16 | Upper()("Abc") 17 | } 18 | \seealso{ 19 | \code{\link{Digit}}, \code{\link{Lower}}, 20 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 21 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 22 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 23 | \code{\link{natural}}, \code{\link{symbol}} 24 | } 25 | -------------------------------------------------------------------------------- /man/item.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{item} 4 | \alias{item} 5 | \title{\code{item} is a parser that consumes the first character of the string and 6 | returns the rest. 
If it cannot consume a single character from the string, it 7 | will emit the empty list, indicating the parser has failed.} 8 | \usage{ 9 | item(...) 10 | } 11 | \arguments{ 12 | \item{...}{additional arguments for the parser} 13 | } 14 | \description{ 15 | \code{item} is a parser that consumes the first character of the string and 16 | returns the rest. If it cannot consume a single character from the string, it 17 | will emit the empty list, indicating the parser has failed. 18 | } 19 | \examples{ 20 | item() ("abc") 21 | item() ("") 22 | } 23 | -------------------------------------------------------------------------------- /man/SpaceCheck.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{SpaceCheck} 4 | \alias{SpaceCheck} 5 | \title{SpaceCheck checks for a single space character} 6 | \usage{ 7 | SpaceCheck(...) 8 | } 9 | \arguments{ 10 | \item{...}{additional arguments for the primitives to be parsed} 11 | } 12 | \description{ 13 | SpaceCheck checks for a single space character 14 | } 15 | \examples{ 16 | SpaceCheck()(" 123") 17 | } 18 | \seealso{ 19 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 20 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, 21 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 22 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 23 | \code{\link{natural}}, \code{\link{symbol}} 24 | } 25 | -------------------------------------------------------------------------------- /man/AlphaNum.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{AlphaNum} 4 | \alias{AlphaNum} 5 | \title{AlphaNum checks for a single alphanumeric character} 6 | \usage{ 7 | AlphaNum(...) 
8 | } 9 | \arguments{ 10 | \item{...}{additional arguments for the primitives to be parsed} 11 | } 12 | \description{ 13 | AlphaNum checks for a single alphanumeric character 14 | } 15 | \examples{ 16 | AlphaNum()("123") 17 | AlphaNum()("abc123") 18 | } 19 | \seealso{ 20 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 21 | \code{\link{Alpha}}, \code{\link{SpaceCheck}}, 22 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 23 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 24 | \code{\link{natural}}, \code{\link{symbol}} 25 | } 26 | -------------------------------------------------------------------------------- /man/thentree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{thentree} 4 | \alias{thentree} 5 | \title{\code{thentree} keeps the full tree representation of the results of parsing. 6 | Otherwise, it is identical to \code{then}.} 7 | \usage{ 8 | thentree(p1, p2) 9 | } 10 | \arguments{ 11 | \item{p1}{the first parser} 12 | 13 | \item{p2}{the second parser} 14 | } 15 | \value{ 16 | recognises anything that \code{p1} and \code{p2} would if placed in 17 | succession. 18 | } 19 | \description{ 20 | \code{thentree} keeps the full tree representation of the results of parsing. 21 | Otherwise, it is identical to \code{then}. 22 | } 23 | \examples{ 24 | (item() \%thentree\% succeed("123")) ("abc") 25 | 26 | } 27 | \seealso{ 28 | \code{\link{alt}}, \code{\link{thentree}} 29 | } 30 | -------------------------------------------------------------------------------- /man/then.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{then} 4 | \alias{then} 5 | \title{\code{then} combinator corresponds to sequencing in BNF. 
The parser 6 | \code{(then(p1, p2))} recognises anything that \code{p1} and \code{p2} would 7 | if placed in succession.} 8 | \usage{ 9 | then(p1, p2) 10 | } 11 | \arguments{ 12 | \item{p1}{the first parser} 13 | 14 | \item{p2}{the second parser} 15 | } 16 | \value{ 17 | recognises anything that \code{p1} and \code{p2} would if placed in 18 | succession. 19 | } 20 | \description{ 21 | \code{\%then\%} is the infix operator for the then combinator, and it is the 22 | preferred way to use the \code{then} operator. 23 | } 24 | \examples{ 25 | (item() \%then\% succeed("123")) ("abc") 26 | } 27 | \seealso{ 28 | \code{\link{alt}}, \code{\link{thentree}} 29 | } 30 | -------------------------------------------------------------------------------- /man/grapes-thentree-grapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{\%thentree\%} 4 | \alias{\%thentree\%} 5 | \title{\code{\%thentree\%} is the infix operator for the then combinator, and it is 6 | the preferred way to use the \code{thentree} operator.} 7 | \usage{ 8 | p1 \%thentree\% p2 9 | } 10 | \arguments{ 11 | \item{p1}{the first parser} 12 | 13 | \item{p2}{the second parser} 14 | } 15 | \value{ 16 | recognises anything that \code{p1} and \code{p2} would if placed in 17 | succession. 18 | } 19 | \description{ 20 | \code{\%thentree\%} is the infix operator for the then combinator, and it is 21 | the preferred way to use the \code{thentree} operator. 
22 | } 23 | \examples{ 24 | (item() \%thentree\% succeed("123")) ("abc") 25 | } 26 | \seealso{ 27 | \code{\link{alt}}, \code{\link{thentree}} 28 | } 29 | -------------------------------------------------------------------------------- /joss-paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Ramble: Parser Combinator for R" 3 | tags: 4 | - R 5 | - Parser combinator 6 | authors: 7 | - name: Chapman Siu 8 | orcid: 0000-0002-2089-3796 9 | affiliation: 1 10 | affiliations: 11 | - name: Chapman Siu 12 | index: 1 13 | date: 6 December 2016 14 | bibliography: paper.bib 15 | --- 16 | 17 | # Summary 18 | 19 | Ramble is a parser combinator library for the [`R`](https://www.r-project.org/) [@R] language built on higher-order functions. The combinatory parsing approach used in Ramble mirrors approaches used in functional languages such as Miranda [@Hutton1992]; it is able to handle ambiguous grammars and provides full backtracking when it is needed. 20 | 21 | Ramble is capable of going beyond simple parsing: semantic actions can be attached to parsers, allowing their results to be manipulated in any way we please. 22 | 23 | # References 24 | -------------------------------------------------------------------------------- /man/token.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{token} 4 | \alias{token} 5 | \title{\code{token} is a new primitive that ignores any space before and after 6 | applying a parser to a token.} 7 | \usage{ 8 | token(p) 9 | } 10 | \arguments{ 11 | \item{p}{is the parser to have spaces stripped.} 12 | } 13 | \description{ 14 | \code{token} is a new primitive that ignores any space before and after 15 | applying a parser to a token. 
16 | } 17 | \examples{ 18 | token(ident()) (" variable1 ") 19 | } 20 | \seealso{ 21 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 22 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 23 | \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 24 | \code{\link{space}}, \code{\link{identifier}}, 25 | \code{\link{natural}}, \code{\link{symbol}} 26 | } 27 | -------------------------------------------------------------------------------- /man/String.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{String} 4 | \alias{String} 5 | \title{\code{String} is a combinator which allows us to build parsers which 6 | recognise strings of symbols, rather than just single symbols} 7 | \usage{ 8 | String(string) 9 | } 10 | \arguments{ 11 | \item{string}{is the string to be matched} 12 | } 13 | \description{ 14 | \code{String} is a combinator which allows us to build parsers which 15 | recognise strings of symbols, rather than just single symbols 16 | } 17 | \examples{ 18 | String("123")("123 abc") 19 | } 20 | \seealso{ 21 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 22 | \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 23 | \code{\link{ident}}, \code{\link{nat}}, 24 | \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}}, 25 | \code{\link{natural}}, \code{\link{symbol}} 26 | } 27 | -------------------------------------------------------------------------------- /man/succeed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{succeed} 4 | \alias{succeed} 5 | \title{\code{succeed} is based on the empty string symbol in the BNF notation. The 6 | \code{succeed} parser always succeeds, without actually consuming 
any input 7 | string. Since the outcome of succeed does not depend on its input, its result 8 | value must be pre-detemined, so it is included as an extra parameter.} 9 | \usage{ 10 | succeed(string) 11 | } 12 | \arguments{ 13 | \item{string}{the result value of succeed parser} 14 | } 15 | \description{ 16 | \code{succeed} is based on the empty string symbol in the BNF notation The 17 | \code{succeed} parser always succeeds, without actually consuming any input 18 | string. Since the outcome of succeed does not depend on its input, its result 19 | value must be pre-detemined, so it is included as an extra parameter. 20 | } 21 | \examples{ 22 | succeed("1") ("abc") 23 | } 24 | -------------------------------------------------------------------------------- /tests/testthat/test_repetition.R: -------------------------------------------------------------------------------- 1 | context("Test repetition functions") 2 | 3 | test_that("maybe", { 4 | expect_equal(maybe(Digit())("a123"), 5 | list(result=NULL, leftover="a123")) 6 | expect_equal(maybe(Digit())("123"), 7 | list(result="1", 8 | leftover="23")) 9 | }) 10 | 11 | test_that("many", { 12 | expect_equal(many(Digit())("a123"), 13 | list(result=NULL, leftover="a123")) 14 | expect_equal(many(Digit())("123"), 15 | list(result=list("1", "2", "3", NULL), 16 | leftover="")) 17 | }) 18 | 19 | test_that("some", { 20 | expect_equal(some(Digit())("a123"), 21 | list()) 22 | expect_equal(some(Digit())("1a23"), 23 | list(result=list("1", NULL), 24 | leftover="a23")) 25 | expect_equal(some(Digit())("123a"), 26 | list(result=list("1", "2", "3", NULL), 27 | leftover="a")) 28 | }) 29 | -------------------------------------------------------------------------------- /man/alt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{alt} 4 | \alias{alt} 5 | \title{\code{alt} combinator is similar to 
alternation in BNF. the parser 6 | \code{(alt(p1, p2))} recognises anything that \code{p1} or \code{p2} would. 7 | The approach taken in this parser follows (Fairbairn86), in which either is 8 | interpretted in a sequential (or exclusive) manner, returning the result of 9 | the first parser to succeed, and failure if neither does.} 10 | \usage{ 11 | alt(p1, p2) 12 | } 13 | \arguments{ 14 | \item{p1}{the first parser} 15 | 16 | \item{p2}{the second parser} 17 | } 18 | \value{ 19 | Returns the first parser if it suceeds otherwise the second parser 20 | } 21 | \description{ 22 | \code{\%alt\%} is the infix notation for the \code{alt} function, and it is the 23 | preferred way to use the \code{alt} operator. 24 | } 25 | \examples{ 26 | (item() \%alt\% succeed("2")) ("abcdef") 27 | } 28 | \seealso{ 29 | \code{\link{then}} 30 | } 31 | -------------------------------------------------------------------------------- /man/using.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{using} 4 | \alias{using} 5 | \title{\code{using} combinator allows us to manipulate results from a parser, for 6 | example building a parse tree. The parser \code{(p \%using\% f)} has the same 7 | behaviour as the parser \code{p}, except that the function \code{f} is 8 | applied to each of its result values.} 9 | \usage{ 10 | using(p, f) 11 | } 12 | \arguments{ 13 | \item{p}{is the parser to be applied} 14 | 15 | \item{f}{is the function to be applied to each result of \code{p}.} 16 | } 17 | \value{ 18 | The parser \code{(p \%using\% f)} has the same behaviour as the 19 | parser \code{p}, except that the function \code{f} is applied to each of 20 | its result values. 21 | } 22 | \description{ 23 | \code{\%using\%} is the infix operator for \code{using}, and it is the 24 | preferred way to use the \code{using} operator. 
25 | } 26 | \examples{ 27 | (item() \%using\% as.numeric) ("1abc") 28 | } 29 | -------------------------------------------------------------------------------- /man/many.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{many} 4 | \alias{many} 5 | \title{\code{many} matches 0 or more of pattern \code{p}. In BNF notation, 6 | repetition occurs often enough to merit its own abbreviation. When zero or 7 | more repetitions of a phrase \code{p} are admissible, we simply write 8 | \code{p*}. The \code{many} combinator corresponds directly to this operator, 9 | and is defined in much the same way.} 10 | \usage{ 11 | many(p) 12 | } 13 | \arguments{ 14 | \item{p}{is the parser to match 0 or more times.} 15 | } 16 | \description{ 17 | This implementation of \code{many} differs from (Hutton92) due to the nature 18 | of R's data structures. Since R does not support the concept of a list of 19 | tuples, we must revert to using a list rather than a vector, since all values 20 | in an R vector must be the same datatype. 21 | } 22 | \examples{ 23 | Digit <- function(...) 
{satisfy(function(x) {return(grepl("[0-9]", x))})} 24 | many(Digit()) ("123abc") 25 | many(Digit()) ("abc") 26 | } 27 | \seealso{ 28 | \code{\link{maybe}}, \code{\link{some}} 29 | } 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Chapman Siu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /man/some.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{some} 4 | \alias{some} 5 | \title{\code{some} matches 1 or more of pattern \code{p}. in BNF notation, repetition occurs often enough to merit its own abbreviation. 
When zero or 6 | more repetitions of a phrase \code{p} are admissible, we simply write 7 | \code{p+}. The \code{some} combinator corresponds directly to this operator, 8 | and is defined in much the same way.} 9 | \usage{ 10 | some(p) 11 | } 12 | \arguments{ 13 | \item{p}{is the parser to match 1 or more times.} 14 | } 15 | \description{ 16 | \code{some} matches 1 or more of pattern \code{p}. in BNF notation, repetition occurs often enough to merit its own abbreviation. When zero or 17 | more repetitions of a phrase \code{p} are admissible, we simply write 18 | \code{p+}. The \code{some} combinator corresponds directly to this operator, 19 | and is defined in much the same way. 20 | } 21 | \examples{ 22 | Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})} 23 | some(Digit()) ("123abc") 24 | } 25 | \seealso{ 26 | \code{\link{maybe}}, \code{\link{many}} 27 | } 28 | -------------------------------------------------------------------------------- /example/example.R: -------------------------------------------------------------------------------- 1 | #' Example with expressions, will not be exported 2 | #' expression example 3 | #' expr :: = term + term | term - term | term 4 | #' term :: = factor * factor | factor / factor | factor 5 | #' factor :: = (expr) | digit+ 6 | 7 | expr <- ((term %then% 8 | symbol("+") %then% 9 | expr %using% function(x) { 10 | print(unlist(c(x))) 11 | return(sum(as.numeric(unlist(c(x))[c(1,3)]))) 12 | }) %alt% 13 | (term %then% 14 | symbol("-") %then% 15 | expr %using% function(x) { 16 | print(unlist(c(x))) 17 | return(Reduce("-", as.numeric(unlist(c(x))[c(1,3)]))) 18 | }) %alt% term) 19 | 20 | 21 | term <- ((factor %then% 22 | symbol("*") %then% 23 | term %using% function(x) { 24 | print(unlist(c(x))) 25 | return(prod(as.numeric(unlist(c(x))[c(1,3)]))) 26 | }) %alt% 27 | (factor %then% 28 | symbol("/") %then% 29 | term %using% function(x) { 30 | print(unlist(c(x))) 31 | return(Reduce("/", 
as.numeric(unlist(c(x))[c(1,3)]))) 32 | }) %alt% factor) 33 | 34 | factor <- ((symbol("(") %then% 35 | expr %then% 36 | symbol(")") %using% function(x){ 37 | print(unlist(c(x))) 38 | return(as.numeric(unlist(c(x))[2])) 39 | }) %alt% natural()) 40 | 41 | expr("1+2+3+4+5+6") 42 | expr("1+(2+3)*4+5+6") 43 | factor("(1)") 44 | factor("1") 45 | expr("1+(2*2)") 46 | expr("(1+1)*2") 47 | expr("(1+2)*3") 48 | expr("1*(2+3)*4*5") 49 | expr("(4-2)+3") 50 | expr("4-2+3") # order is done incorrectly. 51 | expr("4/2") 52 | 53 | -------------------------------------------------------------------------------- /example/example-xml.R: -------------------------------------------------------------------------------- 1 | #' XML parser example 2 | 3 | xml = ' 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | ' 16 | 17 | 18 | xmlParser <- (many(startTag %alt% singleTag) %then% 19 | many(endTag %alt% singleTag)) 20 | 21 | 22 | startTag <- ( 23 | symbol("<") %then% 24 | identifier() %then% 25 | many(attributes) %then% 26 | symbol(">") %using% function(x) { 27 | els <- unlist(c(x)) 28 | #return(unlist(c(x))) 29 | return(list(name=els[2], all=els)) 30 | } 31 | ) 32 | 33 | endTag <- ( 34 | symbol("</") %then% 35 | identifier() %then% 36 | symbol(">") %using% function(x) { 37 | return(unlist(c(x))) 38 | } 39 | ) 40 | 41 | singleTag <- ( 42 | symbol("<") %then% 43 | identifier() %then% 44 | many(attributes) %then% 45 | symbol("/>") %using% function(x) { 46 | els <- unlist(c(x)) 47 | return(unlist(c(x))) 48 | } 49 | ) 50 | 51 | attributes <- ( 52 | identifier() %then% 53 | symbol("=") %then% 54 | quoteString 55 | ) 56 | 57 | quoteString <- ( 58 | symbol('"') %then% 59 | many(satisfy(function(x) {return(!!length(grep('[^"]+', x)))})) %then% 60 | symbol('"') %using% function(x) { 61 | return(paste0(unlist(c(x)), collapse="")) 62 | } 63 | ) 64 | 65 | xmlParser(xml) 66 | 67 | 68 | -------------------------------------------------------------------------------- /tests/testthat/test_functions2.R: 
library(Ramble)

# Expected value of a successful parse: the matched result together with the
# part of the input that was not consumed.
parsed <- function(value, rest) {
  list(result = value, leftover = rest)
}

# A parser that cannot match reports failure as an empty list.
no_parse <- list()

test_that("empty strings (ie. character(0)) are treated correctly", {
  empty_input <- character(0)
  expect_equal(item()(empty_input), NULL)
  expect_equal(satisfy()(empty_input), no_parse)
})

test_that("literal correctly parses character", {
  match_a <- literal("a")
  expect_equal(match_a("a"), parsed("a", ""))
})

test_that("then correctly works when we have empty 2nd predicate", {
  # After "a" is consumed nothing is left for item(), so the sequence fails.
  a_then_any <- literal("a") %then% item()
  expect_equal(a_then_any("a"), no_parse)
})

test_that("using works as expected", {
  upper_a <- literal("a") %using% toupper
  expect_equal(upper_a("abc"), parsed("A", "bc"))
})

test_that("using correctly returns list when there is no result",{
  a_then_any <- literal("a") %then% item()
  upper_pair <- a_then_any %using% toupper
  expect_equal(upper_pair("a"), no_parse)
})

test_that("some has the correct leftover when it matches multiple things", {
  digits <- some(Digit())
  expect_equal(digits("123abc"), parsed(list("1", "2", "3", NULL), "abc"))
})

test_that("derived primatives work", {
  # Single-character classes
  expect_equal(Lower()("abc"), parsed("a", "bc"))
  expect_equal(Upper()("Abc"), parsed("A", "bc"))
  expect_equal(Alpha()("abc"), parsed("a", "bc"))
  expect_equal(AlphaNum()("abc123"), parsed("a", "bc123"))
  expect_equal(AlphaNum()("123"), parsed("1", "23"))

  # Multi-character matchers
  expect_equal(String("123")("123 abc"), parsed("123", " abc"))
  expect_equal(String("")("abc"), parsed(NULL, "abc"))
  expect_equal(nat()("123 + 456"), parsed("123", " + 456"))

  # Token-based matchers wrap their result in a list
  expect_equal(natural()("123"), parsed(list("123"), ""))
  expect_equal(symbol("[")(" [123]"), parsed(list("["), "123]"))
})
-------------------------------------------------------------------------------- /example/example-word2num.R: -------------------------------------------------------------------------------- 1 | library(Ramble) 2 | 3 | # we might have hyphens or spaces, 4 | # e.g. ninety-one or ninety one 5 | remove_space_hyphen <- maybe(token(String("-"))) %using% function(...) return(0) 6 | 7 | token_string <- function(x) token(String(x)) 8 | 9 | unit_definition <- (remove_space_hyphen %alt% succeed(NULL)) %then% ( 10 | (token_string("ten") %using% function(...) return(10)) %alt% 11 | (token_string("eleven") %using% function(...) return(11)) %alt% 12 | (token_string("twelve") %using% function(...) return(12)) %alt% 13 | (token_string("thirteen") %using% function(...) return(13)) %alt% 14 | (token_string("fourteen") %using% function(...) return(14)) %alt% 15 | (token_string("fifteen") %using% function(...) return(15)) %alt% 16 | (token_string("sixteen") %using% function(...) return(16)) %alt% 17 | (token_string("seventeen") %using% function(...) return(17)) %alt% 18 | (token_string("eighteen") %using% function(...) return(18)) %alt% 19 | (token_string("nineteen") %using% function(...) return(19)) %alt% 20 | (token_string("zero") %using% function(...) return(0)) %alt% 21 | (token_string("oh") %using% function(...) return(0)) %alt% 22 | (token_string("zip") %using% function(...) return(0)) %alt% 23 | (token_string("zilch") %using% function(...) return(0)) %alt% 24 | (token_string("nada") %using% function(...) return(0)) %alt% 25 | (token_string("one") %using% function(...) return(1)) %alt% 26 | (token_string("two") %using% function(...) return(2)) %alt% 27 | (token_string("three") %using% function(...) return(3)) %alt% 28 | (token_string("four") %using% function(...) return(4)) %alt% 29 | (token_string("five") %using% function(...) return(5)) %alt% 30 | (token_string("six") %using% function(...) return(6)) %alt% 31 | (token_string("seven") %using% function(...) 
return(7)) %alt% 32 | (token_string("eight") %using% function(...) return(8)) %alt% 33 | (token_string("nine") %using% function(...) return(9)) 34 | ) 35 | 36 | tens_definition <- ( 37 | (token_string("ten") %using% function(...) return(10)) %alt% 38 | (token_string("twenty") %using% function(...) return(20)) %alt% 39 | (token_string("thirty") %using% function(...) return(30)) %alt% 40 | (token_string("forty") %using% function(...) return(40)) %alt% 41 | (token_string("fourty") %using% function(...) return(40)) %alt% 42 | (token_string("fifty") %using% function(...) return(50)) %alt% 43 | (token_string("sixty") %using% function(...) return(60)) %alt% 44 | (token_string("seventy") %using% function(...) return(70)) %alt% 45 | (token_string("eighty") %using% function(...) return(80)) %alt% 46 | (token_string("ninety") %using% function(...) return(90)) 47 | ) 48 | 49 | # optional tens_defintion + units 50 | word2num <- ((tens_definition %alt% succeed(NULL)) %then% unit_definition) %using% function(x) return(sum(unlist(x))) 51 | word2num("ninety one")$result 52 | word2num("twenty-two")$result 53 | -------------------------------------------------------------------------------- /tests/testthat/test_word2num.R: -------------------------------------------------------------------------------- 1 | library(Ramble) 2 | 3 | # we might have hyphens or spaces, 4 | # e.g. ninety-one or ninety one 5 | remove_space_hyphen <- maybe(token(String("-"))) %using% function(...) return(0) 6 | 7 | token_string <- function(x) token(String(x)) 8 | 9 | unit_definition <- (remove_space_hyphen %alt% succeed(NULL)) %then% ( 10 | (token_string("ten") %using% function(...) return(10)) %alt% 11 | (token_string("eleven") %using% function(...) return(11)) %alt% 12 | (token_string("twelve") %using% function(...) return(12)) %alt% 13 | (token_string("thirteen") %using% function(...) return(13)) %alt% 14 | (token_string("fourteen") %using% function(...) 
return(14)) %alt% 15 | (token_string("fifteen") %using% function(...) return(15)) %alt% 16 | (token_string("sixteen") %using% function(...) return(16)) %alt% 17 | (token_string("seventeen") %using% function(...) return(17)) %alt% 18 | (token_string("eighteen") %using% function(...) return(18)) %alt% 19 | (token_string("nineteen") %using% function(...) return(19)) %alt% 20 | (token_string("zero") %using% function(...) return(0)) %alt% 21 | (token_string("oh") %using% function(...) return(0)) %alt% 22 | (token_string("zip") %using% function(...) return(0)) %alt% 23 | (token_string("zilch") %using% function(...) return(0)) %alt% 24 | (token_string("nada") %using% function(...) return(0)) %alt% 25 | (token_string("one") %using% function(...) return(1)) %alt% 26 | (token_string("two") %using% function(...) return(2)) %alt% 27 | (token_string("three") %using% function(...) return(3)) %alt% 28 | (token_string("four") %using% function(...) return(4)) %alt% 29 | (token_string("five") %using% function(...) return(5)) %alt% 30 | (token_string("six") %using% function(...) return(6)) %alt% 31 | (token_string("seven") %using% function(...) return(7)) %alt% 32 | (token_string("eight") %using% function(...) return(8)) %alt% 33 | (token_string("nine") %using% function(...) return(9)) 34 | ) 35 | 36 | tens_definition <- ( 37 | (token_string("ten") %using% function(...) return(10)) %alt% 38 | (token_string("twenty") %using% function(...) return(20)) %alt% 39 | (token_string("thirty") %using% function(...) return(30)) %alt% 40 | (token_string("forty") %using% function(...) return(40)) %alt% 41 | (token_string("fourty") %using% function(...) return(40)) %alt% 42 | (token_string("fifty") %using% function(...) return(50)) %alt% 43 | (token_string("sixty") %using% function(...) return(60)) %alt% 44 | (token_string("seventy") %using% function(...) return(70)) %alt% 45 | (token_string("eighty") %using% function(...) return(80)) %alt% 46 | (token_string("ninety") %using% function(...) 
return(90)) 47 | ) 48 | 49 | # optional tens_defintion + units 50 | word2num <- ((tens_definition %alt% succeed(NULL)) %then% unit_definition) %using% function(x) return(sum(unlist(x))) 51 | 52 | 53 | test_that("word2num", { 54 | expect_equal(word2num("ninety one")$result, 91) 55 | expect_equal(word2num("ninety-two")$result, 92) 56 | expect_equal(word2num("ninety tone"), list()) 57 | expect_equal(word2num("forty six")$result, 46) 58 | expect_equal(word2num("nineteen")$result, 19) 59 | expect_equal(word2num("thirty seven")$result, 37) 60 | }) 61 | -------------------------------------------------------------------------------- /tests/testthat/test_functions.R: -------------------------------------------------------------------------------- 1 | library(Ramble) 2 | 3 | test_that("succeed", { 4 | expect_equal(succeed("1")("abc"), 5 | list(result="1", leftover="abc")) 6 | }) 7 | 8 | test_that("item", { 9 | expect_equal(item()("abc"), 10 | list(result="a", leftover="bc")) 11 | }) 12 | 13 | test_that("then", { 14 | expect_equal((item() %then% succeed("123"))("abc"), 15 | list(result=list("a", "123"), 16 | leftover="bc")) 17 | ## Differentiate "then" from "thentree" 18 | newparam <- function(x) { 19 | function(string) { 20 | ret <- succeed(x)(string) 21 | ret$result <- list(value=ret$result, 22 | more=TRUE) 23 | class(ret$result) <- "newparam" 24 | ret 25 | } 26 | } 27 | expect_equal((item() %then% newparam("123"))("abc"), 28 | list(result=list("a", 29 | structure(list(value="123", more=TRUE), 30 | .Names=c("value", "more"), 31 | class="newparam")), 32 | leftover="bc")) 33 | }) 34 | 35 | test_that("thentree", { 36 | expect_equal((item() %thentree% succeed("123"))("abc"), 37 | list(result=list("a", "123"), 38 | leftover="bc")) 39 | newparam <- function(x) { 40 | function(string) { 41 | ret <- succeed(x)(string) 42 | ret$result <- list(value=ret$result, 43 | more=TRUE) 44 | ret 45 | } 46 | } 47 | expect_equal((item() %thentree% newparam("123"))("abc"), 48 | 
list(result=list("a", list(value="123", more=TRUE)), 49 | leftover="bc")) 50 | }) 51 | 52 | test_that("alternation", { 53 | expect_equal((item() %alt% succeed("2"))("abcdef"), 54 | list(result="a", leftover="bcdef")) 55 | expect_equal((Digit() %alt% succeed("2"))("abcdef"), 56 | list(result="2", leftover="abcdef")) 57 | }) 58 | 59 | test_that("many", { 60 | expect_equal(length(unlist(many(Digit())("123abc")$result)), 3) 61 | expect_equal(many(Digit())("123abc"), 62 | list(result=list("1", "2", "3", NULL), 63 | leftover="abc")) 64 | }) 65 | 66 | test_that("identifier", { 67 | expect_equal(identifier()(" variable1 "), 68 | list(result=list("variable1"), 69 | leftover="")) 70 | }) 71 | 72 | test_that("token", { 73 | expect_equal(token(Digit())("123"), 74 | list(result=list("1"), 75 | leftover="23")) 76 | expect_equal(token(Digit())(" 123"), 77 | list(result=list("1"), 78 | leftover="23")) 79 | expect_equal(token(Digit())(" 1 23"), 80 | list(result=list("1"), 81 | leftover="23")) 82 | expect_equal(token(Digit())(" 1 23"), 83 | list(result=list("1"), 84 | leftover="23")) 85 | expect_equal(token(Digit())(" a 23"), 86 | list()) 87 | ## Keep attributes now 88 | newparam <- function(x) { 89 | function(string) { 90 | ret <- succeed(x)(string) 91 | ret$result <- list(value=ret$result, 92 | more=TRUE) 93 | ret 94 | } 95 | } 96 | expect_equal(token(String("abc") %thentree% newparam("123"))(" abc "), 97 | list(result=list("abc", list(value="123", more=TRUE)), 98 | leftover="")) 99 | }) 100 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | Ramble 2 | ====== 3 | 4 | [![status](http://joss.theoj.org/papers/1b33a8141e698aa4cf038b3c5c9bbfd8/status.svg)](http://joss.theoj.org/papers/1b33a8141e698aa4cf038b3c5c9bbfd8) 5 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/Ramble)](http://cran.r-project.org/package=Ramble) 6 | [![Travis-CI Build 
Status](https://travis-ci.org/chappers/Ramble.svg?branch=master)](https://travis-ci.org/chappers/Ramble) 7 | [![Coverage Status](http://codecov.io/github/chappers/Ramble/coverage.svg?branch=master)](http://codecov.io/github/chappers/Ramble?branch=master) 8 | 9 | This project is just an example to examine the functional components of R. 10 | 11 | Installation 12 | ============ 13 | 14 | Ramble is now on CRAN: 15 | 16 | install.packages("Ramble") 17 | 18 | The development version can be installed from github: 19 | 20 | # install.packages("devtools") 21 | devtools::install_github("chappers/Ramble") 22 | 23 | Goals 24 | ===== 25 | 26 | Create a [parser combinator](http://en.wikipedia.org/wiki/Parser_combinator) written in 27 | pure R. This is mostly a proof of concept, but could be useful or helpful to someone. 28 | 29 | This is inspired by **Programming in Haskell by Graham Hutton**, and also the 30 | [JavaScript port](https://github.com/matthandlersux/functional-parser), and 31 | Python's [recursive descent parsing library](https://pypi.python.org/pypi/funcparserlib/). 32 | 33 | **References:** 34 | 35 | * [Higher-order functions for parsing](http://eprints.nottingham.ac.uk/221/1/parsing.pdf) 36 | 37 | Contributing 38 | ============ 39 | 40 | You can contribute by opening issues on Github or implementing things yourself and making a pull request. 41 | 42 | Please ensure that package passes all checks with `--as-cran` flag (i.e. via `devtools::check(args = c('--as-cran'))`) before submitting a pull request. 43 | 44 | How it Works 45 | ============ 46 | 47 | To understand the differences between Ramble and other combinatory parsers please read [Ramble: A Parser Combinator in R](https://github.com/chappers/Ramble/blob/master/docs/ramble-introduction.pdf). 
48 | 49 | Example 50 | ======= 51 | 52 | You may view examples for: 53 | 54 | * Parsing xml file 55 | * Creating a simple calculator 56 | * Reading a number given in words, and converting it to the appropriate numeric value 57 | 58 | These can be found within the `example/` folder. Below is the calculator example. 59 | 60 | ```r 61 | #' expr :: = term + expr | term - expr | term 62 | #' term :: = factor * term | factor / term | factor 63 | #' factor :: = (expr) | digit+ 64 | 65 | expr <- ((term %then% 66 | symbol("+") %then% 67 | expr %using% function(x) { 68 | print(unlist(c(x))) 69 | return(sum(as.numeric(unlist(c(x))[c(1,3)]))) 70 | }) %alt% 71 | (term %then% 72 | symbol("-") %then% 73 | expr %using% function(x) { 74 | print(unlist(c(x))) 75 | return(Reduce("-", as.numeric(unlist(c(x))[c(1,3)]))) 76 | }) %alt% term) 77 | 78 | 79 | term <- ((factor %then% 80 | symbol("*") %then% 81 | term %using% function(x) { 82 | print(unlist(c(x))) 83 | return(prod(as.numeric(unlist(c(x))[c(1,3)]))) 84 | }) %alt% 85 | (factor %then% 86 | symbol("/") %then% 87 | term %using% function(x) { 88 | print(unlist(c(x))) 89 | return(Reduce("/", as.numeric(unlist(c(x))[c(1,3)]))) 90 | }) %alt% factor) 91 | 92 | factor <- (( 93 | symbol("(") %then% 94 | expr %then% 95 | symbol(")") %using% 96 | function(x){ 97 | print(unlist(c(x))) 98 | return(as.numeric(unlist(c(x))[2])) 99 | }) 100 | %alt% natural()) 101 | ``` 102 | 103 | **Output**: 104 | 105 | ```r 106 | > expr("(1+1)*2") 107 | [1] "1" "+" "1" 108 | [1] "(" "2" ")" 109 | [1] "2" "*" "2" 110 | [1] "1" "+" "1" 111 | [1] "(" "2" ")" 112 | [1] "2" "*" "2" 113 | [1] "1" "+" "1" 114 | [1] "(" "2" ")" 115 | [1] "2" "*" "2" 116 | $result 117 | [1] 4 118 | 119 | $leftover 120 | [1] "" 121 | 122 | > expr("(1+2)*3") 123 | [1] "1" "+" "2" 124 | [1] "(" "3" ")" 125 | [1] "3" "*" "3" 126 | [1] "1" "+" "2" 127 | [1] "(" "3" ")" 128 | [1] "3" "*" "3" 129 | [1] "1" "+" "2" 130 | [1] "(" "3" ")" 131 | [1] "3" "*" "3" 132 | $result 133 | [1] 9 134 | 135 | $leftover 
136 | [1] "" 137 | 138 | > expr("1*(2+3)*4*5") 139 | [1] "2" "+" "3" 140 | [1] "(" "5" ")" 141 | [1] "4" "*" "5" 142 | [1] "5" "*" "20" 143 | [1] "1" "*" "100" 144 | [1] "2" "+" "3" 145 | [1] "(" "5" ")" 146 | [1] "4" "*" "5" 147 | [1] "5" "*" "20" 148 | [1] "1" "*" "100" 149 | [1] "2" "+" "3" 150 | [1] "(" "5" ")" 151 | [1] "4" "*" "5" 152 | [1] "5" "*" "20" 153 | [1] "1" "*" "100" 154 | $result 155 | [1] 100 156 | 157 | $leftover 158 | [1] "" 159 | 160 | > expr("(4-2)+3") 161 | [1] "4" "-" "2" 162 | [1] "(" "2" ")" 163 | [1] "4" "-" "2" 164 | [1] "(" "2" ")" 165 | [1] "4" "-" "2" 166 | [1] "(" "2" ")" 167 | [1] "2" "+" "3" 168 | $result 169 | [1] 5 170 | 171 | $leftover 172 | [1] "" 173 | ``` 174 | -------------------------------------------------------------------------------- /vignettes/Higher-Order_Functions_for_Parsing_in_R.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Higher-Order Functions for Parsing in R" 3 | author: "Chapman Siu" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{High Order Functions for Parsing in R} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\usepackage[utf8]{inputenc} 10 | --- 11 | 12 | # Introduction 13 | 14 | A parser is any program which analyses text to determine its logical structure. For example, the parser phase in a compiler takes a program text, and produces a parse tree which expounds the structure of the program. The form of this input is usually defined by a context-free grammar, using BNF notation. Parsers themselves may be built by hand, but are most often generated automatically using tools like Lex and Yacc from Unix (Aho86). 15 | 16 | Combinatory parsing is a technique which has been explored in functional languages such as Miranda (Hutton92). Combinator parsing is able to handle ambiguous grammars, and to provide full backtracking if it is needed.
It can go beyond simply parsing, even adding semantic actions, allowing their results to be manipulated in any way we please. 17 | 18 | This paper will apply techniques discussed in (Hutton92) in the context of the functional parts of the R programming language. 19 | 20 | # Functional Parts of R 21 | 22 | ## Closures 23 | 24 | R, at its heart, is a functional programming language (Wickham14). R has what is known as first-class functions, meaning functions can be passed as arguments to other functions, returned from other functions, assigned to variables, and stored in data structures. 25 | 26 | Functions can be written by other functions; these are known as closures. In the following example (Wickham14) we can generate a family of power functions in which a parent function (`power()`) creates two child functions (`square()` and `cube()`). 27 | 28 | ```{r closure-example} 29 | power <- function(exponent) { 30 | function(x) { 31 | x ^ exponent 32 | } 33 | } 34 | square <- power(2) 35 | square(2) 36 | cube <- power(3) 37 | cube(2) 38 | ``` 39 | 40 | # Parsing using Combinators 41 | 42 | We will first consider the _type_ of parser. A parser may be viewed as a function from a string of symbols to a result value. Since a parser might not consume the entire string, part of this result will be a suffix of the input string. Sometimes a parser may not be able to produce a result at all. For example, it may be expecting a letter, but find a digit. Rather than define a special type for the success or failure of a parser, we choose to have parsers return a list of pairs as their result, with the empty list `list()` denoting failure, and a list of lists `list(result=v, leftover=xs)` indicating success, with value `v` and unconsumed input `xs`. 43 | 44 | Since we want to specify the type of any parser, regardless of the kind of symbols and results involved, this means we must use a heterogeneous data structure.
Compared with Miranda, R treats its data structures in a different manner. For example, the idea of a tuple does not exist within R. Data types can be divided into five different groups (Wickham14): 45 | 46 | **Homogeneous** **Heterogeneous** 47 | --- --------------- ----------------- 48 | 1d Atomic Vector List 49 | 2d Matrix Data Frame 50 | nd Array 51 | --- --------------- ----------------- 52 | 53 | This means that the value `v`, which indicates success, **must** be a list of lists, since this value may be heterogeneous. 54 | 55 | ## Primitive parsers 56 | 57 | `succeed` is based on the empty string symbol in BNF notation. The `succeed` parser always succeeds, without actually consuming any input string. Since the outcome of succeed does not depend on its input, its result value must be predetermined, so it is included as an extra parameter. 58 | 59 | ```{r, succeed} 60 | succeed <- function(string) { 61 | return(function(nextString) { 62 | return(list(result = string, leftover=nextString)) 63 | }) 64 | } 65 | succeed("1") ("abc") 66 | ``` 67 | 68 | The next function, `item`, allows us to consume the first character of the string and return the rest. If it cannot consume a single character from the string it will emit the empty list, indicating the parser has failed. 69 | 70 | ```{r, item} 71 | item <- function(...){ 72 | return(function(string){ 73 | if(length(string)==0){return(NULL)} 74 | return (if(string=="") list() else list(result=substr(string, 1, 1), leftover=substring(string, 2))) 75 | }) 76 | } 77 | item() ("abc") 78 | ``` 79 | 80 | `item` can be further rewritten in a more useful way. The `satisfy` function allows us to make parsers that recognise single symbols. Rather than enumerating the acceptable symbols, we will allow a predicate to be set, which determines if an arbitrary symbol is a member. Successful parses return the consumed symbol as their result value.
81 | 82 | ```{r, satisfy} 83 | satisfy <- function(p) { 84 | return(function(string) { 85 | if (length(string)==0) { 86 | return(list()) 87 | } 88 | else if (string==""){ 89 | return(list()) 90 | } 91 | else { 92 | result_ = list(result=substr(string, 1, 1), leftover=substring(string, 2)) 93 | if (p(result_$result)) { 94 | return(succeed(result_$result)(result_$leftover)) 95 | } 96 | else{ 97 | return(list()) 98 | } 99 | } 100 | }) 101 | } 102 | satisfy(function(x) {x == "a"}) ("abc") 103 | ``` 104 | 105 | Using `satisfy` we can define a parser for single symbols: 106 | 107 | ```{r, literal} 108 | literal <- function(char) { 109 | satisfy(function(x){return(x==char)}) 110 | } 111 | literal("a") ("abc") 112 | ``` 113 | 114 | ## Combinators 115 | 116 | Now that we have the basic building blocks, we consider how they should be put together to form useful parsers. In BNF notation, larger grammars are built piece-wise from smaller ones using `|` to denote alternation, and juxtaposition to indicate sequencing. So that our parsers resemble BNF notation, we define higher order functions which correspond directly to these operators. Since higher order functions like these combine parsers to form other parsers, they are often referred to as combinators. 117 | 118 | The `alt` combinator corresponds to alternation in BNF. The parser `alt(p1, p2)` recognises anything that `p1` or `p2` would. The approach taken in this parser follows (Fairbairn86), in which either is interpreted in a sequential (or exclusive) manner, returning the results of the first parser to succeed, and failure if neither does. Note that we use the infix notation ``` `%f%` ``` to convert `alt` to an infix operator. The infix notation is merely a syntactic convenience: ```(a `%f%` b)``` is equivalent to `(f (a,b))`.
119 | 120 | ```{r, alt} 121 | alt <- function(p1, p2) { 122 | return(function(string){ 123 | result <- p1 (string) 124 | if(!is.null(result$leftover)) {return(result)} 125 | else{ 126 | return(p2 (string)) 127 | } 128 | }) 129 | } 130 | `%alt%` <- alt 131 | (item() %alt% succeed("2"))("abcdef") 132 | alt(item(), succeed("2")) ("abcdef") 133 | ``` 134 | 135 | The `then` combinator corresponds to sequencing in BNF. The parser (`p1 %then% p2`) recognises anything that `p1` and `p2` would if placed in succession. 136 | 137 | ```{r, then} 138 | then <- function(p1, p2) { 139 | return(function(string) { 140 | result <- p1 (string) 141 | if (length(result) == 0) { 142 | return (list()) 143 | } 144 | else { 145 | result_ <- p2 (result$leftover) 146 | if (length(result_$leftover) == 0 || is.null(result_$leftover)) {return(list())} 147 | return(list(result=append(list(result$result), result_$result), leftover=result_$leftover)) 148 | } 149 | }) 150 | } 151 | `%then%` <- then 152 | (literal("a") %then% literal("b")) ("abc") 153 | ``` 154 | 155 | ## Manipulating Values 156 | 157 | Part of the result from a parser is a value. The `using` combinator allows us to manipulate these results, building a parse tree being the most common application. The parser (`p %using% f`) has the same behaviour as the parser `p`, except that the function `f` is applied to each of the result values: 158 | 159 | ```{r, using} 160 | using <- function(p, f) { 161 | return(function(string) { 162 | result <- p (string) 163 | if(length(result) == 0) {return(list())} 164 | return(list(result=f(result$result), 165 | leftover=result$leftover)) 166 | }) 167 | } 168 | `%using%` <- using 169 | (item() %using% function(x) {as.numeric(x) + 100}) ("1abc") 170 | ``` 171 | 172 | Although `using` has no counterpart in pure BNF notation, it does have much in common with the `{...}` operator in Yacc (Aho86). In fact, the `using` combinator does not restrict us to building parse trees.
Arbitrary semantic actions can be used. 173 | 174 | In BNF notation, repetition occurs often enough to merit its own abbreviation. When zero or more repetitions of a phrase `p` are admissible, we simply write `p*`. Formally, this notation is defined by the equation `p* = p p* | e`. The `many` combinator corresponds directly to this operator, and is defined in much the same way: 175 | 176 | ```{r, many} 177 | many <- function(p) { 178 | return(function(string) { 179 | ((p %then% many(p)) %alt% succeed(NULL)) (string) 180 | }) 181 | } 182 | many(literal("1")) ("111223") 183 | ``` 184 | 185 | Not surprisingly, the next parser corresponds to the other common iterative form in BNF, defined by `p+ = p p*`. The parser (`some p`) has the same behaviour as (`many p`), except that it accepts one or more repetitions of `p`, rather than zero or more: 186 | 187 | ```{r, some} 188 | some <- function(p) { 189 | return(function(string){ 190 | (p %then% many(p)) (string) 191 | }) 192 | } 193 | some(literal("a"))("aaabbc") 194 | ``` 195 | 196 | Note that (`some p`) may fail, whereas (`many p`) always succeeds. 197 | 198 | ## Derived Primitives 199 | 200 | Using the basic parsers together with sequencing and choice, we can now define a number of other useful parsing primitives. 201 | 202 | Firstly, using `satisfy` with the appropriate predicates, we can define parsers for single digits, lower-case letters, upper-case letters, arbitrary letters, alphanumeric characters, and specific characters. We have already demonstrated how we can parse specific characters (see `literal`), but the others can be defined in a similar way: 203 | 204 | ```{r, derived} 205 | Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})} 206 | Lower <- function(...) {satisfy(function(x) {return(grepl("[a-z]", x))})} 207 | Upper <- function(...) satisfy(function(x) {return(grepl("[A-Z]", x))}) 208 | Alpha <- function(...)
satisfy(function(x) {return(grepl("[A-Za-z]", x))}) 209 | AlphaNum <- function(...) satisfy(function(x) {return(grepl("[A-Za-z0-9]", x))}) 210 | SpaceCheck <- function(...) satisfy(function(x) {return(grepl("\\s", x))}) 211 | ``` 212 | 213 | In a similar manner we can define a parser `String` for a string of characters, with the string itself returned as the result value: 214 | 215 | ```{r, String} 216 | String <- function(string) { 217 | if (string=="") { 218 | return (succeed(NULL)) 219 | } 220 | else { 221 | result_=substr(string, 1, 1) 222 | leftover_=substring(string, 2) 223 | return((literal(result_) %then% 224 | String(leftover_)) %using% 225 | function(x) {paste(unlist(c(x)), collapse="")}) 226 | } 227 | } 228 | String("123")("123 abc") 229 | ``` 230 | 231 | Note that `String` is defined using recursion, and only succeeds if the entire target string is consumed. The base case states that the empty string is always parsed. The recursive case states that a non-empty string can be parsed by parsing the first character, parsing the remaining characters, and returning the entire string as the result value.
232 | 233 | Similarly we can create parsers to match identifiers (`ident`), natural numbers (`nat`) and spaces (`space`): 234 | 235 | ```{r, ident} 236 | ident <- function() {(many(AlphaNum()) %using% 237 | function(x) paste0(unlist(c(x)), collapse=""))} 238 | nat <- function() { 239 | some(Digit()) %using% 240 | function(x) {paste(unlist(c(x)), collapse="")} 241 | } 242 | space <- function() { 243 | many(SpaceCheck()) %using% 244 | function(x) {return("")} 245 | } 246 | ident() ("var1 = 123") 247 | nat() ("123456") 248 | ``` 249 | 250 | ## Handling spacing 251 | 252 | To handle spaces we will define a new primitive `token` which ignores any space before and after applying a parser for a token: 253 | 254 | ```{r, token} 255 | token <- function(p) { 256 | space() %then% 257 | p %then% 258 | space() %using% 259 | function(x) {return(unlist(c(x))[2])} 260 | } 261 | token(ident()) (" var1 ") 262 | ``` 263 | This can then be expanded for identifiers, natural numbers and symbols: 264 | 265 | ```{r, identifier} 266 | identifier <- function(...) {token(ident())} 267 | natural <- function(...) {token(nat())} 268 | symbol <- function(xs) {token(String(xs))} 269 | identifier() (" var1 ") 270 | ``` 271 | 272 | # Example 273 | 274 | To conclude our introduction to combinator parsing, we will work through the derivation of a simple parser. Suppose we have a program which works with arithmetic expressions, defined as follows: 275 | 276 | ``` 277 | Example with expressions, will not be exported 278 | expression example 279 | expr ::= term + expr | term - expr | term 280 | term ::= factor * term | factor / term | factor 281 | factor ::= (expr) | digit+ 282 | ``` 283 | 284 | Having this structure allows multiplication and division to have higher precedence than addition and subtraction.
We can simply rewrite the BNF grammar above as follows: 285 | 286 | ```{r, arith} 287 | expr <- ((term %then% 288 | symbol("+") %then% 289 | expr %using% function(x) { 290 | print(unlist(c(x))) 291 | return(sum(as.numeric(unlist(c(x))[c(1,3)]))) 292 | }) %alt% 293 | (term %then% 294 | symbol("-") %then% 295 | expr %using% function(x) { 296 | print(unlist(c(x))) 297 | return(Reduce("-", as.numeric(unlist(c(x))[c(1,3)]))) 298 | }) %alt% term) 299 | 300 | 301 | term <- ((factor %then% 302 | symbol("*") %then% 303 | term %using% function(x) { 304 | print(unlist(c(x))) 305 | return(prod(as.numeric(unlist(c(x))[c(1,3)]))) 306 | }) %alt% 307 | (factor %then% 308 | symbol("/") %then% 309 | term %using% function(x) { 310 | print(unlist(c(x))) 311 | return(Reduce("/", as.numeric(unlist(c(x))[c(1,3)]))) 312 | }) %alt% factor) 313 | 314 | factor <- ((symbol("(") %then% 315 | expr %then% 316 | symbol(")") %using% function(x){ 317 | print(unlist(c(x))) 318 | return(as.numeric(unlist(c(x))[2])) 319 | }) %alt% natural()) 320 | ``` 321 | 322 | This will evaluate arithmetic expressions: 323 | 324 | ```{r, exp} 325 | expr("2+(4-1)*3") 326 | ``` 327 | 328 | 329 | 330 | 331 | 332 | 333 | -------------------------------------------------------------------------------- /R/parser.R: -------------------------------------------------------------------------------- 1 | #' \code{succeed} is based on the empty string symbol in the BNF notation. The 2 | #' \code{succeed} parser always succeeds, without actually consuming any input 3 | #' string. Since the outcome of succeed does not depend on its input, its result 4 | #' value must be pre-determined, so it is included as an extra parameter.
#'
#' @param string the result value of succeed parser
#' @return a parser (a function of the remaining input) that returns a list
#'   with \code{result} set to \code{string} and \code{leftover} set to the
#'   input it was given.
#' @export
#' @examples
#' succeed("1") ("abc")
succeed <- function(string) {
  function(nextString) {
    list(result = string,
         leftover = nextString)
  }
}

#' \code{item} is a parser that consumes the first character of the string and
#' returns the rest. If it cannot consume a single character from the string, it
#' will emit the empty list, indicating the parser has failed.
#'
#' @param ... additional arguments for the parser
#' @return a parser which, applied to a string, returns a list with
#'   \code{result} (the first character) and \code{leftover} (the rest of the
#'   string), or \code{list()} when there is no character to consume.
#' @export
#' @examples
#' item() ("abc")
#' item() ("")
item <- function(...) {
  function(string) {
    # Zero-length input (NULL, character(0)) now fails with the same empty
    # list as "" does, matching the documentation above and satisfy() below;
    # it previously returned NULL for that one case.
    if (length(string) == 0 || string == "") {
      return(list())
    }
    list(result = substr(string, 1, 1),
         leftover = substring(string, 2))
  }
}

#' \code{satisfy} is a function which allows us to make parsers that recognise
#' single symbols.
#'
#' @param p is the predicate to determine if the arbitrary symbol is a member.
#' @return a parser which consumes the first character of its input when
#'   \code{p} is true for that character, and returns \code{list()} otherwise
#'   (including when the input is zero-length or \code{""}).
#' @export
#' @examples
#' satisfy(function(x) x == "a") ("abc")
satisfy <- function(p) {
  function(string) {
    if (length(string) == 0 || string == "") {
      return(list())
    }
    first_char <- substr(string, 1, 1)
    if (p(first_char)) {
      succeed(first_char)(substring(string, 2))
    } else {
      list()
    }
  }
}

#' \code{literal} is a parser for single symbols. It will attempt to match the
#' single symbol with the first character in the string.
#'
#' @param char is the character to be matched
#' @return a parser which consumes the first character of its input when it
#'   equals \code{char}, and returns \code{list()} otherwise.
#' @export
#' @examples
#' literal("a") ("abc")
literal <- function(char) {
  satisfy(function(x) {
    x == char
  })
}

## Building Combinators ##

#' \code{alt} combinator is similar to alternation in BNF.
#' The parser \code{(alt(p1, p2))} recognises anything that \code{p1} or
#' \code{p2} would. Following (Fairbairn86), the choice is sequential (or
#' exclusive): \code{p1} is tried first and its result is returned if it
#' succeeds; only otherwise is \code{p2} tried, and the parse fails if neither
#' succeeds.
#'
#' \code{\%alt\%} is the infix notation for the \code{alt} function, and it is
#' the preferred way to use the \code{alt} operator.
#'
#' @param p1 the first parser
#' @param p2 the second parser
#' @return a parser giving the result of \code{p1} if it succeeds, otherwise
#'   the result of \code{p2}
#' @examples
#' (item() %alt% succeed("2")) ("abcdef")
#' @seealso \code{\link{then}}
alt <- function(p1, p2) {
  function(string) {
    first_try <- p1(string)
    # p1 counts as failed when its result has no `leftover` element; p2 is
    # referenced only on that path, so it is not evaluated when p1 succeeds.
    if (is.null(first_try$leftover)) {
      return(p2(string))
    }
    first_try
  }
}

#' \code{\%alt\%} is the infix form of the \code{alt} function.
#'
#' @param p1 the first parser
#' @param p2 the second parser
#' @return a parser giving the result of \code{p1} if it succeeds, otherwise
#'   the result of \code{p2}
#' @export
#' @examples
#' (item() %alt% succeed("2")) ("abcdef")
`%alt%` <- alt

#' \code{then} combinator corresponds to sequencing in BNF. The parser
#' \code{(then(p1, p2))} recognises anything that \code{p1} and \code{p2} would
#' if placed in succession.
#'
#' \code{\%then\%} is the infix operator for the then combinator, and it is the
#' preferred way to use the \code{then} operator.
#'
#' @param p1 the first parser
#' @param p2 the second parser
#' @return recognises anything that \code{p1} and \code{p2} would if placed in
#' succession.
#' @examples
#' (item() %then% succeed("123")) ("abc")
#' @seealso \code{\link{alt}}, \code{\link{thentree}}
then <- function(p1, p2) {
  function(string) {
    head_parse <- p1(string)
    if (length(head_parse) == 0) {
      return(list())
    }
    tail_parse <- p2(head_parse$leftover)
    # length() is 0 for a missing (NULL) leftover as well as for an empty one,
    # so this single test covers both ways p2 can report failure.
    if (length(tail_parse$leftover) == 0) {
      return(list())
    }
    list(result = Unlist(list(head_parse$result, tail_parse$result)),
         leftover = tail_parse$leftover)
  }
}

#' \code{\%then\%} is the infix form of the \code{then} combinator.
#'
#' @param p1 the first parser
#' @param p2 the second parser
#' @return a parser recognising anything that \code{p1} and \code{p2} would if
#'   placed in succession.
#' @export
#' @examples
#' (item() %then% succeed("123")) ("abc")
`%then%` <- then

#' \code{thentree} sequences two parsers exactly as \code{then} does, but
#' returns the two results as a two-element list instead of passing them
#' through \code{Unlist}, so the tree structure of the parse is kept.
#'
#' @param p1 the first parser
#' @param p2 the second parser
#' @return a parser recognising anything that \code{p1} and \code{p2} would if
#'   placed in succession.
#' @export
#' @examples
#' (item() %thentree% succeed("123")) ("abc")
#'
#' @seealso \code{\link{alt}}, \code{\link{then}}
thentree <- function(p1, p2) {
  function(string) {
    head_parse <- p1(string)
    if (length(head_parse) == 0) {
      return(list())
    }
    tail_parse <- p2(head_parse$leftover)
    if (length(tail_parse$leftover) == 0) {
      return(list())
    }
    list(result = list(head_parse$result, tail_parse$result),
         leftover = tail_parse$leftover)
  }
}

#' \code{\%thentree\%} is the infix form of the \code{thentree} combinator,
#' and it is the preferred way to use the \code{thentree} operator.
#' @export
#' @param p1 the first parser
#' @param p2 the second parser
#' @return a parser recognising anything that \code{p1} and \code{p2} would if
#'   placed in succession.
#' @examples
#' (item() %thentree% succeed("123")) ("abc")
#' @seealso \code{\link{alt}}, \code{\link{then}}
`%thentree%` <- thentree

#' \code{using} combinator allows us to manipulate the result of a parser, for
#' example to build a parse tree. The parser \code{(p \%using\% f)} behaves
#' like the parser \code{p}, except that the function \code{f} is applied to
#' its result value.
#'
#' \code{\%using\%} is the infix operator for \code{using}, and it is the
#' preferred way to use the \code{using} operator.
#'
#' @param p is the parser to be applied
#' @param f is the function to be applied to the result of \code{p}.
#' @return a parser with the same behaviour as \code{p}, except that \code{f}
#'   is applied to its result value; a failed parse stays \code{list()}.
#' @examples
#' (item() %using% as.numeric) ("1abc")
using <- function(p, f) {
  function(string) {
    parsed <- p(string)
    if (length(parsed) == 0) {
      list()
    } else {
      list(result = f(parsed$result),
           leftover = parsed$leftover)
    }
  }
}

#' \code{\%using\%} is the infix form of \code{using}
#'
#' @param p is the parser to be applied
#' @param f is the function to be applied to the result of \code{p}.
#' @export
#' @examples
#' (item() %using% as.numeric) ("1abc")
`%using%` <- using

#' \code{maybe} matches 0 or 1 of pattern \code{p}. In EBNF notation, this
#' corresponds to a question mark ('?').
#'
#' @param p is the parser to be matched 0 or 1 times.
#' @export
#' @examples
#' maybe(Digit())("123abc")
#' maybe(Digit())("abc123")
#' @seealso \code{\link{many}}, \code{\link{some}}
maybe <- function(p) {
  function(string) {
    # Try p; if it fails, succeed with a NULL result and consume nothing.
    optional_p <- p %alt% succeed(NULL)
    optional_p(string)
  }
}

#' \code{many} matches 0 or more of pattern \code{p}. In BNF notation,
#' repetition occurs often enough to merit its own abbreviation. When zero or
#' more repetitions of a phrase \code{p} are admissible, we simply write
#' \code{p*}. The \code{many} combinator corresponds directly to this operator,
#' and is defined in much the same way.
#'
#' This implementation of \code{many} differs from (Hutton92) due to the nature
#' of R's data structures. Since R does not support the concept of a list of
#' tuples, we must revert to using a list rather than a vector, since all values
#' in an R vector must be the same datatype.
#'
#' @param p is the parser to match 0 or more times.
#' @export
#' @examples
#' Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})}
#' many(Digit()) ("123abc")
#' many(Digit()) ("abc")
#' @seealso \code{\link{maybe}}, \code{\link{some}}
many <- function(p) {
  function(string) {
    # p* = p p* | empty: the recursive many(p) is an argument of %then% and is
    # only evaluated once p has matched.
    one_then_more <- p %then% many(p)
    (one_then_more %alt% succeed(NULL))(string)
  }
}

#' \code{some} matches 1 or more of pattern \code{p}. In BNF notation,
#' repetition occurs often enough to merit its own abbreviation. When one or
#' more repetitions of a phrase \code{p} are admissible, we simply write
#' \code{p+}. The \code{some} combinator corresponds directly to this operator,
#' and is defined in much the same way.
#'
#' @param p is the parser to match 1 or more times.
#' @export
#' @examples
#' Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})}
#' some(Digit()) ("123abc")
#' @seealso \code{\link{maybe}}, \code{\link{many}}
some <- function(p) {
  function(string) {
    # p+ = p p*
    at_least_one <- p %then% many(p)
    at_least_one(string)
  }
}

## Define the derived primitives ##

#' Digit checks for single digit
#'
#' @param ... additional arguments for the primitives to be parsed
#' @export
#' @examples
#' Digit()("123")
#' @seealso \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
Digit <- function(...) {
  is_digit <- function(x) grepl("[0-9]", x)
  satisfy(is_digit)
}

#' Lower checks for single lower case character
#'
#' @param ... additional arguments for the primitives to be parsed
#' @export
#' @examples
#' Lower() ("abc")
#' @seealso \code{\link{Digit}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
Lower <- function(...) {
  is_lower <- function(x) grepl("[a-z]", x)
  satisfy(is_lower)
}

#' Upper checks for a single upper case character
#'
#' @param ...
#' additional arguments for the primitives to be parsed
#' @export
#' @examples
#' Upper()("Abc")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
Upper <- function(...) {
  is_upper <- function(x) grepl("[A-Z]", x)
  satisfy(is_upper)
}

#' Alpha checks for single alphabet character
#'
#' @param ... additional arguments for the primitives to be parsed
#' @export
#' @examples
#' Alpha()("abc")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
Alpha <- function(...) {
  is_alpha <- function(x) grepl("[A-Za-z]", x)
  satisfy(is_alpha)
}

#' AlphaNum checks for a single alphanumeric character
#'
#' @param ... additional arguments for the primitives to be parsed
#' @export
#' @examples
#' AlphaNum()("123")
#' AlphaNum()("abc123")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
AlphaNum <- function(...) {
  is_alphanumeric <- function(x) grepl("[A-Za-z0-9]", x)
  satisfy(is_alphanumeric)
}

#' SpaceCheck checks for a single space character
#'
#' @param ...
#' additional arguments for the primitives to be parsed
#' @export
#' @examples
#' SpaceCheck()(" 123")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
SpaceCheck <- function(...) {
  is_space <- function(x) grepl("\\s", x)
  satisfy(is_space)
}

#' \code{String} is a combinator which allows us to build parsers which
#' recognise strings of symbols, rather than just single symbols
#'
#' @param string is the string to be matched
#' @export
#' @examples
#' String("123")("123 abc")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
String <- function(string) {
  # Base case: the empty target matches without consuming anything.
  if (string == "") {
    return(succeed(NULL))
  }
  first_char <- substr(string, 1, 1)
  remainder <- substring(string, 2)
  # Match the first character, then recurse on the rest. String(remainder) is
  # an argument of %then% and is only evaluated when the parser is applied.
  matched <- literal(first_char) %then% String(remainder)
  matched %using% function(x) {
    paste(unlist(c(x)), collapse = "")
  }
}

#' \code{ident} is a parser which matches zero or more alphanumeric
#' characters.
#'
#' @export
#' @examples
#' ident() ("variable1 = 123")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
ident <- function() {
  # Collapse the matched characters into a single string.
  join_chars <- function(x) paste0(unlist(c(x)), collapse = "")
  many(AlphaNum()) %using% join_chars
}

#' \code{nat} is a parser which matches one or more numeric characters.
#'
#' @export
#' @examples
#' nat() ("123 + 456")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
nat <- function() {
  # Collapse the matched digits into a single string (not converted to a
  # number here).
  join_digits <- function(x) {
    paste(unlist(c(x)), collapse = "")
  }
  some(Digit()) %using% join_digits
}

#' \code{space} matches zero or more space characters.
#'
#' @export
#' @examples
#' space() (" abc")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
space <- function() {
  # Whatever whitespace was matched, the result is always the empty string.
  discard <- function(x) ""
  many(SpaceCheck()) %using% discard
}

#' \code{token} is a new primitive that ignores any space before and after
#' applying a parser to a token.
#'
#' @param p is the parser to have spaces stripped.
#' @export
#' @examples
#' token(ident()) (" variable1 ")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{identifier}},
#' \code{\link{natural}}, \code{\link{symbol}}
token <- function(p) {
  # The combined result is: leading space() result, p's result(s), trailing
  # space() result. Drop the first and last elements to keep only p's part.
  strip_ends <- function(x) {
    without_first <- x[-1]
    without_first[-length(without_first)]
  }
  padded <- space() %then% p %then% space()
  padded %using% strip_ends
}

#' \code{identifier} creates a token parser for identifiers: \code{ident}
#' wrapped in \code{token}.
#'
#' @param ... takes in token primitives
#' @export
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}},
#' \code{\link{natural}}, \code{\link{symbol}}
identifier <- function(...) {
  token(ident())
}

#' \code{natural} creates a token parser for natural numbers: \code{nat}
#' wrapped in \code{token}.
#'
#' @param ... additional arguments for the parser
#' @export
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{symbol}}
natural <- function(...) {
  token(nat())
}

#' \code{symbol} creates a token parser for a fixed string: \code{String(xs)}
#' wrapped in \code{token}.
#'
#' @param xs takes in a string to create a token
#' @export
#' @examples
#' symbol("[") (" [123]")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}},
#' \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}},
#' \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}},
#' \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#' \code{\link{natural}}
symbol <- function(xs) {
  token(String(xs))
}