├── .gitignore
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test_repetition.R
    │   ├── test_functions2.R
    │   ├── test_word2num.R
    │   └── test_functions.R
├── docs
    └── ramble-introduction.pdf
├── .Rbuildignore
├── .travis.yml
├── R
    ├── Ramble.R
    ├── utils.R
    └── parser.R
├── man
    ├── Ramble.Rd
    ├── satisfy.Rd
    ├── Unlist.Rd
    ├── grapes-using-grapes.Rd
    ├── literal.Rd
    ├── grapes-alt-grapes.Rd
    ├── grapes-then-grapes.Rd
    ├── maybe.Rd
    ├── Space.Rd
    ├── identifier.Rd
    ├── nat.Rd
    ├── Digit.Rd
    ├── natural.Rd
    ├── symbol.Rd
    ├── ident.Rd
    ├── Alpha.Rd
    ├── Lower.Rd
    ├── Upper.Rd
    ├── item.Rd
    ├── SpaceCheck.Rd
    ├── AlphaNum.Rd
    ├── thentree.Rd
    ├── then.Rd
    ├── grapes-thentree-grapes.Rd
    ├── token.Rd
    ├── String.Rd
    ├── succeed.Rd
    ├── alt.Rd
    ├── using.Rd
    ├── many.Rd
    └── some.Rd
├── Ramble.Rproj
├── DESCRIPTION
├── .gitattributes
├── NAMESPACE
├── cran-comments.md
├── joss-paper
    ├── paper.bib
    └── paper.md
├── LICENSE
├── example
    ├── example.R
    ├── example-xml.R
    └── example-word2num.R
├── readme.md
└── vignettes
    └── Higher-Order_Functions_for_Parsing_in_R.Rmd


/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | inst/doc
5 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(Ramble)
3 | 
4 | test_check("Ramble")
5 | 


--------------------------------------------------------------------------------
/docs/ramble-introduction.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NoRaincheck/Ramble/HEAD/docs/ramble-introduction.pdf


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | example.*?.R
 4 | readme.md
 5 | ^\.travis\.yml$
 6 | cran-comments.md
 7 | docs/*
 8 | joss-paper/*
 9 | example/*
10 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 | 
3 | language: R
4 | sudo: false
5 | cache: packages
6 | after_success:
7 | - Rscript -e 'install.packages("covr"); covr::codecov()'
8 | 


--------------------------------------------------------------------------------
/R/Ramble.R:
--------------------------------------------------------------------------------
1 | #' Ramble is a parser generator using combinatory parsers.
2 | #' 
3 | #' Ramble allows you to write parsers in a functional manner, inspired by 
4 | #' Haskell's Parsec library.
5 | #' 
6 | #' @docType package
7 | #' @name Ramble
8 | #' @aliases Ramble ramble package-ramble
9 | NULL


--------------------------------------------------------------------------------
/man/Ramble.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Ramble.R
 3 | \docType{package}
 4 | \name{Ramble}
 5 | \alias{Ramble}
 6 | \alias{ramble}
 7 | \alias{package-ramble}
 8 | \alias{Ramble-package}
 9 | \title{Ramble is a parser generator using combinatory parsers.}
10 | \description{
11 | Ramble allows you to write parsers in a functional manner, inspired by 
12 | Haskell's Parsec library.
13 | }
14 | 


--------------------------------------------------------------------------------
/Ramble.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | PackageCheckArgs: --as-cran
19 | PackageRoxygenize: rd,collate,namespace
20 | 


--------------------------------------------------------------------------------
/man/satisfy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{satisfy}
 4 | \alias{satisfy}
 5 | \title{\code{satisfy} is a function which allows us to make parsers that recognise single symbols.}
 6 | \usage{
 7 | satisfy(p)
 8 | }
 9 | \arguments{
10 | \item{p}{is the predicate to determine if the arbitrary symbol is a member.}
11 | }
12 | \description{
13 | \code{satisfy} is a function which allows us to make parsers that recognise single symbols.
14 | }
15 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 #' Unlist flattens nested unnamed lists while preserving element types.
 #'
 #' Unlist is similar to \code{unlist}, but it doesn't recurse all the way:
 #' only elements that are unnamed lists are spliced into the result. Every
 #' other element (including named lists) is kept as-is, so the type of each
 #' element is preserved.
 #'
 #' @param obj is a list to be flattened
 #' @return a list holding the elements of \code{obj}, with unnamed nested
 #'   lists recursively spliced in place. An empty input gives an empty list.
 #' @importFrom methods is
 Unlist <- function(obj) {
   # Build one piece per element and concatenate once at the end, instead of
   # growing the result with append() on every iteration (which re-copies the
   # accumulated list each time).
   pieces <- lapply(seq_along(obj), function(i) {
     elem <- obj[[i]]
     if (is(elem, "list") && is.null(names(elem))) {
       # Unnamed nested list: flatten it recursively and splice it in.
       Unlist(elem)
     } else {
       # Single-bracket subsetting keeps the element's name, if it has one.
       obj[i]
     }
   })
   # The leading empty list guarantees a list result, even for empty input.
   do.call(c, c(list(list()), pieces))
 }
18 | 
19 | 


--------------------------------------------------------------------------------
/man/Unlist.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{Unlist}
 4 | \alias{Unlist}
 5 | \title{Unlist is the same as unlist, but doesn't recurse all the way to
 6 | preserve the type. This function is not well optimised.}
 7 | \usage{
 8 | Unlist(obj)
 9 | }
10 | \arguments{
11 | \item{obj}{is a list to be flattened}
12 | }
13 | \description{
14 | Unlist is the same as unlist, but doesn't recurse all the way to
15 | preserve the type. This function is not well optimised.
16 | }
17 | 


--------------------------------------------------------------------------------
/man/grapes-using-grapes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{\%using\%}
 4 | \alias{\%using\%}
 5 | \title{\code{\%using\%} is the infix operator for using}
 6 | \usage{
 7 | p \%using\% f
 8 | }
 9 | \arguments{
10 | \item{p}{is the parser to be applied}
11 | 
12 | \item{f}{is the function to be applied to each result of \code{p}.}
13 | }
14 | \description{
15 | \code{\%using\%} is the infix operator for using
16 | }
17 | \examples{
18 | (item() \%using\% as.numeric) ("1abc")
19 | }
20 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: Ramble
 2 | Type: Package
 3 | Title: Parser Combinator for R
 4 | Version: 0.1.1
 5 | Date: 2016-10-23
 6 | Author: Chapman Siu
 7 | Maintainer: Chapman Siu <chpmn.siu@gmail.com>
 8 | Description: Parser generator for R using combinatory parsers. It
 9 |     is inspired by combinatory parsers developed in Haskell.
10 | License: MIT + file LICENSE
11 | Imports:
12 |     methods
13 | Suggests:
14 |     testthat,
15 |     knitr,
16 |     rmarkdown
17 | VignetteBuilder: knitr
18 | LazyData: true
19 | RoxygenNote: 6.0.1
20 | URL: https://github.com/chappers/Ramble
21 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | *.sln    merge=union
 7 | *.csproj merge=union
 8 | *.vbproj merge=union
 9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 | 
12 | # Standard to msysgit
13 | *.doc	 diff=astextplain
14 | *.DOC	 diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot  diff=astextplain
18 | *.DOT  diff=astextplain
19 | *.pdf  diff=astextplain
20 | *.PDF	 diff=astextplain
21 | *.rtf	 diff=astextplain
22 | *.RTF	 diff=astextplain
23 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export("%alt%")
 4 | export("%then%")
 5 | export("%thentree%")
 6 | export("%using%")
 7 | export(Alpha)
 8 | export(AlphaNum)
 9 | export(Digit)
10 | export(Lower)
11 | export(SpaceCheck)
12 | export(String)
13 | export(Upper)
14 | export(ident)
15 | export(identifier)
16 | export(item)
17 | export(literal)
18 | export(many)
19 | export(maybe)
20 | export(nat)
21 | export(natural)
22 | export(satisfy)
23 | export(some)
24 | export(space)
25 | export(succeed)
26 | export(symbol)
27 | export(thentree)
28 | export(token)
29 | importFrom(methods,is)
30 | 


--------------------------------------------------------------------------------
/man/literal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{literal}
 4 | \alias{literal}
 5 | \title{\code{literal} is a parser for single symbols. It will attempt to match the
 6 | single symbol with the first character in the string.}
 7 | \usage{
 8 | literal(char)
 9 | }
10 | \arguments{
11 | \item{char}{is the character to be matched}
12 | }
13 | \description{
14 | \code{literal} is a parser for single symbols. It will attempt to match the
15 | single symbol with the first character in the string.
16 | }
17 | \examples{
18 | literal("a") ("abc")
19 | }
20 | 


--------------------------------------------------------------------------------
/man/grapes-alt-grapes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{\%alt\%}
 4 | \alias{\%alt\%}
 5 | \title{\code{\%alt\%} is the infix notation for the \code{alt} function.}
 6 | \usage{
 7 | p1 \%alt\% p2
 8 | }
 9 | \arguments{
10 | \item{p1}{the first parser}
11 | 
12 | \item{p2}{the second parser}
13 | }
14 | \value{
15 | Returns the first parser if it succeeds, otherwise the second parser
16 | }
17 | \description{
18 | \code{\%alt\%} is the infix notation for the \code{alt} function.
19 | }
20 | \examples{
21 | (item() \%alt\% succeed("2")) ("abcdef")
22 | }
23 | 


--------------------------------------------------------------------------------
/man/grapes-then-grapes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{\%then\%}
 4 | \alias{\%then\%}
 5 | \title{\code{\%then\%} is the infix operator for the then combinator.}
 6 | \usage{
 7 | p1 \%then\% p2
 8 | }
 9 | \arguments{
10 | \item{p1}{the first parser}
11 | 
12 | \item{p2}{the second parser}
13 | }
14 | \value{
15 | recognises anything that \code{p1} and \code{p2} would if placed in succession.
16 | }
17 | \description{
18 | \code{\%then\%} is the infix operator for the then combinator.
19 | }
20 | \examples{
21 | (item() \%then\% succeed("123")) ("abc")
22 | }
23 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Release Summary
 2 | * Minor fixes for Unlist utility function
 3 | * Added helper function for thentree to help visualize the parse tree
 4 | 
 5 | ## Test environments
 6 | * local Windows 10 install, R 3.3.1
 7 | * ubuntu 12.04 (on travis-ci), R 3.3.1
 8 | * win-builder (r-devel, r-release)
 9 | 
10 | ## R CMD check results
11 | There were no ERRORs or WARNINGs.
12 | 
13 | Notes include "possible mis-spelled words", which were spelt correctly:
14 | *  Combinator (3:15)
15 | *  Haskell (9:53)
16 | *  combinatory (8:43, 9:20)
17 | *  parsers (8:55, 9:32)
18 | 
19 | 
20 | ## Downstream dependencies
21 | *  Did not check downstream dependencies


--------------------------------------------------------------------------------
/man/maybe.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{maybe}
 4 | \alias{maybe}
 5 | \title{\code{maybe} matches 0 or 1 of pattern \code{p}.  In EBNF notation, this
 6 | corresponds to a question mark ('?').}
 7 | \usage{
 8 | maybe(p)
 9 | }
10 | \arguments{
11 | \item{p}{is the parser to be matched 0 or 1 times.}
12 | }
13 | \description{
14 | \code{maybe} matches 0 or 1 of pattern \code{p}.  In EBNF notation, this
15 | corresponds to a question mark ('?').
16 | }
17 | \examples{
18 | maybe(Digit())("123abc")
19 | maybe(Digit())("abc123")
20 | }
21 | \seealso{
22 | \code{\link{many}}, \code{\link{some}}
23 | }
24 | 


--------------------------------------------------------------------------------
/man/Space.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{space}
 4 | \alias{space}
 5 | \title{\code{space} matches zero or more space characters.}
 6 | \usage{
 7 | space()
 8 | }
 9 | \description{
10 | \code{space} matches zero or more space characters.
11 | }
12 | \examples{
13 | space() ("  abc")
14 | }
15 | \seealso{
16 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
17 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
18 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
19 |   \code{\link{token}}, \code{\link{identifier}},
20 |   \code{\link{natural}}, \code{\link{symbol}}
21 | }
22 | 


--------------------------------------------------------------------------------
/man/identifier.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{identifier}
 4 | \alias{identifier}
 5 | \title{\code{identifier} creates an identifier}
 6 | \usage{
 7 | identifier(...)
 8 | }
 9 | \arguments{
10 | \item{...}{takes in token primitives}
11 | }
12 | \description{
13 | \code{identifier} creates an identifier
14 | }
15 | \seealso{
16 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
17 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
18 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
19 |   \code{\link{space}}, \code{\link{token}},
20 |   \code{\link{natural}}, \code{\link{symbol}}
21 | }
22 | 


--------------------------------------------------------------------------------
/man/nat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{nat}
 4 | \alias{nat}
 5 | \title{\code{nat} is a parser which matches one or more numeric characters.}
 6 | \usage{
 7 | nat()
 8 | }
 9 | \description{
10 | \code{nat} is a parser which matches one or more numeric characters.
11 | }
12 | \examples{
13 | nat() ("123 + 456")
14 | }
15 | \seealso{
16 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
17 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
18 |   \code{\link{String}}, \code{\link{ident}},
19 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
20 |   \code{\link{natural}}, \code{\link{symbol}}
21 | }
22 | 


--------------------------------------------------------------------------------
/man/Digit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{Digit}
 4 | \alias{Digit}
 5 | \title{Digit checks for single digit}
 6 | \usage{
 7 | Digit(...)
 8 | }
 9 | \arguments{
10 | \item{...}{additional arguments for the primitives to be parsed}
11 | }
12 | \description{
13 | Digit checks for single digit
14 | }
15 | \examples{
16 | Digit()("123")
17 | }
18 | \seealso{
19 | \code{\link{Lower}}, \code{\link{Upper}}, 
20 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
21 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
22 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
23 |   \code{\link{natural}}, \code{\link{symbol}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/natural.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{natural}
 4 | \alias{natural}
 5 | \title{\code{natural} creates a token parser for natural numbers}
 6 | \usage{
 7 | natural(...)
 8 | }
 9 | \arguments{
10 | \item{...}{additional arguments for the parser}
11 | }
12 | \description{
13 | \code{natural} creates a token parser for natural numbers
14 | }
15 | \seealso{
16 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
17 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
18 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
19 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
20 |   \code{\link{symbol}}
21 | }
22 | 


--------------------------------------------------------------------------------
/joss-paper/paper.bib:
--------------------------------------------------------------------------------
 1 | @article{Hutton1992,
 2 |   doi = {10.1017/s0956796800000411},
 3 |   url = {http://dx.doi.org/10.1017/S0956796800000411},
 4 |   year  = {1992},
 5 |   month = {jul},
 6 |   publisher = {Cambridge University Press ({CUP})},
 7 |   volume = {2},
 8 |   number = {03},
 9 |   pages = {323--343},
10 |   author = {Graham Hutton},
11 |   title = {Higher-order functions for parsing},
12 |   journal = {Journal of Functional Programming}
13 | }
14 | 
15 | @Manual{R,
16 |     title = {R: A Language and Environment for Statistical Computing},
17 |     author = {{R Core Team}},
18 |     organization = {R Foundation for Statistical Computing},
19 |     address = {Vienna, Austria},
20 |     year = {2016},
21 |     url = {https://www.R-project.org/},
22 | }
23 | 


--------------------------------------------------------------------------------
/man/symbol.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{symbol}
 4 | \alias{symbol}
 5 | \title{\code{symbol} creates a token for a symbol}
 6 | \usage{
 7 | symbol(xs)
 8 | }
 9 | \arguments{
10 | \item{xs}{takes in a string to create a token}
11 | }
12 | \description{
13 | \code{symbol} creates a token for a symbol
14 | }
15 | \examples{
16 | symbol("[") ("  [123]")
17 | }
18 | \seealso{
19 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
20 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
21 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
22 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
23 |   \code{\link{natural}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/ident.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{ident}
 4 | \alias{ident}
 5 | \title{\code{ident} is a parser which matches zero or more alphanumeric
 6 | characters.}
 7 | \usage{
 8 | ident()
 9 | }
10 | \description{
11 | \code{ident} is a parser which matches zero or more alphanumeric
12 | characters.
13 | }
14 | \examples{
15 | ident() ("variable1 = 123")
16 | }
17 | \seealso{
18 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
19 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
20 |   \code{\link{String}}, \code{\link{nat}}, 
21 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
22 |   \code{\link{natural}}, \code{\link{symbol}}
23 | }
24 | 


--------------------------------------------------------------------------------
/man/Alpha.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{Alpha}
 4 | \alias{Alpha}
 5 | \title{Alpha checks for single alphabet character}
 6 | \usage{
 7 | Alpha(...)
 8 | }
 9 | \arguments{
10 | \item{...}{additional arguments for the primitives to be parsed}
11 | }
12 | \description{
13 | Alpha checks for single alphabet character
14 | }
15 | \examples{
16 | Alpha()("abc")
17 | }
18 | \seealso{
19 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
20 |   \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
21 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
22 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
23 |   \code{\link{natural}}, \code{\link{symbol}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/Lower.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{Lower}
 4 | \alias{Lower}
 5 | \title{Lower checks for single lower case character}
 6 | \usage{
 7 | Lower(...)
 8 | }
 9 | \arguments{
10 | \item{...}{additional arguments for the primitives to be parsed}
11 | }
12 | \description{
13 | Lower checks for single lower case character
14 | }
15 | \examples{
16 | Lower() ("abc")
17 | }
18 | \seealso{
19 | \code{\link{Digit}}, \code{\link{Upper}}, 
20 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
21 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
22 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
23 |   \code{\link{natural}}, \code{\link{symbol}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/Upper.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{Upper}
 4 | \alias{Upper}
 5 | \title{Upper checks for a single upper case character}
 6 | \usage{
 7 | Upper(...)
 8 | }
 9 | \arguments{
10 | \item{...}{additional arguments for the primitives to be parsed}
11 | }
12 | \description{
13 | Upper checks for a single upper case character
14 | }
15 | \examples{
16 | Upper()("Abc")
17 | }
18 | \seealso{
19 | \code{\link{Digit}}, \code{\link{Lower}}, 
20 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
21 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
22 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
23 |   \code{\link{natural}}, \code{\link{symbol}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/item.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{item}
 4 | \alias{item}
 5 | \title{\code{item} is a parser that consumes the first character of the string and
 6 | returns the rest. If it cannot consume a single character from the string, it
 7 | will emit the empty list, indicating the parser has failed.}
 8 | \usage{
 9 | item(...)
10 | }
11 | \arguments{
12 | \item{...}{additional arguments for the parser}
13 | }
14 | \description{
15 | \code{item} is a parser that consumes the first character of the string and
16 | returns the rest. If it cannot consume a single character from the string, it
17 | will emit the empty list, indicating the parser has failed.
18 | }
19 | \examples{
20 | item() ("abc")
21 | item() ("")
22 | }
23 | 


--------------------------------------------------------------------------------
/man/SpaceCheck.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{SpaceCheck}
 4 | \alias{SpaceCheck}
 5 | \title{SpaceCheck checks for a single space character}
 6 | \usage{
 7 | SpaceCheck(...)
 8 | }
 9 | \arguments{
10 | \item{...}{additional arguments for the primitives to be parsed}
11 | }
12 | \description{
13 | SpaceCheck checks for a single space character
14 | }
15 | \examples{
16 | SpaceCheck()(" 123")
17 | }
18 | \seealso{
19 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
20 |   \code{\link{Alpha}}, \code{\link{AlphaNum}},
21 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
22 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
23 |   \code{\link{natural}}, \code{\link{symbol}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/AlphaNum.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{AlphaNum}
 4 | \alias{AlphaNum}
 5 | \title{AlphaNum checks for a single alphanumeric character}
 6 | \usage{
 7 | AlphaNum(...)
 8 | }
 9 | \arguments{
10 | \item{...}{additional arguments for the primitives to be parsed}
11 | }
12 | \description{
13 | AlphaNum checks for a single alphanumeric character
14 | }
15 | \examples{
16 | AlphaNum()("123")
17 | AlphaNum()("abc123")
18 | }
19 | \seealso{
20 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
21 |   \code{\link{Alpha}}, \code{\link{SpaceCheck}}, 
22 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
23 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
24 |   \code{\link{natural}}, \code{\link{symbol}}
25 | }
26 | 


--------------------------------------------------------------------------------
/man/thentree.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{thentree}
 4 | \alias{thentree}
 5 | \title{\code{thentree} keeps the full tree representation of the results of parsing.
 6 | Otherwise, it is identical to \code{then}.}
 7 | \usage{
 8 | thentree(p1, p2)
 9 | }
10 | \arguments{
11 | \item{p1}{the first parser}
12 | 
13 | \item{p2}{the second parser}
14 | }
15 | \value{
16 | recognises anything that \code{p1} and \code{p2} would if placed in 
17 |   succession.
18 | }
19 | \description{
20 | \code{thentree} keeps the full tree representation of the results of parsing.
21 | Otherwise, it is identical to \code{then}.
22 | }
23 | \examples{
24 | (item() \%thentree\% succeed("123")) ("abc")
25 | 
26 | }
27 | \seealso{
28 | \code{\link{alt}}, \code{\link{thentree}}
29 | }
30 | 


--------------------------------------------------------------------------------
/man/then.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{then}
 4 | \alias{then}
 5 | \title{\code{then} combinator corresponds to sequencing in BNF. The parser 
 6 | \code{(then(p1, p2))} recognises anything that \code{p1} and \code{p2} would 
 7 | if placed in succession.}
 8 | \usage{
 9 | then(p1, p2)
10 | }
11 | \arguments{
12 | \item{p1}{the first parser}
13 | 
14 | \item{p2}{the second parser}
15 | }
16 | \value{
17 | recognises anything that \code{p1} and \code{p2} would if placed in 
18 |   succession.
19 | }
20 | \description{
21 | \code{\%then\%} is the infix operator for the then combinator, and it is the
22 | preferred way to use the \code{then} operator.
23 | }
24 | \examples{
25 | (item() \%then\% succeed("123")) ("abc")
26 | }
27 | \seealso{
28 | \code{\link{alt}}, \code{\link{thentree}}
29 | }
30 | 


--------------------------------------------------------------------------------
/man/grapes-thentree-grapes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{\%thentree\%}
 4 | \alias{\%thentree\%}
 5 | \title{\code{\%thentree\%} is the infix operator for the then combinator, and it is
 6 | the preferred way to use the \code{thentree} operator.}
 7 | \usage{
 8 | p1 \%thentree\% p2
 9 | }
10 | \arguments{
11 | \item{p1}{the first parser}
12 | 
13 | \item{p2}{the second parser}
14 | }
15 | \value{
16 | recognises anything that \code{p1} and \code{p2} would if placed in 
17 |   succession.
18 | }
19 | \description{
20 | \code{\%thentree\%} is the infix operator for the then combinator, and it is
21 | the preferred way to use the \code{thentree} operator.
22 | }
23 | \examples{
24 | (item() \%thentree\% succeed("123")) ("abc")
25 | }
26 | \seealso{
27 | \code{\link{alt}}, \code{\link{thentree}}
28 | }
29 | 


--------------------------------------------------------------------------------
/joss-paper/paper.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Ramble: Parser Combinator for R"
 3 | tags:
 4 |   - R
 5 |   - Parser combinator
 6 | authors:
 7 |   - name: Chapman Siu
 8 |     orcid: 0000-0002-2089-3796
 9 |     affiliation: 1
10 | affiliations:
11 |   - name: Chapman Siu
12 |     index: 1
13 | date: 6 December 2016
14 | bibliography: paper.bib
15 | ---
16 | 
17 | # Summary
18 | 
19 | Ramble is a parser combinator for the [`R`](https://www.r-project.org/) [@R] language using higher-order functions. The combinatory parsing approach used in Ramble mirrors approaches used in functional languages such as Miranda [@Hutton1992], and is able to handle ambiguous grammars, and provide full backtracking if it is needed.
20 | 
21 | Ramble is capable of going beyond simply parsing, even adding semantic actions, allowing their results to be manipulated in any way we please.
22 | 
23 | # References
24 | 


--------------------------------------------------------------------------------
/man/token.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{token}
 4 | \alias{token}
 5 | \title{\code{token} is a new primitive that ignores any space before and after
 6 | applying a parser to a token.}
 7 | \usage{
 8 | token(p)
 9 | }
10 | \arguments{
11 | \item{p}{is the parser to have spaces stripped.}
12 | }
13 | \description{
14 | \code{token} is a new primitive that ignores any space before and after
15 | applying a parser to a token.
16 | }
17 | \examples{
18 | token(ident()) ("   variable1   ")
19 | }
20 | \seealso{
21 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
22 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
23 |   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
24 |   \code{\link{space}}, \code{\link{identifier}},
25 |   \code{\link{natural}}, \code{\link{symbol}}
26 | }
27 | 


--------------------------------------------------------------------------------
/man/String.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{String}
 4 | \alias{String}
 5 | \title{\code{String} is a combinator which allows us to build parsers which
 6 | recognise strings of symbols, rather than just single symbols}
 7 | \usage{
 8 | String(string)
 9 | }
10 | \arguments{
11 | \item{string}{is the string to be matched}
12 | }
13 | \description{
14 | \code{String} is a combinator which allows us to build parsers which
15 | recognise strings of symbols, rather than just single symbols
16 | }
17 | \examples{
18 | String("123")("123 abc")
19 | }
20 | \seealso{
21 | \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
22 |   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
23 |   \code{\link{ident}}, \code{\link{nat}}, 
24 |   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
25 |   \code{\link{natural}}, \code{\link{symbol}}
26 | }
27 | 


--------------------------------------------------------------------------------
/man/succeed.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{succeed}
 4 | \alias{succeed}
 5 | \title{\code{succeed} is based on the empty string symbol in the BNF notation. The 
 6 | \code{succeed} parser always succeeds, without actually consuming any input 
 7 | string. Since the outcome of succeed does not depend on its input, its result
 8 | value must be pre-determined, so it is included as an extra parameter.}
 9 | \usage{
10 | succeed(string)
11 | }
12 | \arguments{
13 | \item{string}{the result value of succeed parser}
14 | }
15 | \description{
16 | \code{succeed} is based on the empty string symbol in the BNF notation. The 
17 | \code{succeed} parser always succeeds, without actually consuming any input 
18 | string. Since the outcome of succeed does not depend on its input, its result
19 | value must be pre-determined, so it is included as an extra parameter.
20 | }
21 | \examples{
22 | succeed("1") ("abc")
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test_repetition.R:
--------------------------------------------------------------------------------
 1 | context("Test repetition functions")
 2 | # maybe(p): when p fails the result is NULL and the input is left untouched; otherwise a single match is consumed.
 3 | test_that("maybe", {
 4 |   expect_equal(maybe(Digit())("a123"),
 5 |                list(result=NULL, leftover="a123"))
 6 |   expect_equal(maybe(Digit())("123"),
 7 |                list(result="1",
 8 |                     leftover="23"))
 9 | })
10 | # many(p): zero matches give a NULL result; otherwise the matches are listed followed by a trailing NULL.
11 | test_that("many", {
12 |   expect_equal(many(Digit())("a123"),
13 |                list(result=NULL, leftover="a123"))
14 |   expect_equal(many(Digit())("123"),
15 |                list(result=list("1", "2", "3", NULL),
16 |                     leftover=""))
17 | })
18 | # some(p): at least one match is required, so a non-matching start yields an empty list (failure).
19 | test_that("some", {
20 |   expect_equal(some(Digit())("a123"),
21 |                list())
22 |   expect_equal(some(Digit())("1a23"),
23 |                list(result=list("1", NULL),
24 |                     leftover="a23"))
25 |   expect_equal(some(Digit())("123a"),
26 |                list(result=list("1", "2", "3", NULL),
27 |                     leftover="a"))
28 | })
29 | 


--------------------------------------------------------------------------------
/man/alt.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{alt}
 4 | \alias{alt}
 5 | \title{\code{alt} combinator is similar to alternation in BNF. The parser 
 6 | \code{(alt(p1, p2))} recognises anything that \code{p1} or \code{p2} would. 
 7 | The approach taken in this parser follows (Fairbairn86), in which either is 
 8 | interpreted in a sequential (or exclusive) manner, returning the result of
 9 | the first parser to succeed, and failure if neither does.}
10 | \usage{
11 | alt(p1, p2)
12 | }
13 | \arguments{
14 | \item{p1}{the first parser}
15 | 
16 | \item{p2}{the second parser}
17 | }
18 | \value{
19 | Returns the result of the first parser if it succeeds, otherwise the result of the second parser
20 | }
21 | \description{
22 | \code{\%alt\%} is the infix notation for the \code{alt} function, and it is the
23 | preferred way to use the \code{alt} operator.
24 | }
25 | \examples{
26 | (item() \%alt\% succeed("2")) ("abcdef")
27 | }
28 | \seealso{
29 | \code{\link{then}}
30 | }
31 | 


--------------------------------------------------------------------------------
/man/using.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{using}
 4 | \alias{using}
 5 | \title{\code{using} combinator allows us to manipulate results from a parser, for 
 6 | example building a parse tree. The parser \code{(p \%using\% f)} has the same 
 7 | behaviour as the parser \code{p}, except that the function \code{f} is
 8 | applied to each of its result values.}
 9 | \usage{
10 | using(p, f)
11 | }
12 | \arguments{
13 | \item{p}{is the parser to be applied}
14 | 
15 | \item{f}{is the function to be applied to each result of \code{p}.}
16 | }
17 | \value{
18 | The parser \code{(p \%using\% f)} has the same behaviour as the
19 |   parser \code{p}, except that the function \code{f} is applied to each of
20 |   its result values.
21 | }
22 | \description{
23 | \code{\%using\%} is the infix operator for \code{using}, and it is the
24 | preferred way to use the \code{using} operator.
25 | }
26 | \examples{
27 | (item() \%using\% as.numeric) ("1abc")
28 | }
29 | 


--------------------------------------------------------------------------------
/man/many.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{many}
 4 | \alias{many}
 5 | \title{\code{many} matches 0 or more of pattern \code{p}. In BNF notation, 
 6 | repetition occurs often enough to merit its own abbreviation. When zero or 
 7 | more repetitions of a phrase \code{p} are admissible, we simply write 
 8 | \code{p*}. The \code{many} combinator corresponds directly to this operator, 
 9 | and is defined in much the same way.}
10 | \usage{
11 | many(p)
12 | }
13 | \arguments{
14 | \item{p}{is the parser to match 0 or more times.}
15 | }
16 | \description{
17 | This implementation of \code{many} differs from (Hutton92) due to the nature 
18 | of R's data structures. Since R does not support the concept of a list of
19 | tuples, we must revert to using a list rather than a vector, since all values
20 | in an R vector must be the same datatype.
21 | }
22 | \examples{
23 | Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})}
24 | many(Digit()) ("123abc")
25 | many(Digit()) ("abc")
26 | }
27 | \seealso{
28 | \code{\link{maybe}}, \code{\link{some}}
29 | }
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Chapman Siu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/man/some.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parser.R
 3 | \name{some}
 4 | \alias{some}
 5 | \title{\code{some} matches 1 or more of pattern \code{p}. In BNF notation, repetition occurs often enough to merit its own abbreviation. When one or 
 6 | more repetitions of a phrase \code{p} are admissible, we simply write 
 7 | \code{p+}. The \code{some} combinator corresponds directly to this operator,
 8 | and is defined in much the same way.}
 9 | \usage{
10 | some(p)
11 | }
12 | \arguments{
13 | \item{p}{is the parser to match 1 or more times.}
14 | }
15 | \description{
16 | \code{some} matches 1 or more of pattern \code{p}. In BNF notation, repetition occurs often enough to merit its own abbreviation. When one or 
17 | more repetitions of a phrase \code{p} are admissible, we simply write 
18 | \code{p+}. The \code{some} combinator corresponds directly to this operator,
19 | and is defined in much the same way.
20 | }
21 | \examples{
22 | Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})}
23 | some(Digit()) ("123abc")
24 | }
25 | \seealso{
26 | \code{\link{maybe}}, \code{\link{many}}
27 | }
28 | 


--------------------------------------------------------------------------------
/example/example.R:
--------------------------------------------------------------------------------
 1 | #' Calculator example built from parser combinators; will not be exported.
 2 | #' Grammar as implemented below (each rule recurses on its right-hand side):
 3 | #' expr :: = term + expr | term - expr | term
 4 | #' term :: = factor * term | factor / term | factor
 5 | #' factor :: = (expr) | natural
 6 | # NOTE(review): expr/term/factor reference each other before they are defined; presumably this relies on lazy argument evaluation inside the combinators -- confirm.
 7 | expr <- ((term %then% 
 8 |             symbol("+") %then%
 9 |             expr %using% function(x) {
10 |               print(unlist(c(x)))  # trace the flattened tokens for this rule
11 |               return(sum(as.numeric(unlist(c(x))[c(1,3)])))  # operands sit at positions 1 and 3; position 2 is the operator
12 |             }) %alt% 
13 |           (term %then% 
14 |             symbol("-") %then%
15 |             expr %using% function(x) {
16 |               print(unlist(c(x)))
17 |               return(Reduce("-", as.numeric(unlist(c(x))[c(1,3)])))  # left operand minus right operand
18 |             }) %alt% term)
19 | 
20 | # term: multiplication/division of a factor with a following term, or a bare factor.
21 | term <- ((factor %then% 
22 |             symbol("*") %then%
23 |               term %using% function(x) {
24 |                 print(unlist(c(x)))
25 |                 return(prod(as.numeric(unlist(c(x))[c(1,3)])))
26 |               }) %alt% 
27 |          (factor %then% 
28 |            symbol("/") %then%
29 |            term %using% function(x) {
30 |              print(unlist(c(x)))
31 |              return(Reduce("/", as.numeric(unlist(c(x))[c(1,3)])))
32 |           }) %alt% factor)
33 | # factor: a parenthesised expr (its value is the middle token) or a natural number. Note this name masks base::factor in this script.
34 | factor <- ((symbol("(") %then%
35 |             expr %then%
36 |             symbol(")") %using% function(x){
37 |               print(unlist(c(x)))
38 |               return(as.numeric(unlist(c(x))[2]))  # position 2 is the value between the parentheses
39 |             }) %alt% natural())
40 | # Sample invocations.
41 | expr("1+2+3+4+5+6")
42 | expr("1+(2+3)*4+5+6")
43 | factor("(1)")
44 | factor("1")
45 | expr("1+(2*2)")
46 | expr("(1+1)*2")
47 | expr("(1+2)*3")
48 | expr("1*(2+3)*4*5")
49 | expr("(4-2)+3")
50 | expr("4-2+3") # NOTE: grouping is wrong here -- the right-recursive rule `term - expr` reads this as 4-(2+3) rather than (4-2)+3.
51 | expr("4/2")
52 | 
53 | 


--------------------------------------------------------------------------------
/example/example-xml.R:
--------------------------------------------------------------------------------
 1 | #' XML parser example: splits a small XML snippet into tags using Ramble combinators.
 2 | # Sample input: a schema fragment containing opening, closing and self-closing tags.
 3 | xml = '<complexType name="SubjectType">
 4 |     <choice>
 5 |         <sequence>
 6 |             <choice>
 7 |                 <element ref="saml:BaseID"/>
 8 |                 <element ref="saml:NameID"/>
 9 |                 <element ref="saml:EncryptedID"/>
10 |             </choice>
11 |             <element ref="saml:SubjectConfirmation" minOccurs="0" maxOccurs="unbounded"/>
12 |         </sequence>
13 |         <element ref="saml:SubjectConfirmation" maxOccurs="unbounded"/>
14 |     </choice>
15 | </complexType>'
16 | # Top level: a run of opening/self-closing tags followed by a run of closing/self-closing tags.
17 | # NOTE(review): nothing here pairs a closing tag with its opening tag, so nesting does not appear to be validated.
18 | xmlParser <- (many(startTag %alt% singleTag) %then%
19 |           many(endTag %alt% singleTag))
20 | 
21 | # Opening tag: "<" name attribute* ">"; returns the tag name (second token) plus all flattened tokens.
22 | startTag <- (
23 |   symbol("<") %then%
24 |     identifier() %then% 
25 |     many(attributes) %then%
26 |   symbol(">") %using% function(x) {
27 |     els <- unlist(c(x))
28 |     #return(unlist(c(x)))
29 |     return(list(name=els[2], all=els))
30 |   }
31 | )
32 | # Closing tag: "</" name ">"; returns the flattened tokens.
33 | endTag <- (
34 |   symbol("</") %then%
35 |     identifier() %then% 
36 |   symbol(">") %using% function(x) {
37 |     return(unlist(c(x)))
38 |   }
39 |   )
40 | # Self-closing tag: "<" name attribute* "/>"; returns the flattened tokens.
41 | singleTag <- (
42 |   symbol("<") %then%
43 |     identifier() %then% 
44 |     many(attributes) %then%
45 |     symbol("/>") %using% function(x) {
46 |       els <- unlist(c(x))  # NOTE(review): els is never used; the next line recomputes the same value
47 |       return(unlist(c(x)))
48 |     }
49 | )
50 | # One attribute: name "=" quoted-string.
51 | attributes <- (
52 |   identifier() %then% 
53 |     symbol("=") %then%
54 |     quoteString
55 |   )
56 | # Quoted string: everything parsed, including both double quotes, is pasted into a single string.
57 | quoteString <- (
58 |   symbol('"') %then%
59 |     many(satisfy(function(x) {return(!!length(grep('[^"]+', x)))})) %then%  # predicate is TRUE when x contains a character other than a double quote
60 |     symbol('"') %using% function(x) {
61 |       return(paste0(unlist(c(x)), collapse=""))
62 |     } 
63 |   )
64 | 
65 | xmlParser(xml)
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/tests/testthat/test_functions2.R:
--------------------------------------------------------------------------------
 1 | library(Ramble)
 2 | # Edge cases and derived primitives of the Ramble parser combinators.
 3 | test_that("empty strings (ie. character(0)) are treated correctly", {
 4 |   expect_equal(item()(character(0)), NULL)
 5 |   expect_equal(satisfy()(character(0)), list())
 6 | })
 7 | # A literal that matches the whole input leaves an empty-string leftover.
 8 | test_that("literal correctly parses character", {
 9 |   expect_equal(literal("a")("a"),
10 |                list(result="a", leftover=""))
11 | }) 
12 | # literal("a") consumes the only character, so the following item() has nothing to read and the sequence yields list().
13 | test_that("then correctly works when we have empty 2nd predicate", {
14 |   expect_equal((literal("a") %then% item())("a"), list())
15 | }) 
16 | # %using% transforms the parser's result and leaves the leftover unchanged.
17 | test_that("using works as expected", {
18 |   expect_equal((literal("a") %using% toupper)("abc"),
19 |                list(result="A", leftover="bc"))
20 | })
21 | # A failed parse (empty list) is expected to come out of %using% as an empty list.
22 | test_that("using correctly returns list when there is no result",{
23 |   expect_equal(((literal("a") %then% item()) %using% toupper)("a"),
24 |                list())
25 | })
26 | # some() stops at the first non-matching character and reports the rest as leftover.
27 | test_that("some has the correct leftover when it matches multiple things", {
28 |   expect_equal((some(Digit())("123abc")),
29 |                list(result=list("1", "2", "3", NULL), leftover="abc"))
30 | })
31 | # Character-class parsers consume one character; String/nat return a plain string; natural/symbol wrap the result in a list.
32 | test_that("derived primatives work", {
33 |   expect_equal(Lower()("abc"),
34 |                list(result="a", leftover="bc"))
35 |   expect_equal(Upper()("Abc"),
36 |                list(result="A", leftover="bc"))
37 |   expect_equal(Alpha()("abc"),
38 |                list(result="a", leftover="bc"))
39 |   expect_equal(AlphaNum()("abc123"),
40 |                list(result="a", leftover="bc123"))
41 |   expect_equal(AlphaNum()("123"),
42 |                list(result="1", leftover="23"))
43 |   expect_equal(String("123")("123 abc"),
44 |                list(result="123", leftover=" abc"))
45 |   expect_equal(String("")("abc"),  # the empty string matches without consuming input
46 |                list(result=NULL, leftover="abc"))
47 |   expect_equal(nat()("123 + 456"),
48 |                list(result="123", leftover=" + 456"))
49 |   expect_equal(natural()("123"),
50 |                list(result=list("123"), leftover=""))
51 |   expect_equal(symbol("[")("  [123]"),  # leading spaces are skipped before the symbol
52 |                list(result=list("["), leftover="123]"))
53 | })
54 | 
55 | 


--------------------------------------------------------------------------------
/example/example-word2num.R:
--------------------------------------------------------------------------------
 1 | library(Ramble)
 2 | # Example: convert a number written in English words (up to two words) into its numeric value.
 3 | # A hyphen (optionally surrounded by spaces) may separate tens and units,
 4 | # e.g. ninety-one or ninety one; it is mapped to 0 so it does not change the final sum.
 5 | remove_space_hyphen <- maybe(token(String("-"))) %using% function(...) return(0)
 6 | # token_string(x): parser for the literal word x wrapped in token().
 7 | token_string <- function(x) token(String(x))
 8 | # Units and teens, each mapped to its value. NOTE(review): the teens are listed before the units, presumably because %alt% keeps the first success and e.g. "nine" is a prefix of "nineteen" -- confirm.
 9 | unit_definition <- (remove_space_hyphen %alt% succeed(NULL)) %then% (
10 |   (token_string("ten") %using% function(...) return(10)) %alt% 
11 |     (token_string("eleven") %using% function(...) return(11)) %alt% 
12 |     (token_string("twelve") %using% function(...) return(12)) %alt% 
13 |     (token_string("thirteen") %using% function(...) return(13)) %alt% 
14 |     (token_string("fourteen") %using% function(...) return(14)) %alt% 
15 |     (token_string("fifteen") %using% function(...) return(15)) %alt% 
16 |     (token_string("sixteen") %using% function(...) return(16)) %alt% 
17 |     (token_string("seventeen") %using% function(...) return(17)) %alt% 
18 |     (token_string("eighteen") %using% function(...) return(18)) %alt% 
19 |     (token_string("nineteen") %using% function(...) return(19)) %alt%
20 |   (token_string("zero") %using% function(...) return(0)) %alt% 
21 |     (token_string("oh") %using% function(...) return(0)) %alt% 
22 |     (token_string("zip") %using% function(...) return(0)) %alt% 
23 |     (token_string("zilch") %using% function(...) return(0)) %alt% 
24 |     (token_string("nada") %using% function(...) return(0)) %alt% 
25 |     (token_string("one") %using% function(...) return(1)) %alt% 
26 |     (token_string("two") %using% function(...) return(2)) %alt% 
27 |     (token_string("three") %using% function(...) return(3)) %alt% 
28 |     (token_string("four") %using% function(...) return(4)) %alt% 
29 |     (token_string("five") %using% function(...) return(5)) %alt% 
30 |     (token_string("six") %using% function(...) return(6)) %alt% 
31 |     (token_string("seven") %using% function(...) return(7)) %alt% 
32 |     (token_string("eight") %using% function(...) return(8)) %alt% 
33 |     (token_string("nine") %using% function(...) return(9)) 
34 | )
35 | # Multiples of ten; both "forty" and the variant spelling "fourty" map to 40.
36 | tens_definition <- (
37 |   (token_string("ten")     %using% function(...) return(10)) %alt%
38 |     (token_string("twenty")  %using% function(...) return(20)) %alt%
39 |     (token_string("thirty")  %using% function(...) return(30)) %alt%
40 |     (token_string("forty")   %using% function(...) return(40)) %alt%
41 |     (token_string("fourty")  %using% function(...) return(40)) %alt% 
42 |     (token_string("fifty")   %using% function(...) return(50)) %alt%
43 |     (token_string("sixty")   %using% function(...) return(60)) %alt%
44 |     (token_string("seventy") %using% function(...) return(70)) %alt%
45 |     (token_string("eighty")  %using% function(...) return(80)) %alt%
46 |     (token_string("ninety")  %using% function(...) return(90))
47 | )
48 | 
49 | # optional tens_definition followed by unit_definition; the parsed values are flattened and summed
50 | word2num <- ((tens_definition %alt% succeed(NULL)) %then% unit_definition) %using% function(x) return(sum(unlist(x)))
51 | word2num("ninety one")$result  # 90 + 1
52 | word2num("twenty-two")$result  # presumably 22: 20 + 0 (hyphen) + 2
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test_word2num.R:
--------------------------------------------------------------------------------
 1 | library(Ramble)
 2 | # Builds a words-to-number parser from Ramble combinators and checks it end to end.
 3 | # A hyphen (optionally surrounded by spaces) may separate tens and units,
 4 | # e.g. ninety-one or ninety one; it is mapped to 0 so it does not change the final sum.
 5 | remove_space_hyphen <- maybe(token(String("-"))) %using% function(...) return(0)
 6 | # token_string(x): parser for the literal word x wrapped in token().
 7 | token_string <- function(x) token(String(x))
 8 | # Units and teens, each mapped to its value. NOTE(review): the teens are listed before the units, presumably because %alt% keeps the first success and e.g. "nine" is a prefix of "nineteen" -- confirm.
 9 | unit_definition <- (remove_space_hyphen %alt% succeed(NULL)) %then% (
10 |   (token_string("ten") %using% function(...) return(10)) %alt% 
11 |     (token_string("eleven") %using% function(...) return(11)) %alt% 
12 |     (token_string("twelve") %using% function(...) return(12)) %alt% 
13 |     (token_string("thirteen") %using% function(...) return(13)) %alt% 
14 |     (token_string("fourteen") %using% function(...) return(14)) %alt% 
15 |     (token_string("fifteen") %using% function(...) return(15)) %alt% 
16 |     (token_string("sixteen") %using% function(...) return(16)) %alt% 
17 |     (token_string("seventeen") %using% function(...) return(17)) %alt% 
18 |     (token_string("eighteen") %using% function(...) return(18)) %alt% 
19 |     (token_string("nineteen") %using% function(...) return(19)) %alt%
20 |   (token_string("zero") %using% function(...) return(0)) %alt% 
21 |     (token_string("oh") %using% function(...) return(0)) %alt% 
22 |     (token_string("zip") %using% function(...) return(0)) %alt% 
23 |     (token_string("zilch") %using% function(...) return(0)) %alt% 
24 |     (token_string("nada") %using% function(...) return(0)) %alt% 
25 |     (token_string("one") %using% function(...) return(1)) %alt% 
26 |     (token_string("two") %using% function(...) return(2)) %alt% 
27 |     (token_string("three") %using% function(...) return(3)) %alt% 
28 |     (token_string("four") %using% function(...) return(4)) %alt% 
29 |     (token_string("five") %using% function(...) return(5)) %alt% 
30 |     (token_string("six") %using% function(...) return(6)) %alt% 
31 |     (token_string("seven") %using% function(...) return(7)) %alt% 
32 |     (token_string("eight") %using% function(...) return(8)) %alt% 
33 |     (token_string("nine") %using% function(...) return(9)) 
34 | )
35 | # Multiples of ten; both "forty" and the variant spelling "fourty" map to 40.
36 | tens_definition <- (
37 |   (token_string("ten")     %using% function(...) return(10)) %alt%
38 |     (token_string("twenty")  %using% function(...) return(20)) %alt%
39 |     (token_string("thirty")  %using% function(...) return(30)) %alt%
40 |     (token_string("forty")   %using% function(...) return(40)) %alt%
41 |     (token_string("fourty")  %using% function(...) return(40)) %alt% 
42 |     (token_string("fifty")   %using% function(...) return(50)) %alt%
43 |     (token_string("sixty")   %using% function(...) return(60)) %alt%
44 |     (token_string("seventy") %using% function(...) return(70)) %alt%
45 |     (token_string("eighty")  %using% function(...) return(80)) %alt%
46 |     (token_string("ninety")  %using% function(...) return(90))
47 | )
48 | 
49 | # optional tens_definition followed by unit_definition; the parsed values are flattened and summed
50 | word2num <- ((tens_definition %alt% succeed(NULL)) %then% unit_definition) %using% function(x) return(sum(unlist(x)))
51 | 
52 | # Space- and hyphen-separated inputs, a lone teen, and an unparseable word (expected to give an empty list).
53 | test_that("word2num", {
54 |   expect_equal(word2num("ninety one")$result, 91)
55 |   expect_equal(word2num("ninety-two")$result, 92)
56 |   expect_equal(word2num("ninety tone"), list())
57 |   expect_equal(word2num("forty six")$result, 46)
58 |   expect_equal(word2num("nineteen")$result, 19)
59 |   expect_equal(word2num("thirty seven")$result, 37)
60 | })
61 | 


--------------------------------------------------------------------------------
/tests/testthat/test_functions.R:
--------------------------------------------------------------------------------
  1 | library(Ramble)
  2 | # Core combinator tests: succeed, item, then/thentree, alt, many, identifier and token.
  3 | test_that("succeed", {
  4 |   expect_equal(succeed("1")("abc"),
  5 |                list(result="1", leftover="abc"))
  6 | })
  7 | # item() consumes exactly one character.
  8 | test_that("item", {
  9 |   expect_equal(item()("abc"),
 10 |                list(result="a", leftover="bc"))
 11 | })
 12 | # %then% returns both results in a list; the second case checks that a classed result stays a single element.
 13 | test_that("then", {
 14 |   expect_equal((item() %then% succeed("123"))("abc"),
 15 |                list(result=list("a", "123"),
 16 |                     leftover="bc"))
 17 |   ## Differentiate "then" from "thentree": newparam yields a named-list result carrying class "newparam"
 18 |   newparam <- function(x) {
 19 |     function(string) {
 20 |       ret <- succeed(x)(string)
 21 |       ret$result <- list(value=ret$result,
 22 |                          more=TRUE)
 23 |       class(ret$result) <- "newparam"
 24 |       ret
 25 |     }
 26 |   }
 27 |   expect_equal((item() %then% newparam("123"))("abc"),
 28 |                list(result=list("a",
 29 |                                 structure(list(value="123", more=TRUE),
 30 |                                           .Names=c("value", "more"),
 31 |                                           class="newparam")),
 32 |                     leftover="bc"))
 33 | })
 34 | # %thentree% with an unclassed named-list result: the nested list is kept intact as the second element.
 35 | test_that("thentree", {
 36 |   expect_equal((item() %thentree% succeed("123"))("abc"),
 37 |                list(result=list("a", "123"),
 38 |                     leftover="bc"))
 39 |   newparam <- function(x) {
 40 |     function(string) {
 41 |       ret <- succeed(x)(string)
 42 |       ret$result <- list(value=ret$result,
 43 |                          more=TRUE)
 44 |       ret
 45 |     }
 46 |   }
 47 |   expect_equal((item() %thentree% newparam("123"))("abc"),
 48 |                list(result=list("a", list(value="123", more=TRUE)),
 49 |                     leftover="bc"))
 50 | })
 51 | # %alt% yields the first parser's result when it succeeds, otherwise the second parser's.
 52 | test_that("alternation", {
 53 |   expect_equal((item() %alt% succeed("2"))("abcdef"),
 54 |                list(result="a", leftover="bcdef"))
 55 |   expect_equal((Digit() %alt% succeed("2"))("abcdef"),
 56 |                list(result="2", leftover="abcdef"))
 57 | })
 58 | # many(Digit()) collects the leading digits (plus a trailing NULL) and leaves the rest as leftover.
 59 | test_that("many", {
 60 |   expect_equal(length(unlist(many(Digit())("123abc")$result)), 3)  # unlist() drops the trailing NULL
 61 |   expect_equal(many(Digit())("123abc"),
 62 |                list(result=list("1", "2", "3", NULL),
 63 |                     leftover="abc"))
 64 | })
 65 | # identifier() ignores the surrounding spaces and returns the name wrapped in a list.
 66 | test_that("identifier", {
 67 |   expect_equal(identifier()("  variable1  "),
 68 |                list(result=list("variable1"),
 69 |                     leftover=""))
 70 | })
 71 | # token(p) skips spaces before and after the match; it yields an empty list when p fails after the leading spaces.
 72 | test_that("token", {
 73 |   expect_equal(token(Digit())("123"),
 74 |                list(result=list("1"),
 75 |                     leftover="23"))
 76 |   expect_equal(token(Digit())(" 123"),
 77 |                list(result=list("1"),
 78 |                     leftover="23"))
 79 |   expect_equal(token(Digit())(" 1 23"),
 80 |                list(result=list("1"),
 81 |                     leftover="23"))
 82 |   expect_equal(token(Digit())(" 1   23"),
 83 |                list(result=list("1"),
 84 |                     leftover="23"))
 85 |   expect_equal(token(Digit())(" a 23"),
 86 |                list())
 87 |   ## Keep attributes now: a named-list result must pass through token() with its names intact
 88 |   newparam <- function(x) {
 89 |     function(string) {
 90 |       ret <- succeed(x)(string)
 91 |       ret$result <- list(value=ret$result,
 92 |                          more=TRUE)
 93 |       ret
 94 |     }
 95 |   }
 96 |   expect_equal(token(String("abc") %thentree% newparam("123"))(" abc "),
 97 |                list(result=list("abc", list(value="123", more=TRUE)),
 98 |                     leftover=""))
 99 | })
100 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | Ramble
  2 | ======
  3 | 
  4 | [![status](http://joss.theoj.org/papers/1b33a8141e698aa4cf038b3c5c9bbfd8/status.svg)](http://joss.theoj.org/papers/1b33a8141e698aa4cf038b3c5c9bbfd8)
  5 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/Ramble)](http://cran.r-project.org/package=Ramble)
  6 | [![Travis-CI Build Status](https://travis-ci.org/chappers/Ramble.svg?branch=master)](https://travis-ci.org/chappers/Ramble)
  7 | [![Coverage Status](http://codecov.io/github/chappers/Ramble/coverage.svg?branch=master)](http://codecov.io/github/chappers/Ramble?branch=master)
  8 | 
  9 | This project is just an example to examine the functional components of R. 
 10 | 
 11 | Installation
 12 | ============
 13 | 
 14 | Ramble is now on CRAN:
 15 | 
 16 |     install.packages("Ramble")
 17 |     
 18 | The development version can be installed from github:
 19 | 
 20 |     # install.packages("devtools")
 21 |     devtools::install_github("chappers/Ramble")
 22 | 
 23 | Goals
 24 | =====
 25 | 
 26 | Create a [parser combinator](http://en.wikipedia.org/wiki/Parser_combinator) written in
 27 | pure R. This is mostly a proof of concept, but could be useful or helpful to someone.
 28 | 
 29 | This is inspired by **Programming in Haskell by Graham Hutton**, and also the 
 30 | [JavaScript port](https://github.com/matthandlersux/functional-parser), and 
 31 | Python's [recursive descent parsing library](https://pypi.python.org/pypi/funcparserlib/).
 32 | 
 33 | **References:**  
 34 | 
 35 | *  [Higher-order functions for parsing](http://eprints.nottingham.ac.uk/221/1/parsing.pdf)
 36 | 
 37 | Contributing
 38 | ============
 39 | 
 40 | You can contribute by opening issues on Github or implementing things yourself and making a pull request. 
 41 | 
 42 | Please ensure that the package passes all checks with the `--as-cran` flag (i.e. via `devtools::check(args = c('--as-cran'))`) before submitting a pull request. 
 43 | 
 44 | How it Works
 45 | ============
 46 | 
 47 | To understand the differences between Ramble and other combinatory parsers please read [Ramble: A Parser Combinator in R](https://github.com/chappers/Ramble/blob/master/docs/ramble-introduction.pdf).
 48 | 
 49 | Example
 50 | =======
 51 | 
 52 | You may view examples for:
 53 | 
 54 | *  Parsing xml file
 55 | *  Creating a simple calculator
 56 | *  Reading a number given in words, and converting it to the appropriate numeric value
 57 | 
 58 | These can be found in the `example/` folder. Below is the calculator example.
 59 | 
 60 | ```r
 61 | #' expr :: = term + term | term - term | term
 62 | #' term :: = factor * factor | factor / factor | factor
 63 | #' factor :: = (expr) | digit+
 64 | 
 65 | expr <- ((term %then% 
 66 |             symbol("+") %then%
 67 |             expr %using% function(x) {
 68 |               print(unlist(c(x)))
 69 |               return(sum(as.numeric(unlist(c(x))[c(1,3)])))
 70 |             }) %alt% 
 71 |            (term %then% 
 72 |               symbol("-") %then%
 73 |               expr %using% function(x) {
 74 |                 print(unlist(c(x)))
 75 |                 return(Reduce("-", as.numeric(unlist(c(x))[c(1,3)])))
 76 |               }) %alt% term)
 77 | 
 78 | 
 79 | term <- ((factor %then% 
 80 |              symbol("*") %then%
 81 |              term %using% function(x) {
 82 |                print(unlist(c(x)))
 83 |                return(prod(as.numeric(unlist(c(x))[c(1,3)])))
 84 |              }) %alt% 
 85 |            (factor %then% 
 86 |               symbol("/") %then%
 87 |               term %using% function(x) {
 88 |                 print(unlist(c(x)))
 89 |                 return(Reduce("/", as.numeric(unlist(c(x))[c(1,3)])))
 90 |               }) %alt% factor)
 91 | 
 92 | factor <- ((
 93 |     symbol("(") %then%
 94 |       expr %then%
 95 |       symbol(")") %using% 
 96 |       function(x){
 97 |         print(unlist(c(x)))
 98 |         return(as.numeric(unlist(c(x))[2]))
 99 |         })
100 |     %alt% natural())
101 | ```
102 | 
103 | **Output**:  
104 | 
105 | ```r
106 | > expr("(1+1)*2")
107 | [1] "1" "+" "1"
108 | [1] "(" "2" ")"
109 | [1] "2" "*" "2"
110 | [1] "1" "+" "1"
111 | [1] "(" "2" ")"
112 | [1] "2" "*" "2"
113 | [1] "1" "+" "1"
114 | [1] "(" "2" ")"
115 | [1] "2" "*" "2"
116 | $result
117 | [1] 4
118 | 
119 | $leftover
120 | [1] ""
121 | 
122 | > expr("(1+2)*3")
123 | [1] "1" "+" "2"
124 | [1] "(" "3" ")"
125 | [1] "3" "*" "3"
126 | [1] "1" "+" "2"
127 | [1] "(" "3" ")"
128 | [1] "3" "*" "3"
129 | [1] "1" "+" "2"
130 | [1] "(" "3" ")"
131 | [1] "3" "*" "3"
132 | $result
133 | [1] 9
134 | 
135 | $leftover
136 | [1] ""
137 | 
138 | > expr("1*(2+3)*4*5")
139 | [1] "2" "+" "3"
140 | [1] "(" "5" ")"
141 | [1] "4" "*" "5"
142 | [1] "5"  "*"  "20"
143 | [1] "1"   "*"   "100"
144 | [1] "2" "+" "3"
145 | [1] "(" "5" ")"
146 | [1] "4" "*" "5"
147 | [1] "5"  "*"  "20"
148 | [1] "1"   "*"   "100"
149 | [1] "2" "+" "3"
150 | [1] "(" "5" ")"
151 | [1] "4" "*" "5"
152 | [1] "5"  "*"  "20"
153 | [1] "1"   "*"   "100"
154 | $result
155 | [1] 100
156 | 
157 | $leftover
158 | [1] ""
159 | 
160 | > expr("(4-2)+3")
161 | [1] "4" "-" "2"
162 | [1] "(" "2" ")"
163 | [1] "4" "-" "2"
164 | [1] "(" "2" ")"
165 | [1] "4" "-" "2"
166 | [1] "(" "2" ")"
167 | [1] "2" "+" "3"
168 | $result
169 | [1] 5
170 | 
171 | $leftover
172 | [1] ""
173 | ```
174 | 


--------------------------------------------------------------------------------
/vignettes/Higher-Order_Functions_for_Parsing_in_R.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Higher-Order Functions for Parsing in R"
  3 | author: "Chapman Siu"
  4 | date: "`r Sys.Date()`"
  5 | output: rmarkdown::html_vignette
  6 | vignette: >
  7 |   %\VignetteIndexEntry{Higher-Order Functions for Parsing in R}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\usepackage[utf8]{inputenc}
 10 | ---
 11 | 
 12 | # Introduction
 13 | 
 14 | A parser is any program which analyses text to determine its logical structure. For example, the parser phase in a compiler takes a program text, and produces a parse tree which expounds the structure of the program. The form of this input is usually defined by a context-free grammar, using BNF notation. Parsers themselves may be built by hand, but are most often generated automatically using tools like Lex and Yacc from Unix (Aho86).
 15 | 
 16 | Combinatory parsing is a technique which has been explored in functional languages such as Miranda (Hutton92). Combinator parsing is able to handle ambiguous grammars, and provides full backtracking if it is needed. It can go beyond simply parsing by adding semantic actions, allowing their results to be manipulated in any way we please. 
 17 | 
 18 | This paper will apply techniques discussed in (Hutton92) in the context of the functional parts of the R programming language. 
 19 | 
 20 | # Functional Parts of R 
 21 | 
 22 | ## Closures
 23 | 
 24 | R, at its heart, is a functional programming language (Wickham14). R has what is known as first-class functions, meaning functions can be passed as arguments to other functions, returned from other functions, assigned to variables and stored in data structures. 
 25 | 
 26 | Functions can be written by other functions; these are known as closures. In the following example (Wickham14) we can generate a family of power functions in which a parent function (`power()`) creates two child functions (`square()` and `cube()`).
 27 | 
 28 | ```{r closure-example}
 29 | power <- function(exponent) {
 30 |   function(x) {
 31 |     x ^ exponent
 32 |   }
 33 | }
 34 | square <- power(2)
 35 | square(2)
 36 | cube <- power(3)
 37 | cube(2)
 38 | ```
 39 | 
 40 | # Parsing using Combinators
 41 | 
 42 | We will first consider the _type_ of parser. A parser may be viewed as a function from a string of symbols to a result value. Since a parser might not consume the entire string, part of this result will be a suffix of the input string. Sometimes a parser may not be able to produce a result at all. For example, it may be expecting a letter, but find a digit. Rather than define a special type for the success or failure of a parser, we choose to have parsers return a list of pairs as their result, with the empty list `list()` denoting failure, and a list of lists `list(result=v, leftover=xs)` indicating success, with value `v` and unconsumed input `xs`. 
 43 | 
 44 | Since we want to specify the type of any parser, regardless of the kind of symbols and results involved, this means we must use a heterogeneous data structure. Compared with Miranda, R treats its data structures in a different manner. For example, the idea of a tuple does not exist within R. Data types can be divided into five different groups (Wickham14):
 45 | 
 46 |     **Homogeneous** **Heterogeneous**  
 47 | --- --------------- ----------------- 
 48 | 1d  Atomic Vector      List              
 49 | 2d  Matrix             Data Frame        
 50 | nd  Array                            
 51 | --- --------------- ----------------- 
 52 | 
 53 | This means that the value `v`, which indicates success **must** be a list of lists, since this value may be heterogeneous. 
 54 | 
 55 | ## Primitive parsers
 56 | 
 57 | `succeed` is based on the empty string symbol in the BNF notation. The `succeed` parser always succeeds, without actually consuming any input string. Since the outcome of succeed does not depend on its input, its result value must be pre-determined, so it is included as an extra parameter.
 58 | 
 59 | ```{r, succeed}
 60 | succeed <- function(string) {
 61 |   return(function(nextString) {
 62 |     return(list(result = string, leftover=nextString))
 63 |   })
 64 | }
 65 | succeed("1") ("abc")
 66 | ```
 67 | 
 68 | The next function `item`, allows us to consume the first character of the string and return the rest. If it cannot consume a single character from the string it will emit the empty list, indicating the parser has failed. 
 69 | 
 70 | ```{r, item}
 71 | item <- function(...){
 72 |   return(function(string){
 73 |     if(length(string)==0){return(NULL)}
 74 |     return (if(string=="") list() else list(result=substr(string, 1, 1), leftover=substring(string, 2)))
 75 |   })
 76 | }
 77 | item() ("abc")
 78 | ```
 79 | 
 80 | `item` can be further rewritten in a more useful way. The `satisfy` function allows us to make parsers that recognise single symbols. Rather than enumerating the acceptable symbols, we will allow a predicate to be set, which determines if an arbitrary symbol is a member. Successful parses return the consumed symbol as their result value.
 81 | 
 82 | ```{r, satisfy}
 83 | satisfy <- function(p) {
 84 |   return(function(string) {
 85 |     if (length(string)==0) {
 86 |       return(list())
 87 |     }
 88 |     else if (string==""){
 89 |       return(list())
 90 |     }
 91 |     else {
 92 |       result_ = list(result=substr(string, 1, 1), leftover=substring(string, 2))
 93 |       if (p(result_$result)) {
 94 |         return(succeed(result_$result)(result_$leftover))
 95 |       }
 96 |       else{
 97 |         return(list())
 98 |       }
 99 |     }    
100 |   })
101 | }
102 | satisfy(function(x) {x == "a"}) ("abc")
103 | ```
104 | 
105 | Using `satisfy` we can define a parser for single symbols:
106 | 
107 | ```{r, literal}
108 | literal <- function(char) {
109 |   satisfy(function(x){return(x==char)})
110 | }
111 | literal("a") ("abc")
112 | ```
113 | 
114 | ## Combinators
115 | 
116 | Now that we have the basic building blocks, we consider how they should be put together to form useful parsers. In BNF notation, larger grammars are built piece-wise from smaller ones using `|` to denote alternation, and juxtaposition to indicate sequencing. So that our parsers resemble BNF notation, we define higher order functions which correspond directly to these operators. Since higher order functions like these combine parsers to form other parsers, they are often referred to as combinators.
117 | 
118 | The `alt` combinator corresponds to alternation in BNF. The parser `alt(p1, p2)` recognises anything that `p1` or `p2` would. The approach taken in this parser follows (Fairbairn86), in which either is interpreted in a sequential (or exclusive) manner, returning the results of the first parser to succeed, and failure if neither does. Note that we use the infix notation ``` `%f%` ``` to convert `alt` to an infix operator. The infix notation is merely a syntactic convenience: ```(a `%f%` b)``` is equivalent to `(f (a,b))`. 
119 | 
120 | ```{r, alt}
121 | alt <- function(p1, p2) {
122 |   return(function(string){
123 |     result <- p1 (string)
124 |     if(!is.null(result$leftover)) {return(result)}
125 |     else{
126 |       return(p2 (string))
127 |     }
128 |   })
129 | }
130 | `%alt%` <- alt
131 | (item() %alt% succeed("2"))("abcdef")
132 | alt(item(), succeed("2")) ("abcdef")
133 | ```
134 |   
135 | The `then` combinator corresponds to sequencing in BNF. The parser (`p1 %then% p2`) recognises anything that `p1` and `p2` would if placed in succession.
136 | 
137 | ```{r, then}
138 | then <- function(p1, p2) {
139 |   return(function(string) {
140 |     result <- p1 (string)
141 |     if (length(result) == 0) {
142 |       return (list())
143 |     }
144 |     else {
145 |       result_ <- p2 (result$leftover)
146 |       if (length(result_$leftover) == 0 || is.null(result_$leftover)) {return(list())}
147 |       return(list(result=append(list(result$result), result_$result), leftover=result_$leftover))
148 |     }
149 |   })
150 | }
151 | `%then%` <- then
152 | (literal("a") %then% literal("b")) ("abc")
153 | ```
154 | 
155 | ## Manipulating Values
156 | 
157 | Part of the result from a parser is a value. The `using` combinator allows us to manipulate these results, building a parse tree being the most common application. The parser (`p %using% f`) has the same behaviour as the parser `p`, except that the function `f` is applied to each of the result values:
158 | 
159 | ```{r, using}
160 | using <- function(p, f) {
161 |   return(function(string) {
162 |     result <- p (string) 
163 |     if(length(result) == 0) {return(list())}
164 |     return(list(result=f(result$result),
165 |                 leftover=result$leftover))
166 |   })
167 | }
168 | `%using%` <- using
169 | (item() %using% function(x) {as.numeric(x) + 100}) ("1abc")
170 | ```
171 | 
172 | Although `using` has no counterpart in pure BNF notation, it does have much in common with the `{...}` operator in Yacc (Aho86). In fact, the `using` combinator does not restrict us to building parse trees. Arbitrary semantic actions can be used. 
173 | 
174 | In BNF notation, repetition occurs often enough to merit its own abbreviation. When zero or more repetitions of a phrase `p` are admissible, we simply write `p*`. Formally, this notation is defined by the equation `p* = p p * | e`. The `many` combinator corresponds directly to this operator, and is defined in much the same way:
175 | 
176 | ```{r, many}
177 | many <- function(p) {
178 |   return(function(string) {
179 |     ((p %then% many(p)) %alt% succeed(NULL)) (string)
180 |   })
181 | }
182 | many(literal("1")) ("111223")
183 | ```
184 | 
185 | Not surprisingly, the next parser corresponds to the other common iterative form in BNF, defined by `p+ = p p*`. The parser (`some p`) has the same behaviour as (`many p`), except that it accepts one or more repetitions of `p`, rather than zero or more: 
186 | 
187 | ```{r, some}
188 | some <- function(p) {
189 |   return(function(string){
190 |     (p %then% many(p)) (string)
191 |   })
192 | }
193 | some(literal("a"))("aaabbc")
194 | ```
195 | 
196 | Note that (`some p`) may fail, whereas (`many p`) always succeeds. 
197 | 
198 | ## Derived Primitives
199 | 
200 | Using the basic parsers together with sequencing and choice, we can now define a number of other useful parsing primitives. 
201 | 
202 | Firstly, using `satisfy` with the appropriate predicates, we can define parsers for single digits, lower-case letters, upper-case letters, arbitrary letters, alphanumeric characters, and specific characters. We have already demonstrated how we can parse specific characters (see `literal`), but the others can be defined in a similar way:
203 | 
204 | ```{r, derived}
205 | Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})}
206 | Lower <- function(...) {satisfy(function(x) {return(grepl("[a-z]", x))})}
207 | Upper <- function(...) satisfy(function(x) {return(grepl("[A-Z]", x))})
208 | Alpha <- function(...) satisfy(function(x) {return(grepl("[A-Za-z]", x))})
209 | AlphaNum <- function(...) satisfy(function(x) {return(grepl("[A-Za-z0-9]", x))})
210 | SpaceCheck <- function(...) satisfy(function(x) {return(grepl("\\s", x))})
211 | ```
212 | 
213 | In a similar manner we can define a parser `String` for the string of characters, with the string itself returned as the result value:
214 | 
215 | ```{r, String}
216 | String <- function(string) {
217 |   if (string=="") {
218 |     return (succeed(NULL))
219 |   }
220 |   else {
221 |     result_=substr(string, 1, 1)
222 |     leftover_=substring(string, 2)
223 |     return((literal(result_) %then% 
224 |             String(leftover_)) %using% 
225 |              function(x) {paste(unlist(c(x)), collapse="")})
226 |   }
227 | }
228 | String("123")("123 abc")
229 | ```
230 | 
231 | Note that `String` is defined using recursion, and only succeeds if the entire target string is consumed. The base case states that the empty string is always parsed. The recursive case states that a non-empty string can be parsed by parsing the first character, parsing the remaining characters, and returning the entire string as the result value.
232 | 
233 | Similarly we can create parsers to match identifiers (`ident`), natural numbers (`nat`), spaces (`space`):
234 | 
235 | ```{r, ident}
236 | ident <- function() {(many(AlphaNum()) %using%
237 |           function(x) paste0(unlist(c(x)), collapse=""))}
238 | nat <- function() {
239 |   some(Digit()) %using%
240 |   function(x) {paste(unlist(c(x)), collapse="")}
241 | }
242 | space <- function() {
243 |   many(SpaceCheck()) %using%
244 |   function(x) {return("")}
245 | }
246 | ident() ("var1 = 123")
247 | nat() ("123456")
248 | ```
249 | 
250 | ## Handling spacing
251 | 
252 | To handle spaces we will define a new primitive `token` which ignores any space before and after applying a parser for a token:
253 | 
254 | ```{r, token}
255 | token <- function(p) {
256 |   space() %then%
257 |     p %then%
258 |     space() %using%
259 |     function(x) {return(unlist(c(x))[2])}
260 | }
261 | token(ident()) ("   var1   ")
262 | ```
263 | This can then be expanded for identifiers, natural numbers and symbols:
264 | 
265 | ```{r, identifier}
266 | identifier <- function(...) {token(ident())}
267 | natural <- function(...) {token(nat())}
268 | symbol <- function(xs) {token(String(xs))}
269 | identifier() ("   var1   ")
270 | ```
271 | 
272 | # Example
273 | 
274 | To conclude our introduction to combinator parsing, we will work through the derivation of a simple parser. Suppose we have a program which works with arithmetic expressions, defined as follows:
275 | 
276 | ```
277 | Example with expressions, will not be exported
278 | expression example
279 | expr :: = term + term | term - term | term
280 | term :: = factor * factor | factor / factor | factor
281 | factor :: = (expr) | digit+
282 | ```
283 | 
284 | Having this structure allows multiplication and division to have higher precedence than addition and subtraction. We can simply rewrite the BNF grammar above as follows:
285 | 
286 | ```{r, arith}
287 | expr <- ((term %then% 
288 |             symbol("+") %then%
289 |             expr %using% function(x) {
290 |               print(unlist(c(x)))
291 |               return(sum(as.numeric(unlist(c(x))[c(1,3)])))
292 |             }) %alt% 
293 |           (term %then% 
294 |             symbol("-") %then%
295 |             expr %using% function(x) {
296 |               print(unlist(c(x)))
297 |               return(Reduce("-", as.numeric(unlist(c(x))[c(1,3)])))
298 |             }) %alt% term)
299 | 
300 | 
301 | term <- ((factor %then% 
302 |             symbol("*") %then%
303 |               term %using% function(x) {
304 |                 print(unlist(c(x)))
305 |                 return(prod(as.numeric(unlist(c(x))[c(1,3)])))
306 |               }) %alt% 
307 |          (factor %then% 
308 |            symbol("/") %then%
309 |            term %using% function(x) {
310 |              print(unlist(c(x)))
311 |              return(Reduce("/", as.numeric(unlist(c(x))[c(1,3)])))
312 |           }) %alt% factor)
313 | 
314 | factor <- ((symbol("(") %then%
315 |             expr %then%
316 |             symbol(")") %using% function(x){
317 |               print(unlist(c(x)))
318 |               return(as.numeric(unlist(c(x))[2]))
319 |             }) %alt% natural())
320 | ```
321 | 
322 | This will evaluate the arithmetic expressions:
323 | 
324 | ```{r, exp}
325 | expr("2+(4-1)*3")
326 | ```
327 | 
328 | 
329 | 
330 | 
331 | 
332 | 
333 | 


--------------------------------------------------------------------------------
/R/parser.R:
--------------------------------------------------------------------------------
  1 | #' \code{succeed} is based on the empty string symbol in BNF notation. The
  2 | #' \code{succeed} parser always succeeds, without actually consuming any input
  3 | #' string. Since its outcome does not depend on its input, its result value
  4 | #' must be pre-determined, so it is supplied as an extra parameter.
  5 | #' 
  6 | #' @param string the result value of succeed parser
  7 | #' @export
  8 | #' @examples 
  9 | #' succeed("1") ("abc")
 10 | succeed <- function(string) {
 11 |   # The returned parser passes its input through untouched as the leftover.
 12 |   function(nextString) {
 13 |     list(result = string, leftover = nextString)
 14 |   }
 15 | }
 16 | 
 17 | #' \code{item} is a parser that consumes the first character of the string and
 18 | #' returns the rest. If it cannot consume a single character from the string, it
 19 | #' will emit the empty list, indicating the parser has failed.
 20 | #' 
 21 | #' @param ... additional arguments for the parser
 22 | #' @export
 23 | #' @examples
 24 | #' item() ("abc")
 25 | #' item() ("")
 26 | item <- function(...) {
 27 |   function(string) {
 28 |     # Zero-length input (NULL, character(0)) and "" both fail with the
 29 |     # documented empty list; the length test runs first so that
 30 |     # `string == ""` is never evaluated on an empty vector.
 31 |     if (length(string) == 0 || string == "") {
 32 |       return(list())
 33 |     }
 34 |     # Split off the first character; the remainder is handed on as leftover.
 35 |     list(result = substr(string, 1, 1),
 36 |          leftover = substring(string, 2))
 37 |   }
 38 | }
 39 | 
 40 | #' \code{satisfy} is a function which allows us to make parsers that recognise
 41 | #' single symbols. The parser fails with the empty list when there is no
 42 | #' symbol to read or when the predicate rejects the first symbol.
 43 | #' @param p is the predicate to determine if the arbitrary symbol is a member.
 44 | #' @export
 45 | satisfy <- function(p) {
 46 |   function(string) {
 47 |     # Nothing can be consumed from zero-length input or the empty string.
 48 |     if (length(string) == 0) {
 49 |       return(list())
 50 |     }
 51 |     if (string == "") {
 52 |       return(list())
 53 |     }
 54 |     head_char <- substr(string, 1, 1)
 55 |     if (p(head_char)) {
 56 |       # Accepted: the symbol is the result, the remaining input is leftover.
 57 |       list(result = head_char, leftover = substring(string, 2))
 58 |     } else {
 59 |       list()
 60 |     }
 61 |   }
 62 | }
 63 | 
 64 | #' \code{literal} is a parser for single symbols. It will attempt to match the
 65 | #' single symbol with the first character in the string.
 66 | #' 
 67 | #' @param char is the character to be matched
 68 | #' @export
 69 | #' @examples
 70 | #' literal("a") ("abc")
 71 | literal <- function(char) {
 72 |   # Accept only a symbol equal to `char`.
 73 |   matches_char <- function(x) x == char
 74 |   satisfy(matches_char)
 75 | }
 76 | 
 77 | ## Building Combinators ##
 78 | 
 79 | #' \code{alt} combinator is similar to alternation in BNF. The parser 
 80 | #' \code{(alt(p1, p2))} recognises anything that \code{p1} or \code{p2} would. 
 81 | #' The approach taken in this parser follows (Fairbairn86), in which either is 
 82 | #' interpreted in a sequential (or exclusive) manner, returning the result of
 83 | #' the first parser to succeed, and failure if neither does.
 84 | #' 
 85 | #' \code{\%alt\%} is the infix notation for the \code{alt} function, and it is the
 86 | #' preferred way to use the \code{alt} operator.
 87 | #' 
 88 | #' @param p1 the first parser
 89 | #' @param p2 the second parser
 90 | #' @return The result of \code{p1} if it succeeds, otherwise that of \code{p2}
 91 | #' @examples
 92 | #' (item() %alt% succeed("2")) ("abcdef")
 93 | #' @seealso \code{\link{then}}
 94 | alt <- function(p1, p2) {
 95 |   function(string) {
 96 |     first <- p1(string)
 97 |     # A parse without a `leftover` component is a failure: fall back to p2.
 98 |     if (is.null(first$leftover)) {
 99 |       return(p2(string))
100 |     }
101 |     first
102 |   }
103 | }
104 | 
105 | #' \code{\%alt\%} is the infix notation for the \code{alt} function. 
106 | #' 
107 | #' @param p1 the first parser
108 | #' @param p2 the second parser 
109 | #' @return The result of \code{p1} if it succeeds, otherwise that of \code{p2}
110 | #' @export
111 | #' @examples
112 | #' (item() %alt% succeed("2")) ("abcdef")
113 | `%alt%` <- alt
114 | 
115 | #' \code{then} combinator corresponds to sequencing in BNF. The parser 
116 | #' \code{(then(p1, p2))} recognises anything that \code{p1} and \code{p2} would 
117 | #' if placed in succession.
118 | #' 
119 | #' \code{\%then\%} is the infix operator for the then combinator, and it is the
120 | #' preferred way to use the \code{then} operator.
121 | #' 
122 | #' @param p1 the first parser
123 | #' @param p2 the second parser
124 | #' @return recognises anything that \code{p1} and \code{p2} would if placed in 
125 | #'   succession.
126 | #' @examples
127 | #' (item() %then% succeed("123")) ("abc")
128 | #' @seealso \code{\link{alt}}, \code{\link{thentree}}
129 | then <- function(p1, p2) {
130 |   function(string) {
131 |     first <- p1(string)
132 |     # An empty result from p1 means the whole sequence fails.
133 |     if (length(first) == 0) {
134 |       return(list())
135 |     }
136 |     # Run p2 on whatever p1 left unconsumed.
137 |     second <- p2(first$leftover)
138 |     # length(NULL) is 0, so this single test also covers a missing leftover.
139 |     if (length(second$leftover) == 0) {
140 |       return(list())
141 |     }
142 |     # Both parsers succeeded: combine their results and keep p2's leftover.
143 |     combined <- append(list(first$result), list(second$result))
144 |     list(result = Unlist(combined), leftover = second$leftover)
145 |   }
146 | }
147 | 
148 | #' \code{\%then\%} is the infix operator for the \code{then} combinator.
149 | #' 
150 | #' @param p1 the first parser
151 | #' @param p2 the second parser 
152 | #' @return the parser \code{then(p1, p2)}, which recognises \code{p1} followed by \code{p2}.
153 | #' @export
154 | #' @examples
155 | #' (item() %then% succeed("123")) ("abc")
156 | `%then%` <- then
157 | 
158 | #' \code{thentree} sequences two parsers like \code{then}, but returns their
159 | #' results as a nested two-element list instead of passing them to \code{Unlist}.
160 | #' 
161 | #' @param p1 the first parser
162 | #' @param p2 the second parser
163 | #' @return recognises anything that \code{p1} and \code{p2} would if placed in 
164 | #'   succession.
165 | #' @export
166 | #' @examples
167 | #' (item() %thentree% succeed("123")) ("abc")
168 | #' 
169 | #' @seealso \code{\link{alt}}, \code{\link{then}}
170 | thentree <- function(p1, p2) {
171 |   function(string) {
172 |     first <- p1(string)
173 |     # An empty result from p1 means the whole sequence fails.
174 |     if (length(first) == 0) {
175 |       return(list())
176 |     }
177 |     # Run p2 on whatever p1 left unconsumed.
178 |     second <- p2(first$leftover)
179 |     # length(NULL) is 0, so this single test also covers a missing leftover.
180 |     if (length(second$leftover) == 0) {
181 |       return(list())
182 |     }
183 |     # Keep the two results as separate children rather than flattening them.
184 |     tree <- list(first$result, second$result)
185 |     list(result = tree, leftover = second$leftover)
186 |   }
187 | }
188 | 
189 | #' \code{\%thentree\%} is the infix operator for the \code{thentree} combinator,
190 | #' and it is the preferred way to use the \code{thentree} operator.
191 | #' @export
192 | #' @param p1 the first parser
193 | #' @param p2 the second parser
194 | #' @return recognises anything that \code{p1} and \code{p2} would if placed in 
195 | #'   succession.
196 | #' @examples
197 | #' (item() %thentree% succeed("123")) ("abc")
198 | #' @seealso \code{\link{alt}}, \code{\link{then}}, \code{\link{thentree}}
199 | `%thentree%` <- thentree
200 | 
201 | #' \code{using} combinator allows us to manipulate results from a parser, for 
202 | #' example building a parse tree. The parser \code{(p \%using\% f)} has the same 
203 | #' behaviour as the parser \code{p}, except that the function \code{f} is
204 | #' applied to its result value. A failed parse stays a failure.
205 | #' 
206 | #' \code{\%using\%} is the infix operator for \code{using}, and it is the
207 | #' preferred way to use the \code{using} operator.
208 | #' 
209 | #' @param p is the parser to be applied
210 | #' @param f is the function to be applied to each result of \code{p}.
211 | #' @return The parser \code{(p \%using\% f)} has the same behaviour as the
212 | #'   parser \code{p}, except that the function \code{f} is applied to each of
213 | #'   its result values.
214 | #' @examples
215 | #' (item() %using% as.numeric) ("1abc")
216 | using <- function(p, f) {
217 |   function(string) {
218 |     parsed <- p(string)
219 |     if (length(parsed) > 0) {
220 |       list(result = f(parsed$result), leftover = parsed$leftover)
221 |     } else {
222 |       list()
223 |     }
224 |   }
225 | }
226 | 
227 | #' \code{\%using\%} is the infix operator for \code{using}:
228 | #' \code{p \%using\% f} is the same parser as \code{using(p, f)}.
229 | #' @param p is the parser to be applied
230 | #' @param f is the function to be applied to each result of \code{p}.
231 | #' @export
232 | #' @examples
233 | #' (item() %using% as.numeric) ("1abc")
234 | `%using%` <- using
235 | 
236 | #' \code{maybe} matches 0 or 1 of pattern \code{p}.  In EBNF notation, this
237 | #' corresponds to a question mark ('?').
238 | #' 
239 | #' @param p is the parser to be matched 0 or 1 times.
240 | #' @export
241 | #' @examples
242 | #' maybe(Digit())("123abc")
243 | #' maybe(Digit())("abc123")
244 | #' @seealso \code{\link{many}}, \code{\link{some}}
245 | maybe <- function(p) {
246 |   # Fall back to a success with NULL result when `p` does not match.
247 |   optional <- function(string) (p %alt% succeed(NULL))(string)
248 |   optional
249 | }
250 | 
251 | #' \code{many} matches 0 or more of pattern \code{p}. In BNF notation, 
252 | #' repetition occurs often enough to merit its own abbreviation. When zero or 
253 | #' more repetitions of a phrase \code{p} are admissible, we simply write 
254 | #' \code{p*}. The \code{many} combinator corresponds directly to this operator, 
255 | #' and is defined in much the same way.
256 | #' 
257 | #' This implementation of \code{many} differs from (Hutton92) due to the nature 
258 | #' of R's data structures. Since R does not support the concept of a list of
259 | #' tuples, we must revert to using a list rather than a vector, since all values
260 | #' in an R vector must be the same datatype.
261 | #' 
262 | #' @param p is the parser to match 0 or more times.
263 | #' @export
264 | #' @examples
265 | #' Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})}
266 | #' many(Digit()) ("123abc")
267 | #' many(Digit()) ("abc")
268 | #' @seealso \code{\link{maybe}}, \code{\link{some}}
269 | many <- function(p) {
270 |   # p* = p p* | e: one `p` followed by zero or more, otherwise succeed
271 |   # with a NULL result.
272 |   function(string) ((p %then% many(p)) %alt% succeed(NULL))(string)
273 | }
274 | 
275 | #' \code{some} matches 1 or more of pattern \code{p}. In BNF notation, 
276 | #' repetition occurs often enough to merit its own abbreviation. When one or 
277 | #' more repetitions of a phrase \code{p} are admissible, we simply write 
278 | #' \code{p+}. The \code{some} combinator corresponds directly to this operator.
279 | #' 
280 | #' @param p is the parser to match 1 or more times.
281 | #' @export
282 | #' @examples
283 | #' Digit <- function(...) {satisfy(function(x) {return(grepl("[0-9]", x))})}
284 | #' some(Digit()) ("123abc")
285 | #' @seealso \code{\link{maybe}}, \code{\link{many}}
286 | some <- function(p) {
287 |   # p+ = p p*: one `p`, then as many more as `many(p)` will match.
288 |   at_least_one <- function(string) (p %then% many(p))(string)
289 |   at_least_one
290 | }
291 | 
292 | ## Define the derived primitives ##
293 | 
294 | #' Digit checks for a single digit
295 | #' 
296 | #' @param ... additional arguments for the primitives to be parsed
297 | #' @export
298 | #' @examples
299 | #' Digit()("123")
300 | #' @seealso \code{\link{Lower}}, \code{\link{Upper}}, 
301 | #'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
302 | #'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
303 | #'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
304 | #'   \code{\link{natural}}, \code{\link{symbol}}
305 | Digit <- function(...) {
306 |   # Predicate: the symbol is one of 0-9.
307 |   is_digit <- function(x) grepl("[0-9]", x)
308 |   satisfy(is_digit)
309 | }
310 | 
311 | #' Lower checks for a single lower case character
312 | #' 
313 | #' @param ... additional arguments for the primitives to be parsed
314 | #' @export
315 | #' @examples
316 | #' Lower() ("abc")
317 | #' @seealso \code{\link{Digit}}, \code{\link{Upper}}, 
318 | #'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
319 | #'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
320 | #'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
321 | #'   \code{\link{natural}}, \code{\link{symbol}}
322 | Lower <- function(...) {
323 |   # Predicate: the symbol matches the character class [a-z].
324 |   is_lower <- function(x) grepl("[a-z]", x)
325 |   satisfy(is_lower)
326 | }
327 | 
328 | #' Upper checks for a single upper case character
329 | #' 
330 | #' @param ... additional arguments for the primitives to be parsed
331 | #' @export
332 | #' @examples
333 | #' Upper()("Abc")
334 | #' @seealso \code{\link{Digit}}, \code{\link{Lower}}, 
335 | #'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
336 | #'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
337 | #'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
338 | #'   \code{\link{natural}}, \code{\link{symbol}}
339 | Upper <- function(...) {
340 |   # Predicate: the symbol matches the character class [A-Z].
341 |   is_upper <- function(x) grepl("[A-Z]", x)
342 |   satisfy(is_upper)
343 | }
344 | 
345 | #' Alpha checks for a single alphabet character
346 | #' 
347 | #' @param ... additional arguments for the primitives to be parsed
348 | #' @export
349 | #' @examples
350 | #' Alpha()("abc")
351 | #' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
352 | #'   \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
353 | #'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
354 | #'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
355 | #'   \code{\link{natural}}, \code{\link{symbol}}
356 | Alpha <- function(...) {
357 |   # Predicate: the symbol matches the character class [A-Za-z].
358 |   is_alpha <- function(x) grepl("[A-Za-z]", x)
359 |   satisfy(is_alpha)
360 | }
361 | 
362 | #' AlphaNum checks for a single alphanumeric character
363 | #' 
364 | #' @param ... additional arguments for the primitives to be parsed
365 | #' @export
366 | #' @examples
367 | #' AlphaNum()("123")
368 | #' AlphaNum()("abc123")
369 | #' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
370 | #'   \code{\link{Alpha}}, \code{\link{SpaceCheck}}, 
371 | #'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
372 | #'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
373 | #'   \code{\link{natural}}, \code{\link{symbol}}
374 | AlphaNum <- function(...) {
375 |   # Predicate: the symbol matches the character class [A-Za-z0-9].
376 |   is_alnum <- function(x) grepl("[A-Za-z0-9]", x)
377 |   satisfy(is_alnum)
378 | }
379 | 
380 | #' SpaceCheck checks for a single whitespace character
381 | #' 
382 | #' @param ... additional arguments for the primitives to be parsed
383 | #' @export
384 | #' @examples
385 | #' SpaceCheck()(" 123")
386 | #' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
387 | #'   \code{\link{Alpha}}, \code{\link{AlphaNum}},
388 | #'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
389 | #'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
390 | #'   \code{\link{natural}}, \code{\link{symbol}}
391 | SpaceCheck <- function(...) {
392 |   # Predicate: the symbol matches the regex whitespace class.
393 |   is_space <- function(x) grepl("\\s", x)
394 |   satisfy(is_space)
395 | }
396 | 
397 | #' \code{String} is a combinator which allows us to build parsers which
398 | #' recognise strings of symbols, rather than just single symbols
399 | #' 
400 | #' @param string is the string to be matched
401 | #' @export
402 | #' @examples
403 | #' String("123")("123 abc")
404 | #' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
405 | #'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
406 | #'   \code{\link{ident}}, \code{\link{nat}}, 
407 | #'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
408 | #'   \code{\link{natural}}, \code{\link{symbol}}
409 | String <- function(string) {
410 |   # Base case: the empty string is always matched, consuming nothing.
411 |   if (string == "") {
412 |     return(succeed(NULL))
413 |   }
414 |   first_char <- substr(string, 1, 1)
415 |   remainder <- substring(string, 2)
416 |   # Glue the matched characters back together into a single string.
417 |   collapse_chars <- function(x) {
418 |     paste(unlist(c(x)), collapse="")
419 |   }
420 |   (literal(first_char) %then% String(remainder)) %using% collapse_chars
421 | }
422 | 
#' \code{ident} is a parser which matches zero or more alphanumeric
#' characters and returns them as one string.
#' 
#' It is built as \code{many(AlphaNum())}, with the matched characters
#' flattened and pasted together without a separator.
#' 
#' @export
#' @examples
#' ident() ("variable1 = 123")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
#'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
#'   \code{\link{String}}, \code{\link{nat}}, 
#'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#'   \code{\link{natural}}, \code{\link{symbol}}
ident <- function() {
  # Joins the individual matched characters into a single string.
  collapse_chars <- function(x) paste0(unlist(c(x)), collapse = "")
  many(AlphaNum()) %using% collapse_chars
}
438 | 
#' \code{nat} is a parser which matches one or more numeric characters and
#' returns them as one string.
#' 
#' It is built as \code{some(Digit())}, with the matched digits flattened
#' and pasted together without a separator. The result is a character
#' string; no numeric conversion is performed here.
#' 
#' @export
#' @examples
#' nat() ("123 + 456")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
#'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
#'   \code{\link{String}}, \code{\link{ident}},
#'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#'   \code{\link{natural}}, \code{\link{symbol}}
nat <- function() {
  # Joins the individual matched digits into a single string.
  collapse_digits <- function(x) paste(unlist(c(x)), collapse = "")
  some(Digit()) %using% collapse_digits
}
455 | 
#' \code{space} matches zero or more space characters.
#' 
#' It is built as \code{many(SpaceCheck())}; whatever was matched is
#' discarded and replaced by the empty string \code{""}.
#' 
#' @export
#' @examples
#' space() ("  abc")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
#'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
#'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
#'   \code{\link{token}}, \code{\link{identifier}},
#'   \code{\link{natural}}, \code{\link{symbol}}
space <- function() {
  # The matched whitespace itself is not kept; the result is always "".
  discard_match <- function(x) ""
  many(SpaceCheck()) %using% discard_match
}
472 | 
#' \code{token} is a new primitive that ignores any space before and after
#' applying a parser to a token.
#' 
#' The parser \code{p} is sequenced between two \code{\link{space}} parsers
#' and the first and last elements of the combined result are then removed.
#' 
#' @param p is the parser to have spaces stripped.
#' @export
#' @examples
#' token(ident()) ("   variable1   ")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
#'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
#'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
#'   \code{\link{space}}, \code{\link{identifier}},
#'   \code{\link{natural}}, \code{\link{symbol}}
token <- function(p) {
  # Removes the first and the last element of the sequenced result.
  # NOTE(review): presumably these are the two space() results surrounding
  # the result of `p` -- confirm against the result shape of %then%.
  strip_ends <- function(x) {
    without_first <- x[-1]
    without_first[-length(without_first)]
  }

  (space() %then% p %then% space()) %using% strip_ends
}
495 | 
#' \code{identifier} creates a token parser for identifiers
#' 
#' It is \code{\link{ident}} wrapped in \code{\link{token}}.
#' 
#' @param ... takes in token primitives (currently not used in the function
#'   body)
#' @export
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
#'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
#'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
#'   \code{\link{space}}, \code{\link{token}},
#'   \code{\link{natural}}, \code{\link{symbol}}
identifier <- function(...) {
  # Wrap the identifier parser in token().
  token(p = ident())
}
508 | 
#' \code{natural} creates a token parser for natural numbers
#' 
#' It is \code{\link{nat}} wrapped in \code{\link{token}}.
#' 
#' @param ... additional arguments for the parser (currently not used in the
#'   function body)
#' @export
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
#'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
#'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
#'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#'   \code{\link{symbol}}
natural <- function(...) {
  # Wrap the digit-sequence parser in token().
  token(p = nat())
}
521 | 
#' \code{symbol} creates a token for a symbol
#' 
#' It is \code{String(xs)} wrapped in \code{\link{token}}.
#' 
#' @param xs takes in a string to create a token
#' @export
#' @examples
#' symbol("[") ("  [123]")
#' @seealso \code{\link{Digit}}, \code{\link{Lower}}, \code{\link{Upper}}, 
#'   \code{\link{Alpha}}, \code{\link{AlphaNum}}, \code{\link{SpaceCheck}}, 
#'   \code{\link{String}}, \code{\link{ident}}, \code{\link{nat}}, 
#'   \code{\link{space}}, \code{\link{token}}, \code{\link{identifier}},
#'   \code{\link{natural}}
symbol <- function(xs) {
  # Wrap the fixed-string parser for `xs` in token().
  token(p = String(xs))
}
536 | 


--------------------------------------------------------------------------------