├── project ├── build.properties └── plugins.sbt ├── .github ├── dependabot.yml └── workflows │ ├── cla.yml │ ├── release.yml │ └── ci.yml ├── CODE_OF_CONDUCT.md ├── shared └── src │ ├── main │ ├── scala-2.13- │ │ └── scala │ │ │ └── util │ │ │ └── parsing │ │ │ └── input │ │ │ └── ScalaVersionSpecificPagedSeq.scala │ ├── scala │ │ └── scala │ │ │ └── util │ │ │ └── parsing │ │ │ ├── input │ │ │ ├── NoPosition.scala │ │ │ ├── Positional.scala │ │ │ ├── CharArrayReader.scala │ │ │ ├── Reader.scala │ │ │ ├── PagedSeqReader.scala │ │ │ ├── CharSequenceReader.scala │ │ │ ├── StreamReader.scala │ │ │ ├── Position.scala │ │ │ ├── OffsetPosition.scala │ │ │ └── PagedSeq.scala │ │ │ └── combinator │ │ │ ├── syntactical │ │ │ ├── TokenParsers.scala │ │ │ ├── StandardTokenParsers.scala │ │ │ └── StdTokenParsers.scala │ │ │ ├── token │ │ │ ├── StdTokens.scala │ │ │ └── Tokens.scala │ │ │ ├── lexical │ │ │ ├── Lexical.scala │ │ │ ├── Scanners.scala │ │ │ └── StdLexical.scala │ │ │ ├── SubSequence.scala │ │ │ ├── ImplicitConversions.scala │ │ │ ├── JavaTokenParsers.scala │ │ │ ├── RegexParsers.scala │ │ │ ├── PackratParsers.scala │ │ │ └── Parsers.scala │ └── scala-2.13+ │ │ └── scala │ │ └── util │ │ └── parsing │ │ └── input │ │ └── ScalaVersionSpecificPagedSeq.scala │ └── test │ └── scala │ └── scala │ └── util │ └── parsing │ ├── combinator │ ├── t5669.scala │ ├── t7483.scala │ ├── t4138.scala │ ├── t6067.scala │ ├── t1229.scala │ ├── t1100.scala │ ├── t3212.scala │ ├── gh72.scala │ ├── t0700.scala │ ├── gh29.scala │ ├── t8879.scala │ ├── LongestMatchTest.scala │ ├── t5514.scala │ ├── gh45.scala │ ├── t6464.scala │ ├── gh56.scala │ ├── lexical │ │ └── StdLexicalTest.scala │ ├── JavaTokenParsersTest.scala │ ├── gh242.scala │ ├── RegexParsersTest.scala │ └── PackratParsersTest.scala │ └── input │ ├── gh178.scala │ ├── gh64.scala │ └── OffsetPositionTest.scala ├── NOTICE ├── jvm └── src │ └── main │ └── scala │ └── scala │ └── util │ └── parsing │ └── input │ └── 
PositionCache.scala ├── js └── src │ └── main │ └── scala │ └── scala │ └── util │ └── parsing │ └── input │ └── PositionCache.scala ├── native └── src │ └── main │ └── scala │ └── scala │ └── util │ └── parsing │ └── input │ └── PositionCache.scala ├── .gitignore ├── .mailmap ├── README.md ├── docs └── Getting_Started.md └── LICENSE /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.11.7 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | all repositories in these organizations: 2 | 3 | * [scala](https://github.com/scala) 4 | * [scalacenter](https://github.com/scalacenter) 5 | * [lampepfl](https://github.com/lampepfl) 6 | 7 | are covered by the Scala Code of Conduct: https://scala-lang.org/conduct/ 8 | -------------------------------------------------------------------------------- /.github/workflows/cla.yml: -------------------------------------------------------------------------------- 1 | name: "Check Scala CLA" 2 | on: 3 | pull_request: 4 | jobs: 5 | cla-check: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Verify CLA 9 | uses: scala/cla-checker@v1 10 | with: 11 | author: ${{ github.event.pull_request.user.login }} 12 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scala-lang.modules" % "sbt-scala-module" % "3.4.0") 2 | 3 | addSbtPlugin("org.portable-scala" % 
"sbt-scalajs-crossproject" % "1.3.2") 4 | addSbtPlugin("org.scala-js" % "sbt-scalajs" % "1.20.1") 5 | 6 | addSbtPlugin("org.portable-scala" % "sbt-scala-native-crossproject" % "1.3.2") 7 | addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.5.9") 8 | -------------------------------------------------------------------------------- /shared/src/main/scala-2.13-/scala/util/parsing/input/ScalaVersionSpecificPagedSeq.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala.util.parsing.input 14 | 15 | private[input] trait ScalaVersionSpecificPagedSeq[T] { 16 | // Nothing for 2.12! 17 | } -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Scala parser combinators 2 | Copyright (c) 2002-2025 EPFL 3 | Copyright (c) 2011-2025 Lightbend, Inc. dba Akka 4 | 5 | Scala includes software developed at 6 | LAMP/EPFL (https://lamp.epfl.ch/) and 7 | Akka (https://akka.io/). 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"). 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: ["*"] 5 | jobs: 6 | publish: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v6 10 | with: 11 | fetch-depth: 0 12 | - uses: actions/setup-java@v5 13 | with: 14 | distribution: temurin 15 | java-version: 8 16 | - uses: sbt/setup-sbt@v1 17 | - run: sbt versionCheck ci-release 18 | env: 19 | PGP_PASSPHRASE: ${{secrets.PGP_PASSPHRASE}} 20 | PGP_SECRET: ${{secrets.PGP_SECRET}} 21 | SONATYPE_PASSWORD: ${{secrets.SONATYPE_PASSWORD}} 22 | SONATYPE_USERNAME: ${{secrets.SONATYPE_USERNAME}} 23 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/input/NoPosition.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing.input 15 | 16 | /** Undefined position. 
17 | */ 18 | object NoPosition extends Position { 19 | def line = 0 20 | def column = 0 21 | override def toString = "" 22 | override def longString = toString 23 | def lineContents = "" 24 | } 25 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | jobs: 8 | test: 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | java: [8, 11, 17, 21, 25] 13 | scala: [2.12.x, 2.13.x, 3.x] 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v6 17 | with: 18 | fetch-depth: 0 19 | - uses: coursier/cache-action@v7 20 | - uses: actions/setup-java@v5 21 | with: 22 | distribution: temurin 23 | java-version: ${{matrix.java}} 24 | - uses: sbt/setup-sbt@v1 25 | - name: Test 26 | run: sbt ++${{matrix.scala}} test doc headerCheck versionPolicyCheck package 27 | -------------------------------------------------------------------------------- /shared/src/main/scala-2.13+/scala/util/parsing/input/ScalaVersionSpecificPagedSeq.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | package scala.util.parsing.input 14 | 15 | private[input] trait ScalaVersionSpecificPagedSeq[T] { self: PagedSeq[T] => 16 | // Members declared in scala.collection.Seq 17 | override def iterableFactory: collection.SeqFactory[collection.IndexedSeq] = collection.IndexedSeq 18 | 19 | } 20 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t5669.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala.util.parsing.combinator 14 | 15 | import scala.util.parsing.input.OffsetPosition 16 | 17 | import org.junit.Test 18 | import org.junit.Assert.assertEquals 19 | 20 | class t5669 { 21 | @Test 22 | def test: Unit = { 23 | val op = new OffsetPosition("foo\rbar", 4) 24 | assertEquals(2, op.line) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/input/gh178.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | package scala.util.parsing.input 14 | 15 | import org.junit.Assert.assertEquals 16 | import org.junit.Test 17 | 18 | class gh178 { 19 | 20 | @Test 21 | def test: Unit = { 22 | val len = 100000 23 | val i = Iterator.fill(len)("A") 24 | val pagedSeq = PagedSeq.fromStrings(i) 25 | assertEquals(len, pagedSeq.slice(0).length) // should not fail with StackOverflowError 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /jvm/src/main/scala/scala/util/parsing/input/PositionCache.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala.util.parsing.input 14 | 15 | private[input] trait PositionCache { 16 | private lazy val indexCacheTL = 17 | // not DynamicVariable as that would share the map from parent to child :-( 18 | new ThreadLocal[java.util.Map[CharSequence, Array[Int]]] { 19 | override def initialValue = new java.util.WeakHashMap[CharSequence, Array[Int]] 20 | } 21 | 22 | private[input] def indexCache = indexCacheTL.get 23 | } 24 | -------------------------------------------------------------------------------- /js/src/main/scala/scala/util/parsing/input/PositionCache.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | package scala.util.parsing.input 14 | 15 | import java.util.Collections 16 | 17 | private[input] trait PositionCache { 18 | private[input] lazy val indexCache: java.util.Map[CharSequence,Array[Int]] = 19 | new java.util.AbstractMap[CharSequence, Array[Int]] { 20 | override def entrySet() = Collections.emptySet() 21 | 22 | // the /dev/null of Maps 23 | override def put(ch: CharSequence, a: Array[Int]) = null 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t7483.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala.util.parsing.combinator 14 | 15 | import scala.util.parsing.input.{CharSequenceReader, OffsetPosition} 16 | 17 | import org.junit.Test 18 | import org.junit.Assert.assertEquals 19 | 20 | class t7483 { 21 | val s = "foo\nbar" 22 | val reader = new CharSequenceReader(s, 0) 23 | val p = reader.pos.asInstanceOf[OffsetPosition] 24 | 25 | @Test 26 | def test: Unit = { 27 | assertEquals("foo", p.lineContents) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /native/src/main/scala/scala/util/parsing/input/PositionCache.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 
8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala.util.parsing.input 14 | 15 | import java.lang.CharSequence 16 | import java.util.Collections 17 | 18 | private[input] trait PositionCache { 19 | private[input] lazy val indexCache: java.util.Map[CharSequence,Array[Int]] = 20 | new java.util.AbstractMap[CharSequence, Array[Int]] { 21 | override def entrySet() = Collections.emptySet() 22 | 23 | // the /dev/null of Maps 24 | override def put(ch: CharSequence, a: Array[Int]) = null 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t4138.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | import org.junit.Test 14 | import org.junit.Assert.assertEquals 15 | 16 | class T4138 { 17 | object p extends scala.util.parsing.combinator.JavaTokenParsers 18 | 19 | @Test 20 | def test: Unit = { 21 | assertEquals("""[1.45] parsed: "lir 'de\' ' \\ \n / upa \"new\" \t parsing"""", p.parse(p.stringLiteral, """"lir 'de\' ' \\ \n / upa \"new\" \t parsing"""").toString) 22 | assertEquals("""[1.5] parsed: "s """", p.parse(p.stringLiteral, """"s " lkjse"""").toString) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/input/gh64.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. 
dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala.util.parsing.input 14 | 15 | import org.junit.Assert._ 16 | import org.junit.Test 17 | 18 | class gh64 { 19 | 20 | @Test 21 | def test: Unit = { 22 | val len = 4096 * 20000 23 | val i = Iterator.fill(len)(true) // use `true` to make this test more lightweight 24 | val pagedSeq = PagedSeq.fromIterator(i) 25 | pagedSeq.slice(len - 1) // load the whole pagedSeq without caching `latest` element 26 | assertEquals(len, pagedSeq.length) // should not throw StackOverflowError 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t6067.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | import scala.util.parsing.combinator._ 14 | 15 | import org.junit.Test 16 | import org.junit.Assert.assertEquals 17 | 18 | class t6067 extends RegexParsers { 19 | object TestParser extends RegexParsers { 20 | def p: TestParser.ParseResult[TestParser.~[List[String], String]] = parseAll(rep(commit("a")) ~ "b", "aaab") 21 | } 22 | 23 | val expected = """[1.4] error: 'a' expected but 'b' found 24 | 25 | aaab 26 | ^""" 27 | @Test 28 | def test: Unit = { 29 | assertEquals(expected, TestParser.p.toString) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/input/Positional.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing.input 15 | 16 | /** A trait for objects that have a source position. 17 | */ 18 | trait Positional { 19 | 20 | /** The source position of this object, initially set to undefined. */ 21 | var pos: Position = NoPosition 22 | 23 | /** If current source position is undefined, update it with given position `newpos` 24 | * @return the object itself 25 | */ 26 | def setPos(newpos: Position): this.type = { 27 | if (pos eq NoPosition) pos = newpos 28 | this 29 | } 30 | } 31 | 32 | 33 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t1229.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. 
dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | import scala.util.parsing.combinator.RegexParsers 14 | 15 | import org.junit.Test 16 | import org.junit.Assert.assertEquals 17 | 18 | class t1229 extends RegexParsers { 19 | val number = """0|[1-9]\d*""".r ^^ { _.toInt } 20 | 21 | val parser: Parser[Int] = number - "42" 22 | 23 | @Test 24 | def test: Unit = { 25 | assertEquals("[1.3] parsed: 21", parse(phrase(parser), "21").toString) 26 | 27 | val expected = """[1.1] failure: Expected failure 28 | 29 | 42 30 | ^""" 31 | assertEquals(expected, parse(phrase(parser), "42").toString ) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # Are you tempted to edit this file? 3 | # 4 | # First consider if the changes make sense for all, 5 | # or if they are specific to your workflow/system. 6 | # If it is the latter, you can augment this list with 7 | # entries in .git/info/excludes 8 | # 9 | # see also test/files/.gitignore 10 | # 11 | 12 | *.jar 13 | *~ 14 | 15 | build.properties 16 | 17 | # target directories for ant build 18 | /build/ 19 | /dists/ 20 | 21 | # other 22 | /out/ 23 | /bin/ 24 | /sandbox/ 25 | 26 | # eclipse, intellij 27 | /.classpath 28 | /.project 29 | /src/intellij/*.iml 30 | /src/intellij/*.ipr 31 | /src/intellij/*.iws 32 | /.cache 33 | /.idea 34 | /.settings 35 | 36 | # vscode, metals 37 | .bloop/ 38 | /.metals/ 39 | /.vscode/ 40 | /project/**/metals.sbt 41 | 42 | # bak files produced by ./cleanup-commit 43 | *.bak 44 | 45 | # Standard symbolic link to build/quick/bin 46 | qbin 47 | 48 | # Mac specific, but that is common enough a dev platform to warrant inclusion. 
49 | .DS_Store 50 | 51 | # sbt 52 | target/ 53 | /.bsp/ -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/input/CharArrayReader.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing.input 15 | 16 | /** An object encapsulating basic character constants. 17 | */ 18 | object CharArrayReader { 19 | final val EofCh = '\u001a' 20 | } 21 | 22 | /** A character array reader reads a stream of characters (keeping track of their positions) 23 | * from an array. 24 | * 25 | * @param chars an array of characters 26 | * @param index starting offset into the array; the first element returned will be `source(index)` 27 | */ 28 | class CharArrayReader(chars: Array[Char], index: Int = 0) 29 | extends CharSequenceReader(java.nio.CharBuffer.wrap(chars), index) 30 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/combinator/syntactical/TokenParsers.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing 15 | package combinator 16 | package syntactical 17 | 18 | /** This is the core component for token-based parsers. 
19 | */ 20 | trait TokenParsers extends Parsers { 21 | /** `Tokens` is the abstract type of the `Token`s consumed by the parsers in this component. */ 22 | type Tokens <: token.Tokens 23 | 24 | /** `lexical` is the component responsible for consuming some basic kind of 25 | * input (usually character-based) and turning it into the tokens 26 | * understood by these parsers. 27 | */ 28 | val lexical: Tokens 29 | 30 | /** The input-type for these parsers*/ 31 | type Elem = lexical.Token 32 | 33 | } 34 | 35 | 36 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t1100.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | import scala.util.parsing.combinator.Parsers 14 | import scala.util.parsing.input.CharSequenceReader 15 | 16 | import org.junit.Test 17 | import org.junit.Assert.assertEquals 18 | 19 | class T1100 { 20 | class TestParsers extends Parsers { 21 | type Elem = Char 22 | 23 | def p: Parser[List[Char]] = rep1(p1) 24 | def p1: Parser[Char] = accept('a') | err("errors are propagated") 25 | } 26 | 27 | val expected = """[1.4] error: errors are propagated 28 | 29 | aaab 30 | ^""" 31 | 32 | @Test 33 | def test(): Unit = { 34 | val tstParsers = new TestParsers 35 | val s = new CharSequenceReader("aaab") 36 | assertEquals(expected, tstParsers.p(s).toString) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t3212.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | package scala.util.parsing.combinator 14 | 15 | import org.junit.Test 16 | import org.junit.Assert.assertEquals 17 | 18 | class t3212 extends RegexParsers { 19 | 20 | sealed trait BuySell 21 | case object BUY extends BuySell 22 | case object SELL extends BuySell 23 | 24 | def buy_sell: Parser[BuySell] = 25 | "to" ~> "buy" ^^^ BUY | 26 | "to" ~> "sell" ^^^ SELL | 27 | failure("buy or sell expected") 28 | 29 | @Test 30 | def test: Unit = { 31 | val parseResult = parse[BuySell](phrase(buy_sell), "bought") 32 | 33 | val expected = """[1.1] failure: buy or sell expected 34 | 35 | bought 36 | ^""" 37 | assertEquals(expected, parseResult.toString) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/combinator/token/StdTokens.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing 15 | package combinator 16 | package token 17 | 18 | /** This component provides the standard `Token`s for a simple, Scala-like language. 
19 | */ 20 | trait StdTokens extends Tokens { 21 | /** The class of keyword tokens */ 22 | case class Keyword(chars: String) extends Token { 23 | override def toString = s"'$chars'" 24 | } 25 | 26 | /** The class of numeric literal tokens */ 27 | case class NumericLit(chars: String) extends Token { 28 | override def toString = chars 29 | } 30 | 31 | /** The class of string literal tokens */ 32 | case class StringLit(chars: String) extends Token { 33 | override def toString = s""""$chars"""" 34 | } 35 | 36 | /** The class of identifier tokens */ 37 | case class Identifier(chars: String) extends Token { 38 | override def toString = s"identifier $chars" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/gh72.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | import scala.util.parsing.combinator.Parsers 14 | import scala.util.parsing.input.CharSequenceReader 15 | 16 | import org.junit.Test 17 | import org.junit.Assert.assertEquals 18 | 19 | class gh72 { 20 | class TestParsers extends Parsers { 21 | type Elem = Char 22 | val left: Parser[String] = 'a' ~ 'b' ~ 'c' ^^^ "left" withFailureMessage "failure on left" 23 | val right: Parser[String] = 'a' ~ 'b' ~ 'c' ^^^ "right" withFailureMessage "failure on right" 24 | def p: Parser[String] = left ||| right 25 | } 26 | 27 | @Test 28 | def test(): Unit = { 29 | val tstParsers = new TestParsers 30 | val s = new CharSequenceReader("abc") 31 | assertEquals("[1.4] parsed: left", tstParsers.p(s).toString) 32 | 33 | val t = new CharSequenceReader("def") 34 | val expectedFailure = """[1.1] failure: failure on left 35 | 36 | def 37 | ^""" 38 | assertEquals(expectedFailure, tstParsers.p(t).toString) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t0700.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | import java.io.StringReader 14 | 15 | import scala.util.parsing.combinator.Parsers 16 | import scala.util.parsing.input.{CharArrayReader, StreamReader} 17 | 18 | import org.junit.Test 19 | import org.junit.Assert.assertEquals 20 | 21 | class T0700 { 22 | class TestParsers extends Parsers { 23 | type Elem = Char 24 | 25 | def p: Parser[List[Int]] = rep(p1 | p2) 26 | def p1: Parser[Int] = 'a' ~ nl ~ 'b' ~ nl ^^^ 1 27 | def p2: Parser[Int] = 'a' ~ nl ^^^ 2 28 | def nl: Parser[Int] = rep(accept('\n') | accept('\r')) ^^^ 0 29 | } 30 | 31 | @Test 32 | def test: Unit = { 33 | val tstParsers = new TestParsers 34 | val s = "a\na\na" 35 | val r1 = new CharArrayReader(s.toCharArray()) 36 | val r2 = StreamReader(new StringReader(s)) 37 | assertEquals("[3.2] parsed: List(2, 2, 2)", tstParsers.p(r1).toString) 38 | assertEquals("[3.2] parsed: List(2, 2, 2)", tstParsers.p(r2).toString) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/gh29.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | package scala.util.parsing.combinator 14 | 15 | import org.junit.Test 16 | import org.junit.Assert.assertEquals 17 | 18 | class gh29 { 19 | object Foo extends JavaTokenParsers { 20 | def word(x: String) = s"\\b$x\\b".r 21 | 22 | lazy val expr = aSentence | something 23 | 24 | lazy val aSentence = noun ~ verb ~ obj 25 | 26 | lazy val noun = word("noun") 27 | lazy val verb = word("verb") | err("not a verb!") 28 | lazy val obj = word("object") 29 | 30 | lazy val something = word("FOO") 31 | } 32 | 33 | val expected = 34 | """[1.6] error: not a verb! 35 | 36 | noun vedsfasdf 37 | ^""".stripMargin 38 | 39 | @Test 40 | def test(): Unit = { 41 | val f = Foo.parseAll(Foo.expr, "noun verb object") 42 | 43 | assertEquals("[1.17] parsed: ((noun~verb)~object)", f.toString) 44 | 45 | val g = Foo.parseAll(Foo.expr, "noun vedsfasdf") 46 | assertEquals(expected, g.toString) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing 15 | package combinator 16 | package syntactical 17 | 18 | import token._ 19 | import lexical.StdLexical 20 | import scala.language.implicitConversions 21 | 22 | /** This component provides primitive parsers for the standard tokens defined in `StdTokens`. 
23 | */ 24 | class StandardTokenParsers extends StdTokenParsers { 25 | type Tokens = StdTokens 26 | val lexical: StdLexical = new StdLexical() // type annotation added for dotty 27 | 28 | //an implicit keyword function that gives a warning when a given word is not in the reserved/delimiters list 29 | override implicit def keyword(chars : String): Parser[String] = 30 | if(lexical.reserved.contains(chars) || lexical.delimiters.contains(chars)) super.keyword(chars) 31 | else failure("You are trying to parse \""+chars+"\", but it is neither contained in the delimiters list, nor in the reserved keyword list of your lexical object") 32 | 33 | } 34 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/t8879.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
 */

import scala.util.parsing.input._

import org.junit.Test
import org.junit.Assert.fail

/** Regression test for SI-8879: `StreamReader.drop` must stay lazy and not
 *  force pages of the underlying `PagedSeq` beyond what is needed.
 */
class t8879 {

  @Test
  def test: Unit = {
    val testPagedSeq = {
      // Counts how many pages the PagedSeq has requested from `more`.
      var nbpage = 0
      // Page producer: fills the buffer with 'a', a '\n' every 100 chars,
      // and fails the test if a second page is ever requested.
      def more(data: Array[Char], start: Int, len: Int): Int = {
        if (nbpage < 1) {
          var i = 0
          while (i < len && nbpage < 3) {
            if (i % 100 != 0) {
              data(start + i) = 'a'
            } else {
              data(start + i) = '\n'
            }
            i += 1
          }
          // -1 signals end-of-input; otherwise report how many chars were written.
          if (i == 0) -1 else {
            nbpage += 1
            i
          }
        } else {
          fail("Should not read more than 1 page!")
          0
        }
      }

      new PagedSeq(more(_: Array[Char], _: Int, _: Int))
    }

    val s = new StreamReader(testPagedSeq, 0, 1)

    // should not trigger reading of the second page
    s.drop(20)
  }
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/combinator/lexical/Lexical.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing
package combinator
package lexical

import token._
import input.CharArrayReader.EofCh

/** This component complements the `Scanners` component with
 *  common operations for lexical parsers.
 *
 *  Refer to [[scala.util.parsing.combinator.lexical.StdLexical]]
 *  for a concrete implementation for a simple, Scala-like language.
26 | */ 27 | abstract class Lexical extends Scanners with Tokens { 28 | 29 | /** A character-parser that matches a letter (and returns it).*/ 30 | def letter = elem("letter", _.isLetter) 31 | 32 | /** A character-parser that matches a digit (and returns it).*/ 33 | def digit = elem("digit", _.isDigit) 34 | 35 | /** A character-parser that matches any character except the ones given in `cs` (and returns it).*/ 36 | def chrExcept(cs: Char*) = elem("", ch => !cs.contains(ch)) 37 | 38 | /** A character-parser that matches a white-space character (and returns it).*/ 39 | def whitespaceChar = elem("space char", ch => ch <= ' ' && ch != EofCh) 40 | } 41 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/combinator/SubSequence.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing.combinator 15 | 16 | // A shallow wrapper over another CharSequence (usually a String) 17 | // 18 | // See SI-7710: in jdk7u6 String.subSequence stopped sharing the char array of the original 19 | // string and began copying it. 20 | // RegexParsers calls subSequence twice per input character: that's a lot of array copying! 
/** Zero-copy view of `length` characters of `s` starting at `start`.
 *  Shares the underlying sequence instead of copying (see SI-7710 note above).
 */
private[combinator] class SubSequence(s: CharSequence, start: Int, val length: Int) extends CharSequence {
  /** View from `start` to the end of `s`. */
  def this(s: CharSequence, start: Int) = this(s, start, s.length - start)

  def charAt(i: Int) = {
    if (i < 0 || i >= length)
      throw new IndexOutOfBoundsException(s"index: $i, length: $length")
    s.charAt(start + i)
  }

  // Nesting stays shallow: a sub-view of a SubSequence wraps the ORIGINAL `s`.
  def subSequence(_start: Int, _end: Int) = {
    val valid = _start >= 0 && _end >= 0 && _end <= length && _start <= _end
    if (!valid)
      throw new IndexOutOfBoundsException(s"start: ${_start}, end: ${_end}, length: $length")
    new SubSequence(s, start + _start, _end - _start)
  }

  // The only place that materializes a copy of the characters.
  override def toString = s.subSequence(start, start + length).toString
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/combinator/token/Tokens.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing
package combinator
package token

/** This component provides the notion of `Token`, the unit of information that is passed from lexical
 *  parsers in the `Lexical` component to the parsers in the `TokenParsers` component.
 */
trait Tokens {
  /** Objects of this type are produced by a lexical parser or ``scanner``, and consumed by a parser.
   *
   *  @see [[scala.util.parsing.combinator.syntactical.TokenParsers]]
   */
  abstract class Token {
    def chars: String
  }

  /** A class of error tokens.
Error tokens are used to communicate
   * errors detected during lexical analysis
   */
  case class ErrorToken(msg: String) extends Token {
    def chars = s"*** error: $msg"
  }

  /** A class for end-of-file tokens */
  case object EOF extends Token {
    def chars = ""
  }

  /** This token is produced by a scanner `Scanner` when scanning failed. */
  def errorToken(msg: String): Token = ErrorToken(msg)
}
--------------------------------------------------------------------------------
/shared/src/test/scala/scala/util/parsing/combinator/LongestMatchTest.scala:
--------------------------------------------------------------------------------
package scala.util.parsing.combinator

import java.io.StringReader

import scala.util.parsing.input.StreamReader

import org.junit.Test
import org.junit.Assert.{ assertEquals, fail }

/** Tests the longest-match alternation combinator `|||`: the alternative that
 *  consumes the most input wins; ties go to the left operand.
 */
class LongestMatchTest {
  class TestParsers extends Parsers {
    type Elem = Char

    def ab: Parser[String] = 'a' ~ 'b' ^^^ "ab"
    def a: Parser[String] = 'a' ^^^ "a"
    // Same match length as `ab` but a different result — used to detect tie-breaking.
    def ab_alt: Parser[String] = 'a' ~ 'b' ^^^ "alt"
  }

  // Longer alternative first: it should win.
  @Test
  def longestMatchFirst: Unit = {
    val tParsers = new TestParsers
    val reader = StreamReader(new StringReader("ab"))
    val p = tParsers.ab ||| tParsers.a
    p(reader) match {
      case tParsers.Success(result, _) => assertEquals("ab", result)
      case _ => fail()
    }
  }

  // Longer alternative second: it must still win (unlike plain `|`).
  @Test
  def longestMatchSecond: Unit = {
    val tParsers = new TestParsers
    val reader = StreamReader(new StringReader("ab"))
    val p = tParsers.a ||| tParsers.ab
    p(reader) match {
      case tParsers.Success(result, _) => assertEquals("ab", result)
      case _ => fail()
    }
  }

  // Equal match length: the first operand's result is kept.
  @Test
  def tieGoesToFirst: Unit = {
    val tParsers = new TestParsers
    val reader = StreamReader(new StringReader("ab"))
    val p = tParsers.ab ||| tParsers.ab_alt
    p(reader) match {
      case tParsers.Success(result, _) => assertEquals("ab", result)
      case _ => fail()
    }
  }
}
--------------------------------------------------------------------------------
/shared/src/test/scala/scala/util/parsing/combinator/t5514.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

import scala.util.parsing.combinator.Parsers
import scala.util.parsing.input.Reader
import scala.util.parsing.input.Position

import org.junit.Test
import org.junit.Assert.assertEquals

/** Regression test for SI-5514: `rep`/`repN` must not construct more readers
 *  than necessary (readerCount tracks every DemoReader instantiation).
 */
class T5514 extends Parsers {
  var readerCount = 0
  // Synthetic reader producing "s10", "s9", ... "s1"; counts its own instantiations.
  class DemoReader(n: Int) extends Reader[String] {
    def atEnd = n == 0
    def first = if (n >= 0) "s" + n else throw new IllegalArgumentException("No more input.")
    def rest = new DemoReader(n - 1)
    def pos = new Position {
      def line = 0
      def column = 0
      def lineContents = first
    }
    readerCount += 1
  }


  type Elem = String
  def startsWith(prefix: String) = acceptIf(_ startsWith prefix)("Error: " + _)

  @Test
  def test: Unit = {
    // 10 elements consumed => 11 readers (one per element plus the terminal one).
    val resrep = startsWith("s").*(new DemoReader(10))
    assertEquals("[0.0] parsed: List(s10, s9, s8, s7, s6, s5, s4, s3, s2, s1)", resrep.toString)
    assertEquals(11, readerCount)

    readerCount = 0
    // repN(5, ...) must stop after 5 elements => exactly 6 readers.
    val resrep5 = repN(5, startsWith("s"))(new DemoReader(10))
    assertEquals("[0.0] parsed: List(s10, s9, s8, s7, s6)", resrep5.toString)
    assertEquals(6, readerCount)
  }
}

--------------------------------------------------------------------------------
/shared/src/test/scala/scala/util/parsing/combinator/gh45.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala.util.parsing.combinator

import scala.util.parsing.input._

import org.junit.Test
import org.junit.Assert.assertTrue

import scala.util.parsing.combinator.syntactical.StandardTokenParsers

/** Test for issue gh#45: packrat backtracking over a stateful `^?` parser
 *  must succeed for both in-memory and paged-sequence readers.
 */
class gh45 {

  @Test
  def test4: Unit = {
    def check(rd: Reader[Char]): Unit = {
      val g = new grammar
      val p = g.phrase(g.script)
      val parseResult = p(new g.lexical.Scanner(rd))
      assertTrue(parseResult.isInstanceOf[g.Success[_]])
    }

    val str = "x once y"
    check(new CharSequenceReader(str))
    /* Note that this only tests PagedSeq.rest since neither
     * PackratReader nor lexical.Scanner override/use the drop method.
     */
    check(new PagedSeqReader(PagedSeq.fromStrings(List(str))))
  }

}

private final class grammar extends StandardTokenParsers with PackratParsers {
  lexical.reserved ++= List("x", "y", "z", "once")

  // `once` only succeeds the FIRST time it is applied; memoization must make the
  // second alternative of `script` see the memoized success, not re-run the guard.
  var onceCnt: Int = 0
  lazy val once: PackratParser[String] = memo("once") ^? {
    case s if onceCnt == 0 =>
      onceCnt += 1
      s
  }

  lazy val script: PackratParser[Any] =
    ( "x" ~ once ~ "z"
    | "x" ~ once ~ "y"
    )
}
--------------------------------------------------------------------------------
/shared/src/test/scala/scala/util/parsing/combinator/t6464.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

import scala.util.parsing.input.CharSequenceReader
import scala.util.parsing.combinator.RegexParsers

import org.junit.Test
import org.junit.Assert.assertEquals

/** Regression test for SI-6464: the commit operator `~!` must keep its effect
 *  when combined with result-dropping sequencing (`~>` / `<~`).
 */
class t6464 {
  object SspParser extends RegexParsers {
    val ok: Parser[Any] =
      ("<%" ~! rep(' ') ~ "\\w+".r ~ rep(' ') ~ "%>"
      | "<%" ~! err("should not fail here, because of ~!"))

    // Same grammar but using ~> and <~; historically lost the commit (hence "buggy").
    val buggy: Parser[Any] =
      ("<%" ~! rep(' ') ~> "\\w+".r <~ rep(' ') ~ "%>"
      | "<%" ~! err("should not fail here, because of ~!"))

  }

  @Test
  def test: Unit = {
    assertEquals(
      "[1.9] parsed: ((((<%~List( ))~hi)~List( ))~%>)",
      SspParser.phrase(SspParser.ok)(new CharSequenceReader("<% hi %>")).toString)

    // NOTE(review): alignment reconstructed from the reported position [1.7]
    // ("<%" + four spaces puts the offending '%' in column 7) — whitespace in
    // the original literal was not preserved by the extraction.
    val expected = """[1.7] error: string matching regex '\w+' expected but '%' found

<%    %>
      ^"""

    assertEquals(
      expected,
      SspParser.phrase(SspParser.ok)(new CharSequenceReader("<%    %>")).toString)

    assertEquals(
      "[1.9] parsed: hi",
      SspParser.phrase(SspParser.buggy)(new CharSequenceReader("<% hi %>")).toString)

    assertEquals(
      expected,
      SspParser.phrase(SspParser.buggy)(new CharSequenceReader("<%    %>")).toString)
  }
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing
package combinator
package syntactical

import token._
import scala.collection.mutable
import scala.language.implicitConversions

/** This component provides primitive parsers for the standard tokens defined in `StdTokens`.
 */
trait StdTokenParsers extends TokenParsers {
  type Tokens <: StdTokens
  import lexical.{Keyword, NumericLit, StringLit, Identifier}

  // Cache so that the same keyword string always maps to the same Parser instance.
  protected val keywordCache = mutable.HashMap[String, Parser[String]]()

  /** A parser which matches a single keyword token.
   *
   * @param chars The character string making up the matched keyword.
   * @return a `Parser` that matches the given string
   */
  implicit def keyword(chars: String): Parser[String] =
    keywordCache.getOrElseUpdate(chars, accept(Keyword(chars)) ^^ (_.chars))

  /** A parser which matches a numeric literal */
  def numericLit: Parser[String] =
    elem("number", _.isInstanceOf[NumericLit]) ^^ (_.chars)

  /** A parser which matches a string literal */
  def stringLit: Parser[String] =
    elem("string literal", _.isInstanceOf[StringLit]) ^^ (_.chars)

  /** A parser which matches an identifier */
  def ident: Parser[String] =
    elem("identifier", _.isInstanceOf[Identifier]) ^^ (_.chars)
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/input/Reader.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing.input


/** An interface for streams of values that have positions.
 */
abstract class Reader[+T] {

  /** If this is a reader over character sequences, the underlying char sequence.
   *  If not, throws a `NoSuchMethodError` exception.
   *
   *  @throws [[java.lang.NoSuchMethodError]] if this not a char sequence reader.
   */
  def source: java.lang.CharSequence =
    throw new NoSuchMethodError("not a char sequence reader")

  /** Offset into `source`; same `NoSuchMethodError` contract as `source`. */
  def offset: Int =
    throw new NoSuchMethodError("not a char sequence reader")

  /** Returns the first element of the reader
   */
  def first: T

  /** Returns an abstract reader consisting of all elements except the first
   *
   * @return If `atEnd` is `true`, the result will be `this`;
   *         otherwise, it's a `Reader` containing more elements.
   */
  def rest: Reader[T]

  /** Returns an abstract reader consisting of all elements except the first `n` elements.
   *  Default implementation is linear: `n` successive calls to `rest`
   *  (subclasses override with a constant-time version where possible).
   *  For `n <= 0` the reader itself is returned.
   */
  def drop(n: Int): Reader[T] = {
    var r: Reader[T] = this
    var cnt = n
    while (cnt > 0) {
      r = r.rest; cnt -= 1
    }
    r
  }

  /** The position of the first element in the reader.
   */
  def pos: Position

  /** `true` iff there are no more elements in this reader.
   */
  def atEnd: Boolean
}
--------------------------------------------------------------------------------
/shared/src/test/scala/scala/util/parsing/combinator/gh56.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala.util.parsing.combinator

import scala.language.postfixOps
import scala.util.parsing.combinator.syntactical.StandardTokenParsers

import org.junit.Assert.{assertEquals, assertTrue}
import org.junit.Test

/**
 * Test for issue 56: https://github.com/scala/scala-parser-combinators/issues/56
 *
 * Makes sure that lineContents (and thus longString) in the Position trait doesn't
 * include a newline
 */
class gh56 {
  private object grammar extends StandardTokenParsers with PackratParsers {
    lazy val term = (numericLit | stringLit | ident)+
  }

  @Test
  def test1: Unit = {
    import grammar._

    // Unterminated block comment spanning several lines; failure is reported
    // on the empty line 4, whose lineContents must be empty (no '\n').
    val expr =
      """/* an unclosed comment
        |of multiple lines
        |just to check longString/lineContents
        |""".stripMargin

    val fail =
      """[4.1] failure: identifier expected
        |
        |
        |^""".stripMargin

    val parseResult = phrase(term)(new lexical.Scanner(expr))
    assertTrue(parseResult.isInstanceOf[Failure])
    assertEquals(fail, parseResult.toString)
  }


  @Test
  def test2: Unit = {
    import grammar._

    val expr = "/* an unclosed comment without newline"

    // Caret points one past the 38-character input line, i.e. column 39.
    val fail =
      """[1.39] failure: identifier expected
        |
        |/* an unclosed comment without newline
        |                                      ^""".stripMargin

    val parseResult = phrase(term)(new lexical.Scanner(expr))
    assertTrue(parseResult.isInstanceOf[Failure])
    assertEquals(fail, parseResult.toString)
  }
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/combinator/ImplicitConversions.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing.combinator

import scala.language.implicitConversions

/** This object contains implicit conversions that come in handy when using the `^^` combinator.
 *
 *  Refer to [[scala.util.parsing.combinator.Parsers]] to construct an AST from the concrete syntax.
 *
 *  The reason for this is that the sequential composition combinator (`~`) combines its constituents
 *  into a ~. When several `~`s are combined, this results in nested `~`s (to the left).
 *  The `flatten*` coercions makes it easy to apply an `n`-argument function to a nested `~` of
 *  depth `n-1`
 *
 *  The `headOptionTailToFunList` converts a function that takes a `List[A]` to a function that
 *  accepts a `~[A, Option[List[A]]]` (this happens when parsing something of the following
 *  shape: `p ~ opt("." ~ repsep(p, "."))` -- where `p` is a parser that yields an `A`).
 */
trait ImplicitConversions { self: Parsers =>
  // Adapt a 2-ary function to consume the left-nested ~ produced by `a ~ b`.
  implicit def flatten2[A, B, C] (f: (A, B) => C): A ~ B => C =
    (p: ~[A, B]) => p match {case a ~ b => f(a, b)}
  // 3-ary variant: unpacks ~(~(a, b), c).
  implicit def flatten3[A, B, C, D] (f: (A, B, C) => D): A ~ B ~ C => D =
    (p: ~[~[A, B], C]) => p match {case a ~ b ~ c => f(a, b, c)}
  // 4-ary variant.
  implicit def flatten4[A, B, C, D, E] (f: (A, B, C, D) => E): A ~ B ~ C ~ D => E =
    (p: ~[~[~[A, B], C], D]) => p match {case a ~ b ~ c ~ d => f(a, b, c, d)}
  // 5-ary variant.
  implicit def flatten5[A, B, C, D, E, F](f: (A, B, C, D, E) => F): A ~ B ~ C ~ D ~ E => F =
    (p: ~[~[~[~[A, B], C], D], E]) => p match {case a ~ b ~ c ~ d ~ e=> f(a, b, c, d, e)}
  // Prepends the mandatory head to the optional tail before applying `f`.
  implicit def headOptionTailToFunList[A, T] (f: List[A] => T): A ~ Option[List[A]] => T =
    (p: ~[A, Option[List[A]]]) => f(p._1 :: (p._2 match { case Some(xs) => xs case None => Nil}))
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/input/PagedSeqReader.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing.input

/** An object encapsulating basic character constants.
 */
object PagedSeqReader {
  final val EofCh = '\u001a'
}

/** A character array reader reads a stream of characters (keeping track of their positions)
 * from an array.
 *
 * @param seq     the source sequence
 * @param offset  starting offset.
 */
class PagedSeqReader(seq: PagedSeq[Char],
                     override val offset: Int) extends Reader[Char] { outer =>
  import PagedSeqReader._

  // Lazy CharSequence view over the paged sequence; shared (via `outer.source`)
  // by derived readers so positions stay comparable.
  override val source: java.lang.CharSequence = new SeqCharSequence(seq)

  /** Construct a `PagedSeqReader` with its first element at
   *  `source(0)` and position `(1,1)`.
   */
  def this(seq: PagedSeq[Char]) = this(seq, 0)

  /** Returns the first element of the reader, or EofCh if reader is at its end
   */
  def first =
    if (seq.isDefinedAt(offset)) seq(offset) else EofCh

  /** Returns a PagedSeqReader consisting of all elements except the first
   *
   * @return If `atEnd` is `true`, the result will be `this`;
   *         otherwise, it's a `PagedSeqReader` containing the rest of input.
   */
  def rest: PagedSeqReader =
    if (seq.isDefinedAt(offset)) new PagedSeqReader(seq, offset + 1) {
      override val source: java.lang.CharSequence = outer.source
    }
    else this

  /** The position of the first element in the reader.
   */
  def pos: Position = new OffsetPosition(source, offset)

  /** true iff there are no more elements in this reader (except for trailing
   *  EofCh's).
   */
  def atEnd = !seq.isDefinedAt(offset)

  /** Returns an abstract reader consisting of all elements except the first
   *  `n` elements.  Constant-time override of the linear default in `Reader`.
   */
  override def drop(n: Int): PagedSeqReader =
    new PagedSeqReader(seq, offset + n) {
      override val source: java.lang.CharSequence = outer.source
    }
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/combinator/lexical/Scanners.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing
package combinator
package lexical

import input._

/** This component provides core functionality for lexical parsers.
 *
 *  See its subclasses [[scala.util.parsing.combinator.lexical.Lexical]] and -- most interestingly
 *  [[scala.util.parsing.combinator.lexical.StdLexical]], for more functionality.
 */
trait Scanners extends Parsers {
  type Elem = Char
  type Token

  /** This token is produced by a scanner `Scanner` when scanning failed. */
  def errorToken(msg: String): Token

  /** A parser that produces a token (from a stream of characters). */
  def token: Parser[Token]

  /** A parser for white-space -- its result will be discarded. */
  def whitespace: Parser[Any]

  /** `Scanner` is essentially¹ a parser that produces `Token`s
   *  from a stream of characters. The tokens it produces are typically
   *  passed to parsers in `TokenParsers`.
   *
   * @note ¹ `Scanner` is really a `Reader` of `Token`s
   */
  class Scanner(in: Reader[Char]) extends Reader[Token] {
    /** Convenience constructor (makes a character reader out of the given string) */
    def this(in: String) = this(new CharArrayReader(in.toCharArray))
    // Eagerly scan ONE token: skip whitespace, then either read a token or turn
    // the failure into an error token.  rest1 = reader at the token's start
    // (used for `pos`), rest2 = reader after the token (used for `rest`).
    private val (tok, rest1, rest2) = whitespace(in) match {
      case Success(_, in1) =>
        token(in1) match {
          case Success(tok, in2) => (tok, in1, in2)
          // skip one char past the failure so scanning cannot loop forever
          case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next))
        }
      case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next))
    }
    private def skip(in: Reader[Char]) = if (in.atEnd) in else in.rest

    override def source: java.lang.CharSequence = in.source
    override def offset: Int = in.offset
    def first = tok
    def rest: Scanner = new Scanner(rest2)
    def pos = rest1.pos
    // At end when only whitespace remains (trailing whitespace yields no token).
    def atEnd = in.atEnd || (whitespace(in) match { case Success(_, in1) => in1.atEnd case _ => false })
  }
}
--------------------------------------------------------------------------------
/shared/src/test/scala/scala/util/parsing/input/OffsetPositionTest.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala.util.parsing.input

import org.junit.Test
import org.junit.Assert.assertEquals

/** Checks OffsetPosition's line counting and lineContents for every newline
 *  convention (LF, CR, CRLF) at leading, inner, and trailing positions.
 */
class OffsetPositionTest {
  // An offset just past a line terminator sits on a fresh, empty line.
  @Test
  def lineContentsWithTrailingLF: Unit = {
    val op = new OffsetPosition("\n", 1)
    assertEquals("", op.lineContents)
  }

  @Test
  def lineContentsWithTrailingCR: Unit = {
    val op = new OffsetPosition("\r", 1)
    assertEquals("", op.lineContents)
  }

  @Test
  def lineContentsWithTrailingCRLF: Unit = {
    val op = new OffsetPosition("\r\n", 2)
    assertEquals("", op.lineContents)
  }

  @Test
  def lineContentsWithEmptySource: Unit = {
    val op = new OffsetPosition("", 0)
    assertEquals("", op.lineContents)
  }

  @Test
  def linesWithLF: Unit = {
    val op = new OffsetPosition("foo\nbar", 4)
    assertEquals(2, op.line)
  }

  @Test
  def linesWithCR: Unit = {
    val op = new OffsetPosition("foo\rbar", 4)
    assertEquals(2, op.line)
  }

  // CRLF must count as a single terminator, not two.
  @Test
  def linesWithCRLF: Unit = {
    val op = new OffsetPosition("foo\r\nbar", 5)
    assertEquals(2, op.line)
  }

  @Test
  def linesWithTrailingLFs: Unit = {
    val op = new OffsetPosition("foo\n\n", 5)
    assertEquals(3, op.line)
  }

  @Test
  def linesWithTrailingCRs: Unit = {
    val op = new OffsetPosition("foo\r\r", 5)
    assertEquals(3, op.line)
  }

  @Test
  def linesWithTrailingCRLFs: Unit = {
    val op = new OffsetPosition("foo\r\n\r\n", 7)
    assertEquals(3, op.line)
  }

  @Test
  def linesWithLeadingLF: Unit = {
    val op = new OffsetPosition("\n", 1)
    assertEquals(2, op.line)
  }

  @Test
  def linesWithLeadingCR: Unit = {
    val op = new OffsetPosition("\r", 1)
    assertEquals(2, op.line)
  }

  @Test
  def linesWithLeadingCRLF: Unit = {
    val op = new OffsetPosition("\r\n", 2)
    assertEquals(2, op.line)
  }
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/input/CharSequenceReader.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing.input

/** An object encapsulating basic character constants.
 */
object CharSequenceReader {
  final val EofCh = '\u001a'
}

/** A reader over a `CharSequence` that tracks positions as offsets into it.
 *
 *  @param source  the underlying character sequence
 *  @param offset  index of the current element within `source`
 */
class CharSequenceReader(override val source: java.lang.CharSequence,
                         override val offset: Int) extends Reader[Char] {
  import CharSequenceReader._

  /** Construct a `CharSequenceReader` positioned at `source(0)`, i.e. `(1,1)`. */
  def this(source: java.lang.CharSequence) = this(source, 0)

  // true while the current offset still points inside the sequence
  private def inBounds: Boolean = offset < source.length

  /** The element under the cursor, or `EofCh` once input is exhausted. */
  def first = if (inBounds) source.charAt(offset) else EofCh

  /** A reader over everything after the first element; `this` when already at the end. */
  def rest: CharSequenceReader =
    if (inBounds) new CharSequenceReader(source, offset + 1) else this

  /** Line/column position of the current offset. */
  def pos: Position = new OffsetPosition(source, offset)

  /** Whether all input (except trailing EofCh's) has been consumed. */
  def atEnd = !inBounds

  /** Skips `n` elements in one step (constant-time override of the linear default). */
  override def drop(n: Int): CharSequenceReader =
    new CharSequenceReader(source, offset + n)

  /** Renders as `CharSequenceReader('c', ...)`, or `CharSequenceReader()` at end. */
  override def toString: String = {
    val c = if (atEnd) "" else s"'$first', ..."
    s"CharSequenceReader($c)"
  }
}
--------------------------------------------------------------------------------
/shared/src/main/scala/scala/util/parsing/input/StreamReader.scala:
--------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing.input

/** An object to create a `StreamReader` from a `java.io.Reader`.
 */
object StreamReader {
  final val EofCh = '\u001a'

  /** Create a `StreamReader` from a `java.io.Reader`.
   *
   * @param in the `java.io.Reader` that provides the underlying
   *           stream of characters for this Reader.
/** An object to create a `StreamReader` from a `java.io.Reader`.
 */
object StreamReader {
  final val EofCh = '\u001a'

  /** Create a `StreamReader` from a `java.io.Reader`.
   *
   * @param in the `java.io.Reader` that provides the underlying
   *           stream of characters for this Reader.
   */
  def apply(in: java.io.Reader): StreamReader = {
    // Offset 0, line 1: reading starts at the very beginning of the stream.
    new StreamReader(PagedSeq.fromReader(in), 0, 1)
  }
}

/** A StreamReader reads from a character sequence, typically created as a PagedSeq
 *  from a java.io.Reader
 *
 *  NOTE:
 *  StreamReaders do not really fulfill the new contract for readers, which
 *  requires a `source` CharSequence representing the full input.
 *  Instead source is treated line by line.
 *  As a consequence, regex matching cannot extend beyond a single line
 *  when a StreamReader is used for input.
 *
 *  If you need to match regexes spanning several lines you should consider
 *  class `PagedSeqReader` instead.
 *
 *  Implementation note: `seq` is re-sliced whenever a `'\n'` is consumed
 *  (see `rest` and `drop`), so `off` is always an offset *within the current
 *  line* and the column is simply `off + 1`. `nextEol0` caches the offset of
 *  the current line's terminator, with `-1` meaning "not computed yet".
 */
sealed class StreamReader private (seq: PagedSeq[Char], off: Int, lnum: Int, nextEol0: Int) extends PagedSeqReader(seq, off) {
  // Public constructor: end-of-line position unknown, computed lazily by `nextEol`.
  def this(seq: PagedSeq[Char], off: Int, lnum: Int) = this(seq, off, lnum, -1)

  import StreamReader.EofCh

  /** The reader positioned after the first element; consuming a newline
   *  advances to a fresh slice starting the next line (offset 0, line + 1)
   *  and invalidates the cached end-of-line.
   */
  override def rest: StreamReader =
    if (!seq.isDefinedAt(off)) this            // at end of input: nothing to consume
    else if (seq(off) == '\n')
      new StreamReader(seq.slice(off + 1), 0, lnum + 1, -1)
    else new StreamReader(seq, off + 1, lnum, nextEol0)  // same line: cache stays valid

  /** Offset of the current line's terminator ('\n', EofCh, or end of `seq`),
   *  scanning forward from `off` only when the cached value is -1.
   */
  private def nextEol = if (nextEol0 == -1) {
    var i = off
    while (seq.isDefinedAt(i) && seq(i) != '\n' && seq(i) != EofCh) i += 1
    i
  } else nextEol0

  /** Skip `n` elements; recurses line by line so the line count and the
   *  per-line slicing invariant are maintained across newlines.
   */
  override def drop(n: Int): StreamReader = {
    val eolPos = nextEol
    if (eolPos < off + n && seq.isDefinedAt(eolPos))
      // The skip crosses the end of this line: consume through the newline,
      // then drop the remaining count from the start of the next line.
      new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1, -1).drop(off + n - (eolPos + 1))
    else
      // Stays within the current line; reuse the (now known) end-of-line cache.
      new StreamReader(seq, off + n, lnum, eolPos)
  }

  /** Position of the first element: line is tracked explicitly, column is the
   *  offset into the current line's slice, and `lineContents` is everything
   *  up to the line terminator.
   */
  override def pos: Position = new Position {
    def line = lnum
    def column = off + 1
    def lineContents = seq.slice(0, nextEol).toString
  }
}
/** `JavaTokenParsers` differs from [[scala.util.parsing.combinator.RegexParsers]]
 *  by adding the following definitions:
 *
 *  - `ident`
 *  - `wholeNumber`
 *  - `decimalNumber`
 *  - `stringLiteral`
 *  - `floatingPointNumber`
 */
trait JavaTokenParsers extends RegexParsers {
  /** Anything that is a valid Java identifier, according to
   * The Java Language Spec.
   * Generally, this means a letter, followed by zero or more letters or numbers.
   */
  def ident: Parser[String] =
    // The empty-literal parser matches nothing but, like any literal in
    // RegexParsers, skips leading whitespace first; the `~>` then discards it.
    "" ~> // handle whitespace
    rep1(acceptIf(Character.isJavaIdentifierStart)("identifier expected but '" + _ + "' found"),
        elem("identifier part", Character.isJavaIdentifierPart(_: Char))) ^^ (_.mkString)

  /** An integer, without sign or with a negative sign. */
  def wholeNumber: Parser[String] =
    """-?\d+""".r

  /** Number following one of these rules:
   *
   *  - An integer. For example: `13`
   *  - An integer followed by a decimal point. For example: `3.`
   *  - An integer followed by a decimal point and fractional part. For example: `3.14`
   *  - A decimal point followed by a fractional part. For example: `.1`
   */
  def decimalNumber: Parser[String] =
    """(\d+(\.\d*)?|\d*\.\d+)""".r

  /** Double quotes (`"`) enclosing a sequence of:
   *
   *  - Any character except double quotes, control characters or backslash (`\`)
   *  - A backslash followed by another backslash, a single or double quote, or one
   *    of the letters `b`, `f`, `n`, `r` or `t`
   *  - `\` followed by `u` followed by four hexadecimal digits
   */
  def stringLiteral: Parser[String] =
    ("\""+"""([^"\x00-\x1F\x7F\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*"""+"\"").r

  /** A number following the rules of `decimalNumber`, with the following
   *  optional additions:
   *
   *  - Preceded by a negative sign
   *  - Followed by `e` or `E` and an optionally signed integer
   *  - Followed by `f`, `F`, `d` or `D` (after the above rule, if both are used)
   */
  def floatingPointNumber: Parser[String] =
    """-?(\d+(\.\d*)?|\d*\.\d+)([eE][+-]?\d+)?[fFdD]?""".r
}
/** `Position` is the base trait for objects describing a position in a `document`.
 *
 *  It provides functionality for:
 *  - generating a visual representation of this position (`longString`);
 *  - comparing two positions (`<`).
 *
 *  To use this class for a concrete kind of `document`, implement the `lineContents` method.
 */
trait Position {

  /** The line number referred to by the position; line numbers start at 1. */
  def line: Int

  /** The column number referred to by the position; column numbers start at 1. */
  def column: Int

  /** The contents of the line at this position. (must not contain a new-line character).
   */
  protected def lineContents: String

  /** Returns a string representation of the `Position`, of the form `line.column`. */
  override def toString = s"$line.$column"

  /** Returns a more ``visual`` representation of this position.
   *  More precisely, the resulting string consists of two lines:
   *  1. the line in the document referred to by this position
   *  2. a caret indicating the column
   *
   *  Example:
   *  {{{
   *    List(this, is, a, line, from, the, document)
   *                ^
   *  }}}
   *
   *  Tabs in the line prefix are kept (not replaced by spaces) so the caret
   *  stays aligned under tab-indented content.
   */
  def longString = lineContents+"\n"+lineContents.take(column-1).map{x => if (x == '\t') x else ' ' } + "^"

  /** Compare this position to another, by first comparing their line numbers,
   *  and then -- if necessary -- using the columns to break a tie.
   *
   * @param that a `Position` to compare to this `Position`
   * @return true if this position's line number or (in case of equal line numbers)
   *         column is smaller than the corresponding components of `that`
   */
  def <(that: Position) = {
    this.line < that.line ||
    this.line == that.line && this.column < that.column
  }

  /** Compare this position to another, checking for equality.
   *
   * @param other a `Position` to compare to this `Position`
   * @return true if the line numbers and column numbers are equal.
   */
  override def equals(other: Any) = {
    other match {
      case that: Position => this.line == that.line && this.column == that.column
      case _ => false
    }
  }

  /** A hash code consistent with `equals`: positions that compare equal
   *  (same line and column) hash identically, as required by the
   *  `Object.hashCode` contract. Previously `equals` was overridden without
   *  `hashCode`, so equal positions could misbehave as keys in hash-based
   *  collections.
   */
  override def hashCode: Int = 31 * line + column
}
/** `OffsetPosition` is a standard class for positions
 *  represented as offsets into a source ``document``.
 *
 *  @param source The source document
 *  @param offset The offset indicating the position
 */
case class OffsetPosition(source: CharSequence, offset: Int) extends Position {

  /** An index that contains all line starts, including first line, and eof.
   *  Element 0 is always 0, and the last element is `source.length` (a
   *  sentinel), so line `k` (1-based) spans `[index(k-1), index(k))`.
   *  Looked up in (and added to) the shared per-source `indexCache` so the
   *  scan over `source` happens at most once per document.
   */
  private lazy val index: Array[Int] = {
    Option(OffsetPosition.indexCache.get(source)) match {
      case Some(index) => index
      case None =>
        val index = genIndex
        OffsetPosition.indexCache.put(source, index)
        index
    }
  }

  /** Builds the line-start index. A new line starts after '\n', or after a
   *  '\r' that is NOT followed by '\n' (so a "\r\n" pair counts once, via
   *  its '\n').
   */
  private def genIndex: Array[Int] = {
    val lineStarts = new ArrayBuffer[Int]
    lineStarts += 0
    for (i <- 0 until source.length)
      if (source.charAt(i) == '\n' ||
        (source.charAt(i) == '\r' && (i == (source.length - 1) || source.charAt(i + 1) != '\n'))) {
        lineStarts += (i + 1)
      }
    lineStarts += source.length
    lineStarts.toArray
  }

  /** The line number referred to by the position; line numbers start at 1. */
  def line: Int = {
    // Binary search for the last line start that is <= offset:
    // maintains index(lo) <= offset (for in-range offsets) and narrows
    // [lo, hi) until adjacent; the answer is lo converted to 1-based.
    var lo = 0
    var hi = index.length - 1
    while (lo + 1 < hi) {
      val mid = lo + ((hi - lo) / 2)   // overflow-safe midpoint
      if (offset < index(mid)) hi = mid
      else lo = mid
    }
    lo + 1
  }

  /** The column number referred to by the position; column numbers start at 1. */
  def column: Int = offset - index(line - 1) + 1

  /** The contents of the line numbered at the current offset.
   *
   *  @return the line at `offset` (not including a newline)
   */
  def lineContents: String = {
    val lineStart = index(line - 1)
    val lineEnd = index(line)
    // Trim the line terminator: a trailing "\r\n" (two chars), or a single
    // trailing '\r' or '\n'; the last line may have no terminator at all.
    val endIndex =
      if (lineStart < lineEnd - 1 && source.charAt(lineEnd - 2) == '\r' && source.charAt(lineEnd - 1) == '\n') {
        lineEnd - 2
      } else if (lineStart < lineEnd && (source.charAt(lineEnd - 1) == '\r' || source.charAt(lineEnd - 1) == '\n')) {
        lineEnd - 1
      } else {
        lineEnd
      }
    source.subSequence(lineStart, endIndex).toString
  }

  /** Returns a string representation of the `Position`, of the form `line.column`.
   */
  override def toString = s"$line.$column"

  /** Compare this position to another, by first comparing their line numbers,
   *  and then -- if necessary -- using the columns to break a tie.
   *
   * @param that a `Position` to compare to this `Position`
   * @return true if this position's line number or (in case of equal line numbers)
   *         column is smaller than the corresponding components of `that`
   */
  override def <(that: Position) = that match {
    case OffsetPosition(_, that_offset) =>
      // Fast path: compare raw offsets directly. NOTE(review): this presumes
      // both positions refer to the same source document — confirm callers.
      this.offset < that_offset
    case _ =>
      this.line < that.line ||
      this.line == that.line && this.column < that.column
  }
}

/** An object holding the index cache.
 */
object OffsetPosition extends scala.runtime.AbstractFunction2[CharSequence,Int,OffsetPosition] with PositionCache
/** This component provides a standard lexical parser for a simple,
 *  [[http://scala-lang.org Scala]]-like language. It parses keywords and
 *  identifiers, numeric literals (integers), strings, and delimiters.
 *
 *  To distinguish between identifiers and keywords, it uses a set of
 *  reserved identifiers:  every string contained in `reserved` is returned
 *  as a keyword token.  (Note that `=>` is hard-coded as a keyword.)
 *  Additionally, the kinds of delimiters can be specified by the
 *  `delimiters` set.
 *
 *  Usually this component is used to break character-based input into
 *  bigger tokens, which are then passed to a token-parser (see
 *  [[scala.util.parsing.combinator.syntactical.TokenParsers]].)
 */
class StdLexical extends Lexical with StdTokens {
  // see `token` in `Scanners`
  // Alternatives are tried in order: identifier/keyword, integer literal,
  // single- then double-quoted string, end-of-file, delimiter, and finally
  // an error for anything unrecognized.
  def token: Parser[Token] =
    ( identChar ~ rep( identChar | digit )                ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
    | digit ~ rep( digit )                                ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
    | '\'' ~> rep( chrExcept('\'', '\n') ) >> { chars => stringEnd('\'', chars) }
    | '\"' ~> rep( chrExcept('\"', '\n') ) >> { chars => stringEnd('\"', chars) }
    | EofCh ^^^ EOF
    | delim
    | failure("illegal character")
    )

  /** Returns the legal identifier chars, except digits. */
  def identChar = letter | elem('_')

  /** Parses the final quote of a string literal or fails if it is unterminated.
   *  The `err` branch produces an unrecoverable error (not a mere failure),
   *  which the scanner surfaces as an `ErrorToken`.
   */
  private def stringEnd(quoteChar: Char, chars: List[Char]): Parser[Token] = {
    { elem(quoteChar) ^^^ StringLit(chars mkString "") } | err("unclosed string literal")
  }

  // see `whitespace in `Scanners`
  // The last alternative only applies when `comment` failed, i.e. a `/*`
  // was never closed: it consumes the rest of the input and reports an error.
  @nowarn("cat=lint-infer-any")
  def whitespace: Parser[Any] = rep[Any](
      whitespaceChar
    | '/' ~ '*' ~ comment
    | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') )
    | '/' ~ '*' ~ rep( elem("", _ => true) ) ~> err("unclosed comment")
    )

  // A `*` that is not followed by `/` recurses to keep scanning the comment.
  protected def comment: Parser[Any] = (
      rep (chrExcept (EofCh, '*')) ~ '*' ~ '/'     ^^ { _ => ' ' }
    | rep (chrExcept (EofCh, '*')) ~ '*' ~ comment ^^ { _ => ' ' }
    )

  /** The set of reserved identifiers: these will be returned as `Keyword`s. */
  val reserved = new mutable.HashSet[String]

  /** The set of delimiters (ordering does not matter). */
  val delimiters = new mutable.HashSet[String]

  protected def processIdent(name: String) =
    if (reserved contains name) Keyword(name) else Identifier(name)

  private lazy val _delim: Parser[Token] = {
    // construct parser for delimiters by |'ing together the parsers for the individual delimiters,
    // starting with the longest one -- otherwise a delimiter D will never be matched if there is
    // another delimiter that is a prefix of D
    def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { _ => Keyword(s) }

    val d = new Array[String](delimiters.size)
    delimiters.copyToArray(d, 0)
    scala.util.Sorting.quickSort(d)
    // `d` is sorted ascending (prefixes sort before their extensions); the
    // foldRight with `y | x` reverses that order, so longer delimiters are
    // tried before any delimiter that is a prefix of them.
    (d.toList map parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x)
  }
  protected def delim: Parser[Token] = _delim
}
/** Unit tests for [[StdLexical]]: keyword/identifier classification,
 *  delimiter matching, numeric and string literals, comments, and the
 *  error tokens produced for unclosed strings/comments.
 */
class StdLexicalTest {
  /** Runs `lexer`'s scanner over `input` and collects all produced tokens
   *  (error tokens included) in order.
   */
  private def lex[Lexer <: StdLexical](lexer: Lexer, input: String): List[lexer.Token] = {
    var scanner: Reader[lexer.Token] = new lexer.Scanner(input)
    val listBuffer = ListBuffer[lexer.Token]()
    while (!scanner.atEnd) {
      listBuffer += scanner.first
      scanner = scanner.rest
    }
    listBuffer.toList
  }

  @Test
  def parseKeyword: Unit = {
    object Lexer extends StdLexical
    Lexer.reserved add "keyword"
    import Lexer._
    assertEquals(
      List(Keyword("keyword"), Identifier("id")),
      lex(Lexer, "keyword id")
    )
  }

  @Test
  def parseDelimiters: Unit = {
    object Lexer extends StdLexical
    Lexer.delimiters ++= List("(", ")", "=>")
    import Lexer._
    assertEquals(
      List(Keyword("("), Identifier("id1"), Keyword(")"), Keyword("=>"), Identifier("id2")),
      lex(Lexer, "(id1) => id2")
    )
  }

  @Test
  def parseNumericLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._
    assertEquals(
      List(NumericLit("1"), NumericLit("21"), NumericLit("321")),
      lex(Lexer, " 1 21 321 ")
    )
  }

  @Test
  def parseStringLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._
    // Each quote style may contain the other quote character unescaped.
    assertEquals(
      List(StringLit("double double"), StringLit("single single"), StringLit("double'double"), StringLit("single\"single")),
      lex(Lexer, """
        "double double"
        'single single'
        "double'double"
        'single"single'
      """)
    )
  }

  @Test
  def parseUnclosedStringLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    // Unclosed double quoted string at end of input.
    assertEquals(
      List(Identifier("id"), ErrorToken("unclosed string literal")),
      lex(Lexer, """id """")
    )

    // Unclosed single quoted string at end of input.
    assertEquals(
      List(Identifier("id"), ErrorToken("unclosed string literal")),
      lex(Lexer, "id '")
    )

    // Unclosed double quoted string _not_ at end of input.
    assertEquals(
      List(Identifier("id"), ErrorToken("unclosed string literal")),
      lex(Lexer, """id "string""")
    )

    // Unclosed single quoted string _not_ at end of input.
    assertEquals(
      List(Identifier("id"), ErrorToken("unclosed string literal")),
      lex(Lexer, "id 'string")
    )
  }

  @Test
  def parseIllegalCharacter: Unit = {
    object Lexer extends StdLexical
    import Lexer._
    // \u2665 (♥) is neither an identifier char nor a delimiter.
    assertEquals(
      List(Identifier("we"), ErrorToken("illegal character"), Identifier("scala")),
      lex(Lexer, "we\u2665scala")
    )
  }

  @Test
  def parseComments: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    // Single-line comments.
    assertEquals(
      List(Identifier("id")),
      lex(Lexer, "//\n// comment\nid // ")
    )

    // Multi-line comments.
    assertEquals(
      List(Identifier("id1"), Identifier("id2")),
      lex(Lexer, "/* single */ id1 /* multi \n line */ id2")
    )
  }

  @Test
  def parseUnclosedComments: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    assertEquals(
      List(Identifier("id"), ErrorToken("unclosed comment")),
      lex(Lexer, "id /*")
    )

    assertEquals(
      List(Identifier("id"), ErrorToken("unclosed comment")),
      lex(Lexer, "id /* ")
    )
  }
}
/** Unit tests for [[JavaTokenParsers]]: `decimalNumber`, `ident`, and
 *  repetition/option combinators layered on top of them, including the
 *  error positions and messages reported on failure.
 */
class JavaTokenParsersTest {

  @Test
  def parseDecimalNumber: Unit = {
    object TestJavaTokenParsers extends JavaTokenParsers
    import TestJavaTokenParsers._
    assertEquals("1.1", decimalNumber(new CharArrayReader("1.1".toCharArray)).get)
    assertEquals("1.", decimalNumber(new CharArrayReader("1.".toCharArray)).get)
    assertEquals(".1", decimalNumber(new CharArrayReader(".1".toCharArray)).get)
    // should fail to parse and we should get Failure as ParseResult
    val failure = decimalNumber(new CharArrayReader("!1".toCharArray)).asInstanceOf[Failure]
    assertEquals("""string matching regex '(\d+(\.\d*)?|\d*\.\d+)' expected but '!' found""", failure.msg)
  }

  @Test
  def parseJavaIdent: Unit = {
    object javaTokenParser extends JavaTokenParsers
    import javaTokenParser._
    // Asserts that `s` parses in full as an identifier equal to itself.
    def parseSuccess(s: String): Unit = {
      val parseResult = parseAll(ident, s)
      parseResult match {
        case Success(r, _) => assertEquals(s, r)
        case _ => sys.error(parseResult.toString)
      }
    }
    // Asserts that parsing `s` fails with the error at column `errorColPos` of line 1.
    def parseFailure(s: String, errorColPos: Int): Unit = {
      val parseResult = parseAll(ident, s)
      parseResult match {
        case Failure(_, next) =>
          val pos = next.pos
          assertEquals(1, pos.line)
          assertEquals(errorColPos, pos.column)
        case _ => sys.error(parseResult.toString)
      }
    }
    parseSuccess("simple")
    parseSuccess("with123")
    parseSuccess("with$")
    parseSuccess("with\u00f8\u00df\u00f6\u00e8\u00e6")
    parseSuccess("with_")
    parseSuccess("_with")

    parseFailure("", 1)
    parseFailure("3start", 1)
    parseFailure("-start", 1)
    parseFailure("with-s", 5)
    // we♥scala
    parseFailure("we\u2665scala", 3)
    parseFailure("with space", 5)
  }

  @Test
  def repeatedlyParsesTest: Unit = {
    object TestTokenParser extends JavaTokenParsers
    import TestTokenParser._
    // An identifier followed by zero or more case-insensitive "AND"s.
    val p = ident ~ "(?i)AND".r.*

    val parseResult = parseAll(p, "start")
    parseResult match {
      case Success(r, _) =>
        assertEquals("start", r._1)
        assertEquals(0, r._2.size)
      case _ => sys.error(parseResult.toString)
    }

    val parseResult1 = parseAll(p, "start start")
    parseResult1 match {
      case Failure(message, next) =>
        assertEquals(next.pos.line, 1)
        assertEquals(next.pos.column, 7)
        assert(message.endsWith("string matching regex '(?i)AND' expected but 's' found"))
      case _ => sys.error(parseResult1.toString)
    }

    val parseResult2 = parseAll(p, "start AND AND")
    parseResult2 match {
      case Success(r, _) =>
        assertEquals("start", r._1)
        assertEquals("AND AND", r._2.mkString(" "))
      case _ => sys.error(parseResult2.toString)
    }
  }

  @Test
  def optionParserTest: Unit = {
    object TestTokenParser extends JavaTokenParsers
    import TestTokenParser._
    val p = opt(ident)

    // `opt` does not mask an error in its argument parser mid-input.
    val parseResult = parseAll(p, "-start")
    parseResult match {
      case Failure(message, next) =>
        assertEquals(next.pos.line, 1)
        assertEquals(next.pos.column, 1)
        assert(message.endsWith(s"identifier expected but '-' found"))
      case _ => sys.error(parseResult.toString)
    }

    val parseResult2 = parseAll(p, "start ")
    parseResult2 match {
      case Success(r, _) =>
        assertEquals(r, Some("start"))
      case _ =>
        sys.error(parseResult2.toString)
    }

    val parseResult3 = parseAll(p, "start")
    parseResult3 match {
      case Success(r, _) =>
        assertEquals(r, Some("start"))
      case _ => sys.error(parseResult3.toString)
    }
  }


}
If you are interested in joining the maintainers team, please contact [@Philippus](https://github.com/philippus) or [@SethTisue](https://github.com/SethTisue). 9 | 10 | ## Choosing a parsing library 11 | 12 | This library's main strengths are: 13 | 14 | * Stability. It's been around and in wide use for more than a decade. 15 | * The codebase is modest in size and its internals are fairly simple. 16 | * It's plain vanilla Scala. No macros, code generation, or other magic is involved. 17 | * Multiple versions of Scala (2.12, 2.13, 3) are supported on all back ends (JVM, JS, Native). 18 | 19 | Its main weaknesses are: 20 | 21 | * Performance. If you are ingesting large amounts of data, you may want something faster. 22 | * Minimal feature set. 23 | * Inflexible, unstructured error reporting. 24 | 25 | A number of other parsing libraries for Scala are available -- [see list on Scaladex](https://index.scala-lang.org/awesome/parsing?sort=stars). 26 | 27 | ## Documentation 28 | 29 | * [Current API](https://javadoc.io/page/org.scala-lang.modules/scala-parser-combinators_2.13/latest/scala/util/parsing/combinator/index.html) 30 | * The [Getting Started](docs/Getting_Started.md) guide 31 | * A more complicated example, [Building a lexer and parser with Scala's Parser Combinators](https://enear.github.io/2016/03/31/parser-combinators/) 32 | * "Combinator Parsing", chapter 33 of [_Programming in Scala, Third Edition_](http://www.artima.com/shop/programming_in_scala), shows how to apply this library to e.g. parsing of arithmetic expressions. The second half of the chapter examines how the library is implemented. 
33 | 34 | ## Adding an sbt dependency 35 | 36 | To depend on scala-parser-combinators in sbt, add something like this to your build.sbt: 37 | 38 | ``` 39 | libraryDependencies += "org.scala-lang.modules" %% "scala-parser-combinators" % 40 | ``` 41 | 42 | To support multiple Scala versions, see the example in [scala/scala-module-dependency-sample](https://github.com/scala/scala-module-dependency-sample). 43 | 44 | ### Scala.js and Scala Native 45 | 46 | Scala-parser-combinators is also available for Scala.js and Scala Native: 47 | 48 | ``` 49 | libraryDependencies += "org.scala-lang.modules" %%% "scala-parser-combinators" % 50 | ``` 51 | 52 | ## Example 53 | 54 | ```scala 55 | import scala.util.parsing.combinator._ 56 | 57 | case class WordFreq(word: String, count: Int) { 58 | override def toString = s"Word <$word> occurs with frequency $count" 59 | } 60 | 61 | class SimpleParser extends RegexParsers { 62 | def word: Parser[String] = """[a-z]+""".r ^^ { _.toString } 63 | def number: Parser[Int] = """(0|[1-9]\d*)""".r ^^ { _.toInt } 64 | def freq: Parser[WordFreq] = word ~ number ^^ { case wd ~ fr => WordFreq(wd,fr) } 65 | } 66 | 67 | object TestSimpleParser extends SimpleParser { 68 | def main(args: Array[String]) = { 69 | parse(freq, "johnny 121") match { 70 | case Success(matched,_) => println(matched) 71 | case Failure(msg,_) => println(s"FAILURE: $msg") 72 | case Error(msg,_) => println(s"ERROR: $msg") 73 | } 74 | } 75 | } 76 | ``` 77 | 78 | For a detailed unpacking of this example see 79 | [Getting Started](docs/Getting_Started.md). 
80 | 81 | ## Contributing 82 | 83 | * See the [Scala Developer Guidelines](https://github.com/scala/scala/blob/2.13.x/CONTRIBUTING.md) for general contributing guidelines 84 | * Have a look at [existing issues](https://github.com/scala/scala-parser-combinators/issues) 85 | * Ask questions and discuss [in GitHub Discussions](https://github.com/scala/scala-parser-combinators/discussions) 86 | * Feel free to open draft pull requests with partially completed changes, to get feedback. 87 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/gh242.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | import org.junit.Assert.assertEquals 14 | import org.junit.Test 15 | 16 | import scala.util.parsing.combinator.Parsers 17 | import scala.util.parsing.input.CharSequenceReader 18 | 19 | class gh242 { 20 | class TestWithSeparator extends Parsers { 21 | type Elem = Char 22 | val csv: Parser[List[Char]] = repNM(5, 10, 'a', ',') 23 | } 24 | 25 | class TestWithoutSeparator extends Parsers { 26 | type Elem = Char 27 | val csv: Parser[List[Char]] = repNM(5, 10, 'a') 28 | } 29 | 30 | @Test 31 | def testEmpty(): Unit = { 32 | val tstParsers = new TestWithSeparator 33 | val s = new CharSequenceReader("") 34 | val expectedFailure = """[1.1] failure: end of input 35 | | 36 | | 37 | |^""".stripMargin 38 | assertEquals(expectedFailure, tstParsers.csv(s).toString) 39 | } 40 | 41 | @Test 42 | def testBelowMinimum(): Unit = { 43 | val tstParsers = new TestWithSeparator 44 | val s = new CharSequenceReader("a,a,a,a") 45 | val expectedFailure = """[1.8] failure: end of input 46 | | 47 | |a,a,a,a 48 | | ^""".stripMargin 49 | assertEquals(expectedFailure, tstParsers.csv(s).toString) 50 | } 51 | 52 | @Test 53 | def testMinimum(): Unit = { 54 | val tstParsers = new TestWithSeparator 55 | val s = new CharSequenceReader("a,a,a,a,a") 56 | val expected = List.fill[Char](5)('a') 57 | val actual = tstParsers.csv(s) 58 | assertEquals(9, actual.next.offset) 59 | assert(actual.successful) 60 | assertEquals(expected, actual.get) 61 | } 62 | 63 | @Test 64 | def testInRange(): Unit = { 65 | val tstParsers = new TestWithSeparator 66 | val s = new CharSequenceReader("a,a,a,a,a,a,a,a") 67 | val expected = List.fill[Char](8)('a') 68 | val actual = tstParsers.csv(s) 69 | assertEquals(15, actual.next.offset) 70 | assert(actual.successful) 71 | assertEquals(expected, actual.get) 72 | } 73 | 74 | @Test 75 | def testMaximum(): Unit = { 76 | val tstParsers = new TestWithSeparator 77 | val s = new CharSequenceReader("a,a,a,a,a,a,a,a,a,a") 78 | val expected = List.fill[Char](10)('a') 79 | val 
actual = tstParsers.csv(s) 80 | assertEquals(19, actual.next.offset) 81 | assert(actual.successful) 82 | assertEquals(expected, actual.get) 83 | } 84 | 85 | @Test 86 | def testAboveMaximum(): Unit = { 87 | val tstParsers = new TestWithSeparator 88 | val s = new CharSequenceReader("a,a,a,a,a,a,a,a,a,a,a,a") 89 | val expected = List.fill[Char](10)('a') 90 | val actual = tstParsers.csv(s) 91 | assertEquals(19, actual.next.offset) 92 | assert(actual.successful) 93 | assertEquals(expected, actual.get) 94 | } 95 | 96 | @Test 97 | def testEmptyWithoutSep(): Unit = { 98 | val tstParsers = new TestWithoutSeparator 99 | val s = new CharSequenceReader("") 100 | val expectedFailure = """[1.1] failure: end of input 101 | | 102 | | 103 | |^""".stripMargin 104 | assertEquals(expectedFailure, tstParsers.csv(s).toString) 105 | } 106 | 107 | @Test 108 | def testBelowMinimumWithoutSep(): Unit = { 109 | val tstParsers = new TestWithoutSeparator 110 | val s = new CharSequenceReader("aaaa") 111 | val expectedFailure = """[1.5] failure: end of input 112 | | 113 | |aaaa 114 | | ^""".stripMargin 115 | assertEquals(expectedFailure, tstParsers.csv(s).toString) 116 | } 117 | 118 | @Test 119 | def testMinimumWithoutSep(): Unit = { 120 | val tstParsers = new TestWithoutSeparator 121 | val s = new CharSequenceReader("aaaaa") 122 | val expected = List.fill[Char](5)('a') 123 | val actual = tstParsers.csv(s) 124 | assertEquals(5, actual.next.offset) 125 | assert(actual.successful) 126 | assertEquals(expected, actual.get) 127 | } 128 | 129 | @Test 130 | def testInRangeWithoutSep(): Unit = { 131 | val tstParsers = new TestWithoutSeparator 132 | val s = new CharSequenceReader("aaaaaaaa") 133 | val expected = List.fill[Char](8)('a') 134 | val actual = tstParsers.csv(s) 135 | assertEquals(8, actual.next.offset) 136 | assert(actual.successful) 137 | assertEquals(expected, actual.get) 138 | } 139 | 140 | @Test 141 | def testMaximumWithoutSep(): Unit = { 142 | val tstParsers = new TestWithoutSeparator 143 
| val s = new CharSequenceReader("aaaaaaaaaa") 144 | val expected = List.fill[Char](10)('a') 145 | val actual = tstParsers.csv(s) 146 | assertEquals(10, actual.next.offset) 147 | assert(actual.successful) 148 | assertEquals(expected, actual.get) 149 | } 150 | 151 | @Test 152 | def testAboveMaximumWithoutSep(): Unit = { 153 | val tstParsers = new TestWithoutSeparator 154 | val s = new CharSequenceReader("aaaaaaaaaaaa") 155 | val expected = List.fill[Char](10)('a') 156 | val actual = tstParsers.csv(s) 157 | assertEquals(10, actual.next.offset) 158 | assert(actual.successful) 159 | assertEquals(expected, actual.get) 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/RegexParsersTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala.util.parsing.combinator 14 | 15 | import org.junit.Test 16 | import org.junit.Assert.{ assertEquals, assertTrue } 17 | 18 | class RegexParsersTest { 19 | @Test 20 | def parserNoSuccessMessage: Unit = { 21 | object parser extends RegexParsers { 22 | def sign = "-" 23 | def number = "\\d+".r 24 | type ResultType = Option[String] ~ String 25 | def p: Parser[ResultType] = sign.? ~ number withErrorMessage "Number expected!" 26 | def q: Parser[ResultType] = sign.? ~! number withErrorMessage "Number expected!" 
27 | } 28 | import parser._ 29 | def extractResult(r: ParseResult[ResultType]): ResultType = r match { 30 | case Success(r, _) => r 31 | case r => sys.error(r.toString) 32 | } 33 | def result(num: Int): ResultType = { 34 | val minusSign = if (num < 0) Some("-") else None 35 | val absNumStr = Math.abs(num).toString 36 | new ~(minusSign, absNumStr) 37 | } 38 | 39 | val failure1 = parseAll(p, "-x").asInstanceOf[Failure] 40 | assertEquals("string matching regex '\\d+' expected but 'x' found", failure1.msg) 41 | val failure2 = parseAll(p, "x").asInstanceOf[Failure] 42 | assertEquals("string matching regex '\\d+' expected but 'x' found", failure2.msg) 43 | assertEquals(result(-5), extractResult(parseAll(p, "-5"))) 44 | assertEquals(result(5), extractResult(parseAll(p, "5"))) 45 | val error1 = parseAll(q, "-x").asInstanceOf[Error] 46 | assertEquals("Number expected!", error1.msg) 47 | val error2 = parseAll(q, "x").asInstanceOf[Error] 48 | assertEquals("Number expected!", error2.msg) 49 | assertEquals(result(-5), extractResult(parseAll(q, "-5"))) 50 | assertEquals(result(5), extractResult(parseAll(q, "5"))) 51 | } 52 | 53 | @Test 54 | def parserSkippingResult: Unit = { 55 | object parser extends RegexParsers { 56 | def quote = "\"" 57 | def string = """[a-zA-Z]*""".r 58 | type ResultType = String 59 | def p: Parser[ResultType] = quote ~> string <~ quote 60 | def q: Parser[ResultType] = quote ~>! string <~! 
quote 61 | def halfQuoted = quote ~ string ^^ { case q ~ s => q + s } 62 | } 63 | import parser._ 64 | assertTrue(parseAll(p, "\"asdf").isInstanceOf[Failure]) 65 | assertTrue(parseAll(p, "asdf\"").isInstanceOf[Failure]) 66 | assertTrue(parseAll(q | quote, "\"").isInstanceOf[Error]) 67 | assertTrue(parseAll(q | halfQuoted, "\"asdf").isInstanceOf[Error]) 68 | 69 | val successP = parseAll(p, "\"asdf\"").get 70 | assertEquals(successP, "asdf") 71 | val successPBacktrackL = parseAll(p | quote, "\"").get 72 | assertEquals(successPBacktrackL, "\"") 73 | val successPBacktrackR = parseAll(p | halfQuoted, "\"asdf").get 74 | assertEquals(successPBacktrackR, "\"asdf") 75 | 76 | val successQ = parseAll(q, "\"asdf\"").get 77 | assertEquals(successQ, "asdf") 78 | } 79 | 80 | @Test 81 | def parserFilter: Unit = { 82 | object parser extends RegexParsers { 83 | val keywords = Set("if", "false") 84 | def word: Parser[String] = "\\w+".r 85 | 86 | def keyword: Parser[String] = word filter (keywords.contains) 87 | def ident: Parser[String] = word filter(!keywords.contains(_)) 88 | 89 | def test: Parser[String ~ String] = keyword ~ ident 90 | } 91 | import parser._ 92 | 93 | val failure1 = parseAll(test, "if false").asInstanceOf[Failure] 94 | assertEquals("Input doesn't match filter: false", failure1.msg) 95 | val failure2 = parseAll(test, "not true").asInstanceOf[Failure] 96 | assertEquals("Input doesn't match filter: not", failure2.msg) 97 | val success = parseAll(test, "if true").asInstanceOf[Success[String ~ String]] 98 | assertEquals(new ~("if", "true"), success.get) 99 | } 100 | 101 | @Test 102 | def parserForFilter: Unit = { 103 | object parser extends RegexParsers { 104 | def word: Parser[String] = "\\w+".r 105 | 106 | def twoWords = for { 107 | (a ~ b) <- word ~ word 108 | } yield (b, a) 109 | } 110 | import parser._ 111 | 112 | val success = parseAll(twoWords, "first second").asInstanceOf[Success[(String, String)]] 113 | assertEquals(("second", "first"), success.get) 114 | } 
115 | 116 | @Test 117 | def errorConsumesWhitespace: Unit = { 118 | object parser extends RegexParsers { 119 | def num = "\\d+".r 120 | 121 | def twoNums = num ~ (num | err("error!")) 122 | } 123 | import parser._ 124 | 125 | // this used to return a Failure (for the second num) 126 | val error = parseAll(twoNums, "458 bar") 127 | assertTrue(s"expected an Error but got: ${error.getClass.getName}", error.isInstanceOf[Error]) 128 | assertEquals("error!", error.asInstanceOf[Error].msg) 129 | } 130 | 131 | @Test 132 | def hierarchicalRepSuccess: Unit = { 133 | case class Node(a: String, b: String) 134 | 135 | object parser extends RegexParsers { 136 | def top: Parser[List[List[Node]]] = rep(nodes) 137 | def nodes: Parser[List[Node]] = "{" ~> rep(node) <~ "}" 138 | def node: Parser[Node] = "[a-z]+".r ~ ":" ~ "[a-z]+".r ^^ { case a ~ _ ~ b => Node(a, b) } 139 | } 140 | 141 | import parser._ 142 | 143 | val success0 = parseAll(top, "{ a : b c : d}").get 144 | assertEquals(List(List(Node("a", "b"), Node("c", "d"))), success0) 145 | val success1 = parseAll(top, "{ a : b } { c : d }").get 146 | assertEquals(List(List(Node("a", "b")), List(Node("c", "d"))), success1) 147 | val success2 = parseAll(top, "{} {}").get 148 | assertEquals(List(List(), List()), success2) 149 | val success3 = parseAll(top, "").get 150 | assertEquals(List(), success3) 151 | } 152 | 153 | @Test 154 | def hierarchicalRepFailure: Unit = { 155 | case class Node(a: String, b: String) 156 | 157 | object parser extends RegexParsers { 158 | def top: Parser[List[List[Node]]] = rep(nodes) 159 | def nodes: Parser[List[Node]] = "{" ~> rep(node) <~ "}" 160 | def node: Parser[Node] = "[a-z]+".r ~ ":" ~ "[a-z]+".r ^^ { case a ~ _ ~ b => Node(a, b) } 161 | } 162 | 163 | def test(src: String, expect: String, column: Int): Unit = { 164 | import parser._ 165 | val result = parseAll(top, src) 166 | result match { 167 | case Failure(msg, next) => 168 | assertEquals(column, next.pos.column) 169 | assertEquals(expect, msg) 
170 | case _ => 171 | sys.error(result.toString) 172 | } 173 | } 174 | 175 | test("{ a : b c : }", "string matching regex '[a-z]+' expected but '}' found", 13) 176 | test("{", "'}' expected but end of source found", 2) 177 | } 178 | 179 | @Test 180 | def ifElseTest: Unit = { 181 | object parser extends RegexParsers { 182 | def top: Parser[List[Unit]] = rep(ifelse) 183 | def ifelse: Parser[Unit] = "IF" ~ condition ~ "THEN" ~ "1"~ "END" ^^ { _ => } 184 | def condition: Parser[String] = "TRUE" | "FALSE" 185 | } 186 | 187 | import parser._ 188 | val res = parseAll(top, "IF FALSE THEN 1 IF TRUE THEN 1 END") 189 | res match { 190 | case Failure(msg, next) => 191 | assertEquals(17, next.pos.column) 192 | assertEquals("'END' expected but 'I' found", msg) 193 | case _ => 194 | sys.error(res.toString) 195 | } 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/combinator/RegexParsers.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing.combinator 15 | 16 | import scala.util.matching.Regex 17 | import scala.util.parsing.input._ 18 | import scala.language.implicitConversions 19 | 20 | /** The ''most important'' differences between `RegexParsers` and 21 | * [[scala.util.parsing.combinator.Parsers]] are: 22 | * 23 | * - `Elem` is defined to be [[scala.Char]] 24 | * - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`, 25 | * so that string literals can be used as parser combinators. 
26 | * - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`, 27 | * so that regex expressions can be used as parser combinators. 28 | * - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true, 29 | * skip any whitespace before each parser is called. 30 | * - Protected val `whiteSpace` returns a regex that identifies whitespace. 31 | * 32 | * For example, this creates a very simple calculator receiving `String` input: 33 | * 34 | * {{{ 35 | * object Calculator extends RegexParsers { 36 | * def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble } 37 | * def factor: Parser[Double] = number | "(" ~> expr <~ ")" 38 | * def term : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ { 39 | * case number ~ list => list.foldLeft(number) { 40 | * case (x, "*" ~ y) => x * y 41 | * case (x, "/" ~ y) => x / y 42 | * } 43 | * } 44 | * def expr : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ { 45 | * case number ~ list => list.foldLeft(number) { 46 | * case (x, "+" ~ y) => x + y 47 | * case (x, "-" ~ y) => x - y 48 | * } 49 | * } 50 | * 51 | * def apply(input: String): Double = parseAll(expr, input) match { 52 | * case Success(result, _) => result 53 | * case failure : NoSuccess => scala.sys.error(failure.msg) 54 | * } 55 | * } 56 | * }}} 57 | */ 58 | trait RegexParsers extends Parsers { 59 | 60 | type Elem = Char 61 | 62 | protected val whiteSpace = """\s+""".r 63 | 64 | def skipWhitespace = whiteSpace.toString.length > 0 65 | 66 | /** Method called to handle whitespace before parsers. 67 | * 68 | * It checks `skipWhitespace` and, if true, skips anything 69 | * matching `whiteSpace` starting from the current offset. 70 | * 71 | * @param source The input being parsed. 72 | * @param offset The offset into `source` from which to match. 73 | * @return The offset to be used for the next parser. 
74 | */ 75 | protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int = 76 | if (skipWhitespace) 77 | (whiteSpace findPrefixMatchOf (new SubSequence(source, offset))) match { 78 | case Some(matched) => offset + matched.end 79 | case None => offset 80 | } 81 | else 82 | offset 83 | 84 | /** A parser that matches a literal string */ 85 | implicit def literal(s: String): Parser[String] = new Parser[String] { 86 | def apply(in: Input) = { 87 | val source = in.source 88 | val offset = in.offset 89 | val start = handleWhiteSpace(source, offset) 90 | var i = 0 91 | var j = start 92 | while (i < s.length && j < source.length && s.charAt(i) == source.charAt(j)) { 93 | i += 1 94 | j += 1 95 | } 96 | if (i == s.length) 97 | Success(source.subSequence(start, j).toString, in.drop(j - offset), None) 98 | else { 99 | val found = if (start == source.length()) "end of source" else "'"+source.charAt(start)+"'" 100 | Failure("'"+s+"' expected but "+found+" found", in.drop(start - offset)) 101 | } 102 | } 103 | } 104 | 105 | /** A parser that matches a regex string */ 106 | implicit def regex(r: Regex): Parser[String] = new Parser[String] { 107 | def apply(in: Input) = { 108 | val source = in.source 109 | val offset = in.offset 110 | val start = handleWhiteSpace(source, offset) 111 | (r findPrefixMatchOf (new SubSequence(source, start))) match { 112 | case Some(matched) => 113 | Success(source.subSequence(start, start + matched.end).toString, 114 | in.drop(start + matched.end - offset), 115 | None) 116 | case None => 117 | val found = if (start == source.length()) "end of source" else "'"+source.charAt(start)+"'" 118 | Failure("string matching regex '"+r+"' expected but "+found+" found", in.drop(start - offset)) 119 | } 120 | } 121 | } 122 | 123 | /** `positioned` decorates a parser's result with the start position of the input it consumed. 124 | * If whitespace is being skipped, then it is skipped before the start position is recorded. 
125 | * 126 | * @param p a `Parser` whose result conforms to `Positional`. 127 | * @return A parser that has the same behaviour as `p`, but which marks its result with the 128 | * start position of the input it consumed after whitespace has been skipped, if it 129 | * didn't already have a position. 130 | */ 131 | override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = { 132 | val pp = super.positioned(p) 133 | new Parser[T] { 134 | def apply(in: Input) = { 135 | val offset = in.offset 136 | val start = handleWhiteSpace(in.source, offset) 137 | pp(in.drop (start - offset)) 138 | } 139 | } 140 | } 141 | 142 | // we might want to make it public/protected in a future version 143 | private def ws[T](p: Parser[T]): Parser[T] = new Parser[T] { 144 | def apply(in: Input) = { 145 | val offset = in.offset 146 | val start = handleWhiteSpace(in.source, offset) 147 | p(in.drop (start - offset)) 148 | } 149 | } 150 | 151 | /** 152 | * @inheritdoc 153 | * 154 | * This parser additionally skips whitespace if `skipWhitespace` returns true. 155 | */ 156 | override def err(msg: String) = ws(super.err(msg)) 157 | 158 | /** 159 | * A parser generator delimiting whole phrases (i.e. programs). 160 | * 161 | * `phrase(p)` succeeds if `p` succeeds and no input is left over after `p`. 162 | * 163 | * @param p the parser that must consume all input for the resulting parser 164 | * to succeed. 165 | * 166 | * @return a parser that has the same result as `p`, but that only succeeds 167 | * if `p` consumed all the input. 168 | */ 169 | override def phrase[T](p: Parser[T]): Parser[T] = 170 | super.phrase(p <~ "".r) 171 | 172 | /** Parse some prefix of reader `in` with parser `p`. */ 173 | def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = 174 | p(in) 175 | 176 | /** Parse some prefix of character sequence `in` with parser `p`. 
*/ 177 | def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = 178 | p(new CharSequenceReader(in)) 179 | 180 | /** Parse some prefix of reader `in` with parser `p`. */ 181 | def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = 182 | p(new PagedSeqReader(PagedSeq.fromReader(in))) 183 | 184 | /** Parse all of reader `in` with parser `p`. */ 185 | def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = 186 | parse(phrase(p), in) 187 | 188 | /** Parse all of reader `in` with parser `p`. */ 189 | def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = 190 | parse(phrase(p), in) 191 | 192 | /** Parse all of character sequence `in` with parser `p`. */ 193 | def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = 194 | parse(phrase(p), in) 195 | } 196 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/input/PagedSeq.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing.input 15 | 16 | import java.io.{File, FileReader, Reader => JReader} 17 | import scala.reflect.ClassTag 18 | 19 | /** The `PagedSeq` object defines a lazy implementations of 20 | * a random access sequence. 21 | * 22 | * Provides utility methods that return instances of `PagedSeq[Char]`. 
23 | * `fromIterator` and `fromIterable` provide generalised instances of `PagedSeq` 24 | */ 25 | object PagedSeq { 26 | final val UndeterminedEnd = Int.MaxValue 27 | 28 | /** Constructs a paged sequence from an iterator */ 29 | def fromIterator[T: ClassTag](source: Iterator[T]): PagedSeq[T] = 30 | new PagedSeq[T]((data: Array[T], start: Int, len: Int) => { 31 | var i = 0 32 | while (i < len && source.hasNext) { 33 | data(start + i) = source.next() 34 | i += 1 35 | } 36 | if (i == 0) -1 else i 37 | }) 38 | 39 | /** Constructs a paged sequence from an iterable */ 40 | def fromIterable[T: ClassTag](source: Iterable[T]): PagedSeq[T] = 41 | fromIterator(source.iterator) 42 | 43 | /** Constructs a paged character sequence from a string iterator */ 44 | def fromStrings(source: Iterator[String]): PagedSeq[Char] = 45 | fromIterator(source.flatMap(_.iterator)) 46 | 47 | /** Constructs a paged character sequence from a string iterable */ 48 | def fromStrings(source: Iterable[String]): PagedSeq[Char] = 49 | fromStrings(source.iterator) 50 | 51 | /** Constructs a paged character sequence from a line iterator 52 | * Lines do not contain trailing `\n` characters; The method inserts 53 | * a line separator `\n` between any two lines in the sequence. 54 | */ 55 | def fromLines(source: Iterator[String]): PagedSeq[Char] = { 56 | var isFirst = true 57 | fromStrings(source map { line => 58 | if (isFirst) { 59 | isFirst = false 60 | line 61 | } else "\n"+line 62 | }) 63 | } 64 | 65 | /** Constructs a paged character sequence from a line iterable 66 | * Lines do not contain trailing `\n` characters; The method inserts 67 | * a line separator `\n` between any two lines in the sequence. 
68 | */ 69 | def fromLines(source: Iterable[String]): PagedSeq[Char] = 70 | fromLines(source.iterator) 71 | 72 | /** Constructs a paged character sequence from an input reader 73 | */ 74 | def fromReader(source: JReader): PagedSeq[Char] = 75 | new PagedSeq(source.read(_: Array[Char], _: Int, _: Int)) 76 | 77 | /** Constructs a paged character sequence from an input file 78 | */ 79 | def fromFile(source: File): PagedSeq[Char] = 80 | fromReader(new FileReader(source)) 81 | 82 | /** Constructs a paged character sequence from a file with given name 83 | */ 84 | def fromFile(source: String): PagedSeq[Char] = 85 | fromFile(new File(source)) 86 | 87 | /** Constructs a paged character sequence from a scala.io.Source value 88 | */ 89 | def fromSource(source: scala.io.Source) = 90 | fromLines(source.getLines()) 91 | } 92 | 93 | 94 | import PagedSeq._ 95 | 96 | /** An implementation of lazily computed sequences, where elements are stored 97 | * in "pages", i.e. arrays of fixed size. 98 | * 99 | * A paged sequence is constructed from a function that produces more elements when asked. 100 | * The producer function - `more`, is similar to the read method in java.io.Reader. 101 | * The `more` function takes three parameters: an array of elements, a start index, and an end index. 102 | * It should try to fill the array between start and end indices (excluding end index). 103 | * It returns the number of elements produced, or -1 if end of logical input stream was reached 104 | * before reading any element. 105 | * 106 | * @tparam T the type of the elements contained in this paged sequence, with an `ClassTag` context bound. 
107 | * 108 | * @define Coll `PagedSeq` 109 | * @define coll paged sequence 110 | * @define mayNotTerminateInf 111 | * @define willNotTerminateInf 112 | */ 113 | class PagedSeq[T: ClassTag] protected( 114 | more: (Array[T], Int, Int) => Int, 115 | first1: Page[T], 116 | start: Int, 117 | end: Int) 118 | extends scala.collection.AbstractSeq[T] 119 | with scala.collection.IndexedSeq[T] 120 | with ScalaVersionSpecificPagedSeq[T] 121 | { 122 | def this(more: (Array[T], Int, Int) => Int) = this(more, new Page[T](0), 0, UndeterminedEnd) 123 | 124 | private var current: Page[T] = first1 125 | 126 | private def latest = first1.latest 127 | 128 | private def addMore() = latest.addMore(more) 129 | 130 | private def page(absindex: Int) = { 131 | if (absindex < current.start) 132 | current = first1 133 | while (absindex >= current.end && current.next != null) 134 | current = current.next 135 | while (absindex >= current.end && !current.isLast) { 136 | current = addMore() 137 | } 138 | current 139 | } 140 | 141 | /** The length of the paged sequence 142 | * @note Calling this method will force the entire sequence to be read. 143 | */ 144 | def length: Int = { 145 | while (!latest.isLast && latest.end < end) addMore() 146 | (latest.end min end) - start 147 | } 148 | 149 | /** The element at position `index`. 150 | */ 151 | def apply(index: Int) = 152 | if (isDefinedAt(index)) page(index + start)(index + start) 153 | else throw new IndexOutOfBoundsException(index.toString) 154 | 155 | /** Predicate method to check if an element is defined 156 | * at position `index` of the current sequence. 157 | * Unlike `length` this operation does not force reading 158 | * a lazy sequence to the end. 
159 | */ 160 | override def isDefinedAt(index: Int) = 161 | index >= 0 && index < end - start && { 162 | val absidx = index + start 163 | absidx >= 0 && absidx < page(absidx).end 164 | } 165 | 166 | /** The subsequence from index `start` up to `end -1` if `end` 167 | * is lesser than the length of the current sequence and up to 168 | * length of the sequence otherwise. This is limited up to the length 169 | * of the current sequence if `end` is larger than its length. 170 | */ 171 | override def slice(_start: Int, _end: Int): PagedSeq[T] = { 172 | page(start) 173 | val s = start + _start 174 | val e = if (_end == UndeterminedEnd) _end else start + _end 175 | var f = first1 176 | while (f.end <= s && !f.isLast) { 177 | if (f.next eq null) f = f.addMore(more) 178 | else f = f.next 179 | } 180 | // Warning -- not refining `more` means that slices can freely request and obtain 181 | // data outside of their slice. This is part of the design of PagedSeq 182 | // (to read pages!) but can be surprising. 183 | new PagedSeq(more, f, s, e) 184 | } 185 | 186 | /** The subsequence from index `start` up to 187 | * the length of the current sequence. 188 | */ 189 | def slice(start: Int): PagedSeq[T] = slice(start, UndeterminedEnd) 190 | 191 | /** Convert sequence to string */ 192 | override def toString = { 193 | val buf = new StringBuilder 194 | for (ch <- PagedSeq.this.iterator) buf append ch 195 | buf.toString 196 | } 197 | } 198 | 199 | 200 | /** Page containing up to PageSize characters of the input sequence. 
201 | */ 202 | private class Page[T: ClassTag](val num: Int) { 203 | 204 | private final val PageSize = 4096 205 | 206 | /** The next page in the sequence */ 207 | var next : Page[T] = null 208 | 209 | /** A later page in the sequence, serves a cache for pointing to last page */ 210 | var later : Page[T] = this 211 | 212 | /** The number of elements read into this page */ 213 | var filled: Int = 0 214 | 215 | /** Set true if the current page is the last in the sequence or if 216 | * the `more` function returned -1 signalling end of input. */ 217 | var isLast: Boolean = false 218 | 219 | /** The element array */ 220 | final val data = new Array[T](PageSize) 221 | 222 | /** The index of the first element in this page relative to the whole sequence */ 223 | final def start = num * PageSize 224 | 225 | /** The index of the element following the last element in this page relative 226 | * to the whole sequence */ 227 | final def end = start + filled 228 | 229 | /** The last page as currently present in the sequence; This can change as more 230 | * elements get appended to the sequence. */ 231 | final def latest: Page[T] = { 232 | var oldLater = later 233 | while (later.next != null) later = later.next 234 | while (oldLater.next != null) { 235 | oldLater = oldLater.next 236 | oldLater.later = later 237 | } 238 | later 239 | } 240 | 241 | /** The element at the given sequence index. 242 | * That index is relative to the whole sequence, not the page. */ 243 | def apply(index: Int) = { 244 | if (index < start || index - start >= filled) throw new IndexOutOfBoundsException(index.toString) 245 | data(index - start) 246 | } 247 | 248 | /** Produces more elements by calling `more` and adds them on the current page, 249 | * or fills a subsequent page if current page is full. 250 | * @note If current page is full, it is the last one in the sequence. 
*/ 251 | final def addMore(more: (Array[T], Int, Int) => Int): Page[T] = 252 | if (filled == PageSize) { 253 | next = new Page[T](num + 1) 254 | next.addMore(more) 255 | } else { 256 | val count = more(data, filled, PageSize - filled) 257 | if (count < 0) isLast = true 258 | else filled += count 259 | this 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /docs/Getting_Started.md: -------------------------------------------------------------------------------- 1 | ## Getting Started 2 | 3 | Scala parser combinators are a powerful way to build parsers that can be used in everyday programs. But it's hard to understand the plumbing pieces and how to get started. After you get the first couple of samples to compile and work, the plumbing starts to make sense. But until then it can be daunting, and the standard documentation isn't much help (some readers may remember the original "Scala By Example" chapter on parser combinators, and how that chapter disappeared from subsequent revisions of the book). So what are the components of a parser? How do those components fit together? What methods do I call? What patterns can be matched? Until those pieces are understood, you can’t begin to work on your grammar or build and process abstract syntax trees. So to minimize complexity, I wanted to start here with the simplest possible language: a lowercase word. Let’s build a parser for that language. We can describe the grammar in a single production rule: 4 | 5 | ``` 6 | word -> [a-z]+ 7 | ``` 8 | 9 | Here’s what the parser looks like: 10 | 11 | 12 | import scala.util.parsing.combinator._ 13 | class SimpleParser extends RegexParsers { 14 | def word: Parser[String] = """[a-z]+""".r ^^ { _.toString } 15 | } 16 | 17 | 18 | The package [scala.util.parsing.combinator](https://javadoc.io/static/org.scala-lang.modules/scala-parser-combinators_2.13/2.1.0/scala/util/parsing/combinator/index.html) contains all of the interesting stuff. 
Our parser extends [RegexParsers](https://javadoc.io/static/org.scala-lang.modules/scala-parser-combinators_2.13/2.1.0/scala/util/parsing/combinator/RegexParsers.html) because we do some lexical analysis. `"""[a-z]+""".r` is the regular expression. `^^` is [documented](https://javadoc.io/static/org.scala-lang.modules/scala-parser-combinators_2.13/2.1.0/scala/util/parsing/combinator/Parsers$Parser.html#^^[U](f:T=>U):Parsers.this.Parser[U]) to be "a parser combinator for function application". Basically, if the parsing on the left of the `^^` succeeds, the function on the right is executed. If you've done yacc parsing, the left hand side of the ^^ corresponds to the grammar rule and the right hand side corresponds to the code generated by the rule. Since the method "word" returns a Parser of type String, the function on the right of `^^` needs to return a String. 19 | 20 | So how do we use this parser? Well, if we want to extract a word from a string, we can call 21 | 22 | 23 | SimpleParser.parse(SimpleParser.word, myString) 24 | 25 | Here’s a little program to do this. 26 | 27 | object TestSimpleParser extends SimpleParser { 28 | def main(args: Array[String]) = println(parse(word, "johnny come lately")) 29 | } 30 | 31 | 32 | Two things to notice here: 33 | 34 | * The object extends SimpleParser. That gets us around having to prefix everything with "SimpleParser". 35 | * When we run this, we don’t get back the "word" we parsed, we get a `ParseResult[String]` back. The "String" type parameter is needed because the method named "word" returns a result of type `Parser[String]`, and the type parameter carries through to the `ParseResult`. 36 | 37 | When we run the program, we get the following at the console: 38 | 39 | 40 | [1.7] parsed: johnny 41 | 42 | 43 | That says that the first character of the input that matched the parser is position 1, and the first character remaining to be matched is in position 7.
This is a good start, but all of this should suggest that we are missing something because we have a ParseResult, but not the thing we want, which is the word. We need to handle the `ParserResult` better. We could call the "get" method on the `ParseResult`. That would give us the result, but that would be making the optimistic assumption that everything worked and that parsing was successful. We can't plan on that because we probably can't control the input enough to know that it is valid. The input is given to us and we have to make the best of it. That means detecting and handling errors, which sounds like a job for pattern matching, right? In Scala we use pattern matching to trap exceptions, we use pattern matching (`Option`s) to branch for success and failure, so you would expect to use pattern matching to deal with parsing as well. And in fact you can pattern match on the `ParseResult` for the various termination states. Here’s a rewrite of the little program that does a better job: 44 | 45 | 46 | object TestSimpleParser extends SimpleParser { 47 | def main(args: Array[String]) = { 48 | parse(word, "johnny come lately") match { 49 | case Success(matched,_) => println(matched) 50 | case Failure(msg,_) => println("FAILURE: " + msg) 51 | case Error(msg,_) => println("ERROR: " + msg) 52 | } 53 | } 54 | } 55 | 56 | 57 | In comparison to `Option`, which has two primary cases (Some and None), the `ParseResult` basically has three cases: 1) `Success`, 2) `Failure`, and 3) `Error`. Each case is matched by a pattern of two items. In the `Success` case, the first item is the object produced by the parser (a String for us since "word" returns a `Parser[String]`), and in the `Failure` and `Error` cases, the first item is an error message. In all cases, the second item in the match is the remaining unmatched input, which we don’t care about here. But if we were doing fancy error handling or subsequent parsing, we would pay close attention to. 
The difference between `Failure` and `Error` is that on a `Failure`, parsing will backtrack when parsing continues (this rule didn't work but maybe there is some other grammar rule that will), whereas the `Error` case is fatal and there will be no backtracking (you have a syntax error, there is no way to match the expression you have provided with the grammar for this language, edit the expression and try again). 58 | 59 | This tiny example shows a lot of the necessary parser combinator plumbing. Now let’s look at a slightly more complex (and admittedly contrived) example to bring forward some of the remaining plumbing. Say that what we are really after is a word followed by a number. Pretend that this is data about the frequency count of words in a long document. Of course, there are ways to do this by simple regular expression matching, but let’s take a slightly more abstract approach to show some more combinator plumbing. In addition to words we will also have to match numbers, and we will have to match words and numbers together. So first, let’s add a new type to gather words and counts. Here is a simple case class for that: 60 | 61 | 62 | case class WordFreq(word: String, count: Int) { 63 | override def toString = "Word <" + word + "> " + 64 | "occurs with frequency " + count 65 | } 66 | 67 | Now we want our parser to return instances of this case class rather than instances of `String`. In the context of traditional parsing, productions that return primitive objects like strings and numbers are performing lexical analysis (aka tokenization, typically using regular expressions) whereas productions that return composite objects correspond to the creation of Abstract Syntax Trees (ASTs). Indeed, in the revised parser class, below, the words and numbers are recognized by regular expressions and the word frequencies use a higher-order pattern. 
So two of our grammar rules are for tokenization and the third builds the AST: 68 | 69 | class SimpleParser extends RegexParsers { 70 | def word: Parser[String] = """[a-z]+""".r ^^ { _.toString } 71 | def number: Parser[Int] = """(0|[1-9]\d*)""".r ^^ { _.toInt } 72 | def freq: Parser[WordFreq] = word ~ number ^^ { case wd ~ fr => WordFreq(wd,fr) } 73 | } 74 | 75 | So what’s to notice here, in this new program? Well, the parser for "number" looks just about like the parser for "word", except that it returns a `Parser[Int]` rather than a `Parser[String]`, and the conversion function calls `toInt` rather than `toString`. But there is a third production rule here, the freq rule. It: 76 | 77 | * Doesn't have a .r because it isn't a regular expression (it's a combinator). 78 | * Returns instances of `Parser[WordFreq]`, so the function to the right hand side of the `^^` operator had better return instances of the composite type `WordFreq`. 79 | * Combines the "word" rule with the "number" rule. It uses the `~` (tilde) combinator to say "you have to match a word first, and then a number". The tilde combinator is the most common combinator for rules that don't involve regular expressions. 80 | * Uses a pattern match on the right side of the rule. Sometimes these match expressions are complex but many times they are just echoes of the rule on the left hand side. In that case, all it really does is gives names to the different elements of the rule (in this case "wd" and "fr") so that we can operate on those elements. In this case, we use those named elements to construct the object we are interested in. But there are also cases where the pattern match is not an echo of the left hand side. Those cases may arise when parts of the rule are optional, or when there are very specific cases to match. For instance, if we wanted to perform special handling in the case where fr was exactly 0. 
For that, we could have added the case: 81 | ``` 82 | case wd ~ 0 83 | ``` 84 | 85 | Here is a very slightly modified program to use this parser: 86 | 87 | object TestSimpleParser extends SimpleParser { 88 | def main(args: Array[String]) = { 89 | parse(freq, "johnny 121") match { 90 | case Success(matched,_) => println(matched) 91 | case Failure(msg,_) => println("FAILURE: " + msg) 92 | case Error(msg,_) => println("ERROR: " + msg) 93 | } 94 | } 95 | } 96 | 97 | There are only two differences between this little program and the previous one. Both of those differences are on the third line: 98 | 99 | * Instead of using the "word" parser, we use the "freq" parser because those are the kinds of objects we are trying to get from the input, and 100 | * We changed the input string to match the new language. 101 | 102 | Now when we run the program we get: 103 | 104 | Word occurs with frequency 121 105 | 106 | At this point, we’ve shown enough of the parser combinator plumbing to get started and do something useful. Hopefully, all of that other documentation makes a lot more sense now. 107 | -------------------------------------------------------------------------------- /shared/src/test/scala/scala/util/parsing/combinator/PackratParsersTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 
11 | */ 12 | 13 | package scala.util.parsing.combinator 14 | 15 | import org.junit.Test 16 | import org.junit.Assert.assertEquals 17 | import org.junit.Assert.assertTrue 18 | 19 | import scala.util.parsing.combinator.syntactical.StandardTokenParsers 20 | 21 | class PackratParsersTest { 22 | 23 | @Test 24 | def test1: Unit = { 25 | import grammars1._ 26 | val head = phrase(term) 27 | 28 | def extractResult(r : ParseResult[Int]): Int = r match { 29 | case Success(a,_) => a 30 | case NoSuccess.I(a,_) => sys.error(a) 31 | } 32 | def check(expected: Int, expr: String): Unit = { 33 | val parseResult = head(new lexical.Scanner(expr)) 34 | val result = extractResult(parseResult) 35 | assertEquals(expected, result) 36 | } 37 | 38 | check(1, "1") 39 | check(3, "1+2") 40 | check(5, "9-4") 41 | check(81, "9*9") 42 | check(4, "8/2") 43 | check(37, "4*9-0/7+9-8*1") 44 | check(9, "(1+2)*3") 45 | check(3, """/* This is a 46 | long long long long long 47 | long long long long long 48 | long long long long long 49 | long long long long long 50 | long long long long long 51 | long long long long long 52 | long long long long long 53 | long long long long long 54 | long long long long long 55 | long long long long long 56 | long long long long long 57 | long long long long long 58 | long long long long long 59 | long long long long long 60 | long long long long long 61 | long long long long long 62 | long long long long long 63 | long long long long long 64 | long long long long long 65 | long long long long long 66 | long long long long long 67 | long long long long long 68 | long long long long long 69 | long long long long long 70 | long long long long long 71 | long long long long long 72 | long long long long long 73 | comment */ 74 | 1+2""") 75 | } 76 | 77 | @Test 78 | def test2: Unit = { 79 | import grammars2._ 80 | val head = phrase(exp) 81 | 82 | def extractResult(r : ParseResult[Int]): Int = r match { 83 | case Success(a,_) => a 84 | case NoSuccess.I(a,_) => sys.error(a) 85 
| } 86 | def check(expected: Int, expr: String): Unit = { 87 | val parseResult = head(new lexical.Scanner(expr)) 88 | val result = extractResult(parseResult) 89 | assertEquals(expected, result) 90 | } 91 | 92 | check(1, "1") 93 | check(3, "1+2") 94 | check(81, "9*9") 95 | check(43, "4*9+7") 96 | check(59, "4*9+7*2+3*3") 97 | check(188, "4*9+7*2+3*3+9*5+7*6*2") 98 | check(960, "4*(9+7)*(2+3)*3") 99 | } 100 | 101 | @Test 102 | def test3: Unit = { 103 | import grammars3._ 104 | val head = phrase(AnBnCn) 105 | def extractResult(r: ParseResult[AnBnCnResult]): AnBnCnResult = r match { 106 | case Success(a,_) => a 107 | case NoSuccess.I(a,_) => sys.error(a) 108 | } 109 | def threeLists(as: List[Symbol], bs: List[Symbol], cs: List[Symbol]): AnBnCnResult = { 110 | val as1 = as.map(_.name) 111 | val bs1 = bs.map(_.name) 112 | val cs1 = cs.map(_.name) 113 | new ~(new ~(as1, bs1), cs1) 114 | } 115 | def assertSuccess(expected1: List[Symbol], expected2: List[Symbol], expected3: List[Symbol], 116 | input: String): Unit = { 117 | val expected = threeLists(expected1, expected2, expected3) 118 | val parseResult = head(new lexical.Scanner(input)) 119 | val result = extractResult(parseResult) 120 | assertEquals(expected, result) 121 | } 122 | 123 | assertSuccess(List(Symbol("a"), Symbol("b")), List(Symbol("a")), List(Symbol("b"), Symbol("c")), "a b c") 124 | assertSuccess(List(Symbol("a"), Symbol("a"), Symbol("b"), Symbol("b")), List(Symbol("a"), Symbol("a")), List(Symbol("b"), Symbol("b"), Symbol("c"), Symbol("c")), "a a b b c c") 125 | assertSuccess(List(Symbol("a"), Symbol("a"), Symbol("a"), Symbol("b"), Symbol("b"), Symbol("b")), List(Symbol("a"), Symbol("a"), Symbol("a")), List(Symbol("b"), Symbol("b"), Symbol("b"), Symbol("c"), Symbol("c"), Symbol("c")), 126 | "a a a b b b c c c") 127 | assertSuccess(List(Symbol("a"), Symbol("a"), Symbol("a"), Symbol("a"), Symbol("b"), Symbol("b"), Symbol("b"), Symbol("b")), List(Symbol("a"), Symbol("a"), Symbol("a"), Symbol("a")), 
List(Symbol("b"), Symbol("b"), Symbol("b"), Symbol("b"), Symbol("c"), Symbol("c"), Symbol("c"), Symbol("c")), 128 | "a a a a b b b b c c c c") 129 | 130 | def assertFailure(expectedFailureMsg: String, input: String): Unit = { 131 | val packratReader = new PackratReader(new lexical.Scanner(input)) 132 | val parseResult = AnBnCn(packratReader) 133 | assertTrue(s"Not an instance of Failure: ${parseResult.toString()}", parseResult.isInstanceOf[Failure]) 134 | val failure = parseResult.asInstanceOf[Failure] 135 | assertEquals(expectedFailureMsg, failure.msg) 136 | } 137 | assertFailure("''b'' expected but 'c' found", "a a a a b b b c c c c") 138 | assertFailure("end of input", "a a a a b b b b c c c") 139 | } 140 | 141 | @Test 142 | def test4: Unit = { 143 | import grammars4._ 144 | import grammars4.parser._ 145 | 146 | def extractResult(r: ParseResult[Res]): Res = r match { 147 | case Success(a,_) => a 148 | case NoSuccess.I(a,_) => sys.error(a) 149 | } 150 | def check(expected: Term, input: String, ctx: Ctx): Unit = { 151 | val parseResult = phraseTerm(new lexical.Scanner(input)) 152 | val result = extractResult(parseResult) 153 | val term = result(ctx) 154 | assertEquals(expected, term) 155 | } 156 | 157 | check(Var(-1, 0), "x", Nil) 158 | check(Var(0, 3), "x", List("x", "y", "z")) 159 | check(Var(1, 3), "y", List("x", "y", "z")) 160 | check(Var(2, 3), "z", List("x", "y", "z")) 161 | 162 | check(App(Var(0, 2), Var(1, 2)), "x y", List("x", "y")) 163 | check(App(App(Var(0, 2), Var(1, 2)), Var(0, 2)), "x y x", List("x", "y")) 164 | check(App(App(Var(0, 2), Var(1, 2)), Var(0, 2)), "(x y) x", List("x", "y")) 165 | check(Abs(App(App(Var(0, 1), Var(0, 1)), Var(0, 1))), """\x. 
x x x""", List()) 166 | } 167 | 168 | } 169 | 170 | private object grammars1 extends StandardTokenParsers with PackratParsers { 171 | 172 | lexical.delimiters ++= List("+","-","*","/","(",")") 173 | lexical.reserved ++= List("Hello","World") 174 | 175 | /**** 176 | * term = term + fact | term - fact | fact 177 | * fact = fact * num | fact / num | num 178 | */ 179 | 180 | 181 | val term: PackratParser[Int] = (term~("+"~>fact) ^^ {case x~y => x+y} 182 | |term~("-"~>fact) ^^ {case x~y => x-y} 183 | |fact) 184 | 185 | val fact: PackratParser[Int] = (fact~("*"~>numericLit) ^^ {case x~y => x*y.toInt} 186 | |fact~("/"~>numericLit) ^^ {case x~y => x/y.toInt} 187 | |"("~>term<~")" 188 | |numericLit ^^ {_.toInt}) 189 | } 190 | 191 | private object grammars2 extends StandardTokenParsers with PackratParsers { 192 | 193 | lexical.delimiters ++= List("+","-","*","/","(",")") 194 | lexical.reserved ++= List("Hello","World") 195 | 196 | /* 197 | * exp = sum | prod | num 198 | * sum = exp ~ "+" ~ num 199 | * prod = exp ~ "*" ~ num 200 | */ 201 | 202 | val exp : PackratParser[Int] = sum | prod | numericLit ^^{_.toInt} | "("~>exp<~")" 203 | val sum : PackratParser[Int] = exp~("+"~>exp) ^^ {case x~y => x+y} 204 | val prod: PackratParser[Int] = exp~("*"~>(numericLit ^^{_.toInt} | exp)) ^^ {case x~y => x*y} 205 | 206 | } 207 | 208 | private object grammars3 extends StandardTokenParsers with PackratParsers { 209 | lexical.reserved ++= List("a","b", "c") 210 | val a: PackratParser[String] = memo("a") 211 | val b: PackratParser[String] = memo("b") 212 | val c: PackratParser[String] = memo("c") 213 | 214 | type AnBnCnResult = List[String] ~ List[String] ~ List[String] 215 | 216 | val AnBnCn: PackratParser[AnBnCnResult] = 217 | guard(repMany1(a,b) <~ not(b)) ~ rep1(a) ~ repMany1(b,c)// ^^{case x~y => x:::y} 218 | 219 | 220 | private def repMany[T](p: => Parser[T], q: => Parser[T]): Parser[List[T]] = 221 | ( p~repMany(p,q)~q ^^ {case x~xs~y => x::xs:::(y::Nil)} 222 | | success(Nil) 223 | ) 
224 | 225 | @annotation.nowarn("cat=other-match-analysis") 226 | def repMany1[T](p: => Parser[T], q: => Parser[T]): Parser[List[T]] = 227 | p~opt(repMany(p,q))~q ^^ {case x~Some(xs)~y => x::xs:::(y::Nil)} 228 | 229 | } 230 | 231 | private object grammars4 { 232 | // untyped lambda calculus with named vars -> de brujin indices conversion on the fly 233 | // Adapted from https://github.com/ilya-klyuchnikov/tapl-scala/blob/master/src/main/scala/tapl/untyped/parser.scala 234 | sealed trait Term 235 | case class Var(i: Int, cl: Int) extends Term 236 | case class Abs(t: Term) extends Term 237 | case class App(t1: Term, t2: Term) extends Term 238 | 239 | object parser extends StandardTokenParsers with PackratParsers { 240 | lexical.delimiters ++= List("(", ")", ".", "\\") 241 | 242 | type Res = Ctx => Term 243 | type Ctx = List[String] 244 | 245 | private val term: PackratParser[Res] = app | atom | abs 246 | private val atom: PackratParser[Res] = "(" ~> term <~ ")" | id 247 | private val id : PackratParser[Res] = ident ^^ { n => (c: Ctx) => Var(c.indexOf(n), c.length) } 248 | private val app : PackratParser[Res] = (app ~ atom) ^^ {case t1 ~ t2 => (c: Ctx) => App(t1(c), t2(c)) } | atom 249 | private val abs : PackratParser[Res] = "\\" ~> ident ~ ("." ~> term) ^^ {case v ~ t => (c: Ctx) => Abs(t(v::c))} 250 | val phraseTerm : PackratParser[Res] = phrase(term) 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/combinator/PackratParsers.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Scala (https://www.scala-lang.org) 3 | * 4 | * Copyright EPFL and Lightbend, Inc. dba Akka 5 | * 6 | * Licensed under Apache License 2.0 7 | * (http://www.apache.org/licenses/LICENSE-2.0). 8 | * 9 | * See the NOTICE file distributed with this work for 10 | * additional information regarding copyright ownership. 11 | */ 12 | 13 | package scala 14 | package util.parsing.combinator 15 | 16 | import scala.util.parsing.input.{ Reader, Position } 17 | import scala.collection.mutable 18 | import scala.language.implicitConversions 19 | 20 | /** 21 | * `PackratParsers` is a component that extends the parser combinators 22 | * provided by [[scala.util.parsing.combinator.Parsers]] with a memoization 23 | * facility (''Packrat Parsing''). 
24 | * 25 | * Packrat Parsing is a technique for implementing backtracking, 26 | * recursive-descent parsers, with the advantage that it guarantees 27 | * unlimited lookahead and a linear parse time. Using this technique, 28 | * left recursive grammars can also be accepted. 29 | * 30 | * Using `PackratParsers` is very similar to using `Parsers`: 31 | * - any class/trait that extends `Parsers` (directly or through a subclass) 32 | * can mix in `PackratParsers`. 33 | * Example: `'''object''' MyGrammar '''extends''' StandardTokenParsers '''with''' PackratParsers` 34 | * - each grammar production previously declared as a `def` without formal 35 | * parameters becomes a `lazy val`, and its type is changed from 36 | * `Parser[Elem]` to `PackratParser[Elem]`. 37 | * So, for example, `'''def''' production: Parser[Int] = {...}` 38 | * becomes `'''lazy val''' production: PackratParser[Int] = {...}` 39 | * - Important: using `PackratParser`s is not an ''all or nothing'' decision. 40 | * They can be free mixed with regular `Parser`s in a single grammar. 41 | * 42 | * Cached parse results are attached to the ''input'', not the grammar. 43 | * Therefore, `PackratsParser`s require a `PackratReader` as input, which 44 | * adds memoization to an underlying `Reader`. Programmers can create 45 | * `PackratReader` objects either manually, as in 46 | * `production('''new''' PackratReader('''new''' lexical.Scanner("input")))`, 47 | * but the common way should be to rely on the combinator `phrase` to wrap 48 | * a given input with a `PackratReader` if the input is not one itself. 49 | * 50 | * @see Bryan Ford: "Packrat Parsing: Simple, Powerful, Lazy, Linear Time." ICFP'02 51 | * @see Alessandro Warth, James R. Douglass, Todd Millstein: "Packrat Parsers Can Support Left Recursion." 
PEPM'08 52 | * 53 | * @since 2.8 54 | */ 55 | 56 | trait PackratParsers extends Parsers { 57 | /** 58 | * A specialized `Reader` class that wraps an underlying `Reader` 59 | * and provides memoization of parse results. 60 | */ 61 | class PackratReader[+T](underlying: Reader[T]) extends Reader[T] { outer => 62 | 63 | /* 64 | * caching of intermediate parse results and information about recursion 65 | */ 66 | private[PackratParsers] val cache = mutable.HashMap.empty[(Parser[?], Position), MemoEntry[?]] 67 | 68 | private[PackratParsers] def getFromCache[T2](p: Parser[T2]): Option[MemoEntry[T2]] = { 69 | cache.get((p, pos)).asInstanceOf[Option[MemoEntry[T2]]] 70 | } 71 | 72 | private[PackratParsers] def updateCacheAndGet[T2](p: Parser[T2], w: MemoEntry[T2]): MemoEntry[T2] = { 73 | cache.put((p, pos),w) 74 | w 75 | } 76 | 77 | /* a cache for storing parser heads: allows to know which parser is involved 78 | in a recursion*/ 79 | private[PackratParsers] val recursionHeads: mutable.HashMap[Position, Head] = mutable.HashMap.empty 80 | 81 | //a stack that keeps a list of all involved rules 82 | private[PackratParsers] var lrStack: List[LR] = Nil 83 | 84 | override def source: java.lang.CharSequence = underlying.source 85 | override def offset: Int = underlying.offset 86 | 87 | def first: T = underlying.first 88 | def rest: Reader[T] = new PackratReader(underlying.rest) { 89 | override private[PackratParsers] val cache = outer.cache 90 | override private[PackratParsers] val recursionHeads = outer.recursionHeads 91 | lrStack = outer.lrStack 92 | } 93 | 94 | def pos: Position = underlying.pos 95 | def atEnd: Boolean = underlying.atEnd 96 | } 97 | 98 | /** 99 | * A parser generator delimiting whole phrases (i.e. programs). 100 | * 101 | * Overridden to make sure any input passed to the argument parser 102 | * is wrapped in a `PackratReader`. 
   */
  override def phrase[T](p: Parser[T]): PackratParser[T] = {
    val q = super.phrase(p)
    new PackratParser[T] {
      def apply(in: Input) = in match {
        case in: PackratReader[?] => q(in)  // already memoizing: use as-is
        case in => q(new PackratReader(in)) // wrap plain readers in a memoizing one
      }
    }
  }

  /** The position reached by a parse result's remaining input. */
  private def getPosFromResult(r: ParseResult[?]): Position = r.next.pos

  // auxiliary data structures

  // A memo-table entry: either an in-progress left recursion (Left) or a
  // finished parse result (Right). Mutable (`var r`) because recursion
  // growth updates entries in place.
  private case class MemoEntry[+T](var r: Either[LR,ParseResult[?]]){
    def getResult: ParseResult[T] = r match {
      case Left(LR(res,_,_)) => res.asInstanceOf[ParseResult[T]]
      case Right(res) => res.asInstanceOf[ParseResult[T]]
    }
  }

  // A detected left recursion: the seed result grown so far, the rule that
  // produced it, and (once identified) the head rule of the recursion.
  private case class LR(var seed: ParseResult[?], var rule: Parser[?], var head: Option[Head]){
    def getPos: Position = getPosFromResult(seed)
  }

  // The head rule of a left recursion, together with all rules involved in it
  // and the subset still to be (re-)evaluated during the current growth pass.
  private case class Head(var headParser: Parser[?], var involvedSet: List[Parser[?]], var evalSet: List[Parser[?]]){
    def getHead = headParser
  }

  /**
   * The root class of packrat parsers.
   */
  abstract class PackratParser[+T] extends super.Parser[T]

  /**
   * Implicitly convert a parser to a packrat parser.
   * The conversion is triggered by giving the appropriate target type:
   * {{{
   *   val myParser: PackratParser[MyResult] = aParser
   * }}} */
  implicit def parser2packrat[T](p: => super.Parser[T]): PackratParser[T] = {
    lazy val q = p
    memo(super.Parser {in => q(in)})
  }

  /*
   * An unspecified function that is called when a packrat reader is applied.
   * It verifies whether we are in the process of growing a parse or not.
   * In the former case, it makes sure that rules involved in the recursion are evaluated.
   * It also prevents non-involved rules from getting evaluated further.
   */
  private def recall(p: super.Parser[?], in: PackratReader[Elem]): Option[MemoEntry[?]] = {
    val cached = in.getFromCache(p)
    val head = in.recursionHeads.get(in.pos)

    head match {
      case None => /*no heads*/ cached
      case Some(h@Head(hp, involved, evalSet)) => {
        //heads found
        if(cached.isEmpty && !(hp::involved contains p)) {
          //Nothing in the cache, and p is not involved: block p from being
          //evaluated during this growth cycle by returning a dummy failure
          return Some(MemoEntry(Right(Failure("dummy ",in))))
        }
        if(evalSet contains p){
          //something in cache, and p is in the evalSet
          //remove the rule from the evalSet of the Head so it is evaluated
          //at most once per growth pass
          h.evalSet = h.evalSet.filterNot(_==p)
          val tempRes = p(in)
          //we know that cached has an entry here
          val tempEntry: MemoEntry[?] = cached.get // match {case Some(x: MemoEntry[_]) => x}
          //cache is modified in place
          tempEntry.r = Right(tempRes)
        }
        cached
      }
    }
  }

  /*
   * setting up the left-recursion. We have the LR for the rule head;
   * we modify the involvedSets of all LRs in the stack, till we see
   * the current parser again
   */
  private def setupLR(p: Parser[?], in: PackratReader[?], recDetect: LR): Unit = {
    if(recDetect.head.isEmpty) recDetect.head = Some(Head(p, Nil, Nil))

    // every rule sitting on the stack above `p` takes part in the recursion
    // headed by `p`: point them all at the shared Head and record them in
    // its involvedSet
    in.lrStack.takeWhile(_.rule != p).foreach {x =>
      x.head = recDetect.head
      recDetect.head.map(h => h.involvedSet = x.rule::h.involvedSet)
    }
  }

  /*
   * growing, if needed, the recursion:
   * check whether the parser we are growing is the head of the rule.
   * Not => no grow
   */

  /*
   * Once the result of the recall function is known, if it is nil, then we need to store a dummy
   * failure into the cache (much like in the previous listings) and compute the future parse.
   * If it is not, however, this means we have detected a recursion, and we use the setupLR function
   * to update each parser involved in the recursion.
   */

  // Decides what to do once a left recursion has bottomed out: if `p` is not
  // the head rule, just return the seed; if it is, cache the seed and (on
  // success) start growing it.
  private def lrAnswer[T](p: Parser[T], in: PackratReader[Elem], growable: LR): ParseResult[T] = growable match {
    //growable will always be having a head, we can't enter lrAnswer otherwise
    case LR(seed, _, Some(head)) =>
      if(head.getHead != p) /*not head rule, so not growing*/ seed.asInstanceOf[ParseResult[T]]
      else {
        in.updateCacheAndGet(p, MemoEntry(Right(seed.asInstanceOf[ParseResult[T]])))
        seed match {
          case f@Failure(_,_) => f
          case e@Error(_,_) => e
          case Success(_,_) => /*growing*/ grow(p, in, head)
        }
      }
    case _=> throw new Exception("lrAnswer with no head !!")
  }

  //p here should be strict (cannot be non-strict) !!
  //failing left-recursive grammars: This is done by simply storing a failure if nothing is found

  /**
   * Explicitly convert a given parser to a memoizing packrat parser.
   * In most cases, client code should avoid calling `memo` directly
   * and rely on implicit conversion instead.
   */
  def memo[T](p: super.Parser[T]): PackratParser[T] = {
    new PackratParser[T] {
      def apply(in: Input) = {
        /*
         * transformed reader
         */
        // NOTE(review): assumes the input is always a PackratReader — `phrase`
        // guarantees this for whole-phrase parses; confirm for direct callers.
        val inMem = in.asInstanceOf[PackratReader[Elem]]

        //look in the global cache if in a recursion
        val m = recall(p, inMem)
        m match {
          //nothing has been done due to recall
          case None =>
            val base = LR(Failure("Base Failure",in), p, None)
            inMem.lrStack = base::inMem.lrStack
            //cache base result
            inMem.updateCacheAndGet(p,MemoEntry(Left(base)))
            //parse the input
            val tempRes = p(in)
            //the base variable has passed equality tests with the cache
            inMem.lrStack = inMem.lrStack.tail
            //check whether base has changed, if yes, we will have a head
            base.head match {
              case None =>
                /*simple result: no left recursion was detected*/
                inMem.updateCacheAndGet(p,MemoEntry(Right(tempRes)))
                tempRes
              case Some(_) =>
                /*non simple result: a left recursion headed by p was detected*/
                base.seed = tempRes
                //the base variable has passed equality tests with the cache
                val res = lrAnswer(p, inMem, base)
                res
            }

          case Some(mEntry) => {
            //entry found in cache
            mEntry match {
              case MemoEntry(Left(recDetect)) => {
                setupLR(p, inMem, recDetect)
                //all setupLR does is change the heads of the recursions, so the seed will stay the same
                recDetect match {case LR(seed, _, _) => seed.asInstanceOf[ParseResult[T]]}
              }
              case MemoEntry(Right(res: ParseResult[?])) => res.asInstanceOf[ParseResult[T]]
            }
          }
        }
      }
    }
  }

  // Repeatedly re-applies the head rule `p`, keeping the longer result each
  // time, until the parse stops advancing; then removes the recursion head.
  private def grow[T](p: super.Parser[T], rest: PackratReader[Elem], head: Head): ParseResult[T] = {
    //store the head into the recursionHeads
    rest.recursionHeads.put(rest.pos, head /*match {case Head(hp,involved,_) => Head(hp,involved,involved)}*/)
    val oldRes: ParseResult[T] = rest.getFromCache(p).get match {
      case MemoEntry(Right(x)) =>
        x.asInstanceOf[ParseResult[T]]
      case _ => throw new Exception("impossible match")
    }

    //resetting the evalSet of the head of the recursion at each beginning of growth
    head.evalSet = head.involvedSet
    val tempRes = p(rest); tempRes match {
      case s@Success(_,_) =>
        if(getPosFromResult(oldRes) < getPosFromResult(tempRes)) {
          //the parse consumed more input: cache the longer result and keep growing
          rest.updateCacheAndGet(p, MemoEntry(Right(s)))
          grow(p, rest, head)
        } else {
          //we're done with growing, we can remove data from recursion head
          rest.recursionHeads -= rest.pos
          rest.getFromCache(p).get match {
            case MemoEntry(Right(x: ParseResult[?])) => x.asInstanceOf[ParseResult[T]]
            case _ => throw new Exception("impossible match")
          }
        }
      case _ =>
        //growth failed: stop, discard the failed attempt, keep the last good result
        rest.recursionHeads -= rest.pos
        /*rest.updateCacheAndGet(p, MemoEntry(Right(f)));*/oldRes
    }
  }
}
-------------------------------------------------------------------------------- /shared/src/main/scala/scala/util/parsing/combinator/Parsers.scala: --------------------------------------------------------------------------------
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc. dba Akka
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package util.parsing.combinator

import scala.util.parsing.input._
import scala.collection.mutable.ListBuffer
import scala.annotation.tailrec
import scala.language.implicitConversions

// TODO: better error handling (labelling like parsec's )

/** `Parsers` is a component that ''provides'' generic parser combinators.
 *
 * There are two abstract members that must be defined in order to
 * produce parsers: the type `Elem` and
 * [[scala.util.parsing.combinator.Parsers.Parser]]. There are helper
 * methods that produce concrete `Parser` implementations -- see ''primitive
 * parser'' below.
 *
 * A `Parsers` may define multiple `Parser` instances, which are combined
 * to produce the desired parser.
 *
 * The type of the elements these parsers should parse must be defined
 * by declaring `Elem`
 * (each parser is polymorphic in the type of result it produces).
 *
 * There are two aspects to the result of a parser:
 *  1. success or failure
 *  1. the result.
 *
 * A [[scala.util.parsing.combinator.Parsers.Parser]] produces both kinds of information,
 * by returning a [[scala.util.parsing.combinator.Parsers.ParseResult]] when its `apply`
 * method is called on an input.
 *
 * The term ''parser combinator'' refers to the fact that these parsers
 * are constructed from primitive parsers and composition operators, such
 * as sequencing, alternation, optionality, repetition, lifting, and so on. For example,
 * given `p1` and `p2` of type [[scala.util.parsing.combinator.Parsers.Parser]]:
 *
 * {{{
 *   p1 ~ p2 // sequencing: must match p1 followed by p2
 *   p1 | p2 // alternation: must match either p1 or p2, with preference given to p1
 *   p1.?    // optionality: may match p1 or not
 *   p1.*    // repetition: matches any number of repetitions of p1
 * }}}
 *
 * These combinators are provided as methods on [[scala.util.parsing.combinator.Parsers.Parser]],
 * or as methods taking one or more `Parsers` and returning a `Parser` provided in
 * this class.
 *
 * A ''primitive parser'' is a parser that accepts or rejects a single
 * piece of input, based on a certain criterion, such as whether the
 * input...
 *  - is equal to some given object (see method `accept`),
 *  - satisfies a certain predicate (see method `acceptIf`),
 *  - is in the domain of a given partial function (see method `acceptMatch`)
 *  - or other conditions, by using one of the other methods available, or subclassing `Parser`
 *
 * Even more primitive parsers always produce the same result, irrespective of the input. See
 * methods `success`, `err` and `failure` as examples.
 *
 * @see [[scala.util.parsing.combinator.RegexParsers]] and other known subclasses for practical examples.
 */
trait Parsers {
  /** the type of input elements the provided parsers consume (When consuming
   *  individual characters, a parser is typically called a ''scanner'', which
   *  produces ''tokens'' that are consumed by what is normally called a ''parser''.
   *  Nonetheless, the same principles apply, regardless of the input type.) */
  type Elem

  /** The parser input is an abstract reader of input elements, i.e. the type
   *  of input the parsers in this component expect. */
  type Input = Reader[Elem]

  /** A base class for parser results. A result is either successful or not
   *  (failure may be fatal, i.e., an Error, or not, i.e., a Failure). On
   *  success, provides a result of type `T` which consists of some result
   *  (and the rest of the input). */
  sealed abstract class ParseResult[+T] {
    /** Functional composition of ParseResults.
     *
     * @param f the function to be lifted over this result
     * @return `f` applied to the result of this `ParseResult`, packaged up as a new `ParseResult`
     */
    def map[U](f: T => U): ParseResult[U]

    /** Partial functional composition of ParseResults.
     *
     * @param f the partial function to be lifted over this result
     * @param error a function that takes the same argument as `f` and
     *        produces an error message to explain why `f` wasn't applicable
     *        (it is called when this is the case)
     * @return if `f` f is defined at the result in this `ParseResult`, `f`
     *         applied to the result of this `ParseResult`, packaged up as
     *         a new `ParseResult`. If `f` is not defined, `Failure`.
     */
    def mapPartial[U](f: PartialFunction[T, U], error: T => String): ParseResult[U]

    /** Monadic composition: on success, feeds the result and the remaining
     *  input to `f`; on failure, propagates this result. */
    def flatMapWithNext[U](f: T => Input => ParseResult[U]): ParseResult[U]

    /** Turns a success whose result fails predicate `p` into a `Failure`
     *  (reported at `position`); leaves other results untouched. */
    def filterWithError(p: T => Boolean, error: T => String, position: Input): ParseResult[T]

    /** Alternation support: on failure, tries the (lazily evaluated) `a`. */
    def append[U >: T](a: => ParseResult[U]): ParseResult[U]

    def isEmpty = !successful

    /** Returns the embedded result. */
    def get: T

    /** Returns the embedded result, or `default` if this is not a success. */
    def getOrElse[B >: T](default: => B): B =
      if (isEmpty) default else this.get

    /** The input that remains to be consumed after this result. */
    val next: Input

    val successful: Boolean
  }

  /** The success case of `ParseResult`: contains the result and the remaining input.
   *
   * @param result The parser's output
   * @param next The parser's remaining input
   */
  case class Success[+T](result: T, override val next: Input) extends ParseResult[T] {
    // The most advanced failure seen while producing this success; used to
    // report a more precise error if a later alternative fails.
    def lastFailure: Option[Failure] = None

    def map[U](f: T => U) = Success(f(result), next, lastFailure)

    def mapPartial[U](f: PartialFunction[T, U], error: T => String): ParseResult[U] =
      if(f.isDefinedAt(result)) Success(f(result), next, lastFailure)
      else Failure(error(result), next)

    def flatMapWithNext[U](f: T => Input => ParseResult[U]): ParseResult[U] = f(result)(next) match {
      case s @ Success(result, rest) =>
        // keep whichever recorded failure reached the furthest position
        val failure = selectLastFailure(this.lastFailure, s.lastFailure)
        Success(result, rest, failure)
      case f: Failure => selectLastFailure(Some(f), lastFailure).get
      case e: Error => e
    }

    def filterWithError(p: T => Boolean, error: T => String, position: Input): ParseResult[T] =
      if (p(result)) this
      else Failure(error(result), position)

    def append[U >: T](a: => ParseResult[U]): ParseResult[U] = this

    def get: T = result

    /** The toString method of a Success. */
    override def toString = s"[${next.pos}] parsed: $result"

    val successful = true
  }

  /** A common super-class for unsuccessful parse results.
   */
  sealed abstract class NoSuccess(val msg: String, override val next: Input) extends ParseResult[Nothing] { // when we don't care about the difference between Failure and Error
    val successful = false

    def map[U](f: Nothing => U) = this
    def mapPartial[U](f: PartialFunction[Nothing, U], error: Nothing => String): ParseResult[U] = this

    def flatMapWithNext[U](f: Nothing => Input => ParseResult[U]): ParseResult[U]
      = this

    def filterWithError(p: Nothing => Boolean, error: Nothing => String, position: Input): ParseResult[Nothing] = this

    def get: Nothing = scala.sys.error("No result when parsing failed")
  }

  /**
   * An extractor so `case NoSuccess(msg, next)` can be used in matches.
   *
   * Note: On Scala 2.13, using this extractor leads to an exhaustivity warning:
   *
   * {{{
   * def m(r: ParseResult[Int]) = r match {
   *   case Success(i) => ...
   *   case NoSuccess(msg, _) => ... // "warning: match may not be exhaustive"
   * }}}
   *
   * To eliminate this warning, use the irrefutable `NoSuccess.I` extractor.
   * Due to binary compatibility, `NoSuccess` itself cannot be changed.
   */
  object NoSuccess {
    def unapply[T](x: ParseResult[T]) = x match {
      case Failure(msg, next) => Some((msg, next))
      case Error(msg, next)   => Some((msg, next))
      case _                  => None
    }

    /** An irrefutable version of the `NoSuccess` extractor, used as `case NoSuccess.I(msg, next)`. */
    object I {
      def unapply(x: NoSuccess): Some[(String, Input)] = x match {
        case Failure(msg, next) => Some((msg, next))
        case Error(msg, next)   => Some((msg, next))
      }
    }
  }

  /** The failure case of `ParseResult`: contains an error-message and the remaining input.
   *  Parsing will back-track when a failure occurs.
   *
   * @param msg An error message string describing the failure.
   * @param next The parser's unconsumed input at the point where the failure occurred.
   */
  case class Failure(override val msg: String, override val next: Input) extends NoSuccess(msg, next) {
    /** The toString method of a Failure yields an error message. */
    override def toString = s"[${next.pos}] failure: $msg\n\n${next.pos.longString}"

    def append[U >: Nothing](a: => ParseResult[U]): ParseResult[U] = {
      val alt = a

      alt match {
        case s @ Success(result, rest) =>
          // the alternative succeeded; remember the furthest-reaching failure
          val failure = selectLastFailure(Some(this), s.lastFailure)
          Success(result, rest, failure)
        // both failed: keep whichever consumed more input before failing
        case _: NoSuccess => if (alt.next.pos < next.pos) this else alt
      }
    }
  }

  /** The fatal failure case of ParseResult: contains an error-message and
   *  the remaining input.
   *  No back-tracking is done when a parser returns an `Error`.
   *
   * @param msg An error message string describing the error.
   * @param next The parser's unconsumed input at the point where the error occurred.
   */
  case class Error(override val msg: String, override val next: Input) extends NoSuccess(msg, next) {
    /** The toString method of an Error yields an error message. */
    override def toString = s"[${next.pos}] error: $msg\n\n${next.pos.longString}"
    def append[U >: Nothing](a: => ParseResult[U]): ParseResult[U] = this
  }

  /** Creates a `Parser` from a function from input to parse result. */
  def Parser[T](f: Input => ParseResult[T]): Parser[T]
    = new Parser[T]{ def apply(in: Input) = f(in) }

  // Factory for a Success carrying a recorded last failure (used internally
  // for improved error reporting).
  private[combinator] def Success[U](res: U, next: Input, failure: Option[Failure]): ParseResult[U] =
    new Success(res, next) { override val lastFailure: Option[Failure] = failure }

  // Of two optional failures, picks the one whose position is further along
  // in the input (ties go to the first argument).
  private[combinator] def selectLastFailure(failure0: Option[Failure], failure1: Option[Failure]): Option[Failure] =
    (failure0, failure1) match {
      case (Some(f0), Some(f1)) =>
        if(f0.next.pos < f1.next.pos) Some(f1)
        else Some(f0)
      case (Some(f0), _) => Some(f0)
      case (_, Some(f1)) => Some(f1)
      case _ => None
    }

  /** Creates a non-backtracking (`OnceParser`) parser from a function. */
  def OnceParser[T](f: Input => ParseResult[T]): Parser[T] with OnceParser[T]
    = new Parser[T] with OnceParser[T] { def apply(in: Input) = f(in) }

  /** The root class of parsers.
   *  Parsers are functions from the Input type to ParseResult.
   */
  abstract class Parser[+T] extends (Input => ParseResult[T]) {
    private var name: String = ""
    /** Gives this parser a name, used only in `toString` for debugging. */
    def named(n: String): this.type = {name=n; this}
    override def toString = s"Parser ($name)"

    /** An unspecified method that defines the behaviour of this parser. */
    def apply(in: Input): ParseResult[T]

    def flatMap[U](f: T => Parser[U]): Parser[U]
      = Parser{ in => this(in) flatMapWithNext(f)}

    def map[U](f: T => U): Parser[U] //= flatMap{x => success(f(x))}
      = Parser{ in => this(in) map(f)}

    def filter(p: T => Boolean): Parser[T]
      = withFilter(p)

    def withFilter(p: T => Boolean): Parser[T]
      = Parser{ in => this(in).filterWithError(p, "Input doesn't match filter: "+_, in)}

    // no filter yet, dealing with zero is tricky!

    /** Alternation: applies this parser and, if it fails, applies `p0` to the
     *  same input (underlies the `|` combinator).
     *
     * @param p0 the alternative parser -- evaluated at most once, and only when necessary
     */
    def append[U >: T](p0: => Parser[U]): Parser[U] = { lazy val p = p0 // lazy argument
      Parser{ in => this(in) append p(in)}
    }

    // the operator formerly known as +++, ++, &, but now, behold the venerable ~
    // it's short, light (looks like whitespace), has few overloaded meanings (thanks to the recent change from ~ to unary_~)
    // and we love it! (or do we like `,` better?)

    /** A parser combinator for sequential composition.
     *
     *  `p ~ q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`.
     *
     * @param q a parser that will be executed after `p` (this parser)
     *          succeeds -- evaluated at most once, and only when necessary.
     * @return a `Parser` that -- on success -- returns a `~` (like a `Pair`,
     *         but easier to pattern match on) that contains the result of `p` and
     *         that of `q`. The resulting parser fails if either `p` or `q` fails.
     */
    def ~ [U](q: => Parser[U]): Parser[~[T, U]] = { lazy val p = q // lazy argument
      (for(a <- this; b <- p) yield new ~(a,b)).named("~")
    }

    /** A parser combinator for sequential composition which keeps only the right result.
     *
     *  `p ~> q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`.
     *
     * @param q a parser that will be executed after `p` (this parser)
     *          succeeds -- evaluated at most once, and only when necessary.
     * @return a `Parser` that -- on success -- returns the result of `q`.
     */
    def ~> [U](q: => Parser[U]): Parser[U] = { lazy val p = q // lazy argument
      (for(_ <- this; b <- p) yield b).named("~>")
    }

    /** A parser combinator for sequential composition which keeps only the left result.
     *
     *  `p <~ q` succeeds if `p` succeeds and `q` succeeds on the input
     *  left over by `p`.
     *
     * @note <~ has lower operator precedence than ~ or ~>.
     *
     * @param q a parser that will be executed after `p` (this parser) succeeds -- evaluated at most once, and only when necessary
     * @return a `Parser` that -- on success -- returns the result of `p`.
     */
    def <~ [U](q: => Parser[U]): Parser[T] = { lazy val p = q // lazy argument
      (for(a <- this; _ <- p) yield a).named("<~")
    }

    /**
     * A parser combinator for exceptions.
     *
     * `p - q` succeeds if `p` succeeds, and `q` fails on the same input given `p`.
     *
     * @param q a parser that will be executed before `p` (this parser). q will not consume the input.
     * @return a `Parser` that returns the result of `p` (this parser) if it succeeds and q fails. If q succeeds, the parser will fail.
     */
    def - [U](q: Parser[U]): Parser[T] = (not(q) ~> this).named("-")

    /* not really useful: V cannot be inferred because Parser is covariant in first type parameter (V is always trivially Nothing)
    def ~~ [U, V](q: => Parser[U])(implicit combine: (T, U) => V): Parser[V] = new Parser[V] {
      def apply(in: Input) = seq(Parser.this, q)((x, y) => combine(x,y))(in)
    } */

    /** A parser combinator for non-back-tracking sequential composition.
     *
     *  `p ~! q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`.
     *  In case of failure, no back-tracking is performed (in an earlier parser produced by the `|` combinator).
     *
     * @param p a parser that will be executed after `p` (this parser) succeeds
     * @return a `Parser` that -- on success -- returns a `~` (like a Pair, but easier to pattern match on)
     *         that contains the result of `p` and that of `q`.
     *         The resulting parser fails if either `p` or `q` fails, this failure is fatal.
     */
    def ~! [U](p: => Parser[U]): Parser[~[T, U]]
      = OnceParser{ (for(a <- this; b <- commit(p)) yield new ~(a,b)).named("~!") }


    /** A parser combinator for non-back-tracking sequential composition which only keeps the right result.
     *
     *  `p ~>! q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`.
     *  In case of failure, no back-tracking is performed (in an earlier parser produced by the `|` combinator).
     *
     * @param q a parser that will be executed after `p` (this parser) succeeds -- evaluated at most once, and only when necessary
     * @return a `Parser` that -- on success -- returns the result of `q`.
     *         The resulting parser fails if either `p` or `q` fails, this failure is fatal.
     */
    def ~>! [U](q: => Parser[U]): Parser[U] = { lazy val p = q // lazy argument
      OnceParser { (for(_ <- this; b <- commit(p)) yield b).named("~>!") }
    }

    /** A parser combinator for non-back-tracking sequential composition which only keeps the left result.
     *
     *  `p <~! q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`.
     *  In case of failure, no back-tracking is performed (in an earlier parser produced by the `|` combinator).
     *
     * @param q a parser that will be executed after `p` (this parser) succeeds -- evaluated at most once, and only when necessary
     * @return a `Parser` that -- on success -- returns the result of `p`.
     *         The resulting parser fails if either `p` or `q` fails, this failure is fatal.
     */
    def <~! [U](q: => Parser[U]): Parser[T] = { lazy val p = q // lazy argument
      OnceParser { (for(a <- this; _ <- commit(p)) yield a).named("<~!") }
    }


    /** A parser combinator for alternative composition.
     *
     *  `p | q` succeeds if `p` succeeds or `q` succeeds.
     *  Note that `q` is only tried if `p`s failure is non-fatal (i.e., back-tracking is allowed).
     *
     * @param q a parser that will be executed if `p` (this parser) fails (and allows back-tracking)
     * @return a `Parser` that returns the result of the first parser to succeed (out of `p` and `q`)
     *         The resulting parser succeeds if (and only if)
     *         - `p` succeeds, ''or''
     *         - if `p` fails allowing back-tracking and `q` succeeds.
     */
    def | [U >: T](q: => Parser[U]): Parser[U] = append(q).named("|")

    /** A parser combinator for alternative with longest match composition.
     *
     *  `p ||| q` succeeds if `p` succeeds or `q` succeeds.
     *  If `p` and `q` both succeed, the parser that consumed the most characters accepts.
     *
     * @param q0 a parser that accepts if p consumes less characters. -- evaluated at most once, and only when necessary
     * @return a `Parser` that returns the result of the parser consuming the most characters (out of `p` and `q`).
     */
    def ||| [U >: T](q0: => Parser[U]): Parser[U] = new Parser[U] {
      lazy val q = q0 // lazy argument
      def apply(in: Input) = {
        // Note: both alternatives are always run; the one that consumed more
        // input wins, with ties going to the first (res1).
        val res1 = Parser.this(in)
        val res2 = q(in)

        // compiler thinks match isn't exhaustive; perhaps it's right, but does that mean there's a bug here?
        // that's not clear to me, so for now let's just `@unchecked` it
        ((res1, res2): @unchecked) match {
          case (s1 @ Success(_, next1), s2 @ Success(_, next2)) => if (next2.pos < next1.pos || next2.pos == next1.pos) s1 else s2
          case (s1 @ Success(_, _), _) => s1
          case (_, s2 @ Success(_, _)) => s2
          case (e1 @ Error(_, _), _) => e1
          case (f1 @ Failure(_, next1), ns2 @ NoSuccess(_, next2)) => if (next2.pos < next1.pos || next2.pos == next1.pos) f1 else ns2
        }
      }
      override def toString = "|||"
    }

    /** A parser combinator for function application.
     *
     *  `p ^^ f` succeeds if `p` succeeds; it returns `f` applied to the result of `p`.
     *
     * @param f a function that will be applied to this parser's result (see `map` in `ParseResult`).
     * @return a parser that has the same behaviour as the current parser, but whose result is
     *         transformed by `f`.
     */
    def ^^ [U](f: T => U): Parser[U] = map(f).named(toString+"^^")

    /** A parser combinator that changes a successful result into the specified value.
     *
     *  `p ^^^ v` succeeds if `p` succeeds; discards its result, and returns `v` instead.
     *
     * @param v The new result for the parser, evaluated at most once (if `p` succeeds), not evaluated at all if `p` fails.
     * @return a parser that has the same behaviour as the current parser, but whose successful result is `v`
     */
    def ^^^ [U](v: => U): Parser[U] = new Parser[U] {
      lazy val v0 = v // lazy argument
      def apply(in: Input) = Parser.this(in) map (_ => v0)
    }.named(toString+"^^^")

    /** A parser combinator for partial function application.
     *
     *  `p ^? (f, error)` succeeds if `p` succeeds AND `f` is defined at the result of `p`;
     *  in that case, it returns `f` applied to the result of `p`. If `f` is not applicable,
     *  error(the result of `p`) should explain why.
     *
     * @param f a partial function that will be applied to this parser's result
     *          (see `mapPartial` in `ParseResult`).
     * @param error a function that takes the same argument as `f` and produces an error message
     *        to explain why `f` wasn't applicable
     * @return a parser that succeeds if the current parser succeeds and `f` is applicable
     *         to the result. If so, the result will be transformed by `f`.
     */
    def ^? [U](f: PartialFunction[T, U], error: T => String): Parser[U] = Parser{ in =>
      this(in).mapPartial(f, error)}.named(toString+"^?")

    /** A parser combinator for partial function application.
     *
     *  `p ^? f` succeeds if `p` succeeds AND `f` is defined at the result of `p`;
     *  in that case, it returns `f` applied to the result of `p`.
     *
     * @param f a partial function that will be applied to this parser's result
     *          (see `mapPartial` in `ParseResult`).
     * @return a parser that succeeds if the current parser succeeds and `f` is applicable
     *         to the result. If so, the result will be transformed by `f`.
     */
    def ^? [U](f: PartialFunction[T, U]): Parser[U] = ^?(f, r => "Constructor function not defined at "+r)

    /** A parser combinator that parameterizes a subsequent parser with the
     *  result of this one.
     *
     *  Use this combinator when a parser depends on the result of a previous
     *  parser. `p` should be a function that takes the result from the first
     *  parser and returns the second parser.
     *
     *  `p into fq` (with `fq` typically `{x => q}`) first applies `p`, and
     *  then, if `p` successfully returned result `r`, applies `fq(r)` to the
     *  rest of the input.
     *
     *  ''From: G. Hutton. Higher-order functions for parsing. J. Funct. Program., 2(3):323--343, 1992.''
     *
     * @example {{{
     *  def perlRE = "m" ~> (".".r into (separator => """[^%s]*""".format(separator).r <~ separator))
     *  }}}
     *
     * @param fq a function that, given the result from this parser, returns
     *        the second parser to be applied
     * @return a parser that succeeds if this parser succeeds (with result `x`)
     *         and if then `fq(x)` succeeds
     */
    def into[U](fq: T => Parser[U]): Parser[U] = flatMap(fq)

    // shortcuts for combinators:

    /** Returns `into(fq)`. */
    def >>[U](fq: T => Parser[U])=into(fq)

    /** Returns a parser that repeatedly parses what this parser parses.
     *
     * @return rep(this)
     */
    def * = rep(this)

    /** Returns a parser that repeatedly parses what this parser parses,
     *  interleaved with the `sep` parser. The `sep` parser specifies how
     *  the results parsed by this parser should be combined.
     *
     * @return chainl1(this, sep)
     */
    def *[U >: T](sep: => Parser[(U, U) => U]) = chainl1(this, sep)

    // TODO: improve precedence? a ~ b*(",") = a ~ (b*(",")) should be true

    /** Returns a parser that repeatedly (at least once) parses what this parser parses.
     *
     * @return rep1(this)
     */
    def + = rep1(this)

    /** Returns a parser that optionally parses what this parser parses.
     *
     * @return opt(this)
     */
    def ? = opt(this)

    /** Changes the failure message produced by a parser.
     *
     *  This doesn't change the behavior of a parser on neither
     *  success nor error, just on failure. The semantics are
     *  slightly different than those obtained by doing `| failure(msg)`,
     *  in that the message produced by this method will always
     *  replace the message produced, which is not guaranteed
     *  by that idiom.
     *
     *  For example, parser `p` below will always produce the
     *  designated failure message, while `q` will not produce
     *  it if `sign` is parsed but `number` is not.
     *
     *  {{{
     *  def p = sign.? ~ number withFailureMessage  "Number expected!"
     *  def q = sign.? ~ number | failure("Number expected!")
     *  }}}
     *
     * @param msg The message that will replace the default failure message.
     * @return A parser with the same properties and different failure message.
     */
    def withFailureMessage(msg: String) = Parser{ in =>
      this(in) match {
        case Failure(_, next) => Failure(msg, next)
        case other            => other
      }
    }

    /** Changes the error message produced by a parser.
568 | * 569 | * This doesn't change the behavior of a parser on neither 570 | * success nor failure, just on error. The semantics are 571 | * slightly different than those obtained by doing `| error(msg)`, 572 | * in that the message produced by this method will always 573 | * replace the message produced, which is not guaranteed 574 | * by that idiom. 575 | * 576 | * For example, parser `p` below will always produce the 577 | * designated error message, while `q` will not produce 578 | * it if `sign` is parsed but `number` is not. 579 | * 580 | * {{{ 581 | * def p = sign.? ~ number withErrorMessage "Number expected!" 582 | * def q = sign.? ~ number | error("Number expected!") 583 | * }}} 584 | * 585 | * @param msg The message that will replace the default error message. 586 | * @return A parser with the same properties and different error message. 587 | */ 588 | def withErrorMessage(msg: String) = Parser{ in => 589 | this(in) match { 590 | case Error(_, next) => Error(msg, next) 591 | case other => other 592 | } 593 | } 594 | } 595 | 596 | /** Wrap a parser so that its failures become errors (the `|` combinator 597 | * will give up as soon as it encounters an error, on failure it simply 598 | * tries the next alternative). 599 | */ 600 | def commit[T](p: => Parser[T]) = Parser{ in => 601 | p(in) match{ 602 | case s @ Success(_, _) => s 603 | case e @ Error(_, _) => e 604 | case Failure(msg, next) => Error(msg, next) 605 | } 606 | } 607 | 608 | /** A parser matching input elements that satisfy a given predicate. 609 | * 610 | * `elem(kind, p)` succeeds if the input starts with an element `e` for which `p(e)` is true. 611 | * 612 | * @param kind The element kind, used for error messages 613 | * @param p A predicate that determines which elements match. 614 | * @return 615 | */ 616 | def elem(kind: String, p: Elem => Boolean) = acceptIf(p)(_ => kind + " expected") 617 | 618 | /** A parser that matches only the given element `e`. 
619 | * 620 | * `elem(e)` succeeds if the input starts with an element `e`. 621 | * 622 | * @param e the `Elem` that must be the next piece of input for the returned parser to succeed 623 | * @return a `Parser` that succeeds if `e` is the next available input (and returns it). 624 | */ 625 | def elem(e: Elem): Parser[Elem] = accept(e) 626 | 627 | /** A parser that matches only the given element `e`. 628 | * 629 | * The method is implicit so that elements can automatically be lifted to their parsers. 630 | * For example, when parsing `Token`s, `Identifier("new")` (which is a `Token`) can be used directly, 631 | * instead of first creating a `Parser` using `accept(Identifier("new"))`. 632 | * 633 | * @param e the `Elem` that must be the next piece of input for the returned parser to succeed 634 | * @return a `tParser` that succeeds if `e` is the next available input. 635 | */ 636 | 637 | implicit def accept(e: Elem): Parser[Elem] = acceptIf(_ == e)("'"+e+"' expected but " + _ + " found") 638 | 639 | /** A parser that matches only the given list of element `es`. 640 | * 641 | * `accept(es)` succeeds if the input subsequently provides the elements in the list `es`. 642 | * 643 | * @param es the list of expected elements 644 | * @return a Parser that recognizes a specified list of elements 645 | */ 646 | def accept[ES](es: ES)(implicit f: ES => List[Elem]): Parser[List[Elem]] = acceptSeq(es) 647 | 648 | /** The parser that matches an element in the domain of the partial function `f`. 649 | * 650 | * If `f` is defined on the first element in the input, `f` is applied 651 | * to it to produce this parser's result. 
652 | * 653 | * Example: The parser `accept("name", {case Identifier(n) => Name(n)})` 654 | * accepts an `Identifier(n)` and returns a `Name(n)` 655 | * 656 | * @param expected a description of the kind of element this parser expects (for error messages) 657 | * @param f a partial function that determines when this parser is successful and what its output is 658 | * @return A parser that succeeds if `f` is applicable to the first element of the input, 659 | * applying `f` to it to produce the result. 660 | */ 661 | def accept[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = acceptMatch(expected, f) 662 | 663 | /** A parser matching input elements that satisfy a given predicate. 664 | * 665 | * `acceptIf(p)(el => "Unexpected "+el)` succeeds if the input starts with an element `e` for which `p(e)` is true. 666 | * 667 | * @param err A function from the received element into an error message. 668 | * @param p A predicate that determines which elements match. 669 | * @return A parser for elements satisfying p(e). 670 | */ 671 | def acceptIf(p: Elem => Boolean)(err: Elem => String): Parser[Elem] = Parser { in => 672 | if (in.atEnd) Failure("end of input", in) 673 | else if (p(in.first)) Success(in.first, in.rest, None) 674 | else Failure(err(in.first), in) 675 | } 676 | 677 | /** The parser that matches an element in the domain of the partial function `f`. 678 | * 679 | * If `f` is defined on the first element in the input, `f` is applied 680 | * to it to produce this parser's result. 
681 | * 682 | * Example: The parser `acceptMatch("name", {case Identifier(n) => Name(n)})` 683 | * accepts an `Identifier(n)` and returns a `Name(n)` 684 | * 685 | * @param expected a description of the kind of element this parser expects (for error messages) 686 | * @param f a partial function that determines when this parser is successful and what its output is 687 | * @return A parser that succeeds if `f` is applicable to the first element of the input, 688 | * applying `f` to it to produce the result. 689 | */ 690 | def acceptMatch[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = Parser{ in => 691 | if (in.atEnd) Failure("end of input", in) 692 | else if (f.isDefinedAt(in.first)) Success(f(in.first), in.rest, None) 693 | else Failure(expected+" expected", in) 694 | } 695 | 696 | /** A parser that matches only the given [[scala.collection.Iterable]] collection of elements `es`. 697 | * 698 | * `acceptSeq(es)` succeeds if the input subsequently provides the elements in the iterable `es`. 699 | * 700 | * @param es the list of expected elements 701 | * @return a Parser that recognizes a specified list of elements 702 | */ 703 | def acceptSeq[ES](es: ES)(implicit f: ES => Iterable[Elem]): Parser[List[Elem]] = { 704 | f(es) // explicit conversion for dotty 705 | .foldRight[Parser[List[Elem]]](success(Nil)){(x, pxs) => accept(x) ~ pxs ^^ mkList} 706 | } 707 | 708 | /** A parser that always fails. 709 | * 710 | * @param msg The error message describing the failure. 711 | * @return A parser that always fails with the specified error message. 712 | */ 713 | def failure(msg: String) = Parser{ in => Failure(msg, in) } 714 | 715 | /** A parser that results in an error. 716 | * 717 | * @param msg The error message describing the failure. 718 | * @return A parser that always fails with the specified error message. 719 | */ 720 | def err(msg: String) = Parser{ in => Error(msg, in) } 721 | 722 | /** A parser that always succeeds. 
723 | * 724 | * @param v The result for the parser 725 | * @return A parser that always succeeds, with the given result `v` 726 | */ 727 | def success[T](v: T) = Parser{ in => Success(v, in, None) } 728 | 729 | /** A helper method that turns a `Parser` into one that will 730 | * print debugging information to stdout before and after 731 | * being applied. 732 | */ 733 | def log[T](p: => Parser[T])(name: String): Parser[T] = Parser{ in => 734 | println("trying "+ name +" at "+ in) 735 | val r = p(in) 736 | println(name +" --> "+ r) 737 | r 738 | } 739 | 740 | /** A parser generator for repetitions. 741 | * 742 | * `rep(p)` repeatedly uses `p` to parse the input until `p` fails 743 | * (the result is a List of the consecutive results of `p`). 744 | * 745 | * @param p a `Parser` that is to be applied successively to the input 746 | * @return A parser that returns a list of results produced by repeatedly applying `p` to the input. 747 | */ 748 | def rep[T](p: => Parser[T]): Parser[List[T]] = rep1(p) | success(List()) 749 | 750 | /** A parser generator for interleaved repetitions. 751 | * 752 | * `repsep(p, q)` repeatedly uses `p` interleaved with `q` to parse the input, until `p` fails. 753 | * (The result is a `List` of the results of `p`.) 754 | * 755 | * Example: `repsep(term, ",")` parses a comma-separated list of term's, yielding a list of these terms. 756 | * 757 | * @param p a `Parser` that is to be applied successively to the input 758 | * @param q a `Parser` that parses the elements that separate the elements parsed by `p` 759 | * @return A parser that returns a list of results produced by repeatedly applying `p` (interleaved with `q`) to the input. 760 | * The results of `p` are collected in a list. The results of `q` are discarded. 761 | */ 762 | def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] = 763 | rep1sep(p, q) | success(List()) 764 | 765 | /** A parser generator for non-empty repetitions. 
766 | * 767 | * `rep1(p)` repeatedly uses `p` to parse the input until `p` fails -- `p` must succeed at least 768 | * once (the result is a `List` of the consecutive results of `p`) 769 | * 770 | * @param p a `Parser` that is to be applied successively to the input 771 | * @return A parser that returns a list of results produced by repeatedly applying `p` to the input 772 | * (and that only succeeds if `p` matches at least once). 773 | */ 774 | def rep1[T](p: => Parser[T]): Parser[List[T]] = rep1(p, p) 775 | 776 | /** A parser generator for non-empty repetitions. 777 | * 778 | * `rep1(f, p)` first uses `f` (which must succeed) and then repeatedly 779 | * uses `p` to parse the input until `p` fails 780 | * (the result is a `List` of the consecutive results of `f` and `p`) 781 | * 782 | * @param first a `Parser` that parses the first piece of input 783 | * @param p0 a `Parser` that is to be applied successively to the rest of the input (if any) -- evaluated at most once, and only when necessary 784 | * @return A parser that returns a list of results produced by first applying `f` and then 785 | * repeatedly `p` to the input (it only succeeds if `f` matches). 
786 | */ 787 | def rep1[T](first: => Parser[T], p0: => Parser[T]): Parser[List[T]] = Parser { in => 788 | lazy val p = p0 // lazy argument 789 | val elems = new ListBuffer[T] 790 | 791 | def continue(in: Input, failure: Option[Failure]): ParseResult[List[T]] = { 792 | val p0 = p // avoid repeatedly re-evaluating by-name parser 793 | @tailrec def applyp(in0: Input, failure: Option[Failure]): ParseResult[List[T]] = p0(in0) match { 794 | case s @ Success(x, rest) => 795 | val selectedFailure = selectLastFailure(s.lastFailure, failure) 796 | elems += x 797 | applyp(rest, selectedFailure) 798 | case e @ Error(_, _) => e // still have to propagate error 799 | case f: Failure => 800 | val selectedFailure = selectLastFailure(failure, Some(f)) 801 | Success(elems.toList, in0, selectedFailure) 802 | } 803 | 804 | applyp(in, failure) 805 | } 806 | 807 | first(in) match { 808 | case s @ Success(x, rest) => elems += x ; continue(rest, s.lastFailure) 809 | case ns: NoSuccess => ns 810 | } 811 | } 812 | 813 | /** A parser generator for a specified number of repetitions. 814 | * 815 | * `repN(n, p)` uses `p` exactly `n` time to parse the input 816 | * (the result is a `List` of the `n` consecutive results of `p`). 817 | * 818 | * @param p a `Parser` that is to be applied successively to the input 819 | * @param num the exact number of times `p` must succeed 820 | * @return A parser that returns a list of results produced by repeatedly applying `p` to the input 821 | * (and that only succeeds if `p` matches exactly `n` times). 
822 | */ 823 | def repN[T](num: Int, p: => Parser[T]): Parser[List[T]] = 824 | if (num == 0) success(Nil) else Parser { in => 825 | val elems = new ListBuffer[T] 826 | val p0 = p // avoid repeatedly re-evaluating by-name parser 827 | 828 | @tailrec def applyp(in0: Input, failure: Option[Failure]): ParseResult[List[T]] = 829 | if (elems.length == num) Success(elems.toList, in0, failure) 830 | else p0(in0) match { 831 | case s @ Success(x, rest) => elems += x ; applyp(rest, s.lastFailure) 832 | case ns: NoSuccess => ns 833 | } 834 | 835 | applyp(in, None) 836 | } 837 | 838 | /** A parser generator for a specified range of repetitions interleaved by a 839 | * separator. 840 | * 841 | * `repNM(n, m, p, s)` uses `p` at least `n` times and up to `m` times, interleaved 842 | * with separator `s`, to parse the input 843 | * (the result is a `List` of at least `n` consecutive results of `p` and up to `m` results). 844 | * 845 | * @param n minimum number of repetitions 846 | * @param m maximum number of repetitions 847 | * @param p a `Parser` that is to be applied successively to the input 848 | * @param sep a `Parser` that interleaves with p 849 | * @return A parser that returns a list of results produced by repeatedly applying `p` interleaved 850 | * with `sep` to the input. The list has a size between `n` and up to `m` 851 | * (and that only succeeds if `p` matches at least `n` times). 
852 | */ 853 | def repNM[T](n: Int, m: Int, p: Parser[T], sep: Parser[Any] = success(())): Parser[List[T]] = Parser { in => 854 | val mandatory = if (n == 0) success(Nil) else (p ~ repN(n - 1, sep ~> p)).map { case head ~ tail => head :: tail } 855 | val elems = new ListBuffer[T] 856 | 857 | def continue(in: Input): ParseResult[List[T]] = { 858 | val p0 = sep ~> p // avoid repeatedly re-evaluating by-name parser 859 | @tailrec def applyp(in0: Input): ParseResult[List[T]] = p0(in0) match { 860 | case Success(x, rest) => elems += x; if (elems.length == m) Success(elems.toList, rest, None) else applyp(rest) 861 | case e @ Error(_, _) => e // still have to propagate error 862 | case _ => Success(elems.toList, in0, None) 863 | } 864 | 865 | applyp(in) 866 | } 867 | 868 | mandatory(in) match { 869 | case Success(x, rest) => elems ++= x; continue(rest) 870 | case ns: NoSuccess => ns 871 | } 872 | } 873 | 874 | /** A parser generator for non-empty repetitions. 875 | * 876 | * `rep1sep(p, q)` repeatedly applies `p` interleaved with `q` to parse the 877 | * input, until `p` fails. The parser `p` must succeed at least once. 878 | * 879 | * @param p a `Parser` that is to be applied successively to the input 880 | * @param q a `Parser` that parses the elements that separate the elements parsed by `p` 881 | * (interleaved with `q`) 882 | * @return A parser that returns a list of results produced by repeatedly applying `p` to the input 883 | * (and that only succeeds if `p` matches at least once). 884 | * The results of `p` are collected in a list. The results of `q` are discarded. 885 | */ 886 | def rep1sep[T](p : => Parser[T], q : => Parser[Any]): Parser[List[T]] = 887 | p ~ rep(q ~> p) ^^ {case x~y => x::y} 888 | 889 | /** A parser generator that, roughly, generalises the rep1sep generator so 890 | * that `q`, which parses the separator, produces a left-associative 891 | * function that combines the elements it separates. 892 | * 893 | * ''From: J. Fokker. Functional parsers. 
In J. Jeuring and E. Meijer, editors, Advanced Functional Programming, 894 | * volume 925 of Lecture Notes in Computer Science, pages 1--23. Springer, 1995.'' 895 | * 896 | * @param p a parser that parses the elements 897 | * @param q a parser that parses the token(s) separating the elements, yielding a left-associative function that 898 | * combines two elements into one 899 | */ 900 | def chainl1[T](p: => Parser[T], q: => Parser[(T, T) => T]): Parser[T] 901 | = chainl1(p, p, q) 902 | 903 | /** A parser generator that, roughly, generalises the `rep1sep` generator 904 | * so that `q`, which parses the separator, produces a left-associative 905 | * function that combines the elements it separates. 906 | * 907 | * @param first a parser that parses the first element 908 | * @param p a parser that parses the subsequent elements 909 | * @param q a parser that parses the token(s) separating the elements, 910 | * yielding a left-associative function that combines two elements 911 | * into one 912 | */ 913 | def chainl1[T, U](first: => Parser[T], p: => Parser[U], q: => Parser[(T, U) => T]): Parser[T] 914 | = first ~ rep(q ~ p) ^^ { 915 | case x ~ xs => xs.foldLeft(x: T){case (a, f ~ b) => f(a, b)} // x's type annotation is needed to deal with changed type inference due to SI-5189 916 | } 917 | 918 | /** A parser generator that generalises the `rep1sep` generator so that `q`, 919 | * which parses the separator, produces a right-associative function that 920 | * combines the elements it separates. Additionally, the right-most (last) 921 | * element and the left-most combining function have to be supplied. 
922 | * 923 | * rep1sep(p: Parser[T], q) corresponds to chainr1(p, q ^^ cons, cons, Nil) (where val cons = (x: T, y: List[T]) => x :: y) 924 | * 925 | * @param p a parser that parses the elements 926 | * @param q a parser that parses the token(s) separating the elements, yielding a right-associative function that 927 | * combines two elements into one 928 | * @param combine the "last" (left-most) combination function to be applied 929 | * @param first the "first" (right-most) element to be combined 930 | */ 931 | def chainr1[T, U](p: => Parser[T], q: => Parser[(T, U) => U], combine: (T, U) => U, first: U): Parser[U] 932 | = p ~ rep(q ~ p) ^^ { 933 | case x ~ xs => (new ~(combine, x) :: xs).foldRight(first){case (f ~ a, b) => f(a, b)} 934 | } 935 | 936 | /** A parser generator for optional sub-phrases. 937 | * 938 | * `opt(p)` is a parser that returns `Some(x)` if `p` returns `x` and `None` if `p` fails. 939 | * 940 | * @param p A `Parser` that is tried on the input 941 | * @return a `Parser` that always succeeds: either with the result provided by `p` or 942 | * with the empty result 943 | */ 944 | def opt[T](p: => Parser[T]): Parser[Option[T]] = 945 | p ^^ (x => Some(x)) | success(None) 946 | 947 | /** Wrap a parser so that its failures and errors become success and 948 | * vice versa -- it never consumes any input. 949 | */ 950 | def not[T](p: => Parser[T]): Parser[Unit] = Parser { in => 951 | p(in) match { 952 | case Success(_, _) => Failure("Expected failure", in) 953 | case _ => Success((), in, None) 954 | } 955 | } 956 | 957 | /** A parser generator for guard expressions. The resulting parser will 958 | * fail or succeed just like the one given as parameter but it will not 959 | * consume any input. 
960 | * 961 | * @param p a `Parser` that is to be applied to the input 962 | * @return A parser that returns success if and only if `p` succeeds but 963 | * never consumes any input 964 | */ 965 | def guard[T](p: => Parser[T]): Parser[T] = Parser { in => 966 | p(in) match{ 967 | case s@ Success(s1,_) => Success(s1, in, s.lastFailure) 968 | case e => e 969 | } 970 | } 971 | 972 | /** `positioned` decorates a parser's result with the start position of the 973 | * input it consumed. 974 | * 975 | * @param p a `Parser` whose result conforms to `Positional`. 976 | * @return A parser that has the same behaviour as `p`, but which marks its 977 | * result with the start position of the input it consumed, 978 | * if it didn't already have a position. 979 | */ 980 | def positioned[T <: Positional](p: => Parser[T]): Parser[T] = Parser { in => 981 | p(in) match { 982 | case s @ Success(t, in1) => Success(if (t.pos == NoPosition) t setPos in.pos else t, in1, s.lastFailure) 983 | case ns: NoSuccess => ns 984 | } 985 | } 986 | 987 | /** A parser generator delimiting whole phrases (i.e. programs). 988 | * 989 | * `phrase(p)` succeeds if `p` succeeds and no input is left over after `p`. 990 | * 991 | * @param p the parser that must consume all input for the resulting parser 992 | * to succeed. 993 | * @return a parser that has the same result as `p`, but that only succeeds 994 | * if `p` consumed all the input. 995 | */ 996 | def phrase[T](p: Parser[T]) = new Parser[T] { 997 | def apply(in: Input) = p(in) match { 998 | case s @ Success(_, in1) => 999 | if (in1.atEnd) s 1000 | else s.lastFailure match { 1001 | case Some(failure) => failure 1002 | case _ => Failure("end of input expected", in1) 1003 | } 1004 | case ns => ns 1005 | } 1006 | } 1007 | 1008 | /** Given a concatenation with a repetition (list), move the concatenated element into the list */ 1009 | def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } 1010 | 1011 | /** A wrapper over sequence of matches. 
1012 | * 1013 | * Given `p1: Parser[A]` and `p2: Parser[B]`, a parser composed with 1014 | * `p1 ~ p2` will have type `Parser[~[A, B]]`. The successful result 1015 | * of the parser can be extracted from this case class. 1016 | * 1017 | * It also enables pattern matching, so something like this is possible: 1018 | * 1019 | * {{{ 1020 | * def concat(p1: Parser[String], p2: Parser[String]): Parser[String] = 1021 | * p1 ~ p2 ^^ { case a ~ b => a + b } 1022 | * }}} 1023 | */ 1024 | case class ~[+a, +b](_1: a, _2: b) { 1025 | override def toString = s"(${_1}~${_2})" 1026 | } 1027 | 1028 | /** A parser whose `~` combinator disallows back-tracking. 1029 | */ 1030 | trait OnceParser[+T] extends Parser[T] { 1031 | override def ~ [U](p: => Parser[U]): Parser[~[T, U]] 1032 | = OnceParser{ (for(a <- this; b <- commit(p)) yield new ~(a,b)).named("~") } 1033 | 1034 | override def ~> [U](p: => Parser[U]): Parser[U] 1035 | = OnceParser{ (for(_ <- this; b <- commit(p)) yield b).named("~>") } 1036 | 1037 | override def <~ [U](p: => Parser[U]): Parser[T] 1038 | = OnceParser{ (for(a <- this; _ <- commit(p)) yield a).named("<~") } 1039 | } 1040 | } 1041 | --------------------------------------------------------------------------------