├── .github └── workflows │ └── go.yml ├── .gitignore ├── .golangci.yml ├── LICENSE ├── README.md ├── TODO.md ├── branch.go ├── branch_test.go ├── bytes.go ├── bytes_test.go ├── characters.go ├── characters_test.go ├── combinators.go ├── combinators_test.go ├── containers.go ├── error.go ├── examples ├── .gitignore ├── csv │ ├── csv.go │ └── csv_test.go ├── hexcolor │ ├── hexcolor.go │ └── hexcolor_test.go ├── json │ ├── json.go │ └── test.json └── redis │ ├── redis.go │ └── redis_test.go ├── go.mod ├── go.sum ├── logo.png ├── multi.go ├── multi_test.go ├── numbers.go ├── sequence.go └── sequence_test.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v3 18 | with: 19 | go-version: 1.18 20 | 21 | - name: Build 22 | run: go build -v ./... 23 | 24 | - name: Test 25 | run: go test -v ./... 26 | 27 | - name: Lint 28 | uses: golangci/golangci-lint-action@v3.2.0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | examples/redis/testdata -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | deadline: 5m 3 | 4 | issues: 5 | # Maximum issues count per one linter. 
Set to 0 to disable. Default is 50. 6 | max-issues-per-linter: 0 7 | # Maximum count of issues with the same text. Set to 0 to disable. Default is 3. 8 | max-same-issues: 0 9 | 10 | # We want to try and improve the comments in the k6 codebase, so individual 11 | # non-golint items from the default exclusion list will gradually be added 12 | # to the exclude-rules below 13 | exclude-use-default: false 14 | 15 | exclude-rules: 16 | # Exclude duplicate code and function length and complexity checking in test 17 | # files (due to common repeats and long functions in test code) 18 | - path: _(test|gen)\.go 19 | linters: 20 | - cyclop 21 | - dupl 22 | - gocognit 23 | - funlen 24 | - lll 25 | - linters: 26 | - staticcheck # Tracked in https://github.com/grafana/xk6-grpc/issues/14 27 | text: "The entire proto file grpc/reflection/v1alpha/reflection.proto is marked as deprecated." 28 | 29 | linters-settings: 30 | exhaustive: 31 | default-signifies-exhaustive: true 32 | govet: 33 | check-shadowing: true 34 | cyclop: 35 | max-complexity: 25 36 | maligned: 37 | suggest-new: true 38 | dupl: 39 | threshold: 150 40 | goconst: 41 | min-len: 10 42 | min-occurrences: 4 43 | funlen: 44 | lines: 80 45 | statements: 60 46 | forbidigo: 47 | forbid: 48 | - '^(fmt\\.Print(|f|ln)|print|println)$' 49 | # Forbid everything in syscall except the uppercase constants 50 | - '^syscall\.[^A-Z_]+$(# Using anything except constants from the syscall package is forbidden )?' 
51 | - '^logrus\.Logger$' 52 | revive: 53 | rules: 54 | - name: package-comments 55 | severity: warning 56 | disabled: true 57 | stylecheck: 58 | checks: 59 | - "all" 60 | - '-ST1000' # Incorrect or missing package comment 61 | 62 | linters: 63 | disable-all: true 64 | enable: 65 | - asasalint 66 | - asciicheck 67 | - bidichk 68 | - bodyclose 69 | - contextcheck 70 | - cyclop 71 | - dogsled 72 | - dupl 73 | - durationcheck 74 | - errcheck 75 | - errchkjson 76 | - errname 77 | - errorlint 78 | - exhaustive 79 | - exportloopref 80 | - forbidigo 81 | - forcetypeassert 82 | - funlen 83 | - gocheckcompilerdirectives 84 | - gochecknoglobals 85 | - gocognit 86 | - goconst 87 | - gocritic 88 | - gofmt 89 | - gofumpt 90 | - goimports 91 | - gomoddirectives 92 | - goprintffuncname 93 | - gosec 94 | - gosimple 95 | - govet 96 | - importas 97 | - ineffassign 98 | - interfacebloat 99 | - lll 100 | - makezero 101 | - misspell 102 | - nakedret 103 | - nestif 104 | - nilerr 105 | - nilnil 106 | - noctx 107 | - nolintlint 108 | - nosprintfhostport 109 | - paralleltest 110 | - prealloc 111 | - predeclared 112 | - promlinter 113 | - revive 114 | - reassign 115 | - rowserrcheck 116 | - sqlclosecheck 117 | - staticcheck 118 | - stylecheck 119 | - tenv 120 | - tparallel 121 | - typecheck 122 | - unconvert 123 | - unparam 124 | - unused 125 | - usestdlibvars 126 | - wastedassign 127 | - whitespace 128 | fast: false -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Théo Crevon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to 
permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Gomme logo

2 |

A parser combinator library for Go

3 | 4 |

5 | MIT License 6 | Build Status 7 | Go Documentation 8 | Go Report Card 9 | 10 |

11 | 12 | Gomme is a library that simplifies building parsers in Go. 13 | 14 | Inspired by Rust's renowned `nom` crate, Gomme provides a developer-friendly toolkit that allows you to quickly and easily create reliable parsers for both textual and binary formats. 15 | 16 | With the power of Go's newly introduced Generics, Gomme gives you the flexibility to design your own parsers while ensuring optimal compile-time type safety. Whether you're a seasoned developer or just starting out, Gomme is designed to make the process of building parsers efficient, enjoyable, and less intimidating. 17 | 18 | ## Table of content 19 | 20 | 21 | - [Table of content](#table-of-content) 22 | - [Getting started](#getting-started) 23 | - [Why Gomme?](#why-gomme) 24 | - [Examples](#examples) 25 | - [Documentation](#documentation) 26 | - [Table of content](#table-of-content-1) 27 | - [Documentation](#documentation-1) 28 | - [Installation](#installation) 29 | - [Guide](#guide) 30 | - [List of combinators](#list-of-combinators) 31 | - [Base combinators](#base-combinators) 32 | - [Bytes combinators](#bytes-combinators) 33 | - [Character combinators](#character-combinators) 34 | - [Combinators for Sequences](#combinators-for-sequences) 35 | - [Combinators for Applying Parsers Many Times](#combinators-for-applying-parsers-many-times) 36 | - [Combinators for Choices](#combinators-for-choices) 37 | - [Installation](#installation-1) 38 | - [Frequently asked questions](#frequently-asked-questions) 39 | - [Q: What are parser combinators?](#q-what-are-parser-combinators) 40 | - [Q: Why would I use parser combinators instead of a specific parser?](#q-why-would-i-use-parser-combinators-instead-of-a-specific-parser) 41 | - [Q: Where can I learn more about parser combinators?](#q-where-can-i-learn-more-about-parser-combinators) 42 | - [Acknowledgements](#acknowledgements) 43 | - [Authors](#authors) 44 | 45 | 46 | ## Getting started 47 | 48 | Here's how to quickly parse [hexadecimal color 
codes](https://developer.mozilla.org/en-US/docs/Web/CSS/color) using Gomme: 49 | 50 | ```golang 51 | // RGBColor stores the three bytes describing a color in the RGB space. 52 | type RGBColor struct { 53 | red uint8 54 | green uint8 55 | blue uint8 56 | } 57 | 58 | // ParseRGBColor creates a new RGBColor from a hexadecimal color string. 59 | // The string must be a six-digit hexadecimal number, prefixed with a "#". 60 | func ParseRGBColor(input string) (RGBColor, error) { 61 | parser := gomme.Preceded( 62 | gomme.Token[string]("#"), 63 | gomme.Map( 64 | gomme.Count(HexColorComponent(), 3), 65 | func(components []uint8) (RGBColor, error) { 66 | return RGBColor{components[0], components[1], components[2]}, nil 67 | }, 68 | ), 69 | ) 70 | 71 | result := parser(input) 72 | if result.Err != nil { 73 | return RGBColor{}, result.Err 74 | } 75 | 76 | return result.Output, nil 77 | } 78 | 79 | // HexColorComponent produces a parser that parses a single hex color component, 80 | // which is a two-digit hexadecimal number. 81 | func HexColorComponent() gomme.Parser[string, uint8] { 82 | return func(input string) gomme.Result[uint8, string] { 83 | return gomme.Map( 84 | gomme.TakeWhileMN[string](2, 2, gomme.IsHexDigit), 85 | fromHex, 86 | )(input) 87 | } 88 | } 89 | 90 | // fromHex converts two digits hexadecimal numbers to their decimal value. 91 | func fromHex(input string) (uint8, error) { 92 | res, err := strconv.ParseInt(input, 16, 16) 93 | if err != nil { 94 | return 0, err 95 | } 96 | 97 | return uint8(res), nil 98 | } 99 | 100 | ``` 101 | 102 | It's as simple as that! Feel free to explore more in the [examples](examples/) directory. 103 | 104 | ## Why Gomme? 105 | 106 | While it's true that learning parser combinators might initially seem daunting, their power, flexibility, and efficiency make them an invaluable tool for parsing textual and binary formats. 
We've created Gomme with a focus on making this learning curve as smooth as possible, providing clear documentation and a wide array of examples. 107 | 108 | Once you get the hang of it, you'll find that Gomme's parser combinators are intuitive, adaptable, and perfect for quickly building parsers for various formats. They're easy to test and maintain, and they can help you create parsers that are as fast as their hand-written counterparts. 109 | 110 | ## Examples 111 | 112 | See Gomme in action with these handy examples: 113 | - [Parsing hexadecimal color codes](./examples/hexcolor) 114 | - [Parsing a simple CSV file](./examples/csv) 115 | - [Parsing Redis' RESP protocol](./examples/redis) 116 | - [Parsing JSON](./examples/json) 117 | 118 | ## Documentation 119 | 120 | For more detailed information, refer to the official [documentation](https://pkg.go.dev/github.com/oleiade/gomme). 121 | ## Table of content 122 | 123 | ## Documentation 124 | 125 | [Documentation](https://pkg.go.dev/github.com/oleiade/gomme) 126 | 127 | ## Installation 128 | 129 | ```bash 130 | go get github.com/oleiade/gomme 131 | ``` 132 | 133 | ## Guide 134 | 135 | In this guide, we provide a detailed overview of the various combinators available in Gomme. Combinators are fundamental building blocks in parser construction, each designed for a specific task. By combining them, you can create complex parsers suited to your specific needs. For each combinator, we've provided a brief description and a usage example. Let's explore! 
136 | 137 | ### List of combinators 138 | 139 | #### Base combinators 140 | 141 | | Combinator | Description | Example | 142 | | :------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------- | 143 | | [`Map`](https://pkg.go.dev/github.com/oleiade/gomme#Map) | Applies a function to the result of the provided parser, allowing you to transform the parser's result. | `Map(Digit1(), func(s string)int { return 123 })` | 144 | | [`Optional`](https://pkg.go.dev/github.com/oleiade/gomme#Optional) | Makes a parser optional. If unsuccessful, the parser returns a nil `Result.Output`. | `Optional(CRLF())` | 145 | | [`Peek`](https://pkg.go.dev/github.com/oleiade/gomme#Peek) | Applies the provided parser without consuming the input. | | 146 | | [`Recognize`](https://pkg.go.dev/github.com/oleiade/gomme#Recognize) | Returns the consumed input as the produced value when the provided parser is successful. | `Recognize(SeparatedPair(Token("key"), Char(':'), Token("value")))` | 147 | | [`Assign`](https://pkg.go.dev/github.com/oleiade/gomme#Assign) | Returns the assigned value when the provided parser is successful. | `Assign(true, Token("true"))` | 148 | 149 | #### Bytes combinators 150 | 151 | | Combinator | Description | Example | 152 | | :----------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------ | 153 | | [`Take`](https://pkg.go.dev/github.com/oleiade/gomme#Take) | Parses the first N elements of the input. 
| `Take(5)` | 154 | | [`TakeUntil`](https://pkg.go.dev/github.com/oleiade/gomme#TakeUntil) | Parses the input until the provided parser argument succeeds. | `TakeUntil(CRLF())` | 155 | | [`TakeWhileMN`](https://pkg.go.dev/github.com/oleiade/gomme#TakeWhileMN) | Parses the longest input slice fitting the length expectation (m <= input length <= n) and matching the predicate. The parser argument is a function taking a `rune` as input and returning a `bool`. | `TakeWhileMN(2, 6, gomme.IsHexDigit)` | 156 | | [`Token`](https://pkg.go.dev/github.com/oleiade/gomme#Token) | Recognizes a specific pattern. Compares the input with the token's argument and returns the matching part. | `Token("tolkien")` | 157 | 158 | #### Character combinators 159 | 160 | | Combinator | Description | Example | 161 | | :--- | :--- | :--- | 162 | | [`Char`](https://pkg.go.dev/github.com/oleiade/gomme#Char) | Parses a single instance of a provided character. | `Char('$')` | 163 | | [`AnyChar`](https://pkg.go.dev/github.com/oleiade/gomme#AnyChar) | Parses a single instance of any character. | `AnyChar()` | 164 | | [`Alpha0`](https://pkg.go.dev/github.com/oleiade/gomme#Alpha0) | Parses zero or more alphabetical ASCII characters (case insensitive). | `Alpha0()` | 165 | | [`Alpha1`](https://pkg.go.dev/github.com/oleiade/gomme#Alpha1) | Parses one or more alphabetical ASCII characters (case insensitive). | `Alpha1()` | 166 | | [`Alphanumeric0`](https://pkg.go.dev/github.com/oleiade/gomme#Alphanumeric0) | Parses zero or more alphabetical and numerical ASCII characters (case insensitive). | `Alphanumeric0()` | 167 | | [`Alphanumeric1`](https://pkg.go.dev/github.com/oleiade/gomme#Alphanumeric1) | Parses one or more alphabetical and numerical ASCII characters (case insensitive). | `Alphanumeric1()` | 168 | | [`Digit0`](https://pkg.go.dev/github.com/oleiade/gomme#Digit0) | Parses zero or more numerical ASCII characters: 0-9. 
| `Digit0()` | 169 | | [`Digit1`](https://pkg.go.dev/github.com/oleiade/gomme#Digit1) | Parses one or more numerical ASCII characters: 0-9. | `Digit1()` | 170 | | [`HexDigit0`](https://pkg.go.dev/github.com/oleiade/gomme#HexDigit0) | Parses zero or more hexadecimal ASCII characters (case insensitive). | `HexDigit0()` | 171 | | [`HexDigit1`](https://pkg.go.dev/github.com/oleiade/gomme#HexDigit1) | Parses one or more hexadecimal ASCII characters (case insensitive). | `HexDigit1()` | 172 | | [`Whitespace0`](https://pkg.go.dev/github.com/oleiade/gomme#Whitespace0) | Parses zero or more whitespace ASCII characters: space, tab, carriage return, line feed. | `Whitespace0()` | 173 | | [`Whitespace1`](https://pkg.go.dev/github.com/oleiade/gomme#Whitespace1) | Parses one or more whitespace ASCII characters: space, tab, carriage return, line feed. | `Whitespace1()` | 174 | | [`LF`](https://pkg.go.dev/github.com/oleiade/gomme#LF) | Parses a single new line character '\n'. | `LF()` | 175 | | [`CRLF`](https://pkg.go.dev/github.com/oleiade/gomme#CRLF) | Parses a '\r\n' string. | `CRLF()` | 176 | | [`OneOf`](https://pkg.go.dev/github.com/oleiade/gomme#OneOf) | Parses one of the provided characters. Equivalent to using `Alternative` over a series of `Char` parsers. | `OneOf('a', 'b' , 'c')` | 177 | | [`Satisfy`](https://pkg.go.dev/github.com/oleiade/gomme#Satisfy) | Parses a single character, asserting that it matches the provided predicate. The predicate function takes a `rune` as input and returns a `bool`. `Satisfy` is useful for building custom character matchers. | `Satisfy(func(c rune)bool { return c == '{' || c == '[' })` | 178 | | [`Space`](https://pkg.go.dev/github.com/oleiade/gomme#Space) | Parses a single space character ' '. | `Space()` | 179 | | [`Tab`](https://pkg.go.dev/github.com/oleiade/gomme#Tab) | Parses a single tab character '\t'. 
| `Tab()` | 180 | | [`Int64`](https://pkg.go.dev/github.com/oleiade/gomme#Int64) | Parses an `int64` from its textual representation. | `Int64()` | 181 | | [`Int8`](https://pkg.go.dev/github.com/oleiade/gomme#Int8) | Parses an `int8` from its textual representation. | `Int8()` | 182 | | [`UInt8`](https://pkg.go.dev/github.com/oleiade/gomme#UInt8) | Parses a `uint8` from its textual representation. | `UInt8()` | 183 | 184 | #### Combinators for Sequences 185 | 186 | | Combinator | Description | Example | 187 | | :--- | :--- | :--- | 188 | | [`Preceded`](https://pkg.go.dev/github.com/oleiade/gomme#Preceded) | Applies the prefix parser and discards its result. It then applies the main parser and returns its result. It discards the prefix value. It proves useful when looking for data prefixed with a pattern. For instance, when parsing a value, prefixed with its name. | `Preceded(Token("name:"), Alpha1())` | 189 | | [`Terminated`](https://pkg.go.dev/github.com/oleiade/gomme#Terminated) | Applies the main parser, followed by the suffix parser whom it discards the result of, and returns the result of the main parser. Note that if the suffix parser fails, the whole operation fails, regardless of the result of the main parser. It proves useful when looking for suffixed data while not interested in retaining the suffix value itself. For instance, when parsing a value followed by a control character. | `Terminated(Digit1(), LF())` | 190 | | [`Delimited`](https://pkg.go.dev/github.com/oleiade/gomme#Delimited) | Applies the prefix parser, the main parser, followed by the suffix parser, discards the result of both the prefix and suffix parsers, and returns the result of the main parser. Note that if any of the prefix or suffix parsers fail, the whole operation fails, regardless of the result of the main parser. It proves useful when looking for data surrounded by patterns helping them identify it without retaining its value. 
For instance, when parsing a value, prefixed by its name and followed by a control character. | `Delimited(Tag("name:"), Digit1(), LF())` | 191 | | [`Pair`](https://pkg.go.dev/github.com/oleiade/gomme#Pair) | Applies two parsers in a row and returns a pair container holding both their result values. | `Pair(Alpha1(), Tag("cm"))` | 192 | | [`SeparatedPair`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedPair) | Applies a left parser, a separator parser, and a right parser discards the result of the separator parser, and returns the result of the left and right parsers as a pair container holding the result values. | `SeparatedPair(Alpha1(), Tag(":"), Alpha1())` | 193 | | [`Sequence`](https://pkg.go.dev/github.com/oleiade/gomme#Sequence) | Applies a sequence of parsers sharing the same signature. If any of the provided parsers fail, the whole operation fails. | `Sequence(SeparatedPair(Tag("name"), Char(':'), Alpha1()), SeparatedPair(Tag("height"), Char(':'), Digit1()))` | 194 | 195 | #### Combinators for Applying Parsers Many Times 196 | 197 | | Combinator | Description | Example | 198 | | :--- | :--- | :--- | 199 | | [`Count`](https://pkg.go.dev/github.com/oleiade/gomme#Count) | Applies the provided parser `count` times. If the parser fails before it can be applied `count` times, the operation fails. It proves useful whenever one needs to parse the same pattern many times in a row. | `Count(3, OneOf('a', 'b', 'c'))` | 200 | | [`Many0`](https://pkg.go.dev/github.com/oleiade/gomme#Many0) | Keeps applying the provided parser until it fails and returns a slice of all the results. Specifically, if the parser fails to match, `Many0` still succeeds, returning an empty slice of results. It proves useful when trying to consume a repeated pattern, regardless of whether there's any match, like when trying to parse any number of whitespaces in a row. 
| `Many0(Char(' '))` | 201 | | [`Many1`](https://pkg.go.dev/github.com/oleiade/gomme#Many1) | Keeps applying the provided parser until it fails and returns a slice of all the results. If the parser fails to match at least once, `Many1` fails. It proves useful when trying to consume a repeated pattern, like any number of whitespaces in a row, ensuring that it appears at least once. | `Many1(LF())` | 202 | | [`SeparatedList0`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedList0) | | | 203 | | [`SeparatedList1`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedList1) | | | 204 | 205 | #### Combinators for Choices 206 | 207 | | Combinator | Description | Example | 208 | | :--- | :--- | :--- | 209 | | [`Alternative`](https://pkg.go.dev/github.com/oleiade/gomme#Alternative) | Tests a list of parsers, one by one, until one succeeds. Note that all parsers must share the same signature (`Parser[I, O]`). | `Alternative(Token("abc"), Token("123"))` | 210 | 211 | 212 | ## Installation 213 | 214 | Add the library to your Go project with the following command: 215 | 216 | ```bash 217 | go get github.com/oleiade/gomme@latest 218 | ``` 219 | 220 | ## Frequently asked questions 221 | 222 | ### Q: What are parser combinators? 223 | 224 | **A**: Parser combinators offer a new way of building parsers. Instead of writing a complex parser that analyzes an entire format, you create small, simple parsers that handle the smallest units of the format. These small parsers can then be combined to build more complex parsers. It's a bit like using building blocks to construct whatever structure you want. 225 | 226 | ### Q: Why would I use parser combinators instead of a specific parser? 227 | 228 | **A**: Parser combinators are incredibly flexible and intuitive. Once you're familiar with them, they enable you to quickly create, maintain, and modify parsers. They offer you a high degree of freedom in designing your parser and how it's used. 
229 | 230 | ### Q: Where can I learn more about parser combinators? 231 | 232 | **A**: Here are some resources we recommend: 233 | - [You could have invented parser combinators](https://theorangeduck.com/page/you-could-have-invented-parser-combinators) 234 | - [Functional Parsing](https://www.youtube.com/watch?v=dDtZLm7HIJs) 235 | - [Building a Mapping Language in Go with Parser Combinators](https://www.youtube.com/watch?v=JiViND-bpmw) 236 | 237 | ## Acknowledgements 238 | 239 | We can frankly take close to zero credit for this library, apart from the work put into assembling the already existing elements of theory and implementation into a single autonomous project. 240 | 241 | We've stood on the shoulders of giants to create Gomme. The library draws heavily on the extensive theoretical work done in the parser combinators space, and we owe a huge debt to Rust's [nom](https://github.com/Geal/nom) and [Benthos'](https://github.com/benthosdev/benthos) Bloblang implementation. Our goal was to consolidate these diverse elements into a single, easy-to-use Go library. 242 | ## Authors 243 | 244 | - [@oleiade](https://github.com/oleiade) 245 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODOS 2 | 3 | ## Dos 4 | 5 | - [ ] Create `Bytes` interface type for bytes file's content 6 | - [ ] Create `String` or `Characters` interface type for characters file's content 7 | - [ ] Sort Out Fatal/Non-Fatal errors (distinguish whether a parser failed in an expected manner, or if the whole parsing should be interrupted) 8 | - [ ] Reduce Int8/Int64 allocations (their parsers could be somewhat simplified?) 9 | - [ ] Add combinator to parse whitespace (+ helper for multispace0/1?) 
10 | - [ ] Refactor TakeWhileOneOf to be "just" TakeWhile 11 | - [ ] Refactor space to be of the form space0 and space1 12 | - [ ] Rename `LF` to `Newline` 13 | - [X] Document Recognize as explicitly as possible 14 | - [X] Add Examples 15 | - [x] Add Benchmarks 16 | - [x] Make sure the Failure messages are properly cased 17 | - [x] Rename `p` parser arguments to `parse` for clearer code 18 | - [x] Add `Many0` and `Many1` parsers 19 | 20 | ## Maybes 21 | 22 | - [ ] Rename project to `crayon`? 23 | - [ ] Rename `Preceded` to `Prefixed` 24 | - [ ] Rename `Terminated` to `Suffixed` 25 | - [ ] Rename `Sequence` to `List`? 26 | - [ ] Rename `Satisfy` to `Satisfies`? 27 | - [X] Introduce `SeparatedList` as a result of previous? 28 | - [X] Create `bytes.go` file to distinguish from characters 29 | 30 | ## Track 31 | 32 | - [ ] Chase allocations, document them, and reduce their amount as much as possible 33 | 34 | ## NoNos 35 | - [X] Add an `ErrInfiniteLoop` (`Many0`) -------------------------------------------------------------------------------- /branch.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | // Alternative tests a list of parsers in order, one by one, until one 4 | // succeeds. 5 | // 6 | // If none of the parsers succeed, this combinator produces an error Result. 
7 | func Alternative[Input Bytes, Output any](parsers ...Parser[Input, Output]) Parser[Input, Output] { 8 | return func(input Input) Result[Output, Input] { 9 | for _, parse := range parsers { 10 | result := parse(input) 11 | if result.Err == nil { 12 | return result 13 | } 14 | } 15 | 16 | return Failure[Input, Output](NewError(input, "Alternative"), input) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /branch_test.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestAlternative(t *testing.T) { 10 | t.Parallel() 11 | 12 | type args struct { 13 | p Parser[string, string] 14 | } 15 | testCases := []struct { 16 | name string 17 | args args 18 | input string 19 | wantErr bool 20 | wantOutput string 21 | wantRemaining string 22 | }{ 23 | { 24 | name: "head matching parser should succeed", 25 | input: "123", 26 | args: args{ 27 | p: Alternative(Digit1[string](), Alpha0[string]()), 28 | }, 29 | wantErr: false, 30 | wantOutput: "123", 31 | wantRemaining: "", 32 | }, 33 | { 34 | name: "matching parser should succeed", 35 | input: "1", 36 | args: args{ 37 | p: Alternative(Digit1[string](), Alpha0[string]()), 38 | }, 39 | wantErr: false, 40 | wantOutput: "1", 41 | wantRemaining: "", 42 | }, 43 | { 44 | name: "no matching parser should fail", 45 | input: "$%^*", 46 | args: args{ 47 | p: Alternative(Digit1[string](), Alpha1[string]()), 48 | }, 49 | wantErr: true, 50 | wantOutput: "", 51 | wantRemaining: "$%^*", 52 | }, 53 | { 54 | name: "empty input should fail", 55 | input: "", 56 | args: args{ 57 | p: Alternative(Digit1[string](), Alpha1[string]()), 58 | }, 59 | wantErr: true, 60 | wantOutput: "", 61 | wantRemaining: "", 62 | }, 63 | } 64 | for _, tc := range testCases { 65 | tc := tc 66 | 67 | t.Run(tc.name, func(t *testing.T) { 68 | t.Parallel() 69 | 70 | gotResult := 
tc.args.p(tc.input) 71 | if (gotResult.Err != nil) != tc.wantErr { 72 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 73 | } 74 | 75 | // testify makes it easier comparing slices 76 | assert.Equal(t, 77 | tc.wantOutput, gotResult.Output, 78 | "got output %v, want output %v", gotResult.Output, tc.wantOutput, 79 | ) 80 | 81 | if gotResult.Remaining != tc.wantRemaining { 82 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 83 | } 84 | }) 85 | } 86 | } 87 | 88 | func BenchmarkAlternative(b *testing.B) { 89 | p := Alternative(Digit1[string](), Alpha1[string]()) 90 | 91 | for i := 0; i < b.N; i++ { 92 | p("123") 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /bytes.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | // Take returns a subset of the input of size `count`. 9 | func Take[Input Bytes](count uint) Parser[Input, Input] { 10 | return func(input Input) Result[Input, Input] { 11 | if len(input) == 0 && count > 0 { 12 | return Failure[Input, Input](NewError(input, "TakeUntil"), input) 13 | } 14 | 15 | if uint(len(input)) < count { 16 | return Failure[Input, Input](NewError(input, "Take"), input) 17 | } 18 | 19 | return Success(input[:count], input[count:]) 20 | } 21 | } 22 | 23 | // TakeUntil parses any number of characters until the provided parser is successful. 24 | // If the provided parser is not successful, the parser fails, and the entire input is 25 | // returned as the Result's Remaining. 
26 | func TakeUntil[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, Input] { 27 | return func(input Input) Result[Input, Input] { 28 | if len(input) == 0 { 29 | return Failure[Input, Input](NewError(input, "TakeUntil"), input) 30 | } 31 | 32 | pos := 0 33 | for ; pos < len(input); pos++ { 34 | current := input[pos:] 35 | res := parse(current) 36 | if res.Err == nil { 37 | return Success(input[:pos], input[pos:]) 38 | } 39 | 40 | continue 41 | } 42 | 43 | return Failure[Input, Input](NewError(input, "TakeUntil"), input) 44 | } 45 | } 46 | 47 | // TakeWhileMN returns the longest input subset that matches the predicates, within 48 | // the boundaries of `atLeast` <= len(input) <= `atMost`. 49 | // 50 | // If the provided parser is not successful or the pattern is out of the 51 | // `atLeast` <= len(input) <= `atMost` range, the parser fails, and the entire 52 | // input is returned as the Result's Remaining. 53 | func TakeWhileMN[Input Bytes](atLeast, atMost uint, predicate func(rune) bool) Parser[Input, Input] { 54 | return func(input Input) Result[Input, Input] { 55 | if len(input) == 0 { 56 | return Failure[Input, Input](NewError(input, "TakeWhileMN"), input) 57 | } 58 | 59 | // Input is shorter than the minimum expected matching length, 60 | // it is thus not possible to match it within the established 61 | // constraints. 
62 | if uint(len(input)) < atLeast { 63 | return Failure[Input, Input](NewError(input, "TakeWhileMN"), input) 64 | } 65 | 66 | lastValidPos := 0 67 | for idx := 0; idx < len(input); idx++ { 68 | if uint(idx) == atMost { 69 | break 70 | } 71 | 72 | matched := predicate(rune(input[idx])) 73 | if !matched { 74 | if uint(idx) < atLeast { 75 | return Failure[Input, Input](NewError(input, "TakeWhileMN"), input) 76 | } 77 | 78 | return Success(input[:idx], input[idx:]) 79 | } 80 | 81 | lastValidPos++ 82 | } 83 | 84 | return Success(input[:lastValidPos], input[lastValidPos:]) 85 | } 86 | } 87 | 88 | // Token parses a token from the input, and returns the part of the input that 89 | // matched the token. 90 | // If the token could not be found, the parser returns an error result. 91 | func Token[Input Bytes](token string) Parser[Input, Input] { 92 | return func(input Input) Result[Input, Input] { 93 | if !strings.HasPrefix(string(input), token) { 94 | return Failure[Input, Input](NewError(input, fmt.Sprintf("Token(%s)", token)), input) 95 | } 96 | 97 | return Success(input[:len(token)], input[len(token):]) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /bytes_test.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestTake(t *testing.T) { 9 | t.Parallel() 10 | 11 | type args struct { 12 | p Parser[string, string] 13 | } 14 | testCases := []struct { 15 | name string 16 | args args 17 | input string 18 | wantErr bool 19 | wantOutput string 20 | wantRemaining string 21 | }{ 22 | { 23 | name: "taking less than input size should succeed", 24 | input: "1234567", 25 | args: args{ 26 | p: Take[string](6), 27 | }, 28 | wantErr: false, 29 | wantOutput: "123456", 30 | wantRemaining: "7", 31 | }, 32 | { 33 | name: "taking exact input size should succeed", 34 | input: "123456", 35 | args: args{ 36 | p: Take[string](6), 37 | 
}, 38 | wantErr: false, 39 | wantOutput: "123456", 40 | wantRemaining: "", 41 | }, 42 | { 43 | name: "taking more than input size should fail", 44 | input: "123", 45 | args: args{ 46 | p: Take[string](6), 47 | }, 48 | wantErr: true, 49 | wantOutput: "", 50 | wantRemaining: "123", 51 | }, 52 | { 53 | name: "taking from empty input should fail", 54 | input: "", 55 | args: args{ 56 | p: Take[string](6), 57 | }, 58 | wantErr: true, 59 | wantOutput: "", 60 | wantRemaining: "", 61 | }, 62 | } 63 | for _, tc := range testCases { 64 | tc := tc 65 | 66 | t.Run(tc.name, func(t *testing.T) { 67 | t.Parallel() 68 | 69 | gotResult := tc.args.p(tc.input) 70 | if (gotResult.Err != nil) != tc.wantErr { 71 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 72 | } 73 | 74 | if gotResult.Output != tc.wantOutput { 75 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 76 | } 77 | 78 | if gotResult.Remaining != tc.wantRemaining { 79 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 80 | } 81 | }) 82 | } 83 | } 84 | 85 | func BenchmarkTake(b *testing.B) { 86 | p := Take[string](6) 87 | 88 | b.ResetTimer() 89 | for i := 0; i < b.N; i++ { 90 | p("123456") 91 | } 92 | } 93 | 94 | func TestTakeUntil(t *testing.T) { 95 | t.Parallel() 96 | 97 | type args struct { 98 | p Parser[string, string] 99 | } 100 | testCases := []struct { 101 | name string 102 | args args 103 | input string 104 | wantErr bool 105 | wantOutput string 106 | wantRemaining string 107 | }{ 108 | { 109 | name: "matching parser should succeed", 110 | input: "abc123", 111 | args: args{ 112 | p: TakeUntil(Digit1[string]()), 113 | }, 114 | wantErr: false, 115 | wantOutput: "abc", 116 | wantRemaining: "123", 117 | }, 118 | { 119 | name: "immediately matching parser should succeed", 120 | input: "123", 121 | args: args{ 122 | p: TakeUntil(Digit1[string]()), 123 | }, 124 | wantErr: false, 125 | wantOutput: "", 126 | wantRemaining: "123", 127 | }, 128 
| { 129 | name: "no match should fail", 130 | input: "abcdef", 131 | args: args{ 132 | p: TakeUntil(Digit1[string]()), 133 | }, 134 | wantErr: true, 135 | wantOutput: "", 136 | wantRemaining: "abcdef", 137 | }, 138 | { 139 | name: "empty input should fail", 140 | input: "", 141 | args: args{ 142 | p: TakeUntil(Digit1[string]()), 143 | }, 144 | wantErr: true, 145 | wantOutput: "", 146 | wantRemaining: "", 147 | }, 148 | } 149 | for _, tc := range testCases { 150 | tc := tc 151 | 152 | t.Run(tc.name, func(t *testing.T) { 153 | t.Parallel() 154 | 155 | gotResult := tc.args.p(tc.input) 156 | if (gotResult.Err != nil) != tc.wantErr { 157 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 158 | } 159 | 160 | if gotResult.Output != tc.wantOutput { 161 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 162 | } 163 | 164 | if gotResult.Remaining != tc.wantRemaining { 165 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 166 | } 167 | }) 168 | } 169 | } 170 | 171 | func BenchmarkTakeUntil(b *testing.B) { 172 | p := TakeUntil(Digit1[string]()) 173 | 174 | b.ResetTimer() 175 | for i := 0; i < b.N; i++ { 176 | p("abc123") 177 | } 178 | } 179 | 180 | func TestTakeWhileMN(t *testing.T) { 181 | t.Parallel() 182 | 183 | type args struct { 184 | p Parser[string, string] 185 | } 186 | testCases := []struct { 187 | name string 188 | args args 189 | input string 190 | wantErr bool 191 | wantOutput string 192 | wantRemaining string 193 | }{ 194 | { 195 | name: "parsing input with enough characters and partially matching predicated should succeed", 196 | input: "latin123", 197 | args: args{ 198 | p: TakeWhileMN[string](3, 6, IsAlpha), 199 | }, 200 | wantErr: false, 201 | wantOutput: "latin", 202 | wantRemaining: "123", 203 | }, 204 | { 205 | name: "parsing input longer than atLeast and atMost should succeed", 206 | input: "lengthy", 207 | args: args{ 208 | p: TakeWhileMN[string](3, 6, IsAlpha), 209 | }, 
210 | wantErr: false, 211 | wantOutput: "length", 212 | wantRemaining: "y", 213 | }, 214 | { 215 | name: "parsing input longer than atLeast and shorter than atMost should succeed", 216 | input: "latin", 217 | args: args{ 218 | p: TakeWhileMN[string](3, 6, IsAlpha), 219 | }, 220 | wantErr: false, 221 | wantOutput: "latin", 222 | wantRemaining: "", 223 | }, 224 | { 225 | name: "parsing empty input should fail", 226 | input: "", 227 | args: args{ 228 | p: TakeWhileMN[string](3, 6, IsAlpha), 229 | }, 230 | wantErr: true, 231 | wantOutput: "", 232 | wantRemaining: "", 233 | }, 234 | { 235 | name: "parsing too short input should fail", 236 | input: "ed", 237 | args: args{ 238 | p: TakeWhileMN[string](3, 6, IsAlpha), 239 | }, 240 | wantErr: true, 241 | wantOutput: "", 242 | wantRemaining: "ed", 243 | }, 244 | { 245 | name: "parsing with non-matching predicate should fail", 246 | input: "12345", 247 | args: args{ 248 | p: TakeWhileMN[string](3, 6, IsAlpha), 249 | }, 250 | wantErr: true, 251 | wantOutput: "", 252 | wantRemaining: "12345", 253 | }, 254 | } 255 | for _, tc := range testCases { 256 | tc := tc 257 | 258 | t.Run(tc.name, func(t *testing.T) { 259 | t.Parallel() 260 | 261 | gotResult := tc.args.p(tc.input) 262 | if (gotResult.Err != nil) != tc.wantErr { 263 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 264 | } 265 | 266 | if gotResult.Output != tc.wantOutput { 267 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 268 | } 269 | 270 | if gotResult.Remaining != tc.wantRemaining { 271 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 272 | } 273 | }) 274 | } 275 | } 276 | 277 | func BenchmarkTakeWhileMN(b *testing.B) { 278 | p := TakeWhileMN[string](3, 6, IsAlpha) 279 | 280 | b.ResetTimer() 281 | for i := 0; i < b.N; i++ { 282 | p("latin") 283 | } 284 | } 285 | 286 | // TakeWhileOneOf parses any number of characters present in the 287 | // provided collection of runes. 
288 | func TakeWhileOneOf[I Bytes](collection ...rune) Parser[I, I] { 289 | index := make(map[rune]struct{}, len(collection)) 290 | 291 | for _, r := range collection { 292 | index[r] = struct{}{} 293 | } 294 | 295 | expected := fmt.Sprintf("chars(%v)", string(collection)) 296 | 297 | return func(input I) Result[I, I] { 298 | if len(input) == 0 { 299 | return Failure[I, I](NewError(input, expected), input) 300 | } 301 | 302 | pos := 0 303 | for ; pos < len(input); pos++ { 304 | _, exists := index[rune(input[pos])] 305 | if !exists { 306 | if pos == 0 { 307 | return Failure[I, I](NewError(input, expected), input) 308 | } 309 | 310 | break 311 | } 312 | } 313 | 314 | return Success(input[:pos], input[pos:]) 315 | } 316 | } 317 | 318 | func TestTakeWhileOneOf(t *testing.T) { 319 | t.Parallel() 320 | 321 | type args struct { 322 | p Parser[string, string] 323 | } 324 | testCases := []struct { 325 | name string 326 | args args 327 | input string 328 | wantErr bool 329 | wantOutput string 330 | wantRemaining string 331 | }{ 332 | { 333 | name: "matching parser should succeed", 334 | input: "abc123", 335 | args: args{ 336 | p: TakeWhileOneOf[string]('a', 'b', 'c'), 337 | }, 338 | wantErr: false, 339 | wantOutput: "abc", 340 | wantRemaining: "123", 341 | }, 342 | { 343 | name: "no match should fail", 344 | input: "123", 345 | args: args{ 346 | p: TakeWhileOneOf[string]('a', 'b', 'c'), 347 | }, 348 | wantErr: true, 349 | wantOutput: "", 350 | wantRemaining: "123", 351 | }, 352 | { 353 | name: "empty input should fail", 354 | input: "", 355 | args: args{ 356 | p: TakeWhileOneOf[string]('a', 'b', 'c'), 357 | }, 358 | wantErr: true, 359 | wantOutput: "", 360 | wantRemaining: "", 361 | }, 362 | } 363 | for _, tc := range testCases { 364 | tc := tc 365 | 366 | t.Run(tc.name, func(t *testing.T) { 367 | t.Parallel() 368 | 369 | gotResult := tc.args.p(tc.input) 370 | if (gotResult.Err != nil) != tc.wantErr { 371 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 
372 | } 373 | 374 | if gotResult.Output != tc.wantOutput { 375 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 376 | } 377 | 378 | if gotResult.Remaining != tc.wantRemaining { 379 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 380 | } 381 | }) 382 | } 383 | } 384 | 385 | func BenchmarkTakeWhileOneOf(b *testing.B) { 386 | p := TakeWhileOneOf[string]('a', 'b', 'c') 387 | 388 | b.ResetTimer() 389 | for i := 0; i < b.N; i++ { 390 | p("abc123") 391 | } 392 | } 393 | 394 | func TestToken(t *testing.T) { 395 | t.Parallel() 396 | 397 | testCases := []struct { 398 | name string 399 | parser Parser[string, string] 400 | input string 401 | wantErr bool 402 | wantOutput string 403 | wantRemaining string 404 | }{ 405 | { 406 | name: "parsing a token from an input starting with it should succeed", 407 | parser: Token[string]("Bonjour"), 408 | input: "Bonjour tout le monde", 409 | wantErr: false, 410 | wantOutput: "Bonjour", 411 | wantRemaining: " tout le monde", 412 | }, 413 | { 414 | name: "parsing a token from an non-matching input should fail", 415 | parser: Token[string]("Bonjour"), 416 | input: "Hello tout le monde", 417 | wantErr: true, 418 | wantOutput: "", 419 | wantRemaining: "Hello tout le monde", 420 | }, 421 | { 422 | name: "parsing a token from an empty input should fail", 423 | parser: Token[string]("Bonjour"), 424 | input: "", 425 | wantErr: true, 426 | wantOutput: "", 427 | wantRemaining: "", 428 | }, 429 | } 430 | 431 | for _, tc := range testCases { 432 | tc := tc 433 | 434 | t.Run(tc.name, func(t *testing.T) { 435 | t.Parallel() 436 | 437 | gotResult := tc.parser(tc.input) 438 | if (gotResult.Err != nil) != tc.wantErr { 439 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 440 | } 441 | 442 | if gotResult.Output != tc.wantOutput { 443 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 444 | } 445 | 446 | if gotResult.Remaining != tc.wantRemaining 
{ 447 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 448 | } 449 | }) 450 | } 451 | } 452 | 453 | func BenchmarkToken(b *testing.B) { 454 | parser := Token[string]("Bonjour") 455 | 456 | for i := 0; i < b.N; i++ { 457 | parser("Bonjour tout le monde") 458 | } 459 | } 460 | -------------------------------------------------------------------------------- /characters.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | import ( 4 | "strconv" 5 | ) 6 | 7 | // Char parses a single character and matches it with 8 | // a provided candidate. 9 | func Char[Input Bytes](character rune) Parser[Input, rune] { 10 | return func(input Input) Result[rune, Input] { 11 | if len(input) == 0 || rune(input[0]) != character { 12 | return Failure[Input, rune](NewError(input, string(character)), input) 13 | } 14 | 15 | return Success(rune(input[0]), input[1:]) 16 | } 17 | } 18 | 19 | // AnyChar parses any single character. 20 | func AnyChar[Input Bytes]() Parser[Input, rune] { 21 | return func(input Input) Result[rune, Input] { 22 | if len(input) == 0 { 23 | return Failure[Input, rune](NewError(input, "AnyChar"), input) 24 | } 25 | 26 | return Success(rune(input[0]), input[1:]) 27 | } 28 | } 29 | 30 | // Alpha0 parses a zero or more lowercase or uppercase alphabetic characters: a-z, A-Z. 31 | // In the cases where the input is empty, or no terminating character is found, the parser 32 | // returns the input as is. 
33 | func Alpha0[Input Bytes]() Parser[Input, Input] { 34 | return func(input Input) Result[Input, Input] { 35 | if len(input) == 0 { 36 | return Success(input, input) 37 | } 38 | 39 | lastAlphaPos := 0 40 | for idx := 0; idx < len(input); idx++ { 41 | if !IsAlpha(rune(input[idx])) { 42 | return Success(input[:idx], input[idx:]) 43 | } 44 | 45 | lastAlphaPos++ 46 | } 47 | 48 | return Success(input[:lastAlphaPos], input[lastAlphaPos:]) 49 | } 50 | } 51 | 52 | // Alpha1 parses one or more lowercase or uppercase alphabetic characters: a-z, A-Z. 53 | // In the cases where the input doesn't hold enough data, or a terminating character 54 | // is found before any matching ones were, the parser returns an error result. 55 | func Alpha1[Input Bytes]() Parser[Input, Input] { 56 | return func(input Input) Result[Input, Input] { 57 | if len(input) == 0 { 58 | return Failure[Input, Input](NewError(input, "Alpha1"), input) 59 | } 60 | 61 | if !IsAlpha(rune(input[0])) { 62 | return Failure[Input, Input](NewError(input, "Alpha1"), input) 63 | } 64 | 65 | lastAlphaPos := 1 66 | for idx := 1; idx < len(input); idx++ { 67 | if !IsAlpha(rune(input[idx])) { 68 | return Success(input[:idx], input[idx:]) 69 | } 70 | 71 | lastAlphaPos++ 72 | } 73 | 74 | return Success(input[:lastAlphaPos], input[lastAlphaPos:]) 75 | } 76 | } 77 | 78 | // Alphanumeric0 parses zero or more ASCII alphabetical or numerical characters: a-z, A-Z, 0-9. 79 | // In the cases where the input is empty, or no terminating character is found, the parser 80 | // returns the input as is. 
81 | func Alphanumeric0[Input Bytes]() Parser[Input, Input] { 82 | return func(input Input) Result[Input, Input] { 83 | if len(input) == 0 { 84 | return Success(input, input) 85 | } 86 | 87 | lastDigitPos := 0 88 | for idx := 0; idx < len(input); idx++ { 89 | if !IsAlphanumeric(rune(input[idx])) { 90 | return Success(input[:idx], input[idx:]) 91 | } 92 | 93 | lastDigitPos++ 94 | } 95 | 96 | return Success(input[:lastDigitPos], input[lastDigitPos:]) 97 | } 98 | } 99 | 100 | // Alphanumeric1 parses one or more alphabetical or numerical characters: a-z, A-Z, 0-9. 101 | // In the cases where the input doesn't hold enough data, or a terminating character 102 | // is found before any matching ones were, the parser returns an error result. 103 | func Alphanumeric1[Input Bytes]() Parser[Input, Input] { 104 | return func(input Input) Result[Input, Input] { 105 | if len(input) == 0 { 106 | return Failure[Input, Input](NewError(input, "Digit1"), input) 107 | } 108 | 109 | if !IsAlphanumeric(rune(input[0])) { 110 | return Failure[Input, Input](NewError(input, "Digit1"), input) 111 | } 112 | 113 | lastDigitPos := 1 114 | for idx := 1; idx < len(input); idx++ { 115 | if !IsAlphanumeric(rune(input[idx])) { 116 | return Success(input[:idx], input[idx:]) 117 | } 118 | 119 | lastDigitPos++ 120 | } 121 | 122 | return Success(input[:lastDigitPos], input[lastDigitPos:]) 123 | } 124 | } 125 | 126 | // Digit0 parses zero or more ASCII numerical characters: 0-9. 127 | // In the cases where the input is empty, or no terminating character is found, the parser 128 | // returns the input as is. 
129 | func Digit0[Input Bytes]() Parser[Input, Input] { 130 | return func(input Input) Result[Input, Input] { 131 | if len(input) == 0 { 132 | return Success(input, input) 133 | } 134 | 135 | lastDigitPos := 0 136 | for idx := 0; idx < len(input); idx++ { 137 | if !IsDigit(rune(input[idx])) { 138 | return Success(input[:idx], input[idx:]) 139 | } 140 | 141 | lastDigitPos++ 142 | } 143 | 144 | return Success(input[:lastDigitPos], input[lastDigitPos:]) 145 | } 146 | } 147 | 148 | // Digit1 parses one or more numerical characters: 0-9. 149 | // In the cases where the input doesn't hold enough data, or a terminating character 150 | // is found before any matching ones were, the parser returns an error result. 151 | func Digit1[Input Bytes]() Parser[Input, Input] { 152 | return func(input Input) Result[Input, Input] { 153 | if len(input) == 0 { 154 | return Failure[Input, Input](NewError(input, "Digit1"), input) 155 | } 156 | 157 | if !IsDigit(rune(input[0])) { 158 | return Failure[Input, Input](NewError(input, "Digit1"), input) 159 | } 160 | 161 | lastDigitPos := 1 162 | for idx := 1; idx < len(input); idx++ { 163 | if !IsDigit(rune(input[idx])) { 164 | return Success(input[:idx], input[idx:]) 165 | } 166 | 167 | lastDigitPos++ 168 | } 169 | 170 | return Success(input[:lastDigitPos], input[lastDigitPos:]) 171 | } 172 | } 173 | 174 | // HexDigit0 parses zero or more ASCII hexadecimal characters: a-f, A-F, 0-9. 175 | // In the cases where the input is empty, or no terminating character is found, the parser 176 | // returns the input as is. 
177 | func HexDigit0[Input Bytes]() Parser[Input, Input] { 178 | return func(input Input) Result[Input, Input] { 179 | if len(input) == 0 { 180 | return Success(input, input) 181 | } 182 | 183 | lastDigitPos := 0 184 | for idx := 0; idx < len(input); idx++ { 185 | if !IsHexDigit(rune(input[idx])) { 186 | return Success(input[:idx], input[idx:]) 187 | } 188 | 189 | lastDigitPos++ 190 | } 191 | 192 | return Success(input[:lastDigitPos], input[lastDigitPos:]) 193 | } 194 | } 195 | 196 | // HexDigit1 parses one or more ASCII hexadecimal characters: a-f, A-F, 0-9. 197 | // In the cases where the input doesn't hold enough data, or a terminating character 198 | // is found before any matching ones were, the parser returns an error result. 199 | func HexDigit1[Input Bytes]() Parser[Input, Input] { 200 | return func(input Input) Result[Input, Input] { 201 | if len(input) == 0 { 202 | return Failure[Input, Input](NewError(input, "HexDigit1"), input) 203 | } 204 | 205 | if !IsHexDigit(rune(input[0])) { 206 | return Failure[Input, Input](NewError(input, "HexDigit1"), input) 207 | } 208 | 209 | lastDigitPos := 1 210 | for idx := 1; idx < len(input); idx++ { 211 | if !IsHexDigit(rune(input[idx])) { 212 | return Success(input[:idx], input[idx:]) 213 | } 214 | 215 | lastDigitPos++ 216 | } 217 | 218 | return Success(input[:lastDigitPos], input[lastDigitPos:]) 219 | } 220 | } 221 | 222 | // Whitespace0 parses zero or more whitespace characters: ' ', '\t', '\n', '\r'. 223 | // In the cases where the input is empty, or no terminating character is found, the parser 224 | // returns the input as is. 
225 | func Whitespace0[Input Bytes]() Parser[Input, Input] { 226 | return func(input Input) Result[Input, Input] { 227 | if len(input) == 0 { 228 | return Success(input, input) 229 | } 230 | 231 | lastPos := 0 232 | for idx := 0; idx < len(input); idx++ { 233 | if !IsWhitespace(rune(input[idx])) { 234 | return Success(input[:idx], input[idx:]) 235 | } 236 | 237 | lastPos++ 238 | } 239 | 240 | return Success(input[:lastPos], input[lastPos:]) 241 | } 242 | } 243 | 244 | // Whitespace1 parses one or more whitespace characters: ' ', '\t', '\n', '\r'. 245 | // In the cases where the input doesn't hold enough data, or a terminating character 246 | // is found before any matching ones were, the parser returns an error result. 247 | func Whitespace1[Input Bytes]() Parser[Input, Input] { 248 | return func(input Input) Result[Input, Input] { 249 | if len(input) == 0 { 250 | return Failure[Input, Input](NewError(input, "WhiteSpace1"), input) 251 | } 252 | 253 | if !IsWhitespace(rune(input[0])) { 254 | return Failure[Input, Input](NewError(input, "WhiteSpace1"), input) 255 | } 256 | 257 | lastPos := 1 258 | for idx := 1; idx < len(input); idx++ { 259 | if !IsWhitespace(rune(input[idx])) { 260 | return Success(input[:idx], input[idx:]) 261 | } 262 | 263 | lastPos++ 264 | } 265 | 266 | return Success(input[:lastPos], input[lastPos:]) 267 | } 268 | } 269 | 270 | // LF parses a line feed `\n` character. 271 | func LF[Input Bytes]() Parser[Input, rune] { 272 | return func(input Input) Result[rune, Input] { 273 | if len(input) == 0 || input[0] != '\n' { 274 | return Failure[Input, rune](NewError(input, "LF"), input) 275 | } 276 | 277 | return Success(rune(input[0]), input[1:]) 278 | } 279 | } 280 | 281 | // CR parses a carriage return `\r` character. 
282 | func CR[Input Bytes]() Parser[Input, rune] { 283 | return func(input Input) Result[rune, Input] { 284 | if len(input) == 0 || input[0] != '\r' { 285 | return Failure[Input, rune](NewError(input, "CR"), input) 286 | } 287 | 288 | return Success(rune(input[0]), input[1:]) 289 | } 290 | } 291 | 292 | // CRLF parses the string `\r\n`. 293 | func CRLF[Input Bytes]() Parser[Input, Input] { 294 | return func(input Input) Result[Input, Input] { 295 | if len(input) < 2 || (input[0] != '\r' || input[1] != '\n') { 296 | return Failure[Input, Input](NewError(input, "CRLF"), input) 297 | } 298 | 299 | return Success(input[:2], input[2:]) 300 | } 301 | } 302 | 303 | // OneOf parses a single character from the given set of characters. 304 | func OneOf[Input Bytes](collection ...rune) Parser[Input, rune] { 305 | return func(input Input) Result[rune, Input] { 306 | if len(input) == 0 { 307 | return Failure[Input, rune](NewError(input, "OneOf"), input) 308 | } 309 | 310 | for _, c := range collection { 311 | if rune(input[0]) == c { 312 | return Success(rune(input[0]), input[1:]) 313 | } 314 | } 315 | 316 | return Failure[Input, rune](NewError(input, "OneOf"), input) 317 | } 318 | } 319 | 320 | // Satisfy parses a single character, and ensures that it satisfies the given predicate. 321 | func Satisfy[Input Bytes](predicate func(rune) bool) Parser[Input, rune] { 322 | return func(input Input) Result[rune, Input] { 323 | if len(input) == 0 { 324 | return Failure[Input, rune](NewError(input, "Satisfy"), input) 325 | } 326 | 327 | if !predicate(rune(input[0])) { 328 | return Failure[Input, rune](NewError(input, "Satisfy"), input) 329 | } 330 | 331 | return Success(rune(input[0]), input[1:]) 332 | } 333 | } 334 | 335 | // Space parses a space character. 
336 | func Space[Input Bytes]() Parser[Input, rune] { 337 | return func(input Input) Result[rune, Input] { 338 | if len(input) == 0 || input[0] != ' ' { 339 | return Failure[Input, rune](NewError(input, "Space"), input) 340 | } 341 | 342 | return Success(rune(input[0]), input[1:]) 343 | } 344 | } 345 | 346 | // Tab parses a tab character. 347 | func Tab[Input Bytes]() Parser[Input, rune] { 348 | return func(input Input) Result[rune, Input] { 349 | if len(input) == 0 || input[0] != '\t' { 350 | return Failure[Input, rune](NewError(input, "Tab"), input) 351 | } 352 | 353 | return Success(rune(input[0]), input[1:]) 354 | } 355 | } 356 | 357 | // Int64 parses an integer from the input, and returns the part of the input that 358 | // matched the integer. 359 | func Int64[Input Bytes]() Parser[Input, int64] { 360 | return func(input Input) Result[int64, Input] { 361 | parser := Recognize(Sequence(Optional(Token[Input]("-")), Digit1[Input]())) 362 | 363 | result := parser(input) 364 | if result.Err != nil { 365 | return Failure[Input, int64](NewError(input, "Int64"), input) 366 | } 367 | 368 | n, err := strconv.ParseInt(string(result.Output), 10, 64) 369 | if err != nil { 370 | return Failure[Input, int64](NewError(input, "Int64"), input) 371 | } 372 | 373 | return Success(n, result.Remaining) 374 | } 375 | } 376 | 377 | // Int8 parses an 8-bit integer from the input, 378 | // and returns the part of the input that matched the integer. 
379 | func Int8[Input Bytes]() Parser[Input, int8] { 380 | return func(input Input) Result[int8, Input] { 381 | parser := Recognize(Sequence(Optional(Token[Input]("-")), Digit1[Input]())) 382 | 383 | result := parser(input) 384 | if result.Err != nil { 385 | return Failure[Input, int8](NewError(input, "Int8"), input) 386 | } 387 | 388 | n, err := strconv.ParseInt(string(result.Output), 10, 8) 389 | if err != nil { 390 | return Failure[Input, int8](NewError(input, "Int8"), input) 391 | } 392 | 393 | return Success(int8(n), result.Remaining) 394 | } 395 | } 396 | 397 | // UInt8 parses an 8-bit integer from the input, 398 | // and returns the part of the input that matched the integer. 399 | func UInt8[Input Bytes]() Parser[Input, uint8] { 400 | return func(input Input) Result[uint8, Input] { 401 | result := Digit1[Input]()(input) 402 | if result.Err != nil { 403 | return Failure[Input, uint8](NewError(input, "UInt8"), input) 404 | } 405 | 406 | n, err := strconv.ParseUint(string(result.Output), 10, 8) 407 | if err != nil { 408 | return Failure[Input, uint8](NewError(input, "UInt8"), input) 409 | } 410 | 411 | return Success(uint8(n), result.Remaining) 412 | } 413 | } 414 | 415 | // IsAlpha returns true if the rune is an alphabetic character. 416 | func IsAlpha(c rune) bool { 417 | return IsLowAlpha(c) || IsUpAlpha(c) 418 | } 419 | 420 | // IsLowAlpha returns true if the rune is a lowercase alphabetic character. 421 | func IsLowAlpha(c rune) bool { 422 | return c >= 'a' && c <= 'z' 423 | } 424 | 425 | // IsUpAlpha returns true if the rune is an uppercase alphabetic character. 426 | func IsUpAlpha(c rune) bool { 427 | return c >= 'A' && c <= 'Z' 428 | } 429 | 430 | // IsDigit returns true if the rune is a digit. 431 | func IsDigit(c rune) bool { 432 | return c >= '0' && c <= '9' 433 | } 434 | 435 | // IsAlphanumeric returns true if the rune is an alphanumeric character. 
436 | func IsAlphanumeric(c rune) bool { 437 | return IsAlpha(c) || IsDigit(c) 438 | } 439 | 440 | // IsHexDigit returns true if the rune is a hexadecimal digit. 441 | func IsHexDigit(c rune) bool { 442 | return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 443 | } 444 | 445 | // IsControl returns true if the rune is a control character. 446 | func IsControl(c rune) bool { 447 | return (c >= 0 && c < 32) || c == 127 448 | } 449 | 450 | func IsWhitespace(c rune) bool { 451 | return c == ' ' || c == '\t' || c == '\n' || c == '\r' 452 | } 453 | -------------------------------------------------------------------------------- /combinators.go: -------------------------------------------------------------------------------- 1 | // Package gomme implements a parser combinator library. 2 | // It provides a toolkit for developers to build reliable, fast, flexible, and easy-to-develop and maintain parsers 3 | // for both textual and binary formats. It extensively uses the recent introduction of Generics in the Go programming 4 | // language to offer flexibility in how combinators can be mixed and matched to produce the desired output while 5 | // providing as much compile-time type safety as possible. 
6 | package gomme 7 | 8 | // FIXME: Ideally, I would want the combinators working with sequences 9 | // to produce somewhat detailed errors, and tell me which of the combinators failed 10 | 11 | // Bytes is a generic type alias for string 12 | type Bytes interface { 13 | string | []byte 14 | } 15 | 16 | // Separator is a generic type alias for separator characters 17 | type Separator interface { 18 | rune | byte | string 19 | } 20 | 21 | // Result is a generic type alias for Result 22 | type Result[Output any, Remaining Bytes] struct { 23 | Output Output 24 | Err *Error[Remaining] 25 | Remaining Remaining 26 | } 27 | 28 | // Parser is a generic type alias for Parser 29 | type Parser[Input Bytes, Output any] func(input Input) Result[Output, Input] 30 | 31 | // Success creates a Result with a output set from 32 | // the result of a successful parsing. 33 | func Success[Output any, Remaining Bytes](output Output, r Remaining) Result[Output, Remaining] { 34 | return Result[Output, Remaining]{output, nil, r} 35 | } 36 | 37 | // Failure creates a Result with an error set from 38 | // the result of a failed parsing. 39 | // TODO: The Error type could be generic too 40 | func Failure[Input Bytes, Output any](err *Error[Input], input Input) Result[Output, Input] { 41 | var output Output 42 | return Result[Output, Input]{output, err, input} 43 | } 44 | 45 | // Map applies a function to the result of a parser. 
46 | func Map[Input Bytes, ParserOutput any, MapperOutput any](parse Parser[Input, ParserOutput], fn func(ParserOutput) (MapperOutput, error)) Parser[Input, MapperOutput] { 47 | return func(input Input) Result[MapperOutput, Input] { 48 | res := parse(input) 49 | if res.Err != nil { 50 | return Failure[Input, MapperOutput](NewError(input, "Map"), input) 51 | } 52 | 53 | output, err := fn(res.Output) 54 | if err != nil { 55 | return Failure[Input, MapperOutput](NewError(input, err.Error()), input) 56 | } 57 | 58 | return Success(output, res.Remaining) 59 | } 60 | } 61 | 62 | // Optional applies a an optional child parser. Will return nil 63 | // if not successful. 64 | // 65 | // N.B: unless a FatalError is encountered, Optional will ignore 66 | // any parsing failures and errors. 67 | func Optional[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, Output] { 68 | return func(input Input) Result[Output, Input] { 69 | result := parse(input) 70 | if result.Err != nil && !result.Err.IsFatal() { 71 | result.Err = nil 72 | } 73 | 74 | return Success(result.Output, result.Remaining) 75 | } 76 | } 77 | 78 | // Peek tries to apply the provided parser without consuming any input. 79 | // It effectively allows to look ahead in the input. 80 | func Peek[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, Output] { 81 | return func(input Input) Result[Output, Input] { 82 | result := parse(input) 83 | if result.Err != nil { 84 | return Failure[Input, Output](result.Err, input) 85 | } 86 | 87 | return Success(result.Output, input) 88 | } 89 | } 90 | 91 | // Recognize returns the consumed input as the produced value when 92 | // the provided parser succeeds. 
93 | func Recognize[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, Input] { 94 | return func(input Input) Result[Input, Input] { 95 | result := parse(input) 96 | if result.Err != nil { 97 | return Failure[Input, Input](result.Err, input) 98 | } 99 | 100 | return Success(input[:len(input)-len(result.Remaining)], result.Remaining) 101 | } 102 | } 103 | 104 | // Assign returns the provided value if the parser succeeds, otherwise 105 | // it returns an error result. 106 | func Assign[Input Bytes, Output1, Output2 any](value Output1, parse Parser[Input, Output2]) Parser[Input, Output1] { 107 | return func(input Input) Result[Output1, Input] { 108 | result := parse(input) 109 | if result.Err != nil { 110 | return Failure[Input, Output1](result.Err, input) 111 | } 112 | 113 | return Success(value, result.Remaining) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /combinators_test.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | import ( 4 | "errors" 5 | "strconv" 6 | "testing" 7 | ) 8 | 9 | func TestMap(t *testing.T) { 10 | t.Parallel() 11 | 12 | type TestStruct struct { 13 | Foo int 14 | Bar string 15 | } 16 | 17 | type args struct { 18 | parser Parser[string, TestStruct] 19 | } 20 | testCases := []struct { 21 | name string 22 | input string 23 | args args 24 | wantErr bool 25 | wantOutput TestStruct 26 | wantRemaining string 27 | }{ 28 | { 29 | name: "matching parser should succeed", 30 | input: "1abc\r\n", 31 | args: args{ 32 | Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) { 33 | left, _ := strconv.Atoi(p.Left) 34 | return TestStruct{ 35 | Foo: left, 36 | Bar: p.Right, 37 | }, nil 38 | }), 39 | }, 40 | wantErr: false, 41 | wantOutput: TestStruct{ 42 | Foo: 1, 43 | Bar: "abc", 44 | }, 45 | wantRemaining: "\r\n", 46 | }, 47 | { 48 | name: "failing parser should fail", 
49 | input: "abc\r\n", 50 | args: args{ 51 | Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) { 52 | left, _ := strconv.Atoi(p.Left) 53 | 54 | return TestStruct{ 55 | Foo: left, 56 | Bar: p.Right, 57 | }, nil 58 | }), 59 | }, 60 | wantErr: true, 61 | wantOutput: TestStruct{}, 62 | wantRemaining: "abc\r\n", 63 | }, 64 | { 65 | name: "failing mapper should fail", 66 | input: "1abc\r\n", 67 | args: args{ 68 | Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) { 69 | return TestStruct{}, errors.New("unexpected error") 70 | }), 71 | }, 72 | wantErr: true, 73 | wantOutput: TestStruct{}, 74 | wantRemaining: "1abc\r\n", 75 | }, 76 | { 77 | name: "empty input should fail", 78 | input: "", 79 | args: args{ 80 | Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) { 81 | left, _ := strconv.Atoi(p.Left) 82 | 83 | return TestStruct{ 84 | Foo: left, 85 | Bar: p.Right, 86 | }, nil 87 | }), 88 | }, 89 | wantErr: true, 90 | wantOutput: TestStruct{}, 91 | wantRemaining: "", 92 | }, 93 | } 94 | 95 | for _, tc := range testCases { 96 | tc := tc 97 | 98 | t.Run(tc.name, func(t *testing.T) { 99 | t.Parallel() 100 | 101 | gotResult := tc.args.parser(tc.input) 102 | if (gotResult.Err != nil) != tc.wantErr { 103 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 104 | } 105 | 106 | if gotResult.Output != tc.wantOutput { 107 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 108 | } 109 | 110 | if gotResult.Remaining != tc.wantRemaining { 111 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 112 | } 113 | }) 114 | } 115 | } 116 | 117 | func BenchmarkMap(b *testing.B) { 118 | type TestStruct struct { 119 | Foo int 120 | Bar string 121 | } 122 | 123 | p := Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p 
PairContainer[string, string]) (TestStruct, error) { 124 | left, _ := strconv.Atoi(p.Left) 125 | 126 | return TestStruct{ 127 | Foo: left, 128 | Bar: p.Right, 129 | }, nil 130 | }) 131 | 132 | b.ResetTimer() 133 | for i := 0; i < b.N; i++ { 134 | p("1abc\r\n") 135 | } 136 | } 137 | 138 | func TestOptional(t *testing.T) { 139 | t.Parallel() 140 | 141 | type args struct { 142 | p Parser[string, string] 143 | } 144 | testCases := []struct { 145 | name string 146 | args args 147 | input string 148 | wantErr bool 149 | wantOutput string 150 | wantRemaining string 151 | }{ 152 | { 153 | name: "matching parser should succeed", 154 | input: "\r\n123", 155 | args: args{ 156 | p: Optional(CRLF[string]()), 157 | }, 158 | wantErr: false, 159 | wantOutput: "\r\n", 160 | wantRemaining: "123", 161 | }, 162 | { 163 | name: "no match should succeed", 164 | input: "123", 165 | args: args{ 166 | p: Optional(CRLF[string]()), 167 | }, 168 | wantErr: false, 169 | wantOutput: "", 170 | wantRemaining: "123", 171 | }, 172 | { 173 | name: "empty input should succeed", 174 | input: "", 175 | args: args{ 176 | p: Optional(CRLF[string]()), 177 | }, 178 | wantErr: false, 179 | wantOutput: "", 180 | wantRemaining: "", 181 | }, 182 | } 183 | for _, tc := range testCases { 184 | tc := tc 185 | 186 | t.Run(tc.name, func(t *testing.T) { 187 | t.Parallel() 188 | 189 | gotResult := tc.args.p(tc.input) 190 | if (gotResult.Err != nil) != tc.wantErr { 191 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 192 | } 193 | 194 | if gotResult.Output != tc.wantOutput { 195 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 196 | } 197 | 198 | if gotResult.Remaining != tc.wantRemaining { 199 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 200 | } 201 | }) 202 | } 203 | } 204 | 205 | func BenchmarkOptional(b *testing.B) { 206 | p := Optional(CRLF[string]()) 207 | 208 | b.ResetTimer() 209 | for i := 0; i < b.N; i++ { 210 | 
p("\r\n123") 211 | } 212 | } 213 | 214 | func TestPeek(t *testing.T) { 215 | t.Parallel() 216 | 217 | type args struct { 218 | p Parser[string, string] 219 | } 220 | testCases := []struct { 221 | name string 222 | args args 223 | input string 224 | wantErr bool 225 | wantOutput string 226 | wantRemaining string 227 | }{ 228 | { 229 | name: "matching parser should succeed", 230 | input: "abcd;", 231 | args: args{ 232 | p: Peek(Alpha1[string]()), 233 | }, 234 | wantErr: false, 235 | wantOutput: "abcd", 236 | wantRemaining: "abcd;", 237 | }, 238 | { 239 | name: "non matching parser should fail", 240 | input: "123;", 241 | args: args{ 242 | p: Peek(Alpha1[string]()), 243 | }, 244 | wantErr: true, 245 | wantOutput: "", 246 | wantRemaining: "123;", 247 | }, 248 | } 249 | for _, tc := range testCases { 250 | tc := tc 251 | 252 | t.Run(tc.name, func(t *testing.T) { 253 | t.Parallel() 254 | 255 | gotResult := tc.args.p(tc.input) 256 | if (gotResult.Err != nil) != tc.wantErr { 257 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 258 | } 259 | 260 | if gotResult.Output != tc.wantOutput { 261 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 262 | } 263 | 264 | if gotResult.Remaining != tc.wantRemaining { 265 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 266 | } 267 | }) 268 | } 269 | } 270 | 271 | func BenchmarkPeek(b *testing.B) { 272 | p := Peek(Alpha1[string]()) 273 | 274 | b.ResetTimer() 275 | for i := 0; i < b.N; i++ { 276 | p("abcd;") 277 | } 278 | } 279 | 280 | func TestRecognize(t *testing.T) { 281 | t.Parallel() 282 | 283 | type args struct { 284 | p Parser[string, string] 285 | } 286 | testCases := []struct { 287 | name string 288 | args args 289 | input string 290 | wantErr bool 291 | wantOutput string 292 | wantRemaining string 293 | }{ 294 | { 295 | name: "matching parser should succeed", 296 | input: "123abc", 297 | args: args{ 298 | p: Recognize(Pair(Digit1[string](), 
Alpha1[string]())), 299 | }, 300 | wantErr: false, 301 | wantOutput: "123abc", 302 | wantRemaining: "", 303 | }, 304 | { 305 | name: "no prefix match should fail", 306 | input: "abc", 307 | args: args{ 308 | p: Recognize(Pair(Digit1[string](), Alpha1[string]())), 309 | }, 310 | wantErr: true, 311 | wantOutput: "", 312 | wantRemaining: "abc", 313 | }, 314 | { 315 | name: "no parser match should fail", 316 | input: "123", 317 | args: args{ 318 | p: Recognize(Pair(Digit1[string](), Alpha1[string]())), 319 | }, 320 | wantErr: true, 321 | wantOutput: "", 322 | wantRemaining: "123", 323 | }, 324 | { 325 | name: "empty input should fail", 326 | input: "", 327 | args: args{ 328 | p: Recognize(Pair(Digit1[string](), Alpha1[string]())), 329 | }, 330 | wantErr: true, 331 | wantOutput: "", 332 | wantRemaining: "", 333 | }, 334 | } 335 | for _, tc := range testCases { 336 | tc := tc 337 | 338 | t.Run(tc.name, func(t *testing.T) { 339 | t.Parallel() 340 | 341 | gotResult := tc.args.p(tc.input) 342 | if (gotResult.Err != nil) != tc.wantErr { 343 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 344 | } 345 | 346 | if gotResult.Output != tc.wantOutput { 347 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 348 | } 349 | 350 | if gotResult.Remaining != tc.wantRemaining { 351 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 352 | } 353 | }) 354 | } 355 | } 356 | 357 | func BenchmarkRecognize(b *testing.B) { 358 | p := Recognize(Pair(Digit1[string](), Alpha1[string]())) 359 | 360 | b.ResetTimer() 361 | for i := 0; i < b.N; i++ { 362 | p("123abc") 363 | } 364 | } 365 | 366 | func TestAssign(t *testing.T) { 367 | t.Parallel() 368 | 369 | type args struct { 370 | p Parser[string, int] 371 | } 372 | testCases := []struct { 373 | name string 374 | args args 375 | input string 376 | wantErr bool 377 | wantOutput int 378 | wantRemaining string 379 | }{ 380 | { 381 | name: "matching parser should succeed", 
382 | input: "abcd", 383 | args: args{ 384 | p: Assign(1234, Alpha1[string]()), 385 | }, 386 | wantErr: false, 387 | wantOutput: 1234, 388 | wantRemaining: "", 389 | }, 390 | { 391 | name: "non matching parser should fail", 392 | input: "123abcd;", 393 | args: args{ 394 | p: Assign(1234, Alpha1[string]()), 395 | }, 396 | wantErr: true, 397 | wantOutput: 0, 398 | wantRemaining: "123abcd;", 399 | }, 400 | } 401 | for _, tc := range testCases { 402 | tc := tc 403 | 404 | t.Run(tc.name, func(t *testing.T) { 405 | t.Parallel() 406 | 407 | gotResult := tc.args.p(tc.input) 408 | if (gotResult.Err != nil) != tc.wantErr { 409 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 410 | } 411 | 412 | if gotResult.Output != tc.wantOutput { 413 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 414 | } 415 | 416 | if gotResult.Remaining != tc.wantRemaining { 417 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 418 | } 419 | }) 420 | } 421 | } 422 | 423 | func BenchmarkAssign(b *testing.B) { 424 | p := Assign(1234, Alpha1[string]()) 425 | 426 | b.ResetTimer() 427 | for i := 0; i < b.N; i++ { 428 | p("abcd") 429 | } 430 | } 431 | -------------------------------------------------------------------------------- /containers.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | // PairContainer allows returning a pair of results from a parser. 
4 | type PairContainer[Left, Right any] struct { 5 | Left Left 6 | Right Right 7 | } 8 | 9 | // NewPairContainer instantiates a new PairContainer and returns a pointer to it. 10 | func NewPairContainer[Left, Right any](left Left, right Right) *PairContainer[Left, Right] { 11 | return &PairContainer[Left, Right]{ 12 | Left: left, 13 | Right: right, 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /error.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | // Error represents a parsing error. It holds the input that was being parsed, 9 | // the parsers that were tried, and the error that was produced. 10 | type Error[Input Bytes] struct { 11 | Input Input 12 | Err error 13 | Expected []string 14 | } 15 | 16 | // NewError produces a new Error from the provided input and names of 17 | // parsers expected to succeed. 18 | func NewError[Input Bytes](input Input, expected ...string) *Error[Input] { 19 | return &Error[Input]{Input: input, Expected: expected} 20 | } 21 | 22 | // Error returns a human readable error string. 23 | func (e *Error[Input]) Error() string { 24 | return fmt.Sprintf("expected %v", strings.Join(e.Expected, ", ")) 25 | } 26 | 27 | // IsFatal returns true if the error is fatal. 
28 | func (e *Error[Input]) IsFatal() bool { 29 | return e.Err != nil 30 | } 31 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | examples/redis/testdata -------------------------------------------------------------------------------- /examples/csv/csv.go: -------------------------------------------------------------------------------- 1 | // Package csv implements a parser for CSV files. 2 | // 3 | // It is a simple, incomplete, example of how to use the gomme 4 | // parser combinator library to build a parser targetting the 5 | // format described in [RFC4180]. 
6 | // 7 | // [RFC4180]: https://tools.ietf.org/html/rfc4180 8 | package csv 9 | 10 | import "github.com/oleiade/gomme" 11 | 12 | func ParseCSV(input string) ([][]string, error) { 13 | parser := gomme.SeparatedList1( 14 | gomme.SeparatedList1( 15 | gomme.Alternative( 16 | gomme.Alphanumeric1[string](), 17 | gomme.Delimited(gomme.Char[string]('"'), gomme.Alphanumeric1[string](), gomme.Char[string]('"')), 18 | ), 19 | gomme.Char[string](','), 20 | ), 21 | gomme.CRLF[string](), 22 | ) 23 | 24 | result := parser(input) 25 | if result.Err != nil { 26 | return nil, result.Err 27 | } 28 | 29 | return result.Output, nil 30 | } 31 | -------------------------------------------------------------------------------- /examples/csv/csv_test.go: -------------------------------------------------------------------------------- 1 | package csv 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestParseRGBColor(t *testing.T) { 10 | t.Parallel() 11 | 12 | testCases := []struct { 13 | name string 14 | input string 15 | wantErr bool 16 | wantOutput [][]string 17 | }{ 18 | { 19 | name: "parsing a single csv line should succeed", 20 | input: "abc,def,ghi\r\n", 21 | wantErr: false, 22 | wantOutput: [][]string{{"abc", "def", "ghi"}}, 23 | }, 24 | { 25 | name: "parsing multie csv lines should succeed", 26 | input: "abc,def,ghi\r\njkl,mno,pqr\r\n", 27 | wantErr: false, 28 | wantOutput: [][]string{ 29 | {"abc", "def", "ghi"}, 30 | {"jkl", "mno", "pqr"}, 31 | }, 32 | }, 33 | { 34 | name: "parsing a single csv line of escaped strings should succeed", 35 | input: "\"abc\",\"def\",\"ghi\"\r\n", 36 | wantErr: false, 37 | wantOutput: [][]string{{"abc", "def", "ghi"}}, 38 | }, 39 | } 40 | for _, tc := range testCases { 41 | tc := tc 42 | 43 | t.Run(tc.name, func(t *testing.T) { 44 | t.Parallel() 45 | 46 | gotOutput, gotErr := ParseCSV(tc.input) 47 | if (gotErr != nil) != tc.wantErr { 48 | t.Errorf("got error %v, want error %v", gotErr, tc.wantErr) 49 | } 50 
| 51 | assert.Equal(t, 52 | tc.wantOutput, 53 | gotOutput, 54 | "got output %v, want output %v", gotOutput, tc.wantOutput, 55 | ) 56 | }) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /examples/hexcolor/hexcolor.go: -------------------------------------------------------------------------------- 1 | // Package hexcolor implements a parser for hexadecimal color strings. 2 | // It demonstrates how to use gomme to build a parser for a simple string format. 3 | package hexcolor 4 | 5 | import ( 6 | "strconv" 7 | 8 | "github.com/oleiade/gomme" 9 | ) 10 | 11 | // RGBColor stores the three bytes describing a color in the RGB space. 12 | type RGBColor struct { 13 | red uint8 14 | green uint8 15 | blue uint8 16 | } 17 | 18 | // ParseRGBColor creates a new RGBColor from a hexadecimal color string. 19 | // The string must be a six digit hexadecimal number, prefixed with a "#". 20 | func ParseRGBColor(input string) (RGBColor, error) { 21 | parser := gomme.Preceded( 22 | gomme.Token[string]("#"), 23 | gomme.Map( 24 | gomme.Count(HexColorComponent(), 3), 25 | func(components []uint8) (RGBColor, error) { 26 | return RGBColor{components[0], components[1], components[2]}, nil 27 | }, 28 | ), 29 | ) 30 | 31 | result := parser(input) 32 | if result.Err != nil { 33 | return RGBColor{}, result.Err 34 | } 35 | 36 | return result.Output, nil 37 | } 38 | 39 | // HexColorComponent produces a parser that parses a single hex color component, 40 | // which is a two digit hexadecimal number. 41 | func HexColorComponent() gomme.Parser[string, uint8] { 42 | return func(input string) gomme.Result[uint8, string] { 43 | return gomme.Map( 44 | gomme.TakeWhileMN[string](2, 2, gomme.IsHexDigit), 45 | fromHex, 46 | )(input) 47 | } 48 | } 49 | 50 | // fromHex converts a two digits hexadecimal number to its decimal value. 
51 | func fromHex(input string) (uint8, error) { 52 | res, err := strconv.ParseUint(input, 16, 8) // bitSize 8: a value that does not fit a uint8 is an error, never silently truncated 53 | if err != nil { 54 | return 0, err 55 | } 56 | 57 | return uint8(res), nil 58 | } 59 | -------------------------------------------------------------------------------- /examples/hexcolor/hexcolor_test.go: -------------------------------------------------------------------------------- 1 | package hexcolor 2 | 3 | import "testing" 4 | 5 | func TestParseRGBColor(t *testing.T) { 6 | t.Parallel() 7 | 8 | testCases := []struct { 9 | name string 10 | input string 11 | wantErr bool 12 | wantColor RGBColor 13 | }{ 14 | { 15 | name: "parsing minimum hexadecimal color should succeed", 16 | input: "#000000", 17 | wantErr: false, 18 | wantColor: RGBColor{0, 0, 0}, 19 | }, 20 | { 21 | name: "parsing maximum hexadecimal color should succeed", 22 | input: "#ffffff", 23 | wantErr: false, 24 | wantColor: RGBColor{255, 255, 255}, 25 | }, 26 | { 27 | name: "parsing out of bound color component should fail", 28 | input: "#fffffg", 29 | wantErr: true, 30 | wantColor: RGBColor{}, 31 | }, 32 | { 33 | name: "omitting leading # character should fail", 34 | input: "ffffff", 35 | wantErr: true, 36 | wantColor: RGBColor{}, 37 | }, 38 | { 39 | name: "parsing insufficient number of characters should fail", 40 | input: "#fffff", 41 | wantErr: true, 42 | wantColor: RGBColor{}, 43 | }, 44 | { 45 | name: "empty input should fail", 46 | input: "", 47 | wantErr: true, 48 | wantColor: RGBColor{}, 49 | }, 50 | } 51 | for _, tc := range testCases { 52 | tc := tc 53 | 54 | t.Run(tc.name, func(t *testing.T) { 55 | t.Parallel() 56 | 57 | gotColor, gotErr := ParseRGBColor(tc.input) 58 | if (gotErr != nil) != tc.wantErr { 59 | t.Errorf("got error %v, want error %v", gotErr, tc.wantErr) 60 | } 61 | 62 | if gotColor != tc.wantColor { 63 | t.Errorf("got color %v, want color %v", gotColor, tc.wantColor) 64 | } 65 | }) 66 | } 67 | } 68 | 
-------------------------------------------------------------------------------- /examples/json/json.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | _ "embed" 5 | "fmt" 6 | "log" 7 | "strconv" 8 | "strings" 9 | 10 | "github.com/oleiade/gomme" 11 | ) 12 | 13 | //go:embed test.json 14 | var testJSON string 15 | 16 | func main() { 17 | result := parseJSON(testJSON) 18 | if result.Err != nil { 19 | log.Fatal(result.Err) 20 | return 21 | } 22 | 23 | fmt.Println(result.Output) 24 | } 25 | 26 | type ( 27 | // JSONValue represents any value that can be encountered in 28 | // JSON, including complex types like objects and arrays. 29 | JSONValue interface{} 30 | 31 | // JSONString represents a JSON string value. 32 | JSONString string 33 | 34 | // JSONNumber represents a JSON number value, which internally is treated as float64. 35 | JSONNumber float64 36 | 37 | // JSONObject represents a JSON object, which is a collection of key-value pairs. 38 | JSONObject map[string]JSONValue 39 | 40 | // JSONArray represents a JSON array, which is a list of JSON values. 41 | JSONArray []JSONValue 42 | 43 | // JSONBool represents a JSON boolean value. 44 | JSONBool bool 45 | 46 | // JSONNull represents the JSON null value. 47 | JSONNull struct{} 48 | ) 49 | 50 | // parseJSON is a convenience function to start parsing JSON from the given input string. 51 | func parseJSON(input string) gomme.Result[JSONValue, string] { 52 | return parseValue(input) 53 | } 54 | 55 | // parseValue is a parser that attempts to parse different types of 56 | // JSON values (object, array, string, etc.). 
57 | func parseValue(input string) gomme.Result[JSONValue, string] { 58 | return gomme.Alternative( 59 | parseObject, 60 | parseArray, 61 | parseString, 62 | parseNumber, 63 | parseTrue, 64 | parseFalse, 65 | parseNull, 66 | )(input) 67 | } 68 | 69 | // parseObject parses a JSON object, which starts and ends with 70 | // curly braces and contains key-value pairs. 71 | func parseObject(input string) gomme.Result[JSONValue, string] { 72 | return gomme.Map( 73 | gomme.Delimited[string, rune, map[string]JSONValue, rune]( 74 | gomme.Char[string]('{'), 75 | gomme.Optional[string, map[string]JSONValue]( 76 | gomme.Preceded( 77 | ws(), 78 | gomme.Terminated[string, map[string]JSONValue]( 79 | parseMembers, 80 | ws(), 81 | ), 82 | ), 83 | ), 84 | gomme.Char[string]('}'), 85 | ), 86 | func(members map[string]JSONValue) (JSONValue, error) { 87 | return JSONObject(members), nil 88 | }, 89 | )(input) 90 | } 91 | 92 | // Ensure parseObject is a Parser[string, JSONValue] 93 | var _ gomme.Parser[string, JSONValue] = parseObject 94 | 95 | // parseArray parses a JSON array, which starts and ends with 96 | // square brackets and contains a list of values. 
97 | func parseArray(input string) gomme.Result[JSONValue, string] { 98 | return gomme.Map( 99 | gomme.Delimited[string, rune, []JSONValue, rune]( 100 | gomme.Char[string]('['), 101 | gomme.Alternative( 102 | parseElements, 103 | gomme.Map(ws(), func(s string) ([]JSONValue, error) { return []JSONValue{}, nil }), 104 | ), 105 | gomme.Char[string](']'), 106 | ), 107 | func(elements []JSONValue) (JSONValue, error) { 108 | return JSONArray(elements), nil 109 | }, 110 | )(input) 111 | } 112 | 113 | // Ensure parseArray is a Parser[string, JSONValue] 114 | var _ gomme.Parser[string, JSONValue] = parseArray 115 | 116 | func parseElement(input string) gomme.Result[JSONValue, string] { 117 | return gomme.Map( 118 | gomme.Delimited[string](ws(), parseValue, ws()), 119 | func(v JSONValue) (JSONValue, error) { return v, nil }, 120 | )(input) 121 | } 122 | 123 | // Ensure parseElement is a Parser[string, JSONValue] 124 | var _ gomme.Parser[string, JSONValue] = parseElement 125 | 126 | // parseNumber parses a JSON number. 
127 | func parseNumber(input string) gomme.Result[JSONValue, string] { 128 | return gomme.Map[string]( 129 | gomme.Sequence( 130 | gomme.Map(integer(), func(i int) (string, error) { return strconv.Itoa(i), nil }), 131 | gomme.Optional(fraction()), 132 | gomme.Optional(exponent()), 133 | ), 134 | func(parts []string) (JSONValue, error) { 135 | // Construct the float string from parts 136 | var floatStr string 137 | 138 | // Integer part 139 | floatStr += parts[0] 140 | 141 | // Fraction part 142 | if parts[1] != "" { 143 | // Append the fraction digits verbatim. Converting them to 144 | // an int first would drop leading zeros ("1.05" would be 145 | // read as 1.5) and fail on fractions too long for an int. 146 | // 147 | // fraction() only yields decimal digits, so the ParseFloat 148 | // call below is enough to validate the assembled literal. 149 | // 150 | floatStr += "." + parts[1] 151 | } 152 | 153 | // Exponent part 154 | if parts[2] != "" { 155 | floatStr += fmt.Sprintf("e%s", parts[2]) 156 | } 157 | 158 | f, err := strconv.ParseFloat(floatStr, 64) 159 | if err != nil { 160 | return JSONNumber(0.0), err 161 | } 162 | 163 | return JSONNumber(f), nil 164 | }, 165 | )(input) 166 | } 167 | 168 | // Ensure parseNumber is a Parser[string, JSONValue] 169 | var _ gomme.Parser[string, JSONValue] = parseNumber 170 | 171 | // parseString parses a JSON string. 172 | func parseString(input string) gomme.Result[JSONValue, string] { 173 | return gomme.Map( 174 | stringParser(), 175 | func(s string) (JSONValue, error) { 176 | return JSONString(s), nil 177 | }, 178 | )(input) 179 | } 180 | 181 | // Ensure parseString is a Parser[string, JSONValue] 182 | var _ gomme.Parser[string, JSONValue] = parseString 183 | 184 | // parseFalse parses the JSON boolean value 'false'. 
185 | func parseFalse(input string) gomme.Result[JSONValue, string] { 186 | return gomme.Map( 187 | gomme.Token[string]("false"), 188 | func(_ string) (JSONValue, error) { return JSONBool(false), nil }, 189 | )(input) 190 | } 191 | 192 | // Ensure parseFalse is a Parser[string, JSONValue] 193 | var _ gomme.Parser[string, JSONValue] = parseFalse 194 | 195 | // parseTrue parses the JSON boolean value 'true'. 196 | func parseTrue(input string) gomme.Result[JSONValue, string] { 197 | return gomme.Map( 198 | gomme.Token[string]("true"), 199 | func(_ string) (JSONValue, error) { return JSONBool(true), nil }, 200 | )(input) 201 | } 202 | 203 | // Ensure parseTrue is a Parser[string, JSONValue] 204 | var _ gomme.Parser[string, JSONValue] = parseTrue 205 | 206 | // parseNull parses the JSON 'null' value. 207 | func parseNull(input string) gomme.Result[JSONValue, string] { 208 | return gomme.Map( 209 | gomme.Token[string]("null"), 210 | func(_ string) (JSONValue, error) { return nil, nil }, 211 | )(input) 212 | } 213 | 214 | // Ensure parseNull is a Parser[string, JSONValue] 215 | var _ gomme.Parser[string, JSONValue] = parseNull 216 | 217 | // parseElements parses the elements of a JSON array. 218 | func parseElements(input string) gomme.Result[[]JSONValue, string] { 219 | return gomme.Map( 220 | gomme.SeparatedList0[string]( 221 | parseElement, 222 | gomme.Token[string](","), 223 | ), 224 | func(elems []JSONValue) ([]JSONValue, error) { 225 | return elems, nil 226 | }, 227 | )(input) 228 | } 229 | 230 | // Ensure parseElements is a Parser[string, []JSONValue] 231 | var _ gomme.Parser[string, []JSONValue] = parseElements 232 | 233 | // parseMembers parses the members (key-value pairs) of a JSON object. 
234 | func parseMembers(input string) gomme.Result[map[string]JSONValue, string] { 235 | return gomme.Map( 236 | gomme.SeparatedList0[string]( 237 | parseMember, 238 | gomme.Token[string](","), 239 | ), 240 | func(kvs []kv) (map[string]JSONValue, error) { 241 | obj := make(JSONObject) 242 | for _, kv := range kvs { 243 | obj[kv.key] = kv.value 244 | } 245 | return obj, nil 246 | }, 247 | )(input) 248 | } 249 | 250 | // Ensure parseMembers is a Parser[string, map[string]JSONValue] 251 | var _ gomme.Parser[string, map[string]JSONValue] = parseMembers 252 | 253 | // parseMember parses a single member (key-value pair) of a JSON object. 254 | func parseMember(input string) gomme.Result[kv, string] { 255 | return member()(input) 256 | } 257 | 258 | // Ensure parseMember is a Parser[string, kv] 259 | var _ gomme.Parser[string, kv] = parseMember 260 | 261 | // member creates a parser for a single key-value pair in a JSON object. 262 | // 263 | // It expects a string followed by a colon and then a JSON value. 264 | // The result is a kv struct with the parsed key and value. 265 | func member() gomme.Parser[string, kv] { 266 | mapFunc := func(p gomme.PairContainer[string, JSONValue]) (kv, error) { 267 | return kv{p.Left, p.Right}, nil 268 | } 269 | 270 | return gomme.Map( 271 | gomme.SeparatedPair[string]( 272 | gomme.Delimited(ws(), stringParser(), ws()), 273 | gomme.Token[string](":"), 274 | element(), 275 | ), 276 | mapFunc, 277 | ) 278 | } 279 | 280 | // element creates a parser for a single element in a JSON array. 281 | // 282 | // It wraps the element with optional whitespace on either side. 283 | func element() gomme.Parser[string, JSONValue] { 284 | return gomme.Map( 285 | gomme.Delimited(ws(), parseValue, ws()), 286 | func(v JSONValue) (JSONValue, error) { return v, nil }, 287 | ) 288 | } 289 | 290 | // kv is a struct representing a key-value pair in a JSON object. 291 | // 292 | // 'key' holds the string key, and 'value' holds the corresponding JSON value. 
293 | type kv struct { 294 | key string 295 | value JSONValue 296 | } 297 | 298 | // stringParser creates a parser for a JSON string. 299 | // 300 | // It expects a sequence of characters enclosed in double quotes. 301 | func stringParser() gomme.Parser[string, string] { 302 | return gomme.Delimited[string, rune, string, rune]( 303 | gomme.Char[string]('"'), 304 | characters(), 305 | gomme.Char[string]('"'), 306 | ) 307 | } 308 | 309 | // integer creates a parser for a JSON number's integer part. 310 | // 311 | // It handles negative and positive integers including zero. 312 | func integer() gomme.Parser[string, int] { 313 | return gomme.Alternative( 314 | // "-" onenine digits (Preceded discards the "-", so it is re-attached before conversion) 315 | gomme.Preceded( 316 | gomme.Token[string]("-"), 317 | gomme.Map( 318 | gomme.Pair(onenine(), digits()), 319 | func(p gomme.PairContainer[string, string]) (int, error) { 320 | return strconv.Atoi("-" + p.Left + p.Right) 321 | }, 322 | ), 323 | ), 324 | 325 | // onenine digits 326 | gomme.Map( 327 | gomme.Pair(onenine(), digits()), 328 | func(p gomme.PairContainer[string, string]) (int, error) { 329 | return strconv.Atoi(p.Left + p.Right) 330 | }, 331 | ), 332 | 333 | // "-" digit (sign re-attached for the same reason) 334 | gomme.Preceded( 335 | gomme.Token[string]("-"), 336 | gomme.Map( 337 | digit(), 338 | func(d string) (int, error) { return strconv.Atoi("-" + d) }, 339 | ), 340 | ), 341 | 342 | // digit 343 | gomme.Map(digit(), strconv.Atoi), 344 | ) 345 | } 346 | 347 | // digits creates a parser for a sequence of digits. 348 | // 349 | // It concatenates the sequence into a single string. 350 | func digits() gomme.Parser[string, string] { 351 | return gomme.Map(gomme.Many1(digit()), func(digits []string) (string, error) { 352 | return strings.Join(digits, ""), nil 353 | }) 354 | } 355 | 356 | // digit creates a parser for a single digit. 357 | // 358 | // It distinguishes between '0' and non-zero digits. 
359 | func digit() gomme.Parser[string, string] { 360 | return gomme.Alternative( 361 | gomme.Token[string]("0"), 362 | onenine(), 363 | ) 364 | } 365 | 366 | // onenine creates a parser for digits from 1 to 9. 367 | func onenine() gomme.Parser[string, string] { 368 | return gomme.Alternative( 369 | gomme.Token[string]("1"), 370 | gomme.Token[string]("2"), 371 | gomme.Token[string]("3"), 372 | gomme.Token[string]("4"), 373 | gomme.Token[string]("5"), 374 | gomme.Token[string]("6"), 375 | gomme.Token[string]("7"), 376 | gomme.Token[string]("8"), 377 | gomme.Token[string]("9"), 378 | ) 379 | } 380 | 381 | // fraction creates a parser for the fractional part of a JSON number. 382 | // 383 | // It expects a dot followed by at least one digit. 384 | func fraction() gomme.Parser[string, string] { 385 | return gomme.Preceded( 386 | gomme.Token[string]("."), 387 | gomme.Digit1[string](), 388 | ) 389 | } 390 | 391 | // exponent creates a parser for the exponent part of a JSON number. 392 | // 393 | // It handles the exponent sign and the exponent digits. 394 | func exponent() gomme.Parser[string, string] { 395 | return gomme.Preceded( 396 | gomme.Token[string]("e"), 397 | gomme.Map( 398 | gomme.Pair(sign(), digits()), 399 | func(p gomme.PairContainer[string, string]) (string, error) { 400 | return p.Left + p.Right, nil 401 | }, 402 | ), 403 | ) 404 | } 405 | 406 | // sign creates a parser for the sign part of a number's exponent. 407 | // 408 | // It can parse both positive ('+') and negative ('-') signs. 409 | func sign() gomme.Parser[string, string] { 410 | return gomme.Optional( 411 | gomme.Alternative[string, string]( 412 | gomme.Token[string]("-"), 413 | gomme.Token[string]("+"), 414 | ), 415 | ) 416 | } 417 | 418 | // characters creates a parser for a sequence of JSON string characters. 419 | // 420 | // It handles regular characters and escaped sequences. 
421 | func characters() gomme.Parser[string, string] { 422 | return gomme.Optional( 423 | gomme.Map( 424 | gomme.Many1[string, rune](character()), 425 | func(chars []rune) (string, error) { 426 | return string(chars), nil 427 | }, 428 | ), 429 | ) 430 | } 431 | 432 | // character creates a parser for a single JSON string character. 433 | // 434 | // It distinguishes between regular characters and escape sequences. 435 | func character() gomme.Parser[string, rune] { 436 | return gomme.Alternative( 437 | // normal character 438 | gomme.Satisfy[string](func(c rune) bool { 439 | return c != '"' && c != '\\' && c >= 0x20 && c <= 0x10FFFF 440 | }), 441 | 442 | // escape 443 | escape(), 444 | ) 445 | } 446 | 447 | // escape creates a parser for escaped characters in a JSON string. 448 | // 449 | // It handles common escape sequences like '\n', '\t', etc., and unicode escapes. 450 | func escape() gomme.Parser[string, rune] { 451 | mapFunc := func(chars []rune) (rune, error) { 452 | // chars[0] will always be '\\' 453 | switch chars[1] { 454 | case '"': 455 | return '"', nil 456 | case '\\': 457 | return '\\', nil 458 | case '/': 459 | return '/', nil 460 | case 'b': 461 | return '\b', nil 462 | case 'f': 463 | return '\f', nil 464 | case 'n': 465 | return '\n', nil 466 | case 'r': 467 | return '\r', nil 468 | case 't': 469 | return '\t', nil 470 | default: // for unicode escapes 471 | return chars[1], nil 472 | } 473 | } 474 | 475 | return gomme.Map( 476 | gomme.Sequence( 477 | gomme.Char[string]('\\'), 478 | gomme.Alternative( 479 | gomme.Char[string]('"'), 480 | gomme.Char[string]('\\'), 481 | gomme.Char[string]('/'), 482 | gomme.Char[string]('b'), 483 | gomme.Char[string]('f'), 484 | gomme.Char[string]('n'), 485 | gomme.Char[string]('r'), 486 | gomme.Char[string]('t'), 487 | unicodeEscape(), 488 | ), 489 | ), 490 | mapFunc, 491 | ) 492 | } 493 | 494 | // unicodeEscape creates a parser for a unicode escape sequence in a JSON string. 
495 | // 496 | // It expects a sequence starting with 'u' followed by four hexadecimal digits and 497 | // converts them to the corresponding rune. 498 | func unicodeEscape() gomme.Parser[string, rune] { 499 | mapFunc := func(chars []rune) (rune, error) { 500 | // chars[0] will always be 'u' 501 | hex := string(chars[1:5]) 502 | codePoint, err := strconv.ParseInt(hex, 16, 32) 503 | if err != nil { 504 | return 0, err 505 | } 506 | return rune(codePoint), nil 507 | } 508 | 509 | return gomme.Map( 510 | gomme.Sequence( 511 | gomme.Char[string]('u'), 512 | hex(), 513 | hex(), 514 | hex(), 515 | hex(), 516 | ), 517 | mapFunc, 518 | ) 519 | } 520 | 521 | // hex creates a parser for a single hexadecimal digit. 522 | // 523 | // It can parse digits ('0'-'9') as well as 524 | // letters ('a'-'f', 'A'-'F') used in hexadecimal numbers. 525 | func hex() gomme.Parser[string, rune] { 526 | return gomme.Satisfy[string](func(r rune) bool { 527 | return ('0' <= r && r <= '9') || ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F') 528 | }) 529 | } 530 | 531 | // ws creates a parser for whitespace in JSON. 532 | // 533 | // It can handle spaces, tabs, newlines, and carriage returns. 534 | // The parser accumulates all whitespace characters and returns them as a single string. 
535 | func ws() gomme.Parser[string, string] { 536 | parser := gomme.Many0( 537 | gomme.Satisfy[string](func(r rune) bool { 538 | return r == ' ' || r == '\t' || r == '\n' || r == '\r' 539 | }), 540 | ) 541 | 542 | mapFunc := func(runes []rune) (string, error) { 543 | return string(runes), nil 544 | } 545 | 546 | return gomme.Map(parser, mapFunc) 547 | } 548 | -------------------------------------------------------------------------------- /examples/json/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "abc": 123, 3 | "entries": [ 4 | { 5 | "name": "John", 6 | "age": 30 7 | }, 8 | { 9 | "name": "Jane", 10 | "age": 25 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /examples/redis/redis.go: -------------------------------------------------------------------------------- 1 | // Package redis demonstrates the usage of the gomme package to parse Redis' 2 | // [RESP protocol] messages. 3 | // 4 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/ 5 | package redis 6 | 7 | import ( 8 | "errors" 9 | "fmt" 10 | "strconv" 11 | "strings" 12 | 13 | "github.com/oleiade/gomme" 14 | ) 15 | 16 | // ParseRESPMessage parses a Redis' [RESP protocol] message. 
17 | // 18 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/ 19 | func ParseRESPMessage(input string) (RESPMessage, error) { 20 | if len(input) < 3 { 21 | return RESPMessage{}, fmt.Errorf("malformed message %s; reason: %w", input, ErrMessageTooShort) 22 | } 23 | 24 | if !isValidMessageKind(MessageKind(input[0])) { 25 | return RESPMessage{}, fmt.Errorf("malformed message %s; reason: %w %c", input, ErrInvalidPrefix, input[0]) 26 | } 27 | 28 | if input[len(input)-2] != '\r' || input[len(input)-1] != '\n' { 29 | return RESPMessage{}, fmt.Errorf("malformed message %s; reason: %w", input, ErrInvalidSuffix) 30 | } 31 | 32 | parser := gomme.Alternative( 33 | SimpleString(), 34 | Error(), 35 | Integer(), 36 | BulkString(), 37 | Array(), 38 | ) 39 | 40 | result := parser(input) 41 | if result.Err != nil { 42 | return RESPMessage{}, result.Err 43 | } 44 | 45 | return result.Output, nil 46 | } 47 | 48 | // ErrMessageTooShort is returned when a message is too short to be valid. 49 | // A [RESP protocol] message is at least 3 characters long: the message kind 50 | // prefix, the message content (which can be empty), and the gomme.CRLF suffix. 51 | // 52 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/ 53 | var ErrMessageTooShort = errors.New("message too short") 54 | 55 | // ErrInvalidPrefix is returned when a message kind prefix is not recognized. 56 | // Valid [RESP Protocol] message kind prefixes are "+", "-", ":", and "$". 57 | // 58 | // [RESP Protocol]: https://redis.io/docs/reference/protocol-spec/ 59 | var ErrInvalidPrefix = errors.New("invalid message prefix") 60 | 61 | // ErrInvalidSuffix is returned when a message suffix is not recognized. 62 | // Every [RESP protocol] message ends with a gomme.CRLF. 63 | // 64 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/ 65 | var ErrInvalidSuffix = errors.New("invalid message suffix") 66 | 67 | // RESPMessage is a parsed Redis' [RESP protocol] message. 
//
// It can hold either a simple string, an error, an integer, a bulk string,
// or an array. The kind of the message is available in the Kind field.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type RESPMessage struct {
	// Kind tells which of the pointer fields below carries the payload.
	Kind MessageKind
	// SimpleString is set by the SimpleString parser.
	SimpleString *SimpleStringMessage
	// Error is set by the Error parser.
	Error *ErrorStringMessage
	// Integer is set by the Integer parser.
	Integer *IntegerMessage
	// BulkString is set by the BulkString parser.
	BulkString *BulkStringMessage
	// Array is set by the Array parser.
	Array *ArrayMessage
}

// MessageKind is the kind of a Redis' [RESP protocol] message.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type MessageKind string

// The many different kinds of Redis' [RESP protocol] messages map
// to their respective protocol message's prefixes.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
const (
	// SimpleStringKind is the prefix of simple string messages.
	SimpleStringKind MessageKind = "+"
	// ErrorKind is the prefix of error messages.
	ErrorKind MessageKind = "-"
	// IntegerKind is the prefix of integer messages.
	IntegerKind MessageKind = ":"
	// BulkStringKind is the prefix of bulk string messages.
	BulkStringKind MessageKind = "$"
	// ArrayKind is the prefix of array messages.
	ArrayKind MessageKind = "*"
	// InvalidKind is a placeholder value; it is not one of the kinds that
	// isValidMessageKind accepts.
	InvalidKind MessageKind = "?"
)

// SimpleStringMessage is a simple string message parsed from a Redis'
// [RESP protocol] message.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type SimpleStringMessage struct {
	// Content is the text found between the "+" prefix and the closing CRLF.
	Content string
}

// SimpleString is a parser for Redis' RESP protocol simple strings.
//
// Simple strings are strings that are not expected to contain newlines.
// Simple strings start with a "+" character, and end with a gomme.CRLF.
//
// Once parsed, the content of the simple string is available in the
// SimpleString field of the result's RESPMessage.
113 | func SimpleString() gomme.Parser[string, RESPMessage] { 114 | mapFn := func(message string) (RESPMessage, error) { 115 | if strings.ContainsAny(message, "\r\n") { 116 | return RESPMessage{}, fmt.Errorf("malformed simple string: %s", message) 117 | } 118 | 119 | return RESPMessage{ 120 | Kind: SimpleStringKind, 121 | SimpleString: &SimpleStringMessage{ 122 | Content: message, 123 | }, 124 | }, nil 125 | } 126 | 127 | return gomme.Delimited( 128 | gomme.Token[string](string(SimpleStringKind)), 129 | gomme.Map(gomme.TakeUntil(gomme.CRLF[string]()), mapFn), 130 | gomme.CRLF[string](), 131 | ) 132 | } 133 | 134 | // ErrorStringMessage is a parsed error string message from a Redis' 135 | // [RESP protocol] message. 136 | // 137 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/ 138 | type ErrorStringMessage struct { 139 | Kind string 140 | Message string 141 | } 142 | 143 | // Error is a parser for Redis' RESP protocol errors. 144 | // 145 | // Errors are strings that start with a "-" character, and end with a gomme.CRLF. 146 | // 147 | // The error message is available in the Error field of the result's 148 | // RESPMessage. 149 | func Error() gomme.Parser[string, RESPMessage] { 150 | mapFn := func(message string) (RESPMessage, error) { 151 | if strings.ContainsAny(message, "\r\n") { 152 | return RESPMessage{}, fmt.Errorf("malformed error string: %s", message) 153 | } 154 | 155 | return RESPMessage{ 156 | Kind: ErrorKind, 157 | Error: &ErrorStringMessage{ 158 | Kind: "ERR", 159 | Message: message, 160 | }, 161 | }, nil 162 | } 163 | 164 | return gomme.Delimited( 165 | gomme.Token[string](string(ErrorKind)), 166 | gomme.Map(gomme.TakeUntil(gomme.CRLF[string]()), mapFn), 167 | gomme.CRLF[string](), 168 | ) 169 | } 170 | 171 | // IntegerMessage is a parsed integer message from a Redis' [RESP protocol] 172 | // message. 
173 | // 174 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/ 175 | type IntegerMessage struct { 176 | Value int 177 | } 178 | 179 | // Integer is a parser for Redis' RESP protocol integers. 180 | // 181 | // Integers are signed nummerical values represented as string messages 182 | // that start with a ":" character, and end with a gomme.CRLF. 183 | // 184 | // The integer value is available in the IntegerMessage field of the result's 185 | // RESPMessage. 186 | func Integer() gomme.Parser[string, RESPMessage] { 187 | mapFn := func(message string) (RESPMessage, error) { 188 | value, err := strconv.Atoi(message) 189 | if err != nil { 190 | return RESPMessage{}, err 191 | } 192 | 193 | return RESPMessage{ 194 | Kind: IntegerKind, 195 | Integer: &IntegerMessage{ 196 | Value: value, 197 | }, 198 | }, nil 199 | } 200 | 201 | return gomme.Delimited( 202 | gomme.Token[string](string(IntegerKind)), 203 | gomme.Map(gomme.TakeUntil(gomme.CRLF[string]()), mapFn), 204 | gomme.CRLF[string](), 205 | ) 206 | } 207 | 208 | // BulkStringMessage is a parsed bulk string message from a Redis' [RESP protocol] 209 | // message. 210 | // 211 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/ 212 | type BulkStringMessage struct { 213 | Data []byte 214 | } 215 | 216 | // BulkString is a parser for Redis' RESP protocol bulk strings. 217 | // 218 | // Bulk strings are binary-safe strings up to 512MB in size. 219 | // Bulk strings start with a "$" character, and end with a gomme.CRLF. 220 | // 221 | // The bulk string's data is available in the BulkString field of the result's 222 | // RESPMessage. 
223 | func BulkString() gomme.Parser[string, RESPMessage] { 224 | mapFn := func(message gomme.PairContainer[int64, string]) (RESPMessage, error) { 225 | if message.Left < 0 { 226 | if message.Left < -1 { 227 | return RESPMessage{}, fmt.Errorf( 228 | "unable to parse bulk string; "+ 229 | "reason: negative length %d", 230 | message.Left, 231 | ) 232 | } 233 | 234 | if message.Left == -1 && len(message.Right) != 0 { 235 | return RESPMessage{}, fmt.Errorf( 236 | "malformed array: declared message size -1, and actual size differ %d", 237 | len(message.Right), 238 | ) 239 | } 240 | } else if len(message.Right) != int(message.Left) { 241 | return RESPMessage{}, fmt.Errorf( 242 | "malformed array: declared message size %d, and actual size differ %d", 243 | message.Left, 244 | len(message.Right), 245 | ) 246 | } 247 | 248 | return RESPMessage{ 249 | Kind: BulkStringKind, 250 | BulkString: &BulkStringMessage{ 251 | Data: []byte(message.Right), 252 | }, 253 | }, nil 254 | } 255 | 256 | return gomme.Map( 257 | gomme.Pair( 258 | sizePrefix(gomme.Token[string](string(BulkStringKind))), 259 | gomme.Optional( 260 | gomme.Terminated(gomme.TakeUntil(gomme.CRLF[string]()), gomme.CRLF[string]()), 261 | ), 262 | ), 263 | mapFn, 264 | ) 265 | } 266 | 267 | // ArrayMessage is a parsed array message from a Redis' [RESP protocol] message. 268 | // 269 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/ 270 | type ArrayMessage struct { 271 | Elements []RESPMessage 272 | } 273 | 274 | // Array is a parser for Redis' RESP protocol arrays. 275 | // 276 | // Arrays are sequences of RESP messages. 277 | // Arrays start with a "*" character, and end with a gomme.CRLF. 278 | // 279 | // The array's messages are available in the Array field of the result's 280 | // RESPMessage. 
281 | func Array() gomme.Parser[string, RESPMessage] { 282 | mapFn := func(message gomme.PairContainer[int64, []RESPMessage]) (RESPMessage, error) { 283 | if int(message.Left) == -1 { 284 | if len(message.Right) != 0 { 285 | return RESPMessage{}, fmt.Errorf( 286 | "malformed array: declared message size -1, and actual size differ %d", 287 | len(message.Right), 288 | ) 289 | } 290 | } else { 291 | if len(message.Right) != int(message.Left) { 292 | return RESPMessage{}, fmt.Errorf( 293 | "malformed array: declared message size %d, and actual size differ %d", 294 | message.Left, 295 | len(message.Right), 296 | ) 297 | } 298 | } 299 | 300 | messages := make([]RESPMessage, 0, len(message.Right)) 301 | messages = append(messages, message.Right...) 302 | 303 | return RESPMessage{ 304 | Kind: ArrayKind, 305 | Array: &ArrayMessage{ 306 | Elements: messages, 307 | }, 308 | }, nil 309 | } 310 | 311 | return gomme.Map( 312 | gomme.Pair( 313 | sizePrefix(gomme.Token[string](string(ArrayKind))), 314 | gomme.Many0( 315 | gomme.Alternative( 316 | SimpleString(), 317 | Error(), 318 | Integer(), 319 | BulkString(), 320 | ), 321 | ), 322 | ), 323 | mapFn, 324 | ) 325 | } 326 | 327 | func sizePrefix(prefix gomme.Parser[string, string]) gomme.Parser[string, int64] { 328 | return gomme.Delimited( 329 | prefix, 330 | gomme.Int64[string](), 331 | gomme.CRLF[string](), 332 | ) 333 | } 334 | 335 | func isValidMessageKind(kind MessageKind) bool { 336 | return kind == SimpleStringKind || 337 | kind == ErrorKind || 338 | kind == IntegerKind || 339 | kind == BulkStringKind || 340 | kind == ArrayKind 341 | } 342 | -------------------------------------------------------------------------------- /examples/redis/redis_test.go: -------------------------------------------------------------------------------- 1 | package redis 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "math/rand" 7 | "reflect" 8 | "strconv" 9 | "strings" 10 | "testing" 11 | "time" 12 | ) 13 | 14 | func TestParseRESPMessage(t 
*testing.T) { 15 | t.Parallel() 16 | 17 | type args struct { 18 | input string 19 | } 20 | testCases := []struct { 21 | name string 22 | args args 23 | want RESPMessage 24 | wantErr bool 25 | }{ 26 | // 27 | // General 28 | // 29 | { 30 | name: "empty message should fail", 31 | args: args{ 32 | input: "", 33 | }, 34 | want: RESPMessage{}, 35 | wantErr: true, 36 | }, 37 | { 38 | name: "message with only a prefix should fail", 39 | args: args{ 40 | input: "+", 41 | }, 42 | want: RESPMessage{}, 43 | wantErr: true, 44 | }, 45 | { 46 | name: "message with only a CRLF should fail", 47 | args: args{ 48 | input: "\r\n", 49 | }, 50 | want: RESPMessage{}, 51 | wantErr: true, 52 | }, 53 | { 54 | name: "message with an invalid prefix should fail", 55 | args: args{ 56 | input: "?\r\n", 57 | }, 58 | want: RESPMessage{}, 59 | wantErr: true, 60 | }, 61 | 62 | // 63 | // SIMPLE STRINGS 64 | // 65 | 66 | { 67 | name: "proper simple string should succeed", 68 | args: args{ 69 | "+OK\r\n", 70 | }, 71 | want: RESPMessage{ 72 | Kind: SimpleStringKind, 73 | SimpleString: &SimpleStringMessage{Content: "OK"}, 74 | }, 75 | wantErr: false, 76 | }, 77 | { 78 | name: "empty simple string should succeed", 79 | args: args{ 80 | "+\r\n", 81 | }, 82 | want: RESPMessage{ 83 | Kind: SimpleStringKind, 84 | SimpleString: &SimpleStringMessage{Content: ""}, 85 | }, 86 | wantErr: false, 87 | }, 88 | { 89 | name: "malformed simple string containing a \\r should fail", 90 | args: args{ 91 | "+Hello\rWorld\r\n", 92 | }, 93 | want: RESPMessage{}, 94 | wantErr: true, 95 | }, 96 | { 97 | name: "malformed simple string containing a \\n should fail", 98 | args: args{ 99 | "+Hello\nWorld\r\n", 100 | }, 101 | want: RESPMessage{}, 102 | wantErr: true, 103 | }, 104 | { 105 | name: "malformed simple string containing a \\n\\r should fail", 106 | args: args{ 107 | "+Hello\n\rWorld\r\n", 108 | }, 109 | want: RESPMessage{}, 110 | wantErr: true, 111 | }, 112 | 113 | // // 114 | // // ERROR STRINGS 115 | // // 116 | 117 | 
{ 118 | name: "proper error string should succeed", 119 | args: args{ 120 | "-Error message\r\n", 121 | }, 122 | want: RESPMessage{ 123 | Kind: ErrorKind, 124 | Error: &ErrorStringMessage{ 125 | Kind: "ERR", 126 | Message: "Error message", 127 | }, 128 | }, 129 | wantErr: false, 130 | }, 131 | { 132 | name: "malformed error string containing a \\r should fail", 133 | args: args{ 134 | "-Error\r message\r\n", 135 | }, 136 | want: RESPMessage{}, 137 | wantErr: true, 138 | }, 139 | { 140 | name: "malformed error string containing a \\n should fail", 141 | args: args{ 142 | "-Error\n message\r\n", 143 | }, 144 | want: RESPMessage{}, 145 | wantErr: true, 146 | }, 147 | { 148 | name: "malformed error string containing a \\n\\r should fail", 149 | args: args{ 150 | "-Error\n\r message\r\n", 151 | }, 152 | want: RESPMessage{}, 153 | wantErr: true, 154 | }, 155 | 156 | // // 157 | // // INTEGER 158 | // // 159 | 160 | { 161 | name: "proper integer should succeed", 162 | args: args{ 163 | ":123\r\n", 164 | }, 165 | want: RESPMessage{ 166 | Kind: IntegerKind, 167 | Integer: &IntegerMessage{ 168 | Value: 123, 169 | }, 170 | }, 171 | wantErr: false, 172 | }, 173 | 174 | // 175 | // Bulk Strings 176 | // 177 | 178 | { 179 | name: "proper bulk string should succeed", 180 | args: args{ 181 | "$5\r\nhello\r\n", 182 | }, 183 | want: RESPMessage{ 184 | Kind: BulkStringKind, 185 | BulkString: &BulkStringMessage{ 186 | Data: []byte("hello"), 187 | }, 188 | }, 189 | wantErr: false, 190 | }, 191 | { 192 | name: "nil bulk string should succeed", 193 | args: args{ 194 | "$-1\r\n", 195 | }, 196 | want: RESPMessage{ 197 | Kind: BulkStringKind, 198 | BulkString: &BulkStringMessage{ 199 | Data: []byte(""), 200 | }, 201 | }, 202 | wantErr: false, 203 | }, 204 | { 205 | name: "bulk string with negative size != -1 should fail", 206 | args: args{ 207 | "$-2\r\n", 208 | }, 209 | want: RESPMessage{}, 210 | wantErr: true, 211 | }, 212 | { 213 | name: "malformed bulk string with actual length 
different from declared length should fail", 214 | args: args{ 215 | "$5\r\nhello world\r\n", 216 | }, 217 | want: RESPMessage{}, 218 | wantErr: true, 219 | }, 220 | 221 | // 222 | // ARRAYS 223 | // 224 | 225 | { 226 | name: "proper array of simple strings should succeed", 227 | args: args{ 228 | "*2\r\n+hello\r\n+world\r\n", 229 | }, 230 | want: RESPMessage{ 231 | Kind: ArrayKind, 232 | Array: &ArrayMessage{ 233 | Elements: []RESPMessage{ 234 | { 235 | Kind: SimpleStringKind, 236 | SimpleString: &SimpleStringMessage{ 237 | Content: "hello", 238 | }, 239 | }, 240 | { 241 | Kind: SimpleStringKind, 242 | SimpleString: &SimpleStringMessage{ 243 | Content: "world", 244 | }, 245 | }, 246 | }, 247 | }, 248 | }, 249 | wantErr: false, 250 | }, 251 | { 252 | name: "proper array of errors should succeed", 253 | args: args{ 254 | "*2\r\n-Error Message\r\n-Other error\r\n", 255 | }, 256 | want: RESPMessage{ 257 | Kind: ArrayKind, 258 | Array: &ArrayMessage{ 259 | Elements: []RESPMessage{ 260 | { 261 | Kind: ErrorKind, 262 | Error: &ErrorStringMessage{ 263 | Kind: "ERR", 264 | Message: "Error Message", 265 | }, 266 | }, 267 | { 268 | Kind: ErrorKind, 269 | Error: &ErrorStringMessage{ 270 | Kind: "ERR", 271 | Message: "Other error", 272 | }, 273 | }, 274 | }, 275 | }, 276 | }, 277 | wantErr: false, 278 | }, 279 | { 280 | name: "proper array of integers should succeed", 281 | args: args{ 282 | "*2\r\n:0\r\n:1000\r\n", 283 | }, 284 | want: RESPMessage{ 285 | Kind: ArrayKind, 286 | Array: &ArrayMessage{ 287 | Elements: []RESPMessage{ 288 | { 289 | Kind: IntegerKind, 290 | Integer: &IntegerMessage{ 291 | Value: 0, 292 | }, 293 | }, 294 | { 295 | Kind: IntegerKind, 296 | Integer: &IntegerMessage{ 297 | Value: 1000, 298 | }, 299 | }, 300 | }, 301 | }, 302 | }, 303 | wantErr: false, 304 | }, 305 | { 306 | name: "proper array of bulk strings should succeed", 307 | args: args{ 308 | "*2\r\n$5\r\nhello\r\n$5\r\nworld\r\n", 309 | }, 310 | want: RESPMessage{ 311 | Kind: ArrayKind, 312 | 
Array: &ArrayMessage{ 313 | Elements: []RESPMessage{ 314 | { 315 | Kind: BulkStringKind, 316 | BulkString: &BulkStringMessage{ 317 | Data: []byte("hello"), 318 | }, 319 | }, 320 | { 321 | Kind: BulkStringKind, 322 | BulkString: &BulkStringMessage{ 323 | Data: []byte("world"), 324 | }, 325 | }, 326 | }, 327 | }, 328 | }, 329 | wantErr: false, 330 | }, 331 | { 332 | name: "proper array of mixed types should succeed", 333 | args: args{ 334 | "*4\r\n$5\r\nhello\r\n:123\r\n+OK\r\n-Error Message\r\n", 335 | }, 336 | want: RESPMessage{ 337 | Kind: ArrayKind, 338 | Array: &ArrayMessage{ 339 | Elements: []RESPMessage{ 340 | { 341 | Kind: BulkStringKind, 342 | BulkString: &BulkStringMessage{ 343 | Data: []byte("hello"), 344 | }, 345 | }, 346 | { 347 | Kind: IntegerKind, 348 | Integer: &IntegerMessage{ 349 | Value: 123, 350 | }, 351 | }, 352 | { 353 | Kind: SimpleStringKind, 354 | SimpleString: &SimpleStringMessage{ 355 | Content: "OK", 356 | }, 357 | }, 358 | { 359 | Kind: ErrorKind, 360 | Error: &ErrorStringMessage{ 361 | Kind: "ERR", 362 | Message: "Error Message", 363 | }, 364 | }, 365 | }, 366 | }, 367 | }, 368 | wantErr: false, 369 | }, 370 | { 371 | name: "empty array should succeed", 372 | args: args{ 373 | "*-1\r\n", 374 | }, 375 | want: RESPMessage{ 376 | Kind: ArrayKind, 377 | Array: &ArrayMessage{ 378 | Elements: []RESPMessage{}, 379 | }, 380 | }, 381 | wantErr: false, 382 | }, 383 | { 384 | name: "array with non matching size prefix should fail", 385 | args: args{ 386 | "*2\r\n+OK\r\n", 387 | }, 388 | want: RESPMessage{}, 389 | wantErr: true, 390 | }, 391 | } 392 | for _, tc := range testCases { 393 | tc := tc 394 | 395 | t.Run(tc.name, func(t *testing.T) { 396 | t.Parallel() 397 | 398 | got, err := ParseRESPMessage(tc.args.input) 399 | if (err != nil) != tc.wantErr { 400 | t.Errorf("ParseRESPMessage() error = %v, wantErr %v", err, tc.wantErr) 401 | return 402 | } 403 | if !reflect.DeepEqual(got, tc.want) { 404 | t.Errorf("ParseRESPMessage() = %v, want %v", got, 
tc.want) 405 | } 406 | }) 407 | } 408 | } 409 | 410 | func BenchmarkParseMessage(b *testing.B) { 411 | var benchmarks = []struct { 412 | kind string 413 | data string 414 | size string 415 | }{ 416 | {"simple_string", "+OK\r\n", "2"}, 417 | {"simple_string", simpleStringProducer(128 * Byte), "128b"}, 418 | {"simple_string", simpleStringProducer(1 * KiloBytes), "1kb"}, 419 | {"simple_string", simpleStringProducer(1 * MegaBytes), "1mb"}, 420 | {"error_string", "-Error\r\n", "5"}, 421 | {"error_string", errorStringProducer(128 * Byte), "128b"}, 422 | {"error_string", errorStringProducer(1 * KiloBytes), "1kb"}, 423 | {"integer", ":1\r\n", "1"}, 424 | {"integer", ":9223372036854775807\r\n", "biggest integer"}, 425 | {"integer", ":-9223372036854775808\r\n", "smallest integer"}, 426 | {"bulk_string", bulkStringProducer(128 * Byte), "128b"}, 427 | {"bulk_string", bulkStringProducer(1 * KiloBytes), "1kb"}, 428 | {"bulk_string", bulkStringProducer(1 * MegaBytes), "1mb"}, 429 | {"bulk_string", bulkStringProducer(512 * MegaBytes), "512mb"}, 430 | {"array", arrayProducer(10000, 128*Byte), "10000 * 128b"}, 431 | {"array", arrayProducer(1000, 1*KiloBytes), "1000 * 1kb"}, 432 | {"array", arrayProducer(100, 1*MegaBytes), "100 * 1mb"}, 433 | } 434 | 435 | for _, tt := range benchmarks { 436 | b.Run(fmt.Sprintf("%s_with_size_%s", tt.kind, tt.size), func(b *testing.B) { 437 | for i := 0; i < b.N; i++ { 438 | //nolint:errcheck,gosec 439 | ParseRESPMessage(tt.data) 440 | } 441 | }) 442 | } 443 | } 444 | 445 | const ( 446 | Byte = 1 447 | KiloBytes = Byte * 1024 448 | MegaBytes = KiloBytes * 1024 449 | GigaBytes = MegaBytes * 1024 450 | TeraBytes = GigaBytes * 1024 451 | ) 452 | 453 | // TODO: add fuzz tests input for other kind of messages, 454 | // and handle their expected format too.
455 | func FuzzTestParseMessage(f *testing.F) { 456 | testCases := []string{ 457 | "+OK\r\n", 458 | "+Hello world\r\n", 459 | "+This is a string\r\n", 460 | } 461 | 462 | for _, testCase := range testCases { 463 | f.Add(testCase) 464 | } 465 | 466 | f.Fuzz(func(t *testing.T, message string) { 467 | _, err := ParseRESPMessage(message) 468 | if err != nil { 469 | if errors.Is(err, ErrMessageTooShort) || errors.Is(err, ErrInvalidPrefix) || errors.Is(err, ErrInvalidSuffix) { 470 | t.Skip("skipping expected error") 471 | } 472 | 473 | if strings.Count(message, "\r") > 1 || strings.Count(message, "\n") > 1 { 474 | t.Skip("skipping simple string message with multiple \\r or \\n") 475 | } 476 | 477 | t.Errorf("ParseRESPMessage() error = %v", err) 478 | } 479 | }) 480 | } 481 | 482 | func simpleStringProducer(messageSize int) string { 483 | return strings.Join( 484 | []string{ 485 | "+", 486 | stringWithinCharset(messageSize, alnumCharset), 487 | "\r\n", 488 | }, 489 | "", 490 | ) 491 | } 492 | 493 | func errorStringProducer(messageSize int) string { 494 | return strings.Join( 495 | []string{ 496 | "-", 497 | stringWithinCharset(messageSize, alnumCharset), 498 | "\r\n", 499 | }, 500 | "", 501 | ) 502 | } 503 | 504 | func bulkStringProducer(messageSize int) string { 505 | return strings.Join( 506 | []string{ 507 | "$", 508 | strconv.Itoa(messageSize), 509 | "\r\n", 510 | stringWithinCharset(messageSize, alnumCharset), 511 | "\r\n", 512 | }, 513 | "", 514 | ) 515 | } 516 | 517 | func arrayProducer(arraySize, messageSize int) string { 518 | messages := make([]string, 0, arraySize) 519 | 520 | for i := 0; i < arraySize; i++ { 521 | messageKind := i % 4 522 | 523 | switch messageKind { 524 | case 0: 525 | messages = append(messages, simpleStringProducer(messageSize)) 526 | case 1: 527 | messages = append(messages, errorStringProducer(messageSize)) 528 | case 2: 529 | messages = append(messages, bulkStringProducer(messageSize)) 530 | case 3: 531 | messages = append(messages, 
strconv.Itoa(rand.Int())) 532 | } 533 | } 534 | 535 | return strings.Join( 536 | []string{ 537 | "*", 538 | strings.Join(messages, ""), 539 | "\r\n", 540 | }, 541 | "", 542 | ) 543 | } 544 | 545 | var seededRand *rand.Rand = rand.New( 546 | rand.NewSource(time.Now().UnixNano())) 547 | 548 | const alnumCharset = "abcdefghijklmnopqrstuvwxyz" + 549 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" 550 | 551 | func stringWithinCharset(length int, charset string) string { 552 | b := make([]byte, length) 553 | for i := range b { 554 | b[i] = charset[seededRand.Intn(len(charset))] 555 | } 556 | return string(b) 557 | } 558 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/oleiade/gomme 2 | 3 | go 1.18 4 | 5 | require github.com/stretchr/testify v1.7.1 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.0 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 6 | github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= 7 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 
h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 10 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 11 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 12 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oleiade/gomme/c8967c191356a03f184cef70295243720365c979/logo.png -------------------------------------------------------------------------------- /multi.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | // Count runs the provided parser `count` times. 4 | // 5 | // If the provided parser cannot be successfully applied `count` times, the operation 6 | // fails and the Result will contain an error. 7 | func Count[Input Bytes, Output any](parse Parser[Input, Output], count uint) Parser[Input, []Output] { 8 | return func(input Input) Result[[]Output, Input] { 9 | if len(input) == 0 || count == 0 { 10 | return Failure[Input, []Output](NewError(input, "Count"), input) 11 | } 12 | 13 | outputs := make([]Output, 0, int(count)) 14 | remaining := input 15 | for i := 0; uint(i) < count; i++ { 16 | result := parse(remaining) 17 | if result.Err != nil { 18 | return Failure[Input, []Output](result.Err, input) 19 | } 20 | 21 | remaining = result.Remaining 22 | outputs = append(outputs, result.Output) 23 | } 24 | 25 | return Success(outputs, remaining) 26 | } 27 | } 28 | 29 | // Many0 applies a parser repeatedly until it fails, and returns a slice of all 30 | // the results as the Result's Output. 31 | // 32 | // Note that Many0 will succeed even if the parser fails to match at all. 
It will 33 | // however fail if the provided parser accepts empty inputs (such as `Digit0`, or 34 | // `Alpha0`) in order to prevent infinite loops. 35 | func Many0[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, []Output] { 36 | return func(input Input) Result[[]Output, Input] { 37 | results := []Output{} 38 | 39 | remaining := input 40 | for { 41 | res := parse(remaining) 42 | if res.Err != nil { 43 | return Success(results, remaining) 44 | } 45 | 46 | // Checking for infinite loops, if nothing was consumed, 47 | // the provided parser would make us go around in circles. 48 | if len(res.Remaining) == len(remaining) { 49 | return Failure[Input, []Output](NewError(input, "Many0"), input) 50 | } 51 | 52 | results = append(results, res.Output) 53 | remaining = res.Remaining 54 | } 55 | } 56 | } 57 | 58 | // Many1 applies a parser repeatedly until it fails, and returns a slice of all 59 | // the results as the Result's Output. Many1 will fail if the parser fails to 60 | // match at least once. 61 | // 62 | // Note that Many1 will fail if the provided parser accepts empty 63 | // inputs (such as `Digit0`, or `Alpha0`) in order to prevent infinite loops. 64 | func Many1[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, []Output] { 65 | return func(input Input) Result[[]Output, Input] { 66 | first := parse(input) 67 | if first.Err != nil { 68 | return Failure[Input, []Output](first.Err, input) 69 | } 70 | 71 | // Checking for infinite loops, if nothing was consumed, 72 | // the provided parser would make us go around in circles. 
73 | if len(first.Remaining) == len(input) { 74 | return Failure[Input, []Output](NewError(input, "Many1"), input) 75 | } 76 | 77 | results := []Output{first.Output} 78 | remaining := first.Remaining 79 | 80 | for { 81 | res := parse(remaining) 82 | if res.Err != nil { 83 | return Success(results, remaining) 84 | } 85 | 86 | // Checking for infinite loops, if nothing was consumed, 87 | // the provided parser would make us go around in circles. 88 | if len(res.Remaining) == len(remaining) { 89 | return Failure[Input, []Output](NewError(input, "Many1"), input) 90 | } 91 | 92 | results = append(results, res.Output) 93 | remaining = res.Remaining 94 | } 95 | } 96 | } 97 | 98 | // SeparatedList0 applies an element parser and a separator parser repeatedly in order 99 | // to produce a list of elements. 100 | // 101 | // Note that SeparatedList0 will succeed even if the element parser fails to match at all. 102 | // It will however fail if the provided element parser accepts empty inputs (such as 103 | // `Digit0`, or `Alpha0`) in order to prevent infinite loops. 104 | // 105 | // Because the `SeparatedList0` is really looking to produce a list of elements resulting 106 | // from the provided main parser, it will succeed even if the separator parser fails to 107 | // match at all. It will however fail if the provided separator parser accepts empty 108 | // inputs in order to prevent infinite loops. 109 | func SeparatedList0[Input Bytes, Output any, S Separator]( 110 | parse Parser[Input, Output], 111 | separator Parser[Input, S], 112 | ) Parser[Input, []Output] { 113 | return func(input Input) Result[[]Output, Input] { 114 | results := []Output{} 115 | 116 | res := parse(input) 117 | if res.Err != nil { 118 | return Success(results, input) 119 | } 120 | 121 | // Checking for infinite loops, if nothing was consumed, 122 | // the provided parser would make us go around in circles. 
123 | if len(res.Remaining) == len(input) { 124 | return Failure[Input, []Output](NewError(input, "SeparatedList0"), input) 125 | } 126 | 127 | results = append(results, res.Output) 128 | remaining := res.Remaining 129 | 130 | for { 131 | separatorResult := separator(remaining) 132 | if separatorResult.Err != nil { 133 | return Success(results, remaining) 134 | } 135 | 136 | // Checking for infinite loops, if nothing was consumed, 137 | // the provided parser would make us go around in circles. 138 | if len(separatorResult.Remaining) == len(remaining) { 139 | return Failure[Input, []Output](NewError(input, "SeparatedList0"), input) 140 | } 141 | 142 | parserResult := parse(separatorResult.Remaining) 143 | if parserResult.Err != nil { 144 | return Success(results, remaining) 145 | } 146 | 147 | results = append(results, parserResult.Output) 148 | 149 | remaining = parserResult.Remaining 150 | } 151 | } 152 | } 153 | 154 | // SeparatedList1 applies an element parser and a separator parser repeatedly in order 155 | // to produce a list of elements. 156 | // 157 | // Note that SeparatedList1 will fail if the element parser fails to match at all. 158 | // 159 | // Because the `SeparatedList1` is really looking to produce a list of elements resulting 160 | // from the provided main parser, it will succeed even if the separator parser fails to 161 | // match at all. 162 | func SeparatedList1[Input Bytes, Output any, S Separator]( 163 | parse Parser[Input, Output], 164 | separator Parser[Input, S], 165 | ) Parser[Input, []Output] { 166 | return func(input Input) Result[[]Output, Input] { 167 | results := []Output{} 168 | 169 | res := parse(input) 170 | if res.Err != nil { 171 | return Failure[Input, []Output](res.Err, input) 172 | } 173 | 174 | // Checking for infinite loops, if nothing was consumed, 175 | // the provided parser would make us go around in circles. 
176 | if len(res.Remaining) == len(input) { 177 | return Failure[Input, []Output](NewError(input, "SeparatedList0"), input) 178 | } 179 | 180 | results = append(results, res.Output) 181 | remaining := res.Remaining 182 | 183 | for { 184 | separatorResult := separator(remaining) 185 | if separatorResult.Err != nil { 186 | return Success(results, remaining) 187 | } 188 | 189 | // Checking for infinite loops, if nothing was consumed, 190 | // the provided parser would make us go around in circles. 191 | if len(separatorResult.Remaining) == len(remaining) { 192 | return Failure[Input, []Output](NewError(input, "SeparatedList0"), input) 193 | } 194 | 195 | parserResult := parse(separatorResult.Remaining) 196 | if parserResult.Err != nil { 197 | return Success(results, remaining) 198 | } 199 | 200 | results = append(results, parserResult.Output) 201 | 202 | remaining = parserResult.Remaining 203 | } 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /multi_test.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestCount(t *testing.T) { 10 | t.Parallel() 11 | 12 | testCases := []struct { 13 | name string 14 | parser Parser[string, []string] 15 | input string 16 | wantErr bool 17 | wantOutput []string 18 | wantRemaining string 19 | }{ 20 | { 21 | name: "parsing exact count should succeed", 22 | parser: Count(Token[string]("abc"), 2), 23 | input: "abcabc", 24 | wantErr: false, 25 | wantOutput: []string{"abc", "abc"}, 26 | wantRemaining: "", 27 | }, 28 | { 29 | name: "parsing more than count should succeed", 30 | parser: Count(Token[string]("abc"), 2), 31 | input: "abcabcabc", 32 | wantErr: false, 33 | wantOutput: []string{"abc", "abc"}, 34 | wantRemaining: "abc", 35 | }, 36 | { 37 | name: "parsing less than count should fail", 38 | parser: Count(Token[string]("abc"), 2), 39 | 
input: "abc123", 40 | wantErr: true, 41 | wantOutput: nil, 42 | wantRemaining: "abc123", 43 | }, 44 | { 45 | name: "parsing no count should fail", 46 | parser: Count(Token[string]("abc"), 2), 47 | input: "123123", 48 | wantErr: true, 49 | wantOutput: nil, 50 | wantRemaining: "123123", 51 | }, 52 | { 53 | name: "parsing empty input should fail", 54 | parser: Count(Token[string]("abc"), 2), 55 | input: "", 56 | wantErr: true, 57 | wantOutput: nil, 58 | wantRemaining: "", 59 | }, 60 | } 61 | 62 | for _, tc := range testCases { 63 | tc := tc 64 | 65 | t.Run(tc.name, func(t *testing.T) { 66 | t.Parallel() 67 | 68 | gotResult := tc.parser(tc.input) 69 | if (gotResult.Err != nil) != tc.wantErr { 70 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 71 | } 72 | 73 | assert.Equal(t, 74 | tc.wantOutput, 75 | gotResult.Output, 76 | "got output %v, want output %v", gotResult.Output, tc.wantOutput, 77 | ) 78 | 79 | if gotResult.Remaining != tc.wantRemaining { 80 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 81 | } 82 | }) 83 | } 84 | } 85 | 86 | func BenchmarkCount(b *testing.B) { 87 | parser := Count(Char[string]('#'), 3) 88 | 89 | b.ResetTimer() 90 | for i := 0; i < b.N; i++ { 91 | parser("###") 92 | } 93 | } 94 | 95 | func TestMany0(t *testing.T) { 96 | t.Parallel() 97 | 98 | type args struct { 99 | p Parser[string, []rune] 100 | } 101 | testCases := []struct { 102 | name string 103 | args args 104 | input string 105 | wantErr bool 106 | wantOutput []rune 107 | wantRemaining string 108 | }{ 109 | { 110 | name: "matching parser should succeed", 111 | input: "###", 112 | args: args{ 113 | p: Many0(Char[string]('#')), 114 | }, 115 | wantErr: false, 116 | wantOutput: []rune{'#', '#', '#'}, 117 | wantRemaining: "", 118 | }, 119 | { 120 | name: "no match should succeed", 121 | input: "abc", 122 | args: args{ 123 | p: Many0(Char[string]('#')), 124 | }, 125 | wantErr: false, 126 | wantOutput: []rune{}, 127 | wantRemaining: 
"abc", 128 | }, 129 | { 130 | name: "empty input should succeed", 131 | input: "", 132 | args: args{ 133 | p: Many0(Char[string]('#')), 134 | }, 135 | wantErr: false, 136 | wantOutput: []rune{}, 137 | wantRemaining: "", 138 | }, 139 | } 140 | for _, tc := range testCases { 141 | tc := tc 142 | 143 | t.Run(tc.name, func(t *testing.T) { 144 | t.Parallel() 145 | 146 | gotResult := tc.args.p(tc.input) 147 | if (gotResult.Err != nil) != tc.wantErr { 148 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 149 | } 150 | 151 | // testify makes it easier comparing slices 152 | assert.Equal(t, 153 | tc.wantOutput, gotResult.Output, 154 | "got output %v, want output %v", gotResult.Output, tc.wantOutput, 155 | ) 156 | 157 | if gotResult.Remaining != tc.wantRemaining { 158 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 159 | } 160 | }) 161 | } 162 | } 163 | 164 | func TestMany0DetectsInfiniteLoops(t *testing.T) { 165 | t.Parallel() 166 | 167 | // Digit0 accepts empty input, and would cause an infinite loop if not detected 168 | input := "abcdef" 169 | parser := Many0(Digit0[string]()) 170 | 171 | result := parser(input) 172 | 173 | assert.Error(t, result.Err) 174 | assert.Nil(t, result.Output) 175 | assert.Equal(t, input, result.Remaining) 176 | } 177 | 178 | func BenchmarkMany0(b *testing.B) { 179 | parser := Many0(Char[string]('#')) 180 | 181 | b.ResetTimer() 182 | for i := 0; i < b.N; i++ { 183 | parser("###") 184 | } 185 | } 186 | 187 | func TestMany1(t *testing.T) { 188 | t.Parallel() 189 | 190 | type args struct { 191 | p Parser[string, []rune] 192 | } 193 | testCases := []struct { 194 | name string 195 | args args 196 | input string 197 | wantErr bool 198 | wantOutput []rune 199 | wantRemaining string 200 | }{ 201 | { 202 | name: "matching parser should succeed", 203 | input: "###", 204 | args: args{ 205 | p: Many1(Char[string]('#')), 206 | }, 207 | wantErr: false, 208 | wantOutput: []rune{'#', '#', '#'}, 209 | 
wantRemaining: "", 210 | }, 211 | { 212 | name: "matching at least once should succeed", 213 | input: "#abc", 214 | args: args{ 215 | p: Many1(Char[string]('#')), 216 | }, 217 | wantErr: false, 218 | wantOutput: []rune{'#'}, 219 | wantRemaining: "abc", 220 | }, 221 | { 222 | name: "not matching at least once should fail", 223 | input: "a##", 224 | args: args{ 225 | p: Many1(Char[string]('#')), 226 | }, 227 | wantErr: true, 228 | wantOutput: nil, 229 | wantRemaining: "a##", 230 | }, 231 | { 232 | name: "no match should fail", 233 | input: "abc", 234 | args: args{ 235 | p: Many1(Char[string]('#')), 236 | }, 237 | wantErr: true, 238 | wantOutput: nil, 239 | wantRemaining: "abc", 240 | }, 241 | { 242 | name: "empty input should fail", 243 | input: "", 244 | args: args{ 245 | p: Many1(Char[string]('#')), 246 | }, 247 | wantErr: true, 248 | wantOutput: nil, 249 | wantRemaining: "", 250 | }, 251 | } 252 | for _, tc := range testCases { 253 | tc := tc 254 | 255 | t.Run(tc.name, func(t *testing.T) { 256 | t.Parallel() 257 | 258 | gotResult := tc.args.p(tc.input) 259 | if (gotResult.Err != nil) != tc.wantErr { 260 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 261 | } 262 | 263 | // testify makes it easier comparing slices 264 | assert.Equal(t, 265 | tc.wantOutput, gotResult.Output, 266 | "got output %v, want output %v", gotResult.Output, tc.wantOutput, 267 | ) 268 | 269 | if gotResult.Remaining != tc.wantRemaining { 270 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 271 | } 272 | }) 273 | } 274 | } 275 | 276 | func TestMany1DetectsInfiniteLoops(t *testing.T) { 277 | t.Parallel() 278 | 279 | // Digit0 accepts empty input, and would cause an infinite loop if not detected 280 | input := "abcdef" 281 | parser := Many1(Digit0[string]()) 282 | 283 | result := parser(input) 284 | 285 | assert.Error(t, result.Err) 286 | assert.Nil(t, result.Output) 287 | assert.Equal(t, input, result.Remaining) 288 | } 289 | 290 | func 
BenchmarkMany1(b *testing.B) { 291 | parser := Many1(Char[string]('#')) 292 | 293 | b.ResetTimer() 294 | for i := 0; i < b.N; i++ { 295 | parser("###") 296 | } 297 | } 298 | 299 | func TestSeparatedList0(t *testing.T) { 300 | t.Parallel() 301 | 302 | type args struct { 303 | p Parser[string, []string] 304 | } 305 | testCases := []struct { 306 | name string 307 | args args 308 | input string 309 | wantErr bool 310 | wantOutput []string 311 | wantRemaining string 312 | }{ 313 | { 314 | name: "matching parser should succeed", 315 | input: "abc,abc,abc", 316 | args: args{ 317 | p: SeparatedList0(Token[string]("abc"), Char[string](',')), 318 | }, 319 | wantErr: false, 320 | wantOutput: []string{"abc", "abc", "abc"}, 321 | wantRemaining: "", 322 | }, 323 | { 324 | name: "matching parser and missing separator should succeed", 325 | input: "abc123abc", 326 | args: args{ 327 | p: SeparatedList0(Token[string]("abc"), Char[string](',')), 328 | }, 329 | wantErr: false, 330 | wantOutput: []string{"abc"}, 331 | wantRemaining: "123abc", 332 | }, 333 | { 334 | name: "parser with separator but non-matching right side should succeed", 335 | input: "abc,def", 336 | args: args{ 337 | p: SeparatedList0(Token[string]("abc"), Char[string](',')), 338 | }, 339 | wantErr: false, 340 | wantOutput: []string{"abc"}, 341 | wantRemaining: ",def", 342 | }, 343 | { 344 | name: "parser matching on the right of the separator should succeed", 345 | input: "def,abc", 346 | args: args{ 347 | p: SeparatedList0(Token[string]("abc"), Char[string](',')), 348 | }, 349 | wantErr: false, 350 | wantOutput: []string{}, 351 | wantRemaining: "def,abc", 352 | }, 353 | { 354 | name: "empty input should succeed", 355 | input: "", 356 | args: args{ 357 | p: SeparatedList0(Token[string]("abc"), Char[string](',')), 358 | }, 359 | wantErr: false, 360 | wantOutput: []string{}, 361 | wantRemaining: "", 362 | }, 363 | { 364 | name: "parsing input without separator should succeed", 365 | input: "123", 366 | args: args{ 367 
| p: SeparatedList0(Digit0[string](), Char[string](',')), 368 | }, 369 | wantErr: false, 370 | wantOutput: []string{"123"}, 371 | wantRemaining: "", 372 | }, 373 | { 374 | name: "using a parser accepting empty input should fail", 375 | input: "", 376 | args: args{ 377 | p: SeparatedList0(Digit0[string](), Char[string](',')), 378 | }, 379 | wantErr: true, 380 | wantOutput: nil, 381 | wantRemaining: "", 382 | }, 383 | } 384 | for _, tc := range testCases { 385 | tc := tc 386 | 387 | t.Run(tc.name, func(t *testing.T) { 388 | t.Parallel() 389 | 390 | gotResult := tc.args.p(tc.input) 391 | if (gotResult.Err != nil) != tc.wantErr { 392 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 393 | } 394 | 395 | // testify makes it easier comparing slices 396 | assert.Equal(t, 397 | tc.wantOutput, gotResult.Output, 398 | "got output %v, want output %v", gotResult.Output, tc.wantOutput, 399 | ) 400 | 401 | if gotResult.Remaining != tc.wantRemaining { 402 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 403 | } 404 | }) 405 | } 406 | } 407 | 408 | func BenchmarkSeparatedList0(t *testing.B) { 409 | parser := SeparatedList0(Char[string]('#'), Char[string](',')) 410 | 411 | t.ResetTimer() 412 | for i := 0; i < t.N; i++ { 413 | parser("#,#,#") 414 | } 415 | } 416 | 417 | func TestSeparatedList1(t *testing.T) { 418 | t.Parallel() 419 | 420 | type args struct { 421 | p Parser[string, []string] 422 | } 423 | testCases := []struct { 424 | name string 425 | args args 426 | input string 427 | wantErr bool 428 | wantOutput []string 429 | wantRemaining string 430 | }{ 431 | { 432 | name: "matching parser should succeed", 433 | input: "abc,abc,abc", 434 | args: args{ 435 | p: SeparatedList1(Token[string]("abc"), Char[string](',')), 436 | }, 437 | wantErr: false, 438 | wantOutput: []string{"abc", "abc", "abc"}, 439 | wantRemaining: "", 440 | }, 441 | { 442 | name: "matching parser and missing separator should succeed", 443 | input: 
"abc123abc", 444 | args: args{ 445 | p: SeparatedList1(Token[string]("abc"), Char[string](',')), 446 | }, 447 | wantErr: false, 448 | wantOutput: []string{"abc"}, 449 | wantRemaining: "123abc", 450 | }, 451 | { 452 | name: "parser with separator but non-matching right side should succeed", 453 | input: "abc,def", 454 | args: args{ 455 | p: SeparatedList1(Token[string]("abc"), Char[string](',')), 456 | }, 457 | wantErr: false, 458 | wantOutput: []string{"abc"}, 459 | wantRemaining: ",def", 460 | }, 461 | { 462 | name: "parser matching on the right of the separator should succeed", 463 | input: "def,abc", 464 | args: args{ 465 | p: SeparatedList1(Token[string]("abc"), Char[string](',')), 466 | }, 467 | wantErr: true, 468 | wantOutput: nil, 469 | wantRemaining: "def,abc", 470 | }, 471 | { 472 | name: "empty input should fail", 473 | input: "", 474 | args: args{ 475 | p: SeparatedList1(Token[string]("abc"), Char[string](',')), 476 | }, 477 | wantErr: true, 478 | wantOutput: nil, 479 | wantRemaining: "", 480 | }, 481 | } 482 | for _, tc := range testCases { 483 | tc := tc 484 | 485 | t.Run(tc.name, func(t *testing.T) { 486 | t.Parallel() 487 | 488 | gotResult := tc.args.p(tc.input) 489 | if (gotResult.Err != nil) != tc.wantErr { 490 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 491 | } 492 | 493 | // testify makes it easier comparing slices 494 | assert.Equal(t, 495 | tc.wantOutput, gotResult.Output, 496 | "got output %v, want output %v", gotResult.Output, tc.wantOutput, 497 | ) 498 | 499 | if gotResult.Remaining != tc.wantRemaining { 500 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 501 | } 502 | }) 503 | } 504 | } 505 | 506 | func BenchmarkSeparatedList1(t *testing.B) { 507 | parser := SeparatedList1(Char[string]('#'), Char[string](',')) 508 | 509 | t.ResetTimer() 510 | for i := 0; i < t.N; i++ { 511 | parser("#,#,#") 512 | } 513 | } 514 | 
-------------------------------------------------------------------------------- /numbers.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | // import "math" 4 | 5 | // Float parses a sequence of numerical characters into a float64. 6 | // The '.' character is used as the optional decimal delimiter. Any 7 | // number without a decimal part will still be parsed as a float64. 8 | // 9 | // N.B: it is not the parser's role to make sure the floating point 10 | // number you're attempting to parse fits into a 64 bits float. 11 | 12 | // func Float[I Bytes]() Parser[I, float64] { 13 | // digitsParser := TakeWhileOneOf[I]([]rune("0123456789")...) 14 | // minusParser := Char[I]('-') 15 | // dotParser := Char[I]('.') 16 | 17 | // return func(input I) Result[float64, I] { 18 | // var negative bool 19 | 20 | // minusresult := minusParser(input) 21 | // if result.Err == nil { 22 | // negative = true 23 | // } 24 | 25 | // result = digitsParser(result.Remaining) 26 | // // result = Expect(digitsParser, "digits")(result.Remaining) 27 | // // if result.Err != nil { 28 | // // return result 29 | // // } 30 | 31 | // parsed, ok := result.Output.(string) 32 | // if !ok { 33 | // err := fmt.Errorf("failed parsing floating point value; " + 34 | // "reason: converting Float() parser result's output to string failed", 35 | // ) 36 | // return Failure(NewFatalError(input, err, "float"), input) 37 | // } 38 | // if resultTest := dotParser(result.Remaining); resultTest.Err == nil { 39 | // if resultTest = digitsParser(resultTest.Remaining); resultTest.Err == nil { 40 | // parsed = parsed + "." 
+ resultTest.Output.(string) 41 | // result = resultTest 42 | // } 43 | // } 44 | 45 | // floatingPointValue, err := strconv.ParseFloat(parsed, 64) 46 | // if err != nil { 47 | // err = fmt.Errorf("failed to parse '%v' as float; reason: %w", parsed, err) 48 | // return Failure(NewFatalError(input, err), input) 49 | // } 50 | 51 | // if negative { 52 | // floatingPointValue = -floatingPointValue 53 | // } 54 | 55 | // result.Output = floatingPointValue 56 | 57 | // return result 58 | // } 59 | // } 60 | -------------------------------------------------------------------------------- /sequence.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | // Delimited parses and discards the result from the prefix parser, then 4 | // parses the result of the main parser, and finally parses and discards 5 | // the result of the suffix parser. 6 | func Delimited[I Bytes, OP, O, OS any](prefix Parser[I, OP], parser Parser[I, O], suffix Parser[I, OS]) Parser[I, O] { 7 | return func(input I) Result[O, I] { 8 | return Terminated(Preceded(prefix, parser), suffix)(input) 9 | } 10 | } 11 | 12 | // Pair applies two parsers and returns a Result containing a pair container holding 13 | // the resulting values. 
14 | func Pair[I Bytes, LO, RO any, LP Parser[I, LO], RP Parser[I, RO]]( 15 | leftParser LP, rightParser RP, 16 | ) Parser[I, PairContainer[LO, RO]] { 17 | return func(input I) Result[PairContainer[LO, RO], I] { 18 | leftResult := leftParser(input) 19 | if leftResult.Err != nil { 20 | return Failure[I, PairContainer[LO, RO]](NewError(input, "Pair"), input) 21 | } 22 | 23 | rightResult := rightParser(leftResult.Remaining) 24 | if rightResult.Err != nil { 25 | return Failure[I, PairContainer[LO, RO]](NewError(input, "Pair"), input) 26 | } 27 | 28 | return Success(PairContainer[LO, RO]{leftResult.Output, rightResult.Output}, rightResult.Remaining) 29 | } 30 | } 31 | 32 | // Preceded parses and discards a result from the prefix parser. It 33 | // then parses a result from the main parser and returns its result. 34 | // 35 | // Preceded is effectively equivalent to applying DiscardAll(prefix), 36 | // and then applying the main parser. 37 | func Preceded[I Bytes, OP, O any](prefix Parser[I, OP], parser Parser[I, O]) Parser[I, O] { 38 | return func(input I) Result[O, I] { 39 | prefixResult := prefix(input) 40 | if prefixResult.Err != nil { 41 | return Failure[I, O](prefixResult.Err, input) 42 | } 43 | 44 | result := parser(prefixResult.Remaining) 45 | if result.Err != nil { 46 | return Failure[I, O](result.Err, input) 47 | } 48 | 49 | return Success(result.Output, result.Remaining) 50 | } 51 | } 52 | 53 | // SeparatedPair applies two separated parsers and returns a Result containing a slice of 54 | // size 2 as its output. The first element of the slice is the result of the left parser, 55 | // and the second element is the result of the right parser. The result of the separator parser 56 | // is discarded. 
57 | func SeparatedPair[I Bytes, LO, RO any, S Separator, LP Parser[I, LO], SP Parser[I, S], RP Parser[I, RO]]( 58 | leftParser LP, separator SP, rightParser RP, 59 | ) Parser[I, PairContainer[LO, RO]] { 60 | return func(input I) Result[PairContainer[LO, RO], I] { 61 | leftResult := leftParser(input) 62 | if leftResult.Err != nil { 63 | return Failure[I, PairContainer[LO, RO]](NewError(input, "SeparatedPair"), input) 64 | } 65 | 66 | sepResult := separator(leftResult.Remaining) 67 | if sepResult.Err != nil { 68 | return Failure[I, PairContainer[LO, RO]](NewError(input, "SeparatedPair"), input) 69 | } 70 | 71 | rightResult := rightParser(sepResult.Remaining) 72 | if rightResult.Err != nil { 73 | return Failure[I, PairContainer[LO, RO]](NewError(input, "SeparatedPair"), input) 74 | } 75 | 76 | return Success(PairContainer[LO, RO]{leftResult.Output, rightResult.Output}, rightResult.Remaining) 77 | } 78 | } 79 | 80 | // Sequence applies a sequence of parsers and returns either a 81 | // slice of results or an error if any parser fails. 82 | func Sequence[I Bytes, O any](parsers ...Parser[I, O]) Parser[I, []O] { 83 | return func(input I) Result[[]O, I] { 84 | remaining := input 85 | outputs := make([]O, 0, len(parsers)) 86 | 87 | for _, parser := range parsers { 88 | res := parser(remaining) 89 | if res.Err != nil { 90 | return Failure[I, []O](res.Err, input) 91 | } 92 | 93 | outputs = append(outputs, res.Output) 94 | remaining = res.Remaining 95 | } 96 | 97 | return Success(outputs, remaining) 98 | } 99 | } 100 | 101 | // Terminated parses a result from the main parser, it then 102 | // parses the result from the suffix parser and discards it; only 103 | // returning the result of the main parser. 
104 | func Terminated[I Bytes, O, OS any](parser Parser[I, O], suffix Parser[I, OS]) Parser[I, O] { 105 | return func(input I) Result[O, I] { 106 | result := parser(input) 107 | if result.Err != nil { 108 | return Failure[I, O](result.Err, input) 109 | } 110 | 111 | suffixResult := suffix(result.Remaining) 112 | if suffixResult.Err != nil { 113 | return Failure[I, O](suffixResult.Err, input) 114 | } 115 | 116 | return Success(result.Output, suffixResult.Remaining) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /sequence_test.go: -------------------------------------------------------------------------------- 1 | package gomme 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestDelimited(t *testing.T) { 10 | t.Parallel() 11 | 12 | type args struct { 13 | p Parser[string, string] 14 | } 15 | testCases := []struct { 16 | name string 17 | args args 18 | input string 19 | wantErr bool 20 | wantOutput string 21 | wantRemaining string 22 | }{ 23 | { 24 | name: "matching parser should succeed", 25 | input: "+1\r\n", 26 | args: args{ 27 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()), 28 | }, 29 | wantErr: false, 30 | wantOutput: "1", 31 | wantRemaining: "", 32 | }, 33 | { 34 | name: "no prefix match should fail", 35 | input: "1\r\n", 36 | args: args{ 37 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()), 38 | }, 39 | wantErr: true, 40 | wantOutput: "", 41 | wantRemaining: "1\r\n", 42 | }, 43 | { 44 | name: "no parser match should fail", 45 | input: "+\r\n", 46 | args: args{ 47 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()), 48 | }, 49 | wantErr: true, 50 | wantOutput: "", 51 | wantRemaining: "+\r\n", 52 | }, 53 | { 54 | name: "no suffix match should fail", 55 | input: "+1", 56 | args: args{ 57 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()), 58 | }, 59 | wantErr: true, 60 | wantOutput: "", 61 | 
wantRemaining: "+1", 62 | }, 63 | { 64 | name: "empty input should fail", 65 | input: "", 66 | args: args{ 67 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()), 68 | }, 69 | wantErr: true, 70 | wantOutput: "", 71 | wantRemaining: "", 72 | }, 73 | } 74 | for _, tc := range testCases { 75 | tc := tc 76 | 77 | t.Run(tc.name, func(t *testing.T) { 78 | t.Parallel() 79 | 80 | gotResult := tc.args.p(tc.input) 81 | if (gotResult.Err != nil) != tc.wantErr { 82 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 83 | } 84 | 85 | if gotResult.Output != tc.wantOutput { 86 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 87 | } 88 | 89 | if gotResult.Remaining != tc.wantRemaining { 90 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 91 | } 92 | }) 93 | } 94 | } 95 | 96 | func BenchmarkDelimited(b *testing.B) { 97 | parser := Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()) 98 | 99 | b.ResetTimer() 100 | for i := 0; i < b.N; i++ { 101 | parser("+1\r\n") 102 | } 103 | } 104 | 105 | func TestPair(t *testing.T) { 106 | t.Parallel() 107 | 108 | type args struct { 109 | leftParser Parser[string, string] 110 | rightParser Parser[string, string] 111 | } 112 | testCases := []struct { 113 | name string 114 | args args 115 | input string 116 | wantErr bool 117 | wantOutput PairContainer[string, string] 118 | wantRemaining string 119 | }{ 120 | { 121 | name: "matching parsers should succeed", 122 | input: "1abc\r\n", 123 | args: args{ 124 | leftParser: Digit1[string](), 125 | rightParser: TakeUntil(CRLF[string]()), 126 | }, 127 | wantErr: false, 128 | wantOutput: PairContainer[string, string]{"1", "abc"}, 129 | wantRemaining: "\r\n", 130 | }, 131 | { 132 | name: "matching left parser, failing right parser, should fail", 133 | input: "1abc", 134 | args: args{ 135 | leftParser: Digit1[string](), 136 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 137 | }, 138 | wantErr: 
true, 139 | wantOutput: PairContainer[string, string]{}, 140 | wantRemaining: "1abc", 141 | }, 142 | { 143 | name: "failing left parser, matching right parser, should fail", 144 | input: "adef", 145 | args: args{ 146 | leftParser: Digit1[string](), 147 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 148 | }, 149 | wantErr: true, 150 | wantOutput: PairContainer[string, string]{}, 151 | wantRemaining: "adef", 152 | }, 153 | { 154 | name: "failing left parser, failing right parser, should fail", 155 | input: "123", 156 | args: args{ 157 | leftParser: Digit1[string](), 158 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 159 | }, 160 | wantErr: true, 161 | wantOutput: PairContainer[string, string]{}, 162 | wantRemaining: "123", 163 | }, 164 | } 165 | for _, tc := range testCases { 166 | tc := tc 167 | 168 | t.Run(tc.name, func(t *testing.T) { 169 | t.Parallel() 170 | 171 | parser := Pair(tc.args.leftParser, tc.args.rightParser) 172 | 173 | gotResult := parser(tc.input) 174 | if (gotResult.Err != nil) != tc.wantErr { 175 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 176 | } 177 | 178 | if gotResult.Output != tc.wantOutput { 179 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 180 | } 181 | 182 | if gotResult.Remaining != tc.wantRemaining { 183 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 184 | } 185 | }) 186 | } 187 | } 188 | 189 | func BenchmarkPair(b *testing.B) { 190 | parser := Pair(Digit1[string](), TakeUntil(CRLF[string]())) 191 | 192 | b.ResetTimer() 193 | for i := 0; i < b.N; i++ { 194 | parser("1abc\r\n") 195 | } 196 | } 197 | 198 | func TestPreceded(t *testing.T) { 199 | t.Parallel() 200 | 201 | type args struct { 202 | p Parser[string, string] 203 | } 204 | testCases := []struct { 205 | name string 206 | args args 207 | input string 208 | wantErr bool 209 | wantOutput string 210 | wantRemaining string 211 | }{ 212 | { 213 | name: "matching parser should 
succeed", 214 | input: "+123", 215 | args: args{ 216 | p: Preceded(Char[string]('+'), Digit1[string]()), 217 | }, 218 | wantErr: false, 219 | wantOutput: "123", 220 | wantRemaining: "", 221 | }, 222 | { 223 | name: "no prefix match should fail", 224 | input: "+123", 225 | args: args{ 226 | p: Preceded(Char[string]('-'), Digit1[string]()), 227 | }, 228 | wantErr: true, 229 | wantOutput: "", 230 | wantRemaining: "+123", 231 | }, 232 | { 233 | name: "no parser match should succeed", 234 | input: "+", 235 | args: args{ 236 | p: Preceded(Char[string]('+'), Digit1[string]()), 237 | }, 238 | wantErr: true, 239 | wantOutput: "", 240 | wantRemaining: "+", 241 | }, 242 | { 243 | name: "empty input should fail", 244 | input: "", 245 | args: args{ 246 | p: Preceded(Char[string]('+'), Digit1[string]()), 247 | }, 248 | wantErr: true, 249 | wantOutput: "", 250 | wantRemaining: "", 251 | }, 252 | } 253 | for _, tc := range testCases { 254 | tc := tc 255 | 256 | t.Run(tc.name, func(t *testing.T) { 257 | t.Parallel() 258 | 259 | gotResult := tc.args.p(tc.input) 260 | if (gotResult.Err != nil) != tc.wantErr { 261 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 262 | } 263 | 264 | if gotResult.Output != tc.wantOutput { 265 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) 266 | } 267 | 268 | if gotResult.Remaining != tc.wantRemaining { 269 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 270 | } 271 | }) 272 | } 273 | } 274 | 275 | func BenchmarkPreceded(b *testing.B) { 276 | parser := Preceded(Char[string]('+'), Digit1[string]()) 277 | 278 | b.ResetTimer() 279 | for i := 0; i < b.N; i++ { 280 | parser("+123") 281 | } 282 | } 283 | 284 | func TestSeparatedPair(t *testing.T) { 285 | t.Parallel() 286 | 287 | type args struct { 288 | leftParser Parser[string, string] 289 | separatorParser Parser[string, rune] 290 | rightParser Parser[string, string] 291 | } 292 | testCases := []struct { 293 | name 
string 294 | args args 295 | input string 296 | wantErr bool 297 | wantOutput PairContainer[string, string] 298 | wantRemaining string 299 | }{ 300 | // { true, true, true } 301 | { 302 | name: "matching parsers should succeed", 303 | input: "1|abc\r\n", 304 | args: args{ 305 | leftParser: Digit1[string](), 306 | separatorParser: Char[string]('|'), 307 | rightParser: TakeUntil(CRLF[string]()), 308 | }, 309 | wantErr: false, 310 | wantOutput: PairContainer[string, string]{"1", "abc"}, 311 | wantRemaining: "\r\n", 312 | }, 313 | // { true, true, false } 314 | { 315 | name: "matching left parser, matching separator, failing right parser, should fail", 316 | input: "1|abc", 317 | args: args{ 318 | leftParser: Digit1[string](), 319 | separatorParser: Char[string]('|'), 320 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 321 | }, 322 | wantErr: true, 323 | wantOutput: PairContainer[string, string]{}, 324 | wantRemaining: "1|abc", 325 | }, 326 | // { true, false, true } 327 | { 328 | name: "matching left parser, failing separator, matching right parser, should fail", 329 | input: "1^abc", 330 | args: args{ 331 | leftParser: Digit1[string](), 332 | separatorParser: Char[string]('|'), 333 | rightParser: TakeWhileOneOf[string]('a', 'b', 'c'), 334 | }, 335 | wantErr: true, 336 | wantOutput: PairContainer[string, string]{}, 337 | wantRemaining: "1^abc", 338 | }, 339 | // { true, false, false } 340 | { 341 | name: "matching left parser, failing separator, failing right parser, should fail", 342 | input: "1^abc", 343 | args: args{ 344 | leftParser: Digit1[string](), 345 | separatorParser: Char[string]('|'), 346 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 347 | }, 348 | wantErr: true, 349 | wantOutput: PairContainer[string, string]{}, 350 | wantRemaining: "1^abc", 351 | }, 352 | // { false, true, true } 353 | { 354 | name: "failing left parser, matching separator, matching right parser, should fail", 355 | input: "a|def", 356 | args: args{ 357 | leftParser: 
Digit1[string](), 358 | separatorParser: Char[string]('|'), 359 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 360 | }, 361 | wantErr: true, 362 | wantOutput: PairContainer[string, string]{}, 363 | wantRemaining: "a|def", 364 | }, 365 | // { false, true, false } 366 | { 367 | name: "failing left parser, matching separator, failing right parser, should fail", 368 | input: "a|123", 369 | args: args{ 370 | leftParser: Digit1[string](), 371 | separatorParser: Char[string]('|'), 372 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 373 | }, 374 | wantErr: true, 375 | wantOutput: PairContainer[string, string]{}, 376 | wantRemaining: "a|123", 377 | }, 378 | // { false, false, true } 379 | { 380 | name: "failing left parser, failing separator, matching right parser, should fail", 381 | input: "a^def", 382 | args: args{ 383 | leftParser: Digit1[string](), 384 | separatorParser: Char[string]('|'), 385 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 386 | }, 387 | wantErr: true, 388 | wantOutput: PairContainer[string, string]{}, 389 | wantRemaining: "a^def", 390 | }, 391 | // { false, false, false } 392 | { 393 | name: "failing left parser, failing separator, failing right parser, should fail", 394 | input: "a^123", 395 | args: args{ 396 | leftParser: Digit1[string](), 397 | separatorParser: Char[string]('|'), 398 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'), 399 | }, 400 | wantErr: true, 401 | wantOutput: PairContainer[string, string]{}, 402 | wantRemaining: "a^123", 403 | }, 404 | } 405 | for _, tc := range testCases { 406 | tc := tc 407 | 408 | t.Run(tc.name, func(t *testing.T) { 409 | t.Parallel() 410 | 411 | parser := SeparatedPair(tc.args.leftParser, tc.args.separatorParser, tc.args.rightParser) 412 | 413 | gotResult := parser(tc.input) 414 | if (gotResult.Err != nil) != tc.wantErr { 415 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) 416 | } 417 | 418 | if gotResult.Output != tc.wantOutput { 419 | t.Errorf("got output %v, 
want output %v", gotResult.Output, tc.wantOutput) 420 | } 421 | 422 | if gotResult.Remaining != tc.wantRemaining { 423 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) 424 | } 425 | }) 426 | } 427 | }

// BenchmarkSeparatedPair measures SeparatedPair on an input that the left,
// separator and right parsers all accept.
func BenchmarkSeparatedPair(b *testing.B) {
	parser := SeparatedPair(Digit1[string](), Char[string]('|'), TakeUntil(CRLF[string]()))

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		parser("1|abc\r\n")
	}
}

// TestSequence checks that Sequence runs its parsers in order and collects
// their outputs into a slice, and that on failure it reports an error, a nil
// output and the original input as remaining.
func TestSequence(t *testing.T) {
	t.Parallel()

	type args struct {
		p Parser[string, []string]
	}
	testCases := []struct {
		name          string
		args          args
		input         string
		wantErr       bool
		wantOutput    []string
		wantRemaining string
	}{
		{
			name:  "matching parsers should succeed",
			input: "1a3",
			args: args{
				p: Sequence(Digit1[string](), Alpha0[string](), Digit1[string]()),
			},
			wantErr:       false,
			wantOutput:    []string{"1", "a", "3"},
			wantRemaining: "",
		},
		{
			name:  "matching parsers in longer input should succeed",
			input: "1a3bcd",
			args: args{
				p: Sequence(Digit1[string](), Alpha0[string](), Digit1[string]()),
			},
			wantErr:       false,
			wantOutput:    []string{"1", "a", "3"},
			wantRemaining: "bcd",
		},
		{
			name:  "partially matching parsers should fail",
			input: "1a3",
			args: args{
				p: Sequence(Digit1[string](), Digit1[string](), Digit1[string]()),
			},
			wantErr:       true,
			wantOutput:    nil,
			wantRemaining: "1a3",
		},
		{
			name:  "too short input should fail",
			input: "12",
			args: args{
				p: Sequence(Digit1[string](), Digit1[string](), Digit1[string]()),
			},
			wantErr:       true,
			wantOutput:    nil,
			wantRemaining: "12",
		},
		{
			// The expectation below is an error, so the case is named
			// accordingly (it used to read "should succeed").
			name:  "empty input should fail",
			input: "",
			args: args{
				p: Sequence(Digit1[string](), Digit1[string](), Digit1[string]()),
			},
			wantErr:       true,
			wantOutput:    nil,
			wantRemaining: "",
		},
	}
	for _, tc := range testCases {
		// Copy the range variable: the parallel subtest closure outlives
		// the iteration (per-iteration variables only exist since Go 1.22).
		tc := tc

		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			gotResult := tc.args.p(tc.input)
			if (gotResult.Err != nil) != tc.wantErr {
				t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
			}

			// Slices cannot be compared with ==, so rely on testify here.
			assert.Equal(t,
				tc.wantOutput, gotResult.Output,
				"got output %v, want output %v", gotResult.Output, tc.wantOutput,
			)

			if gotResult.Remaining != tc.wantRemaining {
				t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
			}
		})
	}
}

// BenchmarkSequence measures Sequence on an input that all three parsers
// accept.
func BenchmarkSequence(b *testing.B) {
	parser := Sequence(Digit1[string](), Alpha0[string](), Digit1[string]())

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Same input as the first TestSequence case. The previous input,
		// "123", is swallowed whole by the first Digit1 (see the "too short
		// input" case), so the benchmark was timing the failure path.
		parser("1a3")
	}
}

// TestTerminated checks that Terminated yields the first parser's output
// only when the terminating parser matches right after it, and that the
// terminator is consumed without being part of the output.
func TestTerminated(t *testing.T) {
	t.Parallel()

	type args struct {
		p Parser[string, string]
	}
	testCases := []struct {
		name          string
		args          args
		input         string
		wantErr       bool
		wantOutput    string
		wantRemaining string
	}{
		{
			name:  "matching parser should succeed",
			input: "1+23",
			args: args{
				p: Terminated(Digit1[string](), Char[string]('+')),
			},
			wantErr:       false,
			wantOutput:    "1",
			wantRemaining: "23",
		},
		{
			name:  "no suffix match should fail",
			input: "1-23",
			args: args{
				p: Terminated(Digit1[string](), Char[string]('+')),
			},
			wantErr:       true,
			wantOutput:    "",
			wantRemaining: "1-23",
		},
		{
			// The expectation below is an error, so the case is named
			// accordingly (it used to read "should succeed").
			name:  "no parser match should fail",
			input: "+",
			args: args{
				p: Terminated(Digit1[string](), Char[string]('+')),
			},
			wantErr:       true,
			wantOutput:    "",
			wantRemaining: "+",
		},
		{
			name:  "empty input should fail",
			input: "",
			args: args{
				p: Terminated(Digit1[string](), Char[string]('+')),
			},
			wantErr:       true,
			wantOutput:    "",
			wantRemaining: "",
		},
	}
	for _, tc := range testCases {
		// Copy the range variable: the parallel subtest closure outlives
		// the iteration (per-iteration variables only exist since Go 1.22).
		tc := tc

		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			gotResult := tc.args.p(tc.input)
			if (gotResult.Err != nil) != tc.wantErr {
				t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
			}

			if gotResult.Output != tc.wantOutput {
				t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
			}

			if gotResult.Remaining != tc.wantRemaining {
				t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
			}
		})
	}
}

// BenchmarkTerminated measures Terminated on the input "123+".
func BenchmarkTerminated(b *testing.B) {
	parser := Terminated(Digit1[string](), Char[string]('+'))

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		parser("123+")
	}
}
--------------------------------------------------------------------------------