├── .github
└── workflows
│ └── go.yml
├── .gitignore
├── .golangci.yml
├── LICENSE
├── README.md
├── TODO.md
├── branch.go
├── branch_test.go
├── bytes.go
├── bytes_test.go
├── characters.go
├── characters_test.go
├── combinators.go
├── combinators_test.go
├── containers.go
├── error.go
├── examples
├── .gitignore
├── csv
│ ├── csv.go
│ └── csv_test.go
├── hexcolor
│ ├── hexcolor.go
│ └── hexcolor_test.go
├── json
│ ├── json.go
│ └── test.json
└── redis
│ ├── redis.go
│ └── redis_test.go
├── go.mod
├── go.sum
├── logo.png
├── multi.go
├── multi_test.go
├── numbers.go
├── sequence.go
└── sequence_test.go
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | name: Go
2 |
3 | on:
4 | push:
5 | branches: [ "main" ]
6 | pull_request:
7 | branches: [ "main" ]
8 |
9 | jobs:
10 |
11 | build:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v3
15 |
16 | - name: Set up Go
17 | uses: actions/setup-go@v3
18 | with:
19 | go-version: 1.18
20 |
21 | - name: Build
22 | run: go build -v ./...
23 |
24 | - name: Test
25 | run: go test -v ./...
26 |
27 | - name: Lint
28 | uses: golangci/golangci-lint-action@v3.2.0
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 |
8 | # Test binary, built with `go test -c`
9 | *.test
10 |
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 |
14 | # Dependency directories (remove the comment below to include it)
15 | # vendor/
16 |
17 | examples/redis/testdata
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | run:
2 | deadline: 5m
3 |
4 | issues:
5 | # Maximum issues count per one linter. Set to 0 to disable. Default is 50.
6 | max-issues-per-linter: 0
7 | # Maximum count of issues with the same text. Set to 0 to disable. Default is 3.
8 | max-same-issues: 0
9 |
10 | # We want to try and improve the comments in this codebase, so individual
11 | # non-golint items from the default exclusion list will gradually be added
12 | # to the exclude-rules below
13 | exclude-use-default: false
14 |
15 | exclude-rules:
16 | # Exclude duplicate code and function length and complexity checking in test
17 | # files (due to common repeats and long functions in test code)
18 | - path: _(test|gen)\.go
19 | linters:
20 | - cyclop
21 | - dupl
22 | - gocognit
23 | - funlen
24 | - lll
25 | - linters:
26 | - staticcheck # Tracked in https://github.com/grafana/xk6-grpc/issues/14
27 | text: "The entire proto file grpc/reflection/v1alpha/reflection.proto is marked as deprecated."
28 |
29 | linters-settings:
30 | exhaustive:
31 | default-signifies-exhaustive: true
32 | govet:
33 | check-shadowing: true
34 | cyclop:
35 | max-complexity: 25
36 | maligned:
37 | suggest-new: true
38 | dupl:
39 | threshold: 150
40 | goconst:
41 | min-len: 10
42 | min-occurrences: 4
43 | funlen:
44 | lines: 80
45 | statements: 60
46 | forbidigo:
47 | forbid:
48 | - '^(fmt\\.Print(|f|ln)|print|println)$'
49 | # Forbid everything in syscall except the uppercase constants
50 | - '^syscall\.[^A-Z_]+$(# Using anything except constants from the syscall package is forbidden )?'
51 | - '^logrus\.Logger$'
52 | revive:
53 | rules:
54 | - name: package-comments
55 | severity: warning
56 | disabled: true
57 | stylecheck:
58 | checks:
59 | - "all"
60 | - '-ST1000' # Incorrect or missing package comment
61 |
62 | linters:
63 | disable-all: true
64 | enable:
65 | - asasalint
66 | - asciicheck
67 | - bidichk
68 | - bodyclose
69 | - contextcheck
70 | - cyclop
71 | - dogsled
72 | - dupl
73 | - durationcheck
74 | - errcheck
75 | - errchkjson
76 | - errname
77 | - errorlint
78 | - exhaustive
79 | - exportloopref
80 | - forbidigo
81 | - forcetypeassert
82 | - funlen
83 | - gocheckcompilerdirectives
84 | - gochecknoglobals
85 | - gocognit
86 | - goconst
87 | - gocritic
88 | - gofmt
89 | - gofumpt
90 | - goimports
91 | - gomoddirectives
92 | - goprintffuncname
93 | - gosec
94 | - gosimple
95 | - govet
96 | - importas
97 | - ineffassign
98 | - interfacebloat
99 | - lll
100 | - makezero
101 | - misspell
102 | - nakedret
103 | - nestif
104 | - nilerr
105 | - nilnil
106 | - noctx
107 | - nolintlint
108 | - nosprintfhostport
109 | - paralleltest
110 | - prealloc
111 | - predeclared
112 | - promlinter
113 | - revive
114 | - reassign
115 | - rowserrcheck
116 | - sqlclosecheck
117 | - staticcheck
118 | - stylecheck
119 | - tenv
120 | - tparallel
121 | - typecheck
122 | - unconvert
123 | - unparam
124 | - unused
125 | - usestdlibvars
126 | - wastedassign
127 | - whitespace
128 | fast: false
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Théo Crevon
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |

2 | A parser combinator library for Go
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | Gomme is a library that simplifies building parsers in Go.
13 |
14 | Inspired by Rust's renowned `nom` crate, Gomme provides a developer-friendly toolkit that allows you to quickly and easily create reliable parsers for both textual and binary formats.
15 |
16 | With the power of Go's newly introduced Generics, Gomme gives you the flexibility to design your own parsers while ensuring optimal compile-time type safety. Whether you're a seasoned developer or just starting out, Gomme is designed to make the process of building parsers efficient, enjoyable, and less intimidating.
17 |
18 | ## Table of content
19 |
20 |
21 | - [Table of content](#table-of-content)
22 | - [Getting started](#getting-started)
23 | - [Why Gomme?](#why-gomme)
24 | - [Examples](#examples)
25 | - [Documentation](#documentation)
26 | - [Table of content](#table-of-content-1)
27 | - [Documentation](#documentation-1)
28 | - [Installation](#installation)
29 | - [Guide](#guide)
30 | - [List of combinators](#list-of-combinators)
31 | - [Base combinators](#base-combinators)
32 | - [Bytes combinators](#bytes-combinators)
33 | - [Character combinators](#character-combinators)
34 | - [Combinators for Sequences](#combinators-for-sequences)
35 | - [Combinators for Applying Parsers Many Times](#combinators-for-applying-parsers-many-times)
36 | - [Combinators for Choices](#combinators-for-choices)
37 | - [Installation](#installation-1)
38 | - [Frequently asked questions](#frequently-asked-questions)
39 | - [Q: What are parser combinators?](#q-what-are-parser-combinators)
40 | - [Q: Why would I use parser combinators instead of a specific parser?](#q-why-would-i-use-parser-combinators-instead-of-a-specific-parser)
41 | - [Q: Where can I learn more about parser combinators?](#q-where-can-i-learn-more-about-parser-combinators)
42 | - [Acknowledgements](#acknowledgements)
43 | - [Authors](#authors)
44 |
45 |
46 | ## Getting started
47 |
48 | Here's how to quickly parse [hexadecimal color codes](https://developer.mozilla.org/en-US/docs/Web/CSS/color) using Gomme:
49 |
50 | ```golang
51 | // RGBColor stores the three bytes describing a color in the RGB space.
52 | type RGBColor struct {
53 | red uint8
54 | green uint8
55 | blue uint8
56 | }
57 |
58 | // ParseRGBColor creates a new RGBColor from a hexadecimal color string.
59 | // The string must be a six-digit hexadecimal number, prefixed with a "#".
60 | func ParseRGBColor(input string) (RGBColor, error) {
61 | parser := gomme.Preceded(
62 | gomme.Token[string]("#"),
63 | gomme.Map(
64 | gomme.Count(HexColorComponent(), 3),
65 | func(components []uint8) (RGBColor, error) {
66 | return RGBColor{components[0], components[1], components[2]}, nil
67 | },
68 | ),
69 | )
70 |
71 | result := parser(input)
72 | if result.Err != nil {
73 | return RGBColor{}, result.Err
74 | }
75 |
76 | return result.Output, nil
77 | }
78 |
79 | // HexColorComponent produces a parser that parses a single hex color component,
80 | // which is a two-digit hexadecimal number.
81 | func HexColorComponent() gomme.Parser[string, uint8] {
82 | return func(input string) gomme.Result[uint8, string] {
83 | return gomme.Map(
84 | gomme.TakeWhileMN[string](2, 2, gomme.IsHexDigit),
85 | fromHex,
86 | )(input)
87 | }
88 | }
89 |
90 | // fromHex converts two digits hexadecimal numbers to their decimal value.
91 | func fromHex(input string) (uint8, error) {
92 | res, err := strconv.ParseInt(input, 16, 16)
93 | if err != nil {
94 | return 0, err
95 | }
96 |
97 | return uint8(res), nil
98 | }
99 |
100 | ```
101 |
102 | It's as simple as that! Feel free to explore more in the [examples](examples/) directory.
103 |
104 | ## Why Gomme?
105 |
106 | While it's true that learning parser combinators might initially seem daunting, their power, flexibility, and efficiency make them an invaluable tool for parsing textual and binary formats. We've created Gomme with a focus on making this learning curve as smooth as possible, providing clear documentation and a wide array of examples.
107 |
108 | Once you get the hang of it, you'll find that Gomme's parser combinators are intuitive, adaptable, and perfect for quickly building parsers for various formats. They're easy to test and maintain, and they can help you create parsers that are as fast as their hand-written counterparts.
109 |
110 | ## Examples
111 |
112 | See Gomme in action with these handy examples:
113 | - [Parsing hexadecimal color codes](./examples/hexcolor)
114 | - [Parsing a simple CSV file](./examples/csv)
115 | - [Parsing Redis' RESP protocol](./examples/redis)
116 | - [Parsing JSON](./examples/json)
117 |
118 | ## Documentation
119 |
120 | For more detailed information, refer to the official [documentation](https://pkg.go.dev/github.com/oleiade/gomme).
121 | ## Table of content
122 |
123 | ## Documentation
124 |
125 | [Documentation](https://pkg.go.dev/github.com/oleiade/gomme)
126 |
127 | ## Installation
128 |
129 | ```bash
130 | go get github.com/oleiade/gomme
131 | ```
132 |
133 | ## Guide
134 |
135 | In this guide, we provide a detailed overview of the various combinators available in Gomme. Combinators are fundamental building blocks in parser construction, each designed for a specific task. By combining them, you can create complex parsers suited to your specific needs. For each combinator, we've provided a brief description and a usage example. Let's explore!
136 |
137 | ### List of combinators
138 |
139 | #### Base combinators
140 |
141 | | Combinator | Description | Example |
142 | | :------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------- |
143 | | [`Map`](https://pkg.go.dev/github.com/oleiade/gomme#Map) | Applies a function to the result of the provided parser, allowing you to transform the parser's result. | `Map(Digit1(), func(s string)int { return 123 })` |
144 | | [`Optional`](https://pkg.go.dev/github.com/oleiade/gomme#Optional) | Makes a parser optional. If unsuccessful, the parser returns a nil `Result.Output`. | `Optional(CRLF())` |
145 | | [`Peek`](https://pkg.go.dev/github.com/oleiade/gomme#Peek) | Applies the provided parser without consuming the input. | |
146 | | [`Recognize`](https://pkg.go.dev/github.com/oleiade/gomme#Recognize) | Returns the consumed input as the produced value when the provided parser is successful. | `Recognize(SeparatedPair(Token("key"), Char(':'), Token("value")))` |
147 | | [`Assign`](https://pkg.go.dev/github.com/oleiade/gomme#Assign) | Returns the assigned value when the provided parser is successful. | `Assign(true, Token("true"))` |
148 |
149 | #### Bytes combinators
150 |
151 | | Combinator | Description | Example |
152 | | :----------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------ |
153 | | [`Take`](https://pkg.go.dev/github.com/oleiade/gomme#Take) | Parses the first N elements of the input. | `Take(5)` |
154 | | [`TakeUntil`](https://pkg.go.dev/github.com/oleiade/gomme#TakeUntil) | Parses the input until the provided parser argument succeeds. | `TakeUntil(CRLF())` |
155 | | [`TakeWhileMN`](https://pkg.go.dev/github.com/oleiade/gomme#TakeWhileMN) | Parses the longest input slice fitting the length expectation (m <= input length <= n) and matching the predicate. The parser argument is a function taking a `rune` as input and returning a `bool`. | `TakeWhileMN(2, 6, gomme.IsHexDigit)` |
156 | | [`Token`](https://pkg.go.dev/github.com/oleiade/gomme#Token) | Recognizes a specific pattern. Compares the input with the token's argument and returns the matching part. | `Token("tolkien")` |
157 |
158 | #### Character combinators
159 |
160 | | Combinator | Description | Example |
161 | | :--- | :--- | :--- |
162 | | [`Char`](https://pkg.go.dev/github.com/oleiade/gomme#Char) | Parses a single instance of a provided character. | `Char('$')` |
163 | | [`AnyChar`](https://pkg.go.dev/github.com/oleiade/gomme#AnyChar) | Parses a single instance of any character. | `AnyChar()` |
164 | | [`Alpha0`](https://pkg.go.dev/github.com/oleiade/gomme#Alpha0) | Parses zero or more alphabetical ASCII characters (case insensitive). | `Alpha0()` |
165 | | [`Alpha1`](https://pkg.go.dev/github.com/oleiade/gomme#Alpha1) | Parses one or more alphabetical ASCII characters (case insensitive). | `Alpha1()` |
166 | | [`Alphanumeric0`](https://pkg.go.dev/github.com/oleiade/gomme#Alphanumeric0) | Parses zero or more alphabetical and numerical ASCII characters (case insensitive). | `Alphanumeric0()` |
167 | | [`Alphanumeric1`](https://pkg.go.dev/github.com/oleiade/gomme#Alphanumeric1) | Parses one or more alphabetical and numerical ASCII characters (case insensitive). | `Alphanumeric1()` |
168 | | [`Digit0`](https://pkg.go.dev/github.com/oleiade/gomme#Digit0) | Parses zero or more numerical ASCII characters: 0-9. | `Digit0()` |
169 | | [`Digit1`](https://pkg.go.dev/github.com/oleiade/gomme#Digit1) | Parses one or more numerical ASCII characters: 0-9. | `Digit1()` |
170 | | [`HexDigit0`](https://pkg.go.dev/github.com/oleiade/gomme#HexDigit0) | Parses zero or more hexadecimal ASCII characters (case insensitive). | `HexDigit0()` |
171 | | [`HexDigit1`](https://pkg.go.dev/github.com/oleiade/gomme#HexDigit1) | Parses one or more hexadecimal ASCII characters (case insensitive). | `HexDigit1()` |
172 | | [`Whitespace0`](https://pkg.go.dev/github.com/oleiade/gomme#Whitespace0) | Parses zero or more whitespace ASCII characters: space, tab, carriage return, line feed. | `Whitespace0()` |
173 | | [`Whitespace1`](https://pkg.go.dev/github.com/oleiade/gomme#Whitespace1) | Parses one or more whitespace ASCII characters: space, tab, carriage return, line feed. | `Whitespace1()` |
174 | | [`LF`](https://pkg.go.dev/github.com/oleiade/gomme#LF) | Parses a single new line character '\n'. | `LF()` |
175 | | [`CRLF`](https://pkg.go.dev/github.com/oleiade/gomme#CRLF) | Parses a '\r\n' string. | `CRLF()` |
176 | | [`OneOf`](https://pkg.go.dev/github.com/oleiade/gomme#OneOf) | Parses one of the provided characters. Equivalent to using `Alternative` over a series of `Char` parsers. | `OneOf('a', 'b' , 'c')` |
177 | | [`Satisfy`](https://pkg.go.dev/github.com/oleiade/gomme#Satisfy) | Parses a single character, asserting that it matches the provided predicate. The predicate function takes a `rune` as input and returns a `bool`. `Satisfy` is useful for building custom character matchers. | `Satisfy(func(c rune)bool { return c == '{' || c == '[' })` |
178 | | [`Space`](https://pkg.go.dev/github.com/oleiade/gomme#Space) | Parses a single space character ' '. | `Space()` |
179 | | [`Tab`](https://pkg.go.dev/github.com/oleiade/gomme#Tab) | Parses a single tab character '\t'. | `Tab()` |
180 | | [`Int64`](https://pkg.go.dev/github.com/oleiade/gomme#Int64) | Parses an `int64` from its textual representation. | `Int64()` |
181 | | [`Int8`](https://pkg.go.dev/github.com/oleiade/gomme#Int8) | Parses an `int8` from its textual representation. | `Int8()` |
182 | | [`UInt8`](https://pkg.go.dev/github.com/oleiade/gomme#UInt8) | Parses a `uint8` from its textual representation. | `UInt8()` |
183 |
184 | #### Combinators for Sequences
185 |
186 | | Combinator | Description | Example |
187 | | :--- | :--- | :--- |
188 | | [`Preceded`](https://pkg.go.dev/github.com/oleiade/gomme#Preceded) | Applies the prefix parser and discards its result. It then applies the main parser and returns its result. It discards the prefix value. It proves useful when looking for data prefixed with a pattern. For instance, when parsing a value, prefixed with its name. | `Preceded(Token("name:"), Alpha1())` |
189 | | [`Terminated`](https://pkg.go.dev/github.com/oleiade/gomme#Terminated) | Applies the main parser, followed by the suffix parser whom it discards the result of, and returns the result of the main parser. Note that if the suffix parser fails, the whole operation fails, regardless of the result of the main parser. It proves useful when looking for suffixed data while not interested in retaining the suffix value itself. For instance, when parsing a value followed by a control character. | `Terminated(Digit1(), LF())` |
190 | | [`Delimited`](https://pkg.go.dev/github.com/oleiade/gomme#Delimited) | Applies the prefix parser, the main parser, followed by the suffix parser, discards the result of both the prefix and suffix parsers, and returns the result of the main parser. Note that if any of the prefix or suffix parsers fail, the whole operation fails, regardless of the result of the main parser. It proves useful when looking for data surrounded by patterns that help identify it, without retaining those patterns' values. For instance, when parsing a value, prefixed by its name and followed by a control character. | `Delimited(Token("name:"), Digit1(), LF())` |
191 | | [`Pair`](https://pkg.go.dev/github.com/oleiade/gomme#Pair) | Applies two parsers in a row and returns a pair container holding both their result values. | `Pair(Alpha1(), Token("cm"))` |
192 | | [`SeparatedPair`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedPair) | Applies a left parser, a separator parser, and a right parser, discards the result of the separator parser, and returns the result of the left and right parsers as a pair container holding the result values. | `SeparatedPair(Alpha1(), Token(":"), Alpha1())` |
193 | | [`Sequence`](https://pkg.go.dev/github.com/oleiade/gomme#Sequence) | Applies a sequence of parsers sharing the same signature. If any of the provided parsers fail, the whole operation fails. | `Sequence(SeparatedPair(Token("name"), Char(':'), Alpha1()), SeparatedPair(Token("height"), Char(':'), Digit1()))` |
194 |
195 | #### Combinators for Applying Parsers Many Times
196 |
197 | | Combinator | Description | Example |
198 | | :--- | :--- | :--- |
199 | | [`Count`](https://pkg.go.dev/github.com/oleiade/gomme#Count) | Applies the provided parser `count` times. If the parser fails before it can be applied `count` times, the operation fails. It proves useful whenever one needs to parse the same pattern many times in a row. | `Count(3, OneOf('a', 'b', 'c'))` |
200 | | [`Many0`](https://pkg.go.dev/github.com/oleiade/gomme#Many0) | Keeps applying the provided parser until it fails and returns a slice of all the results. Specifically, if the parser fails to match, `Many0` still succeeds, returning an empty slice of results. It proves useful when trying to consume a repeated pattern, regardless of whether there's any match, like when trying to parse any number of whitespaces in a row. | `Many0(Char(' '))` |
201 | | [`Many1`](https://pkg.go.dev/github.com/oleiade/gomme#Many1) | Keeps applying the provided parser until it fails and returns a slice of all the results. If the parser fails to match at least once, `Many1` fails. It proves useful when trying to consume a repeated pattern, like any number of whitespaces in a row, ensuring that it appears at least once. | `Many1(LF())` |
202 | | [`SeparatedList0`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedList0) | | |
203 | | [`SeparatedList1`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedList1) | | |
204 |
205 | #### Combinators for Choices
206 |
207 | | Combinator | Description | Example |
208 | | :--- | :--- | :--- |
209 | | [`Alternative`](https://pkg.go.dev/github.com/oleiade/gomme#Alternative) | Tests a list of parsers, one by one, until one succeeds. Note that all parsers must share the same signature (`Parser[I, O]`). | `Alternative(Token("abc"), Token("123"))` |
210 |
211 |
212 | ## Installation
213 |
214 | Add the library to your Go project with the following command:
215 |
216 | ```bash
217 | go get github.com/oleiade/gomme@latest
218 | ```
219 |
220 | ## Frequently asked questions
221 |
222 | ### Q: What are parser combinators?
223 |
224 | **A**: Parser combinators offer a new way of building parsers. Instead of writing a complex parser that analyzes an entire format, you create small, simple parsers that handle the smallest units of the format. These small parsers can then be combined to build more complex parsers. It's a bit like using building blocks to construct whatever structure you want.
225 |
226 | ### Q: Why would I use parser combinators instead of a specific parser?
227 |
228 | **A**: Parser combinators are incredibly flexible and intuitive. Once you're familiar with them, they enable you to quickly create, maintain, and modify parsers. They offer you a high degree of freedom in designing your parser and how it's used.
229 |
230 | ### Q: Where can I learn more about parser combinators?
231 |
232 | **A**: Here are some resources we recommend:
233 | - [You could have invented parser combinators](https://theorangeduck.com/page/you-could-have-invented-parser-combinators)
234 | - [Functional Parsing](https://www.youtube.com/watch?v=dDtZLm7HIJs)
235 | - [Building a Mapping Language in Go with Parser Combinators](https://www.youtube.com/watch?v=JiViND-bpmw)
236 |
237 | ## Acknowledgements
238 |
239 | We can frankly take close to zero credit for this library, apart from the work put into assembling the already existing elements of theory and implementation into a single autonomous project.
240 |
241 | We've stood on the shoulders of giants to create Gomme. The library draws heavily on the extensive theoretical work done in the parser combinators space, and we owe a huge debt to Rust's [nom](https://github.com/Geal/nom) and [benthos'](https://github.com/benthosdev/benthos) blob lang implementation. Our goal was to consolidate these diverse elements into a single, easy-to-use Go library.
242 | ## Authors
243 |
244 | - [@oleiade](https://github.com/oleiade)
245 |
--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 | # TODOS
2 |
3 | ## Dos
4 |
5 | - [ ] Create `Bytes` interface type for bytes file's content
6 | - [ ] Create `String` or `Characters` interface type for characters file's content
7 | - [ ] Sort Out Fatal/Non-Fatal errors (distinguish whether a parser failed in an expected manner, or if the whole parsing should be interrupted)
8 | - [ ] Reduce Int8/Int64 allocations (their parsers could be somewhat simplified?)
9 | - [ ] Add combinator to parse whitespace (+ helper for multispace0/1?)
10 | - [ ] Refactor TakeWhileOneOf to be "just" TakeWhile
11 | - [ ] Refactor space to be of the form space0 and space1
12 | - [ ] Rename `LF` to `Newline`
13 | - [X] Document Recognize as explicitly as possible
14 | - [X] Add Examples
15 | - [x] Add Benchmarks
16 | - [x] Make sure the Failure messages are properly cased
17 | - [x] Rename `p` parser arguments to `parse` for clearer code
18 | - [x] Add `Many0` and `Many1` parsers
19 |
20 | ## Maybes
21 |
22 | - [ ] Rename project to `crayon`?
23 | - [ ] Rename `Preceded` to `Prefixed`
24 | - [ ] Rename `Terminated` to `Suffixed`
25 | - [ ] Rename `Sequence` to `List`?
26 | - [ ] Rename `Satisfy` to `Satisfies`?
27 | - [X] Introduce `SeparatedList` as a result of previous?
28 | - [X] Create `bytes.go` file to distinguish from characters
29 |
30 | ## Track
31 |
32 | - [ ] Chase allocations, document them, and reduce their amount as much as possible
33 |
34 | ## NoNos
35 | - [X] Add an `ErrInfiniteLoop` (`Many0`)
--------------------------------------------------------------------------------
/branch.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | // Alternative tests a list of parsers in order, one by one, until one
4 | // succeeds.
5 | //
6 | // If none of the parsers succeed, this combinator produces an error Result.
7 | func Alternative[Input Bytes, Output any](parsers ...Parser[Input, Output]) Parser[Input, Output] {
8 | return func(input Input) Result[Output, Input] {
9 | for _, parse := range parsers {
10 | result := parse(input)
11 | if result.Err == nil {
12 | return result
13 | }
14 | }
15 |
16 | return Failure[Input, Output](NewError(input, "Alternative"), input)
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/branch_test.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestAlternative(t *testing.T) {
10 | t.Parallel()
11 |
12 | type args struct {
13 | p Parser[string, string]
14 | }
15 | testCases := []struct {
16 | name string
17 | args args
18 | input string
19 | wantErr bool
20 | wantOutput string
21 | wantRemaining string
22 | }{
23 | {
24 | name: "head matching parser should succeed",
25 | input: "123",
26 | args: args{
27 | p: Alternative(Digit1[string](), Alpha0[string]()),
28 | },
29 | wantErr: false,
30 | wantOutput: "123",
31 | wantRemaining: "",
32 | },
33 | {
34 | name: "matching parser should succeed",
35 | input: "1",
36 | args: args{
37 | p: Alternative(Digit1[string](), Alpha0[string]()),
38 | },
39 | wantErr: false,
40 | wantOutput: "1",
41 | wantRemaining: "",
42 | },
43 | {
44 | name: "no matching parser should fail",
45 | input: "$%^*",
46 | args: args{
47 | p: Alternative(Digit1[string](), Alpha1[string]()),
48 | },
49 | wantErr: true,
50 | wantOutput: "",
51 | wantRemaining: "$%^*",
52 | },
53 | {
54 | name: "empty input should fail",
55 | input: "",
56 | args: args{
57 | p: Alternative(Digit1[string](), Alpha1[string]()),
58 | },
59 | wantErr: true,
60 | wantOutput: "",
61 | wantRemaining: "",
62 | },
63 | }
64 | for _, tc := range testCases {
65 | tc := tc
66 |
67 | t.Run(tc.name, func(t *testing.T) {
68 | t.Parallel()
69 |
70 | gotResult := tc.args.p(tc.input)
71 | if (gotResult.Err != nil) != tc.wantErr {
72 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
73 | }
74 |
75 | // testify makes it easier comparing slices
76 | assert.Equal(t,
77 | tc.wantOutput, gotResult.Output,
78 | "got output %v, want output %v", gotResult.Output, tc.wantOutput,
79 | )
80 |
81 | if gotResult.Remaining != tc.wantRemaining {
82 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
83 | }
84 | })
85 | }
86 | }
87 |
88 | func BenchmarkAlternative(b *testing.B) {
89 | p := Alternative(Digit1[string](), Alpha1[string]())
90 |
91 | for i := 0; i < b.N; i++ {
92 | p("123")
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/bytes.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 | )
7 |
8 | // Take returns a subset of the input of size `count`.
9 | func Take[Input Bytes](count uint) Parser[Input, Input] {
10 | return func(input Input) Result[Input, Input] {
11 | if len(input) == 0 && count > 0 {
12 | return Failure[Input, Input](NewError(input, "TakeUntil"), input)
13 | }
14 |
15 | if uint(len(input)) < count {
16 | return Failure[Input, Input](NewError(input, "Take"), input)
17 | }
18 |
19 | return Success(input[:count], input[count:])
20 | }
21 | }
22 |
23 | // TakeUntil parses any number of characters until the provided parser is successful.
24 | // If the provided parser is not successful, the parser fails, and the entire input is
25 | // returned as the Result's Remaining.
26 | func TakeUntil[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, Input] {
27 | return func(input Input) Result[Input, Input] {
28 | if len(input) == 0 {
29 | return Failure[Input, Input](NewError(input, "TakeUntil"), input)
30 | }
31 |
32 | pos := 0
33 | for ; pos < len(input); pos++ {
34 | current := input[pos:]
35 | res := parse(current)
36 | if res.Err == nil {
37 | return Success(input[:pos], input[pos:])
38 | }
39 |
40 | continue
41 | }
42 |
43 | return Failure[Input, Input](NewError(input, "TakeUntil"), input)
44 | }
45 | }
46 |
47 | // TakeWhileMN returns the longest input subset that matches the predicates, within
48 | // the boundaries of `atLeast` <= len(input) <= `atMost`.
49 | //
50 | // If the provided parser is not successful or the pattern is out of the
51 | // `atLeast` <= len(input) <= `atMost` range, the parser fails, and the entire
52 | // input is returned as the Result's Remaining.
53 | func TakeWhileMN[Input Bytes](atLeast, atMost uint, predicate func(rune) bool) Parser[Input, Input] {
54 | return func(input Input) Result[Input, Input] {
55 | if len(input) == 0 {
56 | return Failure[Input, Input](NewError(input, "TakeWhileMN"), input)
57 | }
58 |
59 | // Input is shorter than the minimum expected matching length,
60 | // it is thus not possible to match it within the established
61 | // constraints.
62 | if uint(len(input)) < atLeast {
63 | return Failure[Input, Input](NewError(input, "TakeWhileMN"), input)
64 | }
65 |
66 | lastValidPos := 0
67 | for idx := 0; idx < len(input); idx++ {
68 | if uint(idx) == atMost {
69 | break
70 | }
71 |
72 | matched := predicate(rune(input[idx]))
73 | if !matched {
74 | if uint(idx) < atLeast {
75 | return Failure[Input, Input](NewError(input, "TakeWhileMN"), input)
76 | }
77 |
78 | return Success(input[:idx], input[idx:])
79 | }
80 |
81 | lastValidPos++
82 | }
83 |
84 | return Success(input[:lastValidPos], input[lastValidPos:])
85 | }
86 | }
87 |
88 | // Token parses a token from the input, and returns the part of the input that
89 | // matched the token.
90 | // If the token could not be found, the parser returns an error result.
91 | func Token[Input Bytes](token string) Parser[Input, Input] {
92 | return func(input Input) Result[Input, Input] {
93 | if !strings.HasPrefix(string(input), token) {
94 | return Failure[Input, Input](NewError(input, fmt.Sprintf("Token(%s)", token)), input)
95 | }
96 |
97 | return Success(input[:len(token)], input[len(token):])
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/bytes_test.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
8 | func TestTake(t *testing.T) {
9 | t.Parallel()
10 |
11 | type args struct {
12 | p Parser[string, string]
13 | }
14 | testCases := []struct {
15 | name string
16 | args args
17 | input string
18 | wantErr bool
19 | wantOutput string
20 | wantRemaining string
21 | }{
22 | {
23 | name: "taking less than input size should succeed",
24 | input: "1234567",
25 | args: args{
26 | p: Take[string](6),
27 | },
28 | wantErr: false,
29 | wantOutput: "123456",
30 | wantRemaining: "7",
31 | },
32 | {
33 | name: "taking exact input size should succeed",
34 | input: "123456",
35 | args: args{
36 | p: Take[string](6),
37 | },
38 | wantErr: false,
39 | wantOutput: "123456",
40 | wantRemaining: "",
41 | },
42 | {
43 | name: "taking more than input size should fail",
44 | input: "123",
45 | args: args{
46 | p: Take[string](6),
47 | },
48 | wantErr: true,
49 | wantOutput: "",
50 | wantRemaining: "123",
51 | },
52 | {
53 | name: "taking from empty input should fail",
54 | input: "",
55 | args: args{
56 | p: Take[string](6),
57 | },
58 | wantErr: true,
59 | wantOutput: "",
60 | wantRemaining: "",
61 | },
62 | }
63 | for _, tc := range testCases {
64 | tc := tc
65 |
66 | t.Run(tc.name, func(t *testing.T) {
67 | t.Parallel()
68 |
69 | gotResult := tc.args.p(tc.input)
70 | if (gotResult.Err != nil) != tc.wantErr {
71 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
72 | }
73 |
74 | if gotResult.Output != tc.wantOutput {
75 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
76 | }
77 |
78 | if gotResult.Remaining != tc.wantRemaining {
79 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
80 | }
81 | })
82 | }
83 | }
84 |
85 | func BenchmarkTake(b *testing.B) {
86 | p := Take[string](6)
87 |
88 | b.ResetTimer()
89 | for i := 0; i < b.N; i++ {
90 | p("123456")
91 | }
92 | }
93 |
94 | func TestTakeUntil(t *testing.T) {
95 | t.Parallel()
96 |
97 | type args struct {
98 | p Parser[string, string]
99 | }
100 | testCases := []struct {
101 | name string
102 | args args
103 | input string
104 | wantErr bool
105 | wantOutput string
106 | wantRemaining string
107 | }{
108 | {
109 | name: "matching parser should succeed",
110 | input: "abc123",
111 | args: args{
112 | p: TakeUntil(Digit1[string]()),
113 | },
114 | wantErr: false,
115 | wantOutput: "abc",
116 | wantRemaining: "123",
117 | },
118 | {
119 | name: "immediately matching parser should succeed",
120 | input: "123",
121 | args: args{
122 | p: TakeUntil(Digit1[string]()),
123 | },
124 | wantErr: false,
125 | wantOutput: "",
126 | wantRemaining: "123",
127 | },
128 | {
129 | name: "no match should fail",
130 | input: "abcdef",
131 | args: args{
132 | p: TakeUntil(Digit1[string]()),
133 | },
134 | wantErr: true,
135 | wantOutput: "",
136 | wantRemaining: "abcdef",
137 | },
138 | {
139 | name: "empty input should fail",
140 | input: "",
141 | args: args{
142 | p: TakeUntil(Digit1[string]()),
143 | },
144 | wantErr: true,
145 | wantOutput: "",
146 | wantRemaining: "",
147 | },
148 | }
149 | for _, tc := range testCases {
150 | tc := tc
151 |
152 | t.Run(tc.name, func(t *testing.T) {
153 | t.Parallel()
154 |
155 | gotResult := tc.args.p(tc.input)
156 | if (gotResult.Err != nil) != tc.wantErr {
157 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
158 | }
159 |
160 | if gotResult.Output != tc.wantOutput {
161 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
162 | }
163 |
164 | if gotResult.Remaining != tc.wantRemaining {
165 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
166 | }
167 | })
168 | }
169 | }
170 |
171 | func BenchmarkTakeUntil(b *testing.B) {
172 | p := TakeUntil(Digit1[string]())
173 |
174 | b.ResetTimer()
175 | for i := 0; i < b.N; i++ {
176 | p("abc123")
177 | }
178 | }
179 |
180 | func TestTakeWhileMN(t *testing.T) {
181 | t.Parallel()
182 |
183 | type args struct {
184 | p Parser[string, string]
185 | }
186 | testCases := []struct {
187 | name string
188 | args args
189 | input string
190 | wantErr bool
191 | wantOutput string
192 | wantRemaining string
193 | }{
194 | {
195 | name: "parsing input with enough characters and partially matching predicated should succeed",
196 | input: "latin123",
197 | args: args{
198 | p: TakeWhileMN[string](3, 6, IsAlpha),
199 | },
200 | wantErr: false,
201 | wantOutput: "latin",
202 | wantRemaining: "123",
203 | },
204 | {
205 | name: "parsing input longer than atLeast and atMost should succeed",
206 | input: "lengthy",
207 | args: args{
208 | p: TakeWhileMN[string](3, 6, IsAlpha),
209 | },
210 | wantErr: false,
211 | wantOutput: "length",
212 | wantRemaining: "y",
213 | },
214 | {
215 | name: "parsing input longer than atLeast and shorter than atMost should succeed",
216 | input: "latin",
217 | args: args{
218 | p: TakeWhileMN[string](3, 6, IsAlpha),
219 | },
220 | wantErr: false,
221 | wantOutput: "latin",
222 | wantRemaining: "",
223 | },
224 | {
225 | name: "parsing empty input should fail",
226 | input: "",
227 | args: args{
228 | p: TakeWhileMN[string](3, 6, IsAlpha),
229 | },
230 | wantErr: true,
231 | wantOutput: "",
232 | wantRemaining: "",
233 | },
234 | {
235 | name: "parsing too short input should fail",
236 | input: "ed",
237 | args: args{
238 | p: TakeWhileMN[string](3, 6, IsAlpha),
239 | },
240 | wantErr: true,
241 | wantOutput: "",
242 | wantRemaining: "ed",
243 | },
244 | {
245 | name: "parsing with non-matching predicate should fail",
246 | input: "12345",
247 | args: args{
248 | p: TakeWhileMN[string](3, 6, IsAlpha),
249 | },
250 | wantErr: true,
251 | wantOutput: "",
252 | wantRemaining: "12345",
253 | },
254 | }
255 | for _, tc := range testCases {
256 | tc := tc
257 |
258 | t.Run(tc.name, func(t *testing.T) {
259 | t.Parallel()
260 |
261 | gotResult := tc.args.p(tc.input)
262 | if (gotResult.Err != nil) != tc.wantErr {
263 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
264 | }
265 |
266 | if gotResult.Output != tc.wantOutput {
267 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
268 | }
269 |
270 | if gotResult.Remaining != tc.wantRemaining {
271 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
272 | }
273 | })
274 | }
275 | }
276 |
277 | func BenchmarkTakeWhileMN(b *testing.B) {
278 | p := TakeWhileMN[string](3, 6, IsAlpha)
279 |
280 | b.ResetTimer()
281 | for i := 0; i < b.N; i++ {
282 | p("latin")
283 | }
284 | }
285 |
286 | // TakeWhileOneOf parses any number of characters present in the
287 | // provided collection of runes.
288 | func TakeWhileOneOf[I Bytes](collection ...rune) Parser[I, I] {
289 | index := make(map[rune]struct{}, len(collection))
290 |
291 | for _, r := range collection {
292 | index[r] = struct{}{}
293 | }
294 |
295 | expected := fmt.Sprintf("chars(%v)", string(collection))
296 |
297 | return func(input I) Result[I, I] {
298 | if len(input) == 0 {
299 | return Failure[I, I](NewError(input, expected), input)
300 | }
301 |
302 | pos := 0
303 | for ; pos < len(input); pos++ {
304 | _, exists := index[rune(input[pos])]
305 | if !exists {
306 | if pos == 0 {
307 | return Failure[I, I](NewError(input, expected), input)
308 | }
309 |
310 | break
311 | }
312 | }
313 |
314 | return Success(input[:pos], input[pos:])
315 | }
316 | }
317 |
318 | func TestTakeWhileOneOf(t *testing.T) {
319 | t.Parallel()
320 |
321 | type args struct {
322 | p Parser[string, string]
323 | }
324 | testCases := []struct {
325 | name string
326 | args args
327 | input string
328 | wantErr bool
329 | wantOutput string
330 | wantRemaining string
331 | }{
332 | {
333 | name: "matching parser should succeed",
334 | input: "abc123",
335 | args: args{
336 | p: TakeWhileOneOf[string]('a', 'b', 'c'),
337 | },
338 | wantErr: false,
339 | wantOutput: "abc",
340 | wantRemaining: "123",
341 | },
342 | {
343 | name: "no match should fail",
344 | input: "123",
345 | args: args{
346 | p: TakeWhileOneOf[string]('a', 'b', 'c'),
347 | },
348 | wantErr: true,
349 | wantOutput: "",
350 | wantRemaining: "123",
351 | },
352 | {
353 | name: "empty input should fail",
354 | input: "",
355 | args: args{
356 | p: TakeWhileOneOf[string]('a', 'b', 'c'),
357 | },
358 | wantErr: true,
359 | wantOutput: "",
360 | wantRemaining: "",
361 | },
362 | }
363 | for _, tc := range testCases {
364 | tc := tc
365 |
366 | t.Run(tc.name, func(t *testing.T) {
367 | t.Parallel()
368 |
369 | gotResult := tc.args.p(tc.input)
370 | if (gotResult.Err != nil) != tc.wantErr {
371 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
372 | }
373 |
374 | if gotResult.Output != tc.wantOutput {
375 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
376 | }
377 |
378 | if gotResult.Remaining != tc.wantRemaining {
379 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
380 | }
381 | })
382 | }
383 | }
384 |
385 | func BenchmarkTakeWhileOneOf(b *testing.B) {
386 | p := TakeWhileOneOf[string]('a', 'b', 'c')
387 |
388 | b.ResetTimer()
389 | for i := 0; i < b.N; i++ {
390 | p("abc123")
391 | }
392 | }
393 |
394 | func TestToken(t *testing.T) {
395 | t.Parallel()
396 |
397 | testCases := []struct {
398 | name string
399 | parser Parser[string, string]
400 | input string
401 | wantErr bool
402 | wantOutput string
403 | wantRemaining string
404 | }{
405 | {
406 | name: "parsing a token from an input starting with it should succeed",
407 | parser: Token[string]("Bonjour"),
408 | input: "Bonjour tout le monde",
409 | wantErr: false,
410 | wantOutput: "Bonjour",
411 | wantRemaining: " tout le monde",
412 | },
413 | {
414 | name: "parsing a token from an non-matching input should fail",
415 | parser: Token[string]("Bonjour"),
416 | input: "Hello tout le monde",
417 | wantErr: true,
418 | wantOutput: "",
419 | wantRemaining: "Hello tout le monde",
420 | },
421 | {
422 | name: "parsing a token from an empty input should fail",
423 | parser: Token[string]("Bonjour"),
424 | input: "",
425 | wantErr: true,
426 | wantOutput: "",
427 | wantRemaining: "",
428 | },
429 | }
430 |
431 | for _, tc := range testCases {
432 | tc := tc
433 |
434 | t.Run(tc.name, func(t *testing.T) {
435 | t.Parallel()
436 |
437 | gotResult := tc.parser(tc.input)
438 | if (gotResult.Err != nil) != tc.wantErr {
439 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
440 | }
441 |
442 | if gotResult.Output != tc.wantOutput {
443 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
444 | }
445 |
446 | if gotResult.Remaining != tc.wantRemaining {
447 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
448 | }
449 | })
450 | }
451 | }
452 |
453 | func BenchmarkToken(b *testing.B) {
454 | parser := Token[string]("Bonjour")
455 |
456 | for i := 0; i < b.N; i++ {
457 | parser("Bonjour tout le monde")
458 | }
459 | }
460 |
--------------------------------------------------------------------------------
/characters.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | import (
4 | "strconv"
5 | )
6 |
7 | // Char parses a single character and matches it with
8 | // a provided candidate.
9 | func Char[Input Bytes](character rune) Parser[Input, rune] {
10 | return func(input Input) Result[rune, Input] {
11 | if len(input) == 0 || rune(input[0]) != character {
12 | return Failure[Input, rune](NewError(input, string(character)), input)
13 | }
14 |
15 | return Success(rune(input[0]), input[1:])
16 | }
17 | }
18 |
19 | // AnyChar parses any single character.
20 | func AnyChar[Input Bytes]() Parser[Input, rune] {
21 | return func(input Input) Result[rune, Input] {
22 | if len(input) == 0 {
23 | return Failure[Input, rune](NewError(input, "AnyChar"), input)
24 | }
25 |
26 | return Success(rune(input[0]), input[1:])
27 | }
28 | }
29 |
30 | // Alpha0 parses a zero or more lowercase or uppercase alphabetic characters: a-z, A-Z.
31 | // In the cases where the input is empty, or no terminating character is found, the parser
32 | // returns the input as is.
33 | func Alpha0[Input Bytes]() Parser[Input, Input] {
34 | return func(input Input) Result[Input, Input] {
35 | if len(input) == 0 {
36 | return Success(input, input)
37 | }
38 |
39 | lastAlphaPos := 0
40 | for idx := 0; idx < len(input); idx++ {
41 | if !IsAlpha(rune(input[idx])) {
42 | return Success(input[:idx], input[idx:])
43 | }
44 |
45 | lastAlphaPos++
46 | }
47 |
48 | return Success(input[:lastAlphaPos], input[lastAlphaPos:])
49 | }
50 | }
51 |
52 | // Alpha1 parses one or more lowercase or uppercase alphabetic characters: a-z, A-Z.
53 | // In the cases where the input doesn't hold enough data, or a terminating character
54 | // is found before any matching ones were, the parser returns an error result.
55 | func Alpha1[Input Bytes]() Parser[Input, Input] {
56 | return func(input Input) Result[Input, Input] {
57 | if len(input) == 0 {
58 | return Failure[Input, Input](NewError(input, "Alpha1"), input)
59 | }
60 |
61 | if !IsAlpha(rune(input[0])) {
62 | return Failure[Input, Input](NewError(input, "Alpha1"), input)
63 | }
64 |
65 | lastAlphaPos := 1
66 | for idx := 1; idx < len(input); idx++ {
67 | if !IsAlpha(rune(input[idx])) {
68 | return Success(input[:idx], input[idx:])
69 | }
70 |
71 | lastAlphaPos++
72 | }
73 |
74 | return Success(input[:lastAlphaPos], input[lastAlphaPos:])
75 | }
76 | }
77 |
78 | // Alphanumeric0 parses zero or more ASCII alphabetical or numerical characters: a-z, A-Z, 0-9.
79 | // In the cases where the input is empty, or no terminating character is found, the parser
80 | // returns the input as is.
81 | func Alphanumeric0[Input Bytes]() Parser[Input, Input] {
82 | return func(input Input) Result[Input, Input] {
83 | if len(input) == 0 {
84 | return Success(input, input)
85 | }
86 |
87 | lastDigitPos := 0
88 | for idx := 0; idx < len(input); idx++ {
89 | if !IsAlphanumeric(rune(input[idx])) {
90 | return Success(input[:idx], input[idx:])
91 | }
92 |
93 | lastDigitPos++
94 | }
95 |
96 | return Success(input[:lastDigitPos], input[lastDigitPos:])
97 | }
98 | }
99 |
100 | // Alphanumeric1 parses one or more alphabetical or numerical characters: a-z, A-Z, 0-9.
101 | // In the cases where the input doesn't hold enough data, or a terminating character
102 | // is found before any matching ones were, the parser returns an error result.
103 | func Alphanumeric1[Input Bytes]() Parser[Input, Input] {
104 | return func(input Input) Result[Input, Input] {
105 | if len(input) == 0 {
106 | return Failure[Input, Input](NewError(input, "Digit1"), input)
107 | }
108 |
109 | if !IsAlphanumeric(rune(input[0])) {
110 | return Failure[Input, Input](NewError(input, "Digit1"), input)
111 | }
112 |
113 | lastDigitPos := 1
114 | for idx := 1; idx < len(input); idx++ {
115 | if !IsAlphanumeric(rune(input[idx])) {
116 | return Success(input[:idx], input[idx:])
117 | }
118 |
119 | lastDigitPos++
120 | }
121 |
122 | return Success(input[:lastDigitPos], input[lastDigitPos:])
123 | }
124 | }
125 |
126 | // Digit0 parses zero or more ASCII numerical characters: 0-9.
127 | // In the cases where the input is empty, or no terminating character is found, the parser
128 | // returns the input as is.
129 | func Digit0[Input Bytes]() Parser[Input, Input] {
130 | return func(input Input) Result[Input, Input] {
131 | if len(input) == 0 {
132 | return Success(input, input)
133 | }
134 |
135 | lastDigitPos := 0
136 | for idx := 0; idx < len(input); idx++ {
137 | if !IsDigit(rune(input[idx])) {
138 | return Success(input[:idx], input[idx:])
139 | }
140 |
141 | lastDigitPos++
142 | }
143 |
144 | return Success(input[:lastDigitPos], input[lastDigitPos:])
145 | }
146 | }
147 |
148 | // Digit1 parses one or more numerical characters: 0-9.
149 | // In the cases where the input doesn't hold enough data, or a terminating character
150 | // is found before any matching ones were, the parser returns an error result.
151 | func Digit1[Input Bytes]() Parser[Input, Input] {
152 | return func(input Input) Result[Input, Input] {
153 | if len(input) == 0 {
154 | return Failure[Input, Input](NewError(input, "Digit1"), input)
155 | }
156 |
157 | if !IsDigit(rune(input[0])) {
158 | return Failure[Input, Input](NewError(input, "Digit1"), input)
159 | }
160 |
161 | lastDigitPos := 1
162 | for idx := 1; idx < len(input); idx++ {
163 | if !IsDigit(rune(input[idx])) {
164 | return Success(input[:idx], input[idx:])
165 | }
166 |
167 | lastDigitPos++
168 | }
169 |
170 | return Success(input[:lastDigitPos], input[lastDigitPos:])
171 | }
172 | }
173 |
174 | // HexDigit0 parses zero or more ASCII hexadecimal characters: a-f, A-F, 0-9.
175 | // In the cases where the input is empty, or no terminating character is found, the parser
176 | // returns the input as is.
177 | func HexDigit0[Input Bytes]() Parser[Input, Input] {
178 | return func(input Input) Result[Input, Input] {
179 | if len(input) == 0 {
180 | return Success(input, input)
181 | }
182 |
183 | lastDigitPos := 0
184 | for idx := 0; idx < len(input); idx++ {
185 | if !IsHexDigit(rune(input[idx])) {
186 | return Success(input[:idx], input[idx:])
187 | }
188 |
189 | lastDigitPos++
190 | }
191 |
192 | return Success(input[:lastDigitPos], input[lastDigitPos:])
193 | }
194 | }
195 |
196 | // HexDigit1 parses one or more ASCII hexadecimal characters: a-f, A-F, 0-9.
197 | // In the cases where the input doesn't hold enough data, or a terminating character
198 | // is found before any matching ones were, the parser returns an error result.
199 | func HexDigit1[Input Bytes]() Parser[Input, Input] {
200 | return func(input Input) Result[Input, Input] {
201 | if len(input) == 0 {
202 | return Failure[Input, Input](NewError(input, "HexDigit1"), input)
203 | }
204 |
205 | if !IsHexDigit(rune(input[0])) {
206 | return Failure[Input, Input](NewError(input, "HexDigit1"), input)
207 | }
208 |
209 | lastDigitPos := 1
210 | for idx := 1; idx < len(input); idx++ {
211 | if !IsHexDigit(rune(input[idx])) {
212 | return Success(input[:idx], input[idx:])
213 | }
214 |
215 | lastDigitPos++
216 | }
217 |
218 | return Success(input[:lastDigitPos], input[lastDigitPos:])
219 | }
220 | }
221 |
222 | // Whitespace0 parses zero or more whitespace characters: ' ', '\t', '\n', '\r'.
223 | // In the cases where the input is empty, or no terminating character is found, the parser
224 | // returns the input as is.
225 | func Whitespace0[Input Bytes]() Parser[Input, Input] {
226 | return func(input Input) Result[Input, Input] {
227 | if len(input) == 0 {
228 | return Success(input, input)
229 | }
230 |
231 | lastPos := 0
232 | for idx := 0; idx < len(input); idx++ {
233 | if !IsWhitespace(rune(input[idx])) {
234 | return Success(input[:idx], input[idx:])
235 | }
236 |
237 | lastPos++
238 | }
239 |
240 | return Success(input[:lastPos], input[lastPos:])
241 | }
242 | }
243 |
244 | // Whitespace1 parses one or more whitespace characters: ' ', '\t', '\n', '\r'.
245 | // In the cases where the input doesn't hold enough data, or a terminating character
246 | // is found before any matching ones were, the parser returns an error result.
247 | func Whitespace1[Input Bytes]() Parser[Input, Input] {
248 | return func(input Input) Result[Input, Input] {
249 | if len(input) == 0 {
250 | return Failure[Input, Input](NewError(input, "WhiteSpace1"), input)
251 | }
252 |
253 | if !IsWhitespace(rune(input[0])) {
254 | return Failure[Input, Input](NewError(input, "WhiteSpace1"), input)
255 | }
256 |
257 | lastPos := 1
258 | for idx := 1; idx < len(input); idx++ {
259 | if !IsWhitespace(rune(input[idx])) {
260 | return Success(input[:idx], input[idx:])
261 | }
262 |
263 | lastPos++
264 | }
265 |
266 | return Success(input[:lastPos], input[lastPos:])
267 | }
268 | }
269 |
270 | // LF parses a line feed `\n` character.
271 | func LF[Input Bytes]() Parser[Input, rune] {
272 | return func(input Input) Result[rune, Input] {
273 | if len(input) == 0 || input[0] != '\n' {
274 | return Failure[Input, rune](NewError(input, "LF"), input)
275 | }
276 |
277 | return Success(rune(input[0]), input[1:])
278 | }
279 | }
280 |
281 | // CR parses a carriage return `\r` character.
282 | func CR[Input Bytes]() Parser[Input, rune] {
283 | return func(input Input) Result[rune, Input] {
284 | if len(input) == 0 || input[0] != '\r' {
285 | return Failure[Input, rune](NewError(input, "CR"), input)
286 | }
287 |
288 | return Success(rune(input[0]), input[1:])
289 | }
290 | }
291 |
292 | // CRLF parses the string `\r\n`.
293 | func CRLF[Input Bytes]() Parser[Input, Input] {
294 | return func(input Input) Result[Input, Input] {
295 | if len(input) < 2 || (input[0] != '\r' || input[1] != '\n') {
296 | return Failure[Input, Input](NewError(input, "CRLF"), input)
297 | }
298 |
299 | return Success(input[:2], input[2:])
300 | }
301 | }
302 |
303 | // OneOf parses a single character from the given set of characters.
304 | func OneOf[Input Bytes](collection ...rune) Parser[Input, rune] {
305 | return func(input Input) Result[rune, Input] {
306 | if len(input) == 0 {
307 | return Failure[Input, rune](NewError(input, "OneOf"), input)
308 | }
309 |
310 | for _, c := range collection {
311 | if rune(input[0]) == c {
312 | return Success(rune(input[0]), input[1:])
313 | }
314 | }
315 |
316 | return Failure[Input, rune](NewError(input, "OneOf"), input)
317 | }
318 | }
319 |
320 | // Satisfy parses a single character, and ensures that it satisfies the given predicate.
321 | func Satisfy[Input Bytes](predicate func(rune) bool) Parser[Input, rune] {
322 | return func(input Input) Result[rune, Input] {
323 | if len(input) == 0 {
324 | return Failure[Input, rune](NewError(input, "Satisfy"), input)
325 | }
326 |
327 | if !predicate(rune(input[0])) {
328 | return Failure[Input, rune](NewError(input, "Satisfy"), input)
329 | }
330 |
331 | return Success(rune(input[0]), input[1:])
332 | }
333 | }
334 |
335 | // Space parses a space character.
336 | func Space[Input Bytes]() Parser[Input, rune] {
337 | return func(input Input) Result[rune, Input] {
338 | if len(input) == 0 || input[0] != ' ' {
339 | return Failure[Input, rune](NewError(input, "Space"), input)
340 | }
341 |
342 | return Success(rune(input[0]), input[1:])
343 | }
344 | }
345 |
346 | // Tab parses a tab character.
347 | func Tab[Input Bytes]() Parser[Input, rune] {
348 | return func(input Input) Result[rune, Input] {
349 | if len(input) == 0 || input[0] != '\t' {
350 | return Failure[Input, rune](NewError(input, "Tab"), input)
351 | }
352 |
353 | return Success(rune(input[0]), input[1:])
354 | }
355 | }
356 |
357 | // Int64 parses an integer from the input, and returns the part of the input that
358 | // matched the integer.
359 | func Int64[Input Bytes]() Parser[Input, int64] {
360 | return func(input Input) Result[int64, Input] {
361 | parser := Recognize(Sequence(Optional(Token[Input]("-")), Digit1[Input]()))
362 |
363 | result := parser(input)
364 | if result.Err != nil {
365 | return Failure[Input, int64](NewError(input, "Int64"), input)
366 | }
367 |
368 | n, err := strconv.ParseInt(string(result.Output), 10, 64)
369 | if err != nil {
370 | return Failure[Input, int64](NewError(input, "Int64"), input)
371 | }
372 |
373 | return Success(n, result.Remaining)
374 | }
375 | }
376 |
377 | // Int8 parses an 8-bit integer from the input,
378 | // and returns the part of the input that matched the integer.
379 | func Int8[Input Bytes]() Parser[Input, int8] {
380 | return func(input Input) Result[int8, Input] {
381 | parser := Recognize(Sequence(Optional(Token[Input]("-")), Digit1[Input]()))
382 |
383 | result := parser(input)
384 | if result.Err != nil {
385 | return Failure[Input, int8](NewError(input, "Int8"), input)
386 | }
387 |
388 | n, err := strconv.ParseInt(string(result.Output), 10, 8)
389 | if err != nil {
390 | return Failure[Input, int8](NewError(input, "Int8"), input)
391 | }
392 |
393 | return Success(int8(n), result.Remaining)
394 | }
395 | }
396 |
397 | // UInt8 parses an 8-bit integer from the input,
398 | // and returns the part of the input that matched the integer.
399 | func UInt8[Input Bytes]() Parser[Input, uint8] {
400 | return func(input Input) Result[uint8, Input] {
401 | result := Digit1[Input]()(input)
402 | if result.Err != nil {
403 | return Failure[Input, uint8](NewError(input, "UInt8"), input)
404 | }
405 |
406 | n, err := strconv.ParseUint(string(result.Output), 10, 8)
407 | if err != nil {
408 | return Failure[Input, uint8](NewError(input, "UInt8"), input)
409 | }
410 |
411 | return Success(uint8(n), result.Remaining)
412 | }
413 | }
414 |
415 | // IsAlpha returns true if the rune is an alphabetic character.
416 | func IsAlpha(c rune) bool {
417 | return IsLowAlpha(c) || IsUpAlpha(c)
418 | }
419 |
// IsLowAlpha reports whether the rune is a lowercase ASCII letter (a-z).
func IsLowAlpha(c rune) bool {
	return 'a' <= c && c <= 'z'
}
424 |
// IsUpAlpha reports whether the rune is an uppercase ASCII letter (A-Z).
func IsUpAlpha(c rune) bool {
	return 'A' <= c && c <= 'Z'
}
429 |
// IsDigit reports whether the rune is an ASCII decimal digit (0-9).
func IsDigit(c rune) bool {
	return '0' <= c && c <= '9'
}
434 |
435 | // IsAlphanumeric returns true if the rune is an alphanumeric character.
436 | func IsAlphanumeric(c rune) bool {
437 | return IsAlpha(c) || IsDigit(c)
438 | }
439 |
440 | // IsHexDigit returns true if the rune is a hexadecimal digit.
441 | func IsHexDigit(c rune) bool {
442 | return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
443 | }
444 |
// IsControl reports whether the rune is an ASCII control character: any
// value from 0 to 31, or 127.
func IsControl(c rune) bool {
	if c == 127 {
		return true
	}

	return 0 <= c && c < 32
}
449 |
// IsWhitespace reports whether the rune is one of the whitespace characters
// ' ', '\t', '\n' or '\r'.
func IsWhitespace(c rune) bool {
	switch c {
	case ' ', '\t', '\n', '\r':
		return true
	default:
		return false
	}
}
453 |
--------------------------------------------------------------------------------
/combinators.go:
--------------------------------------------------------------------------------
1 | // Package gomme implements a parser combinator library.
2 | // It provides a toolkit for developers to build reliable, fast, flexible, and easy-to-develop and maintain parsers
3 | // for both textual and binary formats. It extensively uses the recent introduction of Generics in the Go programming
4 | // language to offer flexibility in how combinators can be mixed and matched to produce the desired output while
5 | // providing as much compile-time type safety as possible.
6 | package gomme
7 |
8 | // FIXME: Ideally, I would want the combinators working with sequences
9 | // to produce somewhat detailed errors, and tell me which of the combinators failed
10 |
11 | // Bytes is the type constraint for parser input: either a string or a []byte.
12 | type Bytes interface {
13 | string | []byte
14 | }
15 |
16 | // Separator is the type constraint for separator values: a rune, a byte or a string.
17 | type Separator interface {
18 | rune | byte | string
19 | }
20 |
21 | // Result is what a Parser returns: the parsed Output, the Remaining unparsed input, and an Err that is non-nil when parsing failed.
22 | type Result[Output any, Remaining Bytes] struct {
23 | Output Output
24 | Err *Error[Remaining]
25 | Remaining Remaining
26 | }
27 |
28 | // Parser is a function that takes an Input and returns a Result holding the parsed Output and the remaining Input.
29 | type Parser[Input Bytes, Output any] func(input Input) Result[Output, Input]
30 |
31 | // Success creates a Result with a output set from
32 | // the result of a successful parsing.
33 | func Success[Output any, Remaining Bytes](output Output, r Remaining) Result[Output, Remaining] {
34 | return Result[Output, Remaining]{output, nil, r}
35 | }
36 |
37 | // Failure creates a Result with an error set from
38 | // the result of a failed parsing.
39 | // TODO: The Error type could be generic too
40 | func Failure[Input Bytes, Output any](err *Error[Input], input Input) Result[Output, Input] {
41 | var output Output
42 | return Result[Output, Input]{output, err, input}
43 | }
44 |
45 | // Map applies a function to the result of a parser.
46 | func Map[Input Bytes, ParserOutput any, MapperOutput any](parse Parser[Input, ParserOutput], fn func(ParserOutput) (MapperOutput, error)) Parser[Input, MapperOutput] {
47 | return func(input Input) Result[MapperOutput, Input] {
48 | res := parse(input)
49 | if res.Err != nil {
50 | return Failure[Input, MapperOutput](NewError(input, "Map"), input)
51 | }
52 |
53 | output, err := fn(res.Output)
54 | if err != nil {
55 | return Failure[Input, MapperOutput](NewError(input, err.Error()), input)
56 | }
57 |
58 | return Success(output, res.Remaining)
59 | }
60 | }
61 |
62 | // Optional applies a an optional child parser. Will return nil
63 | // if not successful.
64 | //
65 | // N.B: unless a FatalError is encountered, Optional will ignore
66 | // any parsing failures and errors.
67 | func Optional[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, Output] {
68 | return func(input Input) Result[Output, Input] {
69 | result := parse(input)
70 | if result.Err != nil && !result.Err.IsFatal() {
71 | result.Err = nil
72 | }
73 |
74 | return Success(result.Output, result.Remaining)
75 | }
76 | }
77 |
78 | // Peek tries to apply the provided parser without consuming any input.
79 | // It effectively allows to look ahead in the input.
80 | func Peek[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, Output] {
81 | return func(input Input) Result[Output, Input] {
82 | result := parse(input)
83 | if result.Err != nil {
84 | return Failure[Input, Output](result.Err, input)
85 | }
86 |
87 | return Success(result.Output, input)
88 | }
89 | }
90 |
91 | // Recognize returns the consumed input as the produced value when
92 | // the provided parser succeeds.
93 | func Recognize[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, Input] {
94 | return func(input Input) Result[Input, Input] {
95 | result := parse(input)
96 | if result.Err != nil {
97 | return Failure[Input, Input](result.Err, input)
98 | }
99 |
100 | return Success(input[:len(input)-len(result.Remaining)], result.Remaining)
101 | }
102 | }
103 |
104 | // Assign returns the provided value if the parser succeeds, otherwise
105 | // it returns an error result.
106 | func Assign[Input Bytes, Output1, Output2 any](value Output1, parse Parser[Input, Output2]) Parser[Input, Output1] {
107 | return func(input Input) Result[Output1, Input] {
108 | result := parse(input)
109 | if result.Err != nil {
110 | return Failure[Input, Output1](result.Err, input)
111 | }
112 |
113 | return Success(value, result.Remaining)
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/combinators_test.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | import (
4 | "errors"
5 | "strconv"
6 | "testing"
7 | )
8 |
9 | func TestMap(t *testing.T) {
10 | t.Parallel()
11 |
12 | type TestStruct struct {
13 | Foo int
14 | Bar string
15 | }
16 |
17 | type args struct {
18 | parser Parser[string, TestStruct]
19 | }
20 | testCases := []struct {
21 | name string
22 | input string
23 | args args
24 | wantErr bool
25 | wantOutput TestStruct
26 | wantRemaining string
27 | }{
28 | {
29 | name: "matching parser should succeed",
30 | input: "1abc\r\n",
31 | args: args{
32 | Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) {
33 | left, _ := strconv.Atoi(p.Left)
34 | return TestStruct{
35 | Foo: left,
36 | Bar: p.Right,
37 | }, nil
38 | }),
39 | },
40 | wantErr: false,
41 | wantOutput: TestStruct{
42 | Foo: 1,
43 | Bar: "abc",
44 | },
45 | wantRemaining: "\r\n",
46 | },
47 | {
48 | name: "failing parser should fail",
49 | input: "abc\r\n",
50 | args: args{
51 | Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) {
52 | left, _ := strconv.Atoi(p.Left)
53 |
54 | return TestStruct{
55 | Foo: left,
56 | Bar: p.Right,
57 | }, nil
58 | }),
59 | },
60 | wantErr: true,
61 | wantOutput: TestStruct{},
62 | wantRemaining: "abc\r\n",
63 | },
64 | {
65 | name: "failing mapper should fail",
66 | input: "1abc\r\n",
67 | args: args{
68 | Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) {
69 | return TestStruct{}, errors.New("unexpected error")
70 | }),
71 | },
72 | wantErr: true,
73 | wantOutput: TestStruct{},
74 | wantRemaining: "1abc\r\n",
75 | },
76 | {
77 | name: "empty input should fail",
78 | input: "",
79 | args: args{
80 | Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) {
81 | left, _ := strconv.Atoi(p.Left)
82 |
83 | return TestStruct{
84 | Foo: left,
85 | Bar: p.Right,
86 | }, nil
87 | }),
88 | },
89 | wantErr: true,
90 | wantOutput: TestStruct{},
91 | wantRemaining: "",
92 | },
93 | }
94 |
95 | for _, tc := range testCases {
96 | tc := tc
97 |
98 | t.Run(tc.name, func(t *testing.T) {
99 | t.Parallel()
100 |
101 | gotResult := tc.args.parser(tc.input)
102 | if (gotResult.Err != nil) != tc.wantErr {
103 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
104 | }
105 |
106 | if gotResult.Output != tc.wantOutput {
107 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
108 | }
109 |
110 | if gotResult.Remaining != tc.wantRemaining {
111 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
112 | }
113 | })
114 | }
115 | }
116 |
117 | func BenchmarkMap(b *testing.B) {
118 | type TestStruct struct {
119 | Foo int
120 | Bar string
121 | }
122 |
123 | p := Map(Pair(Digit1[string](), TakeUntil(CRLF[string]())), func(p PairContainer[string, string]) (TestStruct, error) {
124 | left, _ := strconv.Atoi(p.Left)
125 |
126 | return TestStruct{
127 | Foo: left,
128 | Bar: p.Right,
129 | }, nil
130 | })
131 |
132 | b.ResetTimer()
133 | for i := 0; i < b.N; i++ {
134 | p("1abc\r\n")
135 | }
136 | }
137 |
138 | func TestOptional(t *testing.T) {
139 | t.Parallel()
140 |
141 | type args struct {
142 | p Parser[string, string]
143 | }
144 | testCases := []struct {
145 | name string
146 | args args
147 | input string
148 | wantErr bool
149 | wantOutput string
150 | wantRemaining string
151 | }{
152 | {
153 | name: "matching parser should succeed",
154 | input: "\r\n123",
155 | args: args{
156 | p: Optional(CRLF[string]()),
157 | },
158 | wantErr: false,
159 | wantOutput: "\r\n",
160 | wantRemaining: "123",
161 | },
162 | {
163 | name: "no match should succeed",
164 | input: "123",
165 | args: args{
166 | p: Optional(CRLF[string]()),
167 | },
168 | wantErr: false,
169 | wantOutput: "",
170 | wantRemaining: "123",
171 | },
172 | {
173 | name: "empty input should succeed",
174 | input: "",
175 | args: args{
176 | p: Optional(CRLF[string]()),
177 | },
178 | wantErr: false,
179 | wantOutput: "",
180 | wantRemaining: "",
181 | },
182 | }
183 | for _, tc := range testCases {
184 | tc := tc
185 |
186 | t.Run(tc.name, func(t *testing.T) {
187 | t.Parallel()
188 |
189 | gotResult := tc.args.p(tc.input)
190 | if (gotResult.Err != nil) != tc.wantErr {
191 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
192 | }
193 |
194 | if gotResult.Output != tc.wantOutput {
195 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
196 | }
197 |
198 | if gotResult.Remaining != tc.wantRemaining {
199 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
200 | }
201 | })
202 | }
203 | }
204 |
205 | func BenchmarkOptional(b *testing.B) {
206 | p := Optional(CRLF[string]())
207 |
208 | b.ResetTimer()
209 | for i := 0; i < b.N; i++ {
210 | p("\r\n123")
211 | }
212 | }
213 |
214 | func TestPeek(t *testing.T) {
215 | t.Parallel()
216 |
217 | type args struct {
218 | p Parser[string, string]
219 | }
220 | testCases := []struct {
221 | name string
222 | args args
223 | input string
224 | wantErr bool
225 | wantOutput string
226 | wantRemaining string
227 | }{
228 | {
229 | name: "matching parser should succeed",
230 | input: "abcd;",
231 | args: args{
232 | p: Peek(Alpha1[string]()),
233 | },
234 | wantErr: false,
235 | wantOutput: "abcd",
236 | wantRemaining: "abcd;",
237 | },
238 | {
239 | name: "non matching parser should fail",
240 | input: "123;",
241 | args: args{
242 | p: Peek(Alpha1[string]()),
243 | },
244 | wantErr: true,
245 | wantOutput: "",
246 | wantRemaining: "123;",
247 | },
248 | }
249 | for _, tc := range testCases {
250 | tc := tc
251 |
252 | t.Run(tc.name, func(t *testing.T) {
253 | t.Parallel()
254 |
255 | gotResult := tc.args.p(tc.input)
256 | if (gotResult.Err != nil) != tc.wantErr {
257 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
258 | }
259 |
260 | if gotResult.Output != tc.wantOutput {
261 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
262 | }
263 |
264 | if gotResult.Remaining != tc.wantRemaining {
265 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
266 | }
267 | })
268 | }
269 | }
270 |
271 | func BenchmarkPeek(b *testing.B) {
272 | p := Peek(Alpha1[string]())
273 |
274 | b.ResetTimer()
275 | for i := 0; i < b.N; i++ {
276 | p("abcd;")
277 | }
278 | }
279 |
280 | func TestRecognize(t *testing.T) {
281 | t.Parallel()
282 |
283 | type args struct {
284 | p Parser[string, string]
285 | }
286 | testCases := []struct {
287 | name string
288 | args args
289 | input string
290 | wantErr bool
291 | wantOutput string
292 | wantRemaining string
293 | }{
294 | {
295 | name: "matching parser should succeed",
296 | input: "123abc",
297 | args: args{
298 | p: Recognize(Pair(Digit1[string](), Alpha1[string]())),
299 | },
300 | wantErr: false,
301 | wantOutput: "123abc",
302 | wantRemaining: "",
303 | },
304 | {
305 | name: "no prefix match should fail",
306 | input: "abc",
307 | args: args{
308 | p: Recognize(Pair(Digit1[string](), Alpha1[string]())),
309 | },
310 | wantErr: true,
311 | wantOutput: "",
312 | wantRemaining: "abc",
313 | },
314 | {
315 | name: "no parser match should fail",
316 | input: "123",
317 | args: args{
318 | p: Recognize(Pair(Digit1[string](), Alpha1[string]())),
319 | },
320 | wantErr: true,
321 | wantOutput: "",
322 | wantRemaining: "123",
323 | },
324 | {
325 | name: "empty input should fail",
326 | input: "",
327 | args: args{
328 | p: Recognize(Pair(Digit1[string](), Alpha1[string]())),
329 | },
330 | wantErr: true,
331 | wantOutput: "",
332 | wantRemaining: "",
333 | },
334 | }
335 | for _, tc := range testCases {
336 | tc := tc
337 |
338 | t.Run(tc.name, func(t *testing.T) {
339 | t.Parallel()
340 |
341 | gotResult := tc.args.p(tc.input)
342 | if (gotResult.Err != nil) != tc.wantErr {
343 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
344 | }
345 |
346 | if gotResult.Output != tc.wantOutput {
347 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
348 | }
349 |
350 | if gotResult.Remaining != tc.wantRemaining {
351 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
352 | }
353 | })
354 | }
355 | }
356 |
357 | func BenchmarkRecognize(b *testing.B) {
358 | p := Recognize(Pair(Digit1[string](), Alpha1[string]()))
359 |
360 | b.ResetTimer()
361 | for i := 0; i < b.N; i++ {
362 | p("123abc")
363 | }
364 | }
365 |
366 | func TestAssign(t *testing.T) {
367 | t.Parallel()
368 |
369 | type args struct {
370 | p Parser[string, int]
371 | }
372 | testCases := []struct {
373 | name string
374 | args args
375 | input string
376 | wantErr bool
377 | wantOutput int
378 | wantRemaining string
379 | }{
380 | {
381 | name: "matching parser should succeed",
382 | input: "abcd",
383 | args: args{
384 | p: Assign(1234, Alpha1[string]()),
385 | },
386 | wantErr: false,
387 | wantOutput: 1234,
388 | wantRemaining: "",
389 | },
390 | {
391 | name: "non matching parser should fail",
392 | input: "123abcd;",
393 | args: args{
394 | p: Assign(1234, Alpha1[string]()),
395 | },
396 | wantErr: true,
397 | wantOutput: 0,
398 | wantRemaining: "123abcd;",
399 | },
400 | }
401 | for _, tc := range testCases {
402 | tc := tc
403 |
404 | t.Run(tc.name, func(t *testing.T) {
405 | t.Parallel()
406 |
407 | gotResult := tc.args.p(tc.input)
408 | if (gotResult.Err != nil) != tc.wantErr {
409 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
410 | }
411 |
412 | if gotResult.Output != tc.wantOutput {
413 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
414 | }
415 |
416 | if gotResult.Remaining != tc.wantRemaining {
417 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
418 | }
419 | })
420 | }
421 | }
422 |
423 | func BenchmarkAssign(b *testing.B) {
424 | p := Assign(1234, Alpha1[string]())
425 |
426 | b.ResetTimer()
427 | for i := 0; i < b.N; i++ {
428 | p("abcd")
429 | }
430 | }
431 |
--------------------------------------------------------------------------------
/containers.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
// PairContainer allows returning a pair of results from a parser.
// The two values may have different types.
type PairContainer[Left, Right any] struct {
	// Left is the first value of the pair.
	Left Left
	// Right is the second value of the pair.
	Right Right
}
8 |
9 | // NewPairContainer instantiates a new Pair
10 | func NewPairContainer[Left, Right any](left Left, right Right) *PairContainer[Left, Right] {
11 | return &PairContainer[Left, Right]{
12 | Left: left,
13 | Right: right,
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/error.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 | )
7 |
// Error represents a parsing error. It holds the input that was being parsed,
// the parsers that were tried, and the error that was produced.
type Error[Input Bytes] struct {
	// Input is the input the failing parser was applied to.
	Input Input
	// Err is an underlying error, if any. NewError leaves it nil, and
	// IsFatal reports true whenever it is set.
	Err error
	// Expected lists the names of the parsers that were expected to
	// succeed; it is what the Error method prints.
	Expected []string
}
15 |
16 | // NewError produces a new Error from the provided input and names of
17 | // parsers expected to succeed.
18 | func NewError[Input Bytes](input Input, expected ...string) *Error[Input] {
19 | return &Error[Input]{Input: input, Expected: expected}
20 | }
21 |
22 | // Error returns a human readable error string.
23 | func (e *Error[Input]) Error() string {
24 | return fmt.Sprintf("expected %v", strings.Join(e.Expected, ", "))
25 | }
26 |
// IsFatal returns true if the error is fatal, that is, when an underlying
// Err has been set on it.
func (e *Error[Input]) IsFatal() bool {
	return e.Err != nil
}
31 |
--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 |
8 | # Test binary, built with `go test -c`
9 | *.test
10 |
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 |
14 | # Dependency directories (remove the comment below to include it)
15 | # vendor/
16 |
17 | examples/redis/testdata
--------------------------------------------------------------------------------
/examples/csv/csv.go:
--------------------------------------------------------------------------------
1 | // Package csv implements a parser for CSV files.
2 | //
3 | // It is a simple, incomplete, example of how to use the gomme
4 | // parser combinator library to build a parser targeting the
5 | // format described in [RFC4180].
6 | //
7 | // [RFC4180]: https://tools.ietf.org/html/rfc4180
8 | package csv
9 |
10 | import "github.com/oleiade/gomme"
11 |
12 | func ParseCSV(input string) ([][]string, error) {
13 | parser := gomme.SeparatedList1(
14 | gomme.SeparatedList1(
15 | gomme.Alternative(
16 | gomme.Alphanumeric1[string](),
17 | gomme.Delimited(gomme.Char[string]('"'), gomme.Alphanumeric1[string](), gomme.Char[string]('"')),
18 | ),
19 | gomme.Char[string](','),
20 | ),
21 | gomme.CRLF[string](),
22 | )
23 |
24 | result := parser(input)
25 | if result.Err != nil {
26 | return nil, result.Err
27 | }
28 |
29 | return result.Output, nil
30 | }
31 |
--------------------------------------------------------------------------------
/examples/csv/csv_test.go:
--------------------------------------------------------------------------------
1 | package csv
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestParseRGBColor(t *testing.T) {
10 | t.Parallel()
11 |
12 | testCases := []struct {
13 | name string
14 | input string
15 | wantErr bool
16 | wantOutput [][]string
17 | }{
18 | {
19 | name: "parsing a single csv line should succeed",
20 | input: "abc,def,ghi\r\n",
21 | wantErr: false,
22 | wantOutput: [][]string{{"abc", "def", "ghi"}},
23 | },
24 | {
25 | name: "parsing multie csv lines should succeed",
26 | input: "abc,def,ghi\r\njkl,mno,pqr\r\n",
27 | wantErr: false,
28 | wantOutput: [][]string{
29 | {"abc", "def", "ghi"},
30 | {"jkl", "mno", "pqr"},
31 | },
32 | },
33 | {
34 | name: "parsing a single csv line of escaped strings should succeed",
35 | input: "\"abc\",\"def\",\"ghi\"\r\n",
36 | wantErr: false,
37 | wantOutput: [][]string{{"abc", "def", "ghi"}},
38 | },
39 | }
40 | for _, tc := range testCases {
41 | tc := tc
42 |
43 | t.Run(tc.name, func(t *testing.T) {
44 | t.Parallel()
45 |
46 | gotOutput, gotErr := ParseCSV(tc.input)
47 | if (gotErr != nil) != tc.wantErr {
48 | t.Errorf("got error %v, want error %v", gotErr, tc.wantErr)
49 | }
50 |
51 | assert.Equal(t,
52 | tc.wantOutput,
53 | gotOutput,
54 | "got output %v, want output %v", gotOutput, tc.wantOutput,
55 | )
56 | })
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/examples/hexcolor/hexcolor.go:
--------------------------------------------------------------------------------
1 | // Package hexcolor implements a parser for hexadecimal color strings.
2 | // It demonstrates how to use gomme to build a parser for a simple string format.
3 | package hexcolor
4 |
5 | import (
6 | "strconv"
7 |
8 | "github.com/oleiade/gomme"
9 | )
10 |
// RGBColor stores the three bytes describing a color in the RGB space.
// ParseRGBColor fills the fields in red, green, blue order.
type RGBColor struct {
	red   uint8 // red component
	green uint8 // green component
	blue  uint8 // blue component
}
17 |
18 | // ParseRGBColor creates a new RGBColor from a hexadecimal color string.
19 | // The string must be a six digit hexadecimal number, prefixed with a "#".
20 | func ParseRGBColor(input string) (RGBColor, error) {
21 | parser := gomme.Preceded(
22 | gomme.Token[string]("#"),
23 | gomme.Map(
24 | gomme.Count(HexColorComponent(), 3),
25 | func(components []uint8) (RGBColor, error) {
26 | return RGBColor{components[0], components[1], components[2]}, nil
27 | },
28 | ),
29 | )
30 |
31 | result := parser(input)
32 | if result.Err != nil {
33 | return RGBColor{}, result.Err
34 | }
35 |
36 | return result.Output, nil
37 | }
38 |
39 | // HexColorComponent produces a parser that parses a single hex color component,
40 | // which is a two digit hexadecimal number.
41 | func HexColorComponent() gomme.Parser[string, uint8] {
42 | return func(input string) gomme.Result[uint8, string] {
43 | return gomme.Map(
44 | gomme.TakeWhileMN[string](2, 2, gomme.IsHexDigit),
45 | fromHex,
46 | )(input)
47 | }
48 | }
49 |
// fromHex converts a hexadecimal number to its uint8 value.
//
// The input is parsed as an unsigned 8-bit value, so anything that does not
// fit in a uint8 (for instance "100" or a negative number) is reported as an
// error instead of being silently truncated. On error the returned value is 0.
func fromHex(input string) (uint8, error) {
	res, err := strconv.ParseUint(input, 16, 8)
	if err != nil {
		return 0, err
	}

	return uint8(res), nil
}
59 |
--------------------------------------------------------------------------------
/examples/hexcolor/hexcolor_test.go:
--------------------------------------------------------------------------------
1 | package hexcolor
2 |
3 | import "testing"
4 |
5 | func TestParseRGBColor(t *testing.T) {
6 | t.Parallel()
7 |
8 | testCases := []struct {
9 | name string
10 | input string
11 | wantErr bool
12 | wantColor RGBColor
13 | }{
14 | {
15 | name: "parsing minimum hexadecimal color should succeed",
16 | input: "#000000",
17 | wantErr: false,
18 | wantColor: RGBColor{0, 0, 0},
19 | },
20 | {
21 | name: "parsing maximum hexadecimal color should succeed",
22 | input: "#ffffff",
23 | wantErr: false,
24 | wantColor: RGBColor{255, 255, 255},
25 | },
26 | {
27 | name: "parsing out of bound color component should fail",
28 | input: "#fffffg",
29 | wantErr: true,
30 | wantColor: RGBColor{},
31 | },
32 | {
33 | name: "omitting leading # character should fail",
34 | input: "ffffff",
35 | wantErr: true,
36 | wantColor: RGBColor{},
37 | },
38 | {
39 | name: "parsing insufficient number of characters should fail",
40 | input: "#fffff",
41 | wantErr: true,
42 | wantColor: RGBColor{},
43 | },
44 | {
45 | name: "empty input should fail",
46 | input: "",
47 | wantErr: true,
48 | wantColor: RGBColor{},
49 | },
50 | }
51 | for _, tc := range testCases {
52 | tc := tc
53 |
54 | t.Run(tc.name, func(t *testing.T) {
55 | t.Parallel()
56 |
57 | gotColor, gotErr := ParseRGBColor(tc.input)
58 | if (gotErr != nil) != tc.wantErr {
59 | t.Errorf("got error %v, want error %v", gotErr, tc.wantErr)
60 | }
61 |
62 | if gotColor != tc.wantColor {
63 | t.Errorf("got color %v, want color %v", gotColor, tc.wantColor)
64 | }
65 | })
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/examples/json/json.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | _ "embed"
5 | "fmt"
6 | "log"
7 | "strconv"
8 | "strings"
9 |
10 | "github.com/oleiade/gomme"
11 | )
12 |
13 | //go:embed test.json
14 | var testJSON string
15 |
16 | func main() {
17 | result := parseJSON(testJSON)
18 | if result.Err != nil {
19 | log.Fatal(result.Err)
20 | return
21 | }
22 |
23 | fmt.Println(result.Output)
24 | }
25 |
type (
	// JSONValue represents any value that can be encountered in
	// JSON, including complex types like objects and arrays.
	// It is the output type of every value parser in this file.
	JSONValue interface{}

	// JSONString represents a JSON string value.
	JSONString string

	// JSONNumber represents a JSON number value, which internally is treated as float64.
	JSONNumber float64

	// JSONObject represents a JSON object, which is a collection of key-value pairs.
	JSONObject map[string]JSONValue

	// JSONArray represents a JSON array, which is a list of JSON values.
	JSONArray []JSONValue

	// JSONBool represents a JSON boolean value.
	JSONBool bool

	// JSONNull represents the JSON null value.
	// NOTE(review): parseNull in this file yields a nil JSONValue rather
	// than a JSONNull — confirm which representation is intended.
	JSONNull struct{}
)
49 |
// parseJSON is a convenience function to start parsing JSON from the given input string.
// It simply delegates to parseValue and returns its result unchanged.
func parseJSON(input string) gomme.Result[JSONValue, string] {
	return parseValue(input)
}
54 |
55 | // parseValue is a parser that attempts to parse different types of
56 | // JSON values (object, array, string, etc.).
57 | func parseValue(input string) gomme.Result[JSONValue, string] {
58 | return gomme.Alternative(
59 | parseObject,
60 | parseArray,
61 | parseString,
62 | parseNumber,
63 | parseTrue,
64 | parseFalse,
65 | parseNull,
66 | )(input)
67 | }
68 |
69 | // parseObject parses a JSON object, which starts and ends with
70 | // curly braces and contains key-value pairs.
71 | func parseObject(input string) gomme.Result[JSONValue, string] {
72 | return gomme.Map(
73 | gomme.Delimited[string, rune, map[string]JSONValue, rune](
74 | gomme.Char[string]('{'),
75 | gomme.Optional[string, map[string]JSONValue](
76 | gomme.Preceded(
77 | ws(),
78 | gomme.Terminated[string, map[string]JSONValue](
79 | parseMembers,
80 | ws(),
81 | ),
82 | ),
83 | ),
84 | gomme.Char[string]('}'),
85 | ),
86 | func(members map[string]JSONValue) (JSONValue, error) {
87 | return JSONObject(members), nil
88 | },
89 | )(input)
90 | }
91 |
92 | // Ensure parseObject is a Parser[string, JSONValue]
93 | var _ gomme.Parser[string, JSONValue] = parseObject
94 |
95 | // parseArray parses a JSON array, which starts and ends with
96 | // square brackets and contains a list of values.
97 | func parseArray(input string) gomme.Result[JSONValue, string] {
98 | return gomme.Map(
99 | gomme.Delimited[string, rune, []JSONValue, rune](
100 | gomme.Char[string]('['),
101 | gomme.Alternative(
102 | parseElements,
103 | gomme.Map(ws(), func(s string) ([]JSONValue, error) { return []JSONValue{}, nil }),
104 | ),
105 | gomme.Char[string](']'),
106 | ),
107 | func(elements []JSONValue) (JSONValue, error) {
108 | return JSONArray(elements), nil
109 | },
110 | )(input)
111 | }
112 |
113 | // Ensure parseArray is a Parser[string, JSONValue]
114 | var _ gomme.Parser[string, JSONValue] = parseArray
115 |
116 | func parseElement(input string) gomme.Result[JSONValue, string] {
117 | return gomme.Map(
118 | gomme.Delimited[string](ws(), parseValue, ws()),
119 | func(v JSONValue) (JSONValue, error) { return v, nil },
120 | )(input)
121 | }
122 |
123 | // Ensure parseElement is a Parser[string, JSONValue]
124 | var _ gomme.Parser[string, JSONValue] = parseElement
125 |
126 | // parseNumber parses a JSON number.
127 | func parseNumber(input string) gomme.Result[JSONValue, string] {
128 | return gomme.Map[string](
129 | gomme.Sequence(
130 | gomme.Map(integer(), func(i int) (string, error) { return strconv.Itoa(i), nil }),
131 | gomme.Optional(fraction()),
132 | gomme.Optional(exponent()),
133 | ),
134 | func(parts []string) (JSONValue, error) {
135 | // Construct the float string from parts
136 | var floatStr string
137 |
138 | // Integer part
139 | floatStr += parts[0]
140 |
141 | // Fraction part
142 | if parts[1] != "" {
143 | fractionPart, err := strconv.Atoi(parts[1])
144 | if err != nil {
145 | return 0, err
146 | }
147 |
148 | if fractionPart != 0 {
149 | floatStr += fmt.Sprintf(".%d", fractionPart)
150 | }
151 | }
152 |
153 | // Exponent part
154 | if parts[2] != "" {
155 | floatStr += fmt.Sprintf("e%s", parts[2])
156 | }
157 |
158 | f, err := strconv.ParseFloat(floatStr, 64)
159 | if err != nil {
160 | return JSONNumber(0.0), err
161 | }
162 |
163 | return JSONNumber(f), nil
164 | },
165 | )(input)
166 | }
167 |
168 | // Ensure parseNumber is a Parser[string, JSONValue]
169 | var _ gomme.Parser[string, JSONValue] = parseNumber
170 |
171 | // parseString parses a JSON string.
172 | func parseString(input string) gomme.Result[JSONValue, string] {
173 | return gomme.Map(
174 | stringParser(),
175 | func(s string) (JSONValue, error) {
176 | return JSONString(s), nil
177 | },
178 | )(input)
179 | }
180 |
181 | // Ensure parseString is a Parser[string, JSONValue]
182 | var _ gomme.Parser[string, JSONValue] = parseString
183 |
184 | // parseFalse parses the JSON boolean value 'false'.
185 | func parseFalse(input string) gomme.Result[JSONValue, string] {
186 | return gomme.Map(
187 | gomme.Token[string]("false"),
188 | func(_ string) (JSONValue, error) { return JSONBool(false), nil },
189 | )(input)
190 | }
191 |
192 | // Ensure parseFalse is a Parser[string, JSONValue]
193 | var _ gomme.Parser[string, JSONValue] = parseFalse
194 |
195 | // parseTrue parses the JSON boolean value 'true'.
196 | func parseTrue(input string) gomme.Result[JSONValue, string] {
197 | return gomme.Map(
198 | gomme.Token[string]("true"),
199 | func(_ string) (JSONValue, error) { return JSONBool(true), nil },
200 | )(input)
201 | }
202 |
203 | // Ensure parseTrue is a Parser[string, JSONValue]
204 | var _ gomme.Parser[string, JSONValue] = parseTrue
205 |
206 | // parseNull parses the JSON 'null' value.
207 | func parseNull(input string) gomme.Result[JSONValue, string] {
208 | return gomme.Map(
209 | gomme.Token[string]("null"),
210 | func(_ string) (JSONValue, error) { return nil, nil },
211 | )(input)
212 | }
213 |
214 | // Ensure parseNull is a Parser[string, JSONValue]
215 | var _ gomme.Parser[string, JSONValue] = parseNull
216 |
217 | // parseElements parses the elements of a JSON array.
218 | func parseElements(input string) gomme.Result[[]JSONValue, string] {
219 | return gomme.Map(
220 | gomme.SeparatedList0[string](
221 | parseElement,
222 | gomme.Token[string](","),
223 | ),
224 | func(elems []JSONValue) ([]JSONValue, error) {
225 | return elems, nil
226 | },
227 | )(input)
228 | }
229 |
230 | // Ensure parseElements is a Parser[string, []JSONValue]
231 | var _ gomme.Parser[string, []JSONValue] = parseElements
232 |
233 | // parseElement parses a single element of a JSON array.
234 | func parseMembers(input string) gomme.Result[map[string]JSONValue, string] {
235 | return gomme.Map(
236 | gomme.SeparatedList0[string](
237 | parseMember,
238 | gomme.Token[string](","),
239 | ),
240 | func(kvs []kv) (map[string]JSONValue, error) {
241 | obj := make(JSONObject)
242 | for _, kv := range kvs {
243 | obj[kv.key] = kv.value
244 | }
245 | return obj, nil
246 | },
247 | )(input)
248 | }
249 |
250 | // Ensure parseMembers is a Parser[string, map[string]JSONValue]
251 | var _ gomme.Parser[string, map[string]JSONValue] = parseMembers
252 |
253 | // parseMember parses a single member (key-value pair) of a JSON object.
254 | func parseMember(input string) gomme.Result[kv, string] {
255 | return member()(input)
256 | }
257 |
258 | // Ensure parseMember is a Parser[string, kv]
259 | var _ gomme.Parser[string, kv] = parseMember
260 |
261 | // member creates a parser for a single key-value pair in a JSON object.
262 | //
263 | // It expects a string followed by a colon and then a JSON value.
264 | // The result is a kv struct with the parsed key and value.
265 | func member() gomme.Parser[string, kv] {
266 | mapFunc := func(p gomme.PairContainer[string, JSONValue]) (kv, error) {
267 | return kv{p.Left, p.Right}, nil
268 | }
269 |
270 | return gomme.Map(
271 | gomme.SeparatedPair[string](
272 | gomme.Delimited(ws(), stringParser(), ws()),
273 | gomme.Token[string](":"),
274 | element(),
275 | ),
276 | mapFunc,
277 | )
278 | }
279 |
280 | // element creates a parser for a single element in a JSON array.
281 | //
282 | // It wraps the element with optional whitespace on either side.
283 | func element() gomme.Parser[string, JSONValue] {
284 | return gomme.Map(
285 | gomme.Delimited(ws(), parseValue, ws()),
286 | func(v JSONValue) (JSONValue, error) { return v, nil },
287 | )
288 | }
289 |
// kv is a struct representing a key-value pair in a JSON object.
//
// 'key' holds the string key, and 'value' holds the corresponding JSON value.
// It is the output of the member parser, and parseMembers folds a list of
// them into a map.
type kv struct {
	key   string
	value JSONValue
}
297 |
298 | // stringParser creates a parser for a JSON string.
299 | //
300 | // It expects a sequence of characters enclosed in double quotes.
301 | func stringParser() gomme.Parser[string, string] {
302 | return gomme.Delimited[string, rune, string, rune](
303 | gomme.Char[string]('"'),
304 | characters(),
305 | gomme.Char[string]('"'),
306 | )
307 | }
308 |
309 | // integer creates a parser for a JSON number's integer part.
310 | //
311 | // It handles negative and positive integers including zero.
312 | func integer() gomme.Parser[string, int] {
313 | return gomme.Alternative(
314 | // "-" onenine digits
315 | gomme.Preceded(
316 | gomme.Token[string]("-"),
317 | gomme.Map(
318 | gomme.Pair(onenine(), digits()),
319 | func(p gomme.PairContainer[string, string]) (int, error) {
320 | return strconv.Atoi(p.Left + p.Right)
321 | },
322 | ),
323 | ),
324 |
325 | // onenine digits
326 | gomme.Map(
327 | gomme.Pair(onenine(), digits()),
328 | func(p gomme.PairContainer[string, string]) (int, error) {
329 | return strconv.Atoi(p.Left + p.Right)
330 | },
331 | ),
332 |
333 | // "-" digit
334 | gomme.Preceded(
335 | gomme.Token[string]("-"),
336 | gomme.Map(
337 | digit(),
338 | strconv.Atoi,
339 | ),
340 | ),
341 |
342 | // digit
343 | gomme.Map(digit(), strconv.Atoi),
344 | )
345 | }
346 |
347 | // digits creates a parser for a sequence of digits.
348 | //
349 | // It concatenates the sequence into a single string.
350 | func digits() gomme.Parser[string, string] {
351 | return gomme.Map(gomme.Many1(digit()), func(digits []string) (string, error) {
352 | return strings.Join(digits, ""), nil
353 | })
354 | }
355 |
356 | // digit creates a parser for a single digit.
357 | //
358 | // It distinguishes between '0' and non-zero digits.
359 | func digit() gomme.Parser[string, string] {
360 | return gomme.Alternative(
361 | gomme.Token[string]("0"),
362 | onenine(),
363 | )
364 | }
365 |
366 | // onenine creates a parser for digits from 1 to 9.
367 | func onenine() gomme.Parser[string, string] {
368 | return gomme.Alternative(
369 | gomme.Token[string]("1"),
370 | gomme.Token[string]("2"),
371 | gomme.Token[string]("3"),
372 | gomme.Token[string]("4"),
373 | gomme.Token[string]("5"),
374 | gomme.Token[string]("6"),
375 | gomme.Token[string]("7"),
376 | gomme.Token[string]("8"),
377 | gomme.Token[string]("9"),
378 | )
379 | }
380 |
381 | // fraction creates a parser for the fractional part of a JSON number.
382 | //
383 | // It expects a dot followed by at least one digit.
384 | func fraction() gomme.Parser[string, string] {
385 | return gomme.Preceded(
386 | gomme.Token[string]("."),
387 | gomme.Digit1[string](),
388 | )
389 | }
390 |
391 | // exponent creates a parser for the exponent part of a JSON number.
392 | //
393 | // It handles the exponent sign and the exponent digits.
394 | func exponent() gomme.Parser[string, string] {
395 | return gomme.Preceded(
396 | gomme.Token[string]("e"),
397 | gomme.Map(
398 | gomme.Pair(sign(), digits()),
399 | func(p gomme.PairContainer[string, string]) (string, error) {
400 | return p.Left + p.Right, nil
401 | },
402 | ),
403 | )
404 | }
405 |
406 | // sign creates a parser for the sign part of a number's exponent.
407 | //
408 | // It can parse both positive ('+') and negative ('-') signs.
409 | func sign() gomme.Parser[string, string] {
410 | return gomme.Optional(
411 | gomme.Alternative[string, string](
412 | gomme.Token[string]("-"),
413 | gomme.Token[string]("+"),
414 | ),
415 | )
416 | }
417 |
418 | // characters creates a parser for a sequence of JSON string characters.
419 | //
420 | // It handles regular characters and escaped sequences.
421 | func characters() gomme.Parser[string, string] {
422 | return gomme.Optional(
423 | gomme.Map(
424 | gomme.Many1[string, rune](character()),
425 | func(chars []rune) (string, error) {
426 | return string(chars), nil
427 | },
428 | ),
429 | )
430 | }
431 |
432 | // character creates a parser for a single JSON string character.
433 | //
434 | // It distinguishes between regular characters and escape sequences.
435 | func character() gomme.Parser[string, rune] {
436 | return gomme.Alternative(
437 | // normal character
438 | gomme.Satisfy[string](func(c rune) bool {
439 | return c != '"' && c != '\\' && c >= 0x20 && c <= 0x10FFFF
440 | }),
441 |
442 | // escape
443 | escape(),
444 | )
445 | }
446 |
447 | // escape creates a parser for escaped characters in a JSON string.
448 | //
449 | // It handles common escape sequences like '\n', '\t', etc., and unicode escapes.
450 | func escape() gomme.Parser[string, rune] {
451 | mapFunc := func(chars []rune) (rune, error) {
452 | // chars[0] will always be '\\'
453 | switch chars[1] {
454 | case '"':
455 | return '"', nil
456 | case '\\':
457 | return '\\', nil
458 | case '/':
459 | return '/', nil
460 | case 'b':
461 | return '\b', nil
462 | case 'f':
463 | return '\f', nil
464 | case 'n':
465 | return '\n', nil
466 | case 'r':
467 | return '\r', nil
468 | case 't':
469 | return '\t', nil
470 | default: // for unicode escapes
471 | return chars[1], nil
472 | }
473 | }
474 |
475 | return gomme.Map(
476 | gomme.Sequence(
477 | gomme.Char[string]('\\'),
478 | gomme.Alternative(
479 | gomme.Char[string]('"'),
480 | gomme.Char[string]('\\'),
481 | gomme.Char[string]('/'),
482 | gomme.Char[string]('b'),
483 | gomme.Char[string]('f'),
484 | gomme.Char[string]('n'),
485 | gomme.Char[string]('r'),
486 | gomme.Char[string]('t'),
487 | unicodeEscape(),
488 | ),
489 | ),
490 | mapFunc,
491 | )
492 | }
493 |
494 | // unicodeEscape creates a parser for a unicode escape sequence in a JSON string.
495 | //
496 | // It expects a sequence starting with 'u' followed by four hexadecimal digits and
497 | // converts them to the corresponding rune.
498 | func unicodeEscape() gomme.Parser[string, rune] {
499 | mapFunc := func(chars []rune) (rune, error) {
500 | // chars[0] will always be 'u'
501 | hex := string(chars[1:5])
502 | codePoint, err := strconv.ParseInt(hex, 16, 32)
503 | if err != nil {
504 | return 0, err
505 | }
506 | return rune(codePoint), nil
507 | }
508 |
509 | return gomme.Map(
510 | gomme.Sequence(
511 | gomme.Char[string]('u'),
512 | hex(),
513 | hex(),
514 | hex(),
515 | hex(),
516 | ),
517 | mapFunc,
518 | )
519 | }
520 |
521 | // hex creates a parser for a single hexadecimal digit.
522 | //
523 | // It can parse digits ('0'-'9') as well as
524 | // letters ('a'-'f', 'A'-'F') used in hexadecimal numbers.
525 | func hex() gomme.Parser[string, rune] {
526 | return gomme.Satisfy[string](func(r rune) bool {
527 | return ('0' <= r && r <= '9') || ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F')
528 | })
529 | }
530 |
531 | // ws creates a parser for whitespace in JSON.
532 | //
533 | // It can handle spaces, tabs, newlines, and carriage returns.
534 | // The parser accumulates all whitespace characters and returns them as a single string.
535 | func ws() gomme.Parser[string, string] {
536 | parser := gomme.Many0(
537 | gomme.Satisfy[string](func(r rune) bool {
538 | return r == ' ' || r == '\t' || r == '\n' || r == '\r'
539 | }),
540 | )
541 |
542 | mapFunc := func(runes []rune) (string, error) {
543 | return string(runes), nil
544 | }
545 |
546 | return gomme.Map(parser, mapFunc)
547 | }
548 |
--------------------------------------------------------------------------------
/examples/json/test.json:
--------------------------------------------------------------------------------
1 | {
2 | "abc": 123,
3 | "entries": [
4 | {
5 | "name": "John",
6 | "age": 30
7 | },
8 | {
9 | "name": "Jane",
10 | "age": 25
11 | }
12 | ]
13 | }
14 |
--------------------------------------------------------------------------------
/examples/redis/redis.go:
--------------------------------------------------------------------------------
1 | // Package redis demonstrates the usage of the gomme package to parse Redis'
2 | // [RESP protocol] messages.
3 | //
4 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
5 | package redis
6 |
7 | import (
8 | "errors"
9 | "fmt"
10 | "strconv"
11 | "strings"
12 |
13 | "github.com/oleiade/gomme"
14 | )
15 |
16 | // ParseRESPMESSAGE parses a Redis' [RESP protocol] message.
17 | //
18 | // [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
19 | func ParseRESPMessage(input string) (RESPMessage, error) {
20 | if len(input) < 3 {
21 | return RESPMessage{}, fmt.Errorf("malformed message %s; reason: %w", input, ErrMessageTooShort)
22 | }
23 |
24 | if !isValidMessageKind(MessageKind(input[0])) {
25 | return RESPMessage{}, fmt.Errorf("malformed message %s; reason: %w %c", input, ErrInvalidPrefix, input[0])
26 | }
27 |
28 | if input[len(input)-2] != '\r' || input[len(input)-1] != '\n' {
29 | return RESPMessage{}, fmt.Errorf("malformed message %s; reason: %w", input, ErrInvalidSuffix)
30 | }
31 |
32 | parser := gomme.Alternative(
33 | SimpleString(),
34 | Error(),
35 | Integer(),
36 | BulkString(),
37 | Array(),
38 | )
39 |
40 | result := parser(input)
41 | if result.Err != nil {
42 | return RESPMessage{}, result.Err
43 | }
44 |
45 | return result.Output, nil
46 | }
47 |
// ErrMessageTooShort is returned when a message is too short to be valid.
// A [RESP protocol] message is at least 3 characters long: the message kind
// prefix, the message content (which can be empty), and the CRLF suffix.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
var ErrMessageTooShort = errors.New("message too short")

// ErrInvalidPrefix is returned when a message kind prefix is not recognized.
// Valid [RESP Protocol] message kind prefixes are "+", "-", ":", "$", and "*".
//
// [RESP Protocol]: https://redis.io/docs/reference/protocol-spec/
var ErrInvalidPrefix = errors.New("invalid message prefix")

// ErrInvalidSuffix is returned when a message does not end with the expected
// suffix. Every [RESP protocol] message ends with a CRLF.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
var ErrInvalidSuffix = errors.New("invalid message suffix")
66 |
// RESPMessage is a parsed Redis' [RESP protocol] message.
//
// It can hold either a simple string, an error, an integer, a bulk string,
// or an array. The kind of the message is available in the Kind field, and
// the parsers of this package only populate the pointer field matching that
// kind, leaving the others nil.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type RESPMessage struct {
	// Kind tells which of the fields below holds the message's content.
	Kind MessageKind
	// SimpleString is set when Kind is SimpleStringKind.
	SimpleString *SimpleStringMessage
	// Error is set when Kind is ErrorKind.
	Error *ErrorStringMessage
	// Integer is set when Kind is IntegerKind.
	Integer *IntegerMessage
	// BulkString is set when Kind is BulkStringKind.
	BulkString *BulkStringMessage
	// Array is set when Kind is ArrayKind.
	Array *ArrayMessage
}
81 |
// MessageKind is the kind of a Redis' [RESP protocol] message.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type MessageKind string

// The many different kinds of Redis' [RESP protocol] messages map
// to their respective protocol message's prefixes.
//
// InvalidKind is the exception: isValidMessageKind does not accept it, so a
// message starting with "?" is rejected.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
const (
	SimpleStringKind MessageKind = "+"
	ErrorKind        MessageKind = "-"
	IntegerKind      MessageKind = ":"
	BulkStringKind   MessageKind = "$"
	ArrayKind        MessageKind = "*"
	InvalidKind      MessageKind = "?"
)
97 |
// SimpleStringMessage is a simple string message parsed from a Redis'
// [RESP protocol] message.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type SimpleStringMessage struct {
	// Content is the text found between the "+" prefix and the CRLF suffix.
	Content string
}
105 |
106 | // SimpleString is a parser for Redis' RESP protocol simple strings.
107 | //
108 | // Simple strings are strings that are not expected to contain newlines.
109 | // Simple strings start with a "+" character, and end with a gomme.CRLF.
110 | //
111 | // Once parsed, the content of the simple string is available in the
112 | // simpleString field of the result's RESPMessage.
113 | func SimpleString() gomme.Parser[string, RESPMessage] {
114 | mapFn := func(message string) (RESPMessage, error) {
115 | if strings.ContainsAny(message, "\r\n") {
116 | return RESPMessage{}, fmt.Errorf("malformed simple string: %s", message)
117 | }
118 |
119 | return RESPMessage{
120 | Kind: SimpleStringKind,
121 | SimpleString: &SimpleStringMessage{
122 | Content: message,
123 | },
124 | }, nil
125 | }
126 |
127 | return gomme.Delimited(
128 | gomme.Token[string](string(SimpleStringKind)),
129 | gomme.Map(gomme.TakeUntil(gomme.CRLF[string]()), mapFn),
130 | gomme.CRLF[string](),
131 | )
132 | }
133 |
// ErrorStringMessage is a parsed error string message from a Redis'
// [RESP protocol] message.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type ErrorStringMessage struct {
	// Kind is the error's category. The Error parser always sets it to "ERR".
	Kind string
	// Message is the text found between the "-" prefix and the CRLF suffix.
	Message string
}
142 |
143 | // Error is a parser for Redis' RESP protocol errors.
144 | //
145 | // Errors are strings that start with a "-" character, and end with a gomme.CRLF.
146 | //
147 | // The error message is available in the Error field of the result's
148 | // RESPMessage.
149 | func Error() gomme.Parser[string, RESPMessage] {
150 | mapFn := func(message string) (RESPMessage, error) {
151 | if strings.ContainsAny(message, "\r\n") {
152 | return RESPMessage{}, fmt.Errorf("malformed error string: %s", message)
153 | }
154 |
155 | return RESPMessage{
156 | Kind: ErrorKind,
157 | Error: &ErrorStringMessage{
158 | Kind: "ERR",
159 | Message: message,
160 | },
161 | }, nil
162 | }
163 |
164 | return gomme.Delimited(
165 | gomme.Token[string](string(ErrorKind)),
166 | gomme.Map(gomme.TakeUntil(gomme.CRLF[string]()), mapFn),
167 | gomme.CRLF[string](),
168 | )
169 | }
170 |
// IntegerMessage is a parsed integer message from a Redis' [RESP protocol]
// message.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type IntegerMessage struct {
	// Value is the integer found between the ":" prefix and the CRLF suffix.
	Value int
}
178 |
179 | // Integer is a parser for Redis' RESP protocol integers.
180 | //
181 | // Integers are signed nummerical values represented as string messages
182 | // that start with a ":" character, and end with a gomme.CRLF.
183 | //
184 | // The integer value is available in the IntegerMessage field of the result's
185 | // RESPMessage.
186 | func Integer() gomme.Parser[string, RESPMessage] {
187 | mapFn := func(message string) (RESPMessage, error) {
188 | value, err := strconv.Atoi(message)
189 | if err != nil {
190 | return RESPMessage{}, err
191 | }
192 |
193 | return RESPMessage{
194 | Kind: IntegerKind,
195 | Integer: &IntegerMessage{
196 | Value: value,
197 | },
198 | }, nil
199 | }
200 |
201 | return gomme.Delimited(
202 | gomme.Token[string](string(IntegerKind)),
203 | gomme.Map(gomme.TakeUntil(gomme.CRLF[string]()), mapFn),
204 | gomme.CRLF[string](),
205 | )
206 | }
207 |
// BulkStringMessage is a parsed bulk string message from a Redis' [RESP protocol]
// message.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type BulkStringMessage struct {
	// Data is the bulk string's payload. The BulkString parser leaves it
	// empty for a null bulk string (declared length of -1).
	Data []byte
}
215 |
216 | // BulkString is a parser for Redis' RESP protocol bulk strings.
217 | //
218 | // Bulk strings are binary-safe strings up to 512MB in size.
219 | // Bulk strings start with a "$" character, and end with a gomme.CRLF.
220 | //
221 | // The bulk string's data is available in the BulkString field of the result's
222 | // RESPMessage.
223 | func BulkString() gomme.Parser[string, RESPMessage] {
224 | mapFn := func(message gomme.PairContainer[int64, string]) (RESPMessage, error) {
225 | if message.Left < 0 {
226 | if message.Left < -1 {
227 | return RESPMessage{}, fmt.Errorf(
228 | "unable to parse bulk string; "+
229 | "reason: negative length %d",
230 | message.Left,
231 | )
232 | }
233 |
234 | if message.Left == -1 && len(message.Right) != 0 {
235 | return RESPMessage{}, fmt.Errorf(
236 | "malformed array: declared message size -1, and actual size differ %d",
237 | len(message.Right),
238 | )
239 | }
240 | } else if len(message.Right) != int(message.Left) {
241 | return RESPMessage{}, fmt.Errorf(
242 | "malformed array: declared message size %d, and actual size differ %d",
243 | message.Left,
244 | len(message.Right),
245 | )
246 | }
247 |
248 | return RESPMessage{
249 | Kind: BulkStringKind,
250 | BulkString: &BulkStringMessage{
251 | Data: []byte(message.Right),
252 | },
253 | }, nil
254 | }
255 |
256 | return gomme.Map(
257 | gomme.Pair(
258 | sizePrefix(gomme.Token[string](string(BulkStringKind))),
259 | gomme.Optional(
260 | gomme.Terminated(gomme.TakeUntil(gomme.CRLF[string]()), gomme.CRLF[string]()),
261 | ),
262 | ),
263 | mapFn,
264 | )
265 | }
266 |
// ArrayMessage is a parsed array message from a Redis' [RESP protocol] message.
//
// [RESP protocol]: https://redis.io/docs/reference/protocol-spec/
type ArrayMessage struct {
	// Elements holds the array's messages, in the order they were parsed.
	Elements []RESPMessage
}
273 |
274 | // Array is a parser for Redis' RESP protocol arrays.
275 | //
276 | // Arrays are sequences of RESP messages.
277 | // Arrays start with a "*" character, and end with a gomme.CRLF.
278 | //
279 | // The array's messages are available in the Array field of the result's
280 | // RESPMessage.
281 | func Array() gomme.Parser[string, RESPMessage] {
282 | mapFn := func(message gomme.PairContainer[int64, []RESPMessage]) (RESPMessage, error) {
283 | if int(message.Left) == -1 {
284 | if len(message.Right) != 0 {
285 | return RESPMessage{}, fmt.Errorf(
286 | "malformed array: declared message size -1, and actual size differ %d",
287 | len(message.Right),
288 | )
289 | }
290 | } else {
291 | if len(message.Right) != int(message.Left) {
292 | return RESPMessage{}, fmt.Errorf(
293 | "malformed array: declared message size %d, and actual size differ %d",
294 | message.Left,
295 | len(message.Right),
296 | )
297 | }
298 | }
299 |
300 | messages := make([]RESPMessage, 0, len(message.Right))
301 | messages = append(messages, message.Right...)
302 |
303 | return RESPMessage{
304 | Kind: ArrayKind,
305 | Array: &ArrayMessage{
306 | Elements: messages,
307 | },
308 | }, nil
309 | }
310 |
311 | return gomme.Map(
312 | gomme.Pair(
313 | sizePrefix(gomme.Token[string](string(ArrayKind))),
314 | gomme.Many0(
315 | gomme.Alternative(
316 | SimpleString(),
317 | Error(),
318 | Integer(),
319 | BulkString(),
320 | ),
321 | ),
322 | ),
323 | mapFn,
324 | )
325 | }
326 |
327 | func sizePrefix(prefix gomme.Parser[string, string]) gomme.Parser[string, int64] {
328 | return gomme.Delimited(
329 | prefix,
330 | gomme.Int64[string](),
331 | gomme.CRLF[string](),
332 | )
333 | }
334 |
335 | func isValidMessageKind(kind MessageKind) bool {
336 | return kind == SimpleStringKind ||
337 | kind == ErrorKind ||
338 | kind == IntegerKind ||
339 | kind == BulkStringKind ||
340 | kind == ArrayKind
341 | }
342 |
--------------------------------------------------------------------------------
/examples/redis/redis_test.go:
--------------------------------------------------------------------------------
1 | package redis
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "math/rand"
7 | "reflect"
8 | "strconv"
9 | "strings"
10 | "testing"
11 | "time"
12 | )
13 |
14 | func TestParseRESPMessage(t *testing.T) {
15 | t.Parallel()
16 |
17 | type args struct {
18 | input string
19 | }
20 | testCases := []struct {
21 | name string
22 | args args
23 | want RESPMessage
24 | wantErr bool
25 | }{
26 | //
27 | // General
28 | //
29 | {
30 | name: "empty message should fail",
31 | args: args{
32 | input: "",
33 | },
34 | want: RESPMessage{},
35 | wantErr: true,
36 | },
37 | {
38 | name: "message with only a prefix should fail",
39 | args: args{
40 | input: "+",
41 | },
42 | want: RESPMessage{},
43 | wantErr: true,
44 | },
45 | {
46 | name: "message with only a CRLF should fail",
47 | args: args{
48 | input: "\r\n",
49 | },
50 | want: RESPMessage{},
51 | wantErr: true,
52 | },
53 | {
54 | name: "message with an invalid prefix should fail",
55 | args: args{
56 | input: "?\r\n",
57 | },
58 | want: RESPMessage{},
59 | wantErr: true,
60 | },
61 |
62 | //
63 | // SIMPLE STRINGS
64 | //
65 |
66 | {
67 | name: "proper simple string should succeed",
68 | args: args{
69 | "+OK\r\n",
70 | },
71 | want: RESPMessage{
72 | Kind: SimpleStringKind,
73 | SimpleString: &SimpleStringMessage{Content: "OK"},
74 | },
75 | wantErr: false,
76 | },
77 | {
78 | name: "empty simple string should succeed",
79 | args: args{
80 | "+\r\n",
81 | },
82 | want: RESPMessage{
83 | Kind: SimpleStringKind,
84 | SimpleString: &SimpleStringMessage{Content: ""},
85 | },
86 | wantErr: false,
87 | },
88 | {
89 | name: "malformed simple string containing a \\r should fail",
90 | args: args{
91 | "+Hello\rWorld\r\n",
92 | },
93 | want: RESPMessage{},
94 | wantErr: true,
95 | },
96 | {
97 | name: "malformed simple string containing a \\n should fail",
98 | args: args{
99 | "+Hello\nWorld\r\n",
100 | },
101 | want: RESPMessage{},
102 | wantErr: true,
103 | },
104 | {
105 | name: "malformed simple string containing a \\n\\r should fail",
106 | args: args{
107 | "+Hello\n\rWorld\r\n",
108 | },
109 | want: RESPMessage{},
110 | wantErr: true,
111 | },
112 |
113 | // //
114 | // // ERROR STRINGS
115 | // //
116 |
117 | {
118 | name: "proper error string should succeed",
119 | args: args{
120 | "-Error message\r\n",
121 | },
122 | want: RESPMessage{
123 | Kind: ErrorKind,
124 | Error: &ErrorStringMessage{
125 | Kind: "ERR",
126 | Message: "Error message",
127 | },
128 | },
129 | wantErr: false,
130 | },
131 | {
132 | name: "malformed error string containing a \\r should fail",
133 | args: args{
134 | "-Error\r message\r\n",
135 | },
136 | want: RESPMessage{},
137 | wantErr: true,
138 | },
139 | {
140 | name: "malformed error string containing a \\n should fail",
141 | args: args{
142 | "-Error\n message\r\n",
143 | },
144 | want: RESPMessage{},
145 | wantErr: true,
146 | },
147 | {
148 | name: "malformed error string containing a \\n\\r should fail",
149 | args: args{
150 | "-Error\n\r message\r\n",
151 | },
152 | want: RESPMessage{},
153 | wantErr: true,
154 | },
155 |
156 | // //
157 | // // INTEGER
158 | // //
159 |
160 | {
161 | name: "proper integer should succeed",
162 | args: args{
163 | ":123\r\n",
164 | },
165 | want: RESPMessage{
166 | Kind: IntegerKind,
167 | Integer: &IntegerMessage{
168 | Value: 123,
169 | },
170 | },
171 | wantErr: false,
172 | },
173 |
174 | //
175 | // Bulk Strings
176 | //
177 |
178 | {
179 | name: "proper bulk string should succeed",
180 | args: args{
181 | "$5\r\nhello\r\n",
182 | },
183 | want: RESPMessage{
184 | Kind: BulkStringKind,
185 | BulkString: &BulkStringMessage{
186 | Data: []byte("hello"),
187 | },
188 | },
189 | wantErr: false,
190 | },
191 | {
192 | name: "nil bulk string should succeed",
193 | args: args{
194 | "$-1\r\n",
195 | },
196 | want: RESPMessage{
197 | Kind: BulkStringKind,
198 | BulkString: &BulkStringMessage{
199 | Data: []byte(""),
200 | },
201 | },
202 | wantErr: false,
203 | },
204 | {
205 | name: "bulk string with negative size != -1 should fail",
206 | args: args{
207 | "$-2\r\n",
208 | },
209 | want: RESPMessage{},
210 | wantErr: true,
211 | },
212 | {
213 | name: "malformed bulk string with actual length different from declared length should fail",
214 | args: args{
215 | "$5\r\nhello world\r\n",
216 | },
217 | want: RESPMessage{},
218 | wantErr: true,
219 | },
220 |
221 | //
222 | // ARRAYS
223 | //
224 |
225 | {
226 | name: "proper array of simple strings should succeed",
227 | args: args{
228 | "*2\r\n+hello\r\n+world\r\n",
229 | },
230 | want: RESPMessage{
231 | Kind: ArrayKind,
232 | Array: &ArrayMessage{
233 | Elements: []RESPMessage{
234 | {
235 | Kind: SimpleStringKind,
236 | SimpleString: &SimpleStringMessage{
237 | Content: "hello",
238 | },
239 | },
240 | {
241 | Kind: SimpleStringKind,
242 | SimpleString: &SimpleStringMessage{
243 | Content: "world",
244 | },
245 | },
246 | },
247 | },
248 | },
249 | wantErr: false,
250 | },
251 | {
252 | name: "proper array of errors should succeed",
253 | args: args{
254 | "*2\r\n-Error Message\r\n-Other error\r\n",
255 | },
256 | want: RESPMessage{
257 | Kind: ArrayKind,
258 | Array: &ArrayMessage{
259 | Elements: []RESPMessage{
260 | {
261 | Kind: ErrorKind,
262 | Error: &ErrorStringMessage{
263 | Kind: "ERR",
264 | Message: "Error Message",
265 | },
266 | },
267 | {
268 | Kind: ErrorKind,
269 | Error: &ErrorStringMessage{
270 | Kind: "ERR",
271 | Message: "Other error",
272 | },
273 | },
274 | },
275 | },
276 | },
277 | wantErr: false,
278 | },
279 | {
280 | name: "proper array of integers should succeed",
281 | args: args{
282 | "*2\r\n:0\r\n:1000\r\n",
283 | },
284 | want: RESPMessage{
285 | Kind: ArrayKind,
286 | Array: &ArrayMessage{
287 | Elements: []RESPMessage{
288 | {
289 | Kind: IntegerKind,
290 | Integer: &IntegerMessage{
291 | Value: 0,
292 | },
293 | },
294 | {
295 | Kind: IntegerKind,
296 | Integer: &IntegerMessage{
297 | Value: 1000,
298 | },
299 | },
300 | },
301 | },
302 | },
303 | wantErr: false,
304 | },
305 | {
306 | name: "proper array of bulk strings should succeed",
307 | args: args{
308 | "*2\r\n$5\r\nhello\r\n$5\r\nworld\r\n",
309 | },
310 | want: RESPMessage{
311 | Kind: ArrayKind,
312 | Array: &ArrayMessage{
313 | Elements: []RESPMessage{
314 | {
315 | Kind: BulkStringKind,
316 | BulkString: &BulkStringMessage{
317 | Data: []byte("hello"),
318 | },
319 | },
320 | {
321 | Kind: BulkStringKind,
322 | BulkString: &BulkStringMessage{
323 | Data: []byte("world"),
324 | },
325 | },
326 | },
327 | },
328 | },
329 | wantErr: false,
330 | },
331 | {
332 | name: "proper array of mixed types should succeed",
333 | args: args{
334 | "*4\r\n$5\r\nhello\r\n:123\r\n+OK\r\n-Error Message\r\n",
335 | },
336 | want: RESPMessage{
337 | Kind: ArrayKind,
338 | Array: &ArrayMessage{
339 | Elements: []RESPMessage{
340 | {
341 | Kind: BulkStringKind,
342 | BulkString: &BulkStringMessage{
343 | Data: []byte("hello"),
344 | },
345 | },
346 | {
347 | Kind: IntegerKind,
348 | Integer: &IntegerMessage{
349 | Value: 123,
350 | },
351 | },
352 | {
353 | Kind: SimpleStringKind,
354 | SimpleString: &SimpleStringMessage{
355 | Content: "OK",
356 | },
357 | },
358 | {
359 | Kind: ErrorKind,
360 | Error: &ErrorStringMessage{
361 | Kind: "ERR",
362 | Message: "Error Message",
363 | },
364 | },
365 | },
366 | },
367 | },
368 | wantErr: false,
369 | },
370 | {
371 | name: "empty array should succeed",
372 | args: args{
373 | "*-1\r\n",
374 | },
375 | want: RESPMessage{
376 | Kind: ArrayKind,
377 | Array: &ArrayMessage{
378 | Elements: []RESPMessage{},
379 | },
380 | },
381 | wantErr: false,
382 | },
383 | {
384 | name: "array with non matching size prefix should fail",
385 | args: args{
386 | "*2\r\n+OK\r\n",
387 | },
388 | want: RESPMessage{},
389 | wantErr: true,
390 | },
391 | }
392 | for _, tc := range testCases {
393 | tc := tc
394 |
395 | t.Run(tc.name, func(t *testing.T) {
396 | t.Parallel()
397 |
398 | got, err := ParseRESPMessage(tc.args.input)
399 | if (err != nil) != tc.wantErr {
400 | t.Errorf("ParseRESPMessage() error = %v, wantErr %v", err, tc.wantErr)
401 | return
402 | }
403 | if !reflect.DeepEqual(got, tc.want) {
404 | t.Errorf("ParseRESPMessage() = %v, want %v", got, tc.want)
405 | }
406 | })
407 | }
408 | }
409 |
410 | func BenchmarkParseMessage(b *testing.B) {
411 | var benchmarks = []struct {
412 | kind string
413 | data string
414 | size string
415 | }{
416 | {"simple_string", "+OK\r\n", "2"},
417 | {"simple_string", simpleStringProducer(128 * Byte), "128b"},
418 | {"simple_string", simpleStringProducer(1 * KiloBytes), "1kb"},
419 | {"simple_string", simpleStringProducer(1 * MegaBytes), "1mb"},
420 | {"error_string", "-Error\r\n", "5"},
421 | {"error_string", errorStringProducer(128 * Byte), "128b"},
422 | {"error_string", errorStringProducer(1 * KiloBytes), "1kb"},
423 | {"integer", ":1\r\n", "1"},
424 | {"integer", ":9,223,372,036,854,775,807\r\n", "biggest integer"},
425 | {"integer", ":-9223372036854775808\r\n", "smallest integer"},
426 | {"bulk_string", bulkStringProducer(128 * Byte), "128b"},
427 | {"bulk_string", bulkStringProducer(1 * KiloBytes), "1kb"},
428 | {"bulk_string", bulkStringProducer(1 * MegaBytes), "1mb"},
429 | {"bulk_string", bulkStringProducer(512 * MegaBytes), "512mb"},
430 | {"array", arrayProducer(10000, 128*Byte), "10000 * 128b"},
431 | {"array", arrayProducer(1000, 1*KiloBytes), "1000 * 1kb"},
432 | {"array", arrayProducer(100, 1*MegaBytes), "100 * 1mb"},
433 | }
434 |
435 | for _, tt := range benchmarks {
436 | b.Run(fmt.Sprintf("%s_with_size_%s", tt.kind, tt.size), func(b *testing.B) {
437 | for i := 0; i < b.N; i++ {
438 | //nolint:errcheck,gosec
439 | ParseRESPMessage(tt.data)
440 | }
441 | })
442 | }
443 | }
444 |
// Size units, expressed in bytes. Each unit is 1024 times the previous one.
const (
	Byte      = 1
	KiloBytes = Byte * 1024
	MegaBytes = KiloBytes * 1024
	GigaBytes = MegaBytes * 1024
	TeraBytes = GigaBytes * 1024
)
452 |
453 | // TODO: add fuzz tests input for other kind of messages,
454 | // and handled their expected format too.
455 | func FuzzTestParseMessage(f *testing.F) {
456 | testCases := []string{
457 | "+OK\r\n",
458 | "+Hello world\r\n",
459 | "+This is a string\r\n",
460 | }
461 |
462 | for _, testCase := range testCases {
463 | f.Add(testCase)
464 | }
465 |
466 | f.Fuzz(func(t *testing.T, message string) {
467 | _, err := ParseRESPMessage(message)
468 | if err != nil {
469 | if errors.Is(err, ErrMessageTooShort) || errors.Is(err, ErrInvalidPrefix) || errors.Is(err, ErrInvalidSuffix) {
470 | t.Skip("skipping expected error")
471 | }
472 |
473 | if strings.Count(message, "\r") > 1 || strings.Count(message, "\n") > 1 {
474 | t.Skip("skipping simple string message with multiple \\r or \\n")
475 | }
476 |
477 | t.Errorf("ParseRESPMessage() error = %v", err)
478 | }
479 | })
480 | }
481 |
482 | func simpleStringProducer(messageSize int) string {
483 | return strings.Join(
484 | []string{
485 | "+",
486 | stringWithinCharset(messageSize, alnumCharset),
487 | "\r\n",
488 | },
489 | "",
490 | )
491 | }
492 |
493 | func errorStringProducer(messageSize int) string {
494 | return strings.Join(
495 | []string{
496 | "-",
497 | stringWithinCharset(messageSize, alnumCharset),
498 | "\r\n",
499 | },
500 | "",
501 | )
502 | }
503 |
504 | func bulkStringProducer(messageSize int) string {
505 | return strings.Join(
506 | []string{
507 | "$",
508 | strconv.Itoa(messageSize),
509 | "\r\n",
510 | stringWithinCharset(messageSize, alnumCharset),
511 | "\r\n",
512 | },
513 | "",
514 | )
515 | }
516 |
517 | func arrayProducer(arraySize, messageSize int) string {
518 | messages := make([]string, 0, arraySize)
519 |
520 | for i := 0; i < arraySize; i++ {
521 | messageKind := i % 4
522 |
523 | switch messageKind {
524 | case 0:
525 | messages = append(messages, simpleStringProducer(messageSize))
526 | case 1:
527 | messages = append(messages, errorStringProducer(messageSize))
528 | case 2:
529 | messages = append(messages, bulkStringProducer(messageSize))
530 | case 3:
531 | messages = append(messages, strconv.Itoa(rand.Int()))
532 | }
533 | }
534 |
535 | return strings.Join(
536 | []string{
537 | "*",
538 | strings.Join(messages, ""),
539 | "\r\n",
540 | },
541 | "",
542 | )
543 | }
544 |
// seededRand is the random source used to generate payloads. It is seeded
// with the current time, so the generated payloads differ from run to run.
var seededRand *rand.Rand = rand.New(
	rand.NewSource(time.Now().UnixNano()))

// alnumCharset holds the ASCII letters, lower and upper case, and the digits.
const alnumCharset = "abcdefghijklmnopqrstuvwxyz" +
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
550 |
551 | func stringWithinCharset(length int, charset string) string {
552 | b := make([]byte, length)
553 | for i := range b {
554 | b[i] = charset[seededRand.Intn(len(charset))]
555 | }
556 | return string(b)
557 | }
558 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/oleiade/gomme
2 |
3 | go 1.18
4 |
5 | require github.com/stretchr/testify v1.7.1
6 |
7 | require (
8 | github.com/davecgh/go-spew v1.1.0 // indirect
9 | github.com/pmezard/go-difflib v1.0.0 // indirect
10 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
11 | )
12 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
5 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
6 | github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
7 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
10 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
11 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
12 |
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleiade/gomme/c8967c191356a03f184cef70295243720365c979/logo.png
--------------------------------------------------------------------------------
/multi.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | // Count runs the provided parser `count` times.
4 | //
5 | // If the provided parser cannot be successfully applied `count` times, the operation
6 | // fails and the Result will contain an error.
7 | func Count[Input Bytes, Output any](parse Parser[Input, Output], count uint) Parser[Input, []Output] {
8 | return func(input Input) Result[[]Output, Input] {
9 | if len(input) == 0 || count == 0 {
10 | return Failure[Input, []Output](NewError(input, "Count"), input)
11 | }
12 |
13 | outputs := make([]Output, 0, int(count))
14 | remaining := input
15 | for i := 0; uint(i) < count; i++ {
16 | result := parse(remaining)
17 | if result.Err != nil {
18 | return Failure[Input, []Output](result.Err, input)
19 | }
20 |
21 | remaining = result.Remaining
22 | outputs = append(outputs, result.Output)
23 | }
24 |
25 | return Success(outputs, remaining)
26 | }
27 | }
28 |
29 | // Many0 applies a parser repeatedly until it fails, and returns a slice of all
30 | // the results as the Result's Output.
31 | //
32 | // Note that Many0 will succeed even if the parser fails to match at all. It will
33 | // however fail if the provided parser accepts empty inputs (such as `Digit0`, or
34 | // `Alpha0`) in order to prevent infinite loops.
35 | func Many0[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, []Output] {
36 | return func(input Input) Result[[]Output, Input] {
37 | results := []Output{}
38 |
39 | remaining := input
40 | for {
41 | res := parse(remaining)
42 | if res.Err != nil {
43 | return Success(results, remaining)
44 | }
45 |
46 | // Checking for infinite loops, if nothing was consumed,
47 | // the provided parser would make us go around in circles.
48 | if len(res.Remaining) == len(remaining) {
49 | return Failure[Input, []Output](NewError(input, "Many0"), input)
50 | }
51 |
52 | results = append(results, res.Output)
53 | remaining = res.Remaining
54 | }
55 | }
56 | }
57 |
58 | // Many1 applies a parser repeatedly until it fails, and returns a slice of all
59 | // the results as the Result's Output. Many1 will fail if the parser fails to
60 | // match at least once.
61 | //
62 | // Note that Many1 will fail if the provided parser accepts empty
63 | // inputs (such as `Digit0`, or `Alpha0`) in order to prevent infinite loops.
64 | func Many1[Input Bytes, Output any](parse Parser[Input, Output]) Parser[Input, []Output] {
65 | return func(input Input) Result[[]Output, Input] {
66 | first := parse(input)
67 | if first.Err != nil {
68 | return Failure[Input, []Output](first.Err, input)
69 | }
70 |
71 | // Checking for infinite loops, if nothing was consumed,
72 | // the provided parser would make us go around in circles.
73 | if len(first.Remaining) == len(input) {
74 | return Failure[Input, []Output](NewError(input, "Many1"), input)
75 | }
76 |
77 | results := []Output{first.Output}
78 | remaining := first.Remaining
79 |
80 | for {
81 | res := parse(remaining)
82 | if res.Err != nil {
83 | return Success(results, remaining)
84 | }
85 |
86 | // Checking for infinite loops, if nothing was consumed,
87 | // the provided parser would make us go around in circles.
88 | if len(res.Remaining) == len(remaining) {
89 | return Failure[Input, []Output](NewError(input, "Many1"), input)
90 | }
91 |
92 | results = append(results, res.Output)
93 | remaining = res.Remaining
94 | }
95 | }
96 | }
97 |
98 | // SeparatedList0 applies an element parser and a separator parser repeatedly in order
99 | // to produce a list of elements.
100 | //
101 | // Note that SeparatedList0 will succeed even if the element parser fails to match at all.
102 | // It will however fail if the provided element parser accepts empty inputs (such as
103 | // `Digit0`, or `Alpha0`) in order to prevent infinite loops.
104 | //
105 | // Because the `SeparatedList0` is really looking to produce a list of elements resulting
106 | // from the provided main parser, it will succeed even if the separator parser fails to
107 | // match at all. It will however fail if the provided separator parser accepts empty
108 | // inputs in order to prevent infinite loops.
109 | func SeparatedList0[Input Bytes, Output any, S Separator](
110 | parse Parser[Input, Output],
111 | separator Parser[Input, S],
112 | ) Parser[Input, []Output] {
113 | return func(input Input) Result[[]Output, Input] {
114 | results := []Output{}
115 |
116 | res := parse(input)
117 | if res.Err != nil {
118 | return Success(results, input)
119 | }
120 |
121 | // Checking for infinite loops, if nothing was consumed,
122 | // the provided parser would make us go around in circles.
123 | if len(res.Remaining) == len(input) {
124 | return Failure[Input, []Output](NewError(input, "SeparatedList0"), input)
125 | }
126 |
127 | results = append(results, res.Output)
128 | remaining := res.Remaining
129 |
130 | for {
131 | separatorResult := separator(remaining)
132 | if separatorResult.Err != nil {
133 | return Success(results, remaining)
134 | }
135 |
136 | // Checking for infinite loops, if nothing was consumed,
137 | // the provided parser would make us go around in circles.
138 | if len(separatorResult.Remaining) == len(remaining) {
139 | return Failure[Input, []Output](NewError(input, "SeparatedList0"), input)
140 | }
141 |
142 | parserResult := parse(separatorResult.Remaining)
143 | if parserResult.Err != nil {
144 | return Success(results, remaining)
145 | }
146 |
147 | results = append(results, parserResult.Output)
148 |
149 | remaining = parserResult.Remaining
150 | }
151 | }
152 | }
153 |
154 | // SeparatedList1 applies an element parser and a separator parser repeatedly in order
155 | // to produce a list of elements.
156 | //
157 | // Note that SeparatedList1 will fail if the element parser fails to match at all.
158 | //
159 | // Because the `SeparatedList1` is really looking to produce a list of elements resulting
160 | // from the provided main parser, it will succeed even if the separator parser fails to
161 | // match at all.
162 | func SeparatedList1[Input Bytes, Output any, S Separator](
163 | parse Parser[Input, Output],
164 | separator Parser[Input, S],
165 | ) Parser[Input, []Output] {
166 | return func(input Input) Result[[]Output, Input] {
167 | results := []Output{}
168 |
169 | res := parse(input)
170 | if res.Err != nil {
171 | return Failure[Input, []Output](res.Err, input)
172 | }
173 |
174 | // Checking for infinite loops, if nothing was consumed,
175 | // the provided parser would make us go around in circles.
176 | if len(res.Remaining) == len(input) {
177 | return Failure[Input, []Output](NewError(input, "SeparatedList0"), input)
178 | }
179 |
180 | results = append(results, res.Output)
181 | remaining := res.Remaining
182 |
183 | for {
184 | separatorResult := separator(remaining)
185 | if separatorResult.Err != nil {
186 | return Success(results, remaining)
187 | }
188 |
189 | // Checking for infinite loops, if nothing was consumed,
190 | // the provided parser would make us go around in circles.
191 | if len(separatorResult.Remaining) == len(remaining) {
192 | return Failure[Input, []Output](NewError(input, "SeparatedList0"), input)
193 | }
194 |
195 | parserResult := parse(separatorResult.Remaining)
196 | if parserResult.Err != nil {
197 | return Success(results, remaining)
198 | }
199 |
200 | results = append(results, parserResult.Output)
201 |
202 | remaining = parserResult.Remaining
203 | }
204 | }
205 | }
206 |
--------------------------------------------------------------------------------
/multi_test.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestCount(t *testing.T) {
10 | t.Parallel()
11 |
12 | testCases := []struct {
13 | name string
14 | parser Parser[string, []string]
15 | input string
16 | wantErr bool
17 | wantOutput []string
18 | wantRemaining string
19 | }{
20 | {
21 | name: "parsing exact count should succeed",
22 | parser: Count(Token[string]("abc"), 2),
23 | input: "abcabc",
24 | wantErr: false,
25 | wantOutput: []string{"abc", "abc"},
26 | wantRemaining: "",
27 | },
28 | {
29 | name: "parsing more than count should succeed",
30 | parser: Count(Token[string]("abc"), 2),
31 | input: "abcabcabc",
32 | wantErr: false,
33 | wantOutput: []string{"abc", "abc"},
34 | wantRemaining: "abc",
35 | },
36 | {
37 | name: "parsing less than count should fail",
38 | parser: Count(Token[string]("abc"), 2),
39 | input: "abc123",
40 | wantErr: true,
41 | wantOutput: nil,
42 | wantRemaining: "abc123",
43 | },
44 | {
45 | name: "parsing no count should fail",
46 | parser: Count(Token[string]("abc"), 2),
47 | input: "123123",
48 | wantErr: true,
49 | wantOutput: nil,
50 | wantRemaining: "123123",
51 | },
52 | {
53 | name: "parsing empty input should fail",
54 | parser: Count(Token[string]("abc"), 2),
55 | input: "",
56 | wantErr: true,
57 | wantOutput: nil,
58 | wantRemaining: "",
59 | },
60 | }
61 |
62 | for _, tc := range testCases {
63 | tc := tc
64 |
65 | t.Run(tc.name, func(t *testing.T) {
66 | t.Parallel()
67 |
68 | gotResult := tc.parser(tc.input)
69 | if (gotResult.Err != nil) != tc.wantErr {
70 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
71 | }
72 |
73 | assert.Equal(t,
74 | tc.wantOutput,
75 | gotResult.Output,
76 | "got output %v, want output %v", gotResult.Output, tc.wantOutput,
77 | )
78 |
79 | if gotResult.Remaining != tc.wantRemaining {
80 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
81 | }
82 | })
83 | }
84 | }
85 |
86 | func BenchmarkCount(b *testing.B) {
87 | parser := Count(Char[string]('#'), 3)
88 |
89 | b.ResetTimer()
90 | for i := 0; i < b.N; i++ {
91 | parser("###")
92 | }
93 | }
94 |
95 | func TestMany0(t *testing.T) {
96 | t.Parallel()
97 |
98 | type args struct {
99 | p Parser[string, []rune]
100 | }
101 | testCases := []struct {
102 | name string
103 | args args
104 | input string
105 | wantErr bool
106 | wantOutput []rune
107 | wantRemaining string
108 | }{
109 | {
110 | name: "matching parser should succeed",
111 | input: "###",
112 | args: args{
113 | p: Many0(Char[string]('#')),
114 | },
115 | wantErr: false,
116 | wantOutput: []rune{'#', '#', '#'},
117 | wantRemaining: "",
118 | },
119 | {
120 | name: "no match should succeed",
121 | input: "abc",
122 | args: args{
123 | p: Many0(Char[string]('#')),
124 | },
125 | wantErr: false,
126 | wantOutput: []rune{},
127 | wantRemaining: "abc",
128 | },
129 | {
130 | name: "empty input should succeed",
131 | input: "",
132 | args: args{
133 | p: Many0(Char[string]('#')),
134 | },
135 | wantErr: false,
136 | wantOutput: []rune{},
137 | wantRemaining: "",
138 | },
139 | }
140 | for _, tc := range testCases {
141 | tc := tc
142 |
143 | t.Run(tc.name, func(t *testing.T) {
144 | t.Parallel()
145 |
146 | gotResult := tc.args.p(tc.input)
147 | if (gotResult.Err != nil) != tc.wantErr {
148 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
149 | }
150 |
151 | // testify makes it easier comparing slices
152 | assert.Equal(t,
153 | tc.wantOutput, gotResult.Output,
154 | "got output %v, want output %v", gotResult.Output, tc.wantOutput,
155 | )
156 |
157 | if gotResult.Remaining != tc.wantRemaining {
158 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
159 | }
160 | })
161 | }
162 | }
163 |
164 | func TestMany0DetectsInfiniteLoops(t *testing.T) {
165 | t.Parallel()
166 |
167 | // Digit0 accepts empty input, and would cause an infinite loop if not detected
168 | input := "abcdef"
169 | parser := Many0(Digit0[string]())
170 |
171 | result := parser(input)
172 |
173 | assert.Error(t, result.Err)
174 | assert.Nil(t, result.Output)
175 | assert.Equal(t, input, result.Remaining)
176 | }
177 |
178 | func BenchmarkMany0(b *testing.B) {
179 | parser := Many0(Char[string]('#'))
180 |
181 | b.ResetTimer()
182 | for i := 0; i < b.N; i++ {
183 | parser("###")
184 | }
185 | }
186 |
187 | func TestMany1(t *testing.T) {
188 | t.Parallel()
189 |
190 | type args struct {
191 | p Parser[string, []rune]
192 | }
193 | testCases := []struct {
194 | name string
195 | args args
196 | input string
197 | wantErr bool
198 | wantOutput []rune
199 | wantRemaining string
200 | }{
201 | {
202 | name: "matching parser should succeed",
203 | input: "###",
204 | args: args{
205 | p: Many1(Char[string]('#')),
206 | },
207 | wantErr: false,
208 | wantOutput: []rune{'#', '#', '#'},
209 | wantRemaining: "",
210 | },
211 | {
212 | name: "matching at least once should succeed",
213 | input: "#abc",
214 | args: args{
215 | p: Many1(Char[string]('#')),
216 | },
217 | wantErr: false,
218 | wantOutput: []rune{'#'},
219 | wantRemaining: "abc",
220 | },
221 | {
222 | name: "not matching at least once should fail",
223 | input: "a##",
224 | args: args{
225 | p: Many1(Char[string]('#')),
226 | },
227 | wantErr: true,
228 | wantOutput: nil,
229 | wantRemaining: "a##",
230 | },
231 | {
232 | name: "no match should fail",
233 | input: "abc",
234 | args: args{
235 | p: Many1(Char[string]('#')),
236 | },
237 | wantErr: true,
238 | wantOutput: nil,
239 | wantRemaining: "abc",
240 | },
241 | {
242 | name: "empty input should fail",
243 | input: "",
244 | args: args{
245 | p: Many1(Char[string]('#')),
246 | },
247 | wantErr: true,
248 | wantOutput: nil,
249 | wantRemaining: "",
250 | },
251 | }
252 | for _, tc := range testCases {
253 | tc := tc
254 |
255 | t.Run(tc.name, func(t *testing.T) {
256 | t.Parallel()
257 |
258 | gotResult := tc.args.p(tc.input)
259 | if (gotResult.Err != nil) != tc.wantErr {
260 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
261 | }
262 |
263 | // testify makes it easier comparing slices
264 | assert.Equal(t,
265 | tc.wantOutput, gotResult.Output,
266 | "got output %v, want output %v", gotResult.Output, tc.wantOutput,
267 | )
268 |
269 | if gotResult.Remaining != tc.wantRemaining {
270 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
271 | }
272 | })
273 | }
274 | }
275 |
276 | func TestMany1DetectsInfiniteLoops(t *testing.T) {
277 | t.Parallel()
278 |
279 | // Digit0 accepts empty input, and would cause an infinite loop if not detected
280 | input := "abcdef"
281 | parser := Many1(Digit0[string]())
282 |
283 | result := parser(input)
284 |
285 | assert.Error(t, result.Err)
286 | assert.Nil(t, result.Output)
287 | assert.Equal(t, input, result.Remaining)
288 | }
289 |
290 | func BenchmarkMany1(b *testing.B) {
291 | parser := Many1(Char[string]('#'))
292 |
293 | b.ResetTimer()
294 | for i := 0; i < b.N; i++ {
295 | parser("###")
296 | }
297 | }
298 |
299 | func TestSeparatedList0(t *testing.T) {
300 | t.Parallel()
301 |
302 | type args struct {
303 | p Parser[string, []string]
304 | }
305 | testCases := []struct {
306 | name string
307 | args args
308 | input string
309 | wantErr bool
310 | wantOutput []string
311 | wantRemaining string
312 | }{
313 | {
314 | name: "matching parser should succeed",
315 | input: "abc,abc,abc",
316 | args: args{
317 | p: SeparatedList0(Token[string]("abc"), Char[string](',')),
318 | },
319 | wantErr: false,
320 | wantOutput: []string{"abc", "abc", "abc"},
321 | wantRemaining: "",
322 | },
323 | {
324 | name: "matching parser and missing separator should succeed",
325 | input: "abc123abc",
326 | args: args{
327 | p: SeparatedList0(Token[string]("abc"), Char[string](',')),
328 | },
329 | wantErr: false,
330 | wantOutput: []string{"abc"},
331 | wantRemaining: "123abc",
332 | },
333 | {
334 | name: "parser with separator but non-matching right side should succeed",
335 | input: "abc,def",
336 | args: args{
337 | p: SeparatedList0(Token[string]("abc"), Char[string](',')),
338 | },
339 | wantErr: false,
340 | wantOutput: []string{"abc"},
341 | wantRemaining: ",def",
342 | },
343 | {
344 | name: "parser matching on the right of the separator should succeed",
345 | input: "def,abc",
346 | args: args{
347 | p: SeparatedList0(Token[string]("abc"), Char[string](',')),
348 | },
349 | wantErr: false,
350 | wantOutput: []string{},
351 | wantRemaining: "def,abc",
352 | },
353 | {
354 | name: "empty input should succeed",
355 | input: "",
356 | args: args{
357 | p: SeparatedList0(Token[string]("abc"), Char[string](',')),
358 | },
359 | wantErr: false,
360 | wantOutput: []string{},
361 | wantRemaining: "",
362 | },
363 | {
364 | name: "parsing input without separator should succeed",
365 | input: "123",
366 | args: args{
367 | p: SeparatedList0(Digit0[string](), Char[string](',')),
368 | },
369 | wantErr: false,
370 | wantOutput: []string{"123"},
371 | wantRemaining: "",
372 | },
373 | {
374 | name: "using a parser accepting empty input should fail",
375 | input: "",
376 | args: args{
377 | p: SeparatedList0(Digit0[string](), Char[string](',')),
378 | },
379 | wantErr: true,
380 | wantOutput: nil,
381 | wantRemaining: "",
382 | },
383 | }
384 | for _, tc := range testCases {
385 | tc := tc
386 |
387 | t.Run(tc.name, func(t *testing.T) {
388 | t.Parallel()
389 |
390 | gotResult := tc.args.p(tc.input)
391 | if (gotResult.Err != nil) != tc.wantErr {
392 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
393 | }
394 |
395 | // testify makes it easier comparing slices
396 | assert.Equal(t,
397 | tc.wantOutput, gotResult.Output,
398 | "got output %v, want output %v", gotResult.Output, tc.wantOutput,
399 | )
400 |
401 | if gotResult.Remaining != tc.wantRemaining {
402 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
403 | }
404 | })
405 | }
406 | }
407 |
408 | func BenchmarkSeparatedList0(t *testing.B) {
409 | parser := SeparatedList0(Char[string]('#'), Char[string](','))
410 |
411 | t.ResetTimer()
412 | for i := 0; i < t.N; i++ {
413 | parser("#,#,#")
414 | }
415 | }
416 |
417 | func TestSeparatedList1(t *testing.T) {
418 | t.Parallel()
419 |
420 | type args struct {
421 | p Parser[string, []string]
422 | }
423 | testCases := []struct {
424 | name string
425 | args args
426 | input string
427 | wantErr bool
428 | wantOutput []string
429 | wantRemaining string
430 | }{
431 | {
432 | name: "matching parser should succeed",
433 | input: "abc,abc,abc",
434 | args: args{
435 | p: SeparatedList1(Token[string]("abc"), Char[string](',')),
436 | },
437 | wantErr: false,
438 | wantOutput: []string{"abc", "abc", "abc"},
439 | wantRemaining: "",
440 | },
441 | {
442 | name: "matching parser and missing separator should succeed",
443 | input: "abc123abc",
444 | args: args{
445 | p: SeparatedList1(Token[string]("abc"), Char[string](',')),
446 | },
447 | wantErr: false,
448 | wantOutput: []string{"abc"},
449 | wantRemaining: "123abc",
450 | },
451 | {
452 | name: "parser with separator but non-matching right side should succeed",
453 | input: "abc,def",
454 | args: args{
455 | p: SeparatedList1(Token[string]("abc"), Char[string](',')),
456 | },
457 | wantErr: false,
458 | wantOutput: []string{"abc"},
459 | wantRemaining: ",def",
460 | },
461 | {
462 | name: "parser matching on the right of the separator should succeed",
463 | input: "def,abc",
464 | args: args{
465 | p: SeparatedList1(Token[string]("abc"), Char[string](',')),
466 | },
467 | wantErr: true,
468 | wantOutput: nil,
469 | wantRemaining: "def,abc",
470 | },
471 | {
472 | name: "empty input should fail",
473 | input: "",
474 | args: args{
475 | p: SeparatedList1(Token[string]("abc"), Char[string](',')),
476 | },
477 | wantErr: true,
478 | wantOutput: nil,
479 | wantRemaining: "",
480 | },
481 | }
482 | for _, tc := range testCases {
483 | tc := tc
484 |
485 | t.Run(tc.name, func(t *testing.T) {
486 | t.Parallel()
487 |
488 | gotResult := tc.args.p(tc.input)
489 | if (gotResult.Err != nil) != tc.wantErr {
490 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
491 | }
492 |
493 | // testify makes it easier comparing slices
494 | assert.Equal(t,
495 | tc.wantOutput, gotResult.Output,
496 | "got output %v, want output %v", gotResult.Output, tc.wantOutput,
497 | )
498 |
499 | if gotResult.Remaining != tc.wantRemaining {
500 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
501 | }
502 | })
503 | }
504 | }
505 |
506 | func BenchmarkSeparatedList1(t *testing.B) {
507 | parser := SeparatedList1(Char[string]('#'), Char[string](','))
508 |
509 | t.ResetTimer()
510 | for i := 0; i < t.N; i++ {
511 | parser("#,#,#")
512 | }
513 | }
514 |
--------------------------------------------------------------------------------
/numbers.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | // import "math"
4 |
5 | // Float parses a sequence of numerical characters into a float64.
6 | // The '.' character is used as the optional decimal delimiter. Any
7 | // number without a decimal part will still be parsed as a float64.
8 | //
9 | // N.B: it is not the parser's role to make sure the floating point
10 | // number you're attempting to parse fits into a 64-bit float.
11 |
12 | // func Float[I Bytes]() Parser[I, float64] {
13 | // digitsParser := TakeWhileOneOf[I]([]rune("0123456789")...)
14 | // minusParser := Char[I]('-')
15 | // dotParser := Char[I]('.')
16 |
17 | // return func(input I) Result[float64, I] {
18 | // var negative bool
19 |
20 | // minusresult := minusParser(input)
21 | // if result.Err == nil {
22 | // negative = true
23 | // }
24 |
25 | // result = digitsParser(result.Remaining)
26 | // // result = Expect(digitsParser, "digits")(result.Remaining)
27 | // // if result.Err != nil {
28 | // // return result
29 | // // }
30 |
31 | // parsed, ok := result.Output.(string)
32 | // if !ok {
33 | // err := fmt.Errorf("failed parsing floating point value; " +
34 | // "reason: converting Float() parser result's output to string failed",
35 | // )
36 | // return Failure(NewFatalError(input, err, "float"), input)
37 | // }
38 | // if resultTest := dotParser(result.Remaining); resultTest.Err == nil {
39 | // if resultTest = digitsParser(resultTest.Remaining); resultTest.Err == nil {
40 | // parsed = parsed + "." + resultTest.Output.(string)
41 | // result = resultTest
42 | // }
43 | // }
44 |
45 | // floatingPointValue, err := strconv.ParseFloat(parsed, 64)
46 | // if err != nil {
47 | // err = fmt.Errorf("failed to parse '%v' as float; reason: %w", parsed, err)
48 | // return Failure(NewFatalError(input, err), input)
49 | // }
50 |
51 | // if negative {
52 | // floatingPointValue = -floatingPointValue
53 | // }
54 |
55 | // result.Output = floatingPointValue
56 |
57 | // return result
58 | // }
59 | // }
60 |
--------------------------------------------------------------------------------
/sequence.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | // Delimited parses and discards the result from the prefix parser, then
4 | // parses the result of the main parser, and finally parses and discards
5 | // the result of the suffix parser.
6 | func Delimited[I Bytes, OP, O, OS any](prefix Parser[I, OP], parser Parser[I, O], suffix Parser[I, OS]) Parser[I, O] {
7 | return func(input I) Result[O, I] {
8 | return Terminated(Preceded(prefix, parser), suffix)(input)
9 | }
10 | }
11 |
12 | // Pair applies two parsers and returns a Result containing a pair container holding
13 | // the resulting values.
14 | func Pair[I Bytes, LO, RO any, LP Parser[I, LO], RP Parser[I, RO]](
15 | leftParser LP, rightParser RP,
16 | ) Parser[I, PairContainer[LO, RO]] {
17 | return func(input I) Result[PairContainer[LO, RO], I] {
18 | leftResult := leftParser(input)
19 | if leftResult.Err != nil {
20 | return Failure[I, PairContainer[LO, RO]](NewError(input, "Pair"), input)
21 | }
22 |
23 | rightResult := rightParser(leftResult.Remaining)
24 | if rightResult.Err != nil {
25 | return Failure[I, PairContainer[LO, RO]](NewError(input, "Pair"), input)
26 | }
27 |
28 | return Success(PairContainer[LO, RO]{leftResult.Output, rightResult.Output}, rightResult.Remaining)
29 | }
30 | }
31 |
32 | // Preceded parses and discards a result from the prefix parser. It
33 | // then parses a result from the main parser and returns its result.
34 | //
35 | // Preceded is effectively equivalent to applying DiscardAll(prefix),
36 | // and then applying the main parser.
37 | func Preceded[I Bytes, OP, O any](prefix Parser[I, OP], parser Parser[I, O]) Parser[I, O] {
38 | return func(input I) Result[O, I] {
39 | prefixResult := prefix(input)
40 | if prefixResult.Err != nil {
41 | return Failure[I, O](prefixResult.Err, input)
42 | }
43 |
44 | result := parser(prefixResult.Remaining)
45 | if result.Err != nil {
46 | return Failure[I, O](result.Err, input)
47 | }
48 |
49 | return Success(result.Output, result.Remaining)
50 | }
51 | }
52 |
53 | // SeparatedPair applies two separated parsers and returns a Result containing a slice of
54 | // size 2 as its output. The first element of the slice is the result of the left parser,
55 | // and the second element is the result of the right parser. The result of the separator parser
56 | // is discarded.
57 | func SeparatedPair[I Bytes, LO, RO any, S Separator, LP Parser[I, LO], SP Parser[I, S], RP Parser[I, RO]](
58 | leftParser LP, separator SP, rightParser RP,
59 | ) Parser[I, PairContainer[LO, RO]] {
60 | return func(input I) Result[PairContainer[LO, RO], I] {
61 | leftResult := leftParser(input)
62 | if leftResult.Err != nil {
63 | return Failure[I, PairContainer[LO, RO]](NewError(input, "SeparatedPair"), input)
64 | }
65 |
66 | sepResult := separator(leftResult.Remaining)
67 | if sepResult.Err != nil {
68 | return Failure[I, PairContainer[LO, RO]](NewError(input, "SeparatedPair"), input)
69 | }
70 |
71 | rightResult := rightParser(sepResult.Remaining)
72 | if rightResult.Err != nil {
73 | return Failure[I, PairContainer[LO, RO]](NewError(input, "SeparatedPair"), input)
74 | }
75 |
76 | return Success(PairContainer[LO, RO]{leftResult.Output, rightResult.Output}, rightResult.Remaining)
77 | }
78 | }
79 |
80 | // Sequence applies a sequence of parsers and returns either a
81 | // slice of results or an error if any parser fails.
82 | func Sequence[I Bytes, O any](parsers ...Parser[I, O]) Parser[I, []O] {
83 | return func(input I) Result[[]O, I] {
84 | remaining := input
85 | outputs := make([]O, 0, len(parsers))
86 |
87 | for _, parser := range parsers {
88 | res := parser(remaining)
89 | if res.Err != nil {
90 | return Failure[I, []O](res.Err, input)
91 | }
92 |
93 | outputs = append(outputs, res.Output)
94 | remaining = res.Remaining
95 | }
96 |
97 | return Success(outputs, remaining)
98 | }
99 | }
100 |
101 | // Terminated parses a result from the main parser, it then
102 | // parses the result from the suffix parser and discards it; only
103 | // returning the result of the main parser.
104 | func Terminated[I Bytes, O, OS any](parser Parser[I, O], suffix Parser[I, OS]) Parser[I, O] {
105 | return func(input I) Result[O, I] {
106 | result := parser(input)
107 | if result.Err != nil {
108 | return Failure[I, O](result.Err, input)
109 | }
110 |
111 | suffixResult := suffix(result.Remaining)
112 | if suffixResult.Err != nil {
113 | return Failure[I, O](suffixResult.Err, input)
114 | }
115 |
116 | return Success(result.Output, suffixResult.Remaining)
117 | }
118 | }
119 |
--------------------------------------------------------------------------------
/sequence_test.go:
--------------------------------------------------------------------------------
1 | package gomme
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestDelimited(t *testing.T) {
10 | t.Parallel()
11 |
12 | type args struct {
13 | p Parser[string, string]
14 | }
15 | testCases := []struct {
16 | name string
17 | args args
18 | input string
19 | wantErr bool
20 | wantOutput string
21 | wantRemaining string
22 | }{
23 | {
24 | name: "matching parser should succeed",
25 | input: "+1\r\n",
26 | args: args{
27 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()),
28 | },
29 | wantErr: false,
30 | wantOutput: "1",
31 | wantRemaining: "",
32 | },
33 | {
34 | name: "no prefix match should fail",
35 | input: "1\r\n",
36 | args: args{
37 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()),
38 | },
39 | wantErr: true,
40 | wantOutput: "",
41 | wantRemaining: "1\r\n",
42 | },
43 | {
44 | name: "no parser match should fail",
45 | input: "+\r\n",
46 | args: args{
47 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()),
48 | },
49 | wantErr: true,
50 | wantOutput: "",
51 | wantRemaining: "+\r\n",
52 | },
53 | {
54 | name: "no suffix match should fail",
55 | input: "+1",
56 | args: args{
57 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()),
58 | },
59 | wantErr: true,
60 | wantOutput: "",
61 | wantRemaining: "+1",
62 | },
63 | {
64 | name: "empty input should fail",
65 | input: "",
66 | args: args{
67 | p: Delimited(Char[string]('+'), Digit1[string](), CRLF[string]()),
68 | },
69 | wantErr: true,
70 | wantOutput: "",
71 | wantRemaining: "",
72 | },
73 | }
74 | for _, tc := range testCases {
75 | tc := tc
76 |
77 | t.Run(tc.name, func(t *testing.T) {
78 | t.Parallel()
79 |
80 | gotResult := tc.args.p(tc.input)
81 | if (gotResult.Err != nil) != tc.wantErr {
82 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
83 | }
84 |
85 | if gotResult.Output != tc.wantOutput {
86 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
87 | }
88 |
89 | if gotResult.Remaining != tc.wantRemaining {
90 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
91 | }
92 | })
93 | }
94 | }
95 |
96 | func BenchmarkDelimited(b *testing.B) {
97 | parser := Delimited(Char[string]('+'), Digit1[string](), CRLF[string]())
98 |
99 | b.ResetTimer()
100 | for i := 0; i < b.N; i++ {
101 | parser("+1\r\n")
102 | }
103 | }
104 |
105 | func TestPair(t *testing.T) {
106 | t.Parallel()
107 |
108 | type args struct {
109 | leftParser Parser[string, string]
110 | rightParser Parser[string, string]
111 | }
112 | testCases := []struct {
113 | name string
114 | args args
115 | input string
116 | wantErr bool
117 | wantOutput PairContainer[string, string]
118 | wantRemaining string
119 | }{
120 | {
121 | name: "matching parsers should succeed",
122 | input: "1abc\r\n",
123 | args: args{
124 | leftParser: Digit1[string](),
125 | rightParser: TakeUntil(CRLF[string]()),
126 | },
127 | wantErr: false,
128 | wantOutput: PairContainer[string, string]{"1", "abc"},
129 | wantRemaining: "\r\n",
130 | },
131 | {
132 | name: "matching left parser, failing right parser, should fail",
133 | input: "1abc",
134 | args: args{
135 | leftParser: Digit1[string](),
136 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
137 | },
138 | wantErr: true,
139 | wantOutput: PairContainer[string, string]{},
140 | wantRemaining: "1abc",
141 | },
142 | {
143 | name: "failing left parser, matching right parser, should fail",
144 | input: "adef",
145 | args: args{
146 | leftParser: Digit1[string](),
147 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
148 | },
149 | wantErr: true,
150 | wantOutput: PairContainer[string, string]{},
151 | wantRemaining: "adef",
152 | },
153 | {
154 | name: "failing left parser, failing right parser, should fail",
155 | input: "123",
156 | args: args{
157 | leftParser: Digit1[string](),
158 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
159 | },
160 | wantErr: true,
161 | wantOutput: PairContainer[string, string]{},
162 | wantRemaining: "123",
163 | },
164 | }
165 | for _, tc := range testCases {
166 | tc := tc
167 |
168 | t.Run(tc.name, func(t *testing.T) {
169 | t.Parallel()
170 |
171 | parser := Pair(tc.args.leftParser, tc.args.rightParser)
172 |
173 | gotResult := parser(tc.input)
174 | if (gotResult.Err != nil) != tc.wantErr {
175 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
176 | }
177 |
178 | if gotResult.Output != tc.wantOutput {
179 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
180 | }
181 |
182 | if gotResult.Remaining != tc.wantRemaining {
183 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
184 | }
185 | })
186 | }
187 | }
188 |
189 | func BenchmarkPair(b *testing.B) {
190 | parser := Pair(Digit1[string](), TakeUntil(CRLF[string]()))
191 |
192 | b.ResetTimer()
193 | for i := 0; i < b.N; i++ {
194 | parser("1abc\r\n")
195 | }
196 | }
197 |
198 | func TestPreceded(t *testing.T) {
199 | t.Parallel()
200 |
201 | type args struct {
202 | p Parser[string, string]
203 | }
204 | testCases := []struct {
205 | name string
206 | args args
207 | input string
208 | wantErr bool
209 | wantOutput string
210 | wantRemaining string
211 | }{
212 | {
213 | name: "matching parser should succeed",
214 | input: "+123",
215 | args: args{
216 | p: Preceded(Char[string]('+'), Digit1[string]()),
217 | },
218 | wantErr: false,
219 | wantOutput: "123",
220 | wantRemaining: "",
221 | },
222 | {
223 | name: "no prefix match should fail",
224 | input: "+123",
225 | args: args{
226 | p: Preceded(Char[string]('-'), Digit1[string]()),
227 | },
228 | wantErr: true,
229 | wantOutput: "",
230 | wantRemaining: "+123",
231 | },
232 | {
233 | name: "no parser match should succeed",
234 | input: "+",
235 | args: args{
236 | p: Preceded(Char[string]('+'), Digit1[string]()),
237 | },
238 | wantErr: true,
239 | wantOutput: "",
240 | wantRemaining: "+",
241 | },
242 | {
243 | name: "empty input should fail",
244 | input: "",
245 | args: args{
246 | p: Preceded(Char[string]('+'), Digit1[string]()),
247 | },
248 | wantErr: true,
249 | wantOutput: "",
250 | wantRemaining: "",
251 | },
252 | }
253 | for _, tc := range testCases {
254 | tc := tc
255 |
256 | t.Run(tc.name, func(t *testing.T) {
257 | t.Parallel()
258 |
259 | gotResult := tc.args.p(tc.input)
260 | if (gotResult.Err != nil) != tc.wantErr {
261 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
262 | }
263 |
264 | if gotResult.Output != tc.wantOutput {
265 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
266 | }
267 |
268 | if gotResult.Remaining != tc.wantRemaining {
269 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
270 | }
271 | })
272 | }
273 | }
274 |
275 | func BenchmarkPreceded(b *testing.B) {
276 | parser := Preceded(Char[string]('+'), Digit1[string]())
277 |
278 | b.ResetTimer()
279 | for i := 0; i < b.N; i++ {
280 | parser("+123")
281 | }
282 | }
283 |
284 | func TestSeparatedPair(t *testing.T) {
285 | t.Parallel()
286 |
287 | type args struct {
288 | leftParser Parser[string, string]
289 | separatorParser Parser[string, rune]
290 | rightParser Parser[string, string]
291 | }
292 | testCases := []struct {
293 | name string
294 | args args
295 | input string
296 | wantErr bool
297 | wantOutput PairContainer[string, string]
298 | wantRemaining string
299 | }{
300 | // { true, true, true }
301 | {
302 | name: "matching parsers should succeed",
303 | input: "1|abc\r\n",
304 | args: args{
305 | leftParser: Digit1[string](),
306 | separatorParser: Char[string]('|'),
307 | rightParser: TakeUntil(CRLF[string]()),
308 | },
309 | wantErr: false,
310 | wantOutput: PairContainer[string, string]{"1", "abc"},
311 | wantRemaining: "\r\n",
312 | },
313 | // { true, true, false }
314 | {
315 | name: "matching left parser, matching separator, failing right parser, should fail",
316 | input: "1|abc",
317 | args: args{
318 | leftParser: Digit1[string](),
319 | separatorParser: Char[string]('|'),
320 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
321 | },
322 | wantErr: true,
323 | wantOutput: PairContainer[string, string]{},
324 | wantRemaining: "1|abc",
325 | },
326 | // { true, false, true }
327 | {
328 | name: "matching left parser, failing separator, matching right parser, should fail",
329 | input: "1^abc",
330 | args: args{
331 | leftParser: Digit1[string](),
332 | separatorParser: Char[string]('|'),
333 | rightParser: TakeWhileOneOf[string]('a', 'b', 'c'),
334 | },
335 | wantErr: true,
336 | wantOutput: PairContainer[string, string]{},
337 | wantRemaining: "1^abc",
338 | },
339 | // { true, false, false }
340 | {
341 | name: "matching left parser, failing separator, failing right parser, should fail",
342 | input: "1^abc",
343 | args: args{
344 | leftParser: Digit1[string](),
345 | separatorParser: Char[string]('|'),
346 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
347 | },
348 | wantErr: true,
349 | wantOutput: PairContainer[string, string]{},
350 | wantRemaining: "1^abc",
351 | },
352 | // { false, true, true }
353 | {
354 | name: "failing left parser, matching separator, matching right parser, should fail",
355 | input: "a|def",
356 | args: args{
357 | leftParser: Digit1[string](),
358 | separatorParser: Char[string]('|'),
359 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
360 | },
361 | wantErr: true,
362 | wantOutput: PairContainer[string, string]{},
363 | wantRemaining: "a|def",
364 | },
365 | // { false, true, false }
366 | {
367 | name: "failing left parser, matching separator, failing right parser, should fail",
368 | input: "a|123",
369 | args: args{
370 | leftParser: Digit1[string](),
371 | separatorParser: Char[string]('|'),
372 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
373 | },
374 | wantErr: true,
375 | wantOutput: PairContainer[string, string]{},
376 | wantRemaining: "a|123",
377 | },
378 | // { false, false, true }
379 | {
380 | name: "failing left parser, failing separator, matching right parser, should fail",
381 | input: "a^def",
382 | args: args{
383 | leftParser: Digit1[string](),
384 | separatorParser: Char[string]('|'),
385 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
386 | },
387 | wantErr: true,
388 | wantOutput: PairContainer[string, string]{},
389 | wantRemaining: "a^def",
390 | },
391 | // { false, false, false }
392 | {
393 | name: "failing left parser, failing separator, failing right parser, should fail",
394 | input: "a^123",
395 | args: args{
396 | leftParser: Digit1[string](),
397 | separatorParser: Char[string]('|'),
398 | rightParser: TakeWhileOneOf[string]('d', 'e', 'f'),
399 | },
400 | wantErr: true,
401 | wantOutput: PairContainer[string, string]{},
402 | wantRemaining: "a^123",
403 | },
404 | }
405 | for _, tc := range testCases {
406 | tc := tc
407 |
408 | t.Run(tc.name, func(t *testing.T) {
409 | t.Parallel()
410 |
411 | parser := SeparatedPair(tc.args.leftParser, tc.args.separatorParser, tc.args.rightParser)
412 |
413 | gotResult := parser(tc.input)
414 | if (gotResult.Err != nil) != tc.wantErr {
415 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
416 | }
417 |
418 | if gotResult.Output != tc.wantOutput {
419 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
420 | }
421 |
422 | if gotResult.Remaining != tc.wantRemaining {
423 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
424 | }
425 | })
426 | }
427 | }
428 |
429 | func BenchmarkSeparatedPair(b *testing.B) {
430 | parser := SeparatedPair(Digit1[string](), Char[string]('|'), TakeUntil(CRLF[string]()))
431 |
432 | b.ResetTimer()
433 | for i := 0; i < b.N; i++ {
434 | parser("1|abc\r\n")
435 | }
436 | }
437 |
438 | func TestSequence(t *testing.T) {
439 | t.Parallel()
440 |
441 | type args struct {
442 | p Parser[string, []string]
443 | }
444 | testCases := []struct {
445 | name string
446 | args args
447 | input string
448 | wantErr bool
449 | wantOutput []string
450 | wantRemaining string
451 | }{
452 | {
453 | name: "matching parsers should succeed",
454 | input: "1a3",
455 | args: args{
456 | p: Sequence(Digit1[string](), Alpha0[string](), Digit1[string]()),
457 | },
458 | wantErr: false,
459 | wantOutput: []string{"1", "a", "3"},
460 | wantRemaining: "",
461 | },
462 | {
463 | name: "matching parsers in longer input should succeed",
464 | input: "1a3bcd",
465 | args: args{
466 | p: Sequence(Digit1[string](), Alpha0[string](), Digit1[string]()),
467 | },
468 | wantErr: false,
469 | wantOutput: []string{"1", "a", "3"},
470 | wantRemaining: "bcd",
471 | },
472 | {
473 | name: "partially matching parsers should fail",
474 | input: "1a3",
475 | args: args{
476 | p: Sequence(Digit1[string](), Digit1[string](), Digit1[string]()),
477 | },
478 | wantErr: true,
479 | wantOutput: nil,
480 | wantRemaining: "1a3",
481 | },
482 | {
483 | name: "too short input should fail",
484 | input: "12",
485 | args: args{
486 | p: Sequence(Digit1[string](), Digit1[string](), Digit1[string]()),
487 | },
488 | wantErr: true,
489 | wantOutput: nil,
490 | wantRemaining: "12",
491 | },
492 | {
493 | name: "empty input should succeed",
494 | input: "",
495 | args: args{
496 | p: Sequence(Digit1[string](), Digit1[string](), Digit1[string]()),
497 | },
498 | wantErr: true,
499 | wantOutput: nil,
500 | wantRemaining: "",
501 | },
502 | }
503 | for _, tc := range testCases {
504 | tc := tc
505 |
506 | t.Run(tc.name, func(t *testing.T) {
507 | t.Parallel()
508 |
509 | gotResult := tc.args.p(tc.input)
510 | if (gotResult.Err != nil) != tc.wantErr {
511 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
512 | }
513 |
514 | // testify makes it easier comparing slices
515 | assert.Equal(t,
516 | tc.wantOutput, gotResult.Output,
517 | "got output %v, want output %v", gotResult.Output, tc.wantOutput,
518 | )
519 |
520 | if gotResult.Remaining != tc.wantRemaining {
521 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
522 | }
523 | })
524 | }
525 | }
526 |
527 | func BenchmarkSequence(b *testing.B) {
528 | parser := Sequence(Digit1[string](), Alpha0[string](), Digit1[string]())
529 |
530 | b.ResetTimer()
531 | for i := 0; i < b.N; i++ {
532 | parser("123")
533 | }
534 | }
535 |
536 | func TestTerminated(t *testing.T) {
537 | t.Parallel()
538 |
539 | type args struct {
540 | p Parser[string, string]
541 | }
542 | testCases := []struct {
543 | name string
544 | args args
545 | input string
546 | wantErr bool
547 | wantOutput string
548 | wantRemaining string
549 | }{
550 | {
551 | name: "matching parser should succeed",
552 | input: "1+23",
553 | args: args{
554 | p: Terminated(Digit1[string](), Char[string]('+')),
555 | },
556 | wantErr: false,
557 | wantOutput: "1",
558 | wantRemaining: "23",
559 | },
560 | {
561 | name: "no suffix match should fail",
562 | input: "1-23",
563 | args: args{
564 | p: Terminated(Digit1[string](), Char[string]('+')),
565 | },
566 | wantErr: true,
567 | wantOutput: "",
568 | wantRemaining: "1-23",
569 | },
570 | {
571 | name: "no parser match should succeed",
572 | input: "+",
573 | args: args{
574 | p: Terminated(Digit1[string](), Char[string]('+')),
575 | },
576 | wantErr: true,
577 | wantOutput: "",
578 | wantRemaining: "+",
579 | },
580 | {
581 | name: "empty input should fail",
582 | input: "",
583 | args: args{
584 | p: Terminated(Digit1[string](), Char[string]('+')),
585 | },
586 | wantErr: true,
587 | wantOutput: "",
588 | wantRemaining: "",
589 | },
590 | }
591 | for _, tc := range testCases {
592 | tc := tc
593 |
594 | t.Run(tc.name, func(t *testing.T) {
595 | t.Parallel()
596 |
597 | gotResult := tc.args.p(tc.input)
598 | if (gotResult.Err != nil) != tc.wantErr {
599 | t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
600 | }
601 |
602 | if gotResult.Output != tc.wantOutput {
603 | t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput)
604 | }
605 |
606 | if gotResult.Remaining != tc.wantRemaining {
607 | t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
608 | }
609 | })
610 | }
611 | }
612 |
613 | func BenchmarkTerminated(b *testing.B) {
614 | parser := Terminated(Digit1[string](), Char[string]('+'))
615 |
616 | b.ResetTimer()
617 | for i := 0; i < b.N; i++ {
618 | parser("123+")
619 | }
620 | }
621 |
--------------------------------------------------------------------------------