├── .gitignore
├── README.md
├── ROADMAP.org
├── doc
    └── guide.markdown
├── project.clj
├── src
    ├── clj
    │   ├── parsatron
    │   │   └── languages
    │   │   │   ├── bencode.clj
    │   │   │   └── bf.clj
    │   └── the
    │   │   └── parsatron.clj
    └── cljs
    │   └── the
    │       └── parsatron.cljs
└── test
    ├── clj
        └── parsatron
        │   ├── languages
        │       ├── test_bencode.clj
        │       └── test_bf.clj
        │   ├── test.clj
        │   └── test_trampoline.clj
    ├── cljs
        └── parsatron
        │   └── test.cljs
    └── resources
        └── parsatron_test.html


/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | /classes
 3 | /checkouts
 4 | pom.xml
 5 | pom.xml.asc
 6 | *.jar
 7 | *.class
 8 | /.lein-*
 9 | /.nrepl-port
10 | test/resources/parsatron_test.js


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # The Parsatron
  2 | 
  3 | Born from Haskell's Parsec library, The Parsatron is a functional
  4 | parser library. The Parsatron provides a lot of very small functions that can
  5 | be combined into larger ones to very quickly write parsers for languages.
  6 | 
  7 | Like all parser combinator libraries, The Parsatron produces recursive-descent
  8 | parsers that are best suited for LL(1) grammars. However, The Parsatron offers
  9 | *infinite lookahead* which means you can try and parse any insane thing you'd
 10 | like and if it doesn't work out, fall back to where you started. It's a feature
 11 | that's worked out well for others. I'm sure you'll find something useful to do
 12 | with it.
 13 | 
 14 | 
 15 | ## Installation
 16 | 
 17 | You can use The Parsatron by including
 18 | 
 19 |     [the/parsatron "0.0.8"]
 20 | 
 21 | in your `project.clj` dependencies. It's available for download from Clojars.
 22 | 
 23 | ## ClojureScript Support
 24 | 
 25 | The Parsatron has been ported to ClojureScript and is included in the
 26 | library distribution. There are a few small differences between ClojureScript
 27 | and Clojure that follow The Parsatron into the browser, namely:
 28 | 
 29 | * The Parsatron makes liberal use of macros: `>>`, `let->>` and
 30 |   `defparser` must be included via `:require-macros`
 31 | * ClojureScript has no notion of a character type like Clojure does.
 32 |   The Parsatron considers Strings of length 1 to be characters
 33 | 
 34 | The Parsatron's ClojureScript tests can be run by first building them:
 35 | 
 36 |     lein cljsbuild once
 37 | 
 38 | and then opening the html file [test/resources/parsatron_test.html](test/resources/parsatron_test.html)
 39 | 
 40 | ## Usage
 41 | 
 42 | A basic syntax checker for a certain profane esoteric programming language could
 43 | be defined as follows:
 44 | 
 45 |     (defparser instruction []
 46 |       (choice (char \>)
 47 |               (char \<)
 48 |               (char \+)
 49 |               (char \-)
 50 |               (char \.)
 51 |               (char \,)
 52 |               (between (char \[) (char \]) (many (instruction)))))
 53 | 
 54 |     (defparser bf []
 55 |       (many (instruction))
 56 |       (eof))
 57 | 
 58 | The `defparser` forms create new parsers that you can combine into other, more
 59 | complex parsers. As you can see in this example, those parsers can be recursive.
 60 | 
 61 | The `choice`, `char`, `between` and `many` functions you see are themselves
 62 | combinators, provided *gratis* by the library. Some, like `choice`, `many`, and
 63 | `between`, take parsers as arguments and return you a new one, wholly different,
 64 | but exhibiting eerily familiar behavior. Some, like `char`, take less exotic input
 65 | (in this case, a humble character) and return more basic parsers, that perform
 66 | what is asked of them without hesitation or spite.
 67 | 
 68 | You execute a parser over some input via the `run` form.
 69 | 
 70 |     (run (bf) ",>++++++[<-------->-],[<+>-]<.")
 71 | 
 72 | Currently, The Parsatron only provides character-oriented parsers, but the ideas
 73 | it's built on are powerful enough that with the right series of commits, it can
 74 | be made to run over sequences of arbitrary "tokens". Clojure's handling of
 75 | sequences and sequence-like things is a feature deeply ingrained in the language's
 76 | ethos. Look for expansion in this area.
 77 | 
 78 | * * * * *
 79 | 
 80 | Beyond just verifying that a string is a valid member of some language, The
 81 | Parsatron offers you facilities for interacting with and operating on the things
 82 | you parse via sequencing of multiple parsers and binding their results. The
 83 | macros `>>` and `let->>` embody this facility.
 84 | 
 85 | As an example, [bencoded strings](http://en.wikipedia.org/wiki/Bencode) are prefixed by their length and a colon:
 86 | 
 87 |     (defparser ben-string []
 88 |       (let->> [length (integer)]
 89 |         (>> (char \:)
 90 |             (times length (any-char)))))
 91 | 
 92 | `let->>` allows you to capture and name the result of a parser so its value may
 93 | be used later. `>>` is very similar to Clojure's `do` in that it executes its
 94 | forms in order, but "throws away" all but the value of the last form.
 95 | 
 96 |     (run (ben-string) "4:spam") ;; => [\s \p \a \m]
 97 | 
 98 | ## License
 99 | 
100 | Copyright (C) 2011 Nate Young
101 | 
102 | Distributed under the Eclipse Public License, the same as Clojure.
103 | 


--------------------------------------------------------------------------------
/ROADMAP.org:
--------------------------------------------------------------------------------
 1 | * 0.1.0
 2 | ** baseline sane error messages
 3 | * 0.4.0
 4 | ** website with documentation
 5 | at the least move github issue discussions into coherent pages
 6 | marginalia?
 7 | list of other good resources for learning parser combinators
 8 | * 1.0
 9 | ** grammar support
10 | put in a grammar in a BNF format and get back parsatron-implemented parsers
11 | ** 5-7 non-trivial example parsers
12 | at least one using a lexer + parsatron over a seq of lexical tokens, not chars
13 | ** basic compile-time parser optimizations where possible
14 | ** unified clj+cljs codebase + host-environment specific files
15 | low-magic build process
16 | one command to run both clj & cljs tests
17 | 


--------------------------------------------------------------------------------
/doc/guide.markdown:
--------------------------------------------------------------------------------
  1 | A Guide to the Parsatron
  2 | ========================
  3 | 
  4 | The Parsatron is a library for building parsers for languages.  For an overview
  5 | of how it works internally you can watch [this talk][talk].
  6 | 
  7 | This document will show you the basics of how to use the Parsatron as an end
  8 | user.
  9 | 
 10 | [talk]: http://www.infoq.com/presentations/Parser-Combinators
 11 | 
 12 | Importing
 13 | ---------
 14 | 
 15 | Assuming you have the library installed, you can grab all the things you'll
 16 | need by using it:
 17 | 
 18 |     (ns myparser.core
 19 |       (:refer-clojure :exclude [char])
 20 |       (:use [the.parsatron]))
 21 | 
 22 | Notice the exclusion of `clojure.core/char`, which would otherwise collide with
 23 | the `char` imported from the parsatron.
 24 | 
 25 | You can, of course, use `:only` if you want, though that can get tedious very
 26 | quickly.
 27 | 
 28 | Running
 29 | -------
 30 | 
 31 | Let's see how to run a basic parser.  It won't do much, but it will get
 32 | something on the screen so we can try things as we go.  Assuming you've got
 33 | everything imported:
 34 | 
 35 |     (run (char \H) "Hello, world!")
 36 |     ; \H
 37 | 
 38 | The `run` function takes a parser and some input, runs the parser on that
 39 | input, and returns the result.
 40 | 
 41 | The parser we passed here was `(char \H)`.  We'll talk more about parsers in a
 42 | second, but for now just know that it's a parser that will parse a single "H"
 43 | character.
 44 | 
 45 | Notice that it only parsed the first character, and even though there was more
 46 | left it still successfully returned.  We'll talk about how to make sure that
 47 | there's no remaining input later.
 48 | 
 49 | Input
 50 | -----
 51 | 
 52 | We passed a string as the input to `run` in our first example, but the input
 53 | doesn't necessarily have to be a string.  It can be any sequence.  For example,
 54 | this works:
 55 | 
 56 |     (run (token #{1 2}) [1 "cats" :dogs])
 57 |     ; 1
 58 | 
 59 | The `(token #{1 2})` is a parser that matches the *integer* 1 or the *integer*
 60 | 2, and we've passed it a vector of things.
 61 | 
 62 | Errors
 63 | ------
 64 | 
 65 | If the parser you give to `run` can't parse the input successfully, a
 66 | RuntimeException will be thrown:
 67 | 
 68 |     (run (char \q) "Hello, world!")
 69 |     ; RuntimeException Unexpected token 'H' at line: 1 column: 1 ...
 70 | 
 71 | The exception will tell you the line and column of the error, which is usually
 72 | quite helpful.
 73 | 
 74 | Parsers
 75 | -------
 76 | 
 77 | Now that we've got the basics, it's time to talk about how to create new
 78 | parsers.
 79 | 
 80 | A "parser" is, technically, a function that takes 5 arguments and returns
 81 | a special value, but you don't need to worry about that yet.  What you *do* need
 82 | to worry about is how to create them and combine them.
 83 | 
 84 | When we ran `(char \H)` in the first example, it returned a parser.  `char`
 85 | itself is a *function* that, when given a character, creates a parser that
 86 | parses that character.
 87 | 
 88 | Read that again and make sure you understand it before moving on.  `char` is
 89 | not a parser.  It's a function that creates parsers.  Character goes in, parser
 90 | comes out:
 91 | 
 92 |     (def h-parser (char \h))
 93 |     (run h-parser "hi")
 94 |     ; \h
 95 | 
 96 | Basic Built-In Parsers
 97 | ----------------------
 98 | 
 99 | There are a few other basic parser-creating functions that you'll probably find
100 | useful, which we'll talk about now.
101 | 
102 | ### token
103 | 
104 | `token` creates parsers that match single items from the input stream (which
105 | are characters if the input stream happens to be a string).  You give it a
106 | predicate, and it returns a parser that parses and returns items that match the
107 | predicate.  For example:
108 | 
109 |     (defn less-than-five [i]
110 |       (< i 5))
111 | 
112 |     (run (token less-than-five)
113 |          [3])
114 |     ; 3
115 | 
116 | The predicate can be any function, so things like anonymous functions and sets
117 | work well.
118 | 
119 | ### char
120 | 
121 | We've already seen `char`, which creates parsers that parse and return a
122 | single, specific character.
123 | 
124 |     (run (char \H) "Hello, world!")
125 |     ; \H
126 | 
127 | ### any-char
128 | 
129 | `any-char` creates parsers that will parse and return any character.  Remember
130 | that we can use the parsatron to parse more than just strings:
131 | 
132 |     (run (any-char) "Cats")
133 |     ; \C
134 | 
135 |     (run (any-char) [\C \a \t \s])
136 |     ; \C
137 | 
138 |     (run (any-char) [1 2 3])
139 |     ; RuntimeException...
140 | 
141 | ### letter and digit
142 | 
143 | `letter` and `digit` create parsers that parse and return letter characters
144 | (a-z and A-Z) and digit characters (0-9) respectively.
145 | 
146 |     (run (letter) "Dogs")
147 |     ; \D
148 | 
149 |     (run (digit) "100")
150 |     ; \1
151 | 
152 | Note that digit works with *character* objects.  It won't work with actual
153 | integers:
154 | 
155 |     (run (digit) [10 20 30])
156 |     ; RuntimeException...
157 | 
158 | If you want a parser that matches numbers in a non-string input sequence, use
159 | `token` and the Clojure builtin function `number?` to make it:
160 | 
161 |     (run (token number?) [10 20 30])
162 |     ; 10
163 | 
164 | ### string
165 | 
166 | `string` creates parsers that parse and return a sequence of characters given
167 | as a string:
168 | 
169 |     (run (string "Hello") "Hello, world!")
170 |     ; "Hello"
171 | 
172 | Note that this is the first time we've seen a parser that consumes more than
173 | one item in the input sequence.
174 | 
175 | ### eof
176 | 
177 | `eof` creates parsers that ensure the input stream doesn't contain anything else:
178 | 
179 |     (run (eof) "")
180 |     ; nil
181 | 
182 |     (run (eof) "a")
183 |     ; RuntimeException...
184 | 
185 | On its own it's not very useful, but we'll need it once we learn how to combine
186 | parsers.
187 | 
188 | Combining Parsers
189 | -----------------
190 | 
191 | The Parsatron wouldn't be very useful if we could only ever parse one thing at
192 | a time.  There are a number of ways you can combine parsers to build up complex
193 | ones from basic parts.
194 | 
195 | ### >>
196 | 
197 | The `>>` macro is the simplest way to combine parsers.  It takes any number of
198 | parsers and creates a new parser.  This new parser runs them in order and
199 | returns the value of the last one.
200 | 
201 | Again, `>>` takes *parsers* and returns a new *parser*.  We'll see this many
202 | times in this section.
203 | 
204 | Here's an example:
205 | 
206 |     (def my-parser (>> (char \a)
207 |                        (digit)))
208 | 
209 |     (run my-parser "a5")
210 |     ; \5
211 | 
212 |     (run my-parser "5a")
213 |     ; RuntimeException...
214 | 
215 |     (run my-parser "b5")
216 |     ; RuntimeException...
217 | 
218 |     (run my-parser "aq")
219 |     ; RuntimeException...
220 | 
221 | We create a parser from two other parsers with `>>` and run it on some input.
222 | `>>` runs its constituent parsers in order, and they all have to match for it
223 | to parse successfully.
224 | 
225 | Now that we can combine parsers, we can also ensure that there's no garbage
226 | after the stuff we parse by using `eof`:
227 | 
228 |     (run (>> (digit) (eof)) "1")
229 |     ; nil
230 | 
231 |     (run (>> (digit) (eof)) "1 cat")
232 |     ; RuntimeException...
233 | 
234 | ### times
235 | 
236 | The next way to combine parsers (or, really, a parser with itself) is the
237 | `times` function.
238 | 
239 | `times` is a function that takes a count and a parser, and returns a parser that
240 | repeats the one you gave it the specified number of times and returns the
241 | results concatenated into a sequence.
242 | 
243 | For example:
244 | 
245 |     (run (times 5 (letter)) "Hello, world!")
246 |     ; (\H \e \l \l \o)
247 | 
248 | This is different than `(>> (letter) (letter) (letter) (letter) (letter))`
249 | because it returns *all* of the parsers' results, not just the last one.
250 | 
251 | ### many
252 | 
253 | `many` is the first creator of "open-ended" parsers we've seen.  It's a function
254 | that takes a parser and returns a new parser that will parse zero or more of the
255 | one you gave it, and return the results concatenated into a sequence.
256 | 
257 | For example:
258 | 
259 |     (run (many (digit)) "100 cats")
260 |     ; (\1 \0 \0)
261 | 
262 | Now we can start to build much more powerful parsers:
263 | 
264 |     (def number-parser (many (digit)))
265 |     (def whitespace-parser (many (token #{\space \newline \tab})))
266 | 
267 |     (run (>> number-parser whitespace-parser number-parser) "100    400")
268 |     ; (\4 \0 \0)
269 | 
270 | We still need to talk about how to get more than just the last return value, but
271 | that will come later.
272 | 
273 | ### many1
274 | 
275 | `many1` is just like `many`, except that the parsers it creates require at least
276 | one item.  It's like `+` in a regular expression instead of `*`.
277 | 
278 |     (def number-parser (many (digit)))
279 |     (def number-parser1 (many1 (digit)))
280 | 
281 |     (run number-parser "")
282 |     ; []
283 | 
284 |     (run number-parser "100")
285 |     ; (\1 \0 \0)
286 | 
287 |     (run number-parser1 "")
288 |     ; RuntimeException...
289 | 
290 |     (run number-parser1 "100")
291 |     ; (\1 \0 \0)
292 | 
293 | ### either
294 | 
295 | `either` is a function that takes two parsers and creates a new parser. If the
296 | first one succeeds its value is returned; if it fails, the second parser is
297 | tried and its value is returned.
298 | 
299 |     (def number (many1 (digit)))
300 |     (def word (many1 (letter)))
301 | 
302 |     (def number-or-word (either number word))
303 | 
304 |     (run number-or-word "dog")
305 |     ; (\d \o \g)
306 | 
307 |     (run number-or-word "42")
308 |     ; (\4 \2)
309 | 
310 |     (run number-or-word "@#$")
311 |     ; RuntimeException ...
312 | 
313 | ### choice
314 | 
315 | `choice` takes one or more parsers and creates a parser that will try each of
316 | them in order until one parses successfully, and return its result. It is 
317 | different from `either` in that it may take more than two parsers while 
318 | `either` can only take 2.
319 | 
320 | For example:
321 | 
322 |     (def number (many1 (digit)))
323 |     (def word (many1 (letter)))
324 |     (def other (many1 (any-char)))
325 | 
326 |     (def number-or-word-or-anything (choice number word other))
327 | 
328 |     (run number-or-word-or-anything "dog")
329 |     ; (\d \o \g)
330 | 
331 |     (run number-or-word-or-anything "42")
332 |     ; (\4 \2)
333 | 
334 |     (run number-or-word-or-anything "!@#$")
335 |     ; (\! \@ \# \$)
336 | 
337 | Notice that we used `many1` when defining the parsers `number` and `word`.  If
338 | we had used `many` then this would always parse as a number because if there
339 | were no digits it would successfully return an empty sequence.
340 | 
341 | 
342 | ### between
343 | 
344 | `between` is a function that takes three parsers, call them left, right, and
345 | center.  It creates a parser that parses them in left - center - right order and
346 | returns the result of center.
347 | 
348 | This is a convenient way to handle things like parentheses:
349 | 
350 |     (def whitespace-char (token #{\space \newline \tab}))
351 |     (def optional-whitespace (many whitespace-char))
352 | 
353 |     (def open-paren (char \())
354 |     (def close-paren (char \)))
355 | 
356 |     (def number (many1 (digit)))
357 | 
358 |     (run (between (>> open-paren optional-whitespace)
359 |                   (>> optional-whitespace close-paren)
360 |                   number)
361 |         "(123    )")
362 |     ; (\1 \2 \3)
363 | 
364 | This example is a bit more complicated than we've seen so far, so slow down and
365 | make sure you know what's going on.
366 | 
367 | The three parsers we're giving to `between` are:
368 | 
369 | 1. `(>> open-paren optional-whitespace)`
370 | 2. `(>> optional-whitespace close-paren)`
371 | 3. `number`
372 | 
373 | Once you're comfortable with this example, it's time to move on to the next
374 | stage of parsing: building and returning values.
375 | 
376 | Returning Values
377 | ----------------
378 | 
379 | So far we've looked at many ways to parse input.  If you just need to validate
380 | that input is in the correct format, but not *do* anything with it, you're all
381 | set.  But usually the goal of parsing something is to do things with it, so
382 | let's look at how that works now.
383 | 
384 | We've been using the word "returns" in a fast-and-loose fashion so far, but now
385 | it's time to look a bit more closely at what it means in the Parsatron.
386 | 
387 | ### defparser and always
388 | 
389 | When we looked at parsers created with `char` (like `(char \H)`) we said that
390 | these parsers *returned* that character they parsed.  That's not quite true.
391 | They actually return a specially-wrapped value.
392 | 
393 | If you want to know exactly what that special wrapping is, watch the [talk][].
394 | But you don't really need to understand the guts to use the Parsatron.  You just
395 | need to know how to create them.
396 | 
397 | This is the first time we're going to be creating parsers that are more than
398 | just simple combinations of existing ones.  To do that we need to use a special
399 | macro that handles setting them up properly: `defparser`.  Look at the following
400 | example (don't worry about what `always` is yet):
401 | 
402 |     (defparser sample []
403 |       (string "Hello")
404 |       (always 42))
405 | 
406 | First of all, `defparser` doesn't define parsers.  It defines functions that
407 | *create* parsers, just like all of the ones we've seen so far.  Yes, I know how
408 | ridiculous that sounds.  In practice it's only *slightly* confusing.
409 | 
410 | So now we've got a function `sample` that we can use to create a parser by
411 | calling it:
412 | 
413 |     (def my-sample-parser (sample))
414 | 
415 | Okay, now let's run it on some input:
416 | 
417 |     (run my-sample-parser "Hello, world!")
418 |     ; 42
419 | 
420 | There's a bunch of interesting things going on here, so let's slow down and take
421 | a look.
422 | 
423 | First, the parsers created by the functions `defparser` defines implicitly wrap
424 | their bodies in `>>`, which as we've seen runs its argument parsers in order and
425 | returns the last result.  So our `(sample)` parser will run the "Hello" string
426 | parser, and then the always parser (which it uses as the result).
427 | 
428 | So what is this `always` thing?  Well, remember at the beginning of this section
429 | we said that parsers return a specially-wrapped value?  `always` is a way to
430 | simply stick a piece of data in this special wrapper so it can be the result of
431 | a parser.
432 | 
433 | Here's a little drawing that might help:
434 | 
435 |     raw input --> (run ...) --> raw output
436 |                   |      ^
437 |                   |      |
438 |                   |  wrapped output
439 |                   v      |
440 |                (some parser)
441 | 
442 | `run` takes the wrapped output from the parser and unwraps it for us before
443 | returning it, which is why our `run` calls always gave us vanilla Clojure data
444 | structures before.
445 | 
446 | We're almost to the point where we can create full-featured parsers.  The final
447 | piece of the puzzle is a way to intercept results and make decisions inside of
448 | our parsers.
449 | 
450 | ### let->>
451 | 
452 | The `let->>` macro is the magic glue that's going to make creating your parsers
453 | fun.  In a nutshell, it lets you bind (unwrapped) parser results to names, which
454 | you can then use normally.  Let's just take a look at how it works:
455 | 
456 |     (defparser word []
457 |       (many1 (letter)))
458 | 
459 |     (defparser greeting []
460 |       (let->> [prefix (string "Hello, ")
461 |                name (word)
462 |                punctuation (choice (char \.)
463 |                                    (char \!))]
464 |         (if (= punctuation \!)
465 |           (always [(apply str name) :excited])
466 |           (always [(apply str name) :not-excited]))))
467 | 
468 |     (run (greeting) "Hello, Cat!")
469 |     ; ["Cat" :excited]
470 | 
471 |     (run (greeting) "Hello, Dog.")
472 |     ; ["Dog" :not-excited]
473 | 
474 | There's a lot happening here so let's look at it piece-by-piece.
475 | 
476 | First we use `defparser` to make a `word` function for creating word parsers.
477 | We could have done this with `(def word (many1 (letter)))` and then used it as
478 | `word` later, but I find it's easier to just use `defparser` for everything.
479 | That way we always get parsers the same way: by calling a function.
480 | 
481 | Next we have our `greeting` parser (technically a function that makes a parser,
482 | but you get the idea by now).  Inside we have a `let->>` that runs three parsers
483 | and binds their (unwrapped) results to names:
484 | 
485 | 1. `(string "Hello, ")` parses a literal string.  `prefix` gets bound to the
486 |    string `"Hello, "`.
487 | 2. `(word)` parses one or more letters.  `name` gets bound to the result, which
488 |    is a sequence of chars like `(\C \a \t)`.
489 | 3. `(choice (char \.) (char \!))` parses a period or exclamation point.
490 |    `punctuation` gets bound to the character that was parsed, like `\.` or `\!`.
491 | 
492 | That's it for the binding section.  Next we have the body of the `let->>`.  This
493 | needs to return a *wrapped* value, but we can do anything we like with our bound
494 | variables to determine what to return.  In this case we return different things
495 | depending on whether the greeting ended with an exclamation point or not.
496 | 
497 | Notice how the return values are wrapped in `(always ...)`.  Also notice how all
498 | the bound values have been unwrapped for us by `let->>`.  `name` really is just
499 | a sequence of characters which can be used with `(apply str ...)` as usual.
500 | 
501 | You might wonder whether you can move the `(apply str ...)` into the `let->>`
502 | binding form, so we don't have to do it twice.  Unfortunately you can't.
503 | **Every right hand side in a `let->>` binding form has to evaluate to a parser**.
504 | 
505 | If you tried to do something like `(let->> [name (apply str (word))] ...)` it
506 | wouldn't work for two reasons.  First, `let->>` evaluates the right hand side
507 | and expects the result to be a parser, which it then runs.  So it would call
508 | `(apply str some-word-parser)` and get a string back, which isn't a parser.
509 | 
510 | Second, `let->>` unwraps the return value of `(word)` right before it binds it,
511 | so even if the first problem weren't true, `(apply str ...)` would get a wrapped
512 | value as its argument, which is not going to work.
513 | 
514 | Of course, you can do anything you want in the *body* of a `let->>`, so this is
515 | fine:
516 | 
517 |     (let->> [name (word)]
518 |       (let [name (apply str name)]
519 |         (always name)))
520 | 
521 | `let` in this example is a vanilla Clojure `let`.
522 | 
523 | Binding forms in a `let->>` are executed in order, and importantly, later forms
524 | can refer to earlier ones.  Look at this example:
525 | 
526 |     (defparser sample []
527 |       (let->> [sign (choice (char \+)
528 |                             (char \-))
529 |                word (if (= sign \+)
530 |                       (string "plus")
531 |                       (string "minus"))]
532 |         (always [sign word])))
533 | 
534 |     (run (sample) "+plus")
535 |     ; [\+ "plus"]
536 | 
537 |     (run (sample) "-minus")
538 |     ; [\- "minus"]
539 | 
540 |     (run (sample) "+minus")
541 |     ; RuntimeException...
542 | 
543 | In this example, `sign` gets bound to the unwrapped result of the `choice`
544 | parser, which is a character.  Then we use that character to determine which
545 | parser to use in the next binding.  If the sign was a `\+`, we parse the string
546 | `"plus"`.  Likewise for minus.
547 | 
548 | Notice how mixing the two in the last example produced an error.  We saw the
549 | `\+` and decided that we'd use the `(string "plus")` parser for the next input,
550 | but it turned out to be `"minus"`.
551 | 
552 | Tips and Tricks
553 | ---------------
554 | 
555 | That's about it for the basics!  You now know enough to parse a wide variety of
556 | things by building up complex parsers from very simple ones.
557 | 
558 | Before you go, here are a few tips and tricks that you might find helpful.
559 | 
560 | ### You can parse more than just strings
561 | 
562 | Remember that the Parsatron operates on sequences of input.  These don't
563 | necessarily have to be strings.
564 | 
565 | Maybe you've got a big JSON response that you want to split apart.  Don't try to
566 | write a JSON parser from scratch, just use an existing one like [Cheshire][] and
567 | then use the Parsatron to parse the Clojure datastructure(s) it sends back!
568 | 
569 | [Cheshire]: https://github.com/dakrone/cheshire
570 | 
571 | ### You can throw away `let->>` bindings
572 | 
573 | Sometimes you're writing a `let->>` form and encounter a value that you don't
574 | really need to bind to a name.  Instead of stopping the `let->>` and nesting
575 | a `>>` inside it, just bind the value to a disposable name, like `_`:
576 | 
577 |     (defparser float []
578 |       (let->> [integral (many1 (digit))
579 |                _ (char \.)
580 |                fractional (many1 (digit))]
581 |         (let [integral (apply str integral)
582 |               fractional (apply str fractional)]
583 |           (always (Double/parseDouble (str integral "." fractional))))))
584 | 
585 |     (run (float) "1.4")
586 |     ; 1.4
587 | 
588 |     (run (float) "1.04")
589 |     ; 1.04
590 | 
591 |     (run (float) "1.0400000")
592 |     ; 1.04
593 | 


--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
 1 | (defproject the/parsatron "0.0.9-SNAPSHOT"
 2 |   :description "Clojure parser combinators"
 3 | 
 4 |   :dependencies [[org.clojure/clojure "1.10.1"]
 5 |                  [org.clojure/clojurescript "1.10.520"]]
 6 | 
 7 |   :plugins [[lein-cljsbuild "1.1.7"]]
 8 | 
 9 |   :source-paths ["src/clj" "src/cljs"]
10 |   :test-paths ["test/clj"]
11 | 
12 |   :global-vars {*warn-on-reflection* false}
13 | 
14 |   :cljsbuild {:builds [{:source-paths ["src/cljs" "test/cljs"]
15 |                         :compiler {:optimizations :simple
16 |                                    :target :nodejs
17 |                                    :output-to "test/resources/parsatron_test.js"}}]
18 |               :test-commands { "unit" ["node" "test/resources/parsatron_test.js"]}})
19 | 


--------------------------------------------------------------------------------
/src/clj/parsatron/languages/bencode.clj:
--------------------------------------------------------------------------------
 1 | (ns parsatron.languages.bencode
 2 |   (:refer-clojure :exclude [char])
 3 |   (:use [the.parsatron]))
 4 | 
 5 | (declare ben-value)
 6 | 
 7 | (defparser positive-int []  ; parses one or more digit characters and yields them as a number
 8 |   (let->> [digits (many1 (digit))]  ; digits is bound to the sequence of digit characters
 9 |     (always (read-string (apply str digits)))))  ; NOTE(review): read-string reads a leading-zero string as octal ("010" => 8) — confirm this is acceptable
10 | 
11 | (defparser negative-int []  ; parses a \- followed by one or more digits and yields the negated number
12 |   (let->> [digits (>> (char \-) (many1 (digit)))]  ; >> keeps only its last result, so digits holds the digit chars without the \-
13 |     (always (- (read-string (apply str digits))))))  ; re-apply the sign that >> discarded, so "-42" yields -42 rather than 42
14 | 
15 | (defparser ben-integer []  ; an integer wrapped in \i ... \e, e.g. "i42e"; yields the inner parser's value
16 |   (between (char \i) (char \e)
17 |            (either  ; tries positive-int first, then negative-int
18 |             (positive-int)
19 |             (negative-int))))
20 | 
21 | (defparser ben-bytestring []  ; a length-prefixed string such as "4:spam"; yields the payload as a string
22 |   (let->> [length (positive-int)  ; the declared payload length
23 |            _ (char \:)  ; separator; its value is not needed
24 |            chars (times length (any-char))]  ; exactly `length` items of payload
25 |     (always (apply str chars))))  ; join the parsed characters into one string
26 | 
27 | (defparser ben-list []  ; zero or more ben-values wrapped in \l ... \e; yields the values `many` collected
28 |   (between (char \l) (char \e)
29 |            (many (ben-value))))  ; ben-value is declared ahead of its definition, so the list can nest any value
30 | 
31 | (defparser ben-dictionary []  ; zero or more key/value entries wrapped in \d ... \e; yields a sorted map
32 |   (let [entry (let->> [key (ben-bytestring)  ; every key is a bytestring
33 |                        val (ben-value)]  ; every value is any bencode value
34 |                 (always [key val]))]  ; each entry is yielded as a [key val] pair
35 |     (between (char \d) (char \e)
36 |              (let->> [entries (many entry)]
37 |                (always (into (sorted-map) entries))))))  ; pour the pairs into a map ordered by key
38 | 
39 | (defparser ben-value []  ; any single bencode value; alternatives are tried in the order listed
40 |   (choice (ben-integer)
41 |           (ben-bytestring)
42 |           (ben-list)
43 |           (ben-dictionary)))
44 | 


--------------------------------------------------------------------------------
/src/clj/parsatron/languages/bf.clj:
--------------------------------------------------------------------------------
 1 | (ns parsatron.languages.bf
 2 |   (:refer-clojure :exclude [char])
 3 |   (:use [the.parsatron]))
 4 | 
 5 | (defparser instruction []  ; one instruction: a single command character or a bracketed loop
 6 |   (choice (char \>)
 7 |           (char \<)
 8 |           (char \+)
 9 |           (char \-)
10 |           (char \.)
11 |           (char \,)
12 |           (between (char \[) (char \]) (many (instruction)))))  ; a loop body is zero or more nested instructions
13 | 
14 | (defparser bf []  ; a whole program: any number of instructions, then end of input
15 |   (many (instruction))
16 |   (eof))  ; rejects input that has anything left after the instructions
17 | 


--------------------------------------------------------------------------------
/src/clj/the/parsatron.clj:
--------------------------------------------------------------------------------
  1 | (ns the.parsatron
  2 |   (:refer-clojure :exclude [char])
  3 |   (:require [clojure.string :as str]))
  4 | 
  5 | (defrecord InputState [input pos])  ; the unconsumed input and the SourcePos of its first item
  6 | (defrecord SourcePos [line column])  ; run starts parsing at line 1, column 1
  7 | 
  8 | (defrecord Continue [fn])  ; a deferred step; fn is called with no arguments by the trampoline
  9 | (defrecord Ok [item])  ; success result built by run-parser
 10 | (defrecord Err [errmsg])  ; failure result built by run-parser; errmsg comes from show-error
 11 | 
 12 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 13 | ;; position
 14 | (defn inc-sourcepos
 15 |   "Advance a source position past a single character, c. A newline
 16 |    moves to column 1 of the following line; any other character
 17 |    moves one column to the right on the same line"
 18 |   [{:keys [line column]} c]
 19 |   (if (not= c \newline)
 20 |     (->SourcePos line (inc column))
 21 |     (->SourcePos (inc line) 1)))
 22 | 
 23 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 24 | ;; errors
 25 | (defprotocol ShowableError  ; errors that can render themselves as a message
 26 |   (show-error [this]))  ; run-parser stores the rendered result in an Err
 27 | 
 28 | (defrecord ParseError [pos msgs]  ; pos supplies :line and :column; msgs is a collection of messages
 29 |   ShowableError
 30 |   (show-error [_] (str (str/join ", " msgs)  ; renders "<msg>, <msg> at line: L column: C"
 31 |                        " at"
 32 |                        " line: " (:line pos)
 33 |                        " column: " (:column pos))))
 34 | 
 35 | (defn unknown-error [{:keys [pos] :as state}]  ; generic "Error" at the state's current position
 36 |   (ParseError. pos ["Error"]))
 37 | 
 38 | (defn unexpect-error [msg pos]  ; ParseError reading "Unexpected <msg>" at pos
 39 |   (ParseError. pos [(str "Unexpected " msg)]))
 40 | 
 41 | (defn expect-error [msg pos]  ; ParseError reading "Expected <msg>" at pos
 42 |   (ParseError. pos [(str "Expected " msg)]))
 43 | 
 44 | (defn merge-errors [{:keys [pos] :as err} other-err]  ; keeps err's pos; err's messages come first, then other-err's
 45 |   (ParseError. pos (flatten (concat (:msgs err) (:msgs other-err)))))
 46 | 
 47 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 48 | ;; trampoline
 49 | (defn parsatron-poline
 50 |   "Call f with args, then keep bouncing: while the current value is a
 51 |    Continue record, call its :fn with no arguments and carry on with
 52 |    what that returns. The first value that is not a Continue is
 53 |    returned. Unlike clojure.core/trampoline, plain functions are
 54 |    never invoked; only Continue records are treated as pending work."
 55 |   [f & args]
 56 |   (loop [step (apply f args)]
 57 |     (if (instance? Continue step)
 58 |       (recur ((:fn step)))
 59 |       step)))
 60 | 
 61 | (defn sequentially
 62 |   "Apply f to value once value is no longer a Continue. A Continue is
 63 |    not run here; a new Continue is returned that runs one step and
 64 |    then applies sequentially again to whatever that step produced"
 65 |   [f value]
 66 |   (if (instance? Continue value)
 67 |     (Continue. #(sequentially f ((:fn value))))
 68 |     (f value)))
 65 | 
 66 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 67 | ;; host environment
 68 | (defn fail [message]  ; only constructs the exception, it does not throw it (run wraps the call in throw)
 69 |   (RuntimeException. message))
 70 | 
 71 | (defn digit?
 72 |   "Tests if a character is a digit, as decided by java.lang.Character/isDigit"
 73 |   [c]
 74 |   (Character/isDigit ^Character c))  ; NOTE(review): isDigit is not limited to ASCII [0-9] — confirm that is intended
 75 | 
 76 | (defn letter?
 77 |   "Tests if a character is a letter, as decided by java.lang.Character/isLetter"
 78 |   [c]
 79 |   (Character/isLetter ^Character c))  ; NOTE(review): isLetter is not limited to ASCII [a-zA-Z] — confirm that is intended
 80 | 
 81 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 82 | ;; m
 83 | (defn always
 84 |   "A parser that always succeeds with the value given and consumes no
 85 |    input"
 86 |   [x]
 87 |   (fn [state cok cerr eok eerr]
 88 |     (eok x state)))  ; empty-ok continuation: x is the result, state is handed on untouched
 89 | 
 90 | (defn bind
 91 |   "Parse p, and then q. The function f must be of one argument, it
 92 |    will be given the value of p and must return the q to follow p"
 93 |   [p f]
 94 |   (fn [state cok cerr eok eerr]
 95 |     (letfn [(pcok [item state]  ; p succeeded and consumed input
 96 |               (sequentially
 97 |                (fn [q] (Continue. #(q state cok cerr cok cerr)))  ; q's empty outcomes are reported through the consumed continuations
 98 |                (f item)))
 99 |             (peok [item state]  ; p succeeded without consuming input
100 |               (sequentially
101 |                (fn [q] (Continue. #(q state cok cerr eok eerr)))  ; q alone decides between consumed and empty
102 |                (f item)))]
103 |       (Continue. #(p state pcok cerr peok eerr)))))  ; p's failures go straight to the caller's cerr/eerr
104 | 
105 | (defn nxt
106 |   "Run p, throw its value away, then run q and yield q's value"
107 |   [p q]
108 |   (bind p (constantly q)))
109 | 
110 | (defmacro defparser
111 |   "Defines a new parser. Parsers are simply functions that accept the
112 |    5 arguments state, cok, cerr, eok, eerr but this macro takes care
113 |    of writing that ceremony for you and wraps the body in a >>"
114 |   [name args & body]
115 |   `(defn ~name ~args
116 |      (fn [state# cok# cerr# eok# eerr#]
117 |        (let [p# (>> ~@body)]  ; body forms are sequenced with >>; built each time the returned fn is invoked
118 |          (Continue. #(p# state# cok# cerr# eok# eerr#))))))  ; running the body parser is deferred to the trampoline
119 | 
120 | (defmacro >>
121 |   "Expands into nested nxt forms"
122 |   ([m] m)  ; a single parser is left as it is
123 |   ([m n] `(nxt ~m ~n))
124 |   ([m n & ms] `(nxt ~m (>> ~n ~@ms))))  ; nests to the right: m, then everything after it
125 | 
126 | (defmacro let->>
127 |   "Expands into nested bind forms. bindings is a vector of one or
128 |    more binding-form/parser pairs; each later parser and the body can
129 |    refer to the values bound before it. An empty or odd-length binding
130 |    vector is rejected at macroexpansion time, since expanding it
131 |    would otherwise recurse without end"
132 |   [[& bindings] & body]
133 |   (when (or (empty? bindings) (odd? (count bindings)))
134 |     (throw (IllegalArgumentException.
135 |             "let->> requires a non-empty, even number of binding forms")))
136 |   (let [[bind-form p & more] bindings]
137 |     (if (seq more)
138 |       ;; more pairs remain: bind this one and recurse on the rest
139 |       `(bind ~p (fn [~bind-form] (let->> ~more ~@body)))
140 |       `(bind ~p (fn [~bind-form] ~@body)))))
133 | 
134 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
135 | ;; m+
136 | (defn never
137 |   "A parser that always fails, consuming no input"
138 |   []
139 |   (fn [state cok cerr eok eerr]
140 |     (eerr (unknown-error state))))  ; empty-error continuation with the generic "Error" message
141 | 
142 | (defn either
143 |   "A parser that tries p, upon success, returning its value, and upon
144 |    failure (if no input was consumed) tries to parse q"
145 |   [p q]
146 |   (fn [state cok cerr eok eerr]
147 |     (letfn [(peerr [err-from-p]  ; p failed without consuming: run q from the same state
148 |               (letfn [(qeerr [err-from-q]  ; q also failed without consuming: report both errors
149 |                         (eerr (merge-errors err-from-p err-from-q)))]
150 |                 (Continue. #(q state cok cerr eok qeerr))))]
151 |       (Continue. #(p state cok cerr eok peerr)))))  ; a consumed failure of p (cerr) bypasses q
152 | 
153 | (defn attempt
154 |   "A parser that will attempt to parse p, and upon failure never
155 |    consume any input"
156 |   [p]
157 |   (fn [state cok cerr eok eerr]
158 |     (Continue. #(p state cok eerr eok eerr))))  ; eerr sits in the cerr slot, so a failure after consuming is reported as an empty failure
159 | 
160 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
161 | ;; interacting with the parser's state
162 | 
163 | (defn extract
164 |   "Build a parser that reads from the current parser state without
165 |    touching it. f is a fn of one argument, the state; whatever f
166 |    returns becomes the value of the parser. Nothing is consumed and
167 |    the same state is handed to the continuation, which is always the
168 |    empty-ok one."
169 |   [f]
170 |   (fn [state cok cerr eok eerr]
171 |     (eok (f state) state)))
172 | 
173 | (defn examine
174 |   "A parser whose value is the parser's current state"
175 |   []
176 |   (extract (fn [state] state)))
177 | 
178 | (defn lineno
179 |   "A parser whose value is the line of the current position. No input is consumed"
180 |   []
181 |   (extract (fn [state] (:line (:pos state)))))
182 | 
183 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
184 | ;; token
185 | (defn token
186 |   "Consume a single item from the head of the input when (consume? item)
187 |    returns a truthy value. The parser fails without consuming when the
188 |    test returns nil or false, or when the input is empty"
189 |   [consume?]
190 |   (fn [{:keys [input pos] :as state} cok cerr eok eerr]
191 |     (if (empty? input)
192 |       (eerr (unexpect-error "end of input" pos))
193 |       (let [tok (first input)]
194 |         (if-not (consume? tok)
195 |           (eerr (unexpect-error (str "token '" tok "'") pos))
196 |           (cok tok (InputState. (rest input) (inc-sourcepos pos tok))))))))
197 | 
198 | (defn many
199 |   "Consume zero or more p. A RuntimeException will be thrown if this
200 |    combinator is applied to a parser that accepts the empty string, as
201 |    that would cause the parser to loop forever"
202 |   [p]
203 |   (letfn [(many-err [_ _]
204 |             ;; fail only builds the exception, so it has to be thrown here;
205 |             ;; returning it would hand the exception back as a parse result
206 |             (throw (fail "Combinator '*' is applied to a parser that accepts an empty string")))
207 |           (safe-p [state cok cerr eok eerr]
208 |             ;; run p with its empty-ok continuation replaced by many-err
209 |             (Continue. #(p state cok cerr many-err eerr)))]
210 |     (either
211 |      (let->> [x safe-p
212 |               xs (many safe-p)]
213 |        (always (cons x xs)))
214 |      (always []))))
212 | 
213 | (defn times
214 |   "Consume exactly n of p in a row; when n is 0 the result is [] and nothing is consumed"
215 |   [n p]
216 |   (if-not (= n 0)
217 |     (let->> [head p
218 |              tail (times (dec n) p)]
219 |       (always (cons head tail)))
220 |     (always [])))
221 | 
222 | (defn lookahead
223 |   "A parser that upon success consumes no input, but returns what was
224 |    parsed"
225 |   [p]
226 |   (fn [state cok cerr eok eerr]
227 |     (letfn [(ok [item _]  ; ignores the state p advanced to
228 |               (eok item state))]  ; reports an empty success from the original state
229 |       (Continue. #(p state ok cerr eok eerr)))))  ; failures of p are passed through as they are
230 | 
231 | (defn choice
232 |   "Try each of the given parsers in order, as a chain of either
233 |    forms, yielding the value of the first that succeeds. With no
234 |    parsers left the chain ends in never"
235 |   [& parsers]
236 |   (if-let [[p & others] (seq parsers)]
237 |     (either p (apply choice others))
238 |     (never)))
239 | 
240 | (defn eof
241 |   "A parser that detects the end of input. When nothing is left to
242 |    consume it succeeds with a nil value and consumes nothing;
243 |    otherwise it fails without consuming"
244 |   []
245 |   (fn [{:keys [input pos] :as state} cok cerr eok eerr]
246 |     (if (seq input)
247 |       (eerr (expect-error "end of input" pos))
248 |       (eok nil state))))
249 | 
250 | (defn char
251 |   "Consume one item equal to the given character c"
252 |   [c]
253 |   (token (fn [tok] (= c tok))))
254 | 
255 | (defn any-char
256 |   "Consume any single item that satisfies char?"
257 |   []
258 |   (token char?))
259 | 
260 | (defn digit
261 |   "Consume a single character that satisfies digit?"
262 |   []
263 |   (token digit?))
264 | 
265 | (defn letter
266 |   "Consume a single character that satisfies letter?"
267 |   []
268 |   (token letter?))
269 | 
270 | (defn string
271 |   "Consume each character of the string s in order, yielding s"
272 |   [s]
273 |   (let [steps (concat (map char s) [(always s)])]
274 |     (reduce nxt steps)))
275 | 
276 | (defn between
277 |   "Parse open, then p, then close. The values of open and close are
278 |    thrown away and the value of p is returned"
279 |   [open close p]
280 |   (>> open
281 |       (bind p
282 |             (fn [x]
283 |               (>> close (always x))))))
284 | 
285 | (defn many1
286 |   "Consume one or more p, yielding the values in order"
287 |   [p]
288 |   (bind p
289 |         (fn [x]
290 |           (bind (many p) (fn [xs] (always (cons x xs)))))))
291 | 
292 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
293 | ;; run parsers
294 | (defn run-parser
295 |   "Execute a parser p from the given state on the trampoline. Returns
296 |    an Ok holding the parsed item, or an Err holding the rendered
297 |    error message. Consumed and empty outcomes are treated alike"
298 |   [p state]
299 |   (let [succeed (fn [item _] (Ok. item))
300 |         give-up (fn [err] (Err. (show-error err)))]
301 |     (parsatron-poline p state succeed give-up succeed give-up)))
306 | 
307 | (defn run
308 |   "Run a parser p over some input. The input can be a string or a seq
309 |    of tokens, if the parser produces an error, its message is wrapped
310 |    in a RuntimeException and thrown, and if the parser succeeds, its
311 |    value is returned"
312 |   [p input]
313 |   (let [result (run-parser p (InputState. input (SourcePos. 1 1)))]  ; positions start at line 1, column 1
314 |     (condp instance? result
315 |       Ok (:item result)
316 |       Err (throw (fail ^String (:errmsg result))))))  ; no default clause: any other result makes condp itself throw
317 | 


--------------------------------------------------------------------------------
/src/cljs/the/parsatron.cljs:
--------------------------------------------------------------------------------
  1 | (ns the.parsatron
  2 |   (:refer-clojure :exclude [char char?])
  3 |   (:require [clojure.string :as str])
  4 |   (:require-macros [the.parsatron :refer [defparser >> let->>]]))
  5 | 
  6 | (defrecord InputState [input pos])  ; the unconsumed input and the SourcePos of its first item
  7 | (defrecord SourcePos [line column])  ; run starts parsing at line 1, column 1
  8 | 
  9 | (defrecord Continue [fn])  ; a deferred step; fn is called with no arguments by the trampoline
 10 | (defrecord Ok [item])  ; success result built by run-parser
 11 | (defrecord Err [errmsg])  ; failure result built by run-parser; errmsg comes from show-error
 12 | 
 13 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 14 | ;; position
 15 | (defn inc-sourcepos
 16 |   "Advance a source position past a single character, c. A newline
 17 |    moves to column 1 of the following line; any other character
 18 |    moves one column to the right on the same line"
 19 |   [{:keys [line column]} c]
 20 |   (if (not= c \newline)
 21 |     (->SourcePos line (inc column))
 22 |     (->SourcePos (inc line) 1)))
 23 | 
 24 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 25 | ;; errors
 26 | (defprotocol ShowableError  ; errors that can render themselves as a message
 27 |   (show-error [this]))  ; run-parser stores the rendered result in an Err
 28 | 
 29 | (defrecord ParseError [pos msgs]  ; pos supplies :line and :column; msgs is a collection of messages
 30 |   ShowableError
 31 |   (show-error [_] (str (str/join ", " msgs)  ; renders "<msg>, <msg> at line: L column: C"
 32 |                        " at"
 33 |                        " line: " (:line pos)
 34 |                        " column: " (:column pos))))
 35 | 
 36 | (defn unknown-error [{:keys [pos] :as state}]  ; generic "Error" at the state's current position
 37 |   (ParseError. pos ["Error"]))
 38 | 
 39 | (defn unexpect-error [msg pos]  ; ParseError reading "Unexpected <msg>" at pos
 40 |   (ParseError. pos [(str "Unexpected " msg)]))
 41 | 
 42 | (defn expect-error [msg pos]  ; ParseError reading "Expected <msg>" at pos
 43 |   (ParseError. pos [(str "Expected " msg)]))
 44 | 
 45 | (defn merge-errors [{:keys [pos] :as err} other-err]  ; keeps err's pos; err's messages come first, then other-err's
 46 |   (ParseError. pos (flatten (concat (:msgs err) (:msgs other-err)))))
 47 | 
 48 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 49 | ;; trampoline
 50 | (defn parsatron-poline
 51 |   "Call f with args, then keep bouncing: while the current value is a
 52 |    Continue record, call its :fn with no arguments and carry on with
 53 |    what that returns. The first value that is not a Continue is
 54 |    returned. Unlike clojure.core/trampoline, plain functions are
 55 |    never invoked; only Continue records are treated as pending work."
 56 |   [f & args]
 57 |   (loop [step (apply f args)]
 58 |     (if (instance? Continue step)
 59 |       (recur ((:fn step)))
 60 |       step)))
 61 | 
 62 | (defn sequentially
 63 |   "Apply f to value once value is no longer a Continue. A Continue is
 64 |    not run here; a new Continue is returned that runs one step and
 65 |    then applies sequentially again to whatever that step produced"
 66 |   [f value]
 67 |   (if (instance? Continue value)
 68 |     (Continue. #(sequentially f ((:fn value))))
 69 |     (f value)))
 66 | 
 67 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 68 | ;; host environment
 69 | (defn fail [message]  ; only constructs the error, it does not throw it (run wraps the call in throw)
 70 |   (js/Error. message))
 71 | 
 72 | (defn char?
 73 |   "Test for a single-character string.
 74 | 
 75 |    ClojureScript doesn't support a character type, so we pretend it
 76 |    does"
 77 |   [x]
 78 |   (and (string? x) (= (count x) 1)))  ; truthy only for strings whose count is exactly 1
 79 | 
 80 | (defn digit?
 81 |   "Tests if a character is a digit: [0-9]"
 82 |   [c]
 83 |   (re-matches #"\d" c))  ; yields the match (truthy) or nil rather than true/false
 84 | 
 85 | (defn letter?
 86 |   "Tests if a character is a letter: [a-zA-Z]"
 87 |   [c]
 88 |   (re-matches #"[a-zA-Z]" c))  ; yields the match (truthy) or nil rather than true/false
 89 | 
 90 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 91 | ;; m
 92 | (defn always
 93 |   "A parser that always succeeds with the value given and consumes no
 94 |    input"
 95 |   [x]
 96 |   (fn [state cok cerr eok eerr]
 97 |     (eok x state)))  ; empty-ok continuation: x is the result, state is handed on untouched
 98 | 
 99 | (defn bind
100 |   "Parse p, and then q. The function f must be of one argument, it
101 |    will be given the value of p and must return the q to follow p"
102 |   [p f]
103 |   (fn [state cok cerr eok eerr]
104 |     (letfn [(pcok [item state]  ; p succeeded and consumed input
105 |               (sequentially
106 |                (fn [q] (Continue. #(q state cok cerr cok cerr)))  ; q's empty outcomes are reported through the consumed continuations
107 |                (f item)))
108 |             (peok [item state]  ; p succeeded without consuming input
109 |               (sequentially
110 |                (fn [q] (Continue. #(q state cok cerr eok eerr)))  ; q alone decides between consumed and empty
111 |                (f item)))]
112 |       (Continue. #(p state pcok cerr peok eerr)))))  ; p's failures go straight to the caller's cerr/eerr
113 | 
114 | (defn nxt
115 |   "Run p, throw its value away, then run q and yield q's value"
116 |   [p q]
117 |   (bind p (constantly q)))
118 | 
119 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
120 | ;; m+
121 | (defn never
122 |   "A parser that always fails, consuming no input"
123 |   []
124 |   (fn [state cok cerr eok eerr]
125 |     (eerr (unknown-error state))))  ; empty-error continuation with the generic "Error" message
126 | 
127 | (defn either
128 |   "A parser that tries p, upon success, returning its value, and upon
129 |    failure (if no input was consumed) tries to parse q"
130 |   [p q]
131 |   (fn [state cok cerr eok eerr]
132 |     (letfn [(peerr [err-from-p]  ; p failed without consuming: run q from the same state
133 |               (letfn [(qeerr [err-from-q]  ; q also failed without consuming: report both errors
134 |                         (eerr (merge-errors err-from-p err-from-q)))]
135 |                 (Continue. #(q state cok cerr eok qeerr))))]
136 |       (Continue. #(p state cok cerr eok peerr)))))  ; a consumed failure of p (cerr) bypasses q
137 | 
138 | (defn attempt
139 |   "A parser that will attempt to parse p, and upon failure never
140 |    consume any input"
141 |   [p]
142 |   (fn [state cok cerr eok eerr]
143 |     (Continue. #(p state cok eerr eok eerr))))  ; eerr sits in the cerr slot, so a failure after consuming is reported as an empty failure
144 | 
145 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
146 | ;; interacting with the parser's state
147 | 
148 | (defn extract
149 |   "Build a parser that reads from the current parser state without
150 |    touching it. f is a fn of one argument, the state; whatever f
151 |    returns becomes the value of the parser. Nothing is consumed and
152 |    the same state is handed to the continuation, which is always the
153 |    empty-ok one."
154 |   [f]
155 |   (fn [state cok cerr eok eerr]
156 |     (eok (f state) state)))
157 | 
158 | (defn examine
159 |   "A parser whose value is the parser's current state"
160 |   []
161 |   (extract (fn [state] state)))
162 | 
163 | (defn lineno
164 |   "A parser whose value is the line of the current position. No input is consumed"
165 |   []
166 |   (extract (fn [state] (:line (:pos state)))))
167 | 
168 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
169 | ;; token
170 | (defn token
171 |   "Consume a single item from the head of the input when (consume? item)
172 |    returns a truthy value. The parser fails without consuming when the
173 |    test returns nil or false, or when the input is empty"
174 |   [consume?]
175 |   (fn [{:keys [input pos] :as state} cok cerr eok eerr]
176 |     (if (empty? input)
177 |       (eerr (unexpect-error "end of input" pos))
178 |       (let [tok (first input)]
179 |         (if-not (consume? tok)
180 |           (eerr (unexpect-error (str "token '" tok "'") pos))
181 |           (cok tok (InputState. (rest input) (inc-sourcepos pos tok))))))))
182 | 
183 | (defn many
184 |   "Consume zero or more p. A js/Error will be thrown if this
185 |    combinator is applied to a parser that accepts the empty string, as
186 |    that would cause the parser to loop forever"
187 |   [p]
188 |   (letfn [(many-err [_ _]
189 |             ;; fail only builds the error, so it has to be thrown here;
190 |             ;; returning it would hand the error back as a parse result
191 |             (throw (fail "Combinator '*' is applied to a parser that accepts an empty string")))
192 |           (safe-p [state cok cerr eok eerr]
193 |             ;; run p with its empty-ok continuation replaced by many-err
194 |             (Continue. #(p state cok cerr many-err eerr)))]
195 |     (either
196 |      (let->> [x safe-p
197 |               xs (many safe-p)]
198 |        (always (cons x xs)))
199 |      (always []))))
197 | 
198 | (defn times
199 |   "Consume exactly n of p in a row; when n is 0 the result is [] and nothing is consumed"
200 |   [n p]
201 |   (if-not (= n 0)
202 |     (let->> [head p
203 |              tail (times (dec n) p)]
204 |       (always (cons head tail)))
205 |     (always [])))
206 | 
207 | (defn lookahead
208 |   "A parser that upon success consumes no input, but returns what was
209 |    parsed"
210 |   [p]
211 |   (fn [state cok cerr eok eerr]
212 |     (letfn [(ok [item _]  ; ignores the state p advanced to
213 |               (eok item state))]  ; reports an empty success from the original state
214 |       (Continue. #(p state ok cerr eok eerr)))))  ; failures of p are passed through as they are
215 | 
216 | (defn choice
217 |   "Try each of the given parsers in order, as a chain of either
218 |    forms, yielding the value of the first that succeeds. With no
219 |    parsers left the chain ends in never"
220 |   [& parsers]
221 |   (if-let [[p & others] (seq parsers)]
222 |     (either p (apply choice others))
223 |     (never)))
224 | 
225 | (defn eof
226 |   "A parser that detects the end of input. When nothing is left to
227 |    consume it succeeds with a nil value and consumes nothing;
228 |    otherwise it fails without consuming"
229 |   []
230 |   (fn [{:keys [input pos] :as state} cok cerr eok eerr]
231 |     (if (seq input)
232 |       (eerr (expect-error "end of input" pos))
233 |       (eok nil state))))
234 | 
235 | (defn char
236 |   "Consume one item equal to the given character c"
237 |   [c]
238 |   (token (fn [tok] (= c tok))))
239 | 
240 | (defn any-char
241 |   "Consume any single item that satisfies char?, i.e. a one-character string"
242 |   []
243 |   (token #(char? %)))
244 | 
245 | (defn digit
246 |   "Consume a single character that satisfies digit?"
247 |   []
248 |   (token digit?))
249 | 
250 | (defn letter
251 |   "Consume a single character that satisfies letter?"
252 |   []
253 |   (token letter?))
254 | 
255 | (defn string
256 |   "Consume each character of the string s in order, yielding s"
257 |   [s]
258 |   (let [steps (concat (map char s) [(always s)])]
259 |     (reduce nxt steps)))
260 | 
261 | (defn between
262 |   "Parse open, then p, then close. The values of open and close are
263 |    thrown away and the value of p is returned"
264 |   [open close p]
265 |   (>> open
266 |       (bind p
267 |             (fn [x]
268 |               (>> close (always x))))))
269 | 
270 | (defn many1
271 |   "Consume one or more p, yielding the values in order"
272 |   [p]
273 |   (bind p
274 |         (fn [x]
275 |           (bind (many p) (fn [xs] (always (cons x xs)))))))
276 | 
277 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
278 | ;; run parsers
279 | (defn run-parser
280 |   "Execute a parser p from the given state on the trampoline. Returns
281 |    an Ok holding the parsed item, or an Err holding the rendered
282 |    error message. Consumed and empty outcomes are treated alike"
283 |   [p state]
284 |   (let [succeed (fn [item _] (Ok. item))
285 |         give-up (fn [err] (Err. (show-error err)))]
286 |     (parsatron-poline p state succeed give-up succeed give-up)))
291 | 
292 | (defn run
293 |   "Run a parser p over some input. The input can be a string or a seq
294 |    of tokens, if the parser produces an error, its message is wrapped
295 |    in a js/Error and thrown, and if the parser succeeds, its
296 |    value is returned"
297 |   [p input]
298 |   (let [result (run-parser p (InputState. input (SourcePos. 1 1)))]  ; positions start at line 1, column 1
299 |     (condp instance? result
300 |       Ok (:item result)
301 |       Err (throw (fail ^String (:errmsg result))))))  ; no default clause: any other result makes condp itself throw
302 | 


--------------------------------------------------------------------------------
/test/clj/parsatron/languages/test_bencode.clj:
--------------------------------------------------------------------------------
 1 | (ns parsatron.languages.test-bencode
 2 |   (:refer-clojure :exclude [char])
 3 |   (:use [the.parsatron]
 4 |         [parsatron.languages.bencode]
 5 |         [clojure.test]))
 6 | 
 7 | (deftest test-ben-integer  ; a positive integer between "i" and "e"
 8 |   (are [expected input] (= expected (run (ben-integer) input))
 9 |        42 "i42e"))
10 | 
11 | (deftest test-ben-bytestring  ; length prefix, colon, then that many characters
12 |   (are [expected input] (= expected (run (ben-bytestring) input))
13 |        "spam" "4:spam"))
14 | 
15 | (deftest test-ben-list  ; a list mixing an integer and a bytestring
16 |   (are [expected input] (= expected (run (ben-list) input))
17 |        [42 "spam"] "li42e4:spame"))
18 | 
19 | (deftest test-ben-dictionary  ; flat and nested dictionaries, then rejection of an integer used as a key
20 |   (are [expected input] (= expected (run (ben-dictionary) input))
21 |        {"42" "spam", "spam" 42} "d2:424:spam4:spami42ee"
22 |        {"spam" ["a" "b"]} "d4:spaml1:a1:bee"
23 |        {"name" "Mary"
24 |         "age" 33
25 |         "children" ["Betty", "Sam"]
26 |         "address" {"street" "1 Home St"
27 |                    "city" "Anywhere"}}
28 |         "d4:name4:Mary3:agei33e8:childrenl5:Betty3:Same7:addressd6:street9:1 Home St4:city8:Anywhereee")
29 |   (is (thrown?  RuntimeException (run (ben-dictionary) "di42e4:spam4:spami42ee"))))
30 | 
31 | 


--------------------------------------------------------------------------------
/test/clj/parsatron/languages/test_bf.clj:
--------------------------------------------------------------------------------
 1 | (ns parsatron.languages.test-bf
 2 |   (:refer-clojure :exclude [char])
 3 |   (:use [the.parsatron]
 4 |         [parsatron.languages.bf]
 5 |         [clojure.test]))
 6 | 
 7 | (deftest test-accepts-valid-brainf*ck  ; each input must run through (bf) without an exception
 8 |   (are [input] (try
 9 |                  (run (bf) input)
10 |                  true
11 |                  (catch Exception _
12 |                    false))
13 |        ">"
14 |        "<"
15 |        "+"
16 |        "-"
17 |        "."
18 |        ","
19 |        "[+]"
20 |        ",>++++++[<-------->-],[<+>-]<."))
21 | 
22 | (deftest test-rejects-invalid-brainf*ck  ; unknown characters and unbalanced brackets must throw
23 |   (are [input] (thrown? RuntimeException (run (bf) input))
24 |        "a"
25 |        "abc"
26 |        "[+"
27 |        "]"
28 |        "[+>[+]"))


--------------------------------------------------------------------------------
/test/clj/parsatron/test.clj:
--------------------------------------------------------------------------------
  1 | (ns parsatron.test
  2 |   (:refer-clojure :exclude [char])
  3 |   (:use [the.parsatron]
  4 |         [clojure.test])
  5 |   (:import (the.parsatron SourcePos)))
  6 | 
  7 | (defn parser-result? [expected p input]  ; true when running p over input yields expected
  8 |   (= expected (run p input)))
  9 | 
 10 | (deftest test-always  ; always yields its value whether or not input remains
 11 |   (is (parser-result? 5 (always 5) ""))
 12 |   (is (parser-result? 5 (always 5) "abc")))
 13 | 
 14 | (deftest test-nxt  ; the second parser's value is the result
 15 |   (is (parser-result? 5 (nxt (always 3)
 16 |                              (always 5)) "")))
 17 | 
 18 | (deftest test-bind  ; the bound fn receives the first parser's value
 19 |   (is (parser-result? 8 (bind (always 3)
 20 |                               (fn [x]
 21 |                                 (always (+ x 5)))) "")))
 22 | 
 23 | (deftest test-never  ; never fails on empty and on non-empty input
 24 |   (is (thrown? RuntimeException (run (never) "")))
 25 |   (is (thrown? RuntimeException (run (never) "abc"))))
 26 | 
 27 | (deftest test-either  ; first success wins; when both fail the messages are joined
 28 |   (testing "first parser succeeds"
 29 |     (is (parser-result? 5 (either (always 5) (always 3)) "")))
 30 | 
 31 |   (testing "second parser succeeds, when first fails with empty"
 32 |     (is (parser-result? 5 (either (never) (always 5)) "")))
 33 | 
 34 |   (testing "when neither succeed, errors are merged"
 35 |     (is (thrown-with-msg? RuntimeException #"Unexpected token 'c', Unexpected token 'c'"
 36 |           (run (either (char \a) (char \b)) "c")))))
 37 | 
 38 | (deftest test-attempt  ; attempt lets either fall through to its second parser after a partial match
 39 |   (testing "success returns value of p"
 40 |     (is (parser-result? \a (attempt (char \a)) "a")))
 41 | 
 42 |   (testing "failure is same as never"
 43 |     (is (thrown? RuntimeException (run (attempt (char \a)) "b")))
 44 |     (is (parser-result? \c (either (attempt (>> (char \a) (char \b)))
 45 |                                    (>> (char \a) (char \c))) "ac"))))
 46 | 
 47 | (deftest test-token  ; empty input, an accepted token, and a rejected token
 48 |   (testing "throws error on empty input"
 49 |     (is (thrown-with-msg? RuntimeException #"Unexpected end of input"
 50 |           (run (token (constantly true)) ""))))
 51 | 
 52 |   (testing "consume? determines parser's behavior, show-f used in error message"
 53 |     (is (parser-result? \a (token (constantly true)) "a"))
 54 |     (is (thrown-with-msg? RuntimeException #"Unexpected token 'a'"
 55 |           (run (token (constantly false)) "a")))))
 56 | 
 57 | (deftest test-many  ; non-consuming parser, empty input, collected items, and a 1000-item run
 58 |   (testing "throws an exception if parser does not consume"
 59 |     (is (thrown-with-msg? RuntimeException #"Combinator '\*' is applied to a parser that accepts an empty string"
 60 |           (run (many (always 5)) ""))))
 61 | 
 62 |   (testing "returns empty list when no input consumed"
 63 |     (is (parser-result? [] (many (char \a)) "")))
 64 | 
 65 |   (testing "parser returns list of consumed items"
 66 |     (is (parser-result? [\a \a \b \a \b \b]
 67 |                         (many (either (char \a)
 68 |                                       (char \b)))
 69 |                         "aababbc")))
 70 | 
 71 |   (testing "does not blow the stack"
 72 |     (is (parser-result? (take 1000 (repeat \a))
 73 |                         (many (char \a))
 74 |                         (apply str (take 1000 (repeat \a)))))))
 75 | 
 76 | (deftest test-times  ; zero repetitions, too little input, exact counts, and a 10000-item run
 77 |   (testing "0 times returns [], and does not consume"
 78 |     (is (parser-result? [] (times 0 (char \a)) "")))
 79 | 
 80 |   (testing "throws an error (from underlying parser) if fewer than specified"
 81 |     (are [input] (thrown-with-msg? RuntimeException #"Unexpected end of input"
 82 |                    (run (times 3 (char \a)) input))
 83 |          ""
 84 |          "a"
 85 |          "aa"))
 86 | 
 87 |   (testing "returns a list with the results"
 88 |     (is (parser-result? [\a \a \a] (times 3 (char \a)) "aaa"))
 89 |     (is (parser-result? [5 5 5] (times 3 (always 5)) "")))
 90 | 
 91 |   (testing "does not blow the stack"
 92 |     (is (parser-result? (take 10000 (repeat \a))
 93 |                         (times 10000 (char \a))
 94 |                         (apply str (take 10000 (repeat \a)))))))
 95 | 
 96 | (deftest test-lookahead  ; the value is returned and the same input can be parsed again afterwards
 97 |   (testing "returns value of p on success"
 98 |     (is (parser-result? \a (lookahead (char \a)) "a")))
 99 | 
100 |   (testing "does not consume input on success"
101 |     (is (parser-result? \a (>> (lookahead (char \a)) (char \a)) "a"))))
102 | 
103 | (deftest test-choice  ; no alternatives throws; otherwise the matching alternative's value is returned
104 |   (testing "choice with no choices throws an exception"
105 |     (is (thrown? RuntimeException (run (choice) ""))))
106 | 
107 |   (testing "first parser to succeed returns result"
108 |     (are [input] (parser-result? (first input) (choice (char \a) (char \b) (char \c)) input)
109 |          "a"
110 |          "b"
111 |          "c")))
112 | 
113 | (deftest test-eof  ; nil at end of input, an "Expected end of input" error otherwise
114 |   (testing "parser succeeds, returns nil when no more input left"
115 |     (is (parser-result? nil (eof) ""))
116 |     (is (parser-result? nil (>> (char \a) (eof)) "a")))
117 | 
118 |   (testing "parser fails with message when input if left"
119 |     (is (thrown-with-msg? RuntimeException #"Expected end of input"
120 |           (run (eof) "a")))
121 |     (is (thrown-with-msg? RuntimeException #"Expected end of input"
122 |           (run (>> (char \a) (eof)) "ab")))))
123 | 


--------------------------------------------------------------------------------
/test/clj/parsatron/test_trampoline.clj:
--------------------------------------------------------------------------------
 1 | (ns parsatron.test-trampoline
 2 |   (:refer-clojure :exclude [char])
 3 |   (:use [the.parsatron]
 4 |         [clojure.test])
 5 |   (:import (the.parsatron Continue Ok)))
 6 | 
 7 | (deftest test-always  ; calls parsers directly, without the trampoline, to inspect each step
 8 |   (testing "always is a fn"
 9 |     (is (fn? (always 5))))
10 | 
11 |   (testing "with no next parser, always returns Ok"
12 |     (let [p (always 5)
13 |           result (p nil nil nil (fn eok [item _] (Ok. item)) nil)]  ; only the eok slot is supplied
14 |       (is (= (Ok. 5) result))))
15 | 
16 |   (testing "bound to a next parser, always returns Continue"
17 |     (let [p (bind (always 5) (fn [x] (always (+ x 2))))
18 |           p-continue (p nil nil nil (fn eok [item _] (Ok. item)) nil)]
19 |       (is (instance? Continue p-continue))
20 |       (let [q-continue ((:fn p-continue))]  ; first bounce, done by hand
21 |         (is (instance? Continue q-continue))
22 |         (let [result ((:fn q-continue))]  ; second bounce produces the final value
23 |           (is (= (Ok. 7) result)))))))


--------------------------------------------------------------------------------
/test/cljs/parsatron/test.cljs:
--------------------------------------------------------------------------------
  1 | (ns parsatron.test
  2 |   (:require [the.parsatron :as p])
  3 |   (:import [goog.testing TestRunner TestCase])
  4 |   (:require-macros [the.parsatron :refer (>>)]))
  5 | 
  6 | (def tr (TestRunner.))  ; goog.testing runner
  7 | (def test (TestCase. "The Parsatron"))  ; the test case that each test fn below is added to
  8 | 
  9 | (defn parser-result? [expected p input]  ; asserts that running p over input yields expected
 10 |   (js/assertEquals expected (p/run p input)))
 11 | 
 12 | (defn throws-with-msg? [re f]  ; asserts that calling f throws and that the error's message matches re
 13 |   (let [err (js/assertThrows f)]
 14 |     (js/assertTrue (.test re (.-message err)))))
 15 | 
 16 | (defn doeach [f & args]  ; applies f to every arg, forcing evaluation with doall
 17 |   (doall (map f args)))
 18 | 
 19 | 
 20 | 
 21 | (defn test-always []  ; always yields its value whether or not input remains
 22 |   (parser-result? 5 (p/always 5) "")
 23 |   (parser-result? 5 (p/always 5) "abc"))
 24 | 
 25 | (.add test (TestCase.Test. "test-always" test-always))
 26 | 
 27 | (defn test-nxt []  ; the second parser's value is the result
 28 |   (parser-result? 5 (p/nxt (p/always 3)
 29 |                            (p/always 5)) ""))
 30 | 
 31 | (.add test (TestCase.Test. "test-nxt" test-nxt))
 32 | 
 33 | (defn test-bind []  ; the bound fn receives the first parser's value
 34 |   (parser-result? 8 (p/bind (p/always 3)
 35 |                             (fn [x]
 36 |                               (p/always (+ x 5)))) ""))
 37 | 
 38 | (.add test (TestCase.Test. "test-bind" test-bind))
 39 | 
 40 | (defn test-never [] ; running never must throw, even on empty input
 41 |   (js/assertThrows (fn [] (p/run (p/never) ""))))
 42 | 
 43 | (.add test (TestCase.Test. "test-never" test-never))
 44 | 
 45 | (defn test-either []
 46 |   (let [five (p/always 5)
 47 |         run-a-or-b (fn [] (p/run (p/either (p/char "a") (p/char "b")) "c"))]
 48 |     ;; the first alternative wins when it succeeds
 49 |     (parser-result? 5 (p/either five (p/always 3)) "")
 50 |     ;; the second alternative is used when the first (never) fails
 51 |     (parser-result? 5 (p/either (p/never) five) "")
 52 |     ;; when both alternatives fail, the thrown message carries both errors
 53 |     (throws-with-msg? #"Unexpected token 'c', Unexpected token 'c'" run-a-or-b)))
 54 | 
 55 | (.add test (TestCase.Test. "test-either" test-either))
 56 | 
 57 | (defn test-attempt []
 58 |   ;; on success, attempt returns the value of the wrapped parser
 59 |   (parser-result? "a" (p/attempt (p/char "a")) "a")
 60 |   ;; failure is same as never. p/char must be qualified: a bare `char` is
 61 |   ;; cljs.core/char, which would make this throw for an unrelated reason.
 62 |   (js/assertThrows #(p/run (p/attempt (p/char "a")) "b"))
 63 |   ;; the attempted branch fails after matching "a", yet the second branch still parses "ac"
 64 |   (parser-result? "c" (p/either (p/attempt (>> (p/char "a") (p/char "b")))
 65 |                                 (>> (p/char "a") (p/char "c"))) "ac"))
 66 | (.add test (TestCase.Test. "test-attempt" test-attempt))
 67 | 
 68 | (defn test-token []
 69 |   (let [accept (constantly true)
 70 |         reject (constantly false)]
 71 |     ;; with no input left, even an accepting predicate produces an error
 72 |     (throws-with-msg? #"Unexpected end of input" (fn [] (p/run (p/token accept) "")))
 73 |     ;; the predicate decides: an accepted token is returned, a rejected one is named in the error
 74 |     (parser-result? "a" (p/token accept) "a")
 75 |     (throws-with-msg? #"Unexpected token 'a'" (fn [] (p/run (p/token reject) "a")))))
 76 | (.add test (TestCase.Test. "test-token" test-token))
 77 | 
 78 | (defn test-many []
 79 |   ;; a parser that succeeds without consuming input is rejected
 80 |   (throws-with-msg? #"Combinator '\*' is applied to a parser that accepts an empty string"
 81 |                     (fn [] (p/run (p/many (p/always 5)) "")))
 82 | 
 83 |   ;; with nothing to match, the result is empty
 84 |   (parser-result? [] (p/many (p/char "a")) "")
 85 | 
 86 |   ;; matched items come back in order; the trailing "c" that neither
 87 |   ;; alternative accepts is not part of the result
 88 |   (let [a-or-b (p/either (p/char "a") (p/char "b"))
 89 |         parsed (p/run (p/many a-or-b) "aababbc")]
 90 |     (js/assertTrue (= ["a" "a" "b" "a" "b" "b"] parsed)))
 91 | 
 92 |   ;; a long run of matches must not blow the stack
 93 |   (let [as (repeat 1000 "a")
 94 |         input (apply str as)
 95 |         parsed (p/run (p/many (p/char "a")) input)]
 96 |     (js/assertTrue (= as parsed))))
 97 | 
 98 | (.add test (TestCase.Test. "test-many" test-many))
 99 | 
100 | (defn test-times []
101 |   ;; zero repetitions yield [] without needing any input
102 |   (parser-result? [] (p/times 0 (p/char "a")) "")
103 | 
104 |   ;; with fewer than three items available, the wrapped parser's own
105 |   ;; end-of-input error is what gets thrown
106 |   (doseq [input ["" "a" "aa"]]
107 |     (throws-with-msg? #"Unexpected end of input"
108 |                       (fn [] (p/run (p/times 3 (p/char "a")) input))))
109 | 
110 |   ;; one result per repetition, in order
111 |   (js/assertTrue (= ["a" "a" "a"]
112 |                     (p/run (p/times 3 (p/char "a")) "aaa")))
113 |   (js/assertTrue (= [5 5 5]
114 |                     (p/run (p/times 3 (p/always 5)) "")))
115 | 
116 |   ;; a large repetition count must not blow the stack
117 |   (let [n 10000
118 |         as (repeat n "a")
119 |         parsed (p/run (p/times n (p/char "a")) (apply str as))]
120 |     (js/assertTrue (= as parsed))))
121 | 
122 | (.add test (TestCase.Test. "test-times" test-times))
123 | 
124 | (defn test-lookahead []
125 |   (let [peek-a (p/lookahead (p/char "a"))]
126 |     ;; on success the wrapped parser's value is returned
127 |     (parser-result? "a" peek-a "a")
128 |     ;; the input is not consumed, so a following char "a" still matches
129 |     (parser-result? "a" (>> peek-a (p/char "a")) "a")))
130 | 
131 | (.add test (TestCase.Test. "test-lookahead" test-lookahead))
132 | 
133 | (defn test-choice []
134 |   ;; running a choice that has no alternatives throws
135 |   (js/assertThrows (fn [] (p/run (p/choice) "")))
136 | 
137 |   ;; the result comes from whichever alternative matches; each input is a
138 |   ;; single character, so its first element is the expected value
139 |   (let [check (fn [input]
140 |                 (let [abc (p/choice (p/char "a") (p/char "b") (p/char "c"))]
141 |                   (parser-result? (first input) abc input)))]
142 |     ;; exercise each single-character input in turn
143 |     (doeach check "a" "b" "c")))
144 | 
145 | (.add test (TestCase.Test. "test-choice" test-choice))
146 | 
147 | (defn test-eof []
148 |   (let [a-then-eof (fn [] (>> (p/char "a") (p/eof)))
149 |         leftover #"Expected end of input"]
150 |     ;; parser succeeds and returns nil when no more input is left
151 |     (parser-result? nil (p/eof) "")
152 |     (parser-result? nil (a-then-eof) "a")
153 | 
154 |     ;; parser fails with a message when input is left
155 |     (throws-with-msg? leftover (fn [] (p/run (p/eof) "a")))
156 |     (throws-with-msg? leftover (fn [] (p/run (a-then-eof) "ab")))))
157 | 
158 | (.add test (TestCase.Test. "test-eof" test-eof))
159 | 
160 | (.initialize tr test) ; hand the populated test case to the runner
161 | (.execute tr)         ; top-level call: the tests run as soon as this file is loaded
162 | 


--------------------------------------------------------------------------------
/test/resources/parsatron_test.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 |   <meta charset="UTF-8">
 4 |   <title>Test - The Parsatron</title>
 5 | </head>
 6 | <body>
 7 |   <script type="text/javascript" src="parsatron_test.js"></script>
 8 | </body>
 9 | </html>
10 | 


--------------------------------------------------------------------------------