├── LICENSE
├── README.md
├── default.nix
├── examples
│   ├── arithmetic
│   │   └── default.nix
│   ├── kernel-config
│   │   ├── default.nix
│   │   └── surface-4.19.config
│   ├── parens
│   │   └── default.nix
│   └── uuids
│       └── default.nix
├── flake.nix
├── lexer.nix
└── parsec.nix


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Nicole Prindle
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # nix-parsec
 2 | 
 3 | Parser combinators in Nix for no-nixpkgs parsing.
 4 | 
 5 | ### Background
 6 | 
 7 | Nix isn't meant to be a general purpose programming language, so using parser
 8 | combinators like this should generally be avoided. Here are some reasons to
 9 | prefer a pure-Nix parser over other approaches:
10 | 
11 | - Regular expressions won't work for your use case
12 |   - You _usually_ don't need to parse things in Nix, and when you do, you usually don't need anything more powerful than regex
13 |   - Nix's regex builtins are somewhat inconvenient to use. If you're looking for a more convenient regex interface in Nix, consider [the regex module](https://github.com/chessai/nix-std/blob/master/regex.nix) in [nix-std](https://github.com/chessai/nix-std)
14 | - Parsing performance will not be a bottleneck for the build
15 |   - Nix evaluation can be slow! If your files are large, parsing may take a while
16 |   - If parsing evaluation is a bottleneck, consider implementing your parser in your language of choice and using Nix to invoke it
17 | - It's difficult to pass results of parsing in another language back to Nix
18 | - You need to avoid nixpkgs or other dependencies
19 | 
20 | ### Usage
21 | 
22 | Include by fetching via usual means (`fetchTarball`, `fetchFromGitHub`, etc.):
23 | 
24 | ```nix
25 | let
26 |   version = "v0.1.0";
27 |   sha256 = "...";
28 | 
29 |   nix-parsec = import (builtins.fetchTarball {
30 |     url = "https://github.com/nprindle/nix-parsec/archive/${version}.tar.gz";
31 |     inherit sha256;
32 |   });
33 | 
34 |   inherit (nix-parsec) parsec lexer;
35 | in ...
36 | ```
37 | 
38 | If you are using Nix Flakes, you can add `nix-parsec` as an input:
39 | ```nix
40 | {
41 |   inputs = {
42 |     nix-parsec.url = "github:nprindle/nix-parsec";
43 |   };
44 | 
45 |   outputs = { self, nix-parsec, ... }: {
46 |     # ...
47 |   };
48 | }
49 | ```
50 | 
51 | At the top level, two attribute sets are exported:
52 | 
53 | - `parsec`: Parser combinators and functions to run parsers
54 | - `lexer`: Combinators for parsing token-related things
55 | 
56 | The parsing/lexing APIs roughly correspond to those of Haskell's `megaparsec`
57 | library. See `examples/` for some example parsers.
58 | 
59 | 


--------------------------------------------------------------------------------
/default.nix:
--------------------------------------------------------------------------------
1 | let
2 |   parsec = import ./parsec.nix;
3 |   lexer = import ./lexer.nix { inherit parsec; };
4 | in {
5 |   inherit parsec lexer;
6 | }
7 | 


--------------------------------------------------------------------------------
/examples/arithmetic/default.nix:
--------------------------------------------------------------------------------
 1 | # Example: parse a simple arithmetic expression using "+" or "*" into its result
 2 | #
 3 | # This could backtrack a lot, though obviously it can be written to be much more
 4 | # efficient.
 5 | #
 6 | # Load in nix repl and test, e.g.:
 7 | #
 8 | # nix-repl> parseExpr "1 + 2 * -3"
 9 | # { type = "success"; value = -5; }
10 | 
11 | let
12 |   nix-parsec = import ../../default.nix;
13 |   inherit (nix-parsec) lexer;
14 | in
15 | 
16 | with nix-parsec.parsec;
17 | 
18 | let
19 |   spaces = skipWhile (c: c == " ");
20 |   lexeme = lexer.lexeme spaces;
21 |   symbol = lexer.symbol spaces;
22 | 
23 |   int = lexeme (lexer.signed spaces lexer.decimal);
24 | 
25 |   # Grammar:
26 |   #   expr   ::= term + expr | term
27 |   #   term   ::= factor * term | factor
28 |   #   factor ::= (expr) | int
29 | 
30 |   expr = alt (bind term (n: skipThen (symbol "+") (fmap (m: n + m) expr))) term;
31 |   term = alt (bind factor (n: skipThen (symbol "*") (fmap (m: n * m) term))) factor;
32 |   factor = alt (between (symbol "(") (symbol ")") expr) int;
33 | in {
34 |   parseExpr = runParser (thenSkip expr eof);
35 | }
36 | 


--------------------------------------------------------------------------------
/examples/kernel-config/default.nix:
--------------------------------------------------------------------------------
 1 | # Example: parse a linux kernel config file, like 'surface-4.19.config' in this
 2 | # directory
 3 | #
 4 | # Load in nix repl and test, e.g.:
 5 | #
 6 | # nix-repl> parseConfigFile ./surface-4.19.config
 7 | # { type = "success"; value = ...; }
 8 | 
 9 | { pkgs ? import <nixpkgs> {}
10 | }:
11 | 
12 | let
13 |   nix-parsec = import ../../default.nix;
14 |   inherit (nix-parsec) lexer;
15 |   inherit (pkgs) lib;
16 | in
17 | 
18 | with nix-parsec.parsec;
19 | 
20 | let
21 |   # Consume zero or more spaces, not including newlines
22 |   spaces = skipWhile (c: c == " " || c == "\t");
23 | 
24 |   # Skip spaces and line comments and newlines
25 |   spaceComments = lexer.space
26 |     (skipWhile1 (c: c == " " || c == "\t" || c == "\n"))
27 |     (lexer.skipLineComment "#")
28 |     fail;
29 | 
30 |   lexeme = lexer.lexeme spaces;
31 |   symbol = lexer.symbol spaces;
32 | 
33 |   identifier =
34 |     let isIdChar = c: builtins.match "[a-zA-Z0-9_]" c != null;
35 |     in lexeme (takeWhile1 isIdChar);
36 | 
37 |   kernelOption = identifier;
38 |   kernelValue = lexeme (choice [
39 |     (fmap (_: lib.kernel.yes) (symbol "y"))
40 |     (fmap (_: lib.kernel.no) (symbol "n"))
41 |     (fmap (_: lib.kernel.module) (symbol "m"))
42 |     (fmap lib.kernel.freeform identifier)
43 |   ]);
44 | 
45 |   line =
46 |     bind kernelOption (key:
47 |       skipThen
48 |         (symbol "=")
49 |         (thenSkip
50 |           (fmap (lib.nameValuePair key) kernelValue)
51 |           spaceComments));
52 | 
53 |   configFile =
54 |     fmap lib.listToAttrs
55 |     (skipThen spaceComments (thenSkip (many line) eof));
56 | 
57 | in {
58 |   parseConfigFile = path: runParser configFile (builtins.readFile path);
59 | }
60 | 


--------------------------------------------------------------------------------
/examples/kernel-config/surface-4.19.config:
--------------------------------------------------------------------------------
 1 | # The problem of parsing kernel config files was proposed by hpfr
 2 | #
 3 | # This config file is taken from here:
 4 | # https://github.com/linux-surface/linux-surface/blob/master/configs/surface-4.19.config
 5 | 
 6 | #
 7 | # Intel IPTS Touchscreen
 8 | #
 9 | CONFIG_INTEL_IPTS=m
10 | CONFIG_INTEL_IPTS_SURFACE=m
11 | 
12 | #
13 | # Surface Aggregator Module
14 | #
15 | CONFIG_GPIO_SYSFS=y                         # required for SURFACE_SAM_HPS
16 | CONFIG_SURFACE_SAM=m
17 | CONFIG_SURFACE_SAM_SSH=m
18 | CONFIG_SURFACE_SAM_SSH_DEBUG_DEVICE=y
19 | CONFIG_SURFACE_SAM_SAN=m
20 | CONFIG_SURFACE_SAM_VHF=m
21 | CONFIG_SURFACE_SAM_DTX=m
22 | CONFIG_SURFACE_SAM_HPS=m
23 | CONFIG_SURFACE_SAM_SID=m
24 | CONFIG_SURFACE_SAM_SID_GPELID=m
25 | CONFIG_SURFACE_SAM_SID_PERFMODE=m
26 | CONFIG_SURFACE_SAM_SID_VHF=m
27 | CONFIG_SURFACE_SAM_SID_POWER=m
28 | 
29 | #
30 | # Other Drivers
31 | #
32 | CONFIG_INPUT_SOC_BUTTON_ARRAY=m
33 | CONFIG_SURFACE_3_POWER_OPREGION=m
34 | CONFIG_SURFACE_3_BUTTON=m
35 | CONFIG_SURFACE_3_POWER_OPREGION=m
36 | CONFIG_SURFACE_PRO3_BUTTON=m
37 | 


--------------------------------------------------------------------------------
/examples/parens/default.nix:
--------------------------------------------------------------------------------
 1 | # Example: find the deepest depth of nested parentheses in a string of balanced
 2 | # parentheses, or return an error if the parentheses are not balanced.
 3 | #
 4 | # Load in nix repl and test, e.g.:
 5 | #
 6 | # nix-repl> parseParens "((())())"
 7 | # { type = "success"; value = 3; }
 8 | 
 9 | let
10 |   nix-parsec = import ../../default.nix;
11 | in
12 | 
13 | with nix-parsec.parsec;
14 | 
15 | let
16 |   max = x: y: if x > y then x else y;
17 |   maximum = builtins.foldl' max 0;
18 | 
19 |   parens =
20 |     let expr = fmap (x: x + 1) (between (string "(") (string ")") parens);
21 |     in fmap maximum (many expr);
22 | in {
23 |   parseParens = runParser (thenSkip parens eof);
24 | }
25 | 


--------------------------------------------------------------------------------
/examples/uuids/default.nix:
--------------------------------------------------------------------------------
 1 | # Example: parse a UUID using nix-parsec and using regular expressions
 2 | #
 3 | # Load in nix repl and test, e.g.:
 4 | #
 5 | # nix-repl> :p parseUuid "123e4567-e89b-12d3-a456-426614174000"
 6 | # { type = "success"; value = [ "123e4567" "e89b" "12d3" "a456" "426614174000" ]; }
 7 | 
 8 | { pkgs ? import <nixpkgs> {}
 9 | }:
10 | 
11 | let
12 |   inherit (pkgs) lib;
13 |   nix-parsec = import ../../default.nix;
14 | in
15 | 
16 | with nix-parsec.parsec;
17 | 
18 | let
19 |   # Parses a UUID in a format like 123e4567-e89b-12d3-a456-426614174000
20 |   uuid =
21 |     let
22 |       hex = satisfy (x: builtins.match "[0-9a-f]" x != null);
23 |       nHex = n: fmap lib.concatStrings (count n hex);
24 |       hyphen = string "-";
25 |       group = n: next: skipThen hyphen (bind (nHex n) next);
26 |     in
27 |       bind (nHex 8)
28 |       (g1: group 4
29 |       (g2: group 4
30 |       (g3: group 4
31 |       (g4: group 12
32 |       (g5: pure [g1 g2 g3 g4 g5])))));
33 | in rec {
34 |   # Parse using nix-parsec
35 |   parseUuid = runParser uuid;
36 | 
37 |   # Parse using regular expressions
38 |   parseUuid' = builtins.match "([0-9a-f]{8})-([0-9a-f]{4})-([0-9a-f]{4})-([0-9a-f]{4})-([0-9a-f]{12})";
39 | }
40 | 


--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
 1 | {
 2 |   description = "nix-parsec";
 3 | 
 4 |   outputs = { self }:
 5 |     {
 6 |       lib = {
 7 |         lexer = import ./lexer.nix { parsec = self.lib.parsec; };
 8 |         parsec = import ./parsec.nix;
 9 |       };
10 |     };
11 | }
12 | 


--------------------------------------------------------------------------------
/lexer.nix:
--------------------------------------------------------------------------------
  1 | { parsec
  2 | }:
  3 | 
  4 | with parsec;
  5 | 
  6 | let
  7 |   escapeToChar = c:
  8 |     if c == "n" then "\n"
  9 |     else if c == "r" then "\r"
 10 |     else if c == "t" then "\t"
 11 |     else c;
 12 | 
 13 |   hexDigits = {
 14 |     "0" = 0; "1" = 1; "2" = 2; "3" = 3; "4" = 4; "5" = 5; "6" = 6; "7" = 7;
 15 |     "8" = 8; "9" = 9;
 16 |     "A" = 10; "B" = 11; "C" = 12; "D" = 13; "E" = 14; "F" = 15;
 17 |     "a" = 10; "b" = 11; "c" = 12; "d" = 13; "e" = 14; "f" = 15;
 18 |   };
 19 | 
 20 | in rec {
 21 |   # Build a space-consuming parser out of:
 22 |   #   - a parser that consumes spaces
 23 |   #   - a parser that consumes line comments
 24 |   #   - a parser that consumes block comments
 25 |   # None of these should be able to accept empty input
 26 |   #   :: Parser null -> Parser null -> Parser null -> Parser null
 27 |   space = sp: lc: bc: skipMany (alt sp (alt lc bc));
 28 | 
 29 |   # Given the delimiter marking the start of a line comment, build a parser that
 30 |   # consumes a line comment
 31 |   #   :: String -> Parser null
 32 |   skipLineComment = start:
 33 |     let
 34 |       prefix = string start;
 35 |     in skipThen prefix (skipWhile (x: x != "\n"));
 36 | 
 37 |   # Given start and end delimiters of a block comment, build a parser that
 38 |   # consumes a block comment
 39 |   #   :: String -> String -> Parser null
 40 |   skipBlockComment = start: end:
 41 |     let
 42 |       prefix = string start;
 43 |       suffix = string end;
 44 |     in skipThen prefix (skipTill anyChar suffix);
 45 | 
 46 |   # Given start and end delimiters of a block comment, build a parser that
 47 |   # consumes a possibly nested block comment
 48 |   #   :: String -> String -> Parser null
 49 |   skipBlockCommentNested = start: end:
 50 |     let
 51 |       prefix = string start;
 52 |       suffix = string end;
 53 |       go = skipThen prefix (skipTill (alt go anyChar) suffix);
 54 |     in go;
 55 | 
 56 |   # Use a space-consuming parser to turn a parser into a lexeme parser
 57 |   #   :: Parser null -> Parser a -> Parser a
 58 |   lexeme = sc: parser: thenSkip parser sc;
 59 | 
 60 |   # Use a space-consuming parser to turn a parser into a symbol parser
 61 |   #   :: Parser null -> String -> Parser String
 62 |   symbol = sc: sym: lexeme sc (string sym);
 63 | 
 64 |   # Parses a decimal integer. If you want to handle leading signs, wrap it using
 65 |   # 'signed'.
 66 |   #
 67 |   # Unlike 'decimal', it is often faster, but will
 68 |   # only ever consume at most 19 characters after leading zeros. Only use this
 69 |   # if you're sure the number you're parsing can fit in a signed 64-bit integer.
 70 |   unsafeDecimal =
 71 |     let
 72 |       toInt = builtins.fromJSON; # Hacky, but efficient
 73 |       int = fmap (x: toInt (builtins.elemAt x 0)) (matchingN 19 "[[:digit:]]+");
 74 |       leadingZeros = skipWhile (c: c == "0");
 75 |     in alt
 76 |       # Nonzero number with leading zeros
 77 |       (skipThen leadingZeros int)
 78 |       # Only zeros
 79 |       (fmap (_: 0) (skipWhile1 (c: c == "0")));
 80 | 
 81 |   # Parses a decimal integer; leading zeros are skipped, since 'fromJSON'
 82 |   # rejects them. To handle leading signs, wrap it using 'signed'.
 83 |   # NOTE: Nix integers are 64-bit signed; larger numbers are not guaranteed.
 84 |   decimal =
 85 |     let
 86 |       digits = takeWhile1 (c: builtins.match "[[:digit:]]" c != null);
 87 |       zero = c: c == "0";
 88 |     in alt (skipThen (skipWhile zero) (fmap builtins.fromJSON digits)) (fmap (_: 0) (skipWhile1 zero));
 89 | 
 90 |   # Parses a binary integer, as a nonempty string of "0"s and "1"s. Does not
 91 |   # assume a prefix.
 92 |   binary =
 93 |     let
 94 |       isBinDigit = c: c == "0" || c == "1";
 95 |       binToInt = str:
 96 |         let
 97 |           len = builtins.stringLength str;
 98 |           nthDigit = n: if builtins.substring n 1 str == "0" then 0 else 1;
 99 |           go = acc: i:
100 |             if i >= len
101 |               then acc
102 |               else go (2 * acc + nthDigit i) (i + 1);
103 |         in go 0 0;
104 |     in fmap binToInt (takeWhile1 isBinDigit);
105 | 
106 |   # Parses a hexadecimal integer, as a nonempty string of digits "0" through "9"
107 |   # or letters "A" through "F" (either uppercase or lowercase). Does not assume
108 |   # a prefix.
109 |   hexadecimal =
110 |     let
111 |       isHexDigit = c: builtins.match "[[:xdigit:]]" c != null;
112 |       hexToInt = str:
113 |         let
114 |           len = builtins.stringLength str;
115 |           nthDigit = n: hexDigits.${builtins.substring n 1 str};
116 |           go = acc: i:
117 |             if i >= len
118 |               then acc
119 |               else go (16 * acc + nthDigit i) (i + 1);
120 |         in go 0 0;
121 |     in fmap hexToInt (takeWhile1 isHexDigit);
122 | 
123 |   # Wrap a number parser so it also accepts an optional leading "+" or "-" sign.
124 |   # 'sp' consumes any space between the sign and the number (e.g. "- 3").
125 |   #   :: Num a => Parser null -> Parser a -> Parser a
126 |   signed = sp: parser:
127 |     let
128 |       plus = fmap (_: 1) (string "+");
129 |       minus = fmap (_: -1) (string "-");
130 |       sign = option 1 (lexeme sp (alt minus plus));
131 |     in bind sign (res: fmap (n: res * n) parser);
132 | 
133 |   # Parses a Nix character literal, without quotes. Handles character escaping.
134 |   #
135 |   # NOTE: Only supports \n, \r, and \t. All other characters after a backslash
136 |   # will be returned as-is; e.g., "\a" becomes "a".
137 |   charLit =
138 |     bind anyChar
139 |     (c: if c == "\\"
140 |       then fmap escapeToChar anyChar
141 |       else pure c);
142 | 
143 |   # Parses a basic double-quoted string literal, handling escaped inner quotes.
144 |   stringLit = fmap (builtins.concatStringsSep "")
145 |     (skipThen (string "\"") (manyTill charLit (string "\"")));
146 | }
147 | 


--------------------------------------------------------------------------------
/parsec.nix:
--------------------------------------------------------------------------------
  1 | # A parser is a value with the following type:
  2 | #   type Parser a = (String, Int, Int) -> Either e (a, Int, Int)
  3 | #
  4 | # - The parameters are the source, the offset, and the length
  5 | # - The result is the value produced, the new offset, and the new length
  6 | # - If a failure occurs, the result will be an attribute set containing
  7 | #   information about the error
  8 | #
  9 | # Note that in the types, 'Maybe a' denotes a value that is either null or a
 10 | # singleton list containing a value of type 'a'. 'NonEmpty a' denotes a list
 11 | # containing one or more values of type 'a'. 'null' denotes the singleton type
 12 | # containing only the value 'null'.
 13 | 
 14 | with builtins;
 15 | 
 16 | with rec {
 17 |   # Redefinitions to avoid depending on lib
 18 | 
 19 |   foldr = op: nul: list:
 20 |     let
 21 |       len = length list;
 22 |       fold' = n:
 23 |         if n == len
 24 |         then nul
 25 |         else op (elemAt list n) (fold' (n + 1));
 26 |     in fold' 0;
 27 | 
 28 |   # TODO: optimize result collection
 29 |   reverseList = xs:
 30 |     let l = length xs; in genList (n: elemAt xs (l - n - 1)) l;
 31 | };
 32 | 
 33 | rec {
 34 |   # running {{{
 35 | 
 36 |   # Run a parser, returning the result in an attrset either containing a "type"
 37 |   # key and a "value" key. If "type" is "error", "value" will contain the
 38 |   # reported error. If "type" is "success", "value" will contain the parsed
 39 |   # value.
 40 |   #
 41 |   # If the parser did not consume all of its input, this will still succeed. If
 42 |   # you want to make sure all input has been consumed, use 'eof'.
 43 |   #
 44 |   #   :: Parser a -> String -> Either e a
 45 |   runParser = parser: str:
 46 |     let res = parser [str 0 (stringLength str)];
 47 |     in if failed res
 48 |       then { type = "error"; value = res; }
 49 |       else { type = "success"; value = elemAt res 0; };
 50 | 
 51 |   # }}}
 52 | 
 53 |   # queries {{{
 54 | 
 55 |   # Did the raw result of a parser fail?
 56 |   #
 57 |   #   :: Either e (a, Int, Int) -> Bool
 58 |   failed = ps: !builtins.isList ps;
 59 | 
 60 |   # Query the current state of the parser
 61 |   #   :: Parser (String, Int, Int)
 62 |   state = ps:
 63 |     let
 64 |       offset = elemAt ps 1;
 65 |       len = elemAt ps 2;
 66 |     in [ps offset len];
 67 | 
 68 |   # Augment a parser to also return the number of characters it consumed
 69 |   #   :: Parser a -> Parser (Int, a)
 70 |   measure = parser: ps:
 71 |     let
 72 |       initialOffset = elemAt ps 1;
 73 |       res = parser ps;
 74 |     in if failed res
 75 |       then res
 76 |       else let
 77 |         value = elemAt res 0;
 78 |         newOffset = elemAt res 1;
 79 |         newLength = elemAt res 2;
 80 |       in [[(newOffset - initialOffset) value] newOffset newLength];
 81 | 
 82 |   # Augment a parser to also return the characters it consumed
 83 |   #   :: Parser a -> Parser (String, a)
 84 |   withMatch = parser: ps:
 85 |     let
 86 |       str = elemAt ps 0;
 87 |       oldOffset = elemAt ps 1;
 88 |       res = parser ps;
 89 |     in if failed res
 90 |       then res
 91 |       else let
 92 |         value = elemAt res 0;
 93 |         newOffset = elemAt res 1;
 94 |         newLen = elemAt res 2;
 95 |       in [[(substring oldOffset (newOffset - oldOffset) str) value] newOffset newLen];
 96 | 
 97 |   # }}}
 98 | 
 99 |   # composition {{{
100 | 
101 |   # Map a function over the result of a parser
102 |   #   :: (a -> b) -> Parser a -> Parser b
103 |   fmap = f: parser: ps:
104 |     let
105 |       res = parser ps;
106 |       val = elemAt res 0;
107 |       offset = elemAt res 1;
108 |       len = elemAt res 2;
109 |     in if failed res
110 |       then res
111 |       else [(f val) offset len];
112 | 
113 |   # Lift a value into a parser
114 |   #   :: a -> Parser a
115 |   pure = x: ps: [x (elemAt ps 1) (elemAt ps 2)];
116 | 
117 |   # Applicative functor application
118 |   #   :: Parser (a -> b) -> Parser a -> Parser b
119 |   ap = p1: p2: bind p1 (f: fmap f p2);
120 | 
121 |   # Lift a two-argument function over two parsers in sequence.
122 |   #   :: (a -> b -> c) -> Parser a -> Parser b -> Parser c
123 |   lift2 = f: p1: p2: ap (fmap f p1) p2;
124 | 
125 |   # Monadic bind; sequence two parsers together
126 |   #   :: Parser a -> (a -> Parser b) -> Parser b
127 |   bind = parser: f: ps:
128 |     let
129 |       str = elemAt ps 0;
130 |       res1 = parser ps;
131 |     in if failed res1
132 |       then res1
133 |       else let
134 |         val = elemAt res1 0;
135 |         offset = elemAt res1 1;
136 |         len = elemAt res1 2;
137 |       in (f val) [str offset len];
138 | 
139 |   # Sequence two parsers, ignoring the result of the first one, like '*>' in
140 |   # Haskell
141 |   #   :: Parser a -> Parser b -> Parser b
142 |   skipThen = parser1: parser2: bind parser1 (_: parser2);
143 | 
144 |   # Sequence two parsers, ignoring the result of the second one, like '<*' in
145 |   # Haskell
146 |   #   :: Parser a -> Parser b -> Parser a
147 |   thenSkip = parser1: parser2: bind parser1 (x: fmap (_: x) parser2);
148 | 
149 |   # Run a list of parsers in sequence, collecting their results
150 |   #   :: [Parser a] -> Parser [a]
151 |   sequence = xs:
152 |     let
153 |       len = length xs;
154 |       go = n:
155 |         if n >= len
156 |           then pure []
157 |           else bind (elemAt xs n) (first: fmap (rest: [first] ++ rest) (go (n + 1)));
158 |     in go 0;
159 | 
160 |   # Ignore the results of a parser
161 |   #   :: Parser a -> Parser null
162 |   void = fmap (_: null);
163 | 
164 |   # Like 'sequence', but ignore the outputs of the parsers
165 |   #   :: [Parser a] -> Parser null
166 |   sequence_ = xs:
167 |     let
168 |       len = length xs;
169 |       go = n:
170 |         if n >= len
171 |           then pure null
172 |           else skipThen (elemAt xs n) (go (n + 1));
173 |     in go 0;
174 | 
175 |   # }}}
176 | 
177 |   # options and failure {{{
178 | 
179 |   # Parser that always fails (the identity under 'alt')
180 |   fail = failWith { context = "parsec.fail"; };
181 | 
182 |   # Parser that always fails with the given error
183 |   #   :: e -> Parser a
184 |   failWith = e: _: e;
185 | 
186 |   # Apply a function to modify an error message for a parser
187 |   annotate = f: parser: ps:
188 |     let res = parser ps;
189 |     in if failed res
190 |       then f res
191 |       else res;
192 | 
193 |   # Modify a parser error message by adding the given attributes
194 |   #
195 |   # NOTE: this overrides any of the old attributes, so make sure that any
196 |   # possible error information is irrelevant first
197 |   annotateWith = e: annotate (x: x // e);
198 | 
199 |   # Add a new context annotation to an error, keeping the old error entirely
200 |   annotateContext = s: annotate (e: { context = s; error = e; });
201 | 
202 |   # Add information about the current offset to a parser
203 |   #
204 |   # NOTE: this overrides any old offset info, which could make it confusing
205 |   # where the error actually happened.
206 |   withOffsetInfo = parser:
207 |     bind state (info: annotateWith { str = elemAt info 0; offset = elemAt info 1; } parser);
208 | 
209 |   # Override an error message for a parser
210 |   label = e: annotate (_: e);
211 | 
212 |   # Run two parsers; if the first one fails, run the second one
213 |   #   :: Parser a -> Parser a -> Parser a
214 |   alt = parser1: parser2: withOffsetInfo (ps:
215 |     let
216 |       str = elemAt ps 0;
217 |       res1 = parser1 ps;
218 |       res2 = parser2 ps;
219 |     in if failed res1
220 |       then if failed res2
221 |         then {
222 |           context = "parsec.alt";
223 |           msg = "expected one of these to succeed";
224 |           error = [res1 res2];
225 |         }
226 |         else res2
227 |       else res1);
228 | 
229 |   # Try to apply a parser, or return a default value if it fails. On failure no
230 |   # input is consumed, since 'alt' retries from the original state. Cannot fail.
231 |   #   :: a -> Parser a -> Parser a
232 |   option = def: parser: alt parser (pure def);
233 | 
234 |   # Try to apply a parser. If it succeeds, return its result in a singleton
235 |   # list, and if it fails, return an empty list without consuming input. Cannot
236 |   # fail.
237 |   #   :: Parser a -> Parser [a]
238 |   optional = parser: alt (fmap (x: [x]) parser) (pure []);
239 | 
240 |   # Run a list of parsers, using the first one that succeeds
241 |   #   :: [Parser a] -> Parser a
242 |   choice = parsers: withOffsetInfo (ps:
243 |     let
244 |       results = map (p: p ps) parsers;
245 |       firstSuccess = foldr (x: rest: if failed x then rest else x) null results;
246 |     in if firstSuccess == null
247 |       then {
248 |         context = "parsec.choice";
249 |         msg = "expected one of these to be satisfied";
250 |         error = results;
251 |       }
252 |       else firstSuccess);
253 | 
254 |   # }}}
255 | 
256 |   # consumption primitives {{{
257 | 
258 |   # Consumes a character if it satisfies a predicate
259 |   #   :: (Char -> Bool) -> Parser Char
260 |   satisfy = pred: withOffsetInfo (ps:
261 |     let
262 |       str = elemAt ps 0;
263 |       offset = elemAt ps 1;
264 |       len = elemAt ps 2;
265 |       c = substring offset 1 str; # the next character
266 |     in if len > 0 && pred c
267 |       then [c (offset + 1) (len - 1)]
268 |       else { context = "parsec.satisfy"; });
269 | 
270 |   # Consumes a character if it satisfies a predicate, applying a function to the
271 |   # result.
272 |   #   :: (Char -> a) -> (Char -> Bool) -> Parser a
273 |   satisfyWith = f: pred: withOffsetInfo (ps:
274 |     let
275 |       str = elemAt ps 0;
276 |       offset = elemAt ps 1;
277 |       len = elemAt ps 2;
278 |       c = substring offset 1 str; # the next character
279 |     in if len > 0 && pred c
280 |       then [(f c) (offset + 1) (len - 1)]
281 |       else { context = "parsec.satisfyWith"; });
282 | 
283 |   # Consume any character
284 |   #   :: Parser Char
285 |   anyChar = withOffsetInfo (annotateWith {
286 |     context = "parsec.anyChar";
287 |   } (satisfy (_: true)));
288 | 
289 |   # Consume any character except a given character
290 |   #   :: Char -> Parser Char
291 |   anyCharBut = c: withOffsetInfo (annotateWith {
292 |     context = "parsec.anyCharBut";
293 |     error = "expected any char except ${c}";
294 |   } (satisfy (x: x != c)));
295 | 
296 |   # Given a string, try to consume it from the input and return it if it
297 |   # succeeds. If it fails, DON'T consume any input.
298 |   #   :: String -> Parser String
299 |   string = pr: withOffsetInfo (ps:
300 |     let
301 |       prefixLen = stringLength pr;
302 |       str = elemAt ps 0;
303 |       offset = elemAt ps 1;
304 |       len = elemAt ps 2;
305 |     in if len >= prefixLen && substring offset prefixLen str == pr
306 |       then [pr (offset + prefixLen) (len - prefixLen)]
307 |       else {
308 |         context = "parsec.string";
309 |         msg = "expected string '${pr}'";
310 |       });
311 | 
312 |   # 'notFollowedBy p' only succeeds when 'p' fails, and never consumes any input
313 |   #   :: Parser a -> Parser null
314 |   notFollowedBy = parser: withOffsetInfo (ps:
315 |     let
316 |       offset = elemAt ps 1;
317 |       len = elemAt ps 2;
318 |     in if failed (parser ps)
319 |       then [null offset len]
320 |       else { context = "parsec.notFollowedBy"; });
321 | 
322 |   # Fails if there is still more input remaining, returns null otherwise
323 |   #   :: Parser null
324 |   eof = withOffsetInfo (ps:
325 |     let
326 |       offset = elemAt ps 1;
327 |       len = elemAt ps 2;
328 |     in if len == 0
329 |       then [null offset len]
330 |       else {
331 |         context = "parsec.eof";
332 |         msg = "expected end of input";
333 |       });
334 | 
335 |   # Return whether or not we're at the end of the input. Cannot fail.
336 |   #   :: Parser Bool
337 |   atEnd = ps:
338 |     let
339 |       offset = elemAt ps 1;
340 |       len = elemAt ps 2;
341 |     in [(len == 0) offset len];
342 | 
343 |   # }}}
344 | 
345 |   # takes {{{
346 | 
347 |   # Run a parser exactly 'n' times (n >= 0), collecting the results in order.
348 |   #   :: Int -> Parser a -> Parser [a]
349 |   count = n: assert n >= 0; parser:
350 |     let p' =
351 |       let go = m: if m == 0
352 |         then pure []
353 |         else bind parser (first: fmap (rest: [first] ++ rest) (go (m - 1)));
354 |       in go n;
355 |     in annotate (e: {
356 |       context = "parsec.count";
357 |       msg = "expected ${toString n} occurrences";
358 |       error = e;
359 |     }) p';
360 | 
361 |   # Consume 'n' characters, or fail if there's not enough characters left.
362 |   # Return the characters consumed.
363 |   #   :: Int -> Parser String
364 |   take = n: assert n >= 0; withOffsetInfo (ps:
365 |     let
366 |       str = elemAt ps 0;
367 |       offset = elemAt ps 1;
368 |       len = elemAt ps 2;
369 |     in if n <= len
370 |       then [(substring offset n str) (offset + n) (len - n)]
371 |       else {
372 |         context = "parsec.take";
373 |         error = "expected ${toString n} characters, but only got ${toString len}";
374 |       });
375 | 
376 |   # Consume zero or more characters while the predicate holds, returning the
377 |   # consumed characters. Cannot fail.
378 |   #   :: (Char -> Bool) -> Parser String
379 |   takeWhile = pred: ps:
380 |     let
381 |       str = elemAt ps 0;
382 |       offset = elemAt ps 1;
383 |       len = elemAt ps 2;
384 |       strLen = stringLength str;
385 |       # Search for the next offset that violates the predicate
386 |       go = ix:
387 |         if ix >= strLen || !pred (substring ix 1 str)
388 |           then ix
389 |           else go (ix + 1);
390 |       endIx = go offset;
391 |       # The number of characters we found
392 |       numChars = endIx - offset;
393 |     in [(substring offset numChars str) endIx (len - numChars)];
394 | 
395 |   # Consume one or more characters while the predicate holds, returning the
396 |   # consumed characters.
397 |   #   :: (Char -> Bool) -> Parser String
398 |   takeWhile1 = pred:
399 |     let p' = bind (satisfy pred) (first: fmap (rest: first + rest) (takeWhile pred));
400 |     in annotateWith {
401 |       context = "parsec.takeWhile1";
402 |       msg = "expected at least one character matching the predicate";
403 |     } p';
404 | 
405 |   # Apply a parser zero or more times until it fails, returning a list of the
406 |   # results. Cannot fail.
407 |   #   :: Parser a -> Parser [a]
408 |   many = parser:
409 |     let go = alt (bind parser (first: fmap (rest: [first] ++ rest) go)) (pure []);
410 |     in go;
411 | 
412 |   # Apply a parser one or more times until it fails, returning a list of the
413 |   # results
414 |   #   :: Parser a -> Parser (NonEmpty a)
415 |   many1 = parser:
416 |     let p' = bind parser (first: fmap (rest: [first] ++ rest) (many parser));
417 |     in annotate (e: {
418 |       context = "parsec.many1";
419 |       msg = "expected one or more occurrences";
420 |       error = e;
421 |     }) p';
422 | 
423 |   # Repeat a parser zero or more times until the end parser succeeds. Returns a
424 |   # list of the results of the first parser.
425 |   #   :: Parser a -> Parser b -> Parser [a]
426 |   manyTill = parser: end:
427 |     let p' =
428 |       let go = alt (fmap (_: []) end) (bind parser (first: fmap (rest: [first] ++ rest) go));
429 |       in go;
430 |     in annotateContext "parsec.manyTill" p';
431 | 
432 |   # Repeat a parser one or more times until the end parser succeeds. Returns a
433 |   # list of the results of the first parser.
434 |   #   :: Parser a -> Parser b -> Parser (NonEmpty a)
435 |   manyTill1 = parser: end:
436 |     let p' = bind parser (first: fmap (rest: [first] ++ rest) (manyTill parser end));
437 |     in annotateContext "parsec.manyTill1" p';
438 | 
439 |   # }}}
440 | 
441 |   # separators {{{
442 | 
443 |   # Sequence three parsers, 'before', 'after', and 'middle', running them in the
444 |   # obvious order and keeping the middle result.
445 |   # Example: parens = between (string "(") (string ")")
446 |   #
447 |   #   :: Parser a -> Parser b -> Parser c -> Parser c
448 |   between = before: after: middle: skipThen before (thenSkip middle after);
449 | 
450 |   # Parse zero or more occurrences of the first parser, separated by the second
451 |   # parser. Returns a list of results of the first parser. Cannot fail.
452 |   #   :: Parser a -> Parser b -> Parser [a]
453 |   sepBy = parser: end:
454 |     alt (sepBy1 parser end) (pure []);
455 | 
456 |   # Parse one or more occurrences of the first parser, separated by the second
457 |   # parser. Returns a list of results of the first parser.
458 |   #   :: Parser a -> Parser b -> Parser (NonEmpty a)
459 |   sepBy1 = parser: end:
460 |     let p' = bind parser (first: fmap (rest: [first] ++ rest) (many (skipThen end parser)));
461 |     in annotateContext "parsec.sepBy1" p';
462 | 
463 |   # Parse zero or more occurrences of the first parser, separated and ended by
464 |   # the second parser. Returns a list of results of the first parser. Cannot
465 |   # fail.
466 |   #   :: Parser a -> Parser b -> Parser [a]
467 |   endBy = parser: end:
468 |     many (thenSkip parser end);
469 | 
470 |   # Parse one or more occurrences of the first parser, separated and ended by
471 |   # the second parser. Returns a list of results of the first parser.
472 |   #   :: Parser a -> Parser b -> Parser (NonEmpty a)
473 |   endBy1 = parser: end:
474 |     let p' = many1 (thenSkip parser end);
475 |     in annotateContext "parsec.endBy1" p';
476 | 
477 |   # Parse zero or more occurrences of the first parser, separated and optionally
478 |   # ended by the second parser. Returns a list of result of the first parser.
479 |   # Cannot fail.
480 |   #   :: Parser a -> Parser b -> Parser [a]
481 |   sepEndBy = parser: end:
482 |     alt (sepEndBy1 parser end) (pure []);
483 | 
484 |   # Parse one or more occurrences of the first parser, separated and optionally
485 |   # ended by the second parser. Returns a list of result of the first parser.
486 |   #   :: Parser a -> Parser b -> Parser (NonEmpty a)
487 |   sepEndBy1 = parser: end:
488 |     let p' =
489 |       let
490 |         go = alt
491 |           (skipThen end (alt
492 |             (bind parser (first: fmap (rest: [first] ++ rest) go))
493 |             (pure [])))
494 |           (pure []);
495 |       in bind parser (first: fmap (rest: [first] ++ rest) go);
496 |     in annotateContext "parsec.sepEndBy1" p';
497 | 
498 |   # }}}
499 | 
500 |   # skips {{{
501 | 
502 |   # Consume 'n' characters, or fail if there's not enough characters left.
503 |   #   :: Int -> Parser null
504 |   skip = n: assert n >= 0; ps:
505 |     let
506 |       str = elemAt ps 0;
507 |       offset = elemAt ps 1;
508 |       len = elemAt ps 2;
509 |     in if n <= len
510 |       then [null (offset + n) (len - n)]
511 |       else {
512 |         context = "parsec.skip";
513 |         error = "expected ${toString n} characters, but only got ${toString len}";
514 |       };
515 | 
516 |   # Consume zero or more characters while the predicate holds. Cannot fail.
517 |   #   :: (Char -> Bool) -> Parser null
518 |   skipWhile = pred: ps:
519 |     let
520 |       str = elemAt ps 0;
521 |       offset = elemAt ps 1;
522 |       len = elemAt ps 2;
523 |       strLen = stringLength str;
524 |       # Search for the next offset that violates the predicate
525 |       go = ix:
526 |         if ix >= strLen || !pred (substring ix 1 str)
527 |           then ix
528 |           else go (ix + 1);
529 |       endIx = go offset;
530 |       # The number of characters we found
531 |       numChars = endIx - offset;
532 |     in [null endIx (len - numChars)];
533 | 
534 |   # Consume one or more characters while the predicate holds.
535 |   #   :: (Char -> Bool) -> Parser null
536 |   skipWhile1 = pred:
537 |     let p' = skipThen (satisfy pred) (skipWhile pred);
538 |     in annotateContext "parsec.skipWhile1" p';
539 | 
540 |   # Run a parser zero or more times until it fails, discarding all the input
541 |   # that it accepts. Cannot fail.
542 |   #   :: Parser a -> Parser null
543 |   skipMany = parser:
544 |     let go = alt (skipThen parser go) (pure null);
545 |     in go;
546 | 
547 |   # Run a parser one or more times until it fails, discarding all the input that
548 |   # it accepts.
549 |   #   :: Parser a -> Parser null
550 |   skipMany1 = parser:
551 |     let p' = skipThen parser (skipMany parser);
552 |     in annotateContext "parsec.skipMany1" p';
553 | 
554 |   # Repeat a parser zero or more times until the end parser succeeds. Discards
555 |   # consumed input.
556 |   #   :: Parser a -> Parser b -> Parser null
557 |   skipTill = parser: end:
558 |     let p' =
559 |       let go = alt end (skipThen parser go);
560 |       in void go;
561 |     in annotateContext "parsec.skipTill" p';
562 | 
563 |   # Repeat a parser one or more times until the end parser succeeds. Discards
564 |   # consumed input.
565 |   #   :: Parser a -> Parser b -> Parser null
566 |   skipTill1 = parser: end:
567 |     let p' = skipThen parser (skipTill parser end);
568 |     in annotateContext "parsec.skipTill1" p';
569 | 
570 |   # }}}
571 | 
572 |   # peeks and drops {{{
573 | 
574 |   # Examine the next character without consuming it. Fails if there's no input
575 |   # left.
576 |   #   :: Parser Char
577 |   peek = ps:
578 |     let
579 |       str = elemAt ps 0;
580 |       offset = elemAt ps 1;
581 |       len = elemAt ps 2;
582 |     in if len > 0
583 |       then [(substring offset 1 str) offset len]
584 |       else {
585 |         context = "parsec.peek";
586 |         msg = "expected a character";
587 |       };
588 | 
589 |   # Examine the rest of the input without consuming it. Cannot fail.
590 |   #
591 |   # NOTE: this has to copy the rest of the input into a substring, so use with
592 |   # caution.
593 |   #
594 |   #   :: Parser String
595 |   peekRest = ps:
596 |     let
597 |       str = elemAt ps 0;
598 |       offset = elemAt ps 1;
599 |       len = elemAt ps 2;
600 |     in [(substring offset len str) offset len];
601 | 
602 |   # Consume and return the rest of the input. Cannot fail.
603 |   #
604 |   # NOTE: this has to copy the rest of the input into a substring, so use with
605 |   # caution.
606 |   #
607 |   #   :: Parser String
608 |   consumeRest = ps:
609 |     let
610 |       str = elemAt ps 0;
611 |       offset = elemAt ps 1;
612 |       len = elemAt ps 2;
613 |     in [(substring offset len str) (offset + len) 0];
614 | 
615 |   # Consume and ignore the rest of the input. Cannot fail.
616 |   #   :: Parser null
617 |   dropRest = ps:
618 |     let
619 |       offset = elemAt ps 1;
620 |       len = elemAt ps 2;
621 |     in [null (offset + len) 0];
622 | 
623 |   # }}}
624 | 
625 |   # regex {{{
626 | 
627 |   # Given a regex that matches a string, consume characters matching that regex,
628 |   # or fail if the next characters in the input do not match. Return the matched
629 |   # text, followed by any capture groups from the match.
630 |   #
631 |   # NOTE: This has to copy the rest of the string, so if you know the maximum
632 |   # number of characters you may need, use "matchingN".
633 |   #
634 |   #   :: String -> Parser (NonEmpty String)
635 |   matching = regex: annotateContext "parsec.matching" (ps:
636 |     let len = elemAt ps 2;
637 |     in matchingN len regex ps);
638 | 
639 |   # Given a regex that matches a string, consume at most 'n' characters from the
640 |   # input matching the regular expression. Return the matched text, followed by
641 |   # any capture groups from the match.
642 |   #   :: Int -> String -> Parser (NonEmpty String)
643 |   matchingN = n: assert n >= 0; regex: withOffsetInfo (ps:
644 |     let
645 |       str = elemAt ps 0;
646 |       offset = elemAt ps 1;
647 |       len = elemAt ps 2;
648 |       result = match ("(" + regex + ").*") (substring offset n str);
649 |     in if result == null
650 |       then {
651 |         context = "parsec.matchingN";
652 |         error = "expected text matching '${regex}'";
653 |       }
654 |       else let
655 |         matchText = elemAt result 0;
656 |         matchLen = stringLength matchText;
657 |       in [result (offset + matchLen) (len - matchLen)]);
658 | 
659 |   # }}}
660 | }
661 | 
662 | # vim: foldmethod=marker:
663 | 


--------------------------------------------------------------------------------