├── LICENSE ├── README.md ├── default.nix ├── examples ├── arithmetic │ └── default.nix ├── kernel-config │ ├── default.nix │ └── surface-4.19.config ├── parens │ └── default.nix └── uuids │ └── default.nix ├── flake.nix ├── lexer.nix └── parsec.nix /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Nicole Prindle 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nix-parsec 2 | 3 | Parser combinators in Nix for no-nixpkgs parsing. 4 | 5 | ### Background 6 | 7 | Nix isn't meant to be a general purpose programming language, so using parser 8 | combinators like this should generally be avoided. 
Here are some reasons to 9 | prefer a pure-Nix parser over other approaches: 10 | 11 | - Regular expressions won't work for your use case 12 | - You _usually_ don't need to parse things in Nix, and when you do, you usually don't need anything more powerful than regex 13 | - Nix's regex builtins are somewhat inconvenient to use. If you're looking for a more convenient regex interface in Nix, consider [the regex module](https://github.com/chessai/nix-std/blob/master/regex.nix) in [nix-std](https://github.com/chessai/nix-std) 14 | - Parsing performance will not be a bottleneck for the build 15 | - Nix evaluation can be slow! If your files are large, parsing may take a while 16 | - If parsing evaluation is a bottleneck, consider implementing your parser in your language of choice and using Nix to invoke it 17 | - It's difficult to pass results of parsing in another language back to Nix 18 | - You need to avoid nixpkgs or other dependencies 19 | 20 | ### Usage 21 | 22 | Include by fetching via usual means (`fetchTarball`, `fetchFromGitHub`, etc.): 23 | 24 | ```nix 25 | let 26 | version = "v0.1.0"; 27 | sha256 = "..."; 28 | 29 | nix-parsec = import (builtins.fetchTarball { 30 | url = "https://github.com/nprindle/nix-parsec/archive/${version}.tar.gz"; 31 | inherit sha256; 32 | }); 33 | 34 | inherit (nix-parsec) parsec lexer; 35 | in ... 36 | ``` 37 | 38 | If you are using Nix Flakes, you can add `nix-parsec` as an input: 39 | ```nix 40 | { 41 | inputs = { 42 | nix-parsec.url = "github:nprindle/nix-parsec"; 43 | }; 44 | 45 | outputs = { self, nix-parsec, ... }: { 46 | # ... 47 | }; 48 | } 49 | ``` 50 | 51 | At the top level, two attribute sets are exported: 52 | 53 | - `parsec`: Parser combinators and functions to run parsers 54 | - `lexer`: Combinators for parsing token-related things 55 | 56 | The parsing/lexing APIs roughly correspond to those of Haskell's `megaparsec` 57 | library. See `examples/` for some example parsers. 
--------------------------------------------------------------------------------
/default.nix:
--------------------------------------------------------------------------------
# Entry point: exports the 'parsec' combinators and the 'lexer' helpers, which
# are built on top of 'parsec'.
let
  parsec = import ./parsec.nix;
  lexer = import ./lexer.nix { inherit parsec; };
in {
  inherit parsec lexer;
}

--------------------------------------------------------------------------------
/examples/arithmetic/default.nix:
--------------------------------------------------------------------------------
# Example: parse a simple arithmetic expression using "+" or "*" into its result
#
# This could backtrack a lot, though obviously it can be written to be much more
# efficient.
#
# Load in nix repl and test, e.g.:
#
# nix-repl> parseExpr "1 + 2 * -3"
# { type = "success"; value = -5; }

let
  nix-parsec = import ../../default.nix;
  inherit (nix-parsec) lexer;
in

with nix-parsec.parsec;

let
  # Whitespace between tokens: plain spaces only
  spaces = skipWhile (c: c == " ");
  lexeme = lexer.lexeme spaces;
  symbol = lexer.symbol spaces;

  # An optionally signed decimal integer followed by optional spaces
  int = lexeme (lexer.signed spaces lexer.decimal);

  # Grammar (right-recursive; only "+" and "*" are implemented below):
  #   expr   ::= term + expr | term
  #   term   ::= factor * term | factor
  #   factor ::= (expr) | int

  expr = alt (bind term (n: skipThen (symbol "+") (fmap (m: n + m) expr))) term;
  term = alt (bind factor (n: skipThen (symbol "*") (fmap (m: n * m) term))) factor;
  factor = alt (between (symbol "(") (symbol ")") expr) int;
in {
  # Evaluate a whole string; trailing input that is not part of the expression
  # is an error because of 'eof'.
  parseExpr = runParser (thenSkip expr eof);
}

--------------------------------------------------------------------------------
/examples/kernel-config/default.nix:
--------------------------------------------------------------------------------
# Example: parse a linux kernel config file, like 'surface-4.19.config' in this
# directory
#
# Load in nix repl and test, e.g.:
#
# nix-repl> parseConfigFile ./surface-4.19.config
# { type = "success"; value = ...; }

{ pkgs ? import <nixpkgs> {}
}:

let
  nix-parsec = import ../../default.nix;
  inherit (nix-parsec) lexer;
  inherit (pkgs) lib;
in

with nix-parsec.parsec;

let
  # Consume zero or more spaces, not including newlines
  spaces = skipWhile (c: c == " " || c == "\t");

  # Skip spaces and line comments and newlines
  spaceComments = lexer.space
    (skipWhile1 (c: c == " " || c == "\t" || c == "\n"))
    (lexer.skipLineComment "#")
    fail;

  lexeme = lexer.lexeme spaces;
  symbol = lexer.symbol spaces;

  isIdChar = c: builtins.match "[a-zA-Z0-9_]" c != null;

  identifier = lexeme (takeWhile1 isIdChar);

  # A tristate letter ("y", "n" or "m") only counts when it is the whole value.
  # Without the 'notFollowedBy' guard, a freeform value such as "name" would be
  # read as "n" and the rest of the line would then fail to parse.
  tristate = letter: value:
    fmap (_: value)
      (lexeme (thenSkip (string letter) (notFollowedBy (satisfy isIdChar))));

  kernelOption = identifier;
  kernelValue = lexeme (choice [
    (tristate "y" lib.kernel.yes)
    (tristate "n" lib.kernel.no)
    (tristate "m" lib.kernel.module)
    (fmap lib.kernel.freeform identifier)
  ]);

  # One "KEY=value" line, followed by any amount of blank space and comments
  line =
    bind kernelOption (key:
      skipThen
        (symbol "=")
        (thenSkip
          (fmap (lib.nameValuePair key) kernelValue)
          spaceComments));

  # The whole file, collected into an attribute set of option -> value
  configFile =
    fmap lib.listToAttrs
    (skipThen spaceComments (thenSkip (many line) eof));

in {
  parseConfigFile = path: runParser configFile (builtins.readFile path);
}

--------------------------------------------------------------------------------
/examples/kernel-config/surface-4.19.config:
--------------------------------------------------------------------------------
# The problem of parsing kernel config files was proposed by hpfr
#
# This config file is taken from here:
# https://github.com/linux-surface/linux-surface/blob/master/configs/surface-4.19.config

#
# Intel IPTS Touchscreen
#
CONFIG_INTEL_IPTS=m
CONFIG_INTEL_IPTS_SURFACE=m

#
# Surface Aggregator Module
#
CONFIG_GPIO_SYSFS=y # required for SURFACE_SAM_HPS 16 | CONFIG_SURFACE_SAM=m 17 | CONFIG_SURFACE_SAM_SSH=m 18 | CONFIG_SURFACE_SAM_SSH_DEBUG_DEVICE=y 19 | CONFIG_SURFACE_SAM_SAN=m 20 | CONFIG_SURFACE_SAM_VHF=m 21 | CONFIG_SURFACE_SAM_DTX=m 22 | CONFIG_SURFACE_SAM_HPS=m 23 | CONFIG_SURFACE_SAM_SID=m 24 | CONFIG_SURFACE_SAM_SID_GPELID=m 25 | CONFIG_SURFACE_SAM_SID_PERFMODE=m 26 | CONFIG_SURFACE_SAM_SID_VHF=m 27 | CONFIG_SURFACE_SAM_SID_POWER=m 28 | 29 | # 30 | # Other Drivers 31 | # 32 | CONFIG_INPUT_SOC_BUTTON_ARRAY=m 33 | CONFIG_SURFACE_3_POWER_OPREGION=m 34 | CONFIG_SURFACE_3_BUTTON=m 35 | CONFIG_SURFACE_3_POWER_OPREGION=m 36 | CONFIG_SURFACE_PRO3_BUTTON=m 37 | -------------------------------------------------------------------------------- /examples/parens/default.nix: -------------------------------------------------------------------------------- 1 | # Example: find the deepest depth of nested parentheses in a string of balanced 2 | # parentheses, or return an error if the parentheses are not balanced. 
#
# Load in nix repl and test, e.g.:
#
# nix-repl> parseParens "((())())"
# { type = "success"; value = 3; }

let
  nix-parsec = import ../../default.nix;
in

with nix-parsec.parsec;

let
  max = x: y: if x > y then x else y;
  # Largest element of a list of non-negative numbers; 0 for the empty list
  maximum = builtins.foldl' max 0;

  # Depth of a sequence of balanced groups: each "( ... )" contributes one more
  # than the depth of its contents, and the sequence takes the deepest group.
  parens =
    let expr = fmap (x: x + 1) (between (string "(") (string ")") parens);
    in fmap maximum (many expr);
in {
  parseParens = runParser (thenSkip parens eof);
}

--------------------------------------------------------------------------------
/examples/uuids/default.nix:
--------------------------------------------------------------------------------
# Example: parse a UUID using nix-parsec and using regular expressions
#
# Load in nix repl and test, e.g.:
#
# nix-repl> :p parseUuid "123e4567-e89b-12d3-a456-426614174000"
# { type = "success"; value = [ "123e4567" "e89b" "12d3" "a456" "426614174000" ]; }

{ pkgs ? import <nixpkgs> {}
}:

let
  inherit (pkgs) lib;
  nix-parsec = import ../../default.nix;
in

with nix-parsec.parsec;

let
  # Parses a UUID in a format like 123e4567-e89b-12d3-a456-426614174000
  uuid =
    let
      # A single lowercase hexadecimal digit
      hex = satisfy (x: builtins.match "[0-9a-f]" x != null);
      # Exactly 'n' hex digits, joined into one string
      nHex = n: fmap lib.concatStrings (count n hex);
      hyphen = string "-";
      # A hyphen followed by 'n' hex digits, passed on to the continuation
      group = n: next: skipThen hyphen (bind (nHex n) next);
    in
      bind (nHex 8)
      (g1: group 4
      (g2: group 4
      (g3: group 4
      (g4: group 12
      (g5: pure [g1 g2 g3 g4 g5])))));
in rec {
  # Parse using nix-parsec
  parseUuid = runParser uuid;

  # Parse using regular expressions
  parseUuid' = builtins.match "([0-9a-f]{8})-([0-9a-f]{4})-([0-9a-f]{4})-([0-9a-f]{4})-([0-9a-f]{12})";
}

--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
{
  description = "nix-parsec";

  outputs = { self }:
    {
      lib = {
        lexer = import ./lexer.nix { parsec = self.lib.parsec; };
        parsec = import ./parsec.nix;
      };
    };
}

--------------------------------------------------------------------------------
/lexer.nix:
--------------------------------------------------------------------------------
{ parsec
}:

with parsec;

let
  # Map the character following a backslash to the character it denotes.
  # Anything other than n, r and t is returned unchanged.
  escapeToChar = c:
    if c == "n" then "\n"
    else if c == "r" then "\r"
    else if c == "t" then "\t"
    else c;

  # Value of each hexadecimal digit, in either case
  hexDigits = {
    "0" = 0; "1" = 1; "2" = 2; "3" = 3; "4" = 4; "5" = 5; "6" = 6; "7" = 7;
    "8" = 8; "9" = 9;
    "A" = 10; "B" = 11; "C" = 12; "D" = 13; "E" = 14; "F" = 15;
    "a" = 10; "b" = 11; "c" = 12; "d" = 13; "e" = 14; "f" = 15;
  };

in rec {
  # Build a space-consuming parser out of:
  #   - a parser that consumes spaces
  #   - a parser that consumes line comments
  #   - a parser that consumes block comments
  # None of these should be able to accept empty input
  # :: Parser null -> Parser null -> Parser null -> Parser null
  space = sp: lc: bc: skipMany (alt sp (alt lc bc));

  # Given the delimiter marking the start of a line comment, build a parser that
  # consumes a line comment. The terminating newline itself is not consumed.
  # :: String -> Parser null
  skipLineComment = start:
    let
      prefix = string start;
    in skipThen prefix (skipWhile (x: x != "\n"));

  # Given start and end delimiters of a block comment, build a parser that
  # consumes a block comment
  # :: String -> String -> Parser null
  skipBlockComment = start: end:
    let
      prefix = string start;
      suffix = string end;
    in skipThen prefix (skipTill anyChar suffix);

  # Given start and end delimiters of a block comment, build a parser that
  # consumes a possibly nested block comment
  # :: String -> String -> Parser null
  skipBlockCommentNested = start: end:
    let
      prefix = string start;
      suffix = string end;
      # Inside a comment, try a nested comment before falling back to one char
      go = skipThen prefix (skipTill (alt go anyChar) suffix);
    in go;

  # Use a space-consuming parser to turn a parser into a lexeme parser
  # :: Parser null -> Parser a -> Parser a
  lexeme = sc: parser: thenSkip parser sc;

  # Use a space-consuming parser to turn a parser into a symbol parser
  # :: Parser null -> String -> Parser String
  symbol = sc: sym: lexeme sc (string sym);

  # Parses a decimal integer. If you want to handle leading signs, wrap it using
  # 'signed'.
  #
  # Unlike 'decimal', it is often faster, but will
  # only ever consume at most 19 characters after leading zeros. Only use this
  # if you're sure the number you're parsing can fit in a signed 64-bit integer.
# :: Parser Int  (assuming the digits fit in a signed 64-bit integer)
  unsafeDecimal =
    let
      toInt = builtins.fromJSON; # Hacky, but efficient
      int = fmap (x: toInt (builtins.elemAt x 0)) (matchingN 19 "[[:digit:]]+");
      leadingZeros = skipWhile (c: c == "0");
    in alt
      # Nonzero number with leading zeros
      (skipThen leadingZeros int)
      # Only zeros
      (fmap (_: 0) (skipWhile1 (c: c == "0")));

  # Parses a decimal integer. If you want to handle leading signs, wrap it using
  # 'signed'. Leading zeros are accepted and ignored.
  #
  # NOTE: since Nix can only handle 64-bit signed integers, the behavior on
  # larger numbers is not guaranteed.
  decimal =
    let
      isDigit = c: builtins.match "[[:digit:]]" c != null;
      # 'builtins.fromJSON' is used for the conversion, but the JSON grammar
      # does not allow leading zeros (e.g. "007"), so strip them first. The
      # input is a nonempty string of digits; if the pattern does not match, it
      # consists only of zeros.
      toInt = digits:
        let significant = builtins.match "0*([1-9][[:digit:]]*)" digits;
        in if significant == null
          then 0
          else builtins.fromJSON (builtins.head significant);
    in fmap toInt (takeWhile1 isDigit);

  # Parses a binary integer, as a nonempty string of "0"s and "1"s. Does not
  # assume a prefix.
  binary =
    let
      isBinDigit = c: c == "0" || c == "1";
      binToInt = str:
        let
          len = builtins.stringLength str;
          nthDigit = n: if builtins.substring n 1 str == "0" then 0 else 1;
          # Left-to-right accumulation: acc holds the value of the first i digits
          go = acc: i:
            if i >= len
              then acc
              else go (2 * acc + nthDigit i) (i + 1);
        in go 0 0;
    in fmap binToInt (takeWhile1 isBinDigit);

  # Parses a hexadecimal integer, as a nonempty string of digits "0" through "9"
  # or letters "A" through "F" (either uppercase or lowercase). Does not assume
  # a prefix.
  hexadecimal =
    let
      isHexDigit = c: builtins.match "[[:xdigit:]]" c != null;
      hexToInt = str:
        let
          len = builtins.stringLength str;
          # Look the digit's value up in the 'hexDigits' table
          nthDigit = n: hexDigits.${builtins.substring n 1 str};
          # Left-to-right accumulation: acc holds the value of the first i digits
          go = acc: i:
            if i >= len
              then acc
              else go (16 * acc + nthDigit i) (i + 1);
        in go 0 0;
    in fmap hexToInt (takeWhile1 isHexDigit);

  # Given a way to consume the space after the sign, and given a parser that
  # parses a number, return a parser that can also handle a leading +/- sign.
# :: Num a => Parser null -> Parser a -> Parser a
  #
  # The space consumer 'sp' runs only after an explicit sign, so "- 3" parses
  # as -3 when 'sp' skips spaces. Without a sign, the number parser runs
  # directly and the result is left unchanged.
  signed = sp: parser:
    let
      plus = fmap (_: 1) (string "+");
      minus = fmap (_: -1) (string "-");
      # The multiplier for the number: -1 or 1, defaulting to 1 when there is
      # no sign. The space after an explicit sign is skipped with 'sp'.
      sign = option 1 (thenSkip (alt minus plus) sp);
    in bind sign (res: fmap (n: res * n) parser);

  # Parses a Nix character literal, without quotes. Handles character escaping.
  #
  # NOTE: Only supports \n, \r, and \t. All other characters after a backslash
  # will be returned as-is; e.g., "\a" becomes "a".
  charLit =
    bind anyChar
      (c: if c == "\\"
        then fmap escapeToChar anyChar
        else pure c);

  # Parses a basic double-quoted string literal, handling escaped inner quotes.
  # The surrounding quotes are consumed but not included in the result.
  stringLit = fmap (builtins.concatStringsSep "")
    (skipThen (string "\"") (manyTill charLit (string "\"")));
}

--------------------------------------------------------------------------------
/parsec.nix:
--------------------------------------------------------------------------------
# A parser is a value with the following type:
#   type Parser a = (String, Int, Int) -> Either e (a, Int, Int)
#
# - The parameters are the source, the offset, and the length
# - The result is the value produced, the new offset, and the new length
# - If a failure occurs, the result will be an attribute set containing
#   information about the error
#
# Note that in the types, 'Maybe a' denotes a value that is either null or a
# singleton list containing a value of type 'a'. 'NonEmpty a' denotes a list
# containing one or more values of type 'a'. 'null' denotes the singleton type
# containing only the value 'null'.
with builtins;

with rec {
  # Local stand-ins for helpers that would otherwise come from lib

  # Right fold: op x0 (op x1 (... (op xN nul)))
  foldr = op: nul: list:
    let
      size = length list;
      from = i:
        if i == size
          then nul
          else op (elemAt list i) (from (i + 1));
    in from 0;

  # TODO: optimize result collection
  reverseList = xs:
    let total = length xs;
    in genList (i: elemAt xs (total - i - 1)) total;
};

rec {
  # running {{{

  # Run a parser on a string. The result is an attrset with a "type" key and a
  # "value" key: when "type" is "error", "value" holds the reported error; when
  # "type" is "success", "value" holds the parsed value.
  #
  # Leftover input is not an error. To require that all input has been
  # consumed, finish the parser with 'eof'.
  #
  # :: Parser a -> String -> Either e a
  runParser = parser: str:
    let
      # Initial state: the whole string, starting at offset 0
      initial = [str 0 (stringLength str)];
      outcome = parser initial;
    in
      if failed outcome
        then { type = "error"; value = outcome; }
        else { type = "success"; value = elemAt outcome 0; };

  # }}}

  # queries {{{

  # Did the raw result of a parser fail?
#
  # A successful raw result is a list; anything else is treated as an error.
  #
  # :: Either e (a, Int, Int) -> Bool
  failed = res: !builtins.isList res;

  # Query the current state of the parser. The state itself becomes the value,
  # and no input is consumed.
  # :: Parser (String, Int, Int)
  state = ps: [ps (elemAt ps 1) (elemAt ps 2)];

  # Augment a parser to also return the number of characters it consumed
  # :: Parser a -> Parser (Int, a)
  measure = parser: ps:
    let
      res = parser ps;
      startOffset = elemAt ps 1;
      endOffset = elemAt res 1;
    in
      if failed res
        then res
        else [[(endOffset - startOffset) (elemAt res 0)] endOffset (elemAt res 2)];

  # Augment a parser to also return the characters it consumed
  # :: Parser a -> Parser (String, a)
  withMatch = parser: ps:
    let
      res = parser ps;
      startOffset = elemAt ps 1;
      endOffset = elemAt res 1;
      # The slice of the source between the old and the new offset
      matched = substring startOffset (endOffset - startOffset) (elemAt ps 0);
    in
      if failed res
        then res
        else [[matched (elemAt res 0)] endOffset (elemAt res 2)];

  # }}}

  # composition {{{

  # Map a function over the result of a parser; errors pass through untouched
  # :: (a -> b) -> Parser a -> Parser b
  fmap = f: parser: ps:
    let res = parser ps;
    in if failed res
      then res
      else [(f (elemAt res 0)) (elemAt res 1) (elemAt res 2)];

  # Lift a value into a parser that consumes nothing
  # :: a -> Parser a
  pure = x: ps:
    let
      offset = elemAt ps 1;
      len = elemAt ps 2;
    in [x offset len];

  # Applicative functor application: run the function parser, then the argument
  # parser, and apply one result to the other
  # :: Parser (a -> b) -> Parser a -> Parser b
  ap = pf: px: bind pf (f: fmap f px);

  # Lift a two-argument function over two parsers in sequence.
# :: (a -> b -> c) -> Parser a -> Parser b -> Parser c
  lift2 = f: p1: p2: ap (fmap f p1) p2;

  # Monadic bind; sequence two parsers together. The second parser is chosen
  # from the first parser's result and starts where the first one stopped.
  # :: Parser a -> (a -> Parser b) -> Parser b
  bind = parser: f: ps:
    let
      str = elemAt ps 0;
      res1 = parser ps;
    in if failed res1
      then res1
      else let
        val = elemAt res1 0;
        offset = elemAt res1 1;
        len = elemAt res1 2;
      in (f val) [str offset len];

  # Sequence two parsers, ignoring the result of the first one, like '*>' in
  # Haskell
  # :: Parser a -> Parser b -> Parser b
  skipThen = parser1: parser2: bind parser1 (_: parser2);

  # Sequence two parsers, ignoring the result of the second one, like '<*' in
  # Haskell
  # :: Parser a -> Parser b -> Parser a
  thenSkip = parser1: parser2: bind parser1 (x: fmap (_: x) parser2);

  # Run a list of parsers in sequence, collecting their results
  # :: [Parser a] -> Parser [a]
  sequence = xs:
    let
      len = length xs;
      go = n:
        if n >= len
          then pure []
          else bind (elemAt xs n) (first: fmap (rest: [first] ++ rest) (go (n + 1)));
    in go 0;

  # Ignore the results of a parser
  # :: Parser a -> Parser null
  void = fmap (_: null);

  # Like 'sequence', but ignore the outputs of the parsers
  # :: [Parser a] -> Parser null
  sequence_ = xs:
    let
      len = length xs;
      go = n:
        if n >= len
          then pure null
          else skipThen (elemAt xs n) (go (n + 1));
    in go 0;

  # }}}

  # options and failure {{{

  # Parser that always fails (the identity under 'alt')
  # :: Parser a
  fail = failWith { context = "parsec.fail"; };

  # Parser that always fails with the given error
  # :: e -> Parser a
  failWith = e: _: e;

  # Apply a function to modify an error message for a parser. Successful
  # results are returned unchanged.
  annotate = f: parser: ps:
    let res = parser ps;
    in if failed res
      then f res
      else res;

  # Modify a parser error message by adding the given attributes
  #
  # NOTE: this overrides any of the old attributes, so make sure that any
  # possible error information is irrelevant first
  annotateWith = e: annotate (x: x // e);

  # Add a new context annotation to an error, keeping the old error entirely
  annotateContext = s: annotate (e: { context = s; error = e; });

  # Add information about the current offset to a parser
  #
  # NOTE: this overrides any old offset info, which could make it confusing
  # where the error actually happened.
  withOffsetInfo = parser:
    bind state (info: annotateWith { str = elemAt info 0; offset = elemAt info 1; } parser);

  # Override an error message for a parser
  label = e: annotate (_: e);

  # Run two parsers; if the first one fails, run the second one from the same
  # starting position. If both fail, the error carries both failures.
  # :: Parser a -> Parser a -> Parser a
  alt = parser1: parser2: withOffsetInfo (ps:
    let
      res1 = parser1 ps;
      # Only evaluated when the first parser failed
      res2 = parser2 ps;
    in if failed res1
      then if failed res2
        then {
          context = "parsec.alt";
          msg = "expected one of these to succeed";
          error = [res1 res2];
        }
        else res2
      else res1);

  # Try to apply a parser, or return a default value if it fails. Since 'alt'
  # restarts from the original position, no input is consumed in that case.
  # Cannot fail.
  # :: a -> Parser a -> Parser a
  option = def: parser: alt parser (pure def);

  # Try to apply a parser. If it succeeds, return its result in a singleton
  # list, and if it fails without consuming input, return an empty list. Cannot
  # fail.
# :: Parser a -> Parser [a]
  optional = parser:
    let wrapped = fmap (x: [x]) parser;
    in alt wrapped (pure []);

  # Try each parser in the list from the same starting position and keep the
  # result of the first one that succeeds. If none does, the error carries
  # every individual failure.
  # :: [Parser a] -> Parser a
  choice = parsers: withOffsetInfo (ps:
    let
      attempts = map (p: p ps) parsers;
      # Keep the leftmost successful attempt, or null when there is none
      pick = attempt: fallback: if failed attempt then fallback else attempt;
      winner = foldr pick null attempts;
    in
      if winner != null
        then winner
        else {
          context = "parsec.choice";
          msg = "expected one of these to be satisfied";
          error = attempts;
        });

  # }}}

  # consumption primitives {{{

  # Consume the next character when there is one and the predicate accepts it;
  # fail otherwise
  # :: (Char -> Bool) -> Parser Char
  satisfy = pred: withOffsetInfo (ps:
    let
      offset = elemAt ps 1;
      remaining = elemAt ps 2;
      next = substring offset 1 (elemAt ps 0); # the next character
    in
      if remaining > 0 && pred next
        then [next (offset + 1) (remaining - 1)]
        else { context = "parsec.satisfy"; });

  # Consumes a character if it satisfies a predicate, applying a function to the
  # result.
# :: (Char -> a) -> (Char -> Bool) -> Parser a
  satisfyWith = f: pred: withOffsetInfo (ps:
    let
      str = elemAt ps 0;
      offset = elemAt ps 1;
      len = elemAt ps 2;
      c = substring offset 1 str; # the next character
    in if len > 0 && pred c
      then [(f c) (offset + 1) (len - 1)]
      else { context = "parsec.satisfyWith"; });

  # Consume any character; fails only at the end of the input
  # :: Parser Char
  anyChar = withOffsetInfo (annotateWith {
    context = "parsec.anyChar";
  } (satisfy (_: true)));

  # Consume any character except a given character
  # :: Char -> Parser Char
  anyCharBut = c: withOffsetInfo (annotateWith {
    context = "parsec.anyCharBut";
    error = "expected any char except ${c}";
  } (satisfy (x: x != c)));

  # Given a string, try to consume it from the input and return it if it
  # succeeds. If it fails, DON'T consume any input.
  # :: String -> Parser String
  string = pr: withOffsetInfo (ps:
    let
      prefixLen = stringLength pr;
      str = elemAt ps 0;
      offset = elemAt ps 1;
      len = elemAt ps 2;
    in if len >= prefixLen && substring offset prefixLen str == pr
      then [pr (offset + prefixLen) (len - prefixLen)]
      else {
        context = "parsec.string";
        msg = "expected string '${pr}'";
      });

  # 'notFollowedBy p' only succeeds when 'p' fails, and never consumes any input
  # :: Parser a -> Parser null
  notFollowedBy = parser: withOffsetInfo (ps:
    let
      offset = elemAt ps 1;
      len = elemAt ps 2;
    in if failed (parser ps)
      then [null offset len]
      else { context = "parsec.notFollowedBy"; });

  # Fails if there is still more input remaining, returns null otherwise
  # :: Parser null
  eof = withOffsetInfo (ps:
    let
      offset = elemAt ps 1;
      len = elemAt ps 2;
    in if len == 0
      then [null offset len]
      else {
        context = "parsec.eof";
        msg = "expected end of input";
      });

  # Return whether or not we're at the end of the input. Cannot fail.
  # :: Parser Bool
  atEnd = ps:
    let
      offset = elemAt ps 1;
      len = elemAt ps 2;
    in [(len == 0) offset len];

  # }}}

  # takes {{{

  # Repeat a parser 'n' times, returning the results from each parse. Fails if
  # any of the 'n' runs fails; the underlying error is kept under 'error'.
  # :: Int -> Parser a -> Parser [a]
  count = n: assert n >= 0; parser:
    let p' =
      let go = m: if m == 0
        then pure []
        else bind parser (first: fmap (rest: [first] ++ rest) (go (m - 1)));
      in go n;
    in annotate (e: {
      context = "parsec.count";
      msg = "expected ${toString n} occurrences";
      error = e;
    }) p';

  # Consume 'n' characters, or fail if there's not enough characters left.
  # Return the characters consumed.
  # :: Int -> Parser String
  take = n: assert n >= 0; withOffsetInfo (ps:
    let
      str = elemAt ps 0;
      offset = elemAt ps 1;
      len = elemAt ps 2;
    in if n <= len
      then [(substring offset n str) (offset + n) (len - n)]
      else {
        context = "parsec.take";
        error = "expected ${toString n} characters, but only got ${toString len}";
      });

  # Consume zero or more characters while the predicate holds, returning the
  # consumed characters. Cannot fail.
# :: (Char -> Bool) -> Parser String
  takeWhile = pred: ps:
    let
      str = elemAt ps 0;
      offset = elemAt ps 1;
      len = elemAt ps 2;
      strLen = stringLength str;
      # Never scan past the remaining input recorded in the state (as 'satisfy',
      # 'take' and 'string' do), nor past the end of the string itself. When the
      # state comes from 'runParser', both bounds coincide.
      window = offset + len;
      limit = if window < strLen then window else strLen;
      # Search for the next offset that violates the predicate
      go = ix:
        if ix >= limit || !pred (substring ix 1 str)
          then ix
          else go (ix + 1);
      endIx = go offset;
      # The number of characters we found
      numChars = endIx - offset;
    in [(substring offset numChars str) endIx (len - numChars)];

  # Consume one or more characters while the predicate holds, returning the
  # consumed characters.
  # :: (Char -> Bool) -> Parser String
  takeWhile1 = pred:
    let p' = bind (satisfy pred) (first: fmap (rest: first + rest) (takeWhile pred));
    in annotateWith {
      context = "parsec.takeWhile1";
      msg = "expected at least one character matching the predicate";
    } p';

  # Apply a parser zero or more times until it fails, returning a list of the
  # results. Cannot fail.
  # :: Parser a -> Parser [a]
  many = parser:
    let go = alt (bind parser (first: fmap (rest: [first] ++ rest) go)) (pure []);
    in go;

  # Apply a parser one or more times until it fails, returning a list of the
  # results
  # :: Parser a -> Parser (NonEmpty a)
  many1 = parser:
    let p' = bind parser (first: fmap (rest: [first] ++ rest) (many parser));
    in annotate (e: {
      context = "parsec.many1";
      msg = "expected one or more occurrences";
      error = e;
    }) p';

  # Repeat a parser zero or more times until the end parser succeeds. Returns a
  # list of the results of the first parser.
# :: Parser a -> Parser b -> Parser [a]
  manyTill = parser: end:
    let
      # Stop with an empty list as soon as the end parser succeeds
      done = fmap (_: []) end;
      # Otherwise parse one more item and continue
      loop = alt done (bind parser (x: fmap (xs: [x] ++ xs) loop));
    in annotateContext "parsec.manyTill" loop;

  # Like 'manyTill', but the first parser must succeed at least once before
  # the end parser is tried.
  # :: Parser a -> Parser b -> Parser (NonEmpty a)
  manyTill1 = parser: end:
    let
      remainder = manyTill parser end;
      atLeastOne = bind parser (x: fmap (xs: [x] ++ xs) remainder);
    in annotateContext "parsec.manyTill1" atLeastOne;

  # }}}

  # separators {{{

  # Run 'before', then 'middle', then 'after', keeping only the result of
  # 'middle'.
  # Example: parens = between (string "(") (string ")")
  #
  # :: Parser a -> Parser b -> Parser c -> Parser c
  between = before: after: middle:
    let middleThenAfter = thenSkip middle after;
    in skipThen before middleThenAfter;

  # Parse zero or more occurrences of the first parser, separated by the second
  # parser. Returns a list of results of the first parser. Cannot fail.
  # :: Parser a -> Parser b -> Parser [a]
  sepBy = parser: end:
    let nonEmpty = sepBy1 parser end;
    in alt nonEmpty (pure []);

  # Parse one or more occurrences of the first parser, separated by the second
  # parser. Returns a list of results of the first parser.
  # :: Parser a -> Parser b -> Parser (NonEmpty a)
  sepBy1 = parser: end:
    let
      # Every later item must be preceded by a separator
      separated = skipThen end parser;
      items = bind parser (x: fmap (xs: [x] ++ xs) (many separated));
    in annotateContext "parsec.sepBy1" items;

  # Parse zero or more occurrences of the first parser, separated and ended by
  # the second parser. Returns a list of results of the first parser. Cannot
  # fail.
#   :: Parser a -> Parser b -> Parser [a]
endBy = parser: end:
  let terminated = thenSkip parser end;
  in many terminated;

# Parse one or more occurrences of the first parser, separated and ended by
# the second parser. Returns a list of results of the first parser.
#   :: Parser a -> Parser b -> Parser (NonEmpty a)
endBy1 = parser: end:
  let terminated = thenSkip parser end;
  in annotateContext "parsec.endBy1" (many1 terminated);

# Parse zero or more occurrences of the first parser, separated and optionally
# ended by the second parser. Returns a list of results of the first parser.
# Cannot fail.
#   :: Parser a -> Parser b -> Parser [a]
sepEndBy = parser: end:
  let nonEmpty = sepEndBy1 parser end;
  in alt nonEmpty (pure []);

# Parse one or more occurrences of the first parser, separated and optionally
# ended by the second parser. Returns a list of results of the first parser.
#   :: Parser a -> Parser b -> Parser (NonEmpty a)
sepEndBy1 = parser: end:
  let
    prepend = x: xs: [x] ++ xs;
    nothing = pure [];
    # What may follow an element: a separator, itself followed either by
    # another element (and its own continuation) or by nothing; or no
    # separator at all.
    rest = alt (skipThen end (alt item nothing)) nothing;
    # One element followed by its continuation
    item = bind parser (x: fmap (prepend x) rest);
  in annotateContext "parsec.sepEndBy1" item;

# }}}

# skips {{{

# Consume 'n' characters, or fail if fewer than 'n' characters are left.
# 'n' must not be negative.
#   :: Int -> Parser null
skip = n: assert n >= 0; ps:
  let
    offset = elemAt ps 1;
    len = elemAt ps 2;
    notEnough = {
      context = "parsec.skip";
      error = "expected ${toString n} characters, but only got ${toString len}";
    };
  in if len < n
    then notEnough
    else [null (offset + n) (len - n)];

# Consume zero or more characters while the predicate holds. Cannot fail.
#   :: (Char -> Bool) -> Parser null
skipWhile = pred: ps:
  let
    input = elemAt ps 0;
    start = elemAt ps 1;
    remaining = elemAt ps 2;
    inputLen = stringLength input;
    # Walk forward from index 'i' and return the first index that is either
    # past the end of the input or holds a character the predicate rejects.
    scan = i:
      if i < inputLen && pred (substring i 1 input)
        then scan (i + 1)
        else i;
    stop = scan start;
    # How many characters were skipped
    skipped = stop - start;
  in [null stop (remaining - skipped)];

# Consume one or more characters while the predicate holds.
#   :: (Char -> Bool) -> Parser null
skipWhile1 = pred:
  let
    # One mandatory matching character, then whatever 'skipWhile' accepts
    atLeastOne = skipThen (satisfy pred) (skipWhile pred);
  in annotateContext "parsec.skipWhile1" atLeastOne;

# Run a parser zero or more times until it fails, throwing away everything it
# accepts. Cannot fail.
#   :: Parser a -> Parser null
skipMany = parser:
  let
    # Either one more run followed by the rest, or stop with null
    loop = alt (skipThen parser loop) (pure null);
  in loop;

# Run a parser one or more times until it fails, throwing away everything it
# accepts.
#   :: Parser a -> Parser null
skipMany1 = parser:
  let oneThenMany = skipThen parser (skipMany parser);
  in annotateContext "parsec.skipMany1" oneThenMany;

# Repeat a parser zero or more times until the end parser succeeds. Discards
# consumed input.
#   :: Parser a -> Parser b -> Parser null
skipTill = parser: end:
  let
    # 'end' is the first alternative handed to 'alt'; one more 'parser' run
    # followed by the rest is the second.
    loop = alt end (skipThen parser loop);
  in annotateContext "parsec.skipTill" (void loop);

# Repeat a parser one or more times until the end parser succeeds. Discards
# consumed input.
#   :: Parser a -> Parser b -> Parser null
skipTill1 = parser: end:
  let oneThenTill = skipThen parser (skipTill parser end);
  in annotateContext "parsec.skipTill1" oneThenTill;

# }}}

# peeks and drops {{{

# Examine the next character without consuming it.
# Fails if there's no input left.
#   :: Parser Char
peek = ps:
  let
    input = elemAt ps 0;
    pos = elemAt ps 1;
    remaining = elemAt ps 2;
    noInput = {
      context = "parsec.peek";
      msg = "expected a character";
    };
  in if remaining <= 0
    then noInput
    # Offset and remaining length are handed back unchanged
    else [(substring pos 1 input) pos remaining];

# Return everything that is left of the input without consuming any of it.
# Cannot fail.
#
# NOTE: this has to copy the rest of the input into a substring, so use with
# caution.
#
#   :: Parser String
peekRest = ps:
  let
    input = elemAt ps 0;
    pos = elemAt ps 1;
    remaining = elemAt ps 2;
    rest = substring pos remaining input;
  in [rest pos remaining];

# Consume everything that is left of the input and return it. Cannot fail.
#
# NOTE: this has to copy the rest of the input into a substring, so use with
# caution.
#
#   :: Parser String
consumeRest = ps:
  let
    input = elemAt ps 0;
    pos = elemAt ps 1;
    remaining = elemAt ps 2;
    rest = substring pos remaining input;
  in [rest (pos + remaining) 0];

# Consume everything that is left of the input and discard it. Cannot fail.
#   :: Parser null
dropRest = ps:
  let
    pos = elemAt ps 1;
    remaining = elemAt ps 2;
  in [null (pos + remaining) 0];

# }}}

# regex {{{

# Given a regex that matches a string, consume characters matching that regex,
# or fail if the next characters in the input do not match. Return the matched
# text, followed by any capture groups from the match.
#
# NOTE: This has to copy the rest of the string, so if you know the maximum
# number of characters you may need, use "matchingN".
#
#   :: String -> Parser (NonEmpty String)
matching = regex:
  let
    # Hand the whole remaining length to 'matchingN' as the character limit
    unbounded = ps: matchingN (elemAt ps 2) regex ps;
  in annotateContext "parsec.matching" unbounded;

# Given a regex that matches a string, consume at most 'n' characters from the
# input matching the regular expression. Return the matched text, followed by
# any capture groups from the match. 'n' must not be negative.
#   :: Int -> String -> Parser (NonEmpty String)
matchingN = n: assert n >= 0; regex:
  let
    # The caller's regex becomes the first capture group; the trailing ".*"
    # lets the pattern be followed by arbitrary further text.
    pattern = "(" + regex + ").*";
    noMatch = {
      context = "parsec.matchingN";
      error = "expected text matching '${regex}'";
    };
    run = ps:
      let
        input = elemAt ps 0;
        pos = elemAt ps 1;
        remaining = elemAt ps 2;
        # Only the next 'n' characters are handed to the regex
        groups = match pattern (substring pos n input);
        # Length of the text matched by the caller's regex (first group)
        consumed = stringLength (elemAt groups 0);
      in if groups == null
        then noMatch
        else [groups (pos + consumed) (remaining - consumed)];
  in withOffsetInfo run;

# }}}
}

# vim: foldmethod=marker:
--------------------------------------------------------------------------------