├── .gitignore ├── CHANGELOG.md ├── LICENSE.md ├── Makefile ├── README.md ├── Setup.hs ├── cabal.project ├── cbits ├── parsnip.c └── parsnip.h ├── parsnip.cabal └── src └── Text ├── Parsnip.hs └── Parsnip ├── Char8.hs ├── Internal ├── Mark.hs ├── Parser.hs ├── Private.hs └── Simple.hs ├── Location.hs ├── Parser.hs ├── Word8.hs └── Word8 └── Binary.hs /.gitignore: -------------------------------------------------------------------------------- 1 | dist-newstyle 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 0.1 [unreleased] 2 | ---------------- 3 | * Added 'Text.Parsnip.Word8.Binary' to support parsing files with embedded nulls. 4 | * Removed support for older versions of 'bytestring'. 5 | * Moved to a separate repository from `codex`. 6 | * Added an 'e' parameter to 'Parser' to allow propagation of user errors. 7 | 8 | 0 [2021.03.23] 9 | -------------- 10 | * Split off from `engine` 11 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | Licensed under either of 4 | * Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) 5 | * BSD 2-Clause license (https://opensource.org/licenses/BSD-2-Clause) 6 | at your option. 7 | 8 | ## BSD 2-Clause License 9 | 10 | - Copyright 2019 Edward Kmett and Sean Chalmers 11 | 12 | All rights reserved. 13 | 14 | Redistribution and use in source and binary forms, with or without 15 | modification, are permitted provided that the following conditions 16 | are met: 17 | 18 | 1. Redistributions of source code must retain the above copyright 19 | notice, this list of conditions and the following disclaimer. 20 | 21 | 2. Redistributions in binary form must reproduce the above copyright 22 | notice, this list of conditions and the following disclaimer in the 23 | documentation and/or other materials provided with the distribution. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 26 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 29 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 33 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 34 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 | POSSIBILITY OF SUCH DAMAGE. 36 | 37 | ## Apache License 38 | 39 | _Version 2.0, January 2004_ 40 | _<>_ 41 | 42 | ### Terms and Conditions for use, reproduction, and distribution 43 | 44 | #### 1. Definitions 45 | 46 | “License” shall mean the terms and conditions for use, reproduction, and 47 | distribution as defined by Sections 1 through 9 of this document. 48 | 49 | “Licensor” shall mean the copyright owner or entity authorized by the copyright 50 | owner that is granting the License. 51 | 52 | “Legal Entity” shall mean the union of the acting entity and all other entities 53 | that control, are controlled by, or are under common control with that entity. 54 | For the purposes of this definition, “control” means **(i)** the power, direct or 55 | indirect, to cause the direction or management of such entity, whether by 56 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the 57 | outstanding shares, or **(iii)** beneficial ownership of such entity. 58 | 59 | “You” (or “Your”) shall mean an individual or Legal Entity exercising 60 | permissions granted by this License. 61 | 62 | “Source” form shall mean the preferred form for making modifications, including 63 | but not limited to software source code, documentation source, and configuration 64 | files. 65 | 66 | “Object” form shall mean any form resulting from mechanical transformation or 67 | translation of a Source form, including but not limited to compiled object code, 68 | generated documentation, and conversions to other media types. 69 | 70 | “Work” shall mean the work of authorship, whether in Source or Object form, made 71 | available under the License, as indicated by a copyright notice that is included 72 | in or attached to the work (an example is provided in the Appendix below). 73 | 74 | “Derivative Works” shall mean any work, whether in Source or Object form, that 75 | is based on (or derived from) the Work and for which the editorial revisions, 76 | annotations, elaborations, or other modifications represent, as a whole, an 77 | original work of authorship. For the purposes of this License, Derivative Works 78 | shall not include works that remain separable from, or merely link (or bind by 79 | name) to the interfaces of, the Work and Derivative Works thereof. 80 | 81 | “Contribution” shall mean any work of authorship, including the original version 82 | of the Work and any modifications or additions to that Work or Derivative Works 83 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 84 | by the copyright owner or by an individual or Legal Entity authorized to submit 85 | on behalf of the copyright owner. For the purposes of this definition, 86 | “submitted” means any form of electronic, verbal, or written communication sent 87 | to the Licensor or its representatives, including but not limited to 88 | communication on electronic mailing lists, source code control systems, and 89 | issue tracking systems that are managed by, or on behalf of, the Licensor for 90 | the purpose of discussing and improving the Work, but excluding communication 91 | that is conspicuously marked or otherwise designated in writing by the copyright 92 | owner as “Not a Contribution.” 93 | 94 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf 95 | of whom a Contribution has been received by Licensor and subsequently 96 | incorporated within the Work. 97 | 98 | #### 2. Grant of Copyright License 99 | 100 | Subject to the terms and conditions of this License, each Contributor hereby 101 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 102 | irrevocable copyright license to reproduce, prepare Derivative Works of, 103 | publicly display, publicly perform, sublicense, and distribute the Work and such 104 | Derivative Works in Source or Object form. 105 | 106 | #### 3. Grant of Patent License 107 | 108 | Subject to the terms and conditions of this License, each Contributor hereby 109 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 110 | irrevocable (except as stated in this section) patent license to make, have 111 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 112 | such license applies only to those patent claims licensable by such Contributor 113 | that are necessarily infringed by their Contribution(s) alone or by combination 114 | of their Contribution(s) with the Work to which such Contribution(s) was 115 | submitted. If You institute patent litigation against any entity (including a 116 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 117 | Contribution incorporated within the Work constitutes direct or contributory 118 | patent infringement, then any patent licenses granted to You under this License 119 | for that Work shall terminate as of the date such litigation is filed. 120 | 121 | #### 4. Redistribution 122 | 123 | You may reproduce and distribute copies of the Work or Derivative Works thereof 124 | in any medium, with or without modifications, and in Source or Object form, 125 | provided that You meet the following conditions: 126 | 127 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of 128 | this License; and 129 | * **(b)** You must cause any modified files to carry prominent notices stating that You 130 | changed the files; and 131 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute, 132 | all copyright, patent, trademark, and attribution notices from the Source form 133 | of the Work, excluding those notices that do not pertain to any part of the 134 | Derivative Works; and 135 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any 136 | Derivative Works that You distribute must include a readable copy of the 137 | attribution notices contained within such NOTICE file, excluding those notices 138 | that do not pertain to any part of the Derivative Works, in at least one of the 139 | following places: within a NOTICE text file distributed as part of the 140 | Derivative Works; within the Source form or documentation, if provided along 141 | with the Derivative Works; or, within a display generated by the Derivative 142 | Works, if and wherever such third-party notices normally appear. The contents of 143 | the NOTICE file are for informational purposes only and do not modify the 144 | License. You may add Your own attribution notices within Derivative Works that 145 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 146 | provided that such additional attribution notices cannot be construed as 147 | modifying the License. 148 | 149 | You may add Your own copyright statement to Your modifications and may provide 150 | additional or different license terms and conditions for use, reproduction, or 151 | distribution of Your modifications, or for any such Derivative Works as a whole, 152 | provided Your use, reproduction, and distribution of the Work otherwise complies 153 | with the conditions stated in this License. 154 | 155 | #### 5. Submission of Contributions 156 | 157 | Unless You explicitly state otherwise, any Contribution intentionally submitted 158 | for inclusion in the Work by You to the Licensor shall be under the terms and 159 | conditions of this License, without any additional terms or conditions. 160 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 161 | any separate license agreement you may have executed with Licensor regarding 162 | such Contributions. 163 | 164 | #### 6. Trademarks 165 | 166 | This License does not grant permission to use the trade names, trademarks, 167 | service marks, or product names of the Licensor, except as required for 168 | reasonable and customary use in describing the origin of the Work and 169 | reproducing the content of the NOTICE file. 170 | 171 | #### 7. Disclaimer of Warranty 172 | 173 | Unless required by applicable law or agreed to in writing, Licensor provides the 174 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, 175 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 176 | including, without limitation, any warranties or conditions of TITLE, 177 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are 178 | solely responsible for determining the appropriateness of using or 179 | redistributing the Work and assume any risks associated with Your exercise of 180 | permissions under this License. 181 | 182 | #### 8. Limitation of Liability 183 | 184 | In no event and under no legal theory, whether in tort (including negligence), 185 | contract, or otherwise, unless required by applicable law (such as deliberate 186 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 187 | liable to You for damages, including any direct, indirect, special, incidental, 188 | or consequential damages of any character arising as a result of this License or 189 | out of the use or inability to use the Work (including but not limited to 190 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 191 | any and all other commercial damages or losses), even if such Contributor has 192 | been advised of the possibility of such damages. 193 | 194 | #### 9. Accepting Warranty or Additional Liability 195 | 196 | While redistributing the Work or Derivative Works thereof, You may choose to 197 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 198 | other liability obligations and/or rights consistent with this License. However, 199 | in accepting such obligations, You may act only on Your own behalf and on Your 200 | sole responsibility, not on behalf of any other Contributor, and only if You 201 | agree to indemnify, defend, and hold each Contributor harmless for any liability 202 | incurred by, or claims asserted against, such Contributor by reason of your 203 | accepting any such warranty or additional liability. 204 | 205 | _END OF TERMS AND CONDITIONS_ 206 | 207 | ### APPENDIX: How to apply the Apache License to your work 208 | 209 | To apply the Apache License to your work, attach the following boilerplate 210 | notice, with the fields enclosed by brackets `[]` replaced with your own 211 | identifying information. (Don't include the brackets!) The text should be 212 | enclosed in the appropriate comment syntax for the file format. We also 213 | recommend that a file or class name and description of purpose be included on 214 | the same “printed page” as the copyright notice for easier identification within 215 | third-party archives. 216 | 217 | Copyright [yyyy] [name of copyright owner] 218 | 219 | Licensed under the Apache License, Version 2.0 (the "License"); 220 | you may not use this file except in compliance with the License. 221 | You may obtain a copy of the License at 222 | 223 | http://www.apache.org/licenses/LICENSE-2.0 224 | 225 | Unless required by applicable law or agreed to in writing, software 226 | distributed under the License is distributed on an "AS IS" BASIS, 227 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 228 | See the License for the specific language governing permissions and 229 | limitations under the License. 230 | 231 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PWD=$(shell pwd) 2 | LIBRARY=$(shell basename $(PWD)) 3 | 4 | all: 5 | cabal v2-build 6 | 7 | install: 8 | cabal v2-install 9 | 10 | run: 11 | cabal v2-run example 12 | 13 | repl: 14 | cabal v2-repl $(LIBRARY) --repl-options=-v1 --repl-options=-ferror-spans --repl-options=-j 15 | 16 | watch: 17 | ghcid -p $(LIBRARY) --color -c "cabal v2-repl $(LIBRARY) --repl-options=-fno-code --repl-options=-fno-break-on-exception --repl-options=-fno-break-on-error --repl-options=-v1 --repl-options=-ferror-spans --repl-options=-j" --restart *.cabal 18 | 19 | lint: 20 | find src -name "*.hs*" -print | xargs hlint 21 | 22 | .PHONY: all install run repl watch lint 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | parsnip 2 | ===== 3 | 4 | [![Hackage](https://img.shields.io/hackage/v/parsnip.svg)](https://hackage.haskell.org/package/parsnip) 5 | 6 | This is a rather minimal parsing library. 7 | 8 | Use with a library like `parsers` or `parser-combinators` to fill in the missing functionality. 9 | 10 | Contact Information 11 | ------------------- 12 | 13 | Contributions and bug reports are welcome! 14 | 15 | Please feel free to contact me through github or on the #haskell IRC channel on irc.freenode.net. 16 | 17 | -Edward Kmett 18 | 19 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: . 2 | -------------------------------------------------------------------------------- /cbits/parsnip.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "parsnip.h" 3 | 4 | /* find the first occurence of a given character or the location of the terminating null in a string. */ 5 | 6 | char * strchr0 (char *s, HsChar hc) { 7 | char c = (char)hc; 8 | for (; ((uint64_t)s & 7) != 0; ++s) 9 | if (*s == c || *s == '\0') 10 | return s; 11 | 12 | uint64_t *p = (uint64_t*)s; 13 | uint64_t magic = (uint64_t)(-1) / 0xff * 0xfe << 1 >> 1 | 1; 14 | 15 | uint64_t mask = (uint64_t)c; 16 | mask |= mask << 8; 17 | mask |= mask << 16; 18 | mask |= mask << 32; 19 | 20 | for (;;) { 21 | uint64_t w = *p++; 22 | if ((((w + magic) ^ ~w) & ~magic) != 0 || ((((w ^ mask) + magic) ^ ~(w ^ mask)) & ~magic) != 0) { 23 | s = (char*)(p-1); 24 | if (*s == c || *s == '\0') return s; 25 | if (*++s == c) return s; 26 | if (*s == '\0') return s; 27 | if (*++s == c) return s; 28 | if (*s == '\0') return s; 29 | if (*++s == c) return s; 30 | if (*s == '\0') return s; 31 | if (*++s == c) return s; 32 | if (*s == '\0') return s; 33 | if (*++s == c) return s; 34 | if (*s == '\0') return s; 35 | if (*++s == c) return s; 36 | if (*s == '\0') return s; 37 | if (*++s == c) return s; 38 | if (*s == '\0') return s; 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /cbits/parsnip.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "HsFFI.h" 3 | 4 | extern char * strchr0(char * s, HsChar c); 5 | -------------------------------------------------------------------------------- /parsnip.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.2 2 | name: parsnip 3 | version: 0.1 4 | synopsis: A fast, minimal parser 5 | description: 6 | A fast, minimal parser. 7 | . 8 | @parsnip@ parses null-terminated input strings with an absolute minimum of 9 | internal state. It copies the input to a c string and works its way through 10 | hand-over-hand with just an @Addr#@ as the internal state. 11 | . 12 | It uses @reflection@-like tricks to lift the input 'ByteString' into a 13 | constraint that is then passed to just the combinators that need access 14 | to it to compute position, snip out slices of the original, or ask the 15 | number of bytes remaining, but none of the monadic combinators nor simple 16 | 'satisfy' or 'eof' predicates need access to this information. 17 | . 18 | Because of the null-termination, we can't assume that we can handle binary 19 | inputs, but this works well for text-based source languages. 20 | homepage: https://github.com/ekmett/parsnip 21 | license: BSD-2-Clause OR Apache-2.0 22 | license-file: LICENSE.md 23 | author: Edward Kmett 24 | maintainer: Edward Kmett 25 | copyright: Copyright (c) 2019-2021 Edward Kmett 26 | stability: experimental 27 | category: Parsing, Text 28 | build-type: Simple 29 | extra-doc-files: 30 | README.md, 31 | CHANGELOG.md 32 | extra-source-files: 33 | cbits/*.h 34 | cbits/*.c 35 | 36 | source-repository head 37 | type: git 38 | location: https://github.com/ekmett/parsnip 39 | 40 | common base 41 | default-language: Haskell2010 42 | ghc-options: 43 | -Wall -Wincomplete-uni-patterns -Wincomplete-record-updates 44 | -Wredundant-constraints -Widentities -Wmissing-export-lists 45 | include-dirs: cbits 46 | c-sources: cbits/parsnip.c 47 | build-depends: 48 | attoparsec, 49 | base >= 4.15 && < 5, 50 | bytestring ^>= 0.11, 51 | containers ^>= 0.6, 52 | data-default < 0.8, 53 | ghc-prim, 54 | primitive ^>= 0.7 55 | 56 | library 57 | import: base 58 | hs-source-dirs: src 59 | exposed-modules: 60 | Text.Parsnip 61 | Text.Parsnip.Char8 62 | Text.Parsnip.Location 63 | Text.Parsnip.Parser 64 | Text.Parsnip.Word8 65 | Text.Parsnip.Word8.Binary 66 | Text.Parsnip.Internal.Mark 67 | Text.Parsnip.Internal.Parser 68 | Text.Parsnip.Internal.Private 69 | Text.Parsnip.Internal.Simple 70 | -------------------------------------------------------------------------------- /src/Text/Parsnip.hs: -------------------------------------------------------------------------------- 1 | module Text.Parsnip 2 | ( module Text.Parsnip.Char8 3 | , module Text.Parsnip.Location 4 | , module Text.Parsnip.Parser 5 | ) where 6 | 7 | import Text.Parsnip.Char8 8 | import Text.Parsnip.Location 9 | import Text.Parsnip.Parser 10 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Char8.hs: -------------------------------------------------------------------------------- 1 | {-# language MagicHash #-} 2 | {-# language BlockArguments #-} 3 | {-# language UnboxedTuples #-} 4 | {-# language BangPatterns #-} 5 | {-# language TypeApplications #-} 6 | {-# language NegativeLiterals #-} 7 | {-# language UnliftedFFITypes #-} 8 | {-# language ScopedTypeVariables #-} 9 | {-# language ForeignFunctionInterface #-} 10 | module Text.Parsnip.Char8 11 | ( satisfy 12 | , char 13 | , notChar 14 | , anyChar 15 | , digit 16 | , space 17 | , skipSpace 18 | , letter_ascii 19 | , letter_iso8859_15 20 | , while, whileSome 21 | , till, tillSome, tillChar 22 | , skipWhile, skipWhileSome 23 | , skipTill, skipTillSome, skipTillChar 24 | , previousChar, previousChar' 25 | , nextChar, nextChar' 26 | ) where 27 | 28 | import Control.Applicative 29 | import qualified Data.Attoparsec.ByteString.Char8 as A 30 | import Data.ByteString (ByteString) 31 | import Data.Word 32 | import GHC.Char 33 | import GHC.Prim 34 | import GHC.Ptr 35 | import GHC.Types 36 | import Text.Parsnip.Internal.Parser 37 | import Text.Parsnip.Internal.Private 38 | import Text.Parsnip.Parser 39 | 40 | -------------------------------------------------------------------------------- 41 | -- * Char parsers 42 | -------------------------------------------------------------------------------- 43 | 44 | satisfy :: (Char -> Bool) -> Parser s e Char 45 | satisfy f = Parser \p s -> case readCharOffAddr# p 0# s of 46 | (# t, c #) -> if isTrue# (chr# 0# `neChar#` c) && f (C# c) 47 | then OK (C# c) (plusAddr# p 1#) t 48 | else Fail p t 49 | {-# inline satisfy #-} 50 | 51 | char :: Char -> Parser s e Char 52 | char '\0' = empty 53 | char r@(C# c) = Parser \p s -> case readCharOffAddr# p 0# s of 54 | (# t, c' #) -> if isTrue# (eqChar# c c') 55 | then OK r (plusAddr# p 1#) t 56 | else Fail p t 57 | {-# inline char #-} 58 | 59 | notChar :: Char -> Parser s e Char 60 | notChar '\0' = anyChar 61 | notChar (C# c) = Parser \p s -> case readCharOffAddr# p 0# s of 62 | (# t, c' #) -> if isTrue# (chr# 0# `neChar#` c') && isTrue# (neChar# c c') 63 | then OK (C# c') (plusAddr# p 1#) t 64 | else Fail p t 65 | {-# inline notChar #-} 66 | 67 | nextChar :: Parser s e (Maybe Char) 68 | nextChar = Parser \p s -> case readCharOffAddr# p 0# s of 69 | (# t, c #) -> OK (if isTrue# (chr# 0# `neChar#` c) then Just (C# c) else Nothing) p t 70 | {-# inline nextChar #-} 71 | 72 | nextChar' :: Parser s e Char 73 | nextChar' = Parser \p s -> case readCharOffAddr# p 0# s of 74 | (# t, c #) -> if isTrue# (chr# 0# `neChar#` c) 75 | then OK (C# c) p t 76 | else Fail p t 77 | {-# inline nextChar' #-} 78 | 79 | anyChar :: Parser s e Char 80 | anyChar = Parser \p s -> case readCharOffAddr# p 0# s of 81 | (# t, c #) -> if isTrue# (chr# 0# `neChar#` c) 82 | then OK (C# c) (plusAddr# p 1#) t 83 | else Fail p t 84 | {-# inline anyChar #-} 85 | 86 | digit :: Parser s e Char 87 | digit = satisfy A.isDigit 88 | {-# inline digit #-} 89 | 90 | space :: Parser s e Char 91 | space = satisfy A.isSpace 92 | {-# inline space #-} 93 | 94 | skipSpace :: Parser s e () 95 | skipSpace = skipWhile A.isSpace 96 | {-# inline skipSpace #-} 97 | 98 | letter_ascii :: Parser s e Char 99 | letter_ascii = satisfy A.isAlpha_ascii 100 | {-# inline letter_ascii #-} 101 | 102 | letter_iso8859_15:: Parser s e Char 103 | letter_iso8859_15 = satisfy A.isAlpha_iso8859_15 104 | {-# inline letter_iso8859_15 #-} 105 | 106 | scan :: (Char -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #) 107 | scan f = go where 108 | go p s = case readCharOffAddr# p 0# s of 109 | (# t, c #) -> if isTrue# (chr# 0# `neChar#` c) && f (C# c) 110 | then (# t, p #) 111 | else scan f (plusAddr# p 1#) t 112 | {-# inline scan #-} 113 | 114 | skipWhile :: (Char -> Bool) -> Parser s e () 115 | skipWhile f = Parser \p s -> case scan f p s of 116 | (# t, q #) -> OK () q t 117 | {-# inline [1] skipWhile #-} 118 | 119 | {-# RULES 120 | "skipWhile (x/=)" forall x. 121 | skipWhile (x `neChar`) = skipTillChar x 122 | "skipWhile (/=x)" forall x. 123 | skipWhile (`neChar` x) = skipTillChar x 124 | #-} 125 | 126 | skipTill :: (Char -> Bool) -> Parser s e () 127 | skipTill p = skipWhile (not . p) 128 | {-# inline [1] skipTill #-} 129 | 130 | {-# RULES 131 | "skipTill (x==)" forall x. 132 | skipTill (x `eqChar`) = skipTillChar x 133 | "skipWhile (==x)" forall x. 134 | skipWhile (`eqChar` x) = skipTillChar x 135 | #-} 136 | 137 | skipTillSome :: (Char -> Bool) -> Parser s e () 138 | skipTillSome p = skipWhileSome (not . p) 139 | {-# inline skipTillSome #-} 140 | 141 | foreign import ccall "parsnip.h" strchr0 :: Addr# -> Char# -> IO (Ptr Word8) 142 | 143 | skipTillChar :: Char -> Parser s e () 144 | skipTillChar (C# c) = Parser $ \p s -> case io (strchr0 p c) s of 145 | (# t, Ptr q #) -> OK () q t 146 | {-# inline skipTillChar #-} 147 | 148 | skipWhileSome :: (Char -> Bool) -> Parser s e () 149 | skipWhileSome p = satisfy p *> skipWhile p 150 | {-# inline skipWhileSome #-} 151 | 152 | while :: KnownBase s => (Char -> Bool) -> Parser s e ByteString 153 | while f = snipping (skipWhile f) 154 | {-# inline while #-} 155 | 156 | till :: KnownBase s => (Char -> Bool) -> Parser s e ByteString 157 | till p = snipping (skipTill p) 158 | {-# inline till #-} 159 | 160 | tillChar :: KnownBase s => Char -> Parser s e ByteString 161 | tillChar c = snipping (skipTillChar c) 162 | {-# inline tillChar #-} 163 | 164 | whileSome :: KnownBase s => (Char -> Bool) -> Parser s e ByteString 165 | whileSome p = snipping (skipWhileSome p) 166 | {-# inline whileSome #-} 167 | 168 | tillSome :: KnownBase s => (Char -> Bool) -> Parser s e ByteString 169 | tillSome p = snipping (skipTillSome p) 170 | {-# inline tillSome #-} 171 | 172 | -- Peek at the previous character. Always succeeds. 173 | previousChar :: forall s e. KnownBase s => Parser s e (Maybe Char) 174 | previousChar = case reflectBase @s of 175 | !(Base _ _ l _) -> Parser \p s -> 176 | if isTrue# (ltAddr# l p) 177 | then case readCharOffAddr# p (-1#) s of 178 | (# t, c #) -> OK (Just (C# c)) p t 179 | else OK Nothing p s 180 | 181 | -- Peek at the previous character. Fails if we're at the start of input. 182 | previousChar' :: forall s e. KnownBase s => Parser s e Char 183 | previousChar' = case reflectBase @s of 184 | !(Base _ _ l _) -> Parser \p s -> 185 | if isTrue# (ltAddr# l p) 186 | then case readCharOffAddr# p (-1#) s of 187 | (# t, c #) -> OK (C# c) p t 188 | else Fail p s 189 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Internal/Mark.hs: -------------------------------------------------------------------------------- 1 | {-# language MagicHash #-} 2 | {-# language TypeApplications #-} 3 | {-# language ScopedTypeVariables #-} 4 | {-# language PatternSynonyms #-} 5 | {-# language BlockArguments #-} 6 | {-# language BangPatterns #-} 7 | {-# language UnboxedTuples #-} 8 | module Text.Parsnip.Internal.Mark 9 | ( Mark(Mark,Mk) 10 | , minusMark 11 | , mark, release 12 | , snip, snipping 13 | ) where 14 | 15 | import Data.ByteString as B 16 | import Data.Word 17 | import GHC.Arr 18 | import GHC.Prim 19 | import GHC.Ptr 20 | import GHC.Types 21 | import Text.Parsnip.Internal.Parser 22 | import Text.Parsnip.Internal.Private 23 | 24 | --------------------------------------------------------------------------------------- 25 | -- * Marks 26 | --------------------------------------------------------------------------------------- 27 | 28 | newtype Mark s = Mark (Ptr Word8) -- unexposed, so known valid addresses 29 | deriving (Eq,Ord,Show) 30 | 31 | pattern Mk :: Addr# -> Mark s 32 | pattern Mk a = Mark (Ptr a) 33 | {-# complete Mk #-} -- if only... 34 | 35 | instance KnownBase s => Bounded (Mark s) where 36 | minBound = Mk (start @s) 37 | maxBound = Mk (end @s) 38 | {-# inline minBound #-} 39 | {-# inline maxBound #-} 40 | 41 | instance KnownBase s => Enum (Mark s) where 42 | fromEnum p = minusMark p minBound 43 | toEnum = case reflectBase @s of 44 | !(Base _ _ l h) -> \(I# i) -> if isTrue# (0# <=# i) && isTrue# (i <=# minusAddr# h l) 45 | then Mk (plusAddr# l i) 46 | else error "Mark.toEnum: Out of bounds" 47 | succ (Mk p) = if isTrue# (ltAddr# p (end @s)) 48 | then Mk (plusAddr# p 1#) 49 | else error "Mark.succ: Out of bounds" 50 | pred (Mk p) = if isTrue# (ltAddr# (start @s) p) 51 | then Mk (plusAddr# p (negateInt# 1#)) 52 | else error "Mark.pred: Out of bounds" 53 | enumFrom (Mk p) = ptrs1 p (end @s) 54 | enumFromTo (Mk p) (Mk q) = ptrs1 p q 55 | enumFromThen = case reflectBase @s of 56 | !(Base _ _ l h) -> \(Mk p) (Mk q) -> if isTrue# (gtAddr# p q) 57 | then dptrs p (minusAddr# q p) l 58 | else ptrs p (minusAddr# q p) h 59 | enumFromThenTo (Mk p) (Mk q) (Mk r) = if isTrue# (gtAddr# p q) 60 | then dptrs p (minusAddr# q p) r 61 | else ptrs p (minusAddr# q p) r 62 | {-# inline fromEnum #-} 63 | {-# inline toEnum #-} 64 | {-# inline succ #-} 65 | {-# inline pred #-} 66 | {-# inline enumFrom #-} 67 | {-# inline enumFromTo #-} 68 | {-# inline enumFromThen #-} 69 | {-# inline enumFromThenTo #-} 70 | 71 | instance Ix (Mark s) where 72 | range (Mk p, Mk q) = ptrs1 p q 73 | unsafeIndex (p,_) r = minusMark r p 74 | inRange (Mk p, Mk q) (Mk r) = isTrue# (leAddr# p r) && isTrue# (leAddr# r q) 75 | unsafeRangeSize = uncurry minusMark 76 | {-# inline range #-} 77 | {-# inline unsafeIndex #-} 78 | {-# inline inRange #-} 79 | {-# inline unsafeRangeSize #-} 80 | 81 | ptrs1 :: Addr# -> Addr# -> [Mark s] 82 | ptrs1 l h 83 | | isTrue# (leAddr# l h) = Mk l : ptrs1 (plusAddr# l 1#) h 84 | | otherwise = [] 85 | {-# inline ptrs1 #-} 86 | 87 | ptrs :: Addr# -> Int# -> Addr# -> [Mark s] 88 | ptrs l d h 89 | | isTrue# (leAddr# l h) = Mk l : ptrs (plusAddr# l d) d h 90 | | otherwise = [] 91 | {-# inline ptrs #-} 92 | 93 | dptrs :: Addr# -> Int# -> Addr# -> [Mark s] 94 | dptrs h d l 95 | | isTrue# (leAddr# l h) = Mark (Ptr h) : ptrs (plusAddr# h d) d l 96 | | otherwise = [] 97 | {-# inline dptrs #-} 98 | 99 | minusMark :: Mark s -> Mark s -> Int 100 | minusMark (Mk p) (Mk q) = I# (minusAddr# p q) 101 | {-# inline minusMark #-} 102 | 103 | -- | Record the current position 104 | mark :: Parser s e (Mark s) 105 | mark = Parser \p s -> OK (Mk p) p s 106 | {-# inline mark #-} 107 | 108 | -- | Return to a previous location. 109 | release :: Mark s -> Parser s e () 110 | release (Mk q) = Parser \_ s -> OK () q s 111 | {-# inline release #-} 112 | 113 | -- | To grab all the text covered by a given parser, consider using @snipping@ 114 | -- and applying it to a combinator simply recognizes the content rather than returns 115 | -- it. 'snipping' a 'ByteString' is significantly cheaper than assembling one from 116 | -- smaller fragments. 117 | snip :: forall s. KnownBase s => Mark s -> Mark s -> ByteString 118 | snip = case reflectBase @s of 119 | !(Base x g _ _) -> \(Mk i) (Mk j) -> 120 | if isTrue# (geAddr# i j) 121 | then mkBS x g (minusAddr# i j) 122 | else B.empty 123 | {-# inline snip #-} 124 | 125 | snipping :: forall s e a. KnownBase s => Parser s e a -> Parser s e ByteString 126 | snipping = case reflectBase @s of 127 | !(Base b g r _) -> \(Parser m) -> Parser \p s -> case m p s of 128 | (# o, q, t #) -> 129 | (# setRes 130 | ( if isTrue# (geAddr# q p) 131 | then mkBS (b `plusAddr#` minusAddr# p r) g (minusAddr# q p) 132 | else B.empty 133 | ) o 134 | , q, t #) 135 | {-# inline snipping #-} 136 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Internal/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# language CPP #-} 2 | {-# language PatternSynonyms #-} 3 | {-# language OverloadedStrings #-} 4 | {-# language MagicHash #-} 5 | {-# language TypeFamilies #-} 6 | {-# language UnboxedSums #-} 7 | {-# language StandaloneDeriving #-} 8 | {-# language UnboxedTuples #-} 9 | {-# language ImplicitParams #-} 10 | {-# language ConstraintKinds #-} 11 | {-# language LambdaCase #-} 12 | {-# language ScopedTypeVariables #-} 13 | {-# language RankNTypes #-} 14 | {-# language BangPatterns #-} 15 | {-# language ForeignFunctionInterface #-} 16 | {-# language KindSignatures #-} 17 | {-# language UnliftedFFITypes #-} 18 | {-# language TypeApplications #-} 19 | {-# language AllowAmbiguousTypes #-} 20 | {-# language BlockArguments #-} 21 | {-# language ViewPatterns #-} 22 | {-# language UnboxedTuples #-} 23 | {-# language MagicHash #-} 24 | {-# language PatternSynonyms #-} 25 | {-# language UnliftedNewtypes #-} 26 | {-# options_ghc -O2 #-} 27 | 28 | module Text.Parsnip.Internal.Parser 29 | ( 30 | -- * Parser 31 | Parser(..) 32 | , Res(Res#,Good,Bad,Ugly) 33 | , mapRes, setRes 34 | , Result, pattern OK, pattern Fail, pattern Err 35 | , mapResult, setResult 36 | , try 37 | -- * Unsafe literals 38 | , lit, litN, word8 39 | -- * Guts 40 | , Base(..), bytes, start, end 41 | , KnownBase(..) 42 | , parse 43 | ) where 44 | 45 | import Control.Applicative 46 | import Control.Monad 47 | import Control.Monad.Primitive 48 | import qualified Data.ByteString as B 49 | import Data.ByteString.Internal (ByteString(..)) 50 | import qualified Data.ByteString.Internal as B 51 | import Data.Primitive.ByteArray 52 | import Data.String 53 | import Foreign.C.Types 54 | import Foreign.ForeignPtr 55 | import GHC.ForeignPtr 56 | import GHC.Prim 57 | import GHC.Ptr 58 | import GHC.Types 59 | import GHC.Word 60 | import System.IO.Unsafe 61 | 62 | import Text.Parsnip.Location 63 | import Text.Parsnip.Internal.Private 64 | 65 | newtype Res e a = Res# (# a | (##) | e #) 66 | 67 | pattern Good :: a -> Res e a 68 | pattern Good a = Res# (# a | | #) 69 | 70 | pattern Bad :: Res e a 71 | pattern Bad = Res# (# | (##) | #) 72 | 73 | -- user error, don't recover 74 | pattern Ugly :: e -> Res e a 75 | pattern Ugly e = Res# (# | | e #) 76 | 77 | {-# complete Good, Bad, Ugly :: Res #-} 78 | 79 | mapRes :: (a -> b) -> Res e a -> Res e b 80 | mapRes f (Good a) = Good $! f a 81 | mapRes _ Bad = Bad 82 | mapRes _ (Ugly e) = Ugly e 83 | {-# inline mapRes #-} 84 | 85 | setRes :: b -> Res e a -> Res e b 86 | setRes b (Good _) = Good b 87 | setRes _ Bad = Bad 88 | setRes _ (Ugly e) = Ugly e 89 | {-# inline setRes #-} 90 | 91 | -------------------------------------------------------------------------------- 92 | -- * Result 93 | -------------------------------------------------------------------------------- 94 | 95 | type Result s e a = (# Res e a, Addr#, State# s #) 96 | 97 | pattern OK :: a -> Addr# -> State# s -> Result s e a 98 | pattern OK a p s = (# Good a, p, s #) 99 | 100 | pattern Fail :: Addr# -> State# s -> Result s e a 101 | pattern Fail p s = (# Bad, p, s #) 102 | 103 | pattern Err :: e -> Addr# -> State# s -> Result s e a 104 | pattern Err e p s = (# Ugly e, p, s #) 105 | 106 | {-# complete OK, Fail, Err #-} 107 | 108 | mapResult :: (a -> b) -> Result s e a -> Result s e b 109 | mapResult f (# o, p, s #) = (# mapRes f o, p, s #) 110 | {-# inline mapResult #-} 111 | 112 | setResult :: b -> Result s e a -> Result s e b 113 | setResult b (# o, p, s #) = (# setRes b o, p, s #) 114 | {-# inline setResult #-} 115 | 116 | -------------------------------------------------------------------------------- 117 | -- * Result 118 | -------------------------------------------------------------------------------- 119 | 120 | newtype Parser s e a = Parser 121 | { runParser :: Addr# -> State# s -> Result s e a 122 | } 123 | 124 | instance Functor (Parser s e) where 125 | fmap f (Parser m) = Parser \ p s -> mapResult f (m p s) 126 | {-# inline fmap #-} 127 | b <$ Parser m = Parser \ p s -> case m p s of 128 | OK _ q t -> OK b q t 129 | Fail q t -> Fail q t 130 | Err e q t -> Err e q t 131 | {-# inline (<$) #-} 132 | 133 | instance Applicative (Parser s e) where 134 | pure a = Parser \ p s -> OK a p s 135 | {-# inline pure #-} 136 | Parser m <*> Parser n = Parser \p s -> case m p s of 137 | Fail q t -> Fail q t 138 | OK f q t -> mapResult f (n q t) 139 | Err e q t -> Err e q t 140 | {-# inline (<*>) #-} 141 | Parser m *> Parser n = Parser \p s -> case m p s of 142 | Fail q t -> Fail q t 143 | OK _ q t -> n q t 144 | Err e q t -> Err e q t 145 | {-# inline (*>) #-} 146 | Parser m <* Parser n = Parser \p s -> case m p s of 147 | OK a q t -> setResult a (n q t) 148 | x -> x 149 | {-# inline (<*) #-} 150 | 151 | instance Monad (Parser s e) where 152 | Parser m >>= f = Parser \p s -> case m p s of 153 | Fail q t -> Fail q t 154 | Err e q t -> Err e q t 155 | OK a q t -> runParser (f a) q t 156 | {-# inline (>>=) #-} 157 | (>>) = (*>) 158 | {-# inline (>>) #-} 159 | 160 | instance Alternative (Parser s e) where 161 | Parser m <|> Parser n = Parser \ p s -> case m p s of 162 | Fail _ t -> n p t 163 | OK a q t -> OK a q t 164 | Err _ _ t -> n p t 165 | {-# inline (<|>) #-} 166 | empty = Parser Fail 167 | {-# inline empty #-} 168 | 169 | instance MonadPlus (Parser s e) where 170 | mplus = (<|>) 171 | {-# inline mplus #-} 172 | mzero = empty 173 | {-# inline mzero #-} 174 | 175 | instance PrimMonad (Parser s e) where 176 | type PrimState (Parser s e) = s 177 | primitive f = Parser \p s -> case f s of 178 | (# t, a #) -> OK a p t 179 | {-# inline primitive #-} 180 | 181 | -- perhaps this interface is a little low level. hrmm 182 | instance a ~ ByteString => IsString (Parser s e a) where 183 | fromString "" = pure B.empty 184 | fromString xs = Parser \p s -> case sizeofMutableByteArray# ba of 185 | n -> case io (c_strncmp (mutableByteArrayContents# ba) p (fromIntegral $ I# n)) s of 186 | (# t, i #) 187 | | i /= 0 -> Fail p t 188 | | otherwise -> OK bs (plusAddr# p n) t 189 | where !(MutableByteArray ba) = pinnedByteArrayFromString0 xs 190 | bs = B.PS (ForeignPtr (mutableByteArrayContents# ba) (PlainPtr ba)) 0 (I# (sizeofMutableByteArray# ba)) 191 | 192 | try :: Parser s e a -> Parser s e a 193 | try (Parser m) = Parser $ \p s -> case m p s of 194 | OK a q t -> OK a q t 195 | Fail _ t -> Fail p t 196 | Err e _ t -> Err e p t 197 | 198 | word8 :: Word8 -> Parser s e Word8 199 | word8 0 = empty 200 | word8 r@(W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of 201 | (# t, c' #) -> if isTrue# (c `eqWord#` c') 202 | then OK r (plusAddr# p 1#) t 203 | else Fail p t 204 | {-# inline word8 #-} 205 | 206 | --------------------------------------------------------------------------------------- 207 | -- * Super-unsafe literal parsers 208 | --------------------------------------------------------------------------------------- 209 | 210 | -- | super-duper unsafe. Fabricates bytestrings that directly reference constant memory 211 | litN :: Addr# -> CSize -> Parser s e ByteString 212 | litN q n = Parser \p s -> case io (c_strncmp p q n) s of 213 | (# t, 0 #) -> OK bs (p `plusAddr#` csize n) t 214 | (# t, _ #) -> Fail p t 215 | where bs = unsafeLiteralByteStringN q n 216 | 217 | -- | Super unsafe. Fabricates a bytestring that directly reference constant memory. 218 | -- 219 | -- Usage: 220 | -- 221 | -- @ 222 | -- hello = lit "hello"# 223 | -- @ 224 | lit :: Addr# -> Parser s e ByteString 225 | lit q = litN q (pure_strlen q) 226 | 227 | literalForeignPtrContents :: ForeignPtrContents 228 | literalForeignPtrContents = unsafeDupablePerformIO $ primitive \s -> case newByteArray# 0# s of 229 | (# t, a #) -> (# t, PlainPtr a #) 230 | -- {-# noinline literalForeignPtrContents #-} 231 | 232 | unsafeLiteralForeignPtr :: Addr# -> ForeignPtr Word8 233 | unsafeLiteralForeignPtr addr = ForeignPtr addr literalForeignPtrContents 234 | 235 | unsafeLiteralByteStringN :: Addr# -> CSize -> ByteString 236 | unsafeLiteralByteStringN p n = BS (unsafeLiteralForeignPtr p) (fromIntegral n) 237 | {-# noinline unsafeLiteralByteStringN #-} 238 | 239 | --unsafeLiteralByteString :: Addr# -> ByteString 240 | --unsafeLiteralByteString p = unsafeLiteralByteStringN p (pure_strlen p) 241 | 242 | -- Given a 'Base' you can do two things with it. While in a Parser, you're allowed to 243 | -- access the memory between the start and end addresses, as they'll be alive. 244 | -- 245 | -- However, you can always reconstruct a bytestring from the oriignal (non-0 terminated 246 | -- data using 'bytes', and that will remain valid forever or until appropriately 247 | -- garbage collected. 248 | -- 249 | -- In general, in a Parser you should try to access the memory in the null-terminated 250 | -- region for cache locality. 251 | -- 252 | -- Afterwards, or to report bytestrings, you should trim them off the original, this 253 | -- way, no additional memory needs to be copied, and the garbage collector will just 254 | -- manage the storage of the bytestrings you cut off of the parent for you. 255 | 256 | data Base s = Base 257 | { baseOriginal :: Addr# -- the start of a valid bytestring 258 | , baseContents :: ForeignPtrContents -- memory management for that bytestring 259 | , baseStart :: Addr# -- the start of our null terminated copy of the bytestring 260 | , baseEnd :: Addr# -- the end of our null terminated copy (points to the '\0') 261 | } 262 | 263 | bytes :: forall s. KnownBase s => ByteString 264 | bytes = case reflectBase @s of 265 | !(Base b g p q) -> mkBS b g (minusAddr# q p) 266 | {-# inline bytes #-} 267 | 268 | start :: forall s. KnownBase s => Addr# 269 | start = baseStart (reflectBase @s) 270 | {-# inline start #-} 271 | 272 | end :: forall s. KnownBase s => Addr# 273 | end = baseEnd (reflectBase @s) 274 | {-# inline end #-} 275 | 276 | class KnownBase (s :: Type) where 277 | reflectBase :: Base s 278 | 279 | -------------------------------------------------------------------------------- 280 | -- * Parsing 281 | -------------------------------------------------------------------------------- 282 | 283 | parse :: (forall s. KnownBase s => Parser s e a) -> ByteString -> Either (Location, Maybe e) a 284 | parse m bs@(B.BS (ForeignPtr b g) (I# len)) = unsafeDupablePerformIO $ 285 | B.useAsCString bs \(Ptr p) -> -- now it is null terminated 286 | IO \s -> let base = Base b g p (plusAddr# p len) in 287 | case runParser (withBase (\_ -> m) base proxy#) p s of 288 | (# n, q, t #) -> (# t, finish base q n #) 289 | 290 | finish :: Base s -> Addr# -> Res e a -> Either (Location, Maybe e) a 291 | finish (Base b g q r) p = \case 292 | Good a -> Right a 293 | Bad -> Left (location (mkBS b g (minusAddr# r q)) (I# (minusAddr# p q)), Nothing) 294 | Ugly e -> Left (location (mkBS b g (minusAddr# r q)) (I# (minusAddr# p q)), Just e) 295 | {-# inline finish #-} 296 | 297 | data Wrap s e a = Wrap (KnownBase s => Proxy# s -> Parser s e a) 298 | 299 | withBase :: (KnownBase s => Proxy# s -> Parser s e a) -> Base s -> Proxy# s -> Parser s e a 300 | withBase f x y = magicDict (Wrap f) x y 301 | {-# inline withBase #-} 302 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Internal/Private.hs: -------------------------------------------------------------------------------- 1 | {-# language MagicHash #-} 2 | {-# language UnboxedTuples #-} 3 | {-# language BlockArguments #-} 4 | {-# language BangPatterns #-} 5 | {-# language ViewPatterns #-} 6 | {-# language UnliftedFFITypes #-} 7 | {-# language UnliftedNewtypes #-} 8 | 9 | -- | the proverbial junk drawer 10 | module Text.Parsnip.Internal.Private 11 | ( io 12 | , mutableByteArrayContents# 13 | , pinnedByteArrayFromString0 14 | , pinnedByteArrayFromStringN0 15 | , c_memchr 16 | , c_strlen 17 | , c_strncmp 18 | , pure_strlen 19 | , cint 20 | , csize 21 | , mkBS 22 | , ForeignString(..) 23 | , packForeignString 24 | , withForeignString 25 | ) where 26 | 27 | import Data.Primitive.ByteArray 28 | import Data.Primitive.PrimArray 29 | import Data.Primitive.Ptr 30 | import Data.ByteString.Internal (ByteString(..)) 31 | import qualified Data.ByteString.Internal as B 32 | import Data.String 33 | import Data.Word 34 | import Foreign.ForeignPtr 35 | import Foreign.C.String 36 | import Foreign.C.Types 37 | import GHC.ForeignPtr 38 | import GHC.Prim 39 | import GHC.Ptr 40 | import GHC.Types 41 | import System.IO.Unsafe 42 | import Unsafe.Coerce 43 | 44 | io :: IO a -> State# s -> (# State# s, a #) 45 | io = unsafeCoerce# 46 | 47 | -- | Missing primitive 48 | mutableByteArrayContents# :: MutableByteArray# s -> Addr# 49 | mutableByteArrayContents# arr = byteArrayContents# (unsafeCoerce# arr) 50 | 51 | pinnedByteArrayFromString0 :: String -> MutableByteArray RealWorld 52 | pinnedByteArrayFromString0 xs = pinnedByteArrayFromStringN0 (length xs) xs 53 | 54 | pinnedByteArrayFromStringN0 :: Int -> String -> MutableByteArray RealWorld 55 | pinnedByteArrayFromStringN0 n ys = unsafeDupablePerformIO do 56 | marr <- newPinnedByteArray (n+1) 57 | let go !ix [] = if ix == n 58 | then writeByteArray marr ix (0 :: Word8) 59 | else fail "pinnedByteArrayFromStringN: list length less than specified size" 60 | go !ix (x : xs) = if ix < n 61 | then do 62 | writeByteArray marr ix (B.c2w x) 63 | go (ix + 1) xs 64 | else fail "pinnedByteArrayFromStringN: list length greater than specified size" 65 | go 0 ys 66 | pure marr 67 | 68 | -- | An _immutable_ foreign cstring. This is mostly useful for things like calling strstr through ffi 69 | -- where the needle needs to be null terminated. 70 | newtype ForeignString = ForeignString (ForeignPtr Word8) 71 | deriving (Eq,Ord) 72 | 73 | instance Show ForeignString where 74 | showsPrec d (ForeignString fp) = showsPrec d $ unsafeDupablePerformIO $ withForeignPtr (castForeignPtr fp) peekCString 75 | 76 | instance IsString ForeignString where 77 | fromString s = unsafeDupablePerformIO $ do 78 | cstr <- newCString s 79 | ForeignString <$> newForeignPtr_ (castPtr cstr) 80 | 81 | packForeignString :: ByteString -> ForeignString 82 | packForeignString (PS _fp (plusForeignPtr _fp -> fp) n) = unsafeDupablePerformIO do 83 | MutableByteArray mba <- newPinnedByteArray (n+1) 84 | let mpa = MutablePrimArray mba :: MutablePrimArray RealWorld Word8 -- See haskell/primitive#253 85 | withForeignPtr fp $ \p -> copyPtrToMutablePrimArray mpa 0 p n 86 | writePrimArray mpa 0 (0 :: Word8) -- null terminate 87 | -- PrimArray ba <- unsafeFreezePrimArray mpa 88 | pure $ ForeignString $ ForeignPtr (mutableByteArrayContents# mba) (PlainPtr mba) 89 | 90 | withForeignString :: ForeignString -> (CString -> IO r) -> IO r 91 | withForeignString (ForeignString fp) = withForeignPtr (castForeignPtr fp) 92 | 93 | --------------------------------------------------------------------------------------- 94 | -- * C 95 | --------------------------------------------------------------------------------------- 96 | 97 | foreign import ccall unsafe "string.h memchr" c_memchr :: Addr# -> CInt -> CSize -> IO (Ptr ()) 98 | foreign import ccall unsafe "string.h strncmp" c_strncmp :: Addr# -> Addr# -> CSize -> IO CInt 99 | foreign import ccall unsafe "string.h strlen" c_strlen :: Addr# -> IO CSize 100 | foreign import ccall unsafe "string.h strlen" pure_strlen :: Addr# -> CSize 101 | 102 | cint :: CInt -> Int# 103 | cint (fromIntegral -> I# i) = i 104 | {-# inline cint #-} 105 | 106 | csize :: CSize -> Int# 107 | csize (fromIntegral -> I# i) = i 108 | {-# inline csize #-} 109 | 110 | mkBS :: Addr# -> ForeignPtrContents -> Int# -> ByteString 111 | mkBS b g l = BS (ForeignPtr b g) (I# l) 112 | {-# inline mkBS #-} 113 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Internal/Simple.hs: -------------------------------------------------------------------------------- 1 | {-# language TypeApplications #-} 2 | {-# language BlockArguments #-} 3 | {-# language ScopedTypeVariables #-} 4 | {-# language BangPatterns #-} 5 | {-# language MagicHash #-} 6 | module Text.Parsnip.Internal.Simple 7 | ( SimpleResult(..) 8 | , relative 9 | , absolute 10 | ) where 11 | 12 | import Data.ByteString 13 | import GHC.Prim 14 | import GHC.Types 15 | import Text.Parsnip.Internal.Parser 16 | import Text.Parsnip.Internal.Private 17 | 18 | -------------------------------------------------------------------------------- 19 | -- * Simple parsers 20 | -------------------------------------------------------------------------------- 21 | 22 | data SimpleResult e a 23 | = SimpleOK a {-# unpack #-} !Int 24 | | SimpleFail {-# unpack #-} !Int 25 | | SimpleErr e {-# unpack #-} !Int 26 | 27 | relative :: forall s e a. KnownBase s => (ByteString -> SimpleResult e a) -> Parser s e a 28 | relative = case reflectBase @s of 29 | !(Base x g q r) -> \f -> Parser \p s -> case f $ mkBS (x `plusAddr#` minusAddr# p q) g (minusAddr# r p) of 30 | SimpleOK a (I# i) -> OK a (plusAddr# p i) s 31 | SimpleFail (I# i) -> Fail (plusAddr# p i) s 32 | SimpleErr e (I# i) -> Err e (plusAddr# p i) s 33 | {-# inline relative #-} 34 | 35 | absolute :: forall s e a. KnownBase s => (ByteString -> Int -> SimpleResult e a) -> Parser s e a 36 | absolute = case reflectBase @s of 37 | !(Base x g q r) -> \f -> Parser \p s -> case f (mkBS x g (minusAddr# r q)) $ I# (minusAddr# p q) of 38 | SimpleOK a (I# i) -> OK a (plusAddr# p i) s 39 | SimpleFail (I# i) -> Fail (plusAddr# p i) s 40 | SimpleErr e (I# i) -> Err e (plusAddr# p i) s 41 | {-# inline absolute #-} 42 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Location.hs: -------------------------------------------------------------------------------- 1 | module Text.Parsnip.Location 2 | ( location 3 | , Location(..) 4 | , located 5 | ) where 6 | 7 | import Data.ByteString (ByteString) 8 | import qualified Data.ByteString.Char8 as B 9 | import Data.Maybe 10 | 11 | -- | Deliberately lazy, so we don't bother to compute the 12 | -- exact line and column until forced. 13 | data Location = Location 14 | { locationLine :: Int 15 | , locationColumn :: Int 16 | , locationSource :: ByteString 17 | } deriving Show 18 | 19 | -- | /O(n)/ in the size of the input bytestring to actually use one. 20 | -- If used primarily for error location reporting, then this probably 21 | -- puts the burden in the right place. 22 | location :: ByteString -> Int -> Location 23 | location bs j = Location (B.count '\n' before) (j - k) $ B.takeWhile (/='\n') $ B.drop k bs where 24 | before = B.take j bs 25 | k = fromMaybe 0 $ B.elemIndexEnd '\n' before 26 | {-# inline location #-} 27 | 28 | -- | Use this til we get a pretty printer in here 29 | located :: Location -> String -> String 30 | located (Location l c bs) msg = Prelude.unlines 31 | [ show l ++ ":" ++ show c ++ " " ++ msg 32 | , ls 33 | , show l ++ " | " ++ B.unpack bs 34 | , ls ++ Prelude.replicate c ' ' ++ "^" 35 | ] where ls = Prelude.replicate (length (show l) + 1) ' ' ++ "|" 36 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# language BlockArguments #-} 2 | {-# language MagicHash #-} 3 | {-# language UnboxedTuples #-} 4 | {-# language ScopedTypeVariables #-} 5 | {-# language UnliftedFFITypes #-} 6 | {-# language BangPatterns #-} 7 | {-# language RankNTypes #-} 8 | {-# language TypeApplications #-} 9 | {-# language LambdaCase #-} 10 | {-# language AllowAmbiguousTypes #-} 11 | {-# language PolyKinds #-} 12 | {-# language CPP #-} 13 | module Text.Parsnip.Parser 14 | ( Parser, KnownBase 15 | , parse 16 | ---------------------------- 17 | , try 18 | , atEnd 19 | , endOfInput 20 | ---------------------------- 21 | , tillSubstring 22 | , skipTillSubstring 23 | , skip 24 | , skip0 25 | , take 26 | ---------------------------- 27 | , Mark 28 | , mark 29 | , release 30 | , snip 31 | , snipping 32 | ---------------------------- 33 | , input 34 | , pos 35 | , betwixt 36 | , rest 37 | ---------------------------- 38 | , loc 39 | ) where 40 | 41 | import Data.ByteString (ByteString) 42 | import qualified Data.ByteString as B 43 | import qualified Data.ByteString.Internal as B 44 | import qualified Data.ByteString.Unsafe as B 45 | import Foreign.C.Types 46 | import GHC.ForeignPtr 47 | import GHC.Prim 48 | import GHC.Ptr 49 | import GHC.Types 50 | import Prelude hiding (take) 51 | import Text.Parsnip.Internal.Mark 52 | import Text.Parsnip.Internal.Parser 53 | import Text.Parsnip.Internal.Private 54 | import Text.Parsnip.Internal.Simple 55 | import Text.Parsnip.Location 56 | 57 | -------------------------------------------------------------------------------- 58 | -- * Combinators 59 | -------------------------------------------------------------------------------- 60 | 61 | atEnd :: Parser s e Bool 62 | atEnd = Parser \p s -> case readCharOffAddr# p 0# s of 63 | (# t, c #) -> OK (isTrue# do chr# 0# `eqChar#` c) p t 64 | 65 | endOfInput :: Parser s e () 66 | endOfInput = Parser \p s -> case readCharOffAddr# p 0# s of 67 | (# t, c #) -> (# if isTrue# do chr# 0# `eqChar#` c then Good () else Bad, p, t #) 68 | 69 | take :: forall s e. KnownBase s => Int -> Parser s e ByteString 70 | take = case reflectBase @s of 71 | !(Base b g q r) -> \(I# i) -> Parser \p s -> 72 | if isTrue# (minusAddr# r p <# i) 73 | then Fail p s 74 | else OK (B.PS (ForeignPtr (b `plusAddr#` minusAddr# p q) g) 0 (I# i)) (plusAddr# p i) s 75 | 76 | -- | We can do this two ways, this way is O(1) but needs KnownBase. 77 | skip :: forall s e. KnownBase s => Int -> Parser s e () 78 | skip = \(I# i) -> Parser \p s -> 79 | if isTrue# (minusAddr# r p <# i) 80 | then Fail p s 81 | else OK () (plusAddr# p i) s 82 | where r = end @s 83 | 84 | -- | Linear time, but no @KnownBase@ dependency. 85 | skip0 :: Int -> Parser s e () 86 | skip0 n@(I# i) = Parser \p s -> case io (c_memchr p 0 (fromIntegral n)) s of 87 | (# t, Ptr q #) -> if isTrue# (q `eqAddr#` nullAddr#) 88 | then OK () (plusAddr# p i) t 89 | else Fail p s 90 | 91 | tillSubstring :: KnownBase s => ByteString -> Parser s e ByteString 92 | tillSubstring needle = relative \bs -> case p bs of 93 | (r, _) -> SimpleOK r (B.length r) 94 | where p = B.breakSubstring needle 95 | 96 | foreign import ccall unsafe "string.h" strstr :: Addr# -> Addr# -> IO (Ptr ()) 97 | foreign import ccall unsafe "string.h" strlen :: Addr# -> IO CSize 98 | 99 | skipTillSubstring :: ByteString -> Parser s e () 100 | skipTillSubstring bneedle = case B.length bneedle of 101 | 0 -> pure () 102 | 1 -> () <$ word8 (B.unsafeHead bneedle) 103 | _ -> let fneedle = packForeignString bneedle 104 | in Parser \p s -> case io 105 | ( withForeignString fneedle \(Ptr cneedle)-> 106 | strstr p cneedle >>= \q -> if q == nullPtr 107 | then plusPtr (Ptr p) . fromIntegral <$> strlen p 108 | else pure q 109 | ) s of (# t, Ptr r #) -> OK () r t 110 | 111 | 112 | --skipTillSubstring :: ByteString -> Parser s e () 113 | --skipTillSubstring needle = relative \bs -> case p bs of 114 | -- (r, _) -> SimpleOK r (B.length r) 115 | -- where p = B.breakSubstring needle 116 | 117 | 118 | -- | @input = snip minBound maxBound@ 119 | input :: KnownBase s => Parser s e ByteString 120 | input = absolute \b _ -> SimpleOK b 0 121 | 122 | -- | @rest = mark >>= \p -> snip p maxBound@ 123 | rest :: KnownBase s => Parser s e ByteString 124 | rest = relative \b -> SimpleOK b 0 125 | 126 | -- | 'snip' is a smidge faster, easier to type, if less fun to say, and 127 | -- doesn't need you to fiddle with explicit type application to actually 128 | -- apply. 129 | -- 130 | -- The benefit of this combinator is that it is easy to come up with numbers 131 | -- of bytes into a file, and this combinator will automatically trim the 132 | -- result to the actual range of bytes available, whereas constructing an 133 | -- illegal 'Mark' will error in 'toEnum'/'fromEnum'/'succ' or whatever other 134 | -- combinator tries to produce one out of range to maintain the invariant 135 | -- that a mark is always a well formed location in the content. 136 | betwixt :: forall s. KnownBase s => Int -> Int -> ByteString 137 | betwixt i j = B.take (j-i) $ B.drop i $ bytes @s 138 | 139 | -- | 'mark' is generally faster 140 | pos :: forall s e. KnownBase s => Parser s e Int 141 | pos = Parser \ p s -> OK (I# (minusAddr# p (start @s))) p s 142 | {-# inline pos #-} 143 | 144 | loc :: KnownBase s => Parser s e Location 145 | loc = markLocation <$> mark 146 | {-# inline loc #-} 147 | 148 | -- | Actually looking at one of these is pretty slow, as it has to do a linear 149 | -- scan to figure out its line number for display. 150 | markLocation :: forall s. KnownBase s => Mark s -> Location 151 | markLocation (Mark (Ptr p)) = location (bytes @s) (I# (minusAddr# p (start @s))) 152 | {-# inline markLocation #-} 153 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Word8.hs: -------------------------------------------------------------------------------- 1 | {-# language MagicHash #-} 2 | {-# language BlockArguments #-} 3 | {-# language UnboxedTuples #-} 4 | {-# language BangPatterns #-} 5 | {-# language TypeApplications #-} 6 | {-# language NegativeLiterals #-} 7 | {-# language UnliftedFFITypes #-} 8 | {-# language ScopedTypeVariables #-} 9 | {-# language ForeignFunctionInterface #-} 10 | 11 | -- | Note: @parsnip@ will still be assuming that the input is null terminated 12 | -- even if you use these combinators. 13 | module Text.Parsnip.Word8 14 | ( satisfy 15 | , word8 16 | , notWord8 17 | , anyWord8 18 | , while, whileSome 19 | , till, tillSome, tillWord8 20 | , skipWhile, skipWhileSome 21 | , skipTill, skipTillSome, skipTillWord8 22 | , previousWord8, previousWord8' 23 | , nextWord8, nextWord8' 24 | ) where 25 | 26 | import Data.ByteString (ByteString) 27 | import Data.Word 28 | import GHC.Prim 29 | import GHC.Ptr 30 | import GHC.Types 31 | import GHC.Word 32 | import Text.Parsnip.Internal.Parser 33 | import Text.Parsnip.Internal.Private 34 | import Text.Parsnip.Parser 35 | 36 | -------------------------------------------------------------------------------- 37 | -- * Word8 parsers 38 | -------------------------------------------------------------------------------- 39 | 40 | satisfy :: (Word8 -> Bool) -> Parser s e Word8 41 | satisfy f = Parser \p s -> case readWord8OffAddr# p 0# s of 42 | (# t, c #) -> if isTrue# (0## `neWord#` c) && f (W8# c) 43 | then OK (W8# c) (plusAddr# p 1#) t 44 | else Fail p t 45 | {-# inline satisfy #-} 46 | 47 | notWord8 :: Word8 -> Parser s e Word8 48 | notWord8 0 = anyWord8 49 | notWord8 (W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of 50 | (# t, c' #) -> if isTrue# (0## `neWord#` c') && isTrue# (c `neWord#` c') 51 | then OK (W8# c') (plusAddr# p 1#) t 52 | else Fail p t 53 | {-# inline notWord8 #-} 54 | 55 | nextWord8 :: Parser s e (Maybe Word8) 56 | nextWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of 57 | (# t, c #) -> OK (if isTrue# (0## `neWord#` c) then Just (W8# c) else Nothing) p t 58 | {-# inline nextWord8 #-} 59 | 60 | nextWord8' :: Parser s e Word8 61 | nextWord8' = Parser \p s -> case readWord8OffAddr# p 0# s of 62 | (# t, c #) -> if isTrue# (0## `neWord#` c) 63 | then OK (W8# c) p t 64 | else Fail p t 65 | {-# inline nextWord8' #-} 66 | 67 | anyWord8 :: Parser s e Word8 68 | anyWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of 69 | (# t, c #) -> if isTrue# (0## `neWord#` c) 70 | then OK (W8# c) (plusAddr# p 1#) t 71 | else Fail p t 72 | {-# inline anyWord8 #-} 73 | 74 | scan :: (Word8 -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #) 75 | scan f = go where 76 | go p s = case readWord8OffAddr# p 0# s of 77 | (# t, c #) -> if isTrue# (0## `neWord#` c) && f (W8# c) 78 | then (# t, p #) 79 | else scan f (plusAddr# p 1#) t 80 | {-# inline scan #-} 81 | 82 | skipWhile :: (Word8 -> Bool) -> Parser s e () 83 | skipWhile f = Parser \p s -> case scan f p s of 84 | (# t, q #) -> OK () q t 85 | {-# inline [1] skipWhile #-} 86 | 87 | {-# RULES 88 | "skipWhile (x/=)" forall x. 89 | skipWhile (x `neWord8`) = skipTillWord8 x 90 | "skipWhile (/=x)" forall x. 91 | skipWhile (`neWord8` x) = skipTillWord8 x 92 | #-} 93 | 94 | skipTill :: (Word8 -> Bool) -> Parser s e () 95 | skipTill p = skipWhile (not . p) 96 | {-# inline [1] skipTill #-} 97 | 98 | {-# RULES 99 | "skipTill (x==)" forall x. 100 | skipTill (x `eqWord8`) = skipTillWord8 x 101 | "skipWhile (==x)" forall x. 102 | skipWhile (`eqWord8` x) = skipTillWord8 x 103 | #-} 104 | 105 | skipTillSome :: (Word8 -> Bool) -> Parser s e () 106 | skipTillSome p = skipWhileSome (not . p) 107 | {-# inline skipTillSome #-} 108 | 109 | foreign import ccall "parsnip.h" strchr0 :: Addr# -> Char# -> IO (Ptr Word8) -- lazy reimport is lazy 110 | 111 | skipTillWord8 :: Word8 -> Parser s e () 112 | skipTillWord8 (W8# c) = Parser $ \p s -> case io (strchr0 p (chr# (word2Int# c))) s of -- lazy cast is lazy 113 | (# t, Ptr q #) -> OK () q t 114 | {-# inline skipTillWord8 #-} 115 | 116 | skipWhileSome :: (Word8 -> Bool) -> Parser s e () 117 | skipWhileSome p = satisfy p *> skipWhile p 118 | {-# inline skipWhileSome #-} 119 | 120 | while :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString 121 | while f = snipping (skipWhile f) 122 | {-# inline while #-} 123 | 124 | till :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString 125 | till p = snipping (skipTill p) 126 | {-# inline till #-} 127 | 128 | tillWord8 :: KnownBase s => Word8 -> Parser s e ByteString 129 | tillWord8 c = snipping (skipTillWord8 c) 130 | {-# inline tillWord8 #-} 131 | 132 | whileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString 133 | whileSome p = snipping (skipWhileSome p) 134 | {-# inline whileSome #-} 135 | 136 | tillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString 137 | tillSome p = snipping (skipTillSome p) 138 | {-# inline tillSome #-} 139 | 140 | -- | Peek at the previous character. Always succeeds. 141 | previousWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8) 142 | previousWord8 = case reflectBase @s of 143 | !(Base _ _ l _) -> Parser \p s -> 144 | if isTrue# (ltAddr# l p) 145 | then case readWord8OffAddr# p (-1#) s of 146 | (# t, c #) -> OK (Just (W8# c)) p t 147 | else OK Nothing p s 148 | {-# inline previousWord8 #-} 149 | 150 | -- | Peek at the previous character. Fails if we're at the start of input. 151 | previousWord8' :: forall s e. KnownBase s => Parser s e Word8 152 | previousWord8' = case reflectBase @s of 153 | !(Base _ _ l _) -> Parser \p s -> 154 | if isTrue# (ltAddr# l p) 155 | then case readWord8OffAddr# p (-1#) s of 156 | (# t, c #) -> OK (W8# c) p t 157 | else Fail p s 158 | {-# inline previousWord8' #-} 159 | -------------------------------------------------------------------------------- /src/Text/Parsnip/Word8/Binary.hs: -------------------------------------------------------------------------------- 1 | {-# language MagicHash #-} 2 | {-# language BlockArguments #-} 3 | {-# language UnboxedTuples #-} 4 | {-# language BangPatterns #-} 5 | {-# language TypeApplications #-} 6 | {-# language NegativeLiterals #-} 7 | {-# language UnliftedFFITypes #-} 8 | {-# language ScopedTypeVariables #-} 9 | {-# language ForeignFunctionInterface #-} 10 | 11 | -- | These combinators allow handling of embedded null characters. 12 | -- 13 | -- However, they do assume that the string is null terminated to 14 | -- accelerate access, and to properly intermix with other combinators 15 | -- which means we still have to do an initial copy. 16 | -- 17 | -- Use 'binaryEof' instead of 'eof' to check for end-of-file, as 18 | -- it understands embdded nulls. 19 | module Text.Parsnip.Word8.Binary 20 | ( satisfy 21 | , word8 22 | , notWord8 23 | , anyWord8 24 | , while, whileSome 25 | , till, tillSome, tillWord8 26 | , skipWhile, skipWhileSome 27 | , skipTill, skipTillSome, skipTillWord8 28 | , previousWord8, previousWord8' 29 | , nextWord8, nextWord8' 30 | , binaryEof 31 | ) where 32 | 33 | import Data.ByteString (ByteString) 34 | import Data.Word 35 | import Foreign.C.Types 36 | import GHC.Prim 37 | import GHC.Ptr 38 | import GHC.Types 39 | import GHC.Word 40 | import Text.Parsnip.Internal.Parser 41 | import Text.Parsnip.Internal.Private 42 | import Text.Parsnip.Parser 43 | 44 | -------------------------------------------------------------------------------- 45 | -- * Word8 parsers that handle embedded nulls 46 | -------------------------------------------------------------------------------- 47 | 48 | satisfy :: forall s e. KnownBase s => (Word8 -> Bool) -> Parser s e Word8 49 | satisfy f = Parser \p s -> case readWord8OffAddr# p 0# s of 50 | (# t, c #) -> if f (W8# c) 51 | then if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s)) 52 | then OK (W8# c) (plusAddr# p 1#) t 53 | else Fail p t 54 | else Fail p t 55 | {-# inline satisfy #-} 56 | 57 | notWord8 :: forall s e. KnownBase s => Word8 -> Parser s e Word8 58 | notWord8 (W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of 59 | (# t, c' #) -> 60 | if isTrue# (c `neWord#` c') 61 | then if isTrue# (0## `neWord#` c') || isTrue# (neAddr# p (end @s)) 62 | then OK (W8# c') (plusAddr# p 1#) t 63 | else Fail p t 64 | else Fail p t 65 | {-# inline notWord8 #-} 66 | 67 | nextWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8) 68 | nextWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of 69 | (# t, c #) -> OK 70 | ( if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s)) 71 | then Just (W8# c) 72 | else Nothing 73 | ) 74 | p 75 | t 76 | {-# inline nextWord8 #-} 77 | 78 | nextWord8' :: forall s e. KnownBase s => Parser s e Word8 79 | nextWord8' = Parser \p s -> case readWord8OffAddr# p 0# s of 80 | (# t, c #) -> 81 | if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s)) 82 | then OK (W8# c) p t 83 | else Fail p t 84 | {-# inline nextWord8' #-} 85 | 86 | anyWord8 :: forall s e. KnownBase s => Parser s e Word8 87 | anyWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of 88 | (# t, c #) -> 89 | if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s)) 90 | then OK (W8# c) (plusAddr# p 1#) t 91 | else Fail p t 92 | {-# inline anyWord8 #-} 93 | 94 | scan :: forall s. KnownBase s => (Word8 -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #) 95 | scan f = go where 96 | go p s = case readWord8OffAddr# p 0# s of 97 | (# t, c #) -> 98 | if (isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))) && f (W8# c) 99 | then (# t, p #) 100 | else scan f (plusAddr# p 1#) t 101 | {-# inline scan #-} 102 | 103 | skipWhile :: KnownBase s => (Word8 -> Bool) -> Parser s e () 104 | skipWhile f = Parser \p s -> case scan f p s of 105 | (# t, q #) -> OK () q t 106 | {-# inline [1] skipWhile #-} 107 | 108 | {-# RULES 109 | "skipWhile (x/=)" forall x. 110 | skipWhile (x `neWord8`) = skipTillWord8 x 111 | "skipWhile (/=x)" forall x. 112 | skipWhile (`neWord8` x) = skipTillWord8 x 113 | #-} 114 | 115 | skipTill :: KnownBase s => (Word8 -> Bool) -> Parser s e () 116 | skipTill p = skipWhile (not . p) 117 | {-# inline [1] skipTill #-} 118 | 119 | {-# RULES 120 | "skipTill (x==)" forall x. 121 | skipTill (x `eqWord8`) = skipTillWord8 x 122 | "skipWhile (==x)" forall x. 123 | skipWhile (`eqWord8` x) = skipTillWord8 x 124 | #-} 125 | 126 | skipTillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e () 127 | skipTillSome p = skipWhileSome (not . p) 128 | {-# inline skipTillSome #-} 129 | 130 | foreign import ccall "string.h" memchr :: Addr# -> CInt -> CSize -> IO (Ptr Word8) 131 | 132 | skipTillWord8 :: forall s e. KnownBase s => Word8 -> Parser s e () 133 | skipTillWord8 w = Parser $ \p s -> case io (memchr p (fromIntegral w) (fromIntegral $ I# (minusAddr# (end @s) p))) s of 134 | (# t, Ptr q #) -> OK () q t 135 | {-# inline skipTillWord8 #-} 136 | 137 | skipWhileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e () 138 | skipWhileSome p = satisfy p *> skipWhile p 139 | {-# inline skipWhileSome #-} 140 | 141 | while :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString 142 | while f = snipping (skipWhile f) 143 | {-# inline while #-} 144 | 145 | till :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString 146 | till p = snipping (skipTill p) 147 | {-# inline till #-} 148 | 149 | tillWord8 :: KnownBase s => Word8 -> Parser s e ByteString 150 | tillWord8 c = snipping (skipTillWord8 c) 151 | {-# inline tillWord8 #-} 152 | 153 | whileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString 154 | whileSome p = snipping (skipWhileSome p) 155 | {-# inline whileSome #-} 156 | 157 | tillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString 158 | tillSome p = snipping (skipTillSome p) 159 | {-# inline tillSome #-} 160 | 161 | -- | Peek at the previous character. Always succeeds. 162 | previousWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8) 163 | previousWord8 = case reflectBase @s of 164 | !(Base _ _ l _) -> Parser \p s -> 165 | if isTrue# (ltAddr# l p) 166 | then case readWord8OffAddr# p (-1#) s of 167 | (# t, c #) -> OK (Just (W8# c)) p t 168 | else OK Nothing p s 169 | {-# inline previousWord8 #-} 170 | 171 | -- | Peek at the previous character. Fails if we're at the start of input. 172 | previousWord8' :: forall s e. KnownBase s => Parser s e Word8 173 | previousWord8' = case reflectBase @s of 174 | !(Base _ _ l _) -> Parser \p s -> 175 | if isTrue# (ltAddr# l p) 176 | then case readWord8OffAddr# p (-1#) s of 177 | (# t, c #) -> OK (W8# c) p t 178 | else Fail p s 179 | {-# inline previousWord8' #-} 180 | 181 | -- This version of 'eof' is not fooled by embedded nulls. 182 | binaryEof :: forall s e. KnownBase s => Parser s e () 183 | binaryEof = Parser \p s -> case readWord8OffAddr# p 0# s of 184 | (# t, c #) -> 185 | if isTrue# (0## `eqWord#` c) && isTrue# (eqAddr# p (end @s)) 186 | then OK () p t 187 | else Fail p t 188 | {-# inline binaryEof #-} 189 | --------------------------------------------------------------------------------