├── .gitignore
├── CHANGELOG.md
├── LICENSE.md
├── Makefile
├── README.md
├── Setup.hs
├── cabal.project
├── cbits
    ├── parsnip.c
    └── parsnip.h
├── parsnip.cabal
└── src
    └── Text
        ├── Parsnip.hs
        └── Parsnip
            ├── Char8.hs
            ├── Internal
                ├── Mark.hs
                ├── Parser.hs
                ├── Private.hs
                └── Simple.hs
            ├── Location.hs
            ├── Parser.hs
            ├── Word8.hs
            └── Word8
                └── Binary.hs


/.gitignore:
--------------------------------------------------------------------------------
1 | dist-newstyle
2 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | 0.1 [unreleased]
 2 | ----------------
 3 | * Added 'Text.Parsnip.Word8.Binary' to support parsing files with embedded nulls.
 4 | * Removed support for older versions of 'bytestring'.
 5 | * Moved to a separate repository from `codex`.
 6 | * Added an 'e' parameter to 'Parser' to allow propagation of user errors.
 7 | 
 8 | 0 [2021.03.23]
 9 | --------------
10 | * Split off from `engine`
11 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 | # License
  2 | 
  3 | Licensed under either of
  4 |  * Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
  5 |  * BSD 2-Clause license (https://opensource.org/licenses/BSD-2-Clause)
  6 | at your option.
  7 | 
  8 | ## BSD 2-Clause License
  9 | 
 10 | - Copyright 2019 Edward Kmett and Sean Chalmers
 11 | 
 12 | All rights reserved.
 13 | 
 14 | Redistribution and use in source and binary forms, with or without
 15 | modification, are permitted provided that the following conditions
 16 | are met:
 17 | 
 18 | 1. Redistributions of source code must retain the above copyright
 19 |    notice, this list of conditions and the following disclaimer.
 20 | 
 21 | 2. Redistributions in binary form must reproduce the above copyright
 22 |    notice, this list of conditions and the following disclaimer in the
 23 |    documentation and/or other materials provided with the distribution.
 24 | 
 25 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 26 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 28 | DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
 29 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 31 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 32 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 33 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 34 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 35 | POSSIBILITY OF SUCH DAMAGE.
 36 | 
 37 | ## Apache License
 38 | 
 39 | _Version 2.0, January 2004_  
 40 | _&lt;<http://www.apache.org/licenses/>&gt;_
 41 | 
 42 | ### Terms and Conditions for use, reproduction, and distribution
 43 | 
 44 | #### 1. Definitions
 45 | 
 46 | “License” shall mean the terms and conditions for use, reproduction, and
 47 | distribution as defined by Sections 1 through 9 of this document.
 48 | 
 49 | “Licensor” shall mean the copyright owner or entity authorized by the copyright
 50 | owner that is granting the License.
 51 | 
 52 | “Legal Entity” shall mean the union of the acting entity and all other entities
 53 | that control, are controlled by, or are under common control with that entity.
 54 | For the purposes of this definition, “control” means **(i)** the power, direct or
 55 | indirect, to cause the direction or management of such entity, whether by
 56 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the
 57 | outstanding shares, or **(iii)** beneficial ownership of such entity.
 58 | 
 59 | “You” (or “Your”) shall mean an individual or Legal Entity exercising
 60 | permissions granted by this License.
 61 | 
 62 | “Source” form shall mean the preferred form for making modifications, including
 63 | but not limited to software source code, documentation source, and configuration
 64 | files.
 65 | 
 66 | “Object” form shall mean any form resulting from mechanical transformation or
 67 | translation of a Source form, including but not limited to compiled object code,
 68 | generated documentation, and conversions to other media types.
 69 | 
 70 | “Work” shall mean the work of authorship, whether in Source or Object form, made
 71 | available under the License, as indicated by a copyright notice that is included
 72 | in or attached to the work (an example is provided in the Appendix below).
 73 | 
 74 | “Derivative Works” shall mean any work, whether in Source or Object form, that
 75 | is based on (or derived from) the Work and for which the editorial revisions,
 76 | annotations, elaborations, or other modifications represent, as a whole, an
 77 | original work of authorship. For the purposes of this License, Derivative Works
 78 | shall not include works that remain separable from, or merely link (or bind by
 79 | name) to the interfaces of, the Work and Derivative Works thereof.
 80 | 
 81 | “Contribution” shall mean any work of authorship, including the original version
 82 | of the Work and any modifications or additions to that Work or Derivative Works
 83 | thereof, that is intentionally submitted to Licensor for inclusion in the Work
 84 | by the copyright owner or by an individual or Legal Entity authorized to submit
 85 | on behalf of the copyright owner. For the purposes of this definition,
 86 | “submitted” means any form of electronic, verbal, or written communication sent
 87 | to the Licensor or its representatives, including but not limited to
 88 | communication on electronic mailing lists, source code control systems, and
 89 | issue tracking systems that are managed by, or on behalf of, the Licensor for
 90 | the purpose of discussing and improving the Work, but excluding communication
 91 | that is conspicuously marked or otherwise designated in writing by the copyright
 92 | owner as “Not a Contribution.”
 93 | 
 94 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf
 95 | of whom a Contribution has been received by Licensor and subsequently
 96 | incorporated within the Work.
 97 | 
 98 | #### 2. Grant of Copyright License
 99 | 
100 | Subject to the terms and conditions of this License, each Contributor hereby
101 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
102 | irrevocable copyright license to reproduce, prepare Derivative Works of,
103 | publicly display, publicly perform, sublicense, and distribute the Work and such
104 | Derivative Works in Source or Object form.
105 | 
106 | #### 3. Grant of Patent License
107 | 
108 | Subject to the terms and conditions of this License, each Contributor hereby
109 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
110 | irrevocable (except as stated in this section) patent license to make, have
111 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where
112 | such license applies only to those patent claims licensable by such Contributor
113 | that are necessarily infringed by their Contribution(s) alone or by combination
114 | of their Contribution(s) with the Work to which such Contribution(s) was
115 | submitted. If You institute patent litigation against any entity (including a
116 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a
117 | Contribution incorporated within the Work constitutes direct or contributory
118 | patent infringement, then any patent licenses granted to You under this License
119 | for that Work shall terminate as of the date such litigation is filed.
120 | 
121 | #### 4. Redistribution
122 | 
123 | You may reproduce and distribute copies of the Work or Derivative Works thereof
124 | in any medium, with or without modifications, and in Source or Object form,
125 | provided that You meet the following conditions:
126 | 
127 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of
128 | this License; and
129 | * **(b)** You must cause any modified files to carry prominent notices stating that You
130 | changed the files; and
131 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute,
132 | all copyright, patent, trademark, and attribution notices from the Source form
133 | of the Work, excluding those notices that do not pertain to any part of the
134 | Derivative Works; and
135 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any
136 | Derivative Works that You distribute must include a readable copy of the
137 | attribution notices contained within such NOTICE file, excluding those notices
138 | that do not pertain to any part of the Derivative Works, in at least one of the
139 | following places: within a NOTICE text file distributed as part of the
140 | Derivative Works; within the Source form or documentation, if provided along
141 | with the Derivative Works; or, within a display generated by the Derivative
142 | Works, if and wherever such third-party notices normally appear. The contents of
143 | the NOTICE file are for informational purposes only and do not modify the
144 | License. You may add Your own attribution notices within Derivative Works that
145 | You distribute, alongside or as an addendum to the NOTICE text from the Work,
146 | provided that such additional attribution notices cannot be construed as
147 | modifying the License.
148 | 
149 | You may add Your own copyright statement to Your modifications and may provide
150 | additional or different license terms and conditions for use, reproduction, or
151 | distribution of Your modifications, or for any such Derivative Works as a whole,
152 | provided Your use, reproduction, and distribution of the Work otherwise complies
153 | with the conditions stated in this License.
154 | 
155 | #### 5. Submission of Contributions
156 | 
157 | Unless You explicitly state otherwise, any Contribution intentionally submitted
158 | for inclusion in the Work by You to the Licensor shall be under the terms and
159 | conditions of this License, without any additional terms or conditions.
160 | Notwithstanding the above, nothing herein shall supersede or modify the terms of
161 | any separate license agreement you may have executed with Licensor regarding
162 | such Contributions.
163 | 
164 | #### 6. Trademarks
165 | 
166 | This License does not grant permission to use the trade names, trademarks,
167 | service marks, or product names of the Licensor, except as required for
168 | reasonable and customary use in describing the origin of the Work and
169 | reproducing the content of the NOTICE file.
170 | 
171 | #### 7. Disclaimer of Warranty
172 | 
173 | Unless required by applicable law or agreed to in writing, Licensor provides the
174 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS,
175 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
176 | including, without limitation, any warranties or conditions of TITLE,
177 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
178 | solely responsible for determining the appropriateness of using or
179 | redistributing the Work and assume any risks associated with Your exercise of
180 | permissions under this License.
181 | 
182 | #### 8. Limitation of Liability
183 | 
184 | In no event and under no legal theory, whether in tort (including negligence),
185 | contract, or otherwise, unless required by applicable law (such as deliberate
186 | and grossly negligent acts) or agreed to in writing, shall any Contributor be
187 | liable to You for damages, including any direct, indirect, special, incidental,
188 | or consequential damages of any character arising as a result of this License or
189 | out of the use or inability to use the Work (including but not limited to
190 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or
191 | any and all other commercial damages or losses), even if such Contributor has
192 | been advised of the possibility of such damages.
193 | 
194 | #### 9. Accepting Warranty or Additional Liability
195 | 
196 | While redistributing the Work or Derivative Works thereof, You may choose to
197 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or
198 | other liability obligations and/or rights consistent with this License. However,
199 | in accepting such obligations, You may act only on Your own behalf and on Your
200 | sole responsibility, not on behalf of any other Contributor, and only if You
201 | agree to indemnify, defend, and hold each Contributor harmless for any liability
202 | incurred by, or claims asserted against, such Contributor by reason of your
203 | accepting any such warranty or additional liability.
204 | 
205 | _END OF TERMS AND CONDITIONS_
206 | 
207 | ### APPENDIX: How to apply the Apache License to your work
208 | 
209 | To apply the Apache License to your work, attach the following boilerplate
210 | notice, with the fields enclosed by brackets `[]` replaced with your own
211 | identifying information. (Don't include the brackets!) The text should be
212 | enclosed in the appropriate comment syntax for the file format. We also
213 | recommend that a file or class name and description of purpose be included on
214 | the same “printed page” as the copyright notice for easier identification within
215 | third-party archives.
216 | 
217 |     Copyright [yyyy] [name of copyright owner]
218 |     
219 |     Licensed under the Apache License, Version 2.0 (the "License");
220 |     you may not use this file except in compliance with the License.
221 |     You may obtain a copy of the License at
222 |     
223 |       http://www.apache.org/licenses/LICENSE-2.0
224 |     
225 |     Unless required by applicable law or agreed to in writing, software
226 |     distributed under the License is distributed on an "AS IS" BASIS,
227 |     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
228 |     See the License for the specific language governing permissions and
229 |     limitations under the License.
230 | 
231 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PWD=$(shell pwd)
 2 | LIBRARY=$(shell basename $(PWD))
 3 | 
 4 | all:
 5 | 	cabal v2-build
 6 | 
 7 | install:
 8 | 	cabal v2-install
 9 | 
10 | run:
11 | 	cabal v2-run example
12 | 
13 | repl:
14 | 	cabal v2-repl $(LIBRARY) --repl-options=-v1 --repl-options=-ferror-spans --repl-options=-j
15 | 
16 | watch:
17 | 	ghcid -p $(LIBRARY) --color -c "cabal v2-repl $(LIBRARY) --repl-options=-fno-code --repl-options=-fno-break-on-exception --repl-options=-fno-break-on-error --repl-options=-v1 --repl-options=-ferror-spans --repl-options=-j" --restart *.cabal
18 | 
19 | lint:
20 | 	find src -name "*.hs*" -print | xargs hlint
21 | 
22 | .PHONY: all install run repl watch lint
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | parsnip
 2 | =====
 3 | 
 4 | [![Hackage](https://img.shields.io/hackage/v/parsnip.svg)](https://hackage.haskell.org/package/parsnip)
 5 | 
 6 | This is a rather minimal parsing library.
 7 | 
 8 | Use with a library like `parsers` or `parser-combinators` to fill in the missing functionality.
 9 | 
10 | Contact Information
11 | -------------------
12 | 
13 | Contributions and bug reports are welcome!
14 | 
15 | Please feel free to contact me through github or on the #haskell IRC channel on irc.freenode.net.
16 | 
17 | -Edward Kmett
18 | 
19 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
1 | packages: .
2 | 


--------------------------------------------------------------------------------
/cbits/parsnip.c:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | #include "parsnip.h"
 3 | 
 4 | /* find the first occurence of a given character or the location of the terminating null in a string. */
 5 | 
 6 | char * strchr0 (char *s, HsChar hc) {
 7 |   char c = (char)hc;
 8 |   for (; ((uint64_t)s & 7) != 0; ++s)
 9 |     if (*s == c || *s == '\0')
10 |       return s;
11 | 
12 |   uint64_t *p = (uint64_t*)s;
13 |   uint64_t magic = (uint64_t)(-1) / 0xff * 0xfe << 1 >> 1 | 1;
14 | 
15 |   uint64_t mask = (uint64_t)c;
16 |   mask |= mask << 8;
17 |   mask |= mask << 16;
18 |   mask |= mask << 32;
19 | 
20 |   for (;;) {
21 |     uint64_t w = *p++;
22 |     if ((((w + magic) ^ ~w) & ~magic) != 0 || ((((w ^ mask) + magic) ^ ~(w ^ mask)) & ~magic) != 0) {
23 |       s = (char*)(p-1);
24 |       if (*s == c || *s == '\0') return s;
25 |       if (*++s == c) return s;
26 |       if (*s == '\0') return s;
27 |       if (*++s == c) return s;
28 |       if (*s == '\0') return s;
29 |       if (*++s == c) return s;
30 |       if (*s == '\0') return s;
31 |       if (*++s == c) return s;
32 |       if (*s == '\0') return s;
33 |       if (*++s == c) return s;
34 |       if (*s == '\0') return s;
35 |       if (*++s == c) return s;
36 |       if (*s == '\0') return s;
37 |       if (*++s == c) return s;
38 |       if (*s == '\0') return s;
39 |     }
40 |   }
41 | }
42 | 


--------------------------------------------------------------------------------
/cbits/parsnip.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "HsFFI.h"
3 | 
4 | extern char * strchr0(char * s, HsChar c);
5 | 


--------------------------------------------------------------------------------
/parsnip.cabal:
--------------------------------------------------------------------------------
 1 | cabal-version: 2.2
 2 | name: parsnip
 3 | version: 0.1
 4 | synopsis: A fast, minimal parser
 5 | description:
 6 |   A fast, minimal parser.
 7 |   .
 8 |   @parsnip@ parses null-terminated input strings with an absolute minimum of
 9 |   internal state. It copies the input to a c string and works its way through
10 |   hand-over-hand with just an @Addr#@ as the internal state.
11 |   .
12 |   It uses @reflection@-like tricks to lift the input 'ByteString' into a
13 |   constraint that is then passed to just the combinators that need access
14 |   to it to compute position, snip out slices of the original, or ask the
15 |   number of bytes remaining, but none of the monadic combinators nor simple
16 |   'satisfy' or 'eof' predicates need access to this information.
17 |   .
18 |   Because of the null-termination, we can't assume that we can handle binary
19 |   inputs, but this works well for text-based source languages.
20 | homepage: https://github.com/ekmett/parsnip
21 | license: BSD-2-Clause OR Apache-2.0
22 | license-file: LICENSE.md
23 | author: Edward Kmett
24 | maintainer: Edward Kmett <ekmett@gmail.com>
25 | copyright: Copyright (c) 2019-2021 Edward Kmett
26 | stability: experimental
27 | category: Parsing, Text
28 | build-type: Simple
29 | extra-doc-files:
30 |   README.md,
31 |   CHANGELOG.md
32 | extra-source-files:
33 |   cbits/*.h
34 |   cbits/*.c
35 | 
36 | source-repository head
37 |   type: git
38 |   location: https://github.com/ekmett/parsnip
39 | 
40 | common base
41 |   default-language: Haskell2010
42 |   ghc-options:
43 |     -Wall -Wincomplete-uni-patterns -Wincomplete-record-updates
44 |     -Wredundant-constraints -Widentities -Wmissing-export-lists
45 |   include-dirs: cbits
46 |   c-sources: cbits/parsnip.c
47 |   build-depends:
48 |     attoparsec,
49 |     base >= 4.15 && < 5,
50 |     bytestring ^>= 0.11,
51 |     containers ^>= 0.6,
52 |     data-default < 0.8,
53 |     ghc-prim,
54 |     primitive ^>= 0.7
55 | 
56 | library
57 |   import: base
58 |   hs-source-dirs: src
59 |   exposed-modules:
60 |     Text.Parsnip
61 |     Text.Parsnip.Char8
62 |     Text.Parsnip.Location
63 |     Text.Parsnip.Parser
64 |     Text.Parsnip.Word8
65 |     Text.Parsnip.Word8.Binary
66 |     Text.Parsnip.Internal.Mark
67 |     Text.Parsnip.Internal.Parser
68 |     Text.Parsnip.Internal.Private
69 |     Text.Parsnip.Internal.Simple
70 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip.hs:
--------------------------------------------------------------------------------
 1 | module Text.Parsnip
 2 | ( module Text.Parsnip.Char8
 3 | , module Text.Parsnip.Location
 4 | , module Text.Parsnip.Parser
 5 | ) where
 6 | 
 7 | import Text.Parsnip.Char8
 8 | import Text.Parsnip.Location
 9 | import Text.Parsnip.Parser
10 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Char8.hs:
--------------------------------------------------------------------------------
  1 | {-# language MagicHash #-}
  2 | {-# language BlockArguments #-}
  3 | {-# language UnboxedTuples #-}
  4 | {-# language BangPatterns #-}
  5 | {-# language TypeApplications #-}
  6 | {-# language NegativeLiterals #-}
  7 | {-# language UnliftedFFITypes #-}
  8 | {-# language ScopedTypeVariables #-}
  9 | {-# language ForeignFunctionInterface #-}
 10 | module Text.Parsnip.Char8
 11 | ( satisfy
 12 | , char
 13 | , notChar
 14 | , anyChar
 15 | , digit
 16 | , space
 17 | , skipSpace
 18 | , letter_ascii
 19 | , letter_iso8859_15
 20 | , while, whileSome
 21 | , till, tillSome, tillChar
 22 | , skipWhile, skipWhileSome
 23 | , skipTill, skipTillSome, skipTillChar
 24 | , previousChar, previousChar'
 25 | , nextChar, nextChar'
 26 | ) where
 27 | 
 28 | import Control.Applicative
 29 | import qualified Data.Attoparsec.ByteString.Char8 as A
 30 | import Data.ByteString (ByteString)
 31 | import Data.Word
 32 | import GHC.Char
 33 | import GHC.Prim
 34 | import GHC.Ptr
 35 | import GHC.Types
 36 | import Text.Parsnip.Internal.Parser
 37 | import Text.Parsnip.Internal.Private
 38 | import Text.Parsnip.Parser
 39 | 
 40 | --------------------------------------------------------------------------------
 41 | -- * Char parsers
 42 | --------------------------------------------------------------------------------
 43 | 
 44 | satisfy :: (Char -> Bool) -> Parser s e Char
 45 | satisfy f = Parser \p s -> case readCharOffAddr# p 0# s of
 46 |   (# t, c #) -> if isTrue# (chr# 0# `neChar#` c) && f (C# c)
 47 |     then OK (C# c) (plusAddr# p 1#) t
 48 |     else Fail p t
 49 | {-# inline satisfy #-}
 50 | 
 51 | char :: Char -> Parser s e Char
 52 | char '\0' = empty
 53 | char r@(C# c) = Parser \p s -> case readCharOffAddr# p 0# s of
 54 |   (# t, c' #) -> if isTrue# (eqChar# c c')
 55 |     then OK r (plusAddr# p 1#) t
 56 |     else Fail p t
 57 | {-# inline char #-}
 58 | 
 59 | notChar :: Char -> Parser s e Char
 60 | notChar '\0' = anyChar
 61 | notChar (C# c) = Parser \p s -> case readCharOffAddr# p 0# s of
 62 |   (# t, c' #) -> if isTrue# (chr# 0# `neChar#` c') && isTrue# (neChar# c c')
 63 |     then OK (C# c') (plusAddr# p 1#) t
 64 |     else Fail p t
 65 | {-# inline notChar #-}
 66 | 
 67 | nextChar :: Parser s e (Maybe Char)
 68 | nextChar = Parser \p s -> case readCharOffAddr# p 0# s of
 69 |   (# t, c #) -> OK (if isTrue# (chr# 0# `neChar#` c) then Just (C# c) else Nothing) p t
 70 | {-# inline nextChar #-}
 71 | 
 72 | nextChar' :: Parser s e Char
 73 | nextChar' = Parser \p s -> case readCharOffAddr# p 0# s of
 74 |   (# t, c #) -> if isTrue# (chr# 0# `neChar#` c)
 75 |     then OK (C# c) p t
 76 |     else Fail p t
 77 | {-# inline nextChar' #-}
 78 | 
 79 | anyChar :: Parser s e Char
 80 | anyChar = Parser \p s -> case readCharOffAddr# p 0# s of
 81 |   (# t, c #) -> if isTrue# (chr# 0# `neChar#` c)
 82 |     then OK (C# c) (plusAddr# p 1#) t
 83 |     else Fail p t
 84 | {-# inline anyChar #-}
 85 | 
 86 | digit :: Parser s e Char
 87 | digit = satisfy A.isDigit 
 88 | {-# inline digit #-}
 89 | 
 90 | space :: Parser s e Char
 91 | space = satisfy A.isSpace
 92 | {-# inline space #-}
 93 | 
 94 | skipSpace :: Parser s e ()
 95 | skipSpace = skipWhile A.isSpace
 96 | {-# inline skipSpace #-}
 97 | 
 98 | letter_ascii :: Parser s e Char
 99 | letter_ascii = satisfy A.isAlpha_ascii
100 | {-# inline letter_ascii #-}
101 | 
102 | letter_iso8859_15:: Parser s e Char
103 | letter_iso8859_15 = satisfy A.isAlpha_iso8859_15
104 | {-# inline letter_iso8859_15 #-}
105 | 
106 | scan :: (Char -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #)
107 | scan f = go where
108 |   go p s = case readCharOffAddr# p 0# s of
109 |     (# t, c #) -> if isTrue# (chr# 0# `neChar#` c) && f (C# c)
110 |       then (# t, p #)
111 |       else scan f (plusAddr# p 1#) t
112 | {-# inline scan #-}
113 | 
114 | skipWhile :: (Char -> Bool) -> Parser s e ()
115 | skipWhile f = Parser \p s -> case scan f p s of
116 |   (# t, q #) -> OK () q t
117 | {-# inline [1] skipWhile #-}  
118 | 
119 | {-# RULES
120 | "skipWhile (x/=)" forall x.
121 |   skipWhile (x `neChar`) = skipTillChar x
122 | "skipWhile (/=x)" forall x.
123 |   skipWhile (`neChar` x) = skipTillChar x
124 |   #-}
125 | 
126 | skipTill :: (Char -> Bool) -> Parser s e ()
127 | skipTill p = skipWhile (not . p)
128 | {-# inline [1] skipTill #-}
129 | 
130 | {-# RULES
131 | "skipTill (x==)" forall x.
132 |   skipTill (x `eqChar`) = skipTillChar x
133 | "skipWhile (==x)" forall x.
134 |   skipWhile (`eqChar` x) = skipTillChar x
135 |   #-}
136 | 
137 | skipTillSome :: (Char -> Bool) -> Parser s e ()
138 | skipTillSome p = skipWhileSome (not . p)
139 | {-# inline skipTillSome #-}
140 | 
141 | foreign import ccall "parsnip.h" strchr0 :: Addr# -> Char# -> IO (Ptr Word8)
142 | 
143 | skipTillChar :: Char -> Parser s e ()
144 | skipTillChar (C# c) = Parser $ \p s -> case io (strchr0 p c) s of
145 |   (# t, Ptr q #) -> OK () q t
146 | {-# inline skipTillChar #-}
147 | 
148 | skipWhileSome :: (Char -> Bool) -> Parser s e ()
149 | skipWhileSome p = satisfy p *> skipWhile p
150 | {-# inline skipWhileSome #-}
151 | 
152 | while :: KnownBase s => (Char -> Bool) -> Parser s e ByteString
153 | while f = snipping (skipWhile f)
154 | {-# inline while #-}  
155 | 
156 | till :: KnownBase s => (Char -> Bool) -> Parser s e ByteString
157 | till p = snipping (skipTill p)
158 | {-# inline till #-}
159 | 
160 | tillChar :: KnownBase s => Char -> Parser s e ByteString
161 | tillChar c = snipping (skipTillChar c)
162 | {-# inline tillChar #-}
163 | 
164 | whileSome :: KnownBase s => (Char -> Bool) -> Parser s e ByteString
165 | whileSome p = snipping (skipWhileSome p)
166 | {-# inline whileSome #-}
167 | 
168 | tillSome :: KnownBase s => (Char -> Bool) -> Parser s e ByteString
169 | tillSome p = snipping (skipTillSome p)
170 | {-# inline tillSome #-}
171 | 
172 | -- Peek at the previous character. Always succeeds. 
173 | previousChar :: forall s e. KnownBase s => Parser s e (Maybe Char)
174 | previousChar = case reflectBase @s of
175 |   !(Base _ _ l _) -> Parser \p s ->
176 |     if isTrue# (ltAddr# l p)
177 |     then case readCharOffAddr# p (-1#) s of
178 |       (# t, c #) -> OK (Just (C# c)) p t
179 |     else OK Nothing p s
180 | 
181 | -- Peek at the previous character. Fails if we're at the start of input.
182 | previousChar' :: forall s e. KnownBase s => Parser s e Char
183 | previousChar' = case reflectBase @s of
184 |   !(Base _ _ l _) -> Parser \p s ->
185 |     if isTrue# (ltAddr# l p)
186 |     then case readCharOffAddr# p (-1#) s of
187 |       (# t, c #) -> OK (C# c) p t
188 |     else Fail p s
189 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Internal/Mark.hs:
--------------------------------------------------------------------------------
  1 | {-# language MagicHash #-}
  2 | {-# language TypeApplications #-}
  3 | {-# language ScopedTypeVariables #-}
  4 | {-# language PatternSynonyms #-}
  5 | {-# language BlockArguments #-}
  6 | {-# language BangPatterns #-}
  7 | {-# language UnboxedTuples #-}
  8 | module Text.Parsnip.Internal.Mark
  9 | ( Mark(Mark,Mk)
 10 | , minusMark
 11 | , mark, release
 12 | , snip, snipping
 13 | ) where
 14 | 
 15 | import Data.ByteString as B
 16 | import Data.Word
 17 | import GHC.Arr
 18 | import GHC.Prim
 19 | import GHC.Ptr
 20 | import GHC.Types
 21 | import Text.Parsnip.Internal.Parser
 22 | import Text.Parsnip.Internal.Private
 23 | 
 24 | ---------------------------------------------------------------------------------------
 25 | -- * Marks
 26 | ---------------------------------------------------------------------------------------
 27 | 
 28 | newtype Mark s = Mark (Ptr Word8) -- unexposed, so known valid addresses
 29 |   deriving (Eq,Ord,Show)
 30 | 
 31 | pattern Mk :: Addr# -> Mark s
 32 | pattern Mk a = Mark (Ptr a)
 33 | {-# complete Mk #-} -- if only...
 34 | 
 35 | instance KnownBase s => Bounded (Mark s) where
 36 |   minBound = Mk (start @s)
 37 |   maxBound = Mk (end @s)
 38 |   {-# inline minBound #-}
 39 |   {-# inline maxBound #-}
 40 | 
 41 | instance KnownBase s => Enum (Mark s) where
 42 |   fromEnum p = minusMark p minBound
 43 |   toEnum = case reflectBase @s of
 44 |     !(Base _ _ l h) -> \(I# i) -> if isTrue# (0# <=# i) && isTrue# (i <=# minusAddr# h l)
 45 |       then Mk (plusAddr# l i)
 46 |       else error "Mark.toEnum: Out of bounds"
 47 |   succ (Mk p) = if isTrue# (ltAddr# p (end @s))
 48 |       then Mk (plusAddr# p 1#)
 49 |       else error "Mark.succ: Out of bounds"
 50 |   pred (Mk p) = if isTrue# (ltAddr# (start @s) p)
 51 |       then Mk (plusAddr# p (negateInt# 1#))
 52 |       else error "Mark.pred: Out of bounds"
 53 |   enumFrom (Mk p) = ptrs1 p (end @s)
 54 |   enumFromTo (Mk p) (Mk q) = ptrs1 p q
 55 |   enumFromThen = case reflectBase @s of
 56 |     !(Base _ _ l h) -> \(Mk p) (Mk q) -> if isTrue# (gtAddr# p q)
 57 |       then dptrs p (minusAddr# q p) l
 58 |       else ptrs p (minusAddr# q p) h
 59 |   enumFromThenTo (Mk p) (Mk q) (Mk r) = if isTrue# (gtAddr# p q)
 60 |     then dptrs p (minusAddr# q p) r
 61 |     else ptrs p (minusAddr# q p) r
 62 |   {-# inline fromEnum #-}
 63 |   {-# inline toEnum #-}
 64 |   {-# inline succ #-}
 65 |   {-# inline pred #-}
 66 |   {-# inline enumFrom #-}
 67 |   {-# inline enumFromTo #-}
 68 |   {-# inline enumFromThen #-}
 69 |   {-# inline enumFromThenTo #-}
 70 | 
 71 | instance Ix (Mark s) where
 72 |   range (Mk p, Mk q) = ptrs1 p q
 73 |   unsafeIndex (p,_) r = minusMark r p
 74 |   inRange (Mk p, Mk q) (Mk r) = isTrue# (leAddr# p r) && isTrue# (leAddr# r q)
 75 |   unsafeRangeSize = uncurry minusMark
 76 |   {-# inline range #-}
 77 |   {-# inline unsafeIndex #-}
 78 |   {-# inline inRange #-}
 79 |   {-# inline unsafeRangeSize #-}
 80 | 
 81 | ptrs1 :: Addr# -> Addr# -> [Mark s]
 82 | ptrs1 l h
 83 |   | isTrue# (leAddr# l h) = Mk l : ptrs1 (plusAddr# l 1#) h
 84 |   | otherwise = []
 85 | {-# inline ptrs1 #-}
 86 | 
 87 | ptrs :: Addr# -> Int# -> Addr# -> [Mark s]
 88 | ptrs l d h
 89 |   | isTrue# (leAddr# l h) = Mk l : ptrs (plusAddr# l d) d h
 90 |   | otherwise = []
 91 | {-# inline ptrs #-}
 92 | 
 93 | dptrs :: Addr# -> Int# -> Addr# -> [Mark s]
 94 | dptrs h d l
 95 |   | isTrue# (leAddr# l h) = Mark (Ptr h) : ptrs (plusAddr# h d) d l
 96 |   | otherwise = []
 97 | {-# inline dptrs #-}
 98 | 
 99 | minusMark :: Mark s -> Mark s -> Int
100 | minusMark (Mk p) (Mk q) = I# (minusAddr# p q)
101 | {-# inline minusMark #-}
102 | 
103 | -- | Record the current position
104 | mark :: Parser s e (Mark s)
105 | mark = Parser \p s -> OK (Mk p) p s
106 | {-# inline mark #-}
107 | 
108 | -- | Return to a previous location.
109 | release :: Mark s -> Parser s e ()
110 | release (Mk q) = Parser \_ s -> OK () q s
111 | {-# inline release #-}
112 | 
113 | -- | To grab all the text covered by a given parser, consider using @snipping@
114 | -- and applying it to a combinator simply recognizes the content rather than returns
115 | -- it. 'snipping' a 'ByteString' is significantly cheaper than assembling one from
116 | -- smaller fragments.
117 | snip :: forall s. KnownBase s => Mark s -> Mark s -> ByteString
118 | snip = case reflectBase @s of
119 |   !(Base x g _ _) -> \(Mk i) (Mk j) ->
120 |     if isTrue# (geAddr# i j)
121 |     then mkBS x g (minusAddr# i j)
122 |     else B.empty
123 | {-# inline snip #-}
124 | 
125 | snipping :: forall s e a. KnownBase s => Parser s e a -> Parser s e ByteString
126 | snipping = case reflectBase @s of
127 |   !(Base b g r _) -> \(Parser m) -> Parser \p s -> case m p s of
128 |     (# o, q, t #) ->
129 |       (# setRes
130 |         ( if isTrue# (geAddr# q p)
131 |           then mkBS (b `plusAddr#` minusAddr# p r) g (minusAddr# q p)
132 |           else B.empty
133 |         ) o
134 |       , q, t #)
135 | {-# inline snipping #-}
136 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Internal/Parser.hs:
--------------------------------------------------------------------------------
  1 | {-# language CPP #-}
  2 | {-# language PatternSynonyms #-}
  3 | {-# language OverloadedStrings #-}
  4 | {-# language MagicHash #-}
  5 | {-# language TypeFamilies #-}
  6 | {-# language UnboxedSums #-}
  7 | {-# language StandaloneDeriving #-}
  8 | {-# language UnboxedTuples #-}
  9 | {-# language ImplicitParams #-}
 10 | {-# language ConstraintKinds #-}
 11 | {-# language LambdaCase #-}
 12 | {-# language ScopedTypeVariables #-}
 13 | {-# language RankNTypes #-}
 14 | {-# language BangPatterns #-}
 15 | {-# language ForeignFunctionInterface #-}
 16 | {-# language KindSignatures #-}
 17 | {-# language UnliftedFFITypes #-}
 18 | {-# language TypeApplications #-}
 19 | {-# language AllowAmbiguousTypes #-}
 20 | {-# language BlockArguments #-}
 21 | {-# language ViewPatterns #-}
 22 | {-# language UnboxedTuples #-}
 23 | {-# language MagicHash #-}
 24 | {-# language PatternSynonyms #-}
 25 | {-# language UnliftedNewtypes #-}
 26 | {-# options_ghc -O2 #-}
 27 | 
 28 | module Text.Parsnip.Internal.Parser
 29 | (
 30 | -- * Parser
 31 |   Parser(..)
 32 | , Res(Res#,Good,Bad,Ugly)
 33 | , mapRes, setRes
 34 | , Result, pattern OK, pattern Fail, pattern Err
 35 | , mapResult, setResult
 36 | , try
 37 | -- * Unsafe literals
 38 | , lit, litN, word8
 39 | -- * Guts
 40 | , Base(..), bytes, start, end
 41 | , KnownBase(..)
 42 | , parse
 43 | ) where
 44 | 
 45 | import Control.Applicative
 46 | import Control.Monad
 47 | import Control.Monad.Primitive
 48 | import qualified Data.ByteString as B
 49 | import Data.ByteString.Internal (ByteString(..))
 50 | import qualified Data.ByteString.Internal as B
 51 | import Data.Primitive.ByteArray
 52 | import Data.String
 53 | import Foreign.C.Types
 54 | import Foreign.ForeignPtr
 55 | import GHC.ForeignPtr
 56 | import GHC.Prim
 57 | import GHC.Ptr
 58 | import GHC.Types
 59 | import GHC.Word
 60 | import System.IO.Unsafe
 61 | 
 62 | import Text.Parsnip.Location
 63 | import Text.Parsnip.Internal.Private
 64 | 
 65 | newtype Res e a = Res# (# a | (##) | e #)
 66 | 
 67 | pattern Good :: a -> Res e a
 68 | pattern Good a = Res# (# a | | #)
 69 | 
 70 | pattern Bad :: Res e a
 71 | pattern Bad = Res# (# | (##) | #)
 72 | 
 73 | -- user error, don't recover
 74 | pattern Ugly :: e -> Res e a
 75 | pattern Ugly e = Res# (# | | e #)
 76 | 
 77 | {-# complete Good, Bad, Ugly :: Res #-}
 78 | 
 79 | mapRes :: (a -> b) -> Res e a -> Res e b
 80 | mapRes f (Good a) = Good $! f a
 81 | mapRes _ Bad = Bad
 82 | mapRes _ (Ugly e) = Ugly e
 83 | {-# inline mapRes #-}
 84 | 
 85 | setRes :: b -> Res e a -> Res e b
 86 | setRes b (Good _) = Good b
 87 | setRes _ Bad = Bad
 88 | setRes _ (Ugly e) = Ugly e
 89 | {-# inline setRes #-}
 90 | 
 91 | --------------------------------------------------------------------------------
 92 | -- * Result
 93 | --------------------------------------------------------------------------------
 94 | 
 95 | type Result s e a = (# Res e a, Addr#, State# s #)
 96 | 
 97 | pattern OK :: a -> Addr# -> State# s -> Result s e a
 98 | pattern OK a p s = (# Good a, p, s #)
 99 | 
100 | pattern Fail :: Addr# -> State# s -> Result s e a
101 | pattern Fail p s = (# Bad, p, s #)
102 | 
103 | pattern Err :: e -> Addr# -> State# s -> Result s e a
104 | pattern Err e p s = (# Ugly e, p, s #)
105 | 
106 | {-# complete OK, Fail, Err #-}
107 | 
108 | mapResult :: (a -> b) -> Result s e a -> Result s e b
109 | mapResult f (# o, p, s #) = (# mapRes f o, p, s #)
110 | {-# inline mapResult #-}
111 | 
112 | setResult :: b -> Result s e a -> Result s e b
113 | setResult b (# o, p, s #) = (# setRes b o, p, s #)
114 | {-# inline setResult #-}
115 | 
116 | --------------------------------------------------------------------------------
117 | -- * Result
118 | --------------------------------------------------------------------------------
119 | 
120 | newtype Parser s e a = Parser
121 |  { runParser :: Addr# -> State# s -> Result s e a
122 |  }
123 | 
124 | instance Functor (Parser s e) where
125 |   fmap f (Parser m) = Parser \ p s -> mapResult f (m p s)
126 |   {-# inline fmap #-}
127 |   b <$ Parser m = Parser \ p s -> case m p s of
128 |     OK _ q t -> OK b q t
129 |     Fail q t -> Fail q t
130 |     Err e q t -> Err e q t
131 |   {-# inline (<$) #-}
132 | 
133 | instance Applicative (Parser s e) where
134 |   pure a = Parser \ p s -> OK a p s
135 |   {-# inline pure #-}
136 |   Parser m <*> Parser n = Parser \p s -> case m p s of
137 |     Fail q t -> Fail q t
138 |     OK f q t -> mapResult f (n q t)
139 |     Err e q t -> Err e q t
140 |   {-# inline (<*>) #-}
141 |   Parser m *> Parser n = Parser \p s -> case m p s of
142 |     Fail q t -> Fail q t
143 |     OK _ q t -> n q t
144 |     Err e q t -> Err e q t
145 |   {-# inline (*>) #-}
146 |   Parser m <* Parser n = Parser \p s -> case m p s of
147 |     OK a q t -> setResult a (n q t)
148 |     x -> x
149 |   {-# inline (<*) #-}
150 | 
151 | instance Monad (Parser s e) where
152 |   Parser m >>= f = Parser \p s -> case m p s of
153 |     Fail q t -> Fail q t
154 |     Err e q t -> Err e q t
155 |     OK a q t -> runParser (f a) q t
156 |   {-# inline (>>=) #-}
157 |   (>>) = (*>)
158 |   {-# inline (>>) #-}
159 | 
160 | instance Alternative (Parser s e) where
161 |   Parser m <|> Parser n = Parser \ p s -> case m p s of
162 |     Fail _ t -> n p t
163 |     OK a q t -> OK a q t
164 |     Err _ _ t -> n p t
165 |   {-# inline (<|>) #-}
166 |   empty = Parser Fail
167 |   {-# inline empty #-}
168 | 
169 | instance MonadPlus (Parser s e) where
170 |   mplus = (<|>)
171 |   {-# inline mplus #-}
172 |   mzero = empty
173 |   {-# inline mzero #-}
174 | 
175 | instance PrimMonad (Parser s e) where
176 |   type PrimState (Parser s e) = s
177 |   primitive f = Parser \p s -> case f s of
178 |     (# t, a #) -> OK a p t
179 |   {-# inline primitive #-}
180 | 
181 | -- perhaps this interface is a little low level. hrmm
182 | instance a ~ ByteString => IsString (Parser s e a) where
183 |   fromString "" = pure B.empty
184 |   fromString xs = Parser \p s -> case sizeofMutableByteArray# ba of
185 |     n -> case io (c_strncmp (mutableByteArrayContents# ba) p (fromIntegral $ I# n)) s of
186 |       (# t, i #)
187 |         | i /= 0    -> Fail p t
188 |         | otherwise -> OK bs (plusAddr# p n) t
189 |     where !(MutableByteArray ba) = pinnedByteArrayFromString0 xs
190 |           bs = B.PS (ForeignPtr (mutableByteArrayContents# ba) (PlainPtr ba)) 0 (I# (sizeofMutableByteArray# ba))
191 | 
192 | try :: Parser s e a -> Parser s e a
193 | try (Parser m) = Parser $ \p s -> case m p s of
194 |   OK a q t -> OK a q t
195 |   Fail _ t -> Fail p t
196 |   Err e _ t -> Err e p t
197 | 
198 | word8 :: Word8 -> Parser s e Word8
199 | word8 0 = empty
200 | word8 r@(W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of
201 |   (# t, c' #) -> if isTrue# (c `eqWord#` c')
202 |     then OK r (plusAddr# p 1#) t
203 |     else Fail p t
204 | {-# inline word8 #-}
205 | 
206 | ---------------------------------------------------------------------------------------
207 | -- * Super-unsafe literal parsers
208 | ---------------------------------------------------------------------------------------
209 | 
210 | -- | super-duper unsafe. Fabricates bytestrings that directly reference constant memory
211 | litN :: Addr# -> CSize -> Parser s e ByteString
212 | litN q n = Parser \p s -> case io (c_strncmp p q n) s of
213 |     (# t, 0 #) -> OK bs (p `plusAddr#` csize n) t
214 |     (# t, _ #) -> Fail p t
215 |   where bs = unsafeLiteralByteStringN q n
216 | 
217 | -- | Super unsafe. Fabricates a bytestring that directly reference constant memory.
218 | --
219 | -- Usage:
220 | --
221 | -- @
222 | -- hello = lit "hello"#
223 | -- @
224 | lit :: Addr# -> Parser s e ByteString
225 | lit q = litN q (pure_strlen q)
226 | 
227 | literalForeignPtrContents :: ForeignPtrContents
228 | literalForeignPtrContents = unsafeDupablePerformIO $ primitive \s -> case newByteArray# 0# s of
229 |   (# t, a #) -> (# t, PlainPtr a #)
230 | -- {-# noinline literalForeignPtrContents #-}
231 | 
232 | unsafeLiteralForeignPtr :: Addr# -> ForeignPtr Word8
233 | unsafeLiteralForeignPtr addr = ForeignPtr addr literalForeignPtrContents
234 | 
235 | unsafeLiteralByteStringN :: Addr# -> CSize -> ByteString
236 | unsafeLiteralByteStringN p n = BS (unsafeLiteralForeignPtr p) (fromIntegral n)
237 | {-# noinline unsafeLiteralByteStringN #-}
238 | 
239 | --unsafeLiteralByteString :: Addr# -> ByteString
240 | --unsafeLiteralByteString p = unsafeLiteralByteStringN p (pure_strlen p)
241 | 
242 | -- Given a 'Base' you can do two things with it. While in a Parser, you're allowed to
243 | -- access the memory between the start and end addresses, as they'll be alive.
244 | --
245 | -- However, you can always reconstruct a bytestring from the oriignal (non-0 terminated
246 | -- data using 'bytes', and that will remain valid forever or until appropriately
247 | -- garbage collected.
248 | --
249 | -- In general, in a Parser you should try to access the memory in the null-terminated
250 | -- region for cache locality.
251 | --
252 | -- Afterwards, or to report bytestrings, you should trim them off the original, this
253 | -- way, no additional memory needs to be copied, and the garbage collector will just
254 | -- manage the storage of the bytestrings you cut off of the parent for you.
255 | 
256 | data Base s = Base
257 |   { baseOriginal  :: Addr# -- the start of a valid bytestring
258 |   , baseContents  :: ForeignPtrContents -- memory management for that bytestring
259 |   , baseStart :: Addr# -- the start of our null terminated copy of the bytestring
260 |   , baseEnd :: Addr# -- the end of our null terminated copy (points to the '\0')
261 |   }
262 | 
263 | bytes :: forall s. KnownBase s => ByteString
264 | bytes = case reflectBase @s of
265 |   !(Base b g p q) -> mkBS b g (minusAddr# q p)
266 | {-# inline bytes #-}
267 | 
268 | start :: forall s. KnownBase s => Addr#
269 | start = baseStart (reflectBase @s)
270 | {-# inline start #-}
271 | 
272 | end :: forall s. KnownBase s => Addr#
273 | end = baseEnd (reflectBase @s)
274 | {-# inline end #-}
275 | 
276 | class KnownBase (s :: Type) where
277 |   reflectBase :: Base s
278 | 
279 | --------------------------------------------------------------------------------
280 | -- * Parsing
281 | --------------------------------------------------------------------------------
282 | 
283 | parse :: (forall s. KnownBase s => Parser s e a) -> ByteString -> Either (Location, Maybe e) a
284 | parse m bs@(B.BS (ForeignPtr b g) (I# len)) = unsafeDupablePerformIO $
285 |   B.useAsCString bs \(Ptr p) -> -- now it is null terminated
286 |     IO \s -> let base = Base b g p (plusAddr# p len) in
287 |       case runParser (withBase (\_ -> m) base proxy#) p s of
288 |         (# n, q, t #) -> (# t, finish base q n #)
289 | 
290 | finish :: Base s -> Addr# -> Res e a -> Either (Location, Maybe e) a
291 | finish (Base b g q r) p = \case
292 |   Good a -> Right a
293 |   Bad    -> Left (location (mkBS b g (minusAddr# r q)) (I# (minusAddr# p q)), Nothing)
294 |   Ugly e -> Left (location (mkBS b g (minusAddr# r q)) (I# (minusAddr# p q)), Just e)
295 | {-# inline finish #-}
296 | 
297 | data Wrap s e a = Wrap (KnownBase s => Proxy# s -> Parser s e a)
298 | 
299 | withBase :: (KnownBase s => Proxy# s -> Parser s e a) -> Base s -> Proxy# s -> Parser s e a
300 | withBase f x y = magicDict (Wrap f) x y
301 | {-# inline withBase #-}
302 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Internal/Private.hs:
--------------------------------------------------------------------------------
  1 | {-# language MagicHash #-}
  2 | {-# language UnboxedTuples #-}
  3 | {-# language BlockArguments #-}
  4 | {-# language BangPatterns #-}
  5 | {-# language ViewPatterns #-}
  6 | {-# language UnliftedFFITypes #-}
  7 | {-# language UnliftedNewtypes #-}
  8 | 
  9 | -- | the proverbial junk drawer
 10 | module Text.Parsnip.Internal.Private
 11 | ( io
 12 | , mutableByteArrayContents#
 13 | , pinnedByteArrayFromString0
 14 | , pinnedByteArrayFromStringN0
 15 | , c_memchr
 16 | , c_strlen
 17 | , c_strncmp
 18 | , pure_strlen
 19 | , cint
 20 | , csize
 21 | , mkBS
 22 | , ForeignString(..)
 23 | , packForeignString
 24 | , withForeignString
 25 | ) where
 26 | 
 27 | import Data.Primitive.ByteArray
 28 | import Data.Primitive.PrimArray
 29 | import Data.Primitive.Ptr
 30 | import Data.ByteString.Internal (ByteString(..))
 31 | import qualified Data.ByteString.Internal as B
 32 | import Data.String
 33 | import Data.Word
 34 | import Foreign.ForeignPtr
 35 | import Foreign.C.String
 36 | import Foreign.C.Types
 37 | import GHC.ForeignPtr
 38 | import GHC.Prim
 39 | import GHC.Ptr
 40 | import GHC.Types
 41 | import System.IO.Unsafe
 42 | import Unsafe.Coerce
 43 | 
 44 | io :: IO a -> State# s -> (# State# s, a #)
 45 | io = unsafeCoerce#
 46 | 
 47 | -- | Missing primitive
 48 | mutableByteArrayContents# :: MutableByteArray# s -> Addr#
 49 | mutableByteArrayContents# arr = byteArrayContents# (unsafeCoerce# arr)
 50 | 
 51 | pinnedByteArrayFromString0 :: String -> MutableByteArray RealWorld
 52 | pinnedByteArrayFromString0 xs = pinnedByteArrayFromStringN0 (length xs) xs
 53 | 
 54 | pinnedByteArrayFromStringN0 :: Int -> String -> MutableByteArray RealWorld
 55 | pinnedByteArrayFromStringN0 n ys = unsafeDupablePerformIO do
 56 |   marr <- newPinnedByteArray (n+1)
 57 |   let go !ix [] = if ix == n
 58 |         then writeByteArray marr ix (0 :: Word8)
 59 |         else fail "pinnedByteArrayFromStringN: list length less than specified size"
 60 |       go !ix (x : xs) = if ix < n
 61 |         then do
 62 |           writeByteArray marr ix (B.c2w x)
 63 |           go (ix + 1) xs
 64 |         else fail "pinnedByteArrayFromStringN: list length greater than specified size"
 65 |   go 0 ys
 66 |   pure marr
 67 | 
 68 | -- | An _immutable_ foreign cstring. This is mostly useful for things like calling strstr through ffi
 69 | -- where the needle needs to be null terminated.
 70 | newtype ForeignString = ForeignString (ForeignPtr Word8)
 71 |   deriving (Eq,Ord)
 72 | 
 73 | instance Show ForeignString where
 74 |   showsPrec d (ForeignString fp) = showsPrec d $ unsafeDupablePerformIO $ withForeignPtr (castForeignPtr fp) peekCString
 75 | 
 76 | instance IsString ForeignString where
 77 |   fromString s = unsafeDupablePerformIO $ do
 78 |     cstr <- newCString s
 79 |     ForeignString <$> newForeignPtr_ (castPtr cstr)
 80 | 
 81 | packForeignString :: ByteString -> ForeignString
 82 | packForeignString (PS _fp (plusForeignPtr _fp -> fp) n) = unsafeDupablePerformIO do
 83 |   MutableByteArray mba <- newPinnedByteArray (n+1)
 84 |   let mpa = MutablePrimArray mba :: MutablePrimArray RealWorld Word8 -- See haskell/primitive#253
 85 |   withForeignPtr fp $ \p -> copyPtrToMutablePrimArray mpa 0 p n
 86 |   writePrimArray mpa 0 (0 :: Word8) -- null terminate
 87 |   -- PrimArray ba <- unsafeFreezePrimArray mpa
 88 |   pure $ ForeignString $ ForeignPtr (mutableByteArrayContents# mba) (PlainPtr mba)
 89 | 
 90 | withForeignString :: ForeignString -> (CString -> IO r) -> IO r
 91 | withForeignString (ForeignString fp) = withForeignPtr (castForeignPtr fp)
 92 | 
 93 | ---------------------------------------------------------------------------------------
 94 | -- * C
 95 | ---------------------------------------------------------------------------------------
 96 | 
 97 | foreign import ccall unsafe "string.h memchr" c_memchr :: Addr# -> CInt -> CSize -> IO (Ptr ())
 98 | foreign import ccall unsafe "string.h strncmp" c_strncmp :: Addr# -> Addr# -> CSize -> IO CInt
 99 | foreign import ccall unsafe "string.h strlen" c_strlen :: Addr# -> IO CSize
100 | foreign import ccall unsafe "string.h strlen" pure_strlen :: Addr# -> CSize
101 | 
102 | cint :: CInt -> Int#
103 | cint (fromIntegral -> I# i) = i
104 | {-# inline cint #-}
105 | 
106 | csize :: CSize -> Int#
107 | csize (fromIntegral -> I# i) = i
108 | {-# inline csize #-}
109 | 
110 | mkBS :: Addr# -> ForeignPtrContents -> Int# -> ByteString
111 | mkBS b g l = BS (ForeignPtr b g) (I# l)
112 | {-# inline mkBS #-}
113 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Internal/Simple.hs:
--------------------------------------------------------------------------------
 1 | {-# language TypeApplications #-}
 2 | {-# language BlockArguments #-}
 3 | {-# language ScopedTypeVariables #-}
 4 | {-# language BangPatterns #-}
 5 | {-# language MagicHash #-}
 6 | module Text.Parsnip.Internal.Simple
 7 | ( SimpleResult(..)
 8 | , relative
 9 | , absolute
10 | ) where
11 | 
12 | import Data.ByteString
13 | import GHC.Prim
14 | import GHC.Types
15 | import Text.Parsnip.Internal.Parser
16 | import Text.Parsnip.Internal.Private
17 | 
18 | --------------------------------------------------------------------------------
19 | -- * Simple parsers
20 | --------------------------------------------------------------------------------
21 | 
22 | data SimpleResult e a
23 |   = SimpleOK a {-# unpack #-} !Int
24 |   | SimpleFail {-# unpack #-} !Int
25 |   | SimpleErr e {-# unpack #-} !Int
26 | 
27 | relative :: forall s e a. KnownBase s => (ByteString -> SimpleResult e a) -> Parser s e a
28 | relative = case reflectBase @s of
29 |   !(Base x g q r) -> \f -> Parser \p s -> case f $ mkBS (x `plusAddr#` minusAddr# p q) g (minusAddr# r p) of
30 |     SimpleOK a (I# i)  -> OK a (plusAddr# p i) s
31 |     SimpleFail (I# i)  -> Fail (plusAddr# p i) s
32 |     SimpleErr e (I# i) -> Err e (plusAddr# p i) s
33 | {-# inline relative #-}
34 | 
35 | absolute :: forall s e a. KnownBase s => (ByteString -> Int -> SimpleResult e a) -> Parser s e a
36 | absolute = case reflectBase @s of
37 |   !(Base x g q r) -> \f -> Parser \p s -> case f (mkBS x g (minusAddr# r q)) $ I# (minusAddr# p q) of
38 |     SimpleOK a (I# i)  -> OK a (plusAddr# p i) s
39 |     SimpleFail (I# i)  -> Fail (plusAddr# p i) s
40 |     SimpleErr e (I# i) -> Err e (plusAddr# p i) s
41 | {-# inline absolute #-}
42 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Location.hs:
--------------------------------------------------------------------------------
 1 | module Text.Parsnip.Location
 2 | ( location
 3 | , Location(..)
 4 | , located
 5 | ) where
 6 | 
 7 | import Data.ByteString (ByteString)
 8 | import qualified Data.ByteString.Char8 as B
 9 | import Data.Maybe
10 | 
11 | -- | Deliberately lazy, so we don't bother to compute the
12 | -- exact line and column until forced.
13 | data Location = Location
14 |   { locationLine   :: Int
15 |   , locationColumn :: Int
16 |   , locationSource :: ByteString
17 |   } deriving Show
18 | 
19 | -- | /O(n)/ in the size of the input bytestring to actually use one.
20 | -- If used primarily for error location reporting, then this probably
21 | -- puts the burden in the right place.
22 | location :: ByteString -> Int -> Location
23 | location bs j = Location (B.count '\n' before) (j - k) $ B.takeWhile (/='\n') $ B.drop k bs where
24 |   before = B.take j bs
25 |   k = fromMaybe 0 $ B.elemIndexEnd '\n' before
26 | {-# inline location #-}
27 | 
28 | -- | Use this til we get a pretty printer in here
29 | located :: Location -> String -> String
30 | located (Location l c bs) msg = Prelude.unlines
31 |   [ show l ++ ":" ++ show c ++ " " ++ msg
32 |   , ls
33 |   , show l ++ " | " ++ B.unpack bs
34 |   , ls ++ Prelude.replicate c ' ' ++ "^"
35 |   ] where ls = Prelude.replicate (length (show l) + 1) ' ' ++ "|"
36 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Parser.hs:
--------------------------------------------------------------------------------
  1 | {-# language BlockArguments #-}
  2 | {-# language MagicHash #-}
  3 | {-# language UnboxedTuples #-}
  4 | {-# language ScopedTypeVariables #-}
  5 | {-# language UnliftedFFITypes #-}
  6 | {-# language BangPatterns #-}
  7 | {-# language RankNTypes #-}
  8 | {-# language TypeApplications #-}
  9 | {-# language LambdaCase #-}
 10 | {-# language AllowAmbiguousTypes #-}
 11 | {-# language PolyKinds #-}
 12 | {-# language CPP #-}
 13 | module Text.Parsnip.Parser
 14 | ( Parser, KnownBase
 15 | , parse
 16 | ----------------------------
 17 | , try
 18 | , atEnd
 19 | , endOfInput
 20 | ----------------------------
 21 | , tillSubstring
 22 | , skipTillSubstring
 23 | , skip
 24 | , skip0
 25 | , take
 26 | ----------------------------
 27 | , Mark
 28 | , mark
 29 | , release
 30 | , snip
 31 | , snipping
 32 | ----------------------------
 33 | , input
 34 | , pos
 35 | , betwixt
 36 | , rest
 37 | ----------------------------
 38 | , loc
 39 | ) where
 40 | 
 41 | import Data.ByteString (ByteString)
 42 | import qualified Data.ByteString as B
 43 | import qualified Data.ByteString.Internal as B
 44 | import qualified Data.ByteString.Unsafe as B
 45 | import Foreign.C.Types
 46 | import GHC.ForeignPtr
 47 | import GHC.Prim
 48 | import GHC.Ptr
 49 | import GHC.Types
 50 | import Prelude hiding (take)
 51 | import Text.Parsnip.Internal.Mark
 52 | import Text.Parsnip.Internal.Parser
 53 | import Text.Parsnip.Internal.Private
 54 | import Text.Parsnip.Internal.Simple
 55 | import Text.Parsnip.Location
 56 | 
 57 | --------------------------------------------------------------------------------
 58 | -- * Combinators
 59 | --------------------------------------------------------------------------------
 60 | 
 61 | atEnd :: Parser s e Bool
 62 | atEnd = Parser \p s -> case readCharOffAddr# p 0# s of
 63 |   (# t, c #) -> OK (isTrue# do chr# 0# `eqChar#` c) p t
 64 | 
 65 | endOfInput :: Parser s e ()
 66 | endOfInput = Parser \p s -> case readCharOffAddr# p 0# s of
 67 |   (# t, c #) -> (# if isTrue# do chr# 0# `eqChar#` c then Good () else Bad, p, t #)
 68 | 
 69 | take :: forall s e. KnownBase s => Int -> Parser s e ByteString
 70 | take = case reflectBase @s of
 71 |   !(Base b g q r) -> \(I# i) -> Parser \p s ->
 72 |     if isTrue# (minusAddr# r p <# i)
 73 |     then Fail p s
 74 |     else OK (B.PS (ForeignPtr (b `plusAddr#` minusAddr# p q) g) 0 (I# i)) (plusAddr# p i) s
 75 | 
 76 | -- | We can do this two ways, this way is O(1) but needs KnownBase.
 77 | skip :: forall s e. KnownBase s => Int -> Parser s e ()
 78 | skip = \(I# i) -> Parser \p s ->
 79 |     if isTrue# (minusAddr# r p <# i)
 80 |     then Fail p s
 81 |     else OK () (plusAddr# p i) s
 82 |   where r = end @s
 83 | 
 84 | -- | Linear time, but no @KnownBase@ dependency.
 85 | skip0 :: Int -> Parser s e ()
 86 | skip0 n@(I# i) = Parser \p s -> case io (c_memchr p 0 (fromIntegral n)) s of
 87 |   (# t, Ptr q #) -> if isTrue# (q `eqAddr#` nullAddr#)
 88 |     then OK () (plusAddr# p i) t
 89 |     else Fail p s
 90 | 
 91 | tillSubstring :: KnownBase s => ByteString -> Parser s e ByteString
 92 | tillSubstring needle = relative \bs -> case p bs of
 93 |     (r, _) -> SimpleOK r (B.length r)
 94 |   where p = B.breakSubstring needle
 95 | 
 96 | foreign import ccall unsafe "string.h" strstr :: Addr# -> Addr# -> IO (Ptr ())
 97 | foreign import ccall unsafe "string.h" strlen :: Addr# -> IO CSize
 98 | 
 99 | skipTillSubstring :: ByteString -> Parser s e ()
100 | skipTillSubstring bneedle = case B.length bneedle of
101 |   0 -> pure ()
102 |   1 -> () <$ word8 (B.unsafeHead bneedle)
103 |   _ -> let fneedle = packForeignString bneedle
104 |     in Parser \p s -> case io
105 |       ( withForeignString fneedle \(Ptr cneedle)->
106 |           strstr p cneedle >>= \q -> if q == nullPtr
107 |             then plusPtr (Ptr p) . fromIntegral <$> strlen p
108 |             else pure q
109 |       ) s of (# t, Ptr r #) -> OK () r t
110 | 
111 | 
112 | --skipTillSubstring :: ByteString -> Parser s e ()
113 | --skipTillSubstring needle = relative \bs -> case p bs of
114 | --    (r, _) -> SimpleOK r (B.length r)
115 | --  where p = B.breakSubstring needle
116 | 
117 | 
118 | -- | @input = snip minBound maxBound@
119 | input :: KnownBase s => Parser s e ByteString
120 | input = absolute \b _ -> SimpleOK b 0
121 | 
122 | -- | @rest = mark >>= \p -> snip p maxBound@
123 | rest :: KnownBase s => Parser s e ByteString
124 | rest = relative \b -> SimpleOK b 0
125 | 
126 | -- | 'snip' is a smidge faster, easier to type, if less fun to say, and
127 | -- doesn't need you to fiddle with explicit type application to actually
128 | -- apply.
129 | --
130 | -- The benefit of this combinator is that it is easy to come up with numbers
131 | -- of bytes into a file, and this combinator will automatically trim the
132 | -- result to the actual range of bytes available, whereas constructing an
133 | -- illegal 'Mark' will error in 'toEnum'/'fromEnum'/'succ' or whatever other
134 | -- combinator tries to produce one out of range to maintain the invariant
135 | -- that a mark is always a well formed location in the content.
136 | betwixt :: forall s. KnownBase s => Int -> Int -> ByteString
137 | betwixt i j = B.take (j-i) $ B.drop i $ bytes @s
138 | 
139 | -- | 'mark' is generally faster
140 | pos :: forall s e. KnownBase s => Parser s e Int
141 | pos = Parser \ p s -> OK (I# (minusAddr# p (start @s))) p s
142 | {-# inline pos #-}
143 | 
144 | loc :: KnownBase s => Parser s e Location
145 | loc = markLocation <$> mark
146 | {-# inline loc #-}
147 | 
148 | -- | Actually looking at one of these is pretty slow, as it has to do a linear
149 | -- scan to figure out its line number for display.
150 | markLocation :: forall s. KnownBase s => Mark s -> Location
151 | markLocation (Mark (Ptr p)) = location (bytes @s) (I# (minusAddr# p (start @s)))
152 | {-# inline markLocation #-}
153 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Word8.hs:
--------------------------------------------------------------------------------
  1 | {-# language MagicHash #-}
  2 | {-# language BlockArguments #-}
  3 | {-# language UnboxedTuples #-}
  4 | {-# language BangPatterns #-}
  5 | {-# language TypeApplications #-}
  6 | {-# language NegativeLiterals #-}
  7 | {-# language UnliftedFFITypes #-}
  8 | {-# language ScopedTypeVariables #-}
  9 | {-# language ForeignFunctionInterface #-}
 10 | 
 11 | -- | Note: @parsnip@ will still be assuming that the input is null terminated
 12 | -- even if you use these combinators.
 13 | module Text.Parsnip.Word8
 14 | ( satisfy
 15 | , word8
 16 | , notWord8
 17 | , anyWord8
 18 | , while, whileSome
 19 | , till, tillSome, tillWord8
 20 | , skipWhile, skipWhileSome
 21 | , skipTill, skipTillSome, skipTillWord8
 22 | , previousWord8, previousWord8'
 23 | , nextWord8, nextWord8'
 24 | ) where
 25 | 
 26 | import Data.ByteString (ByteString)
 27 | import Data.Word
 28 | import GHC.Prim
 29 | import GHC.Ptr
 30 | import GHC.Types
 31 | import GHC.Word
 32 | import Text.Parsnip.Internal.Parser
 33 | import Text.Parsnip.Internal.Private
 34 | import Text.Parsnip.Parser
 35 | 
 36 | --------------------------------------------------------------------------------
 37 | -- * Word8 parsers
 38 | --------------------------------------------------------------------------------
 39 | 
 40 | satisfy :: (Word8 -> Bool) -> Parser s e Word8
 41 | satisfy f = Parser \p s -> case readWord8OffAddr# p 0# s of
 42 |   (# t, c #) -> if isTrue# (0## `neWord#` c) && f (W8# c)
 43 |     then OK (W8# c) (plusAddr# p 1#) t
 44 |     else Fail p t
 45 | {-# inline satisfy #-}
 46 | 
 47 | notWord8 :: Word8 -> Parser s e Word8
 48 | notWord8 0 = anyWord8
 49 | notWord8 (W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of
 50 |   (# t, c' #) -> if isTrue# (0## `neWord#` c') && isTrue# (c `neWord#` c')
 51 |     then OK (W8# c') (plusAddr# p 1#) t
 52 |     else Fail p t
 53 | {-# inline notWord8 #-}
 54 | 
 55 | nextWord8 :: Parser s e (Maybe Word8)
 56 | nextWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of
 57 |   (# t, c #) -> OK (if isTrue# (0## `neWord#` c) then Just (W8# c) else Nothing) p t
 58 | {-# inline nextWord8 #-}
 59 | 
 60 | nextWord8' :: Parser s e Word8
 61 | nextWord8' = Parser \p s -> case readWord8OffAddr# p 0# s of
 62 |   (# t, c #) -> if isTrue# (0## `neWord#` c)
 63 |     then OK (W8# c) p t
 64 |     else Fail p t
 65 | {-# inline nextWord8' #-}
 66 | 
 67 | anyWord8 :: Parser s e Word8
 68 | anyWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of
 69 |   (# t, c #) -> if isTrue# (0## `neWord#` c)
 70 |     then OK (W8# c) (plusAddr# p 1#) t
 71 |     else Fail p t
 72 | {-# inline anyWord8 #-}
 73 | 
 74 | scan :: (Word8 -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #)
 75 | scan f = go where
 76 |   go p s = case readWord8OffAddr# p 0# s of
 77 |     (# t, c #) -> if isTrue# (0## `neWord#` c) && f (W8# c)
 78 |       then (# t, p #)
 79 |       else scan f (plusAddr# p 1#) t
 80 | {-# inline scan #-}
 81 | 
 82 | skipWhile :: (Word8 -> Bool) -> Parser s e ()
 83 | skipWhile f = Parser \p s -> case scan f p s of
 84 |   (# t, q #) -> OK () q t
 85 | {-# inline [1] skipWhile #-}
 86 | 
 87 | {-# RULES
 88 | "skipWhile (x/=)" forall x.
 89 |   skipWhile (x `neWord8`) = skipTillWord8 x
 90 | "skipWhile (/=x)" forall x.
 91 |   skipWhile (`neWord8` x) = skipTillWord8 x
 92 |   #-}
 93 | 
 94 | skipTill :: (Word8 -> Bool) -> Parser s e ()
 95 | skipTill p = skipWhile (not . p)
 96 | {-# inline [1] skipTill #-}
 97 | 
 98 | {-# RULES
 99 | "skipTill (x==)" forall x.
100 |   skipTill (x `eqWord8`) = skipTillWord8 x
101 | "skipWhile (==x)" forall x.
102 |   skipWhile (`eqWord8` x) = skipTillWord8 x
103 |   #-}
104 | 
105 | skipTillSome :: (Word8 -> Bool) -> Parser s e ()
106 | skipTillSome p = skipWhileSome (not . p)
107 | {-# inline skipTillSome #-}
108 | 
109 | foreign import ccall "parsnip.h" strchr0 :: Addr# -> Char# -> IO (Ptr Word8) -- lazy reimport is lazy
110 | 
111 | skipTillWord8 :: Word8 -> Parser s e ()
112 | skipTillWord8 (W8# c) = Parser $ \p s -> case io (strchr0 p (chr# (word2Int# c))) s of -- lazy cast is lazy
113 |   (# t, Ptr q #) -> OK () q t
114 | {-# inline skipTillWord8 #-}
115 | 
116 | skipWhileSome :: (Word8 -> Bool) -> Parser s e ()
117 | skipWhileSome p = satisfy p *> skipWhile p
118 | {-# inline skipWhileSome #-}
119 | 
120 | while :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
121 | while f = snipping (skipWhile f)
122 | {-# inline while #-}
123 | 
124 | till :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
125 | till p = snipping (skipTill p)
126 | {-# inline till #-}
127 | 
128 | tillWord8 :: KnownBase s => Word8 -> Parser s e ByteString
129 | tillWord8 c = snipping (skipTillWord8 c)
130 | {-# inline tillWord8 #-}
131 | 
132 | whileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
133 | whileSome p = snipping (skipWhileSome p)
134 | {-# inline whileSome #-}
135 | 
136 | tillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
137 | tillSome p = snipping (skipTillSome p)
138 | {-# inline tillSome #-}
139 | 
140 | -- | Peek at the previous character. Always succeeds.
141 | previousWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8)
142 | previousWord8 = case reflectBase @s of
143 |   !(Base _ _ l _) -> Parser \p s ->
144 |     if isTrue# (ltAddr# l p)
145 |     then case readWord8OffAddr# p (-1#) s of
146 |       (# t, c #) -> OK (Just (W8# c)) p t
147 |     else OK Nothing p s
148 | {-# inline previousWord8 #-}
149 | 
150 | -- | Peek at the previous character. Fails if we're at the start of input.
151 | previousWord8' :: forall s e. KnownBase s => Parser s e Word8
152 | previousWord8' = case reflectBase @s of
153 |   !(Base _ _ l _) -> Parser \p s ->
154 |     if isTrue# (ltAddr# l p)
155 |     then case readWord8OffAddr# p (-1#) s of
156 |       (# t, c #) -> OK (W8# c) p t
157 |     else Fail p s
158 | {-# inline previousWord8' #-}
159 | 


--------------------------------------------------------------------------------
/src/Text/Parsnip/Word8/Binary.hs:
--------------------------------------------------------------------------------
  1 | {-# language MagicHash #-}
  2 | {-# language BlockArguments #-}
  3 | {-# language UnboxedTuples #-}
  4 | {-# language BangPatterns #-}
  5 | {-# language TypeApplications #-}
  6 | {-# language NegativeLiterals #-}
  7 | {-# language UnliftedFFITypes #-}
  8 | {-# language ScopedTypeVariables #-}
  9 | {-# language ForeignFunctionInterface #-}
 10 | 
 11 | -- | These combinators allow handling of embedded null characters.
 12 | --
 13 | -- However, they do assume that the string is null terminated to
 14 | -- accelerate access, and to properly intermix with other combinators
 15 | -- which means we still have to do an initial copy.
 16 | --
 17 | -- Use 'binaryEof' instead of 'eof' to check for end-of-file, as
 18 | -- it understands embdded nulls.
 19 | module Text.Parsnip.Word8.Binary
 20 | ( satisfy
 21 | , word8
 22 | , notWord8
 23 | , anyWord8
 24 | , while, whileSome
 25 | , till, tillSome, tillWord8
 26 | , skipWhile, skipWhileSome
 27 | , skipTill, skipTillSome, skipTillWord8
 28 | , previousWord8, previousWord8'
 29 | , nextWord8, nextWord8'
 30 | , binaryEof
 31 | ) where
 32 | 
 33 | import Data.ByteString (ByteString)
 34 | import Data.Word
 35 | import Foreign.C.Types
 36 | import GHC.Prim
 37 | import GHC.Ptr
 38 | import GHC.Types
 39 | import GHC.Word
 40 | import Text.Parsnip.Internal.Parser
 41 | import Text.Parsnip.Internal.Private
 42 | import Text.Parsnip.Parser
 43 | 
 44 | --------------------------------------------------------------------------------
 45 | -- * Word8 parsers that handle embedded nulls
 46 | --------------------------------------------------------------------------------
 47 | 
 48 | satisfy :: forall s e. KnownBase s => (Word8 -> Bool) -> Parser s e Word8
 49 | satisfy f = Parser \p s -> case readWord8OffAddr# p 0# s of
 50 |   (# t, c #) -> if f (W8# c) 
 51 |     then if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))
 52 |          then OK (W8# c) (plusAddr# p 1#) t
 53 |          else Fail p t
 54 |     else Fail p t
 55 | {-# inline satisfy #-}
 56 | 
 57 | notWord8 :: forall s e. KnownBase s => Word8 -> Parser s e Word8
 58 | notWord8 (W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of
 59 |   (# t, c' #) -> 
 60 |     if isTrue# (c `neWord#` c') 
 61 |     then if isTrue# (0## `neWord#` c') || isTrue# (neAddr# p (end @s))
 62 |          then OK (W8# c') (plusAddr# p 1#) t
 63 |          else Fail p t
 64 |     else Fail p t
 65 | {-# inline notWord8 #-}
 66 | 
 67 | nextWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8)
 68 | nextWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of
 69 |   (# t, c #) -> OK 
 70 |     ( if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s)) 
 71 |       then Just (W8# c) 
 72 |       else Nothing
 73 |     ) 
 74 |     p 
 75 |     t
 76 | {-# inline nextWord8 #-}
 77 | 
 78 | nextWord8' :: forall s e. KnownBase s => Parser s e Word8
 79 | nextWord8' = Parser \p s -> case readWord8OffAddr# p 0# s of
 80 |   (# t, c #) -> 
 81 |     if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))
 82 |     then OK (W8# c) p t
 83 |     else Fail p t
 84 | {-# inline nextWord8' #-}
 85 | 
 86 | anyWord8 :: forall s e. KnownBase s => Parser s e Word8
 87 | anyWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of
 88 |   (# t, c #) -> 
 89 |     if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))
 90 |     then OK (W8# c) (plusAddr# p 1#) t
 91 |     else Fail p t
 92 | {-# inline anyWord8 #-}
 93 | 
 94 | scan :: forall s. KnownBase s => (Word8 -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #)
 95 | scan f = go where
 96 |   go p s = case readWord8OffAddr# p 0# s of
 97 |     (# t, c #) -> 
 98 |       if (isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))) && f (W8# c)
 99 |       then (# t, p #)
100 |       else scan f (plusAddr# p 1#) t
101 | {-# inline scan #-}
102 | 
103 | skipWhile :: KnownBase s => (Word8 -> Bool) -> Parser s e ()
104 | skipWhile f = Parser \p s -> case scan f p s of
105 |   (# t, q #) -> OK () q t
106 | {-# inline [1] skipWhile #-}
107 | 
108 | {-# RULES
109 | "skipWhile (x/=)" forall x.
110 |   skipWhile (x `neWord8`) = skipTillWord8 x
111 | "skipWhile (/=x)" forall x.
112 |   skipWhile (`neWord8` x) = skipTillWord8 x
113 |   #-}
114 | 
115 | skipTill :: KnownBase s => (Word8 -> Bool) -> Parser s e ()
116 | skipTill p = skipWhile (not . p)
117 | {-# inline [1] skipTill #-}
118 | 
119 | {-# RULES
120 | "skipTill (x==)" forall x.
121 |   skipTill (x `eqWord8`) = skipTillWord8 x
122 | "skipWhile (==x)" forall x.
123 |   skipWhile (`eqWord8` x) = skipTillWord8 x
124 |   #-}
125 | 
126 | skipTillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ()
127 | skipTillSome p = skipWhileSome (not . p)
128 | {-# inline skipTillSome #-}
129 | 
130 | foreign import ccall "string.h" memchr :: Addr# -> CInt -> CSize -> IO (Ptr Word8)
131 | 
132 | skipTillWord8 :: forall s e. KnownBase s => Word8 -> Parser s e ()
133 | skipTillWord8 w = Parser $ \p s -> case io (memchr p (fromIntegral w) (fromIntegral $ I# (minusAddr# (end @s) p))) s of
134 |   (# t, Ptr q #) -> OK () q t
135 | {-# inline skipTillWord8 #-}
136 | 
137 | skipWhileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ()
138 | skipWhileSome p = satisfy p *> skipWhile p
139 | {-# inline skipWhileSome #-}
140 | 
141 | while :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
142 | while f = snipping (skipWhile f)
143 | {-# inline while #-}
144 | 
145 | till :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
146 | till p = snipping (skipTill p)
147 | {-# inline till #-}
148 | 
149 | tillWord8 :: KnownBase s => Word8 -> Parser s e ByteString
150 | tillWord8 c = snipping (skipTillWord8 c)
151 | {-# inline tillWord8 #-}
152 | 
153 | whileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
154 | whileSome p = snipping (skipWhileSome p)
155 | {-# inline whileSome #-}
156 | 
157 | tillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
158 | tillSome p = snipping (skipTillSome p)
159 | {-# inline tillSome #-}
160 | 
161 | -- | Peek at the previous character. Always succeeds.
162 | previousWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8)
163 | previousWord8 = case reflectBase @s of
164 |   !(Base _ _ l _) -> Parser \p s ->
165 |     if isTrue# (ltAddr# l p)
166 |     then case readWord8OffAddr# p (-1#) s of
167 |       (# t, c #) -> OK (Just (W8# c)) p t
168 |     else OK Nothing p s
169 | {-# inline previousWord8 #-}
170 | 
171 | -- | Peek at the previous character. Fails if we're at the start of input.
172 | previousWord8' :: forall s e. KnownBase s => Parser s e Word8
173 | previousWord8' = case reflectBase @s of
174 |   !(Base _ _ l _) -> Parser \p s ->
175 |     if isTrue# (ltAddr# l p)
176 |     then case readWord8OffAddr# p (-1#) s of
177 |       (# t, c #) -> OK (W8# c) p t
178 |     else Fail p s
179 | {-# inline previousWord8' #-}
180 | 
181 | -- This version of 'eof' is not fooled by embedded nulls.
182 | binaryEof :: forall s e. KnownBase s => Parser s e ()
183 | binaryEof = Parser \p s -> case readWord8OffAddr# p 0# s of
184 |   (# t, c #) -> 
185 |     if isTrue# (0## `eqWord#` c) && isTrue# (eqAddr# p (end @s))
186 |     then OK () p t
187 |     else Fail p t
188 | {-# inline binaryEof #-}
189 | 


--------------------------------------------------------------------------------