├── .gitignore
├── CHANGELOG.md
├── LICENSE.md
├── Makefile
├── README.md
├── Setup.hs
├── cabal.project
├── cbits
├── parsnip.c
└── parsnip.h
├── parsnip.cabal
└── src
└── Text
├── Parsnip.hs
└── Parsnip
├── Char8.hs
├── Internal
├── Mark.hs
├── Parser.hs
├── Private.hs
└── Simple.hs
├── Location.hs
├── Parser.hs
├── Word8.hs
└── Word8
└── Binary.hs
/.gitignore:
--------------------------------------------------------------------------------
1 | dist-newstyle
2 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | 0.1 [unreleased]
2 | ----------------
3 | * Added 'Text.Parsnip.Word8.Binary' to support parsing files with embedded nulls.
4 | * Removed support for older versions of 'bytestring'.
5 | * Moved to a separate repository from `codex`.
6 | * Added an 'e' parameter to 'Parser' to allow propagation of user errors.
7 |
8 | 0 [2021.03.23]
9 | --------------
10 | * Split off from `engine`
11 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # License
2 |
3 | Licensed under either of
4 | * Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
5 | * BSD 2-Clause license (https://opensource.org/licenses/BSD-2-Clause)
6 | at your option.
7 |
8 | ## BSD 2-Clause License
9 |
10 | - Copyright 2019 Edward Kmett and Sean Chalmers
11 |
12 | All rights reserved.
13 |
14 | Redistribution and use in source and binary forms, with or without
15 | modification, are permitted provided that the following conditions
16 | are met:
17 |
18 | 1. Redistributions of source code must retain the above copyright
19 | notice, this list of conditions and the following disclaimer.
20 |
21 | 2. Redistributions in binary form must reproduce the above copyright
22 | notice, this list of conditions and the following disclaimer in the
23 | documentation and/or other materials provided with the distribution.
24 |
25 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
26 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
29 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 | POSSIBILITY OF SUCH DAMAGE.
36 |
37 | ## Apache License
38 |
39 | _Version 2.0, January 2004_
40 | _<>_
41 |
42 | ### Terms and Conditions for use, reproduction, and distribution
43 |
44 | #### 1. Definitions
45 |
46 | “License” shall mean the terms and conditions for use, reproduction, and
47 | distribution as defined by Sections 1 through 9 of this document.
48 |
49 | “Licensor” shall mean the copyright owner or entity authorized by the copyright
50 | owner that is granting the License.
51 |
52 | “Legal Entity” shall mean the union of the acting entity and all other entities
53 | that control, are controlled by, or are under common control with that entity.
54 | For the purposes of this definition, “control” means **(i)** the power, direct or
55 | indirect, to cause the direction or management of such entity, whether by
56 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the
57 | outstanding shares, or **(iii)** beneficial ownership of such entity.
58 |
59 | “You” (or “Your”) shall mean an individual or Legal Entity exercising
60 | permissions granted by this License.
61 |
62 | “Source” form shall mean the preferred form for making modifications, including
63 | but not limited to software source code, documentation source, and configuration
64 | files.
65 |
66 | “Object” form shall mean any form resulting from mechanical transformation or
67 | translation of a Source form, including but not limited to compiled object code,
68 | generated documentation, and conversions to other media types.
69 |
70 | “Work” shall mean the work of authorship, whether in Source or Object form, made
71 | available under the License, as indicated by a copyright notice that is included
72 | in or attached to the work (an example is provided in the Appendix below).
73 |
74 | “Derivative Works” shall mean any work, whether in Source or Object form, that
75 | is based on (or derived from) the Work and for which the editorial revisions,
76 | annotations, elaborations, or other modifications represent, as a whole, an
77 | original work of authorship. For the purposes of this License, Derivative Works
78 | shall not include works that remain separable from, or merely link (or bind by
79 | name) to the interfaces of, the Work and Derivative Works thereof.
80 |
81 | “Contribution” shall mean any work of authorship, including the original version
82 | of the Work and any modifications or additions to that Work or Derivative Works
83 | thereof, that is intentionally submitted to Licensor for inclusion in the Work
84 | by the copyright owner or by an individual or Legal Entity authorized to submit
85 | on behalf of the copyright owner. For the purposes of this definition,
86 | “submitted” means any form of electronic, verbal, or written communication sent
87 | to the Licensor or its representatives, including but not limited to
88 | communication on electronic mailing lists, source code control systems, and
89 | issue tracking systems that are managed by, or on behalf of, the Licensor for
90 | the purpose of discussing and improving the Work, but excluding communication
91 | that is conspicuously marked or otherwise designated in writing by the copyright
92 | owner as “Not a Contribution.”
93 |
94 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf
95 | of whom a Contribution has been received by Licensor and subsequently
96 | incorporated within the Work.
97 |
98 | #### 2. Grant of Copyright License
99 |
100 | Subject to the terms and conditions of this License, each Contributor hereby
101 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
102 | irrevocable copyright license to reproduce, prepare Derivative Works of,
103 | publicly display, publicly perform, sublicense, and distribute the Work and such
104 | Derivative Works in Source or Object form.
105 |
106 | #### 3. Grant of Patent License
107 |
108 | Subject to the terms and conditions of this License, each Contributor hereby
109 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
110 | irrevocable (except as stated in this section) patent license to make, have
111 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where
112 | such license applies only to those patent claims licensable by such Contributor
113 | that are necessarily infringed by their Contribution(s) alone or by combination
114 | of their Contribution(s) with the Work to which such Contribution(s) was
115 | submitted. If You institute patent litigation against any entity (including a
116 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a
117 | Contribution incorporated within the Work constitutes direct or contributory
118 | patent infringement, then any patent licenses granted to You under this License
119 | for that Work shall terminate as of the date such litigation is filed.
120 |
121 | #### 4. Redistribution
122 |
123 | You may reproduce and distribute copies of the Work or Derivative Works thereof
124 | in any medium, with or without modifications, and in Source or Object form,
125 | provided that You meet the following conditions:
126 |
127 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of
128 | this License; and
129 | * **(b)** You must cause any modified files to carry prominent notices stating that You
130 | changed the files; and
131 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute,
132 | all copyright, patent, trademark, and attribution notices from the Source form
133 | of the Work, excluding those notices that do not pertain to any part of the
134 | Derivative Works; and
135 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any
136 | Derivative Works that You distribute must include a readable copy of the
137 | attribution notices contained within such NOTICE file, excluding those notices
138 | that do not pertain to any part of the Derivative Works, in at least one of the
139 | following places: within a NOTICE text file distributed as part of the
140 | Derivative Works; within the Source form or documentation, if provided along
141 | with the Derivative Works; or, within a display generated by the Derivative
142 | Works, if and wherever such third-party notices normally appear. The contents of
143 | the NOTICE file are for informational purposes only and do not modify the
144 | License. You may add Your own attribution notices within Derivative Works that
145 | You distribute, alongside or as an addendum to the NOTICE text from the Work,
146 | provided that such additional attribution notices cannot be construed as
147 | modifying the License.
148 |
149 | You may add Your own copyright statement to Your modifications and may provide
150 | additional or different license terms and conditions for use, reproduction, or
151 | distribution of Your modifications, or for any such Derivative Works as a whole,
152 | provided Your use, reproduction, and distribution of the Work otherwise complies
153 | with the conditions stated in this License.
154 |
155 | #### 5. Submission of Contributions
156 |
157 | Unless You explicitly state otherwise, any Contribution intentionally submitted
158 | for inclusion in the Work by You to the Licensor shall be under the terms and
159 | conditions of this License, without any additional terms or conditions.
160 | Notwithstanding the above, nothing herein shall supersede or modify the terms of
161 | any separate license agreement you may have executed with Licensor regarding
162 | such Contributions.
163 |
164 | #### 6. Trademarks
165 |
166 | This License does not grant permission to use the trade names, trademarks,
167 | service marks, or product names of the Licensor, except as required for
168 | reasonable and customary use in describing the origin of the Work and
169 | reproducing the content of the NOTICE file.
170 |
171 | #### 7. Disclaimer of Warranty
172 |
173 | Unless required by applicable law or agreed to in writing, Licensor provides the
174 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS,
175 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
176 | including, without limitation, any warranties or conditions of TITLE,
177 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
178 | solely responsible for determining the appropriateness of using or
179 | redistributing the Work and assume any risks associated with Your exercise of
180 | permissions under this License.
181 |
182 | #### 8. Limitation of Liability
183 |
184 | In no event and under no legal theory, whether in tort (including negligence),
185 | contract, or otherwise, unless required by applicable law (such as deliberate
186 | and grossly negligent acts) or agreed to in writing, shall any Contributor be
187 | liable to You for damages, including any direct, indirect, special, incidental,
188 | or consequential damages of any character arising as a result of this License or
189 | out of the use or inability to use the Work (including but not limited to
190 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or
191 | any and all other commercial damages or losses), even if such Contributor has
192 | been advised of the possibility of such damages.
193 |
194 | #### 9. Accepting Warranty or Additional Liability
195 |
196 | While redistributing the Work or Derivative Works thereof, You may choose to
197 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or
198 | other liability obligations and/or rights consistent with this License. However,
199 | in accepting such obligations, You may act only on Your own behalf and on Your
200 | sole responsibility, not on behalf of any other Contributor, and only if You
201 | agree to indemnify, defend, and hold each Contributor harmless for any liability
202 | incurred by, or claims asserted against, such Contributor by reason of your
203 | accepting any such warranty or additional liability.
204 |
205 | _END OF TERMS AND CONDITIONS_
206 |
207 | ### APPENDIX: How to apply the Apache License to your work
208 |
209 | To apply the Apache License to your work, attach the following boilerplate
210 | notice, with the fields enclosed by brackets `[]` replaced with your own
211 | identifying information. (Don't include the brackets!) The text should be
212 | enclosed in the appropriate comment syntax for the file format. We also
213 | recommend that a file or class name and description of purpose be included on
214 | the same “printed page” as the copyright notice for easier identification within
215 | third-party archives.
216 |
217 | Copyright [yyyy] [name of copyright owner]
218 |
219 | Licensed under the Apache License, Version 2.0 (the "License");
220 | you may not use this file except in compliance with the License.
221 | You may obtain a copy of the License at
222 |
223 | http://www.apache.org/licenses/LICENSE-2.0
224 |
225 | Unless required by applicable law or agreed to in writing, software
226 | distributed under the License is distributed on an "AS IS" BASIS,
227 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
228 | See the License for the specific language governing permissions and
229 | limitations under the License.
230 |
231 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | PWD=$(shell pwd)
2 | LIBRARY=$(shell basename $(PWD))
3 |
4 | all:
5 | cabal v2-build
6 |
7 | install:
8 | cabal v2-install
9 |
10 | run:
11 | cabal v2-run example
12 |
13 | repl:
14 | cabal v2-repl $(LIBRARY) --repl-options=-v1 --repl-options=-ferror-spans --repl-options=-j
15 |
16 | watch:
17 | ghcid -p $(LIBRARY) --color -c "cabal v2-repl $(LIBRARY) --repl-options=-fno-code --repl-options=-fno-break-on-exception --repl-options=-fno-break-on-error --repl-options=-v1 --repl-options=-ferror-spans --repl-options=-j" --restart *.cabal
18 |
19 | lint:
20 | find src -name "*.hs*" -print | xargs hlint
21 |
22 | .PHONY: all install run repl watch lint
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | parsnip
2 | =====
3 |
4 | [](https://hackage.haskell.org/package/parsnip)
5 |
6 | This is a rather minimal parsing library.
7 |
8 | Use with a library like `parsers` or `parser-combinators` to fill in the missing functionality.
9 |
10 | Contact Information
11 | -------------------
12 |
13 | Contributions and bug reports are welcome!
14 |
15 | Please feel free to contact me through github or on the #haskell IRC channel on irc.freenode.net.
16 |
17 | -Edward Kmett
18 |
19 |
--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 |
--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
1 | packages: .
2 |
--------------------------------------------------------------------------------
/cbits/parsnip.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include "parsnip.h"
3 |
4 | /* find the first occurence of a given character or the location of the terminating null in a string. */
5 |
6 | char * strchr0 (char *s, HsChar hc) {
7 | char c = (char)hc;
8 | for (; ((uint64_t)s & 7) != 0; ++s)
9 | if (*s == c || *s == '\0')
10 | return s;
11 |
12 | uint64_t *p = (uint64_t*)s;
13 | uint64_t magic = (uint64_t)(-1) / 0xff * 0xfe << 1 >> 1 | 1;
14 |
15 | uint64_t mask = (uint64_t)c;
16 | mask |= mask << 8;
17 | mask |= mask << 16;
18 | mask |= mask << 32;
19 |
20 | for (;;) {
21 | uint64_t w = *p++;
22 | if ((((w + magic) ^ ~w) & ~magic) != 0 || ((((w ^ mask) + magic) ^ ~(w ^ mask)) & ~magic) != 0) {
23 | s = (char*)(p-1);
24 | if (*s == c || *s == '\0') return s;
25 | if (*++s == c) return s;
26 | if (*s == '\0') return s;
27 | if (*++s == c) return s;
28 | if (*s == '\0') return s;
29 | if (*++s == c) return s;
30 | if (*s == '\0') return s;
31 | if (*++s == c) return s;
32 | if (*s == '\0') return s;
33 | if (*++s == c) return s;
34 | if (*s == '\0') return s;
35 | if (*++s == c) return s;
36 | if (*s == '\0') return s;
37 | if (*++s == c) return s;
38 | if (*s == '\0') return s;
39 | }
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/cbits/parsnip.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "HsFFI.h"
3 |
4 | extern char * strchr0(char * s, HsChar c);
5 |
--------------------------------------------------------------------------------
/parsnip.cabal:
--------------------------------------------------------------------------------
1 | cabal-version: 2.2
2 | name: parsnip
3 | version: 0.1
4 | synopsis: A fast, minimal parser
5 | description:
6 | A fast, minimal parser.
7 | .
8 | @parsnip@ parses null-terminated input strings with an absolute minimum of
9 | internal state. It copies the input to a c string and works its way through
10 | hand-over-hand with just an @Addr#@ as the internal state.
11 | .
12 | It uses @reflection@-like tricks to lift the input 'ByteString' into a
13 | constraint that is then passed to just the combinators that need access
14 | to it to compute position, snip out slices of the original, or ask the
15 | number of bytes remaining, but none of the monadic combinators nor simple
16 | 'satisfy' or 'eof' predicates need access to this information.
17 | .
18 | Because of the null-termination, we can't assume that we can handle binary
19 | inputs, but this works well for text-based source languages.
20 | homepage: https://github.com/ekmett/parsnip
21 | license: BSD-2-Clause OR Apache-2.0
22 | license-file: LICENSE.md
23 | author: Edward Kmett
24 | maintainer: Edward Kmett
25 | copyright: Copyright (c) 2019-2021 Edward Kmett
26 | stability: experimental
27 | category: Parsing, Text
28 | build-type: Simple
29 | extra-doc-files:
30 | README.md,
31 | CHANGELOG.md
32 | extra-source-files:
33 | cbits/*.h
34 | cbits/*.c
35 |
36 | source-repository head
37 | type: git
38 | location: https://github.com/ekmett/parsnip
39 |
40 | common base
41 | default-language: Haskell2010
42 | ghc-options:
43 | -Wall -Wincomplete-uni-patterns -Wincomplete-record-updates
44 | -Wredundant-constraints -Widentities -Wmissing-export-lists
45 | include-dirs: cbits
46 | c-sources: cbits/parsnip.c
47 | build-depends:
48 | attoparsec,
49 | base >= 4.15 && < 5,
50 | bytestring ^>= 0.11,
51 | containers ^>= 0.6,
52 | data-default < 0.8,
53 | ghc-prim,
54 | primitive ^>= 0.7
55 |
56 | library
57 | import: base
58 | hs-source-dirs: src
59 | exposed-modules:
60 | Text.Parsnip
61 | Text.Parsnip.Char8
62 | Text.Parsnip.Location
63 | Text.Parsnip.Parser
64 | Text.Parsnip.Word8
65 | Text.Parsnip.Word8.Binary
66 | Text.Parsnip.Internal.Mark
67 | Text.Parsnip.Internal.Parser
68 | Text.Parsnip.Internal.Private
69 | Text.Parsnip.Internal.Simple
70 |
--------------------------------------------------------------------------------
/src/Text/Parsnip.hs:
--------------------------------------------------------------------------------
1 | module Text.Parsnip
2 | ( module Text.Parsnip.Char8
3 | , module Text.Parsnip.Location
4 | , module Text.Parsnip.Parser
5 | ) where
6 |
7 | import Text.Parsnip.Char8
8 | import Text.Parsnip.Location
9 | import Text.Parsnip.Parser
10 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Char8.hs:
--------------------------------------------------------------------------------
1 | {-# language MagicHash #-}
2 | {-# language BlockArguments #-}
3 | {-# language UnboxedTuples #-}
4 | {-# language BangPatterns #-}
5 | {-# language TypeApplications #-}
6 | {-# language NegativeLiterals #-}
7 | {-# language UnliftedFFITypes #-}
8 | {-# language ScopedTypeVariables #-}
9 | {-# language ForeignFunctionInterface #-}
10 | module Text.Parsnip.Char8
11 | ( satisfy
12 | , char
13 | , notChar
14 | , anyChar
15 | , digit
16 | , space
17 | , skipSpace
18 | , letter_ascii
19 | , letter_iso8859_15
20 | , while, whileSome
21 | , till, tillSome, tillChar
22 | , skipWhile, skipWhileSome
23 | , skipTill, skipTillSome, skipTillChar
24 | , previousChar, previousChar'
25 | , nextChar, nextChar'
26 | ) where
27 |
28 | import Control.Applicative
29 | import qualified Data.Attoparsec.ByteString.Char8 as A
30 | import Data.ByteString (ByteString)
31 | import Data.Word
32 | import GHC.Char
33 | import GHC.Prim
34 | import GHC.Ptr
35 | import GHC.Types
36 | import Text.Parsnip.Internal.Parser
37 | import Text.Parsnip.Internal.Private
38 | import Text.Parsnip.Parser
39 |
40 | --------------------------------------------------------------------------------
41 | -- * Char parsers
42 | --------------------------------------------------------------------------------
43 |
44 | satisfy :: (Char -> Bool) -> Parser s e Char
45 | satisfy f = Parser \p s -> case readCharOffAddr# p 0# s of
46 | (# t, c #) -> if isTrue# (chr# 0# `neChar#` c) && f (C# c)
47 | then OK (C# c) (plusAddr# p 1#) t
48 | else Fail p t
49 | {-# inline satisfy #-}
50 |
51 | char :: Char -> Parser s e Char
52 | char '\0' = empty
53 | char r@(C# c) = Parser \p s -> case readCharOffAddr# p 0# s of
54 | (# t, c' #) -> if isTrue# (eqChar# c c')
55 | then OK r (plusAddr# p 1#) t
56 | else Fail p t
57 | {-# inline char #-}
58 |
59 | notChar :: Char -> Parser s e Char
60 | notChar '\0' = anyChar
61 | notChar (C# c) = Parser \p s -> case readCharOffAddr# p 0# s of
62 | (# t, c' #) -> if isTrue# (chr# 0# `neChar#` c') && isTrue# (neChar# c c')
63 | then OK (C# c') (plusAddr# p 1#) t
64 | else Fail p t
65 | {-# inline notChar #-}
66 |
67 | nextChar :: Parser s e (Maybe Char)
68 | nextChar = Parser \p s -> case readCharOffAddr# p 0# s of
69 | (# t, c #) -> OK (if isTrue# (chr# 0# `neChar#` c) then Just (C# c) else Nothing) p t
70 | {-# inline nextChar #-}
71 |
72 | nextChar' :: Parser s e Char
73 | nextChar' = Parser \p s -> case readCharOffAddr# p 0# s of
74 | (# t, c #) -> if isTrue# (chr# 0# `neChar#` c)
75 | then OK (C# c) p t
76 | else Fail p t
77 | {-# inline nextChar' #-}
78 |
79 | anyChar :: Parser s e Char
80 | anyChar = Parser \p s -> case readCharOffAddr# p 0# s of
81 | (# t, c #) -> if isTrue# (chr# 0# `neChar#` c)
82 | then OK (C# c) (plusAddr# p 1#) t
83 | else Fail p t
84 | {-# inline anyChar #-}
85 |
86 | digit :: Parser s e Char
87 | digit = satisfy A.isDigit
88 | {-# inline digit #-}
89 |
90 | space :: Parser s e Char
91 | space = satisfy A.isSpace
92 | {-# inline space #-}
93 |
94 | skipSpace :: Parser s e ()
95 | skipSpace = skipWhile A.isSpace
96 | {-# inline skipSpace #-}
97 |
98 | letter_ascii :: Parser s e Char
99 | letter_ascii = satisfy A.isAlpha_ascii
100 | {-# inline letter_ascii #-}
101 |
102 | letter_iso8859_15:: Parser s e Char
103 | letter_iso8859_15 = satisfy A.isAlpha_iso8859_15
104 | {-# inline letter_iso8859_15 #-}
105 |
106 | scan :: (Char -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #)
107 | scan f = go where
108 | go p s = case readCharOffAddr# p 0# s of
109 | (# t, c #) -> if isTrue# (chr# 0# `neChar#` c) && f (C# c)
110 | then (# t, p #)
111 | else scan f (plusAddr# p 1#) t
112 | {-# inline scan #-}
113 |
114 | skipWhile :: (Char -> Bool) -> Parser s e ()
115 | skipWhile f = Parser \p s -> case scan f p s of
116 | (# t, q #) -> OK () q t
117 | {-# inline [1] skipWhile #-}
118 |
119 | {-# RULES
120 | "skipWhile (x/=)" forall x.
121 | skipWhile (x `neChar`) = skipTillChar x
122 | "skipWhile (/=x)" forall x.
123 | skipWhile (`neChar` x) = skipTillChar x
124 | #-}
125 |
126 | skipTill :: (Char -> Bool) -> Parser s e ()
127 | skipTill p = skipWhile (not . p)
128 | {-# inline [1] skipTill #-}
129 |
130 | {-# RULES
131 | "skipTill (x==)" forall x.
132 | skipTill (x `eqChar`) = skipTillChar x
133 | "skipWhile (==x)" forall x.
134 | skipWhile (`eqChar` x) = skipTillChar x
135 | #-}
136 |
137 | skipTillSome :: (Char -> Bool) -> Parser s e ()
138 | skipTillSome p = skipWhileSome (not . p)
139 | {-# inline skipTillSome #-}
140 |
141 | foreign import ccall "parsnip.h" strchr0 :: Addr# -> Char# -> IO (Ptr Word8)
142 |
143 | skipTillChar :: Char -> Parser s e ()
144 | skipTillChar (C# c) = Parser $ \p s -> case io (strchr0 p c) s of
145 | (# t, Ptr q #) -> OK () q t
146 | {-# inline skipTillChar #-}
147 |
148 | skipWhileSome :: (Char -> Bool) -> Parser s e ()
149 | skipWhileSome p = satisfy p *> skipWhile p
150 | {-# inline skipWhileSome #-}
151 |
152 | while :: KnownBase s => (Char -> Bool) -> Parser s e ByteString
153 | while f = snipping (skipWhile f)
154 | {-# inline while #-}
155 |
156 | till :: KnownBase s => (Char -> Bool) -> Parser s e ByteString
157 | till p = snipping (skipTill p)
158 | {-# inline till #-}
159 |
160 | tillChar :: KnownBase s => Char -> Parser s e ByteString
161 | tillChar c = snipping (skipTillChar c)
162 | {-# inline tillChar #-}
163 |
164 | whileSome :: KnownBase s => (Char -> Bool) -> Parser s e ByteString
165 | whileSome p = snipping (skipWhileSome p)
166 | {-# inline whileSome #-}
167 |
168 | tillSome :: KnownBase s => (Char -> Bool) -> Parser s e ByteString
169 | tillSome p = snipping (skipTillSome p)
170 | {-# inline tillSome #-}
171 |
172 | -- Peek at the previous character. Always succeeds.
173 | previousChar :: forall s e. KnownBase s => Parser s e (Maybe Char)
174 | previousChar = case reflectBase @s of
175 | !(Base _ _ l _) -> Parser \p s ->
176 | if isTrue# (ltAddr# l p)
177 | then case readCharOffAddr# p (-1#) s of
178 | (# t, c #) -> OK (Just (C# c)) p t
179 | else OK Nothing p s
180 |
181 | -- Peek at the previous character. Fails if we're at the start of input.
182 | previousChar' :: forall s e. KnownBase s => Parser s e Char
183 | previousChar' = case reflectBase @s of
184 | !(Base _ _ l _) -> Parser \p s ->
185 | if isTrue# (ltAddr# l p)
186 | then case readCharOffAddr# p (-1#) s of
187 | (# t, c #) -> OK (C# c) p t
188 | else Fail p s
189 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Internal/Mark.hs:
--------------------------------------------------------------------------------
1 | {-# language MagicHash #-}
2 | {-# language TypeApplications #-}
3 | {-# language ScopedTypeVariables #-}
4 | {-# language PatternSynonyms #-}
5 | {-# language BlockArguments #-}
6 | {-# language BangPatterns #-}
7 | {-# language UnboxedTuples #-}
8 | module Text.Parsnip.Internal.Mark
9 | ( Mark(Mark,Mk)
10 | , minusMark
11 | , mark, release
12 | , snip, snipping
13 | ) where
14 |
15 | import Data.ByteString as B
16 | import Data.Word
17 | import GHC.Arr
18 | import GHC.Prim
19 | import GHC.Ptr
20 | import GHC.Types
21 | import Text.Parsnip.Internal.Parser
22 | import Text.Parsnip.Internal.Private
23 |
24 | ---------------------------------------------------------------------------------------
25 | -- * Marks
26 | ---------------------------------------------------------------------------------------
27 |
28 | newtype Mark s = Mark (Ptr Word8) -- unexposed, so known valid addresses
29 | deriving (Eq,Ord,Show)
30 |
31 | pattern Mk :: Addr# -> Mark s
32 | pattern Mk a = Mark (Ptr a)
33 | {-# complete Mk #-} -- if only...
34 |
35 | instance KnownBase s => Bounded (Mark s) where
36 | minBound = Mk (start @s)
37 | maxBound = Mk (end @s)
38 | {-# inline minBound #-}
39 | {-# inline maxBound #-}
40 |
41 | instance KnownBase s => Enum (Mark s) where
42 | fromEnum p = minusMark p minBound
43 | toEnum = case reflectBase @s of
44 | !(Base _ _ l h) -> \(I# i) -> if isTrue# (0# <=# i) && isTrue# (i <=# minusAddr# h l)
45 | then Mk (plusAddr# l i)
46 | else error "Mark.toEnum: Out of bounds"
47 | succ (Mk p) = if isTrue# (ltAddr# p (end @s))
48 | then Mk (plusAddr# p 1#)
49 | else error "Mark.succ: Out of bounds"
50 | pred (Mk p) = if isTrue# (ltAddr# (start @s) p)
51 | then Mk (plusAddr# p (negateInt# 1#))
52 | else error "Mark.pred: Out of bounds"
53 | enumFrom (Mk p) = ptrs1 p (end @s)
54 | enumFromTo (Mk p) (Mk q) = ptrs1 p q
55 | enumFromThen = case reflectBase @s of
56 | !(Base _ _ l h) -> \(Mk p) (Mk q) -> if isTrue# (gtAddr# p q)
57 | then dptrs p (minusAddr# q p) l
58 | else ptrs p (minusAddr# q p) h
59 | enumFromThenTo (Mk p) (Mk q) (Mk r) = if isTrue# (gtAddr# p q)
60 | then dptrs p (minusAddr# q p) r
61 | else ptrs p (minusAddr# q p) r
62 | {-# inline fromEnum #-}
63 | {-# inline toEnum #-}
64 | {-# inline succ #-}
65 | {-# inline pred #-}
66 | {-# inline enumFrom #-}
67 | {-# inline enumFromTo #-}
68 | {-# inline enumFromThen #-}
69 | {-# inline enumFromThenTo #-}
70 |
71 | instance Ix (Mark s) where
72 | range (Mk p, Mk q) = ptrs1 p q
73 | unsafeIndex (p,_) r = minusMark r p
74 | inRange (Mk p, Mk q) (Mk r) = isTrue# (leAddr# p r) && isTrue# (leAddr# r q)
75 | unsafeRangeSize = uncurry minusMark
76 | {-# inline range #-}
77 | {-# inline unsafeIndex #-}
78 | {-# inline inRange #-}
79 | {-# inline unsafeRangeSize #-}
80 |
81 | ptrs1 :: Addr# -> Addr# -> [Mark s]
82 | ptrs1 l h
83 | | isTrue# (leAddr# l h) = Mk l : ptrs1 (plusAddr# l 1#) h
84 | | otherwise = []
85 | {-# inline ptrs1 #-}
86 |
87 | ptrs :: Addr# -> Int# -> Addr# -> [Mark s]
88 | ptrs l d h
89 | | isTrue# (leAddr# l h) = Mk l : ptrs (plusAddr# l d) d h
90 | | otherwise = []
91 | {-# inline ptrs #-}
92 |
93 | dptrs :: Addr# -> Int# -> Addr# -> [Mark s]
94 | dptrs h d l
95 | | isTrue# (leAddr# l h) = Mark (Ptr h) : ptrs (plusAddr# h d) d l
96 | | otherwise = []
97 | {-# inline dptrs #-}
98 |
99 | minusMark :: Mark s -> Mark s -> Int
100 | minusMark (Mk p) (Mk q) = I# (minusAddr# p q)
101 | {-# inline minusMark #-}
102 |
103 | -- | Record the current position
104 | mark :: Parser s e (Mark s)
105 | mark = Parser \p s -> OK (Mk p) p s
106 | {-# inline mark #-}
107 |
108 | -- | Return to a previous location.
109 | release :: Mark s -> Parser s e ()
110 | release (Mk q) = Parser \_ s -> OK () q s
111 | {-# inline release #-}
112 |
113 | -- | To grab all the text covered by a given parser, consider using @snipping@
114 | -- and applying it to a combinator simply recognizes the content rather than returns
115 | -- it. 'snipping' a 'ByteString' is significantly cheaper than assembling one from
116 | -- smaller fragments.
117 | snip :: forall s. KnownBase s => Mark s -> Mark s -> ByteString
118 | snip = case reflectBase @s of
119 | !(Base x g _ _) -> \(Mk i) (Mk j) ->
120 | if isTrue# (geAddr# i j)
121 | then mkBS x g (minusAddr# i j)
122 | else B.empty
123 | {-# inline snip #-}
124 |
125 | snipping :: forall s e a. KnownBase s => Parser s e a -> Parser s e ByteString
126 | snipping = case reflectBase @s of
127 | !(Base b g r _) -> \(Parser m) -> Parser \p s -> case m p s of
128 | (# o, q, t #) ->
129 | (# setRes
130 | ( if isTrue# (geAddr# q p)
131 | then mkBS (b `plusAddr#` minusAddr# p r) g (minusAddr# q p)
132 | else B.empty
133 | ) o
134 | , q, t #)
135 | {-# inline snipping #-}
136 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Internal/Parser.hs:
--------------------------------------------------------------------------------
1 | {-# language CPP #-}
2 | {-# language PatternSynonyms #-}
3 | {-# language OverloadedStrings #-}
4 | {-# language MagicHash #-}
5 | {-# language TypeFamilies #-}
6 | {-# language UnboxedSums #-}
7 | {-# language StandaloneDeriving #-}
8 | {-# language UnboxedTuples #-}
9 | {-# language ImplicitParams #-}
10 | {-# language ConstraintKinds #-}
11 | {-# language LambdaCase #-}
12 | {-# language ScopedTypeVariables #-}
13 | {-# language RankNTypes #-}
14 | {-# language BangPatterns #-}
15 | {-# language ForeignFunctionInterface #-}
16 | {-# language KindSignatures #-}
17 | {-# language UnliftedFFITypes #-}
18 | {-# language TypeApplications #-}
19 | {-# language AllowAmbiguousTypes #-}
20 | {-# language BlockArguments #-}
21 | {-# language ViewPatterns #-}
22 | {-# language UnboxedTuples #-}
23 | {-# language MagicHash #-}
24 | {-# language PatternSynonyms #-}
25 | {-# language UnliftedNewtypes #-}
26 | {-# options_ghc -O2 #-}
27 |
28 | module Text.Parsnip.Internal.Parser
29 | (
30 | -- * Parser
31 | Parser(..)
32 | , Res(Res#,Good,Bad,Ugly)
33 | , mapRes, setRes
34 | , Result, pattern OK, pattern Fail, pattern Err
35 | , mapResult, setResult
36 | , try
37 | -- * Unsafe literals
38 | , lit, litN, word8
39 | -- * Guts
40 | , Base(..), bytes, start, end
41 | , KnownBase(..)
42 | , parse
43 | ) where
44 |
45 | import Control.Applicative
46 | import Control.Monad
47 | import Control.Monad.Primitive
48 | import qualified Data.ByteString as B
49 | import Data.ByteString.Internal (ByteString(..))
50 | import qualified Data.ByteString.Internal as B
51 | import Data.Primitive.ByteArray
52 | import Data.String
53 | import Foreign.C.Types
54 | import Foreign.ForeignPtr
55 | import GHC.ForeignPtr
56 | import GHC.Prim
57 | import GHC.Ptr
58 | import GHC.Types
59 | import GHC.Word
60 | import System.IO.Unsafe
61 |
62 | import Text.Parsnip.Location
63 | import Text.Parsnip.Internal.Private
64 |
65 | newtype Res e a = Res# (# a | (##) | e #)
66 |
67 | pattern Good :: a -> Res e a
68 | pattern Good a = Res# (# a | | #)
69 |
70 | pattern Bad :: Res e a
71 | pattern Bad = Res# (# | (##) | #)
72 |
73 | -- user error, don't recover
74 | pattern Ugly :: e -> Res e a
75 | pattern Ugly e = Res# (# | | e #)
76 |
77 | {-# complete Good, Bad, Ugly :: Res #-}
78 |
79 | mapRes :: (a -> b) -> Res e a -> Res e b
80 | mapRes f (Good a) = Good $! f a
81 | mapRes _ Bad = Bad
82 | mapRes _ (Ugly e) = Ugly e
83 | {-# inline mapRes #-}
84 |
85 | setRes :: b -> Res e a -> Res e b
86 | setRes b (Good _) = Good b
87 | setRes _ Bad = Bad
88 | setRes _ (Ugly e) = Ugly e
89 | {-# inline setRes #-}
90 |
91 | --------------------------------------------------------------------------------
92 | -- * Result
93 | --------------------------------------------------------------------------------
94 |
95 | type Result s e a = (# Res e a, Addr#, State# s #)
96 |
97 | pattern OK :: a -> Addr# -> State# s -> Result s e a
98 | pattern OK a p s = (# Good a, p, s #)
99 |
100 | pattern Fail :: Addr# -> State# s -> Result s e a
101 | pattern Fail p s = (# Bad, p, s #)
102 |
103 | pattern Err :: e -> Addr# -> State# s -> Result s e a
104 | pattern Err e p s = (# Ugly e, p, s #)
105 |
106 | {-# complete OK, Fail, Err #-}
107 |
108 | mapResult :: (a -> b) -> Result s e a -> Result s e b
109 | mapResult f (# o, p, s #) = (# mapRes f o, p, s #)
110 | {-# inline mapResult #-}
111 |
112 | setResult :: b -> Result s e a -> Result s e b
113 | setResult b (# o, p, s #) = (# setRes b o, p, s #)
114 | {-# inline setResult #-}
115 |
116 | --------------------------------------------------------------------------------
117 | -- * Result
118 | --------------------------------------------------------------------------------
119 |
120 | newtype Parser s e a = Parser
121 | { runParser :: Addr# -> State# s -> Result s e a
122 | }
123 |
124 | instance Functor (Parser s e) where
125 | fmap f (Parser m) = Parser \ p s -> mapResult f (m p s)
126 | {-# inline fmap #-}
127 | b <$ Parser m = Parser \ p s -> case m p s of
128 | OK _ q t -> OK b q t
129 | Fail q t -> Fail q t
130 | Err e q t -> Err e q t
131 | {-# inline (<$) #-}
132 |
133 | instance Applicative (Parser s e) where
134 | pure a = Parser \ p s -> OK a p s
135 | {-# inline pure #-}
136 | Parser m <*> Parser n = Parser \p s -> case m p s of
137 | Fail q t -> Fail q t
138 | OK f q t -> mapResult f (n q t)
139 | Err e q t -> Err e q t
140 | {-# inline (<*>) #-}
141 | Parser m *> Parser n = Parser \p s -> case m p s of
142 | Fail q t -> Fail q t
143 | OK _ q t -> n q t
144 | Err e q t -> Err e q t
145 | {-# inline (*>) #-}
146 | Parser m <* Parser n = Parser \p s -> case m p s of
147 | OK a q t -> setResult a (n q t)
148 | x -> x
149 | {-# inline (<*) #-}
150 |
151 | instance Monad (Parser s e) where
152 | Parser m >>= f = Parser \p s -> case m p s of
153 | Fail q t -> Fail q t
154 | Err e q t -> Err e q t
155 | OK a q t -> runParser (f a) q t
156 | {-# inline (>>=) #-}
157 | (>>) = (*>)
158 | {-# inline (>>) #-}
159 |
160 | instance Alternative (Parser s e) where
161 | Parser m <|> Parser n = Parser \ p s -> case m p s of
162 | Fail _ t -> n p t
163 | OK a q t -> OK a q t
164 | Err _ _ t -> n p t
165 | {-# inline (<|>) #-}
166 | empty = Parser Fail
167 | {-# inline empty #-}
168 |
169 | instance MonadPlus (Parser s e) where
170 | mplus = (<|>)
171 | {-# inline mplus #-}
172 | mzero = empty
173 | {-# inline mzero #-}
174 |
175 | instance PrimMonad (Parser s e) where
176 | type PrimState (Parser s e) = s
177 | primitive f = Parser \p s -> case f s of
178 | (# t, a #) -> OK a p t
179 | {-# inline primitive #-}
180 |
181 | -- perhaps this interface is a little low level. hrmm
182 | instance a ~ ByteString => IsString (Parser s e a) where
183 | fromString "" = pure B.empty
184 | fromString xs = Parser \p s -> case sizeofMutableByteArray# ba of
185 | n -> case io (c_strncmp (mutableByteArrayContents# ba) p (fromIntegral $ I# n)) s of
186 | (# t, i #)
187 | | i /= 0 -> Fail p t
188 | | otherwise -> OK bs (plusAddr# p n) t
189 | where !(MutableByteArray ba) = pinnedByteArrayFromString0 xs
190 | bs = B.PS (ForeignPtr (mutableByteArrayContents# ba) (PlainPtr ba)) 0 (I# (sizeofMutableByteArray# ba))
191 |
192 | try :: Parser s e a -> Parser s e a
193 | try (Parser m) = Parser $ \p s -> case m p s of
194 | OK a q t -> OK a q t
195 | Fail _ t -> Fail p t
196 | Err e _ t -> Err e p t
197 |
198 | word8 :: Word8 -> Parser s e Word8
199 | word8 0 = empty
200 | word8 r@(W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of
201 | (# t, c' #) -> if isTrue# (c `eqWord#` c')
202 | then OK r (plusAddr# p 1#) t
203 | else Fail p t
204 | {-# inline word8 #-}
205 |
206 | ---------------------------------------------------------------------------------------
207 | -- * Super-unsafe literal parsers
208 | ---------------------------------------------------------------------------------------
209 |
210 | -- | super-duper unsafe. Fabricates bytestrings that directly reference constant memory
211 | litN :: Addr# -> CSize -> Parser s e ByteString
212 | litN q n = Parser \p s -> case io (c_strncmp p q n) s of
213 | (# t, 0 #) -> OK bs (p `plusAddr#` csize n) t
214 | (# t, _ #) -> Fail p t
215 | where bs = unsafeLiteralByteStringN q n
216 |
217 | -- | Super unsafe. Fabricates a bytestring that directly reference constant memory.
218 | --
219 | -- Usage:
220 | --
221 | -- @
222 | -- hello = lit "hello"#
223 | -- @
224 | lit :: Addr# -> Parser s e ByteString
225 | lit q = litN q (pure_strlen q)
226 |
227 | literalForeignPtrContents :: ForeignPtrContents
228 | literalForeignPtrContents = unsafeDupablePerformIO $ primitive \s -> case newByteArray# 0# s of
229 | (# t, a #) -> (# t, PlainPtr a #)
230 | -- {-# noinline literalForeignPtrContents #-}
231 |
232 | unsafeLiteralForeignPtr :: Addr# -> ForeignPtr Word8
233 | unsafeLiteralForeignPtr addr = ForeignPtr addr literalForeignPtrContents
234 |
235 | unsafeLiteralByteStringN :: Addr# -> CSize -> ByteString
236 | unsafeLiteralByteStringN p n = BS (unsafeLiteralForeignPtr p) (fromIntegral n)
237 | {-# noinline unsafeLiteralByteStringN #-}
238 |
239 | --unsafeLiteralByteString :: Addr# -> ByteString
240 | --unsafeLiteralByteString p = unsafeLiteralByteStringN p (pure_strlen p)
241 |
242 | -- Given a 'Base' you can do two things with it. While in a Parser, you're allowed to
243 | -- access the memory between the start and end addresses, as they'll be alive.
244 | --
245 | -- However, you can always reconstruct a bytestring from the oriignal (non-0 terminated
246 | -- data using 'bytes', and that will remain valid forever or until appropriately
247 | -- garbage collected.
248 | --
249 | -- In general, in a Parser you should try to access the memory in the null-terminated
250 | -- region for cache locality.
251 | --
252 | -- Afterwards, or to report bytestrings, you should trim them off the original, this
253 | -- way, no additional memory needs to be copied, and the garbage collector will just
254 | -- manage the storage of the bytestrings you cut off of the parent for you.
255 |
256 | data Base s = Base
257 | { baseOriginal :: Addr# -- the start of a valid bytestring
258 | , baseContents :: ForeignPtrContents -- memory management for that bytestring
259 | , baseStart :: Addr# -- the start of our null terminated copy of the bytestring
260 | , baseEnd :: Addr# -- the end of our null terminated copy (points to the '\0')
261 | }
262 |
263 | bytes :: forall s. KnownBase s => ByteString
264 | bytes = case reflectBase @s of
265 | !(Base b g p q) -> mkBS b g (minusAddr# q p)
266 | {-# inline bytes #-}
267 |
268 | start :: forall s. KnownBase s => Addr#
269 | start = baseStart (reflectBase @s)
270 | {-# inline start #-}
271 |
272 | end :: forall s. KnownBase s => Addr#
273 | end = baseEnd (reflectBase @s)
274 | {-# inline end #-}
275 |
276 | class KnownBase (s :: Type) where
277 | reflectBase :: Base s
278 |
279 | --------------------------------------------------------------------------------
280 | -- * Parsing
281 | --------------------------------------------------------------------------------
282 |
283 | parse :: (forall s. KnownBase s => Parser s e a) -> ByteString -> Either (Location, Maybe e) a
284 | parse m bs@(B.BS (ForeignPtr b g) (I# len)) = unsafeDupablePerformIO $
285 | B.useAsCString bs \(Ptr p) -> -- now it is null terminated
286 | IO \s -> let base = Base b g p (plusAddr# p len) in
287 | case runParser (withBase (\_ -> m) base proxy#) p s of
288 | (# n, q, t #) -> (# t, finish base q n #)
289 |
290 | finish :: Base s -> Addr# -> Res e a -> Either (Location, Maybe e) a
291 | finish (Base b g q r) p = \case
292 | Good a -> Right a
293 | Bad -> Left (location (mkBS b g (minusAddr# r q)) (I# (minusAddr# p q)), Nothing)
294 | Ugly e -> Left (location (mkBS b g (minusAddr# r q)) (I# (minusAddr# p q)), Just e)
295 | {-# inline finish #-}
296 |
297 | data Wrap s e a = Wrap (KnownBase s => Proxy# s -> Parser s e a)
298 |
299 | withBase :: (KnownBase s => Proxy# s -> Parser s e a) -> Base s -> Proxy# s -> Parser s e a
300 | withBase f x y = magicDict (Wrap f) x y
301 | {-# inline withBase #-}
302 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Internal/Private.hs:
--------------------------------------------------------------------------------
1 | {-# language MagicHash #-}
2 | {-# language UnboxedTuples #-}
3 | {-# language BlockArguments #-}
4 | {-# language BangPatterns #-}
5 | {-# language ViewPatterns #-}
6 | {-# language UnliftedFFITypes #-}
7 | {-# language UnliftedNewtypes #-}
8 |
9 | -- | the proverbial junk drawer
10 | module Text.Parsnip.Internal.Private
11 | ( io
12 | , mutableByteArrayContents#
13 | , pinnedByteArrayFromString0
14 | , pinnedByteArrayFromStringN0
15 | , c_memchr
16 | , c_strlen
17 | , c_strncmp
18 | , pure_strlen
19 | , cint
20 | , csize
21 | , mkBS
22 | , ForeignString(..)
23 | , packForeignString
24 | , withForeignString
25 | ) where
26 |
27 | import Data.Primitive.ByteArray
28 | import Data.Primitive.PrimArray
29 | import Data.Primitive.Ptr
30 | import Data.ByteString.Internal (ByteString(..))
31 | import qualified Data.ByteString.Internal as B
32 | import Data.String
33 | import Data.Word
34 | import Foreign.ForeignPtr
35 | import Foreign.C.String
36 | import Foreign.C.Types
37 | import GHC.ForeignPtr
38 | import GHC.Prim
39 | import GHC.Ptr
40 | import GHC.Types
41 | import System.IO.Unsafe
42 | import Unsafe.Coerce
43 |
44 | io :: IO a -> State# s -> (# State# s, a #)
45 | io = unsafeCoerce#
46 |
47 | -- | Missing primitive
48 | mutableByteArrayContents# :: MutableByteArray# s -> Addr#
49 | mutableByteArrayContents# arr = byteArrayContents# (unsafeCoerce# arr)
50 |
51 | pinnedByteArrayFromString0 :: String -> MutableByteArray RealWorld
52 | pinnedByteArrayFromString0 xs = pinnedByteArrayFromStringN0 (length xs) xs
53 |
54 | pinnedByteArrayFromStringN0 :: Int -> String -> MutableByteArray RealWorld
55 | pinnedByteArrayFromStringN0 n ys = unsafeDupablePerformIO do
56 | marr <- newPinnedByteArray (n+1)
57 | let go !ix [] = if ix == n
58 | then writeByteArray marr ix (0 :: Word8)
59 | else fail "pinnedByteArrayFromStringN: list length less than specified size"
60 | go !ix (x : xs) = if ix < n
61 | then do
62 | writeByteArray marr ix (B.c2w x)
63 | go (ix + 1) xs
64 | else fail "pinnedByteArrayFromStringN: list length greater than specified size"
65 | go 0 ys
66 | pure marr
67 |
68 | -- | An _immutable_ foreign cstring. This is mostly useful for things like calling strstr through ffi
69 | -- where the needle needs to be null terminated.
70 | newtype ForeignString = ForeignString (ForeignPtr Word8)
71 | deriving (Eq,Ord)
72 |
73 | instance Show ForeignString where
74 | showsPrec d (ForeignString fp) = showsPrec d $ unsafeDupablePerformIO $ withForeignPtr (castForeignPtr fp) peekCString
75 |
76 | instance IsString ForeignString where
77 | fromString s = unsafeDupablePerformIO $ do
78 | cstr <- newCString s
79 | ForeignString <$> newForeignPtr_ (castPtr cstr)
80 |
81 | packForeignString :: ByteString -> ForeignString
82 | packForeignString (PS _fp (plusForeignPtr _fp -> fp) n) = unsafeDupablePerformIO do
83 | MutableByteArray mba <- newPinnedByteArray (n+1)
84 | let mpa = MutablePrimArray mba :: MutablePrimArray RealWorld Word8 -- See haskell/primitive#253
85 | withForeignPtr fp $ \p -> copyPtrToMutablePrimArray mpa 0 p n
86 | writePrimArray mpa 0 (0 :: Word8) -- null terminate
87 | -- PrimArray ba <- unsafeFreezePrimArray mpa
88 | pure $ ForeignString $ ForeignPtr (mutableByteArrayContents# mba) (PlainPtr mba)
89 |
90 | withForeignString :: ForeignString -> (CString -> IO r) -> IO r
91 | withForeignString (ForeignString fp) = withForeignPtr (castForeignPtr fp)
92 |
93 | ---------------------------------------------------------------------------------------
94 | -- * C
95 | ---------------------------------------------------------------------------------------
96 |
97 | foreign import ccall unsafe "string.h memchr" c_memchr :: Addr# -> CInt -> CSize -> IO (Ptr ())
98 | foreign import ccall unsafe "string.h strncmp" c_strncmp :: Addr# -> Addr# -> CSize -> IO CInt
99 | foreign import ccall unsafe "string.h strlen" c_strlen :: Addr# -> IO CSize
100 | foreign import ccall unsafe "string.h strlen" pure_strlen :: Addr# -> CSize
101 |
102 | cint :: CInt -> Int#
103 | cint (fromIntegral -> I# i) = i
104 | {-# inline cint #-}
105 |
106 | csize :: CSize -> Int#
107 | csize (fromIntegral -> I# i) = i
108 | {-# inline csize #-}
109 |
110 | mkBS :: Addr# -> ForeignPtrContents -> Int# -> ByteString
111 | mkBS b g l = BS (ForeignPtr b g) (I# l)
112 | {-# inline mkBS #-}
113 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Internal/Simple.hs:
--------------------------------------------------------------------------------
1 | {-# language TypeApplications #-}
2 | {-# language BlockArguments #-}
3 | {-# language ScopedTypeVariables #-}
4 | {-# language BangPatterns #-}
5 | {-# language MagicHash #-}
6 | module Text.Parsnip.Internal.Simple
7 | ( SimpleResult(..)
8 | , relative
9 | , absolute
10 | ) where
11 |
12 | import Data.ByteString
13 | import GHC.Prim
14 | import GHC.Types
15 | import Text.Parsnip.Internal.Parser
16 | import Text.Parsnip.Internal.Private
17 |
18 | --------------------------------------------------------------------------------
19 | -- * Simple parsers
20 | --------------------------------------------------------------------------------
21 |
22 | data SimpleResult e a
23 | = SimpleOK a {-# unpack #-} !Int
24 | | SimpleFail {-# unpack #-} !Int
25 | | SimpleErr e {-# unpack #-} !Int
26 |
27 | relative :: forall s e a. KnownBase s => (ByteString -> SimpleResult e a) -> Parser s e a
28 | relative = case reflectBase @s of
29 | !(Base x g q r) -> \f -> Parser \p s -> case f $ mkBS (x `plusAddr#` minusAddr# p q) g (minusAddr# r p) of
30 | SimpleOK a (I# i) -> OK a (plusAddr# p i) s
31 | SimpleFail (I# i) -> Fail (plusAddr# p i) s
32 | SimpleErr e (I# i) -> Err e (plusAddr# p i) s
33 | {-# inline relative #-}
34 |
35 | absolute :: forall s e a. KnownBase s => (ByteString -> Int -> SimpleResult e a) -> Parser s e a
36 | absolute = case reflectBase @s of
37 | !(Base x g q r) -> \f -> Parser \p s -> case f (mkBS x g (minusAddr# r q)) $ I# (minusAddr# p q) of
38 | SimpleOK a (I# i) -> OK a (plusAddr# p i) s
39 | SimpleFail (I# i) -> Fail (plusAddr# p i) s
40 | SimpleErr e (I# i) -> Err e (plusAddr# p i) s
41 | {-# inline absolute #-}
42 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Location.hs:
--------------------------------------------------------------------------------
1 | module Text.Parsnip.Location
2 | ( location
3 | , Location(..)
4 | , located
5 | ) where
6 |
7 | import Data.ByteString (ByteString)
8 | import qualified Data.ByteString.Char8 as B
9 | import Data.Maybe
10 |
11 | -- | Deliberately lazy, so we don't bother to compute the
12 | -- exact line and column until forced.
13 | data Location = Location
14 | { locationLine :: Int
15 | , locationColumn :: Int
16 | , locationSource :: ByteString
17 | } deriving Show
18 |
19 | -- | /O(n)/ in the size of the input bytestring to actually use one.
20 | -- If used primarily for error location reporting, then this probably
21 | -- puts the burden in the right place.
22 | location :: ByteString -> Int -> Location
23 | location bs j = Location (B.count '\n' before) (j - k) $ B.takeWhile (/='\n') $ B.drop k bs where
24 | before = B.take j bs
25 | k = fromMaybe 0 $ B.elemIndexEnd '\n' before
26 | {-# inline location #-}
27 |
28 | -- | Use this til we get a pretty printer in here
29 | located :: Location -> String -> String
30 | located (Location l c bs) msg = Prelude.unlines
31 | [ show l ++ ":" ++ show c ++ " " ++ msg
32 | , ls
33 | , show l ++ " | " ++ B.unpack bs
34 | , ls ++ Prelude.replicate c ' ' ++ "^"
35 | ] where ls = Prelude.replicate (length (show l) + 1) ' ' ++ "|"
36 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Parser.hs:
--------------------------------------------------------------------------------
1 | {-# language BlockArguments #-}
2 | {-# language MagicHash #-}
3 | {-# language UnboxedTuples #-}
4 | {-# language ScopedTypeVariables #-}
5 | {-# language UnliftedFFITypes #-}
6 | {-# language BangPatterns #-}
7 | {-# language RankNTypes #-}
8 | {-# language TypeApplications #-}
9 | {-# language LambdaCase #-}
10 | {-# language AllowAmbiguousTypes #-}
11 | {-# language PolyKinds #-}
12 | {-# language CPP #-}
13 | module Text.Parsnip.Parser
14 | ( Parser, KnownBase
15 | , parse
16 | ----------------------------
17 | , try
18 | , atEnd
19 | , endOfInput
20 | ----------------------------
21 | , tillSubstring
22 | , skipTillSubstring
23 | , skip
24 | , skip0
25 | , take
26 | ----------------------------
27 | , Mark
28 | , mark
29 | , release
30 | , snip
31 | , snipping
32 | ----------------------------
33 | , input
34 | , pos
35 | , betwixt
36 | , rest
37 | ----------------------------
38 | , loc
39 | ) where
40 |
41 | import Data.ByteString (ByteString)
42 | import qualified Data.ByteString as B
43 | import qualified Data.ByteString.Internal as B
44 | import qualified Data.ByteString.Unsafe as B
45 | import Foreign.C.Types
46 | import GHC.ForeignPtr
47 | import GHC.Prim
48 | import GHC.Ptr
49 | import GHC.Types
50 | import Prelude hiding (take)
51 | import Text.Parsnip.Internal.Mark
52 | import Text.Parsnip.Internal.Parser
53 | import Text.Parsnip.Internal.Private
54 | import Text.Parsnip.Internal.Simple
55 | import Text.Parsnip.Location
56 |
57 | --------------------------------------------------------------------------------
58 | -- * Combinators
59 | --------------------------------------------------------------------------------
60 |
61 | atEnd :: Parser s e Bool
62 | atEnd = Parser \p s -> case readCharOffAddr# p 0# s of
63 | (# t, c #) -> OK (isTrue# do chr# 0# `eqChar#` c) p t
64 |
65 | endOfInput :: Parser s e ()
66 | endOfInput = Parser \p s -> case readCharOffAddr# p 0# s of
67 | (# t, c #) -> (# if isTrue# do chr# 0# `eqChar#` c then Good () else Bad, p, t #)
68 |
69 | take :: forall s e. KnownBase s => Int -> Parser s e ByteString
70 | take = case reflectBase @s of
71 | !(Base b g q r) -> \(I# i) -> Parser \p s ->
72 | if isTrue# (minusAddr# r p <# i)
73 | then Fail p s
74 | else OK (B.PS (ForeignPtr (b `plusAddr#` minusAddr# p q) g) 0 (I# i)) (plusAddr# p i) s
75 |
76 | -- | We can do this two ways, this way is O(1) but needs KnownBase.
77 | skip :: forall s e. KnownBase s => Int -> Parser s e ()
78 | skip = \(I# i) -> Parser \p s ->
79 | if isTrue# (minusAddr# r p <# i)
80 | then Fail p s
81 | else OK () (plusAddr# p i) s
82 | where r = end @s
83 |
84 | -- | Linear time, but no @KnownBase@ dependency.
85 | skip0 :: Int -> Parser s e ()
86 | skip0 n@(I# i) = Parser \p s -> case io (c_memchr p 0 (fromIntegral n)) s of
87 | (# t, Ptr q #) -> if isTrue# (q `eqAddr#` nullAddr#)
88 | then OK () (plusAddr# p i) t
89 | else Fail p s
90 |
91 | tillSubstring :: KnownBase s => ByteString -> Parser s e ByteString
92 | tillSubstring needle = relative \bs -> case p bs of
93 | (r, _) -> SimpleOK r (B.length r)
94 | where p = B.breakSubstring needle
95 |
96 | foreign import ccall unsafe "string.h" strstr :: Addr# -> Addr# -> IO (Ptr ())
97 | foreign import ccall unsafe "string.h" strlen :: Addr# -> IO CSize
98 |
99 | skipTillSubstring :: ByteString -> Parser s e ()
100 | skipTillSubstring bneedle = case B.length bneedle of
101 | 0 -> pure ()
102 | 1 -> () <$ word8 (B.unsafeHead bneedle)
103 | _ -> let fneedle = packForeignString bneedle
104 | in Parser \p s -> case io
105 | ( withForeignString fneedle \(Ptr cneedle)->
106 | strstr p cneedle >>= \q -> if q == nullPtr
107 | then plusPtr (Ptr p) . fromIntegral <$> strlen p
108 | else pure q
109 | ) s of (# t, Ptr r #) -> OK () r t
110 |
111 |
112 | --skipTillSubstring :: ByteString -> Parser s e ()
113 | --skipTillSubstring needle = relative \bs -> case p bs of
114 | -- (r, _) -> SimpleOK r (B.length r)
115 | -- where p = B.breakSubstring needle
116 |
117 |
118 | -- | @input = snip minBound maxBound@
119 | input :: KnownBase s => Parser s e ByteString
120 | input = absolute \b _ -> SimpleOK b 0
121 |
122 | -- | @rest = mark >>= \p -> snip p maxBound@
123 | rest :: KnownBase s => Parser s e ByteString
124 | rest = relative \b -> SimpleOK b 0
125 |
126 | -- | 'snip' is a smidge faster, easier to type, if less fun to say, and
127 | -- doesn't need you to fiddle with explicit type application to actually
128 | -- apply.
129 | --
130 | -- The benefit of this combinator is that it is easy to come up with numbers
131 | -- of bytes into a file, and this combinator will automatically trim the
132 | -- result to the actual range of bytes available, whereas constructing an
133 | -- illegal 'Mark' will error in 'toEnum'/'fromEnum'/'succ' or whatever other
134 | -- combinator tries to produce one out of range to maintain the invariant
135 | -- that a mark is always a well formed location in the content.
136 | betwixt :: forall s. KnownBase s => Int -> Int -> ByteString
137 | betwixt i j = B.take (j-i) $ B.drop i $ bytes @s
138 |
139 | -- | 'mark' is generally faster
140 | pos :: forall s e. KnownBase s => Parser s e Int
141 | pos = Parser \ p s -> OK (I# (minusAddr# p (start @s))) p s
142 | {-# inline pos #-}
143 |
144 | loc :: KnownBase s => Parser s e Location
145 | loc = markLocation <$> mark
146 | {-# inline loc #-}
147 |
148 | -- | Actually looking at one of these is pretty slow, as it has to do a linear
149 | -- scan to figure out its line number for display.
150 | markLocation :: forall s. KnownBase s => Mark s -> Location
151 | markLocation (Mark (Ptr p)) = location (bytes @s) (I# (minusAddr# p (start @s)))
152 | {-# inline markLocation #-}
153 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Word8.hs:
--------------------------------------------------------------------------------
1 | {-# language MagicHash #-}
2 | {-# language BlockArguments #-}
3 | {-# language UnboxedTuples #-}
4 | {-# language BangPatterns #-}
5 | {-# language TypeApplications #-}
6 | {-# language NegativeLiterals #-}
7 | {-# language UnliftedFFITypes #-}
8 | {-# language ScopedTypeVariables #-}
9 | {-# language ForeignFunctionInterface #-}
10 |
11 | -- | Note: @parsnip@ will still be assuming that the input is null terminated
12 | -- even if you use these combinators.
13 | module Text.Parsnip.Word8
14 | ( satisfy
15 | , word8
16 | , notWord8
17 | , anyWord8
18 | , while, whileSome
19 | , till, tillSome, tillWord8
20 | , skipWhile, skipWhileSome
21 | , skipTill, skipTillSome, skipTillWord8
22 | , previousWord8, previousWord8'
23 | , nextWord8, nextWord8'
24 | ) where
25 |
26 | import Data.ByteString (ByteString)
27 | import Data.Word
28 | import GHC.Prim
29 | import GHC.Ptr
30 | import GHC.Types
31 | import GHC.Word
32 | import Text.Parsnip.Internal.Parser
33 | import Text.Parsnip.Internal.Private
34 | import Text.Parsnip.Parser
35 |
36 | --------------------------------------------------------------------------------
37 | -- * Word8 parsers
38 | --------------------------------------------------------------------------------
39 |
40 | satisfy :: (Word8 -> Bool) -> Parser s e Word8
41 | satisfy f = Parser \p s -> case readWord8OffAddr# p 0# s of
42 | (# t, c #) -> if isTrue# (0## `neWord#` c) && f (W8# c)
43 | then OK (W8# c) (plusAddr# p 1#) t
44 | else Fail p t
45 | {-# inline satisfy #-}
46 |
47 | notWord8 :: Word8 -> Parser s e Word8
48 | notWord8 0 = anyWord8
49 | notWord8 (W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of
50 | (# t, c' #) -> if isTrue# (0## `neWord#` c') && isTrue# (c `neWord#` c')
51 | then OK (W8# c') (plusAddr# p 1#) t
52 | else Fail p t
53 | {-# inline notWord8 #-}
54 |
55 | nextWord8 :: Parser s e (Maybe Word8)
56 | nextWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of
57 | (# t, c #) -> OK (if isTrue# (0## `neWord#` c) then Just (W8# c) else Nothing) p t
58 | {-# inline nextWord8 #-}
59 |
60 | nextWord8' :: Parser s e Word8
61 | nextWord8' = Parser \p s -> case readWord8OffAddr# p 0# s of
62 | (# t, c #) -> if isTrue# (0## `neWord#` c)
63 | then OK (W8# c) p t
64 | else Fail p t
65 | {-# inline nextWord8' #-}
66 |
67 | anyWord8 :: Parser s e Word8
68 | anyWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of
69 | (# t, c #) -> if isTrue# (0## `neWord#` c)
70 | then OK (W8# c) (plusAddr# p 1#) t
71 | else Fail p t
72 | {-# inline anyWord8 #-}
73 |
74 | scan :: (Word8 -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #)
75 | scan f = go where
76 | go p s = case readWord8OffAddr# p 0# s of
77 | (# t, c #) -> if isTrue# (0## `neWord#` c) && f (W8# c)
78 | then (# t, p #)
79 | else scan f (plusAddr# p 1#) t
80 | {-# inline scan #-}
81 |
82 | skipWhile :: (Word8 -> Bool) -> Parser s e ()
83 | skipWhile f = Parser \p s -> case scan f p s of
84 | (# t, q #) -> OK () q t
85 | {-# inline [1] skipWhile #-}
86 |
87 | {-# RULES
88 | "skipWhile (x/=)" forall x.
89 | skipWhile (x `neWord8`) = skipTillWord8 x
90 | "skipWhile (/=x)" forall x.
91 | skipWhile (`neWord8` x) = skipTillWord8 x
92 | #-}
93 |
94 | skipTill :: (Word8 -> Bool) -> Parser s e ()
95 | skipTill p = skipWhile (not . p)
96 | {-# inline [1] skipTill #-}
97 |
98 | {-# RULES
99 | "skipTill (x==)" forall x.
100 | skipTill (x `eqWord8`) = skipTillWord8 x
101 | "skipWhile (==x)" forall x.
102 | skipWhile (`eqWord8` x) = skipTillWord8 x
103 | #-}
104 |
105 | skipTillSome :: (Word8 -> Bool) -> Parser s e ()
106 | skipTillSome p = skipWhileSome (not . p)
107 | {-# inline skipTillSome #-}
108 |
109 | foreign import ccall "parsnip.h" strchr0 :: Addr# -> Char# -> IO (Ptr Word8) -- lazy reimport is lazy
110 |
111 | skipTillWord8 :: Word8 -> Parser s e ()
112 | skipTillWord8 (W8# c) = Parser $ \p s -> case io (strchr0 p (chr# (word2Int# c))) s of -- lazy cast is lazy
113 | (# t, Ptr q #) -> OK () q t
114 | {-# inline skipTillWord8 #-}
115 |
116 | skipWhileSome :: (Word8 -> Bool) -> Parser s e ()
117 | skipWhileSome p = satisfy p *> skipWhile p
118 | {-# inline skipWhileSome #-}
119 |
120 | while :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
121 | while f = snipping (skipWhile f)
122 | {-# inline while #-}
123 |
124 | till :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
125 | till p = snipping (skipTill p)
126 | {-# inline till #-}
127 |
128 | tillWord8 :: KnownBase s => Word8 -> Parser s e ByteString
129 | tillWord8 c = snipping (skipTillWord8 c)
130 | {-# inline tillWord8 #-}
131 |
132 | whileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
133 | whileSome p = snipping (skipWhileSome p)
134 | {-# inline whileSome #-}
135 |
136 | tillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
137 | tillSome p = snipping (skipTillSome p)
138 | {-# inline tillSome #-}
139 |
140 | -- | Peek at the previous character. Always succeeds.
141 | previousWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8)
142 | previousWord8 = case reflectBase @s of
143 | !(Base _ _ l _) -> Parser \p s ->
144 | if isTrue# (ltAddr# l p)
145 | then case readWord8OffAddr# p (-1#) s of
146 | (# t, c #) -> OK (Just (W8# c)) p t
147 | else OK Nothing p s
148 | {-# inline previousWord8 #-}
149 |
150 | -- | Peek at the previous character. Fails if we're at the start of input.
151 | previousWord8' :: forall s e. KnownBase s => Parser s e Word8
152 | previousWord8' = case reflectBase @s of
153 | !(Base _ _ l _) -> Parser \p s ->
154 | if isTrue# (ltAddr# l p)
155 | then case readWord8OffAddr# p (-1#) s of
156 | (# t, c #) -> OK (W8# c) p t
157 | else Fail p s
158 | {-# inline previousWord8' #-}
159 |
--------------------------------------------------------------------------------
/src/Text/Parsnip/Word8/Binary.hs:
--------------------------------------------------------------------------------
1 | {-# language MagicHash #-}
2 | {-# language BlockArguments #-}
3 | {-# language UnboxedTuples #-}
4 | {-# language BangPatterns #-}
5 | {-# language TypeApplications #-}
6 | {-# language NegativeLiterals #-}
7 | {-# language UnliftedFFITypes #-}
8 | {-# language ScopedTypeVariables #-}
9 | {-# language ForeignFunctionInterface #-}
10 |
11 | -- | These combinators allow handling of embedded null characters.
12 | --
13 | -- However, they do assume that the string is null terminated to
14 | -- accelerate access, and to properly intermix with other combinators
15 | -- which means we still have to do an initial copy.
16 | --
17 | -- Use 'binaryEof' instead of 'eof' to check for end-of-file, as
18 | -- it understands embdded nulls.
19 | module Text.Parsnip.Word8.Binary
20 | ( satisfy
21 | , word8
22 | , notWord8
23 | , anyWord8
24 | , while, whileSome
25 | , till, tillSome, tillWord8
26 | , skipWhile, skipWhileSome
27 | , skipTill, skipTillSome, skipTillWord8
28 | , previousWord8, previousWord8'
29 | , nextWord8, nextWord8'
30 | , binaryEof
31 | ) where
32 |
33 | import Data.ByteString (ByteString)
34 | import Data.Word
35 | import Foreign.C.Types
36 | import GHC.Prim
37 | import GHC.Ptr
38 | import GHC.Types
39 | import GHC.Word
40 | import Text.Parsnip.Internal.Parser
41 | import Text.Parsnip.Internal.Private
42 | import Text.Parsnip.Parser
43 |
44 | --------------------------------------------------------------------------------
45 | -- * Word8 parsers that handle embedded nulls
46 | --------------------------------------------------------------------------------
47 |
48 | satisfy :: forall s e. KnownBase s => (Word8 -> Bool) -> Parser s e Word8
49 | satisfy f = Parser \p s -> case readWord8OffAddr# p 0# s of
50 | (# t, c #) -> if f (W8# c)
51 | then if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))
52 | then OK (W8# c) (plusAddr# p 1#) t
53 | else Fail p t
54 | else Fail p t
55 | {-# inline satisfy #-}
56 |
57 | notWord8 :: forall s e. KnownBase s => Word8 -> Parser s e Word8
58 | notWord8 (W8# c) = Parser \p s -> case readWord8OffAddr# p 0# s of
59 | (# t, c' #) ->
60 | if isTrue# (c `neWord#` c')
61 | then if isTrue# (0## `neWord#` c') || isTrue# (neAddr# p (end @s))
62 | then OK (W8# c') (plusAddr# p 1#) t
63 | else Fail p t
64 | else Fail p t
65 | {-# inline notWord8 #-}
66 |
67 | nextWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8)
68 | nextWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of
69 | (# t, c #) -> OK
70 | ( if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))
71 | then Just (W8# c)
72 | else Nothing
73 | )
74 | p
75 | t
76 | {-# inline nextWord8 #-}
77 |
78 | nextWord8' :: forall s e. KnownBase s => Parser s e Word8
79 | nextWord8' = Parser \p s -> case readWord8OffAddr# p 0# s of
80 | (# t, c #) ->
81 | if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))
82 | then OK (W8# c) p t
83 | else Fail p t
84 | {-# inline nextWord8' #-}
85 |
86 | anyWord8 :: forall s e. KnownBase s => Parser s e Word8
87 | anyWord8 = Parser \p s -> case readWord8OffAddr# p 0# s of
88 | (# t, c #) ->
89 | if isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))
90 | then OK (W8# c) (plusAddr# p 1#) t
91 | else Fail p t
92 | {-# inline anyWord8 #-}
93 |
94 | scan :: forall s. KnownBase s => (Word8 -> Bool) -> Addr# -> State# s -> (# State# s, Addr# #)
95 | scan f = go where
96 | go p s = case readWord8OffAddr# p 0# s of
97 | (# t, c #) ->
98 | if (isTrue# (0## `neWord#` c) || isTrue# (neAddr# p (end @s))) && f (W8# c)
99 | then (# t, p #)
100 | else scan f (plusAddr# p 1#) t
101 | {-# inline scan #-}
102 |
103 | skipWhile :: KnownBase s => (Word8 -> Bool) -> Parser s e ()
104 | skipWhile f = Parser \p s -> case scan f p s of
105 | (# t, q #) -> OK () q t
106 | {-# inline [1] skipWhile #-}
107 |
108 | {-# RULES
109 | "skipWhile (x/=)" forall x.
110 | skipWhile (x `neWord8`) = skipTillWord8 x
111 | "skipWhile (/=x)" forall x.
112 | skipWhile (`neWord8` x) = skipTillWord8 x
113 | #-}
114 |
115 | skipTill :: KnownBase s => (Word8 -> Bool) -> Parser s e ()
116 | skipTill p = skipWhile (not . p)
117 | {-# inline [1] skipTill #-}
118 |
119 | {-# RULES
120 | "skipTill (x==)" forall x.
121 | skipTill (x `eqWord8`) = skipTillWord8 x
122 | "skipWhile (==x)" forall x.
123 | skipWhile (`eqWord8` x) = skipTillWord8 x
124 | #-}
125 |
126 | skipTillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ()
127 | skipTillSome p = skipWhileSome (not . p)
128 | {-# inline skipTillSome #-}
129 |
130 | foreign import ccall "string.h" memchr :: Addr# -> CInt -> CSize -> IO (Ptr Word8)
131 |
132 | skipTillWord8 :: forall s e. KnownBase s => Word8 -> Parser s e ()
133 | skipTillWord8 w = Parser $ \p s -> case io (memchr p (fromIntegral w) (fromIntegral $ I# (minusAddr# (end @s) p))) s of
134 | (# t, Ptr q #) -> OK () q t
135 | {-# inline skipTillWord8 #-}
136 |
137 | skipWhileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ()
138 | skipWhileSome p = satisfy p *> skipWhile p
139 | {-# inline skipWhileSome #-}
140 |
141 | while :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
142 | while f = snipping (skipWhile f)
143 | {-# inline while #-}
144 |
145 | till :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
146 | till p = snipping (skipTill p)
147 | {-# inline till #-}
148 |
149 | tillWord8 :: KnownBase s => Word8 -> Parser s e ByteString
150 | tillWord8 c = snipping (skipTillWord8 c)
151 | {-# inline tillWord8 #-}
152 |
153 | whileSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
154 | whileSome p = snipping (skipWhileSome p)
155 | {-# inline whileSome #-}
156 |
157 | tillSome :: KnownBase s => (Word8 -> Bool) -> Parser s e ByteString
158 | tillSome p = snipping (skipTillSome p)
159 | {-# inline tillSome #-}
160 |
161 | -- | Peek at the previous character. Always succeeds.
162 | previousWord8 :: forall s e. KnownBase s => Parser s e (Maybe Word8)
163 | previousWord8 = case reflectBase @s of
164 | !(Base _ _ l _) -> Parser \p s ->
165 | if isTrue# (ltAddr# l p)
166 | then case readWord8OffAddr# p (-1#) s of
167 | (# t, c #) -> OK (Just (W8# c)) p t
168 | else OK Nothing p s
169 | {-# inline previousWord8 #-}
170 |
171 | -- | Peek at the previous character. Fails if we're at the start of input.
172 | previousWord8' :: forall s e. KnownBase s => Parser s e Word8
173 | previousWord8' = case reflectBase @s of
174 | !(Base _ _ l _) -> Parser \p s ->
175 | if isTrue# (ltAddr# l p)
176 | then case readWord8OffAddr# p (-1#) s of
177 | (# t, c #) -> OK (W8# c) p t
178 | else Fail p s
179 | {-# inline previousWord8' #-}
180 |
181 | -- This version of 'eof' is not fooled by embedded nulls.
182 | binaryEof :: forall s e. KnownBase s => Parser s e ()
183 | binaryEof = Parser \p s -> case readWord8OffAddr# p 0# s of
184 | (# t, c #) ->
185 | if isTrue# (0## `eqWord#` c) && isTrue# (eqAddr# p (end @s))
186 | then OK () p t
187 | else Fail p t
188 | {-# inline binaryEof #-}
189 |
--------------------------------------------------------------------------------