├── .github ├── CODEOWNERS └── workflows │ ├── release.yaml │ └── build.yaml ├── .gitignore ├── sample └── TakeLetter.hs ├── bench └── Main.hs ├── src └── Data │ └── Bytes │ ├── Parser │ ├── Base128.hs │ ├── Types.hs │ ├── Unsafe.hs │ ├── Leb128.hs │ ├── Utf8.hs │ ├── BigEndian.hs │ ├── LittleEndian.hs │ ├── Ascii.hs │ ├── Internal.hs │ ├── Rebindable.hs │ └── Latin.hs │ └── Parser.hs ├── LICENSE ├── fourmolu.yaml ├── bytesmith.cabal ├── CHANGELOG.md └── test └── Main.hs /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | @byteverse/l3c 2 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | on: 3 | push: 4 | tags: 5 | - "*" 6 | 7 | jobs: 8 | call-workflow: 9 | uses: byteverse/.github/.github/workflows/release.yaml@main 10 | secrets: inherit 11 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: 3 | pull_request: 4 | branches: 5 | - "*" 6 | 7 | jobs: 8 | call-workflow: 9 | uses: byteverse/.github/.github/workflows/build-matrix.yaml@main 10 | with: 11 | cabal-file: bytesmith.cabal 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | dist 3 | dist-* 4 | cabal-dev 5 | *.o 6 | *.hi 7 | *.chi 8 | *.chs.h 9 | *.dyn_o 10 | *.dyn_hi 11 | .hpc 12 | .hsenv 13 | .cabal-sandbox/ 14 | cabal.sandbox.config 15 | *.prof 16 | *.aux 17 | *.hp 18 | *.eventlog 19 | .stack-work/ 20 | cabal.project.local 21 | cabal.project.local~ 22 | .HTF/ 23 | .ghc.environment.* 24 | stack.yaml 25 | *.swm 26 | *.swo 27 | *.swp 28 | test_results/** 29 | -------------------------------------------------------------------------------- /sample/TakeLetter.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MagicHash #-} 2 | 3 | -- Build with: 4 | -- cabal build --write-ghc-environment-files=always 5 | -- ghc -fforce-recomp -O2 -ddump-simpl -dsuppress-all -ddump-to-file sample/TakeLetter.hs 6 | -- to examine GHC optimizations. 7 | module TakeLetter 8 | ( takeLetter 9 | ) where 10 | 11 | import Data.Bytes.Parser (Parser) 12 | import Data.Bytes.Parser.Ascii (takeShortWhile) 13 | import Data.Text.Short (ShortText) 14 | import GHC.Exts 15 | 16 | takeLetter :: Parser e s ShortText 17 | {-# NOINLINE takeLetter #-} 18 | takeLetter = takeShortWhile (== 'A') 19 | -------------------------------------------------------------------------------- /bench/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE TypeApplications #-} 3 | 4 | import Data.Char (ord) 5 | import Data.Primitive (ByteArray) 6 | import Data.Word (Word8) 7 | import Gauge.Main (bench, defaultMain, whnf) 8 | 9 | import qualified Data.Bytes.Parser as P 10 | import qualified Data.Bytes.Parser.Latin as Latin 11 | import qualified GHC.Exts as Exts 12 | 13 | main :: IO () 14 | main = 15 | defaultMain 16 | [ bench "decPositiveInteger" $ 17 | whnf 18 | (\x -> P.parseByteArray (Latin.decUnsignedInteger ()) x) 19 | encodedBigNumber 20 | ] 21 | 22 | encodedBigNumber :: ByteArray 23 | encodedBigNumber = 24 | stringToByteArray $ 25 | show $ 26 | id @Integer $ 27 | 246246357264327645234627753190240202405243024304504230544 28 | * 732345623640035232405249305932503920593209520932095234651 29 | 30 | stringToByteArray :: String -> ByteArray 31 | stringToByteArray = 32 | Exts.fromList . map (fromIntegral @Int @Word8 . ord) 33 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Base128.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE TypeApplications #-} 3 | 4 | module Data.Bytes.Parser.Base128 5 | ( -- * Unsigned 6 | word16 7 | , word32 8 | , word64 9 | ) where 10 | 11 | import Control.Monad (when) 12 | import Data.Bits (bit, clearBit, testBit, unsafeShiftL, (.|.)) 13 | import Data.Bytes.Parser (Parser) 14 | import Data.Word (Word16, Word32, Word64, Word8) 15 | 16 | import qualified Data.Bytes.Parser as P 17 | 18 | word16 :: e -> Parser e s Word16 19 | word16 e = fromIntegral @Word64 @Word16 <$> stepBoundedWord e 16 0 20 | 21 | word32 :: e -> Parser e s Word32 22 | word32 e = fromIntegral @Word64 @Word32 <$> stepBoundedWord e 32 0 23 | 24 | word64 :: e -> Parser e s Word64 25 | word64 e = fromIntegral @Word64 @Word64 <$> stepBoundedWord e 64 0 26 | 27 | stepBoundedWord :: e -> Int -> Word64 -> Parser e s Word64 28 | stepBoundedWord e !bitLimit !acc = do 29 | when (acc >= bit (bitLimit - 7)) $ P.fail e 30 | raw <- P.any e 31 | let content = clearBit raw 7 32 | acc' = unsafeShiftL acc 7 .|. fromIntegral @Word8 @Word64 content 33 | if testBit raw 7 34 | then stepBoundedWord e bitLimit acc' 35 | else pure acc' 36 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFoldable #-} 2 | {-# LANGUAGE DeriveFunctor #-} 3 | {-# LANGUAGE DerivingStrategies #-} 4 | 5 | module Data.Bytes.Parser.Types 6 | ( Parser 7 | , Result (..) 8 | , Slice (..) 9 | ) where 10 | 11 | import Data.Bytes.Parser.Internal (Parser (..)) 12 | 13 | -- | The result of running a parser. 14 | data Result e a 15 | = -- | An error message indicating what went wrong. 16 | Failure e 17 | | -- | The parsed value and the number of bytes 18 | -- remaining in parsed slice. 19 | Success {-# UNPACK #-} !(Slice a) 20 | deriving stock (Eq, Show, Foldable, Functor) 21 | 22 | {- | Slicing metadata (an offset and a length) accompanied 23 | by a value. This does not represent a slice into the 24 | value. This type is intended to be used as the result 25 | of an executed parser. In this context the slicing metadata 26 | describe a slice into to the array (or byte array) that 27 | from which the value was parsed. 28 | 29 | It is often useful to check the @length@ when a parser 30 | succeeds since a non-zero length indicates that there 31 | was additional unconsumed input. The @offset@ is only 32 | ever needed to construct a new slice (via @Bytes@ or 33 | @SmallVector@) from the remaining input. 34 | -} 35 | data Slice a = Slice 36 | { offset :: {-# UNPACK #-} !Int 37 | -- ^ Offset into the array. 38 | , length :: {-# UNPACK #-} !Int 39 | -- ^ Length of the slice. 40 | , value :: a 41 | -- ^ The structured data that was successfully parsed. 42 | } 43 | deriving stock (Eq, Show, Foldable, Functor) 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, Andrew Martin 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Andrew Martin nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /fourmolu.yaml: -------------------------------------------------------------------------------- 1 | # Number of spaces per indentation step 2 | indentation: 2 3 | 4 | # Max line length for automatic line breaking 5 | column-limit: 200 6 | 7 | # Styling of arrows in type signatures (choices: trailing, leading, or leading-args) 8 | function-arrows: trailing 9 | 10 | # How to place commas in multi-line lists, records, etc. (choices: leading or trailing) 11 | comma-style: leading 12 | 13 | # Styling of import/export lists (choices: leading, trailing, or diff-friendly) 14 | import-export-style: leading 15 | 16 | # Whether to full-indent or half-indent 'where' bindings past the preceding body 17 | indent-wheres: false 18 | 19 | # Whether to leave a space before an opening record brace 20 | record-brace-space: true 21 | 22 | # Number of spaces between top-level declarations 23 | newlines-between-decls: 1 24 | 25 | # How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) 26 | haddock-style: multi-line 27 | 28 | # How to print module docstring 29 | haddock-style-module: null 30 | 31 | # Styling of let blocks (choices: auto, inline, newline, or mixed) 32 | let-style: auto 33 | 34 | # How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) 35 | in-style: right-align 36 | 37 | # Whether to put parentheses around a single constraint (choices: auto, always, or never) 38 | single-constraint-parens: always 39 | 40 | # Output Unicode syntax (choices: detect, always, or never) 41 | unicode: never 42 | 43 | # Give the programmer more choice on where to insert blank lines 44 | respectful: true 45 | 46 | # Fixity information for operators 47 | fixities: [] 48 | 49 | # Module reexports Fourmolu should know about 50 | reexports: [] 51 | 52 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Unsafe.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BinaryLiterals #-} 2 | {-# LANGUAGE DataKinds #-} 3 | {-# LANGUAGE DerivingStrategies #-} 4 | {-# LANGUAGE DuplicateRecordFields #-} 5 | {-# LANGUAGE GADTSyntax #-} 6 | {-# LANGUAGE MagicHash #-} 7 | {-# LANGUAGE NamedFieldPuns #-} 8 | {-# LANGUAGE PolyKinds #-} 9 | {-# LANGUAGE RankNTypes #-} 10 | {-# LANGUAGE ScopedTypeVariables #-} 11 | {-# LANGUAGE UnboxedTuples #-} 12 | 13 | {- | Everything in this module is unsafe and can lead to 14 | nondeterministic output or segfaults if used incorrectly. 15 | -} 16 | module Data.Bytes.Parser.Unsafe 17 | ( -- * Types 18 | Parser (..) 19 | 20 | -- * Functions 21 | , cursor 22 | , cursor# 23 | , expose 24 | , unconsume 25 | , jump 26 | , uneffectful 27 | ) where 28 | 29 | import Prelude hiding (length) 30 | 31 | import Data.Bytes.Parser.Internal (Parser (..), Result (..), uneffectful, uneffectfulInt#) 32 | import Data.Bytes.Types (Bytes (..)) 33 | import Data.Primitive (ByteArray) 34 | import GHC.Exts (Int (I#), Int#) 35 | 36 | {- | Get the current offset into the chunk. Using this makes 37 | it possible to observe the internal difference between 'Bytes' 38 | that refer to equivalent slices. Be careful. 39 | -} 40 | cursor :: Parser e s Int 41 | cursor = uneffectful $ \Bytes {offset, length} -> 42 | Success offset offset length 43 | 44 | -- | Variant of 'cursor' with unboxed result. 45 | cursor# :: Parser e s Int# 46 | cursor# = uneffectfulInt# $ \Bytes {offset = I# off, length = I# len} -> (# | (# off, off, len #) #) 47 | 48 | {- | Return the byte array being parsed. This includes bytes 49 | that preceed the current offset and may include bytes that 50 | go beyond the length. This is somewhat dangerous, so only 51 | use this is you know what you're doing. 52 | -} 53 | expose :: Parser e s ByteArray 54 | expose = uneffectful $ \Bytes {length, offset, array} -> 55 | Success array offset length 56 | 57 | {- | Move the cursor back by @n@ bytes. Precondition: you 58 | must have previously consumed at least @n@ bytes. 59 | -} 60 | unconsume :: Int -> Parser e s () 61 | unconsume n = uneffectful $ \Bytes {length, offset} -> 62 | Success () (offset - n) (length + n) 63 | 64 | {- | Set the position to the given index. Precondition: the index 65 | must be valid. It should be the result of an earlier call to 66 | 'cursor'. 67 | -} 68 | jump :: Int -> Parser e s () 69 | jump ix = uneffectful $ \(Bytes {length, offset}) -> 70 | Success () ix (length + (offset - ix)) 71 | -------------------------------------------------------------------------------- /bytesmith.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 3.0 2 | name: bytesmith 3 | version: 0.3.14.0 4 | synopsis: Nonresumable byte parser 5 | description: 6 | Parse bytes as fast as possible. This is a nonresumable parser 7 | that aggresively uses `UnboxedSums` to avoid performing any 8 | allocations. 9 | 10 | homepage: https://github.com/byteverse/bytesmith 11 | bug-reports: https://github.com/byteverse/bytesmith/issues 12 | license: BSD-3-Clause 13 | license-file: LICENSE 14 | author: Andrew Martin 15 | maintainer: amartin@layer3com.com 16 | copyright: 2019 Andrew Martin 17 | category: Data 18 | extra-doc-files: CHANGELOG.md 19 | tested-with: GHC ==9.6.3 || ==9.8.1 20 | 21 | common build-settings 22 | default-language: Haskell2010 23 | ghc-options: -Wall -Wunused-packages 24 | 25 | library 26 | import: build-settings 27 | exposed-modules: 28 | Data.Bytes.Parser 29 | Data.Bytes.Parser.Ascii 30 | Data.Bytes.Parser.Base128 31 | Data.Bytes.Parser.BigEndian 32 | Data.Bytes.Parser.Latin 33 | Data.Bytes.Parser.Leb128 34 | Data.Bytes.Parser.LittleEndian 35 | Data.Bytes.Parser.Rebindable 36 | Data.Bytes.Parser.Unsafe 37 | Data.Bytes.Parser.Utf8 38 | 39 | other-modules: 40 | Data.Bytes.Parser.Internal 41 | Data.Bytes.Parser.Types 42 | 43 | build-depends: 44 | , base >=4.18 && <5 45 | , byteslice >=0.2.6 && <0.3 46 | , bytestring >=0.10.8 && <0.13 47 | , contiguous >=0.6 && <0.7 48 | , natural-arithmetic >=0.1.3 49 | , primitive >=0.7 && <0.10 50 | , text-short >=0.1.3 && <0.2 51 | , text >=0.2.1 52 | , wide-word >=0.1.0.9 && <0.2 53 | 54 | hs-source-dirs: src 55 | ghc-options: -O2 56 | 57 | test-suite test 58 | import: build-settings 59 | type: exitcode-stdio-1.0 60 | hs-source-dirs: test 61 | main-is: Main.hs 62 | build-depends: 63 | , base >=4.12.0.0 && <5 64 | , byte-order 65 | , byteslice 66 | , bytesmith 67 | , primitive 68 | , tasty 69 | , tasty-hunit 70 | , tasty-quickcheck 71 | , text-short 72 | , wide-word 73 | 74 | benchmark bench 75 | import: build-settings 76 | type: exitcode-stdio-1.0 77 | build-depends: 78 | , base 79 | , bytesmith 80 | , gauge 81 | , primitive 82 | 83 | ghc-options: -O2 84 | hs-source-dirs: bench 85 | main-is: Main.hs 86 | 87 | source-repository head 88 | type: git 89 | location: git://github.com/byteverse/bytesmith.git 90 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Revision history for bytesmith 2 | 3 | ## 0.3.14.0 -- 2025-11-17 4 | 5 | * Add `take` for ASCII parser. 6 | 7 | ## 0.3.13.0 -- 2025-07-11 8 | 9 | * Add `hexFixedWord16#` 10 | * Drop support for base < 4.18, clean up CPP 11 | * Add more levity-monomorphized variants of bind, pure, and fail 12 | 13 | ## 0.3.11.1 -- 2024-02-28 14 | 15 | * Update package metadata. 16 | * Remove dependency on `run-st` library. 17 | 18 | ## 0.3.11.0 -- 2024-01-05 19 | 20 | * Add `Data.Bytes.Parser.Latin.hexWord32`. 21 | * Add `Data.Bytes.Parser.Latin.hexFixedWord(128|256)`. 22 | * Add `Data.Bytes.Parser.takeN`. 23 | 24 | ## 0.3.10.0 -- 2023-07-25 25 | 26 | * Add `mapErrorEffectfully`. 27 | * Correct the implementation of `satisfy`. 28 | * Add `takeUpTo`. 29 | 30 | ## 0.3.9.1 -- 2022-12-06 31 | 32 | * Build with GHC 9.4. 33 | 34 | ## 0.3.9.0 -- 2022-07-14 35 | 36 | * Build with GHC 9.2.3. 37 | 38 | ## 0.3.8.0 -- 2021-10-11 39 | 40 | * Add `peek` and `peek'` to `Data.Bytes.Parser.Latin`. 41 | * Add inline pragmas to most functions to prevent cost centers. 42 | * Add support for WordRep-to-LiftedRep in Rebindable module. 43 | * Allow building with newer Contiguous. 44 | * Export `uneffectful`. 45 | 46 | ## 0.3.7.0 -- 2020-07-27 47 | 48 | * Add `Data.Bytes.Parser.Base128` module for Base-128 encoding. 49 | * Add `Data.Bytes.Parser.Leb128` module for LEB-128 encoding. 50 | Supports signed integers with zig-zag encoding. 51 | * Add `skipWhile` to `Data.Bytes.Parser.Latin`. 52 | * Reexport `endOfInput` and `isEndOfInput` from `Latin`. 53 | * Add `charInsensitive` to ASCII module. 54 | * Correct implementation of `peek` and `peek'`. 55 | 56 | ## 0.3.6.0 -- 2020-03-04 57 | 58 | * Add `char12` 59 | * Add `skipTrailedBy2`, `skipTrailedBy3`, and variants 60 | with an unboxed result. 61 | * Add `cstring` 62 | * Add `peekRemaining` 63 | * Add `measure_` and `measure_#`, variants of `measure` 64 | that only give the byte count. 65 | * Add `Data.Bytes.Parser.Rebindable`, the ultimate hack. 66 | * Add `Data.Bytes.Latin.takeTrailedBy` 67 | 68 | ## 0.3.5.0 -- 2020-02-10 69 | 70 | * Add big-endian and little-endian `word256` and `word256Array` parsers. 71 | * Add `hexFixedWord64`. 72 | 73 | ## 0.3.4.0 -- 2020-02-03 74 | 75 | * Add `hexFixedWord32`. 76 | 77 | ## 0.3.3.0 -- 2020-01-22 78 | 79 | * Add `hexWord8`, `hexWord16`, and `hexFixedWord8`. 80 | 81 | ## 0.3.2.0 -- 2019-12-27 82 | 83 | * Add `parseBytesEither` and `parseBytesMaybe`. 84 | * Add common idioms from other parser libaries. This includes: `satisfy`, 85 | `satisfyWith`, `scan`, `peek`, and `peek'`. 86 | 87 | ## 0.3.1.0 -- 2019-11-12 88 | 89 | * Add big-endian and little-endian parsers for `Word128`. 90 | * Add a module for little-endian word parsers. This compliments the 91 | existing big-endian module. 92 | * Add functions for parsing arrays of big/little endian words of 93 | various sizes. 94 | * Add `skipUntil` to `Latin`. 95 | * Add `char5`, `char6`, `char7`, `char8`, `char9`, `char10`, and 96 | `char11` to `Latin`. 97 | * Correct the implementation of `takeTrailedBy`. 98 | 99 | ## 0.3.0.0 -- 2019-09-30 100 | 101 | * Include the offset into the byte sequence in `Result`. Breaking change. 102 | * Rename `hexWord16` to `hexFixedWord16`. Breaking change. 103 | * Rename `parseBytesST` to `parseBytesEffectfully`. Breaking change. 104 | * Add `hexNibbleLower` and `tryHexNibbleLower`. 105 | * Add `hexNibble` and `tryHexNibble`. 106 | 107 | ## 0.2.0.1 -- 2019-09-24 108 | 109 | * Correct an overflow-detection mistake in the implementation 110 | of machine-word parsers. 111 | 112 | ## 0.2.0.0 -- 2019-09-24 113 | 114 | * Add big-endian word parsers. 115 | * Redo module structure so that encoding-specific functions each 116 | live in their own module. 117 | * Add a lot more functions and attempt to make naming somewhat 118 | consistent. 119 | * Add `delimit`. 120 | * Add `replicate`. 121 | * Add `annotate` and its infix synonym ``. 122 | 123 | ## 0.1.0.0 -- 2019-08-22 124 | 125 | * First version. 126 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Leb128.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE BinaryLiterals #-} 3 | {-# LANGUAGE TypeApplications #-} 4 | 5 | {- | Parse numbers that have been encoded with . 6 | LEB-128 allows arbitrarily large numbers to be encoded. Parsers in this 7 | module will fail if the number they attempt to parse is outside the 8 | range of what their target type can handle. The parsers for signed 9 | numbers assume that the numbers have been 10 | . 11 | -} 12 | module Data.Bytes.Parser.Leb128 13 | ( -- * Unsigned 14 | word16 15 | , word32 16 | , word64 17 | 18 | -- * Signed (Zig-zag) 19 | , int16 20 | , int32 21 | , int64 22 | ) where 23 | 24 | import Data.Bits (complement, testBit, unsafeShiftL, unsafeShiftR, xor, (.&.), (.|.)) 25 | import Data.Bytes.Parser (Parser) 26 | import Data.Int (Int16, Int32, Int64) 27 | import Data.Word (Word16, Word32, Word64, Word8) 28 | 29 | import qualified Data.Bytes.Parser as P 30 | 31 | {- | Parse a LEB-128-encoded number. If the number is larger 32 | than @0xFFFF@, fails with the provided error. 33 | -} 34 | word16 :: e -> Parser e s Word16 35 | word16 e = do 36 | w <- stepBoundedWord e 16 0 0 37 | pure (fromIntegral @Word64 @Word16 w) 38 | 39 | {- | Parse a LEB-128-encoded number. If the number is larger 40 | than @0xFFFFFFFF@, fails with the provided error. 41 | -} 42 | word32 :: e -> Parser e s Word32 43 | word32 e = do 44 | w <- stepBoundedWord e 32 0 0 45 | pure (fromIntegral @Word64 @Word32 w) 46 | 47 | {- | Parse a LEB-128-encoded number. If the number is larger 48 | than @0xFFFFFFFFFFFFFFFF@, fails with the provided error. 49 | -} 50 | word64 :: e -> Parser e s Word64 51 | word64 e = stepBoundedWord e 64 0 0 52 | 53 | {- | Parse a LEB-128-zigzag-encoded signed number. If the encoded 54 | number is outside the range @[-32768,32767]@, this fails with 55 | the provided error. 56 | -} 57 | int16 :: e -> Parser e s Int16 58 | int16 = fmap zigzagDecode16 . word16 59 | 60 | {- | Parse a LEB-128-zigzag-encoded signed number. If the encoded 61 | number is outside the range @[-2147483648,2147483647]@, this 62 | fails with the provided error. 63 | -} 64 | int32 :: e -> Parser e s Int32 65 | int32 = fmap zigzagDecode32 . word32 66 | 67 | {- | Parse a LEB-128-zigzag-encoded signed number. If the encoded 68 | number is outside the range @[-9223372036854775808,9223372036854775807]@, 69 | this fails with the provided error. 70 | -} 71 | int64 :: e -> Parser e s Int64 72 | int64 = fmap zigzagDecode64 . word64 73 | 74 | -- What these parameters are: 75 | -- 76 | -- bitLimit: number of bits in the target word size 77 | -- accShift: shift amount, increases by 7 at a time 78 | stepBoundedWord :: e -> Int -> Word64 -> Int -> Parser e s Word64 79 | stepBoundedWord e !bitLimit !acc0 !accShift = do 80 | raw <- P.any e 81 | let number = raw .&. 0x7F 82 | acc1 = 83 | acc0 84 | .|. unsafeShiftL (fromIntegral @Word8 @Word64 number) accShift 85 | accShift' = accShift + 7 86 | if accShift' <= bitLimit 87 | then 88 | if testBit raw 7 89 | then stepBoundedWord e bitLimit acc1 accShift' 90 | else pure acc1 91 | else 92 | if fromIntegral @Word8 @Word raw < twoExp (bitLimit - accShift) 93 | then pure acc1 -- TODO: no need to mask upper bit in number 94 | else P.fail e 95 | 96 | twoExp :: Int -> Word 97 | twoExp x = unsafeShiftL 1 x 98 | 99 | -- Zigzag decode strategy taken from https://stackoverflow.com/a/2211086/1405768 100 | -- The accepted answer is a little bit, so an answer further down was used: 101 | -- 102 | -- > zigzag_decode(value) = ( value >> 1 ) ^ ( ~( value & 1 ) + 1 ) 103 | zigzagDecode16 :: Word16 -> Int16 104 | zigzagDecode16 n = 105 | fromIntegral ((unsafeShiftR n 1) `xor` (complement (n .&. 1) + 1)) 106 | 107 | zigzagDecode32 :: Word32 -> Int32 108 | zigzagDecode32 n = 109 | fromIntegral ((unsafeShiftR n 1) `xor` (complement (n .&. 1) + 1)) 110 | 111 | zigzagDecode64 :: Word64 -> Int64 112 | zigzagDecode64 n = 113 | fromIntegral ((unsafeShiftR n 1) `xor` (complement (n .&. 1) + 1)) 114 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Utf8.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE BinaryLiterals #-} 3 | {-# LANGUAGE CPP #-} 4 | {-# LANGUAGE DataKinds #-} 5 | {-# LANGUAGE DerivingStrategies #-} 6 | {-# LANGUAGE GADTSyntax #-} 7 | {-# LANGUAGE MagicHash #-} 8 | {-# LANGUAGE MultiWayIf #-} 9 | {-# LANGUAGE PolyKinds #-} 10 | {-# LANGUAGE RankNTypes #-} 11 | {-# LANGUAGE ScopedTypeVariables #-} 12 | {-# LANGUAGE TypeApplications #-} 13 | {-# LANGUAGE UnboxedTuples #-} 14 | 15 | {- | Parse input as UTF-8-encoded text. Parsers in this module will 16 | fail if they encounter a byte above @0x7F@. 17 | -} 18 | module Data.Bytes.Parser.Utf8 19 | ( -- * Get Character 20 | any# 21 | , shortText 22 | ) where 23 | 24 | import Prelude hiding (any, fail, length, takeWhile) 25 | 26 | import Data.Bits (unsafeShiftL, xor, (.&.), (.|.)) 27 | import Data.Bytes.Parser.Internal (Parser (..)) 28 | import Data.Text.Short (ShortText) 29 | import GHC.Exts (Char (C#), Char#, Int (I#), Int#, chr#, (+#), (-#), (>#)) 30 | import GHC.Word (Word8 (W8#)) 31 | 32 | import qualified Data.ByteString.Short.Internal as BSS 33 | import qualified Data.Bytes.Parser as Parser 34 | import qualified Data.Primitive as PM 35 | import qualified Data.Text.Short as TS 36 | import qualified GHC.Exts as Exts 37 | 38 | {- FOURMOLU_DISABLE -} 39 | -- | Interpret the next one to four bytes as a UTF-8-encoded character. 40 | -- Fails if the decoded codepoint is in the range U+D800 through U+DFFF. 41 | any# :: e -> Parser e s Char# 42 | {-# noinline any# #-} 43 | any# e = Parser 44 | (\(# arr, off, len #) s0 -> case len ># 0# of 45 | 1# -> 46 | let !w0 = Exts.indexWord8Array# arr off 47 | in if | oneByteChar (W8# w0) -> 48 | (# s0, (# | (# chr# (Exts.word2Int# ( 49 | Exts.word8ToWord# 50 | w0)), off +# 1#, len -# 1# #) #) #) 51 | | twoByteChar (W8# w0) -> 52 | if | I# len > 1 53 | , w1 <- Exts.indexWord8Array# arr (off +# 1#) 54 | , followingByte (W8# w1) 55 | , C# c <- codepointFromTwoBytes (W8# w0) (W8# w1) 56 | -> (# s0, (# | (# c, off +# 2#, len -# 2# #) #) #) 57 | | otherwise -> (# s0, (# e | #) #) 58 | | threeByteChar (W8# w0) -> 59 | if | I# len > 2 60 | , w1 <- Exts.indexWord8Array# arr (off +# 1# ) 61 | , w2 <- Exts.indexWord8Array# arr (off +# 2# ) 62 | , followingByte (W8# w1) 63 | , !c@(C# c#) <- codepointFromThreeBytes (W8# w0) (W8# w1) (W8# w2) 64 | , c < '\xD800' || c > '\xDFFF' 65 | -> (# s0, (# | (# c#, off +# 3#, len -# 3# #) #) #) 66 | | otherwise -> (# s0, (# e | #) #) 67 | | fourByteChar (W8# w0) -> 68 | if | I# len > 3 69 | , w1 <- Exts.indexWord8Array# arr (off +# 1# ) 70 | , w2 <- Exts.indexWord8Array# arr (off +# 2# ) 71 | , w3 <- Exts.indexWord8Array# arr (off +# 3# ) 72 | , followingByte (W8# w1) 73 | , !(C# c#) <- codepointFromFourBytes (W8# w0) (W8# w1) (W8# w2) (W8# w3) 74 | -> (# s0, (# | (# c#, off +# 4#, len -# 4# #) #) #) 75 | | otherwise -> (# s0, (# e | #) #) 76 | | otherwise -> (# s0, (# e | #) #) 77 | _ -> (# s0, (# e | #) #) 78 | ) 79 | {- FOURMOLU_ENABLE -} 80 | 81 | codepointFromFourBytes :: Word8 -> Word8 -> Word8 -> Word8 -> Char 82 | codepointFromFourBytes w1 w2 w3 w4 = 83 | C# 84 | ( chr# 85 | ( unI $ 86 | fromIntegral 87 | ( unsafeShiftL (word8ToWord w1 .&. 0b00001111) 18 88 | .|. unsafeShiftL (word8ToWord w2 .&. 0b00111111) 12 89 | .|. unsafeShiftL (word8ToWord w3 .&. 0b00111111) 6 90 | .|. (word8ToWord w4 .&. 0b00111111) 91 | ) 92 | ) 93 | ) 94 | 95 | codepointFromThreeBytes :: Word8 -> Word8 -> Word8 -> Char 96 | codepointFromThreeBytes w1 w2 w3 = 97 | C# 98 | ( chr# 99 | ( unI $ 100 | fromIntegral 101 | ( unsafeShiftL (word8ToWord w1 .&. 0b00001111) 12 102 | .|. unsafeShiftL (word8ToWord w2 .&. 0b00111111) 6 103 | .|. (word8ToWord w3 .&. 0b00111111) 104 | ) 105 | ) 106 | ) 107 | 108 | codepointFromTwoBytes :: Word8 -> Word8 -> Char 109 | codepointFromTwoBytes w1 w2 = 110 | C# 111 | ( chr# 112 | ( unI $ 113 | fromIntegral @Word @Int 114 | ( unsafeShiftL (word8ToWord w1 .&. 0b00011111) 6 115 | .|. (word8ToWord w2 .&. 0b00111111) 116 | ) 117 | ) 118 | ) 119 | 120 | oneByteChar :: Word8 -> Bool 121 | oneByteChar !w = w .&. 0b10000000 == 0 122 | 123 | twoByteChar :: Word8 -> Bool 124 | twoByteChar !w = w .&. 0b11100000 == 0b11000000 125 | 126 | threeByteChar :: Word8 -> Bool 127 | threeByteChar !w = w .&. 0b11110000 == 0b11100000 128 | 129 | fourByteChar :: Word8 -> Bool 130 | fourByteChar !w = w .&. 0b11111000 == 0b11110000 131 | 132 | followingByte :: Word8 -> Bool 133 | followingByte !w = xor w 0b01000000 .&. 0b11000000 == 0b11000000 134 | 135 | word8ToWord :: Word8 -> Word 136 | word8ToWord = fromIntegral 137 | 138 | unI :: Int -> Int# 139 | unI (I# w) = w 140 | 141 | {- | Consume input that matches the argument. Fails if the 142 | input does not match. 143 | -} 144 | shortText :: e -> ShortText -> Parser e s () 145 | shortText e !t = 146 | Parser.byteArray 147 | e 148 | (shortByteStringToByteArray (TS.toShortByteString t)) 149 | 150 | shortByteStringToByteArray :: 151 | BSS.ShortByteString -> 152 | PM.ByteArray 153 | shortByteStringToByteArray (BSS.SBS x) = PM.ByteArray x 154 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/BigEndian.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE BinaryLiterals #-} 3 | {-# LANGUAGE CPP #-} 4 | {-# LANGUAGE DataKinds #-} 5 | {-# LANGUAGE DerivingStrategies #-} 6 | {-# LANGUAGE GADTSyntax #-} 7 | {-# LANGUAGE PolyKinds #-} 8 | {-# LANGUAGE RankNTypes #-} 9 | {-# LANGUAGE ScopedTypeVariables #-} 10 | {-# LANGUAGE TypeApplications #-} 11 | {-# LANGUAGE UnboxedSums #-} 12 | 13 | -- | Big-endian fixed-width numbers. 14 | module Data.Bytes.Parser.BigEndian 15 | ( -- * Unsigned 16 | word8 17 | , word16 18 | , word32 19 | , word64 20 | , word128 21 | , word256 22 | 23 | -- * Signed 24 | , int8 25 | , int16 26 | , int32 27 | , int64 28 | 29 | -- * Many 30 | 31 | -- ** Unsigned 32 | , word16Array 33 | , word32Array 34 | , word64Array 35 | , word128Array 36 | , word256Array 37 | ) where 38 | 39 | import Prelude hiding (any, fail, length, takeWhile) 40 | 41 | import Data.Bits (unsafeShiftL, (.|.)) 42 | import Data.Bytes.Parser.Internal (Parser, Result (..), swapArray128, swapArray16, swapArray256, swapArray32, swapArray64, uneffectful) 43 | import Data.Bytes.Types (Bytes (..)) 44 | import Data.Int (Int16, Int32, Int64, Int8) 45 | import Data.Primitive (ByteArray (..), PrimArray (..)) 46 | import Data.WideWord (Word128 (Word128), Word256 (Word256)) 47 | import Data.Word (Word16, Word32, Word64, Word8) 48 | import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder) 49 | 50 | import qualified Data.Bytes as Bytes 51 | import qualified Data.Bytes.Parser as P 52 | import qualified Data.Primitive as PM 53 | 54 | -- | Unsigned 8-bit word. 55 | word8 :: e -> Parser e s Word8 56 | word8 = P.any 57 | 58 | {- | Parse an array of big-endian unsigned 16-bit words. If the host is 59 | big-endian, the implementation is optimized to simply @memcpy@ bytes 60 | into the result array. The result array always has elements in 61 | native-endian byte order. 62 | -} 63 | word16Array :: 64 | -- | Error message if not enough bytes are present 65 | e -> 66 | -- | Number of big-endian 16-bit words to expect 67 | Int -> 68 | -- | Native-endian elements 69 | Parser e s (PrimArray Word16) 70 | word16Array e !n = case targetByteOrder of 71 | BigEndian -> fmap (asWord16s . Bytes.toByteArrayClone) (P.take e (n * 2)) 72 | LittleEndian -> do 73 | bs <- P.take e (n * 2) 74 | let r = swapArray16 bs 75 | pure (asWord16s r) 76 | 77 | -- | Parse an array of big-endian unsigned 32-bit words. 78 | word32Array :: 79 | -- | Error message if not enough bytes are present 80 | e -> 81 | -- | Number of big-endian 32-bit words to expect 82 | Int -> 83 | -- | Native-endian elements 84 | Parser e s (PrimArray Word32) 85 | word32Array e !n = case targetByteOrder of 86 | BigEndian -> fmap (asWord32s . Bytes.toByteArrayClone) (P.take e (n * 4)) 87 | LittleEndian -> do 88 | bs <- P.take e (n * 4) 89 | let r = swapArray32 bs 90 | pure (asWord32s r) 91 | 92 | -- | Parse an array of big-endian unsigned 64-bit words. 93 | word64Array :: 94 | -- | Error message if not enough bytes are present 95 | e -> 96 | -- | Number of big-endian 64-bit words to consume 97 | Int -> 98 | -- | Native-endian elements 99 | Parser e s (PrimArray Word64) 100 | word64Array e !n = case targetByteOrder of 101 | BigEndian -> fmap (asWord64s . Bytes.toByteArrayClone) (P.take e (n * 8)) 102 | LittleEndian -> do 103 | bs <- P.take e (n * 8) 104 | let r = swapArray64 bs 105 | pure (asWord64s r) 106 | 107 | -- | Parse an array of big-endian unsigned 256-bit words. 108 | word256Array :: 109 | -- | Error message if not enough bytes are present 110 | e -> 111 | -- | Number of big-endian 256-bit words to consume 112 | Int -> 113 | -- | Native-endian elements 114 | Parser e s (PrimArray Word256) 115 | word256Array e !n = case targetByteOrder of 116 | BigEndian -> fmap (asWord256s . Bytes.toByteArrayClone) (P.take e (n * 32)) 117 | LittleEndian -> do 118 | bs <- P.take e (n * 32) 119 | let r = swapArray256 bs 120 | pure (asWord256s r) 121 | 122 | -- | Parse an array of big-endian unsigned 128-bit words. 123 | word128Array :: 124 | -- | Error message if not enough bytes are present 125 | e -> 126 | -- | Number of big-endian 128-bit words to consume 127 | Int -> 128 | -- | Native-endian elements 129 | Parser e s (PrimArray Word128) 130 | word128Array e !n = case targetByteOrder of 131 | BigEndian -> fmap (asWord128s . Bytes.toByteArrayClone) (P.take e (n * 16)) 132 | LittleEndian -> do 133 | bs <- P.take e (n * 16) 134 | let r = swapArray128 bs 135 | pure (asWord128s r) 136 | 137 | asWord16s :: ByteArray -> PrimArray Word16 138 | asWord16s (ByteArray x) = PrimArray x 139 | 140 | asWord32s :: ByteArray -> PrimArray Word32 141 | asWord32s (ByteArray x) = PrimArray x 142 | 143 | asWord64s :: ByteArray -> PrimArray Word64 144 | asWord64s (ByteArray x) = PrimArray x 145 | 146 | asWord128s :: ByteArray -> PrimArray Word128 147 | asWord128s (ByteArray x) = PrimArray x 148 | 149 | asWord256s :: ByteArray -> PrimArray Word256 150 | asWord256s (ByteArray x) = PrimArray x 151 | 152 | -- | Unsigned 16-bit word. 153 | word16 :: e -> Parser e s Word16 154 | word16 e = uneffectful $ \chunk -> 155 | if length chunk >= 2 156 | then 157 | let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 158 | wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 159 | in Success 160 | (fromIntegral @Word @Word16 (unsafeShiftL (fromIntegral wa) 8 .|. fromIntegral wb)) 161 | (offset chunk + 2) 162 | (length chunk - 2) 163 | else Failure e 164 | 165 | -- | Unsigned 32-bit word. 166 | word32 :: e -> Parser e s Word32 167 | word32 e = uneffectful $ \chunk -> 168 | if length chunk >= 4 169 | then 170 | let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 171 | wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 172 | wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 173 | wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 174 | in Success 175 | ( fromIntegral @Word @Word32 176 | ( unsafeShiftL (fromIntegral wa) 24 177 | .|. unsafeShiftL (fromIntegral wb) 16 178 | .|. unsafeShiftL (fromIntegral wc) 8 179 | .|. fromIntegral wd 180 | ) 181 | ) 182 | (offset chunk + 4) 183 | (length chunk - 4) 184 | else Failure e 185 | 186 | -- | Unsigned 64-bit word. 187 | word64 :: e -> Parser e s Word64 188 | word64 e = uneffectful $ \chunk -> 189 | if length chunk >= 8 190 | then 191 | let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 192 | wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 193 | wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 194 | wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 195 | we = PM.indexByteArray (array chunk) (offset chunk + 4) :: Word8 196 | wf = PM.indexByteArray (array chunk) (offset chunk + 5) :: Word8 197 | wg = PM.indexByteArray (array chunk) (offset chunk + 6) :: Word8 198 | wh = PM.indexByteArray (array chunk) (offset chunk + 7) :: Word8 199 | in Success 200 | ( unsafeShiftL (fromIntegral wa) 56 201 | .|. unsafeShiftL (fromIntegral wb) 48 202 | .|. unsafeShiftL (fromIntegral wc) 40 203 | .|. unsafeShiftL (fromIntegral wd) 32 204 | .|. unsafeShiftL (fromIntegral we) 24 205 | .|. unsafeShiftL (fromIntegral wf) 16 206 | .|. unsafeShiftL (fromIntegral wg) 8 207 | .|. fromIntegral wh 208 | ) 209 | (offset chunk + 8) 210 | (length chunk - 8) 211 | else Failure e 212 | 213 | -- | Unsigned 128-bit word. 214 | word128 :: e -> Parser e s Word128 215 | word128 e = liftA2 Word128 (word64 e) (word64 e) 216 | 217 | -- | Unsigned 256-bit word. 218 | word256 :: e -> Parser e s Word256 219 | word256 e = (\a b c d -> Word256 a b c d) <$> word64 e <*> word64 e <*> word64 e <*> word64 e 220 | 221 | -- | Signed 8-bit integer. 222 | int8 :: e -> Parser e s Int8 223 | int8 = fmap fromIntegral . word8 224 | 225 | -- | Signed 16-bit integer. 226 | int16 :: e -> Parser e s Int16 227 | int16 = fmap fromIntegral . word16 228 | 229 | -- | Signed 32-bit integer. 230 | int32 :: e -> Parser e s Int32 231 | int32 = fmap fromIntegral . word32 232 | 233 | -- | Signed 64-bit integer. 234 | int64 :: e -> Parser e s Int64 235 | int64 = fmap fromIntegral . word64 236 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/LittleEndian.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE BinaryLiterals #-} 3 | {-# LANGUAGE CPP #-} 4 | {-# LANGUAGE DataKinds #-} 5 | {-# LANGUAGE DerivingStrategies #-} 6 | {-# LANGUAGE GADTSyntax #-} 7 | {-# LANGUAGE PolyKinds #-} 8 | {-# LANGUAGE RankNTypes #-} 9 | {-# LANGUAGE ScopedTypeVariables #-} 10 | {-# LANGUAGE TypeApplications #-} 11 | {-# LANGUAGE UnboxedSums #-} 12 | 13 | -- | Little-endian fixed-width numbers. 14 | module Data.Bytes.Parser.LittleEndian 15 | ( -- * One 16 | 17 | -- ** Unsigned 18 | word8 19 | , word16 20 | , word32 21 | , word64 22 | , word128 23 | , word256 24 | 25 | -- ** Signed 26 | , int8 27 | , int16 28 | , int32 29 | , int64 30 | 31 | -- * Many 32 | 33 | -- ** Unsigned 34 | , word16Array 35 | , word32Array 36 | , word64Array 37 | , word128Array 38 | , word256Array 39 | 40 | -- ** Unsigned 41 | , int64Array 42 | ) where 43 | 44 | import Prelude hiding (any, fail, length, takeWhile) 45 | 46 | import Control.Applicative (liftA2) 47 | import Data.Bits (unsafeShiftL, (.|.)) 48 | import Data.Bytes.Parser.Internal (Parser, Result (..), swapArray128, swapArray16, swapArray256, swapArray32, swapArray64, uneffectful) 49 | import Data.Bytes.Types (Bytes (..)) 50 | import Data.Int (Int16, Int32, Int64, Int8) 51 | import Data.Primitive (ByteArray (..), PrimArray (..)) 52 | import Data.WideWord (Word128 (Word128), Word256 (Word256)) 53 | import Data.Word (Word16, Word32, Word64, Word8) 54 | import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder) 55 | 56 | import qualified Data.Bytes as Bytes 57 | import qualified Data.Bytes.Parser as P 58 | import qualified Data.Primitive as PM 59 | 60 | -- | Unsigned 8-bit word. 61 | word8 :: e -> Parser e s Word8 62 | word8 = P.any 63 | 64 | {- | Array of little-endian unsigned 16-bit words. If the host is 65 | little-endian, the implementation is optimized to simply @memcpy@ 66 | bytes into the result array. The result array always has elements 67 | in native-endian byte order. 68 | -} 69 | word16Array :: 70 | -- | Error message if not enough bytes are present 71 | e -> 72 | -- | Number of little-endian 16-bit words to expect 73 | Int -> 74 | -- | Native-endian elements 75 | Parser e s (PrimArray Word16) 76 | word16Array e !n = case targetByteOrder of 77 | LittleEndian -> fmap (asWord16s . Bytes.toByteArrayClone) (P.take e (n * 2)) 78 | BigEndian -> do 79 | bs <- P.take e (n * 2) 80 | let r = swapArray16 bs 81 | pure (asWord16s r) 82 | 83 | -- | Parse an array of little-endian unsigned 32-bit words. 84 | word32Array :: 85 | -- | Error message if not enough bytes are present 86 | e -> 87 | -- | Number of little-endian 32-bit words to consume 88 | Int -> 89 | -- | Native-endian elements 90 | Parser e s (PrimArray Word32) 91 | word32Array e !n = case targetByteOrder of 92 | LittleEndian -> fmap (asWord32s . Bytes.toByteArrayClone) (P.take e (n * 4)) 93 | BigEndian -> do 94 | bs <- P.take e (n * 4) 95 | let r = swapArray32 bs 96 | pure (asWord32s r) 97 | 98 | -- | Parse an array of little-endian unsigned 64-bit words. 99 | word64Array :: 100 | -- | Error message if not enough bytes are present 101 | e -> 102 | -- | Number of little-endian 64-bit words to consume 103 | Int -> 104 | -- | Native-endian elements 105 | Parser e s (PrimArray Word64) 106 | word64Array e !n = case targetByteOrder of 107 | LittleEndian -> fmap (asWord64s . Bytes.toByteArrayClone) (P.take e (n * 8)) 108 | BigEndian -> do 109 | bs <- P.take e (n * 8) 110 | let r = swapArray64 bs 111 | pure (asWord64s r) 112 | 113 | -- | Parse an array of little-endian unsigned 128-bit words. 114 | word128Array :: 115 | -- | Error message if not enough bytes are present 116 | e -> 117 | -- | Number of little-endian 128-bit words to consume 118 | Int -> 119 | -- | Native-endian elements 120 | Parser e s (PrimArray Word128) 121 | word128Array e !n = case targetByteOrder of 122 | LittleEndian -> fmap (asWord128s . Bytes.toByteArrayClone) (P.take e (n * 16)) 123 | BigEndian -> do 124 | bs <- P.take e (n * 16) 125 | let r = swapArray128 bs 126 | pure (asWord128s r) 127 | 128 | -- | Parse an array of little-endian unsigned 256-bit words. 129 | word256Array :: 130 | -- | Error message if not enough bytes are present 131 | e -> 132 | -- | Number of little-endian 256-bit words to consume 133 | Int -> 134 | -- | Native-endian elements 135 | Parser e s (PrimArray Word256) 136 | word256Array e !n = case targetByteOrder of 137 | LittleEndian -> fmap (asWord256s . Bytes.toByteArrayClone) (P.take e (n * 32)) 138 | BigEndian -> do 139 | bs <- P.take e (n * 32) 140 | let r = swapArray256 bs 141 | pure (asWord256s r) 142 | 143 | -- | Parse an array of little-endian signed 64-bit words. 144 | int64Array :: 145 | -- | Error message if not enough bytes are present 146 | e -> 147 | -- | Number of little-endian 64-bit words to expect 148 | Int -> 149 | -- | Native-endian elements 150 | Parser e s (PrimArray Int64) 151 | int64Array e !n = do 152 | PrimArray x <- word64Array e n 153 | pure (PrimArray x) 154 | 155 | asWord16s :: ByteArray -> PrimArray Word16 156 | asWord16s (ByteArray x) = PrimArray x 157 | 158 | asWord32s :: ByteArray -> PrimArray Word32 159 | asWord32s (ByteArray x) = PrimArray x 160 | 161 | asWord64s :: ByteArray -> PrimArray Word64 162 | asWord64s (ByteArray x) = PrimArray x 163 | 164 | asWord128s :: ByteArray -> PrimArray Word128 165 | asWord128s (ByteArray x) = PrimArray x 166 | 167 | asWord256s :: ByteArray -> PrimArray Word256 168 | asWord256s (ByteArray x) = PrimArray x 169 | 170 | -- | Unsigned 16-bit word. 171 | word16 :: e -> Parser e s Word16 172 | word16 e = uneffectful $ \chunk -> 173 | if length chunk >= 2 174 | then 175 | let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 176 | wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 177 | in Success 178 | (fromIntegral @Word @Word16 (unsafeShiftL (fromIntegral wb) 8 .|. fromIntegral wa)) 179 | (offset chunk + 2) 180 | (length chunk - 2) 181 | else Failure e 182 | 183 | -- | Unsigned 32-bit word. 184 | word32 :: e -> Parser e s Word32 185 | word32 e = uneffectful $ \chunk -> 186 | if length chunk >= 4 187 | then 188 | let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 189 | wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 190 | wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 191 | wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 192 | in Success 193 | ( fromIntegral @Word @Word32 194 | ( unsafeShiftL (fromIntegral wd) 24 195 | .|. unsafeShiftL (fromIntegral wc) 16 196 | .|. unsafeShiftL (fromIntegral wb) 8 197 | .|. fromIntegral wa 198 | ) 199 | ) 200 | (offset chunk + 4) 201 | (length chunk - 4) 202 | else Failure e 203 | 204 | -- | Unsigned 64-bit word. 205 | word64 :: e -> Parser e s Word64 206 | word64 e = uneffectful $ \chunk -> 207 | if length chunk >= 8 208 | then 209 | let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 210 | wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 211 | wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 212 | wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 213 | we = PM.indexByteArray (array chunk) (offset chunk + 4) :: Word8 214 | wf = PM.indexByteArray (array chunk) (offset chunk + 5) :: Word8 215 | wg = PM.indexByteArray (array chunk) (offset chunk + 6) :: Word8 216 | wh = PM.indexByteArray (array chunk) (offset chunk + 7) :: Word8 217 | in Success 218 | ( unsafeShiftL (fromIntegral wh) 56 219 | .|. unsafeShiftL (fromIntegral wg) 48 220 | .|. unsafeShiftL (fromIntegral wf) 40 221 | .|. unsafeShiftL (fromIntegral we) 32 222 | .|. unsafeShiftL (fromIntegral wd) 24 223 | .|. unsafeShiftL (fromIntegral wc) 16 224 | .|. unsafeShiftL (fromIntegral wb) 8 225 | .|. fromIntegral wa 226 | ) 227 | (offset chunk + 8) 228 | (length chunk - 8) 229 | else Failure e 230 | 231 | -- | Unsigned 256-bit word. 232 | word256 :: e -> Parser e s Word256 233 | word256 e = (\d c b a -> Word256 a b c d) <$> word64 e <*> word64 e <*> word64 e <*> word64 e 234 | 235 | -- | Unsigned 128-bit word. 236 | word128 :: e -> Parser e s Word128 237 | word128 e = liftA2 (flip Word128) (word64 e) (word64 e) 238 | 239 | -- | Signed 8-bit integer. 240 | int8 :: e -> Parser e s Int8 241 | int8 = fmap fromIntegral . word8 242 | 243 | -- | Signed 16-bit integer. 244 | int16 :: e -> Parser e s Int16 245 | int16 = fmap fromIntegral . word16 246 | 247 | -- | Signed 32-bit integer. 248 | int32 :: e -> Parser e s Int32 249 | int32 = fmap fromIntegral . word32 250 | 251 | -- | Signed 64-bit integer. 252 | int64 :: e -> Parser e s Int64 253 | int64 = fmap fromIntegral . word64 254 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Ascii.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE BinaryLiterals #-} 3 | {-# LANGUAGE DataKinds #-} 4 | {-# LANGUAGE DerivingStrategies #-} 5 | {-# LANGUAGE GADTSyntax #-} 6 | {-# LANGUAGE MagicHash #-} 7 | {-# LANGUAGE PolyKinds #-} 8 | {-# LANGUAGE RankNTypes #-} 9 | {-# LANGUAGE ScopedTypeVariables #-} 10 | {-# LANGUAGE TypeApplications #-} 11 | {-# LANGUAGE UnboxedTuples #-} 12 | 13 | {- | Parse input as ASCII-encoded text. Some parsers in this module, 14 | like 'any' and 'peek', fail if they encounter a byte above @0x7F@. 15 | Others, like numeric parsers and skipping parsers, leave the cursor 16 | at the position of the offending byte without failing. 17 | -} 18 | module Data.Bytes.Parser.Ascii 19 | ( -- * Matching 20 | Latin.char 21 | , Latin.char2 22 | , Latin.char3 23 | , Latin.char4 24 | 25 | -- * Case-Insensitive Matching 26 | , charInsensitive 27 | 28 | -- * Get Character 29 | , any 30 | , any# 31 | , peek 32 | , opt 33 | 34 | -- * Match Many 35 | , take 36 | , shortTrailedBy 37 | , takeShortWhile 38 | 39 | -- * Skip 40 | , Latin.skipDigits 41 | , Latin.skipDigits1 42 | , Latin.skipChar 43 | , Latin.skipChar1 44 | , skipAlpha 45 | , skipAlpha1 46 | , skipTrailedBy 47 | , skipWhile 48 | 49 | -- * Numbers 50 | , Latin.decWord 51 | , Latin.decWord8 52 | , Latin.decWord16 53 | , Latin.decWord32 54 | ) where 55 | 56 | import Prelude hiding (any, fail, length, takeWhile, take) 57 | 58 | import Control.Monad.ST (runST) 59 | import Data.Bits (clearBit) 60 | import Data.Bytes.Parser.Internal (Parser (..), Result (..), Result#, indexLatinCharArray, uneffectful, uneffectful#, upcastUnitSuccess) 61 | import Data.Bytes.Types (Bytes (..)) 62 | import Data.Char (ord) 63 | import Data.Text.Short (ShortText) 64 | import Data.Word (Word8) 65 | import GHC.Exts (Char (C#), Char#, Int (I#), Int#, chr#, gtChar#, indexCharArray#, ord#, (+#), (-#), (<#)) 66 | import Data.Text.Internal (Text(Text)) 67 | 68 | import qualified Data.ByteString.Short.Internal as BSS 69 | import qualified Data.Bytes as Bytes 70 | import qualified Data.Bytes.Parser as Parser 71 | import qualified Data.Bytes.Parser.Latin as Latin 72 | import qualified Data.Bytes.Parser.Unsafe as Unsafe 73 | import qualified Data.Primitive as PM 74 | import qualified Data.Text.Short.Unsafe as TS 75 | 76 | {- | Consume the next character, failing if it does not match the expected 77 | value or if there is no more input. This check for equality is case 78 | insensitive. 79 | 80 | Precondition: The argument must be a letter (@[a-zA-Z]@). Behavior is 81 | undefined if it is not. 82 | -} 83 | charInsensitive :: e -> Char -> Parser e s () 84 | {-# INLINE charInsensitive #-} 85 | charInsensitive e !c = uneffectful $ \chunk -> 86 | if length chunk > 0 87 | then 88 | if clearBit (PM.indexByteArray (array chunk) (offset chunk) :: Word8) 5 == w 89 | then Success () (offset chunk + 1) (length chunk - 1) 90 | else Failure e 91 | else Failure e 92 | where 93 | w = clearBit (fromIntegral @Int @Word8 (ord c)) 5 94 | 95 | {- | Consume input until the trailer is found. Then, consume 96 | the trailer as well. This fails if the trailer is not 97 | found or if any non-ASCII characters are encountered. 98 | -} 99 | skipTrailedBy :: e -> Char -> Parser e s () 100 | {-# INLINE skipTrailedBy #-} 101 | skipTrailedBy e !c = do 102 | let go = do 103 | !d <- any e 104 | if d == c 105 | then pure () 106 | else go 107 | go 108 | 109 | -- | Consume a fixed number of ASCII characters (all less than codepoint 128). 110 | take :: e -> Int -> Parser e s Text 111 | {-# INLINE take #-} 112 | take e !n = do 113 | bs@(Bytes arr off len) <- Parser.take e n 114 | if Bytes.all (\w -> w < 128) bs 115 | then pure (Text arr off len) 116 | else Parser.fail e 117 | 118 | {- | Consume characters matching the predicate. The stops when it 119 | encounters a non-matching character or when it encounters a byte 120 | above @0x7F@. This never fails. 121 | -} 122 | takeShortWhile :: (Char -> Bool) -> Parser e s ShortText 123 | {-# INLINE takeShortWhile #-} 124 | takeShortWhile p = do 125 | !start <- Unsafe.cursor 126 | skipWhile p 127 | end <- Unsafe.cursor 128 | src <- Unsafe.expose 129 | let len = end - start 130 | !r = runST $ do 131 | marr <- PM.newByteArray len 132 | PM.copyByteArray marr 0 src start len 133 | PM.unsafeFreezeByteArray marr 134 | pure $ 135 | TS.fromShortByteStringUnsafe $ 136 | byteArrayToShortByteString $ 137 | r 138 | 139 | {- | Consume input through the next occurrence of the target 140 | character and return the consumed input, excluding the 141 | target character, as a 'ShortText'. This fails if it 142 | encounters any bytes above @0x7F@. 143 | -} 144 | shortTrailedBy :: e -> Char -> Parser e s ShortText 145 | shortTrailedBy e !c = do 146 | !start <- Unsafe.cursor 147 | skipTrailedBy e c 148 | end <- Unsafe.cursor 149 | src <- Unsafe.expose 150 | let len = end - start - 1 151 | !r = runST $ do 152 | marr <- PM.newByteArray len 153 | PM.copyByteArray marr 0 src start len 154 | PM.unsafeFreezeByteArray marr 155 | pure $ 156 | TS.fromShortByteStringUnsafe $ 157 | byteArrayToShortByteString $ 158 | r 159 | 160 | -- | Consumes and returns the next character in the input. 161 | any :: e -> Parser e s Char 162 | {-# INLINE any #-} 163 | any e = uneffectful $ \chunk -> 164 | if length chunk > 0 165 | then 166 | let c = indexLatinCharArray (array chunk) (offset chunk) 167 | in if c < '\128' 168 | then Success c (offset chunk + 1) (length chunk - 1) 169 | else Failure e 170 | else Failure e 171 | 172 | -- | Variant of 'any' with unboxed result. 173 | any# :: e -> Parser e s Char# 174 | {-# INLINE any# #-} 175 | any# e = 176 | Parser 177 | ( \(# arr, off, len #) s0 -> case len of 178 | 0# -> (# s0, (# e | #) #) 179 | _ -> 180 | let !w = indexCharArray# arr off 181 | in case ord# w <# 128# of 182 | 1# -> (# s0, (# | (# w, off +# 1#, len -# 1# #) #) #) 183 | _ -> (# s0, (# e | #) #) 184 | ) 185 | 186 | unI :: Int -> Int# 187 | {-# INLINE unI #-} 188 | unI (I# w) = w 189 | 190 | {- | Examine the next byte without consuming it, interpret it as an 191 | ASCII-encoded character. This fails if the byte is above @0x7F@ or 192 | if the end of input has been reached. 193 | -} 194 | peek :: e -> Parser e s Char 195 | {-# INLINE peek #-} 196 | peek e = uneffectful $ \chunk -> 197 | if length chunk > 0 198 | then 199 | let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 200 | in if w < 128 201 | then 202 | Success 203 | (C# (chr# (unI (fromIntegral w)))) 204 | (offset chunk) 205 | (length chunk) 206 | else Failure e 207 | else Failure e 208 | 209 | {- | Consume the next byte, interpreting it as an ASCII-encoded character. 210 | Fails if the byte is above @0x7F@. Returns @Nothing@ if the 211 | end of the input has been reached. 212 | -} 213 | opt :: e -> Parser e s (Maybe Char) 214 | {-# INLINE opt #-} 215 | opt e = uneffectful $ \chunk -> 216 | if length chunk > 0 217 | then 218 | let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 219 | in if w < 128 220 | then 221 | Success 222 | (Just (C# (chr# (unI (fromIntegral w))))) 223 | (offset chunk + 1) 224 | (length chunk - 1) 225 | else Failure e 226 | else Success Nothing (offset chunk) (length chunk) 227 | 228 | {- | Consume characters matching the predicate. The stops when it 229 | encounters a non-matching character or when it encounters a byte 230 | above @0x7F@. This never fails. 231 | -} 232 | skipWhile :: (Char -> Bool) -> Parser e s () 233 | {-# INLINE skipWhile #-} 234 | skipWhile p = 235 | Parser 236 | ( \(# arr, off0, len0 #) s0 -> 237 | let go off len = case len of 238 | 0# -> (# (), off, 0# #) 239 | _ -> 240 | let c = indexCharArray# arr off 241 | in case p (C# c) of 242 | True -> case gtChar# c '\x7F'# of 243 | 1# -> (# (), off, len #) 244 | _ -> go (off +# 1#) (len -# 1#) 245 | False -> (# (), off, len #) 246 | in (# s0, (# | go off0 len0 #) #) 247 | ) 248 | 249 | {- | Skip uppercase and lowercase letters until a non-alpha 250 | character is encountered. 251 | -} 252 | skipAlpha :: Parser e s () 253 | {-# INLINE skipAlpha #-} 254 | skipAlpha = uneffectful# $ \c -> 255 | upcastUnitSuccess (skipAlphaAsciiLoop c) 256 | 257 | {- | Skip uppercase and lowercase letters until a non-alpha 258 | character is encountered. 259 | -} 260 | skipAlpha1 :: e -> Parser e s () 261 | {-# INLINE skipAlpha1 #-} 262 | skipAlpha1 e = uneffectful# $ \c -> 263 | skipAlphaAsciiLoop1Start e c 264 | 265 | skipAlphaAsciiLoop :: 266 | Bytes -> -- Chunk 267 | (# Int#, Int# #) 268 | {-# INLINE skipAlphaAsciiLoop #-} 269 | skipAlphaAsciiLoop !c = 270 | if length c > 0 271 | then 272 | let w = indexLatinCharArray (array c) (offset c) 273 | in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z') 274 | then skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c) 275 | else (# unI (offset c), unI (length c) #) 276 | else (# unI (offset c), unI (length c) #) 277 | 278 | skipAlphaAsciiLoop1Start :: 279 | e -> 280 | Bytes -> -- chunk 281 | Result# e () 282 | {-# INLINE skipAlphaAsciiLoop1Start #-} 283 | skipAlphaAsciiLoop1Start e !c = 284 | if length c > 0 285 | then 286 | let w = indexLatinCharArray (array c) (offset c) 287 | in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z') 288 | then upcastUnitSuccess (skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c)) 289 | else (# e | #) 290 | else (# e | #) 291 | 292 | byteArrayToShortByteString :: PM.ByteArray -> BSS.ShortByteString 293 | {-# INLINE byteArrayToShortByteString #-} 294 | byteArrayToShortByteString (PM.ByteArray x) = BSS.SBS x 295 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Internal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE BinaryLiterals #-} 3 | {-# LANGUAGE DataKinds #-} 4 | {-# LANGUAGE DerivingStrategies #-} 5 | {-# LANGUAGE GADTSyntax #-} 6 | {-# LANGUAGE MagicHash #-} 7 | {-# LANGUAGE NamedFieldPuns #-} 8 | {-# LANGUAGE PolyKinds #-} 9 | {-# LANGUAGE RankNTypes #-} 10 | {-# LANGUAGE ScopedTypeVariables #-} 11 | {-# LANGUAGE UnboxedTuples #-} 12 | 13 | module Data.Bytes.Parser.Internal 14 | ( Parser (..) 15 | , Result (..) 16 | , InternalStep (..) 17 | , Bytes# 18 | , ST# 19 | , Result# 20 | , unfailing 21 | , uneffectful 22 | , uneffectful# 23 | , uneffectfulInt# 24 | , boxBytes 25 | , unboxBytes 26 | , unboxResult 27 | , fail 28 | , failByteArrayIntInt 29 | , indexLatinCharArray 30 | , upcastUnitSuccess 31 | -- Swapping 32 | , swapArray16 33 | , swapArray32 34 | , swapArray64 35 | , swapArray128 36 | , swapArray256 37 | ) where 38 | 39 | import Prelude hiding (any, fail, length, takeWhile) 40 | 41 | import Control.Applicative (Alternative) 42 | import Control.Monad.ST (runST) 43 | import Data.Bytes.Types (Bytes (..)) 44 | import Data.Kind (Type) 45 | import Data.Primitive (ByteArray (ByteArray)) 46 | import Data.Word (Word8) 47 | import GHC.Exts (ByteArray#, Char (C#), Int (I#), Int#, RuntimeRep, State#, TYPE) 48 | import GHC.Exts (RuntimeRep(IntRep, BoxedRep, TupleRep), Levity(Unlifted)) 49 | 50 | import qualified Control.Applicative 51 | import qualified Control.Monad 52 | import qualified Data.Primitive as PM 53 | import qualified GHC.Exts as Exts 54 | 55 | -- | A non-resumable parser. 56 | newtype Parser :: forall (r :: RuntimeRep). Type -> Type -> TYPE r -> Type where 57 | Parser :: 58 | forall (r :: RuntimeRep) (e :: Type) (s :: Type) (a :: TYPE r). 59 | {runParser :: (# ByteArray#, Int#, Int# #) -> ST# s (Result# e a)} -> 60 | Parser e s a 61 | 62 | -- The result of running a parser. Used internally. 63 | data Result e a 64 | = Failure e 65 | | -- An error message indicating what went wrong. 66 | Success !a !Int !Int 67 | 68 | -- The parsed value, the offset after the last consumed byte, and the 69 | -- number of bytes remaining in parsed slice. 70 | 71 | data InternalStep a = InternalStep !a !Int !Int 72 | 73 | uneffectful :: (Bytes -> Result e a) -> Parser e s a 74 | {-# INLINE uneffectful #-} 75 | uneffectful f = 76 | Parser 77 | (\b s0 -> (# s0, unboxResult (f (boxBytes b)) #)) 78 | 79 | -- This is like uneffectful but for parsers that always succeed. 80 | -- These combinators typically have names that begin with @try@. 81 | unfailing :: (Bytes -> InternalStep a) -> Parser e s a 82 | {-# INLINE unfailing #-} 83 | unfailing f = 84 | Parser 85 | (\b s0 -> (# s0, case f (boxBytes b) of InternalStep a (I# off) (I# len) -> (# | (# a, off, len #) #) #)) 86 | 87 | boxBytes :: Bytes# -> Bytes 88 | {-# INLINE boxBytes #-} 89 | boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c) 90 | 91 | unboxBytes :: Bytes -> Bytes# 92 | {-# INLINE unboxBytes #-} 93 | unboxBytes (Bytes (ByteArray a) (I# b) (I# c)) = (# a, b, c #) 94 | 95 | type Bytes# = (# ByteArray#, Int#, Int# #) 96 | type ST# s (a :: TYPE r) = State# s -> (# State# s, a #) 97 | type Result# e (a :: TYPE r) = 98 | (# 99 | e | 100 | (# a, Int#, Int# #) -- ints are offset and length 101 | #) 102 | 103 | unboxResult :: Result e a -> Result# e a 104 | {-# INLINE unboxResult #-} 105 | unboxResult (Success a (I# b) (I# c)) = (# | (# a, b, c #) #) 106 | unboxResult (Failure e) = (# e | #) 107 | 108 | {- | Combines the error messages using '<>' when both 109 | parsers fail. 110 | -} 111 | instance (Monoid e) => Alternative (Parser e s) where 112 | {-# INLINE empty #-} 113 | {-# INLINE (<|>) #-} 114 | empty = fail mempty 115 | Parser f <|> Parser g = 116 | Parser 117 | ( \x s0 -> case f x s0 of 118 | (# s1, r0 #) -> case r0 of 119 | (# eRight | #) -> case g x s1 of 120 | (# s2, r1 #) -> case r1 of 121 | (# eLeft | #) -> (# s2, (# eRight <> eLeft | #) #) 122 | (# | r #) -> (# s2, (# | r #) #) 123 | (# | r #) -> (# s1, (# | r #) #) 124 | ) 125 | 126 | -- | Fail with the provided error message. 127 | fail :: 128 | -- | Error message 129 | e -> 130 | Parser e s a 131 | {-# INLINE fail #-} 132 | fail e = uneffectful $ \_ -> Failure e 133 | 134 | failByteArrayIntInt :: forall e s (a :: TYPE ('TupleRep '[ 'BoxedRep 'Unlifted, 'IntRep, 'IntRep ])). 135 | -- | Error message 136 | e -> 137 | Parser e s a 138 | {-# INLINE failByteArrayIntInt #-} 139 | failByteArrayIntInt e = Parser (\_ s0 -> (# s0, (# e | #) #)) 140 | 141 | instance Applicative (Parser e s) where 142 | pure = pureParser 143 | (<*>) = Control.Monad.ap 144 | 145 | instance Monad (Parser e s) where 146 | {-# INLINE (>>=) #-} 147 | (>>=) = bindParser 148 | 149 | instance Functor (Parser e s) where 150 | {-# INLINE fmap #-} 151 | fmap f (Parser g) = 152 | Parser 153 | ( \x s0 -> case g x s0 of 154 | (# s1, r #) -> case r of 155 | (# e | #) -> (# s1, (# e | #) #) 156 | (# | (# a, b, c #) #) -> (# s1, (# | (# f a, b, c #) #) #) 157 | ) 158 | 159 | indexLatinCharArray :: ByteArray -> Int -> Char 160 | {-# INLINE indexLatinCharArray #-} 161 | indexLatinCharArray (ByteArray arr) (I# off) = 162 | C# (Exts.indexCharArray# arr off) 163 | 164 | uneffectful# :: (Bytes -> Result# e a) -> Parser e s a 165 | {-# INLINE uneffectful# #-} 166 | uneffectful# f = 167 | Parser 168 | (\b s0 -> (# s0, (f (boxBytes b)) #)) 169 | 170 | uneffectfulInt# :: (Bytes -> Result# e Int#) -> Parser e s Int# 171 | {-# INLINE uneffectfulInt# #-} 172 | uneffectfulInt# f = 173 | Parser 174 | (\b s0 -> (# s0, (f (boxBytes b)) #)) 175 | 176 | upcastUnitSuccess :: (# Int#, Int# #) -> Result# e () 177 | {-# INLINE upcastUnitSuccess #-} 178 | upcastUnitSuccess (# b, c #) = (# | (# (), b, c #) #) 179 | 180 | swapArray16 :: Bytes -> ByteArray 181 | swapArray16 (Bytes {array, offset, length}) = runST $ do 182 | dst <- PM.newByteArray length 183 | let go !ixSrc !ixDst !len = 184 | if len > 0 185 | then do 186 | let v0 = PM.indexByteArray array ixSrc :: Word8 187 | v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 188 | PM.writeByteArray dst ixDst v1 189 | PM.writeByteArray dst (ixDst + 1) v0 190 | go (ixSrc + 2) (ixDst + 2) (len - 2) 191 | else pure () 192 | go offset 0 length 193 | PM.unsafeFreezeByteArray dst 194 | 195 | swapArray32 :: Bytes -> ByteArray 196 | swapArray32 (Bytes {array, offset, length}) = runST $ do 197 | dst <- PM.newByteArray length 198 | let go !ixSrc !ixDst !len = 199 | if len > 0 200 | then do 201 | let v0 = PM.indexByteArray array ixSrc :: Word8 202 | v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 203 | v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 204 | v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 205 | PM.writeByteArray dst ixDst v3 206 | PM.writeByteArray dst (ixDst + 1) v2 207 | PM.writeByteArray dst (ixDst + 2) v1 208 | PM.writeByteArray dst (ixDst + 3) v0 209 | go (ixSrc + 4) (ixDst + 4) (len - 4) 210 | else pure () 211 | go offset 0 length 212 | PM.unsafeFreezeByteArray dst 213 | 214 | swapArray64 :: Bytes -> ByteArray 215 | swapArray64 (Bytes {array, offset, length}) = runST $ do 216 | dst <- PM.newByteArray length 217 | let go !ixSrc !ixDst !len = 218 | if len > 0 219 | then do 220 | let v0 = PM.indexByteArray array ixSrc :: Word8 221 | v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 222 | v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 223 | v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 224 | v4 = PM.indexByteArray array (ixSrc + 4) :: Word8 225 | v5 = PM.indexByteArray array (ixSrc + 5) :: Word8 226 | v6 = PM.indexByteArray array (ixSrc + 6) :: Word8 227 | v7 = PM.indexByteArray array (ixSrc + 7) :: Word8 228 | PM.writeByteArray dst ixDst v7 229 | PM.writeByteArray dst (ixDst + 1) v6 230 | PM.writeByteArray dst (ixDst + 2) v5 231 | PM.writeByteArray dst (ixDst + 3) v4 232 | PM.writeByteArray dst (ixDst + 4) v3 233 | PM.writeByteArray dst (ixDst + 5) v2 234 | PM.writeByteArray dst (ixDst + 6) v1 235 | PM.writeByteArray dst (ixDst + 7) v0 236 | go (ixSrc + 8) (ixDst + 8) (len - 8) 237 | else pure () 238 | go offset 0 length 239 | PM.unsafeFreezeByteArray dst 240 | 241 | swapArray128 :: Bytes -> ByteArray 242 | swapArray128 (Bytes {array, offset, length}) = runST $ do 243 | dst <- PM.newByteArray length 244 | let go !ixSrc !ixDst !len = 245 | if len > 0 246 | then do 247 | let v0 = PM.indexByteArray array ixSrc :: Word8 248 | v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 249 | v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 250 | v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 251 | v4 = PM.indexByteArray array (ixSrc + 4) :: Word8 252 | v5 = PM.indexByteArray array (ixSrc + 5) :: Word8 253 | v6 = PM.indexByteArray array (ixSrc + 6) :: Word8 254 | v7 = PM.indexByteArray array (ixSrc + 7) :: Word8 255 | v8 = PM.indexByteArray array (ixSrc + 8) :: Word8 256 | v9 = PM.indexByteArray array (ixSrc + 9) :: Word8 257 | v10 = PM.indexByteArray array (ixSrc + 10) :: Word8 258 | v11 = PM.indexByteArray array (ixSrc + 11) :: Word8 259 | v12 = PM.indexByteArray array (ixSrc + 12) :: Word8 260 | v13 = PM.indexByteArray array (ixSrc + 13) :: Word8 261 | v14 = PM.indexByteArray array (ixSrc + 14) :: Word8 262 | v15 = PM.indexByteArray array (ixSrc + 15) :: Word8 263 | PM.writeByteArray dst ixDst v15 264 | PM.writeByteArray dst (ixDst + 1) v14 265 | PM.writeByteArray dst (ixDst + 2) v13 266 | PM.writeByteArray dst (ixDst + 3) v12 267 | PM.writeByteArray dst (ixDst + 4) v11 268 | PM.writeByteArray dst (ixDst + 5) v10 269 | PM.writeByteArray dst (ixDst + 6) v9 270 | PM.writeByteArray dst (ixDst + 7) v8 271 | PM.writeByteArray dst (ixDst + 8) v7 272 | PM.writeByteArray dst (ixDst + 9) v6 273 | PM.writeByteArray dst (ixDst + 10) v5 274 | PM.writeByteArray dst (ixDst + 11) v4 275 | PM.writeByteArray dst (ixDst + 12) v3 276 | PM.writeByteArray dst (ixDst + 13) v2 277 | PM.writeByteArray dst (ixDst + 14) v1 278 | PM.writeByteArray dst (ixDst + 15) v0 279 | go (ixSrc + 16) (ixDst + 16) (len - 16) 280 | else pure () 281 | go offset 0 length 282 | PM.unsafeFreezeByteArray dst 283 | 284 | swapArray256 :: Bytes -> ByteArray 285 | swapArray256 (Bytes {array, offset, length}) = runST $ do 286 | dst <- PM.newByteArray length 287 | let go !ixSrc !ixDst !len = 288 | if len > 0 289 | then do 290 | let loop !i 291 | | i < 32 = do 292 | let v = PM.indexByteArray array (ixSrc + i) :: Word8 293 | PM.writeByteArray dst (ixDst + (31 - i)) v 294 | loop (i + 1) 295 | | otherwise = pure () 296 | loop 0 297 | go (ixSrc + 32) (ixDst + 32) (len - 32) 298 | else pure () 299 | go offset 0 length 300 | PM.unsafeFreezeByteArray dst 301 | 302 | pureParser :: a -> Parser e s a 303 | {-# INLINE pureParser #-} 304 | pureParser a = 305 | Parser 306 | (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) 307 | 308 | bindParser :: Parser e s a -> (a -> Parser e s b) -> Parser e s b 309 | {-# INLINE bindParser #-} 310 | bindParser (Parser f) g = 311 | Parser 312 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 313 | (# s1, r0 #) -> case r0 of 314 | (# e | #) -> (# s1, (# e | #) #) 315 | (# | (# y, b, c #) #) -> 316 | runParser (g y) (# arr, b, c #) s1 317 | ) 318 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Rebindable.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE DataKinds #-} 3 | {-# LANGUAGE FlexibleInstances #-} 4 | {-# LANGUAGE MultiParamTypeClasses #-} 5 | {-# LANGUAGE PolyKinds #-} 6 | {-# LANGUAGE RankNTypes #-} 7 | {-# LANGUAGE ScopedTypeVariables #-} 8 | {-# LANGUAGE UnboxedTuples #-} 9 | 10 | {- | Provides levity-polymorphic variants of @>>=@, @>>@, and @pure@ 11 | used to assemble parsers whose result types are unlifted. This 12 | cannot be used with the @RebindableSyntax@ extension because that 13 | extension disallows representations other than @LiftedRep@. Consequently, 14 | users of this module must manually desugar do notation. See the 15 | @url-bytes@ library for an example of this module in action. 16 | 17 | Only resort to the functions in this module after checking that 18 | GHC is unable to optimize away @I#@ and friends in your code. 19 | -} 20 | module Data.Bytes.Parser.Rebindable 21 | ( Bind (..) 22 | , Pure (..) 23 | ) where 24 | 25 | import Data.Bytes.Parser.Internal (Parser (..)) 26 | import GHC.Exts (RuntimeRep (..), TYPE) 27 | import Prelude () 28 | import GHC.Exts (LiftedRep) 29 | 30 | class Bind (ra :: RuntimeRep) (rb :: RuntimeRep) where 31 | (>>=) :: 32 | forall e s (a :: TYPE ra) (b :: TYPE rb). 33 | Parser e s a -> 34 | (a -> Parser e s b) -> 35 | Parser e s b 36 | (>>) :: 37 | forall e s (a :: TYPE ra) (b :: TYPE rb). 38 | Parser e s a -> 39 | Parser e s b -> 40 | Parser e s b 41 | 42 | class Pure (ra :: RuntimeRep) where 43 | pure :: forall e s (a :: TYPE ra). a -> Parser e s a 44 | 45 | pureParser :: a -> Parser e s a 46 | {-# INLINE pureParser #-} 47 | pureParser a = 48 | Parser 49 | (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) 50 | 51 | bindParser :: Parser e s a -> (a -> Parser e s b) -> Parser e s b 52 | {-# INLINE bindParser #-} 53 | bindParser (Parser f) g = 54 | Parser 55 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 56 | (# s1, r0 #) -> case r0 of 57 | (# e | #) -> (# s1, (# e | #) #) 58 | (# | (# y, b, c #) #) -> 59 | runParser (g y) (# arr, b, c #) s1 60 | ) 61 | 62 | sequenceParser :: Parser e s a -> Parser e s b -> Parser e s b 63 | {-# INLINE sequenceParser #-} 64 | sequenceParser (Parser f) (Parser g) = 65 | Parser 66 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 67 | (# s1, r0 #) -> case r0 of 68 | (# e | #) -> (# s1, (# e | #) #) 69 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 70 | ) 71 | 72 | pureIntParser :: 73 | forall (a :: TYPE 'IntRep) e s. 74 | a -> 75 | Parser e s a 76 | {-# INLINE pureIntParser #-} 77 | pureIntParser a = 78 | Parser 79 | (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) 80 | 81 | bindIntParser :: 82 | forall (a :: TYPE 'IntRep) e s b. 83 | Parser e s a -> 84 | (a -> Parser e s b) -> 85 | Parser e s b 86 | {-# INLINE bindIntParser #-} 87 | bindIntParser (Parser f) g = 88 | Parser 89 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 90 | (# s1, r0 #) -> case r0 of 91 | (# e | #) -> (# s1, (# e | #) #) 92 | (# | (# y, b, c #) #) -> 93 | runParser (g y) (# arr, b, c #) s1 94 | ) 95 | 96 | bindWordParser :: 97 | forall (a :: TYPE 'WordRep) e s b. 98 | Parser e s a -> 99 | (a -> Parser e s b) -> 100 | Parser e s b 101 | {-# INLINE bindWordParser #-} 102 | bindWordParser (Parser f) g = 103 | Parser 104 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 105 | (# s1, r0 #) -> case r0 of 106 | (# e | #) -> (# s1, (# e | #) #) 107 | (# | (# y, b, c #) #) -> 108 | runParser (g y) (# arr, b, c #) s1 109 | ) 110 | 111 | sequenceIntParser :: 112 | forall (a :: TYPE 'IntRep) e s b. 113 | Parser e s a -> 114 | Parser e s b -> 115 | Parser e s b 116 | {-# INLINE sequenceIntParser #-} 117 | sequenceIntParser (Parser f) (Parser g) = 118 | Parser 119 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 120 | (# s1, r0 #) -> case r0 of 121 | (# e | #) -> (# s1, (# e | #) #) 122 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 123 | ) 124 | 125 | sequenceWordParser :: 126 | forall (a :: TYPE 'WordRep) e s b. 127 | Parser e s a -> 128 | Parser e s b -> 129 | Parser e s b 130 | {-# INLINE sequenceWordParser #-} 131 | sequenceWordParser (Parser f) (Parser g) = 132 | Parser 133 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 134 | (# s1, r0 #) -> case r0 of 135 | (# e | #) -> (# s1, (# e | #) #) 136 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 137 | ) 138 | 139 | pureIntPairParser :: 140 | forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s. 141 | a -> 142 | Parser e s a 143 | {-# INLINE pureIntPairParser #-} 144 | pureIntPairParser a = 145 | Parser 146 | (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) 147 | 148 | bindIntPairParser :: 149 | forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s b. 150 | Parser e s a -> 151 | (a -> Parser e s b) -> 152 | Parser e s b 153 | {-# INLINE bindIntPairParser #-} 154 | bindIntPairParser (Parser f) g = 155 | Parser 156 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 157 | (# s1, r0 #) -> case r0 of 158 | (# e | #) -> (# s1, (# e | #) #) 159 | (# | (# y, b, c #) #) -> 160 | runParser (g y) (# arr, b, c #) s1 161 | ) 162 | 163 | pureInt5Parser :: 164 | forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s. 165 | a -> 166 | Parser e s a 167 | {-# INLINE pureInt5Parser #-} 168 | pureInt5Parser a = 169 | Parser 170 | (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) 171 | 172 | bindInt5Parser :: 173 | forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s b. 174 | Parser e s a -> 175 | (a -> Parser e s b) -> 176 | Parser e s b 177 | {-# INLINE bindInt5Parser #-} 178 | bindInt5Parser (Parser f) g = 179 | Parser 180 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 181 | (# s1, r0 #) -> case r0 of 182 | (# e | #) -> (# s1, (# e | #) #) 183 | (# | (# y, b, c #) #) -> 184 | runParser (g y) (# arr, b, c #) s1 185 | ) 186 | 187 | sequenceInt5Parser :: 188 | forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s b. 189 | Parser e s a -> 190 | Parser e s b -> 191 | Parser e s b 192 | {-# INLINE sequenceInt5Parser #-} 193 | sequenceInt5Parser (Parser f) (Parser g) = 194 | Parser 195 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 196 | (# s1, r0 #) -> case r0 of 197 | (# e | #) -> (# s1, (# e | #) #) 198 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 199 | ) 200 | 201 | sequenceIntPairParser :: 202 | forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s b. 203 | Parser e s a -> 204 | Parser e s b -> 205 | Parser e s b 206 | {-# INLINE sequenceIntPairParser #-} 207 | sequenceIntPairParser (Parser f) (Parser g) = 208 | Parser 209 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 210 | (# s1, r0 #) -> case r0 of 211 | (# e | #) -> (# s1, (# e | #) #) 212 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 213 | ) 214 | 215 | bindInt2to5Parser :: 216 | forall 217 | (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) 218 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) 219 | e 220 | s. 221 | Parser e s a -> 222 | (a -> Parser e s b) -> 223 | Parser e s b 224 | {-# INLINE bindInt2to5Parser #-} 225 | bindInt2to5Parser (Parser f) g = 226 | Parser 227 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 228 | (# s1, r0 #) -> case r0 of 229 | (# e | #) -> (# s1, (# e | #) #) 230 | (# | (# y, b, c #) #) -> 231 | runParser (g y) (# arr, b, c #) s1 232 | ) 233 | 234 | sequenceInt2to5Parser :: 235 | forall 236 | (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) 237 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) 238 | e 239 | s. 240 | Parser e s a -> 241 | Parser e s b -> 242 | Parser e s b 243 | {-# INLINE sequenceInt2to5Parser #-} 244 | sequenceInt2to5Parser (Parser f) (Parser g) = 245 | Parser 246 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 247 | (# s1, r0 #) -> case r0 of 248 | (# e | #) -> (# s1, (# e | #) #) 249 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 250 | ) 251 | 252 | instance Bind LiftedRep LiftedRep where 253 | {-# INLINE (>>=) #-} 254 | {-# INLINE (>>) #-} 255 | (>>=) = bindParser 256 | (>>) = sequenceParser 257 | 258 | instance Bind 'WordRep LiftedRep where 259 | {-# INLINE (>>=) #-} 260 | {-# INLINE (>>) #-} 261 | (>>=) = bindWordParser 262 | (>>) = sequenceWordParser 263 | 264 | instance Bind 'IntRep LiftedRep where 265 | {-# INLINE (>>=) #-} 266 | {-# INLINE (>>) #-} 267 | (>>=) = bindIntParser 268 | (>>) = sequenceIntParser 269 | 270 | instance Bind ('TupleRep '[ 'IntRep, 'IntRep]) LiftedRep where 271 | {-# INLINE (>>=) #-} 272 | {-# INLINE (>>) #-} 273 | (>>=) = bindIntPairParser 274 | (>>) = sequenceIntPairParser 275 | 276 | instance 277 | Bind 278 | ('TupleRep '[ 'IntRep, 'IntRep]) 279 | ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) 280 | where 281 | {-# INLINE (>>=) #-} 282 | {-# INLINE (>>) #-} 283 | (>>=) = bindInt2to5Parser 284 | (>>) = sequenceInt2to5Parser 285 | 286 | instance 287 | Bind 288 | ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) 289 | LiftedRep 290 | where 291 | {-# INLINE (>>=) #-} 292 | {-# INLINE (>>) #-} 293 | (>>=) = bindInt5Parser 294 | (>>) = sequenceInt5Parser 295 | 296 | instance 297 | Bind 298 | 'IntRep 299 | ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) 300 | where 301 | {-# INLINE (>>=) #-} 302 | {-# INLINE (>>) #-} 303 | (>>=) = bindFromIntToInt5 304 | (>>) = sequenceIntToInt5 305 | 306 | instance Bind LiftedRep ('TupleRep '[ 'IntRep, 'IntRep]) where 307 | {-# INLINE (>>=) #-} 308 | {-# INLINE (>>) #-} 309 | (>>=) = bindFromLiftedToIntPair 310 | (>>) = sequenceLiftedToIntPair 311 | 312 | instance 313 | Bind 314 | LiftedRep 315 | ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) 316 | where 317 | {-# INLINE (>>=) #-} 318 | {-# INLINE (>>) #-} 319 | (>>=) = bindFromLiftedToInt5 320 | (>>) = sequenceLiftedToInt5 321 | 322 | instance Bind 'IntRep ('TupleRep '[ 'IntRep, 'IntRep]) where 323 | {-# INLINE (>>=) #-} 324 | {-# INLINE (>>) #-} 325 | (>>=) = bindFromIntToIntPair 326 | (>>) = sequenceIntToIntPair 327 | 328 | instance Bind LiftedRep 'IntRep where 329 | {-# INLINE (>>=) #-} 330 | {-# INLINE (>>) #-} 331 | (>>=) = bindFromLiftedToInt 332 | (>>) = sequenceLiftedToInt 333 | 334 | instance Pure LiftedRep where 335 | {-# INLINE pure #-} 336 | pure = pureParser 337 | 338 | instance Pure 'IntRep where 339 | {-# INLINE pure #-} 340 | pure = pureIntParser 341 | 342 | instance Pure ('TupleRep '[ 'IntRep, 'IntRep]) where 343 | {-# INLINE pure #-} 344 | pure = pureIntPairParser 345 | 346 | instance Pure ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) where 347 | {-# INLINE pure #-} 348 | pure = pureInt5Parser 349 | 350 | bindFromIntToIntPair :: 351 | forall 352 | s 353 | e 354 | (a :: TYPE 'IntRep) 355 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])). 356 | Parser s e a -> 357 | (a -> Parser s e b) -> 358 | Parser s e b 359 | {-# INLINE bindFromIntToIntPair #-} 360 | bindFromIntToIntPair (Parser f) g = 361 | Parser 362 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 363 | (# s1, r0 #) -> case r0 of 364 | (# e | #) -> (# s1, (# e | #) #) 365 | (# | (# y, b, c #) #) -> 366 | runParser (g y) (# arr, b, c #) s1 367 | ) 368 | 369 | sequenceIntToIntPair :: 370 | forall 371 | s 372 | e 373 | (a :: TYPE 'IntRep) 374 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])). 375 | Parser s e a -> 376 | Parser s e b -> 377 | Parser s e b 378 | {-# INLINE sequenceIntToIntPair #-} 379 | sequenceIntToIntPair (Parser f) (Parser g) = 380 | Parser 381 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 382 | (# s1, r0 #) -> case r0 of 383 | (# e | #) -> (# s1, (# e | #) #) 384 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 385 | ) 386 | 387 | bindFromIntToInt5 :: 388 | forall 389 | s 390 | e 391 | (a :: TYPE 'IntRep) 392 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). 393 | Parser s e a -> 394 | (a -> Parser s e b) -> 395 | Parser s e b 396 | {-# INLINE bindFromIntToInt5 #-} 397 | bindFromIntToInt5 (Parser f) g = 398 | Parser 399 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 400 | (# s1, r0 #) -> case r0 of 401 | (# e | #) -> (# s1, (# e | #) #) 402 | (# | (# y, b, c #) #) -> 403 | runParser (g y) (# arr, b, c #) s1 404 | ) 405 | 406 | sequenceIntToInt5 :: 407 | forall 408 | s 409 | e 410 | (a :: TYPE 'IntRep) 411 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). 412 | Parser s e a -> 413 | Parser s e b -> 414 | Parser s e b 415 | {-# INLINE sequenceIntToInt5 #-} 416 | sequenceIntToInt5 (Parser f) (Parser g) = 417 | Parser 418 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 419 | (# s1, r0 #) -> case r0 of 420 | (# e | #) -> (# s1, (# e | #) #) 421 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 422 | ) 423 | 424 | bindFromLiftedToIntPair :: 425 | forall 426 | s 427 | e 428 | (a :: TYPE LiftedRep) 429 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])). 430 | Parser s e a -> 431 | (a -> Parser s e b) -> 432 | Parser s e b 433 | {-# INLINE bindFromLiftedToIntPair #-} 434 | bindFromLiftedToIntPair (Parser f) g = 435 | Parser 436 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 437 | (# s1, r0 #) -> case r0 of 438 | (# e | #) -> (# s1, (# e | #) #) 439 | (# | (# y, b, c #) #) -> 440 | runParser (g y) (# arr, b, c #) s1 441 | ) 442 | 443 | sequenceLiftedToIntPair :: 444 | forall 445 | s 446 | e 447 | (a :: TYPE LiftedRep) 448 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])). 449 | Parser s e a -> 450 | Parser s e b -> 451 | Parser s e b 452 | {-# INLINE sequenceLiftedToIntPair #-} 453 | sequenceLiftedToIntPair (Parser f) (Parser g) = 454 | Parser 455 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 456 | (# s1, r0 #) -> case r0 of 457 | (# e | #) -> (# s1, (# e | #) #) 458 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 459 | ) 460 | 461 | bindFromLiftedToInt5 :: 462 | forall 463 | s 464 | e 465 | (a :: TYPE LiftedRep) 466 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). 467 | Parser s e a -> 468 | (a -> Parser s e b) -> 469 | Parser s e b 470 | {-# INLINE bindFromLiftedToInt5 #-} 471 | bindFromLiftedToInt5 (Parser f) g = 472 | Parser 473 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 474 | (# s1, r0 #) -> case r0 of 475 | (# e | #) -> (# s1, (# e | #) #) 476 | (# | (# y, b, c #) #) -> 477 | runParser (g y) (# arr, b, c #) s1 478 | ) 479 | 480 | sequenceLiftedToInt5 :: 481 | forall 482 | s 483 | e 484 | (a :: TYPE LiftedRep) 485 | (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). 486 | Parser s e a -> 487 | Parser s e b -> 488 | Parser s e b 489 | {-# INLINE sequenceLiftedToInt5 #-} 490 | sequenceLiftedToInt5 (Parser f) (Parser g) = 491 | Parser 492 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 493 | (# s1, r0 #) -> case r0 of 494 | (# e | #) -> (# s1, (# e | #) #) 495 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 496 | ) 497 | 498 | bindFromLiftedToInt :: 499 | forall 500 | s 501 | e 502 | (a :: TYPE LiftedRep) 503 | (b :: TYPE 'IntRep). 504 | Parser s e a -> 505 | (a -> Parser s e b) -> 506 | Parser s e b 507 | {-# INLINE bindFromLiftedToInt #-} 508 | bindFromLiftedToInt (Parser f) g = 509 | Parser 510 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 511 | (# s1, r0 #) -> case r0 of 512 | (# e | #) -> (# s1, (# e | #) #) 513 | (# | (# y, b, c #) #) -> 514 | runParser (g y) (# arr, b, c #) s1 515 | ) 516 | 517 | sequenceLiftedToInt :: 518 | forall 519 | s 520 | e 521 | (a :: TYPE LiftedRep) 522 | (b :: TYPE 'IntRep). 523 | Parser s e a -> 524 | Parser s e b -> 525 | Parser s e b 526 | {-# INLINE sequenceLiftedToInt #-} 527 | sequenceLiftedToInt (Parser f) (Parser g) = 528 | Parser 529 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 530 | (# s1, r0 #) -> case r0 of 531 | (# e | #) -> (# s1, (# e | #) #) 532 | (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 533 | ) 534 | -------------------------------------------------------------------------------- /test/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE DataKinds #-} 4 | {-# LANGUAGE MagicHash #-} 5 | {-# LANGUAGE OverloadedStrings #-} 6 | {-# LANGUAGE ScopedTypeVariables #-} 7 | {-# LANGUAGE TypeApplications #-} 8 | {-# OPTIONS_GHC -fno-warn-orphans #-} 9 | 10 | import Control.Monad (replicateM) 11 | import Control.Monad.ST (runST) 12 | import Data.Bytes.Parser (Slice (Slice)) 13 | import Data.Bytes.Types (Bytes (Bytes)) 14 | import Data.Char (ord) 15 | import Data.Coerce (coerce) 16 | import Data.Int (Int16, Int32) 17 | import Data.Primitive (ByteArray (..), PrimArray (..)) 18 | import Data.Text.Short (ShortText) 19 | import Data.WideWord (Word128 (Word128)) 20 | import Data.Word (Word16, Word32, Word64, Word8) 21 | import Numeric.Natural (Natural) 22 | import System.ByteOrder (ByteOrder (BigEndian, LittleEndian), Fixed (..)) 23 | import Test.Tasty (TestTree, defaultMain, testGroup) 24 | import Test.Tasty.HUnit (testCase, (@=?)) 25 | import Test.Tasty.QuickCheck (testProperty, (===)) 26 | 27 | import qualified Data.Bits as Bits 28 | import qualified Data.Bytes as Bytes 29 | import qualified Data.Bytes.Parser as P 30 | import qualified Data.Bytes.Parser.Ascii as Ascii 31 | import qualified Data.Bytes.Parser.Base128 as Base128 32 | import qualified Data.Bytes.Parser.BigEndian as BigEndian 33 | import qualified Data.Bytes.Parser.Latin as Latin 34 | import qualified Data.Bytes.Parser.Leb128 as Leb128 35 | import qualified Data.Bytes.Parser.LittleEndian as LittleEndian 36 | import qualified Data.List as List 37 | import qualified Data.Primitive as PM 38 | import qualified GHC.Exts as Exts 39 | import qualified Test.Tasty.QuickCheck as QC 40 | 41 | main :: IO () 42 | main = defaultMain tests 43 | 44 | tests :: TestTree 45 | tests = 46 | testGroup 47 | "Parser" 48 | [ testProperty "decStandardInt" $ \i -> 49 | withSz (show i) $ \str len -> 50 | P.parseBytes (Latin.decStandardInt ()) str 51 | === P.Success (Slice len 0 i) 52 | , testProperty "big-endian-word16-array" $ \(xs :: [Word16]) -> 53 | let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word16]) 54 | res = Exts.fromList xs :: PrimArray Word16 55 | sz = length xs * 2 56 | bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz 57 | in P.Success (Slice (sz + 1) 0 res) 58 | === P.parseBytes (BigEndian.word16Array () (length xs)) bs 59 | , testProperty "big-endian-word32-array" $ \(xs :: [Word32]) -> 60 | let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word32]) 61 | res = Exts.fromList xs :: PrimArray Word32 62 | sz = length xs * 4 63 | bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz 64 | in P.Success (Slice (sz + 1) 0 res) 65 | === P.parseBytes (BigEndian.word32Array () (length xs)) bs 66 | , testProperty "little-endian-word32-array" $ \(xs :: [Word32]) -> 67 | let src = Exts.fromList (coerce xs :: [Fixed 'LittleEndian Word32]) 68 | res = Exts.fromList xs :: PrimArray Word32 69 | sz = length xs * 4 70 | bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz 71 | in P.Success (Slice (sz + 1) 0 res) 72 | === P.parseBytes (LittleEndian.word32Array () (length xs)) bs 73 | , testProperty "big-endian-word64-array" $ \(xs :: [Word64]) -> 74 | let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word64]) 75 | res = Exts.fromList xs :: PrimArray Word64 76 | sz = length xs * 8 77 | bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz 78 | in P.Success (Slice (sz + 1) 0 res) 79 | === P.parseBytes (BigEndian.word64Array () (length xs)) bs 80 | , testProperty "little-endian-word64-array" $ \(xs :: [Word64]) -> 81 | let src = Exts.fromList (coerce xs :: [Fixed 'LittleEndian Word64]) 82 | res = Exts.fromList xs :: PrimArray Word64 83 | sz = length xs * 8 84 | bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz 85 | in P.Success (Slice (sz + 1) 0 res) 86 | === P.parseBytes (LittleEndian.word64Array () (length xs)) bs 87 | , testProperty "little-endian-word128-array" $ \(xs :: [Word128]) -> 88 | let src = Exts.fromList xs 89 | sz = length xs * 16 90 | bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz 91 | in P.parseBytes (replicateM (length xs) (LittleEndian.word128 ())) bs 92 | === P.parseBytes (fmap Exts.toList (LittleEndian.word128Array () (length xs))) bs 93 | , testProperty "big-endian-word128-array" $ \(xs :: [Word128]) -> 94 | let src = Exts.fromList xs 95 | sz = length xs * 16 96 | bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz 97 | in P.parseBytes (replicateM (length xs) (BigEndian.word128 ())) bs 98 | === P.parseBytes (fmap Exts.toList (BigEndian.word128Array () (length xs))) bs 99 | , testProperty "cstring" $ \(xs :: [Word8]) -> 100 | let ys = Exts.fromList xs 101 | bs = Bytes.singleton 0x31 <> ys 102 | in P.parseBytes (P.cstring () (Exts.Ptr "1"#) *> P.bytes () ys *> pure 42) bs 103 | === (P.Success (Slice (Bytes.length ys + 1) 0 42) :: P.Result () Integer) 104 | , testCase "big-endian-word256" $ 105 | P.parseBytesMaybe 106 | (BigEndian.word256Array () 1) 107 | ( Exts.fromList 108 | [ 0x12 109 | , 0x34 110 | , 0x56 111 | , 0x78 112 | , 0x90 113 | , 0x12 114 | , 0x34 115 | , 0x56 116 | , 0x78 117 | , 0x90 118 | , 0x12 119 | , 0x34 120 | , 0x56 121 | , 0x78 122 | , 0x90 123 | , 0x12 124 | , 0x12 125 | , 0x34 126 | , 0x56 127 | , 0x78 128 | , 0x90 129 | , 0x12 130 | , 0x34 131 | , 0x56 132 | , 0x78 133 | , 0x90 134 | , 0x12 135 | , 0x34 136 | , 0x56 137 | , 0x78 138 | , 0x90 139 | , 0x12 140 | ] 141 | ) 142 | @=? Just (Exts.fromList [0x1234567890123456789012345678901212345678901234567890123456789012]) 143 | , testProperty "big-endian-word64" bigEndianWord64 144 | , testProperty "big-endian-word32" bigEndianWord32 145 | , testProperty "little-endian-word32" littleEndianWord32 146 | , testCase "delimit" $ 147 | P.Success (Slice 13 0 (167, 14625)) 148 | @=? P.parseBytes 149 | ( do 150 | len <- Latin.decUnsignedInt () 151 | Latin.char () ',' 152 | r <- 153 | P.delimit () () len $ 154 | (,) 155 | <$> Latin.decUnsignedInt () 156 | <* Latin.char () '*' 157 | <*> Latin.decUnsignedInt () 158 | Latin.char () '0' 159 | pure r 160 | ) 161 | (bytes "9,167*146250") 162 | , testGroup 163 | "decUnsignedInt" 164 | [ testCase "A" $ 165 | P.Failure () 166 | @=? P.parseBytes 167 | (Latin.decUnsignedInt ()) 168 | (bytes "742493495120739103935542") 169 | , testCase "B" $ 170 | P.Success (Slice 8 3 4654667) 171 | @=? P.parseBytes 172 | (Latin.decUnsignedInt ()) 173 | (bytes "4654667,55") 174 | , testCase "C" $ 175 | P.Failure () 176 | @=? P.parseBytes 177 | (Latin.decUnsignedInt ()) 178 | (bytes ('1' : show (maxBound :: Int))) 179 | , testCase "D" $ 180 | P.Failure () 181 | @=? P.parseBytes 182 | (Latin.decUnsignedInt ()) 183 | (bytes "2481030337885070917891") 184 | , testCase "E" $ 185 | P.Failure () 186 | @=? P.parseBytes 187 | (Latin.decUnsignedInt ()) 188 | (bytes (show (fromIntegral @Int @Word maxBound + 1))) 189 | , testCase "F" $ withSz (show (maxBound :: Int)) $ \str len -> 190 | P.Success (Slice len 0 maxBound) 191 | @=? P.parseBytes (Latin.decUnsignedInt ()) str 192 | , testProperty "property" $ \(QC.NonNegative i) -> 193 | withSz (show i) $ \str len -> 194 | P.parseBytes (Latin.decUnsignedInt ()) str 195 | === P.Success (Slice len 0 i) 196 | ] 197 | , testGroup 198 | "hexNibbleLower" 199 | [ testCase "A" $ 200 | P.parseBytes (Latin.hexNibbleLower ()) (bytes "Ab") @=? P.Failure () 201 | , testCase "B" $ 202 | P.parseBytes (Latin.hexNibbleLower ()) (bytes "bA") @=? P.Success (Slice 2 1 0xb) 203 | , testCase "C" $ 204 | P.parseBytes (Latin.hexNibbleLower ()) (bytes "") @=? P.Failure () 205 | ] 206 | , testGroup 207 | "tryHexNibbleLower" 208 | [ testCase "A" $ 209 | P.Success @() (Slice 1 2 Nothing) 210 | @=? P.parseBytes Latin.tryHexNibbleLower (bytes "Ab") 211 | , testCase "B" $ 212 | P.Success @() (Slice 2 1 (Just 0xb)) 213 | @=? P.parseBytes Latin.tryHexNibbleLower (bytes "bA") 214 | , testCase "C" $ 215 | P.Success @() (Slice 1 0 Nothing) 216 | @=? P.parseBytes Latin.tryHexNibbleLower (bytes "") 217 | ] 218 | , testGroup 219 | "decPositiveInteger" 220 | [ testCase "A" $ 221 | P.parseBytes 222 | (Latin.decUnsignedInteger ()) 223 | (bytes "5469999463123462573426452736423546373235260") 224 | @=? P.Success 225 | (Slice 44 0 5469999463123462573426452736423546373235260) 226 | , testProperty "property" $ \(LargeInteger i) -> 227 | withSz (show i) $ \str len -> 228 | i 229 | >= 0 230 | QC.==> P.parseBytes (Latin.decUnsignedInteger ()) str 231 | === P.Success (Slice len 0 i) 232 | ] 233 | , testGroup 234 | "decTrailingInteger" 235 | [ testProperty "property" $ \(LargeInteger i) -> 236 | withSz (show i) $ \str sz -> 237 | i 238 | >= 0 239 | QC.==> P.parseBytes (Latin.decTrailingInteger 2) str 240 | === (P.Success (Slice sz 0 (read ('2' : show i) :: Integer)) :: P.Result () Integer) 241 | ] 242 | , testGroup 243 | "decSignedInteger" 244 | [ testCase "A" $ 245 | P.parseBytes 246 | (Latin.decSignedInteger ()) 247 | (bytes "-54699994631234625734264527364235463732352601") 248 | @=? P.Success 249 | ( Slice 250 | 46 251 | 0 252 | (-54699994631234625734264527364235463732352601) 253 | ) 254 | , testCase "B" $ 255 | P.Success (Slice 25 0 (3, (-206173954435705292503))) 256 | @=? P.parseBytes 257 | ( pure (,) 258 | <*> Latin.decSignedInteger () 259 | <* Latin.char () 'e' 260 | <*> Latin.decSignedInteger () 261 | ) 262 | (bytes "3e-206173954435705292503") 263 | , testProperty "property" $ \(LargeInteger i) -> 264 | withSz (show i) $ \str len -> 265 | P.parseBytes (Latin.decSignedInteger ()) str 266 | === P.Success (Slice len 0 i) 267 | ] 268 | , testGroup 269 | "decSignedInt" 270 | [ testProperty "A" $ \i -> withSz (show i) $ \str len -> 271 | P.parseBytes (Latin.decSignedInt ()) str 272 | === P.Success (Slice len 0 i) 273 | , testProperty "B" $ \i -> 274 | let s = (if i >= 0 then "+" else "") ++ show i 275 | in withSz s $ \str len -> 276 | P.parseBytes (Latin.decSignedInt ()) str 277 | === P.Success (Slice len 0 i) 278 | , testCase "C" $ 279 | P.Failure () 280 | @=? P.parseBytes 281 | (Latin.decSignedInt ()) 282 | (bytes ('1' : show (maxBound :: Int))) 283 | , testCase "D" $ 284 | P.Failure () 285 | @=? P.parseBytes 286 | (Latin.decSignedInt ()) 287 | (bytes ('-' : '3' : show (maxBound :: Int))) 288 | , testCase "E" $ 289 | P.Failure () 290 | @=? P.parseBytes 291 | (Latin.decSignedInt ()) 292 | (bytes "2481030337885070917891") 293 | , testCase "F" $ 294 | P.Failure () 295 | @=? P.parseBytes 296 | (Latin.decSignedInt ()) 297 | (bytes "-4305030950553840988981") 298 | , testCase "G" $ withSz (show (minBound :: Int)) $ \str len -> 299 | P.Success (Slice len 0 minBound) 300 | @=? P.parseBytes (Latin.decSignedInt ()) str 301 | , testCase "H" $ withSz (show (maxBound :: Int)) $ \str len -> 302 | P.Success (Slice len 0 maxBound) 303 | @=? P.parseBytes (Latin.decSignedInt ()) str 304 | , testCase "I" $ 305 | P.Failure () 306 | @=? P.parseBytes 307 | (Latin.decSignedInt ()) 308 | (bytes (show (fromIntegral @Int @Word maxBound + 1))) 309 | , testCase "J" $ 310 | -- This is one number lower than the minimum bound for 311 | -- a signed 64-bit number, but this test will pass on 312 | -- 32-bit architectures as well. 313 | P.Failure () 314 | @=? P.parseBytes 315 | (Latin.decSignedInt ()) 316 | (bytes "-9223372036854775809") 317 | ] 318 | , testGroup 319 | "decWord64" 320 | [ testCase "A" $ 321 | P.Failure () 322 | @=? P.parseBytes 323 | (Latin.decWord64 ()) 324 | (bytes "2481030337885070917891") 325 | ] 326 | , testCase "decWord-composition" $ 327 | P.Success (Slice 6 0 (42, 8)) 328 | @=? P.parseBytes 329 | ( pure (,) 330 | <*> Ascii.decWord () 331 | <* Ascii.char () '.' 332 | <*> Ascii.decWord () 333 | <* Ascii.char () '.' 334 | ) 335 | (bytes "42.8.") 336 | , testCase "decWord-replicate" $ 337 | P.Success (Slice 7 0 (Exts.fromList [42, 93] :: PrimArray Word)) 338 | @=? P.parseBytes 339 | (P.replicate 2 (Ascii.decWord () <* Ascii.char () '.')) 340 | (bytes "42.93.") 341 | , testCase "ascii-takeShortWhile" $ 342 | P.Success (Slice 11 0 (Exts.fromList ["the", "world"] :: PM.Array ShortText)) 343 | @=? P.parseBytes 344 | (P.replicate 2 (Ascii.takeShortWhile (/= ',') <* Ascii.char () ',')) 345 | (bytes "the,world,") 346 | , testGroup 347 | "hexFixedWord8" 348 | [ testCase "A" $ 349 | P.parseBytes (Latin.hexFixedWord8 ()) (bytes "A") @=? P.Failure () 350 | , testCase "B" $ 351 | P.parseBytes (Latin.hexFixedWord8 ()) (bytes "0A") @=? P.Success (Slice 3 0 0x0A) 352 | , testCase "C" $ 353 | P.parseBytes (Latin.hexFixedWord8 ()) (bytes "") @=? P.Failure () 354 | , testCase "D" $ 355 | P.parseBytes (Latin.hexFixedWord8 ()) (bytes "A!") @=? P.Failure () 356 | ] 357 | , testGroup 358 | "hexFixedWord16" 359 | [ testCase "A" $ 360 | P.parseBytes (Latin.hexFixedWord16 ()) (bytes "A") @=? P.Failure () 361 | , testCase "B" $ 362 | P.parseBytes (Latin.hexFixedWord16 ()) (bytes "0A0A") @=? P.Success (Slice 5 0 0x0A0A) 363 | , testCase "C" $ 364 | P.parseBytes (Latin.hexFixedWord16 ()) (bytes "") @=? P.Failure () 365 | , testCase "D" $ 366 | P.parseBytes (Latin.hexFixedWord16 ()) (bytes "A!A!") @=? P.Failure () 367 | ] 368 | , testGroup 369 | "hexFixedWord32" 370 | [ testCase "A" $ 371 | P.parseBytes (Latin.hexFixedWord32 ()) (bytes "A") @=? P.Failure () 372 | , testCase "B" $ 373 | P.parseBytes (Latin.hexFixedWord32 ()) (bytes "0A0A0A0A") @=? P.Success (Slice 9 0 0x0A0A0A0A) 374 | , testCase "C" $ 375 | P.parseBytes (Latin.hexFixedWord32 ()) (bytes "") @=? P.Failure () 376 | , testCase "D" $ 377 | P.parseBytes (Latin.hexFixedWord32 ()) (bytes "A!A0A0A0") @=? P.Failure () 378 | ] 379 | , testGroup 380 | "hexFixedWord64" 381 | [ testCase "A" $ 382 | P.parseBytes (Latin.hexFixedWord64 ()) (bytes "ABCD01235678BCDE") 383 | @=? P.Success 384 | (Slice 17 0 0xABCD01235678BCDE) 385 | ] 386 | , testGroup 387 | "base128-w32" 388 | [ testCase "A" $ 389 | P.Success (Slice 2 0 0x7E) 390 | @=? P.parseBytes (Base128.word32 ()) (bytes "\x7E") 391 | , testCase "B" $ 392 | P.Success (Slice 5 0 0x200000) 393 | @=? P.parseBytes (Base128.word32 ()) (bytes "\x81\x80\x80\x00") 394 | , testCase "C" $ 395 | P.Success (Slice 4 0 1656614) 396 | @=? P.parseBytes (Base128.word32 ()) (bytes "\xE5\x8E\x26") 397 | -- , testProperty "iso" $ \w -> -- TODO 398 | -- P.parseBytesMaybe (Base.word32 ()) (encodeBase128 (fromIntegral w)) 399 | -- === 400 | -- Just w 401 | ] 402 | , testGroup 403 | "leb128-w32" 404 | [ testCase "A" $ 405 | P.Success (Slice 2 0 0x7E) 406 | @=? P.parseBytes (Leb128.word32 ()) (bytes "\x7E") 407 | , testCase "B" $ 408 | P.Success (Slice 5 0 0x200000) 409 | @=? P.parseBytes (Leb128.word32 ()) (bytes "\x80\x80\x80\x01") 410 | , testCase "C" $ 411 | P.Success (Slice 4 0 624485) 412 | @=? P.parseBytes (Leb128.word32 ()) (bytes "\xE5\x8E\x26") 413 | , testProperty "iso" $ \w -> 414 | P.parseBytesMaybe (Leb128.word32 ()) (encodeLeb128 (fromIntegral w)) 415 | === Just w 416 | ] 417 | , testGroup 418 | "leb128-w16" 419 | [ testCase "A" $ 420 | P.Failure () 421 | @=? P.parseBytes (Leb128.word16 ()) (bytes "\x80\x80\x04") 422 | , testCase "B" $ 423 | P.Success (Slice 4 0 0xFFFF) 424 | @=? P.parseBytes (Leb128.word16 ()) (bytes "\xFF\xFF\x03") 425 | , testProperty "iso" $ \w -> 426 | P.parseBytesMaybe (Leb128.word16 ()) (encodeLeb128 (fromIntegral w)) 427 | === Just w 428 | ] 429 | , testGroup 430 | "leb128-i16" 431 | [ testProperty "iso" $ \(w :: Int16) -> 432 | P.parseBytesMaybe 433 | (Leb128.int16 ()) 434 | (encodeLeb128 (fromIntegral @Word16 @Natural (zigzag16 w))) 435 | === Just w 436 | ] 437 | , testGroup 438 | "leb128-i32" 439 | [ testProperty "iso" $ \(w :: Int32) -> 440 | P.parseBytesMaybe 441 | (Leb128.int32 ()) 442 | (encodeLeb128 (fromIntegral @Word32 @Natural (zigzag32 w))) 443 | === Just w 444 | ] 445 | , testGroup 446 | "satisfy" 447 | [ testCase "A" $ 448 | P.Success (Slice 2 0 0x20) 449 | @=? P.parseBytes (P.satisfy () (== 0x20)) (bytes "\x20") 450 | ] 451 | ] 452 | 453 | bytes :: String -> Bytes 454 | bytes s = let b = pack ('x' : s) in Bytes b 1 (PM.sizeofByteArray b - 1) 455 | 456 | pack :: String -> ByteArray 457 | pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord) 458 | 459 | bigEndianWord64 :: 460 | Word8 -> 461 | Word8 -> 462 | Word8 -> 463 | Word8 -> 464 | Word8 -> 465 | Word8 -> 466 | Word8 -> 467 | Word8 -> 468 | QC.Property 469 | bigEndianWord64 a b c d e f g h = 470 | let arr = runST $ do 471 | m <- PM.newByteArray 11 472 | PM.writeByteArray m 0 (0xFF :: Word8) 473 | PM.writeByteArray m 1 (0xFF :: Word8) 474 | PM.writeByteArray m 2 (a :: Word8) 475 | PM.writeByteArray m 3 (b :: Word8) 476 | PM.writeByteArray m 4 (c :: Word8) 477 | PM.writeByteArray m 5 (d :: Word8) 478 | PM.writeByteArray m 6 (e :: Word8) 479 | PM.writeByteArray m 7 (f :: Word8) 480 | PM.writeByteArray m 8 (g :: Word8) 481 | PM.writeByteArray m 9 (h :: Word8) 482 | PM.writeByteArray m 10 (0xEE :: Word8) 483 | PM.unsafeFreezeByteArray m 484 | expected = 485 | (0 :: Word64) 486 | + fromIntegral a * 256 ^ (7 :: Integer) 487 | + fromIntegral b * 256 ^ (6 :: Integer) 488 | + fromIntegral c * 256 ^ (5 :: Integer) 489 | + fromIntegral d * 256 ^ (4 :: Integer) 490 | + fromIntegral e * 256 ^ (3 :: Integer) 491 | + fromIntegral f * 256 ^ (2 :: Integer) 492 | + fromIntegral g * 256 ^ (1 :: Integer) 493 | + fromIntegral h * 256 ^ (0 :: Integer) 494 | in P.parseBytes (BigEndian.word64 ()) (Bytes arr 2 9) 495 | === P.Success (Slice 10 1 expected) 496 | 497 | bigEndianWord32 :: 498 | Word8 -> 499 | Word8 -> 500 | Word8 -> 501 | Word8 -> 502 | QC.Property 503 | bigEndianWord32 a b c d = 504 | let arr = runST $ do 505 | m <- PM.newByteArray 7 506 | PM.writeByteArray m 0 (0xFF :: Word8) 507 | PM.writeByteArray m 1 (0xFF :: Word8) 508 | PM.writeByteArray m 2 (a :: Word8) 509 | PM.writeByteArray m 3 (b :: Word8) 510 | PM.writeByteArray m 4 (c :: Word8) 511 | PM.writeByteArray m 5 (d :: Word8) 512 | PM.writeByteArray m 6 (0xEE :: Word8) 513 | PM.unsafeFreezeByteArray m 514 | expected = 515 | (0 :: Word32) 516 | + fromIntegral a * 256 ^ (3 :: Integer) 517 | + fromIntegral b * 256 ^ (2 :: Integer) 518 | + fromIntegral c * 256 ^ (1 :: Integer) 519 | + fromIntegral d * 256 ^ (0 :: Integer) 520 | in P.parseBytes (BigEndian.word32 ()) (Bytes arr 2 5) 521 | === P.Success (Slice 6 1 expected) 522 | 523 | littleEndianWord32 :: 524 | Word8 -> 525 | Word8 -> 526 | Word8 -> 527 | Word8 -> 528 | QC.Property 529 | littleEndianWord32 a b c d = 530 | let arr = runST $ do 531 | m <- PM.newByteArray 7 532 | PM.writeByteArray m 0 (0xFF :: Word8) 533 | PM.writeByteArray m 1 (0xFF :: Word8) 534 | PM.writeByteArray m 2 (a :: Word8) 535 | PM.writeByteArray m 3 (b :: Word8) 536 | PM.writeByteArray m 4 (c :: Word8) 537 | PM.writeByteArray m 5 (d :: Word8) 538 | PM.writeByteArray m 6 (0xEE :: Word8) 539 | PM.unsafeFreezeByteArray m 540 | expected = 541 | (0 :: Word32) 542 | + fromIntegral a * 256 ^ (0 :: Integer) 543 | + fromIntegral b * 256 ^ (1 :: Integer) 544 | + fromIntegral c * 256 ^ (2 :: Integer) 545 | + fromIntegral d * 256 ^ (3 :: Integer) 546 | in P.parseBytes (LittleEndian.word32 ()) (Bytes arr 2 5) 547 | === P.Success (Slice 6 1 expected) 548 | 549 | -- The Arbitrary instance for Integer that comes with 550 | -- QuickCheck only generates small numbers. 551 | newtype LargeInteger = LargeInteger Integer 552 | deriving (Eq, Show) 553 | 554 | instance QC.Arbitrary Word128 where 555 | arbitrary = liftA2 Word128 QC.arbitrary QC.arbitrary 556 | 557 | instance QC.Arbitrary LargeInteger where 558 | arbitrary = do 559 | n <- QC.choose (1, 27) 560 | sign <- QC.arbitrary 561 | r <- 562 | (if sign then negate else id) . foldr f 0 563 | <$> replicateM n QC.arbitrary 564 | pure (LargeInteger r) 565 | where 566 | f :: Word8 -> Integer -> Integer 567 | f w acc = (acc `Bits.shiftL` 8) + fromIntegral w 568 | 569 | -- We add an extra 1 since bytes gives us a slice that 570 | -- starts at that offset. 571 | withSz :: String -> (Bytes -> Int -> a) -> a 572 | withSz str f = f (bytes str) (length str + 1) 573 | 574 | untype :: PrimArray a -> ByteArray 575 | untype (PrimArray x) = ByteArray x 576 | 577 | encodeLeb128 :: Natural -> Bytes 578 | encodeLeb128 x = Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x)) 579 | where 580 | go !xs !n = 581 | let (q, r) = quotRem n 128 582 | r' = fromIntegral @Natural @Word8 r 583 | w = 584 | if q == 0 585 | then r' 586 | else Bits.setBit r' 7 587 | xs' = w : xs 588 | in if q == 0 589 | then List.reverse xs' 590 | else go xs' q 591 | 592 | -- x zigzagInteger :: Integer -> Natural 593 | -- x zigzagInteger x 594 | -- x | x>=0 = fromInteger (x `Bits.shiftL` 1) 595 | -- x | otherwise = fromInteger (negate (x `Bits.shiftL` 1) - 1) 596 | 597 | zigzag16 :: Int16 -> Word16 598 | zigzag16 x = fromIntegral ((x `Bits.shiftL` 1) `Bits.xor` (x `Bits.shiftR` 15)) 599 | 600 | zigzag32 :: Int32 -> Word32 601 | zigzag32 x = fromIntegral ((x `Bits.shiftL` 1) `Bits.xor` (x `Bits.shiftR` 31)) 602 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE BinaryLiterals #-} 3 | {-# LANGUAGE CPP #-} 4 | {-# LANGUAGE DataKinds #-} 5 | {-# LANGUAGE DerivingStrategies #-} 6 | {-# LANGUAGE GADTSyntax #-} 7 | {-# LANGUAGE LambdaCase #-} 8 | {-# LANGUAGE MagicHash #-} 9 | {-# LANGUAGE MultiWayIf #-} 10 | {-# LANGUAGE PolyKinds #-} 11 | {-# LANGUAGE RankNTypes #-} 12 | {-# LANGUAGE ScopedTypeVariables #-} 13 | {-# LANGUAGE TypeApplications #-} 14 | {-# LANGUAGE UnboxedTuples #-} 15 | 16 | {- | Parse non-resumable sequence of bytes. To parse a byte sequence 17 | as text, use the @Ascii@, @Latin@, and @Utf8@ modules instead. 18 | Functions for parsing decimal-encoded numbers are found in those 19 | modules. 20 | -} 21 | module Data.Bytes.Parser 22 | ( -- * Types 23 | Parser 24 | , Result (..) 25 | , Slice (..) 26 | 27 | -- * Run Parsers 28 | 29 | -- ** Result 30 | , parseByteArray 31 | , parseBytes 32 | , parseBytesEffectfully 33 | , parseBytesEither 34 | , parseBytesMaybe 35 | 36 | -- * One Byte 37 | , any 38 | 39 | -- * Many Bytes 40 | , take 41 | , takeN 42 | , takeUpTo 43 | , takeWhile 44 | , takeTrailedBy 45 | 46 | -- * Skip 47 | , skipWhile 48 | , skipTrailedBy 49 | , skipTrailedBy2 50 | , skipTrailedBy2# 51 | , skipTrailedBy3# 52 | 53 | -- * Match 54 | , byteArray 55 | , bytes 56 | , satisfy 57 | , satisfyWith 58 | , cstring 59 | 60 | -- * End of Input 61 | , endOfInput 62 | , isEndOfInput 63 | , remaining 64 | , peekRemaining 65 | 66 | -- * Scanning 67 | , scan 68 | 69 | -- * Lookahead 70 | , peek 71 | , peek' 72 | 73 | -- * Control Flow 74 | , fail 75 | , orElse 76 | , annotate 77 | , () 78 | , mapErrorEffectfully 79 | 80 | -- * Repetition 81 | , replicate 82 | , listUntilEoi 83 | 84 | -- * Subparsing 85 | , delimit 86 | , measure 87 | , measure_ 88 | , measure_# 89 | 90 | -- * Lift Effects 91 | , effect 92 | 93 | -- * Box Result 94 | , boxWord32 95 | , boxIntPair 96 | 97 | -- * Unbox Result 98 | , unboxWord32 99 | , unboxIntPair 100 | 101 | -- * Specialized Bind 102 | 103 | -- | Sometimes, GHC ends up building join points in a way that 104 | -- boxes arguments unnecessarily. In this situation, special variants 105 | -- of monadic @>>=@ can be helpful. If @C#@, @I#@, etc. never 106 | -- get used in your original source code, GHC will not introduce them. 107 | , bindFromCharToLifted 108 | , bindFromCharToByteArrayIntInt 109 | , bindFromWordToByteArrayIntInt 110 | , bindFromLiftedToIntPair 111 | , bindFromLiftedToInt 112 | , bindFromIntToIntPair 113 | , bindFromCharToIntPair 114 | , bindFromLiftedToByteArrayIntInt 115 | , bindFromByteArrayIntIntToLifted 116 | , bindFromMaybeCharToIntPair 117 | , bindFromMaybeCharToLifted 118 | 119 | -- * Specialized Pure 120 | , pureIntPair 121 | , pureByteArrayIntInt 122 | 123 | -- * Specialized Fail 124 | , failIntPair 125 | , failByteArrayIntInt 126 | ) where 127 | 128 | import Prelude hiding (any, fail, length, replicate, take, takeWhile) 129 | 130 | import Data.Bytes.Parser.Internal (Parser (..), Result#, ST#, boxBytes, fail, unboxBytes, uneffectful, uneffectful#, uneffectfulInt#) 131 | import Data.Bytes.Parser.Internal (failByteArrayIntInt) 132 | import Data.Bytes.Parser.Types (Result (Failure, Success), Slice (Slice)) 133 | import Data.Bytes.Parser.Unsafe (cursor, expose, unconsume) 134 | import Data.Bytes.Types (Bytes (..), BytesN (BytesN)) 135 | import Data.Primitive (ByteArray (..)) 136 | import Data.Primitive.Contiguous (Contiguous, Element) 137 | import Foreign.C.String (CString) 138 | import GHC.Exts (Char#, Int (I#), Int#, Word#, runRW#, (+#), (-#), (>=#)) 139 | import GHC.Exts (ByteArray#) 140 | import GHC.ST (ST (..)) 141 | import GHC.Word (Word32 (W32#), Word8) 142 | 143 | import qualified Arithmetic.Nat as Nat 144 | import qualified Arithmetic.Types as Arithmetic 145 | import qualified Data.Bytes as B 146 | import qualified Data.Bytes.Parser.Internal as Internal 147 | import qualified Data.List as List 148 | import qualified Data.Primitive as PM 149 | import qualified Data.Primitive.Contiguous as C 150 | import qualified GHC.Exts as Exts 151 | 152 | {- | Parse a byte sequence. This can succeed even if the 153 | entire slice was not consumed by the parser. 154 | -} 155 | parseBytes :: forall e a. (forall s. Parser e s a) -> Bytes -> Result e a 156 | {-# INLINE parseBytes #-} 157 | parseBytes p !b = runResultST action 158 | where 159 | action :: forall s. ST# s (Result# e a) 160 | action s0 = case p @s of 161 | Parser f -> f (unboxBytes b) s0 162 | 163 | {- | Variant of 'parseBytesEither' that discards the error message on failure. 164 | Just like 'parseBytesEither', this does not impose any checks on the length 165 | of the remaining input. 166 | -} 167 | parseBytesMaybe :: forall e a. (forall s. Parser e s a) -> Bytes -> Maybe a 168 | {-# INLINE parseBytesMaybe #-} 169 | parseBytesMaybe p !b = runMaybeST action 170 | where 171 | action :: forall s. ST# s (Result# e a) 172 | action s0 = case p @s of 173 | Parser f -> f (unboxBytes b) s0 174 | 175 | {- | Variant of 'parseBytes' that discards the new offset and the 176 | remaining length. This does not, however, require the remaining 177 | length to be zero. Use 'endOfInput' to accomplish that. 178 | -} 179 | parseBytesEither :: forall e a. (forall s. Parser e s a) -> Bytes -> Either e a 180 | {-# INLINE parseBytesEither #-} 181 | parseBytesEither p !b = runEitherST action 182 | where 183 | action :: forall s. ST# s (Result# e a) 184 | action s0 = case p @s of 185 | Parser f -> f (unboxBytes b) s0 186 | 187 | -- Similar to runResultST 188 | runMaybeST :: (forall s. ST# s (Result# e x)) -> Maybe x 189 | {-# INLINE runMaybeST #-} 190 | runMaybeST f = case (runRW# (\s0 -> case f s0 of (# _, r #) -> r)) of 191 | (# _ | #) -> Nothing 192 | (# | (# x, _, _ #) #) -> Just x 193 | 194 | -- Similar to runResultST 195 | runEitherST :: (forall s. ST# s (Result# e x)) -> Either e x 196 | {-# INLINE runEitherST #-} 197 | runEitherST f = case (runRW# (\s0 -> case f s0 of (# _, r #) -> r)) of 198 | (# e | #) -> Left e 199 | (# | (# x, _, _ #) #) -> Right x 200 | 201 | -- This is used internally to help reduce boxing when a parser 202 | -- gets run. Due to the late inlining of runRW#, this variant 203 | -- of runST still cause the result value to be boxed. However, 204 | -- it avoids the additional boxing that the Success data 205 | -- constructor would normally cause. 206 | runResultST :: (forall s. ST# s (Result# e x)) -> Result e x 207 | {-# INLINE runResultST #-} 208 | runResultST f = case (runRW# (\s0 -> case f s0 of (# _, r #) -> r)) of 209 | (# e | #) -> Failure e 210 | (# | (# x, off, len #) #) -> Success (Slice (I# off) (I# len) x) 211 | 212 | -- | Variant of 'parseBytes' that accepts an unsliced 'ByteArray'. 213 | parseByteArray :: (forall s. Parser e s a) -> ByteArray -> Result e a 214 | {-# INLINE parseByteArray #-} 215 | parseByteArray p b = 216 | parseBytes p (Bytes b 0 (PM.sizeofByteArray b)) 217 | 218 | {- | Variant of 'parseBytes' that allows the parser to be run 219 | as part of an existing effectful context. 220 | -} 221 | parseBytesEffectfully :: Parser e s a -> Bytes -> ST s (Result e a) 222 | {-# INLINE parseBytesEffectfully #-} 223 | parseBytesEffectfully (Parser f) !b = 224 | ST 225 | ( \s0 -> case f (unboxBytes b) s0 of 226 | (# s1, r #) -> (# s1, boxPublicResult r #) 227 | ) 228 | 229 | -- | Lift an effectful computation into a parser. 230 | effect :: ST s a -> Parser e s a 231 | {-# INLINE effect #-} 232 | effect (ST f) = 233 | Parser 234 | ( \(# _, off, len #) s0 -> case f s0 of 235 | (# s1, a #) -> (# s1, (# | (# a, off, len #) #) #) 236 | ) 237 | 238 | byteArray :: e -> ByteArray -> Parser e s () 239 | {-# INLINE byteArray #-} 240 | byteArray e !expected = bytes e (B.fromByteArray expected) 241 | 242 | -- | Consume input matching the byte sequence. 243 | bytes :: e -> Bytes -> Parser e s () 244 | bytes e !expected = 245 | Parser 246 | ( \actual@(# _, off, len #) s -> 247 | let r = 248 | if B.isPrefixOf expected (boxBytes actual) 249 | then 250 | let !(I# movement) = length expected 251 | in (# | (# (), off +# movement, len -# movement #) #) 252 | else (# e | #) 253 | in (# s, r #) 254 | ) 255 | 256 | {- FOURMOLU_DISABLE -} 257 | -- | Consume input matching the @NUL@-terminated C String. 258 | cstring :: e -> CString -> Parser e s () 259 | cstring e (Exts.Ptr ptr0) = Parser 260 | ( \(# arr, off0, len0 #) s -> 261 | let go !ptr !off !len = case 262 | Exts.word8ToWord# 263 | (Exts.indexWord8OffAddr# ptr 0#) of 264 | 0## -> (# s, (# | (# (), off, len #) #) #) 265 | c -> case len of 266 | 0# -> (# s, (# e | #) #) 267 | _ -> case Exts.eqWord# c ( 268 | Exts.word8ToWord# 269 | (Exts.indexWord8Array# arr off)) of 270 | 1# -> go (Exts.plusAddr# ptr 1# ) (off +# 1# ) (len -# 1# ) 271 | _ -> (# s, (# e | #) #) 272 | in go ptr0 off0 len0 273 | ) 274 | {- FOURMOLU_ENABLE -} 275 | 276 | infix 0 277 | 278 | -- | Infix version of 'annotate'. 279 | () :: Parser x s a -> e -> Parser e s a 280 | () = annotate 281 | 282 | {- | Annotate a parser. If the parser fails, the error will 283 | be returned. 284 | -} 285 | annotate :: Parser x s a -> e -> Parser e s a 286 | annotate p e = p `orElse` fail e 287 | 288 | {- | Consumes and returns the next byte in the input. 289 | Fails if no characters are left. 290 | -} 291 | any :: e -> Parser e s Word8 292 | {-# INLINE any #-} 293 | any e = uneffectful $ \chunk -> 294 | if length chunk > 0 295 | then 296 | let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 297 | in Internal.Success w (offset chunk + 1) (length chunk - 1) 298 | else Internal.Failure e 299 | 300 | {- | Match any byte, to perform lookahead. Returns 'Nothing' if 301 | end of input has been reached. Does not consume any input. 302 | 303 | /Note/: Because this parser does not fail, do not use it 304 | with combinators such as 'many', because such as 'many', 305 | because such parsers loop until a failure occurs. Careless 306 | use will thus result in an infinite loop. 307 | -} 308 | peek :: Parser e s (Maybe Word8) 309 | {-# INLINE peek #-} 310 | peek = uneffectful $ \chunk -> 311 | let v = 312 | if length chunk > 0 313 | then Just (B.unsafeIndex chunk 0) 314 | else Nothing 315 | in Internal.Success v (offset chunk) (length chunk) 316 | 317 | {- | Match any byte, to perform lookahead. Does not consume any 318 | input, but will fail if end of input has been reached. 319 | -} 320 | peek' :: e -> Parser e s Word8 321 | {-# INLINE peek' #-} 322 | peek' e = uneffectful $ \chunk -> 323 | if length chunk > 0 324 | then Internal.Success (B.unsafeIndex chunk 0) (offset chunk) (length chunk) 325 | else Internal.Failure e 326 | 327 | {- | A stateful scanner. The predicate consumes and transforms a 328 | state argument, and each transformed state is passed to 329 | successive invocations of the predicate on each byte of the input 330 | until one returns 'Nothing' or the input ends. 331 | 332 | This parser does not fail. It will return the initial state 333 | if the predicate returns 'Nothing' on the first byte of input. 334 | 335 | /Note/: Because this parser does not fail, do not use it with 336 | combinators such a 'many', because such parsers loop until a 337 | failure occurs. Careless use will thus result in an infinite loop. 338 | -} 339 | scan :: state -> (state -> Word8 -> Maybe state) -> Parser e s state 340 | {-# INLINE scan #-} 341 | scan s0 t = do 342 | let go s = do 343 | mw <- peek 344 | case mw of 345 | Nothing -> pure s 346 | Just w -> case t s w of 347 | Just s' -> go s' 348 | Nothing -> pure s 349 | go s0 350 | 351 | -- Interpret the next byte as an ASCII-encoded character. 352 | -- Does not check to see if any characters are left. This 353 | -- is not exported. 354 | anyUnsafe :: Parser e s Word8 355 | {-# INLINE anyUnsafe #-} 356 | anyUnsafe = uneffectful $ \chunk -> 357 | let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 358 | in Internal.Success w (offset chunk + 1) (length chunk - 1) 359 | 360 | {- | Take while the predicate is matched. This is always inlined. This 361 | always succeeds. 362 | -} 363 | takeWhile :: (Word8 -> Bool) -> Parser e s Bytes 364 | {-# INLINE takeWhile #-} 365 | takeWhile f = uneffectful $ \chunk -> case B.takeWhile f chunk of 366 | bs -> Internal.Success bs (offset chunk + length bs) (length chunk - length bs) 367 | 368 | {- | Take bytes until the specified byte is encountered. Consumes 369 | the matched byte as well. Fails if the byte is not present. 370 | Visually, the cursor advancement and resulting @Bytes@ for 371 | @takeTrailedBy 0x19@ look like this: 372 | 373 | > 0x10 0x13 0x08 0x15 0x19 0x23 0x17 | input 374 | > |---->---->---->---->----| | cursor 375 | > {\----*----*----*----\} | result bytes 376 | -} 377 | takeTrailedBy :: e -> Word8 -> Parser e s Bytes 378 | takeTrailedBy e !w = do 379 | !start <- cursor 380 | skipTrailedBy e w 381 | !end <- cursor 382 | !arr <- expose 383 | pure (Bytes arr start (end - (start + 1))) 384 | 385 | {- | Skip all characters until the character from the is encountered 386 | and then consume the matching byte as well. 387 | -} 388 | skipTrailedBy :: e -> Word8 -> Parser e s () 389 | {-# INLINE skipTrailedBy #-} 390 | skipTrailedBy e !w = uneffectful# (\c -> skipUntilConsumeByteLoop e w c) 391 | 392 | skipUntilConsumeByteLoop :: 393 | e -> -- Error message 394 | Word8 -> -- byte to match 395 | Bytes -> -- Chunk 396 | Result# e () 397 | skipUntilConsumeByteLoop e !w !c = 398 | if length c > 0 399 | then 400 | if PM.indexByteArray (array c) (offset c) /= (w :: Word8) 401 | then skipUntilConsumeByteLoop e w (B.unsafeDrop 1 c) 402 | else (# | (# (), unI (offset c + 1), unI (length c - 1) #) #) 403 | else (# e | #) 404 | 405 | {- | Skip all bytes until either of the bytes in encountered. Then, 406 | consume the matched byte. @True@ indicates that the first argument 407 | byte was encountered. @False@ indicates that the second argument 408 | byte was encountered. 409 | -} 410 | skipTrailedBy2 :: 411 | -- | Error message 412 | e -> 413 | -- | First trailer, @False@ indicates that this was encountered 414 | Word8 -> 415 | -- | Second trailer, @True@ indicates that this was encountered 416 | Word8 -> 417 | Parser e s Bool 418 | {-# INLINE skipTrailedBy2 #-} 419 | skipTrailedBy2 e !wa !wb = boxBool (skipTrailedBy2# e wa wb) 420 | 421 | skipTrailedBy2# :: 422 | -- | Error message 423 | e -> 424 | -- | First trailer, 0 indicates that this was encountered 425 | Word8 -> 426 | -- | Second trailer, 1 indicates that this was encountered 427 | Word8 -> 428 | Parser e s Int# 429 | {-# INLINE skipTrailedBy2# #-} 430 | skipTrailedBy2# e !wa !wb = 431 | uneffectfulInt# (\c -> skipUntilConsumeByteEitherLoop e wa wb c) 432 | 433 | skipTrailedBy3# :: 434 | -- | Error message 435 | e -> 436 | -- | First trailer, 0 indicates that this was encountered 437 | Word8 -> 438 | -- | Second trailer, 1 indicates that this was encountered 439 | Word8 -> 440 | -- | Third trailer, 2 indicates that this was encountered 441 | Word8 -> 442 | Parser e s Int# 443 | {-# INLINE skipTrailedBy3# #-} 444 | skipTrailedBy3# e !wa !wb !wc = 445 | uneffectfulInt# (\c -> skipUntilConsumeByte3Loop e wa wb wc c) 446 | 447 | skipUntilConsumeByteEitherLoop :: 448 | e -> -- Error message 449 | Word8 -> -- first trailer 450 | Word8 -> -- second trailer 451 | Bytes -> -- Chunk 452 | Result# e Int# 453 | skipUntilConsumeByteEitherLoop e !wa !wb !c = 454 | if length c > 0 455 | then 456 | let byte = PM.indexByteArray (array c) (offset c) 457 | in if 458 | | byte == wa -> (# | (# 0#, unI (offset c + 1), unI (length c - 1) #) #) 459 | | byte == wb -> (# | (# 1#, unI (offset c + 1), unI (length c - 1) #) #) 460 | | otherwise -> skipUntilConsumeByteEitherLoop e wa wb (B.unsafeDrop 1 c) 461 | else (# e | #) 462 | 463 | skipUntilConsumeByte3Loop :: 464 | e -> -- Error message 465 | Word8 -> -- first trailer 466 | Word8 -> -- second trailer 467 | Word8 -> -- third trailer 468 | Bytes -> -- Chunk 469 | Result# e Int# 470 | skipUntilConsumeByte3Loop e !wa !wb !wc !c = 471 | if length c > 0 472 | then 473 | let byte = PM.indexByteArray (array c) (offset c) 474 | in if 475 | | byte == wa -> (# | (# 0#, unI (offset c + 1), unI (length c - 1) #) #) 476 | | byte == wb -> (# | (# 1#, unI (offset c + 1), unI (length c - 1) #) #) 477 | | byte == wc -> (# | (# 2#, unI (offset c + 1), unI (length c - 1) #) #) 478 | | otherwise -> skipUntilConsumeByte3Loop e wa wb wc (B.unsafeDrop 1 c) 479 | else (# e | #) 480 | 481 | {- | Take the given number of bytes. Fails if there is not enough 482 | remaining input. 483 | -} 484 | take :: e -> Int -> Parser e s Bytes 485 | {-# INLINE take #-} 486 | take e n = uneffectful $ \chunk -> 487 | if n <= B.length chunk 488 | then case B.unsafeTake n chunk of 489 | bs -> Internal.Success bs (offset chunk + n) (length chunk - n) 490 | else Internal.Failure e 491 | 492 | -- | Variant of 'take' that tracks the length of the result in the result type. 493 | takeN :: e -> Arithmetic.Nat n -> Parser e s (BytesN n) 494 | takeN e n0 = uneffectful $ \chunk -> 495 | if n <= B.length chunk 496 | then case B.unsafeTake n chunk of 497 | Bytes theChunk theOff _ -> Internal.Success (BytesN theChunk theOff) (offset chunk + n) (length chunk - n) 498 | else Internal.Failure e 499 | where 500 | !n = Nat.demote n0 501 | 502 | {- | Take at most the given number of bytes. This is greedy. It will 503 | consume as many bytes as there are available until it has consumed 504 | @n@ bytes. This never fails. 505 | -} 506 | takeUpTo :: Int -> Parser e s Bytes 507 | {-# INLINE takeUpTo #-} 508 | takeUpTo n = uneffectful $ \chunk -> 509 | let m = min n (B.length chunk) 510 | in case B.unsafeTake m chunk of 511 | bs -> Internal.Success bs (offset chunk + m) (length chunk - m) 512 | 513 | -- | Consume all remaining bytes in the input. 514 | remaining :: Parser e s Bytes 515 | {-# INLINE remaining #-} 516 | remaining = uneffectful $ \chunk -> 517 | Internal.Success chunk (offset chunk + length chunk) 0 518 | 519 | -- | Return all remaining bytes in the input without consuming them. 520 | peekRemaining :: Parser e s Bytes 521 | {-# INLINE peekRemaining #-} 522 | peekRemaining = uneffectful $ \b@(Bytes _ off len) -> 523 | Internal.Success b off len 524 | 525 | -- | Skip while the predicate is matched. This is always inlined. 526 | skipWhile :: (Word8 -> Bool) -> Parser e s () 527 | {-# INLINE skipWhile #-} 528 | skipWhile f = goSkipWhile 529 | where 530 | goSkipWhile = 531 | isEndOfInput >>= \case 532 | True -> pure () 533 | False -> do 534 | w <- anyUnsafe 535 | if f w 536 | then goSkipWhile 537 | else unconsume 1 538 | 539 | {- | The parser @satisfy p@ succeeds for any byte for which the 540 | predicate @p@ returns 'True'. Returns the byte that is 541 | actually parsed. 542 | -} 543 | satisfy :: e -> (Word8 -> Bool) -> Parser e s Word8 544 | satisfy e p = satisfyWith e id p 545 | {-# INLINE satisfy #-} 546 | 547 | {- | The parser @satisfyWith f p@ transforms a byte, and succeeds 548 | if the predicate @p@ returns 'True' on the transformed value. 549 | The parser returns the transformed byte that was parsed. 550 | -} 551 | satisfyWith :: e -> (Word8 -> a) -> (a -> Bool) -> Parser e s a 552 | {-# INLINE satisfyWith #-} 553 | satisfyWith e f p = uneffectful $ \chunk -> 554 | if length chunk > 0 555 | then case B.unsafeIndex chunk 0 of 556 | w -> 557 | let v = f w 558 | in if p v 559 | then Internal.Success v (offset chunk + 1) (length chunk - 1) 560 | else Internal.Failure e 561 | else Internal.Failure e 562 | 563 | -- | Fails if there is still more input remaining. 564 | endOfInput :: e -> Parser e s () 565 | {-# INLINE endOfInput #-} 566 | endOfInput e = uneffectful $ \chunk -> 567 | if length chunk == 0 568 | then Internal.Success () (offset chunk) 0 569 | else Internal.Failure e 570 | 571 | {- | Returns true if there are no more bytes in the input. Returns 572 | false otherwise. Always succeeds. 573 | -} 574 | isEndOfInput :: Parser e s Bool 575 | {-# INLINE isEndOfInput #-} 576 | isEndOfInput = uneffectful $ \chunk -> 577 | Internal.Success (length chunk == 0) (offset chunk) (length chunk) 578 | 579 | boxPublicResult :: Result# e a -> Result e a 580 | {-# INLINE boxPublicResult #-} 581 | boxPublicResult (# | (# a, b, c #) #) = Success (Slice (I# b) (I# c) a) 582 | boxPublicResult (# e | #) = Failure e 583 | 584 | {- FOURMOLU_DISABLE -} 585 | -- | Convert a 'Word32' parser to a 'Word#' parser. 586 | unboxWord32 :: Parser e s Word32 -> Parser e s Word# 587 | {-# inline unboxWord32 #-} 588 | unboxWord32 (Parser f) = Parser 589 | (\x s0 -> case f x s0 of 590 | (# s1, r #) -> case r of 591 | (# e | #) -> (# s1, (# e | #) #) 592 | (# | (# W32# a, b, c #) #) -> (# s1, (# | (# 593 | Exts.word32ToWord# 594 | a, b, c #) #) #) 595 | ) 596 | {- FOURMOLU_ENABLE -} 597 | 598 | -- | Convert a @(Int,Int)@ parser to a @(# Int#, Int# #)@ parser. 599 | unboxIntPair :: Parser e s (Int, Int) -> Parser e s (# Int#, Int# #) 600 | {-# INLINE unboxIntPair #-} 601 | unboxIntPair (Parser f) = 602 | Parser 603 | ( \x s0 -> case f x s0 of 604 | (# s1, r #) -> case r of 605 | (# e | #) -> (# s1, (# e | #) #) 606 | (# | (# (I# y, I# z), b, c #) #) -> (# s1, (# | (# (# y, z #), b, c #) #) #) 607 | ) 608 | 609 | {- FOURMOLU_DISABLE -} 610 | -- | Convert a 'Word#' parser to a 'Word32' parser. Precondition: 611 | -- the argument parser only returns words less than 4294967296. 612 | boxWord32 :: Parser e s Word# -> Parser e s Word32 613 | {-# inline boxWord32 #-} 614 | boxWord32 (Parser f) = Parser 615 | (\x s0 -> case f x s0 of 616 | (# s1, r #) -> case r of 617 | (# e | #) -> (# s1, (# e | #) #) 618 | (# | (# a, b, c #) #) -> (# s1, (# | (# W32# ( 619 | Exts.wordToWord32# 620 | a), b, c #) #) #) 621 | ) 622 | {- FOURMOLU_ENABLE -} 623 | 624 | -- | Convert a @(# Int#, Int# #)@ parser to a @(Int,Int)@ parser. 625 | boxInt :: Parser e s Int# -> Parser e s Int 626 | {-# INLINE boxInt #-} 627 | boxInt (Parser f) = 628 | Parser 629 | ( \x s0 -> case f x s0 of 630 | (# s1, r #) -> case r of 631 | (# e | #) -> (# s1, (# e | #) #) 632 | (# | (# y, b, c #) #) -> (# s1, (# | (# I# y, b, c #) #) #) 633 | ) 634 | 635 | -- | Convert a @(# Int#, Int# #)@ parser to a @(Int,Int)@ parser. 636 | boxBool :: Parser e s Int# -> Parser e s Bool 637 | {-# INLINE boxBool #-} 638 | boxBool (Parser f) = 639 | Parser 640 | ( \x s0 -> case f x s0 of 641 | (# s1, r #) -> case r of 642 | (# e | #) -> (# s1, (# e | #) #) 643 | (# | (# y, b, c #) #) -> (# s1, (# | (# case y of 1# -> True; _ -> False, b, c #) #) #) 644 | ) 645 | 646 | -- | Convert a @(# Int#, Int# #)@ parser to a @(Int,Int)@ parser. 647 | boxIntPair :: Parser e s (# Int#, Int# #) -> Parser e s (Int, Int) 648 | {-# INLINE boxIntPair #-} 649 | boxIntPair (Parser f) = 650 | Parser 651 | ( \x s0 -> case f x s0 of 652 | (# s1, r #) -> case r of 653 | (# e | #) -> (# s1, (# e | #) #) 654 | (# | (# (# y, z #), b, c #) #) -> (# s1, (# | (# (I# y, I# z), b, c #) #) #) 655 | ) 656 | 657 | {- | There is a law-abiding instance of 'Alternative' for 'Parser'. 658 | However, it is not terribly useful since error messages seldom 659 | have a 'Monoid' instance. This function is a variant of @\<|\>@ 660 | that is right-biased in its treatment of error messages. 661 | Consequently, @orElse@ lacks an identity. 662 | See 663 | for more discussion of this topic. 664 | -} 665 | infixl 3 `orElse` 666 | 667 | orElse :: Parser x s a -> Parser e s a -> Parser e s a 668 | {-# INLINE orElse #-} 669 | orElse (Parser f) (Parser g) = 670 | Parser 671 | ( \x s0 -> case f x s0 of 672 | (# s1, r0 #) -> case r0 of 673 | (# _ | #) -> g x s1 674 | (# | r #) -> (# s1, (# | r #) #) 675 | ) 676 | 677 | -- | Effectfully adjusts the error message if an error occurs. 678 | mapErrorEffectfully :: (e1 -> ST s e2) -> Parser e1 s a -> Parser e2 s a 679 | {-# INLINE mapErrorEffectfully #-} 680 | mapErrorEffectfully f (Parser g) = 681 | Parser 682 | ( \x s0 -> case g x s0 of 683 | (# s1, r0 #) -> case r0 of 684 | (# e | #) -> case f e of 685 | ST h -> case h s1 of 686 | (# s2, e' #) -> (# s2, (# e' | #) #) 687 | (# | r #) -> (# s1, (# | r #) #) 688 | ) 689 | 690 | bindFromCharToLifted :: Parser s e Char# -> (Char# -> Parser s e a) -> Parser s e a 691 | {-# INLINE bindFromCharToLifted #-} 692 | bindFromCharToLifted (Parser f) g = 693 | Parser 694 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 695 | (# s1, r0 #) -> case r0 of 696 | (# e | #) -> (# s1, (# e | #) #) 697 | (# | (# y, b, c #) #) -> 698 | runParser (g y) (# arr, b, c #) s1 699 | ) 700 | 701 | bindFromWordToByteArrayIntInt :: Parser s e Word# -> (Word# -> Parser s e (# ByteArray#, Int#, Int# #)) -> Parser s e (# ByteArray#, Int#, Int# #) 702 | {-# INLINE bindFromWordToByteArrayIntInt #-} 703 | bindFromWordToByteArrayIntInt (Parser f) g = 704 | Parser 705 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 706 | (# s1, r0 #) -> case r0 of 707 | (# e | #) -> (# s1, (# e | #) #) 708 | (# | (# y, b, c #) #) -> 709 | runParser (g y) (# arr, b, c #) s1 710 | ) 711 | 712 | bindFromCharToByteArrayIntInt :: Parser s e Char# -> (Char# -> Parser s e (# ByteArray#, Int#, Int# #)) -> Parser s e (# ByteArray#, Int#, Int# #) 713 | {-# INLINE bindFromCharToByteArrayIntInt #-} 714 | bindFromCharToByteArrayIntInt (Parser f) g = 715 | Parser 716 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 717 | (# s1, r0 #) -> case r0 of 718 | (# e | #) -> (# s1, (# e | #) #) 719 | (# | (# y, b, c #) #) -> 720 | runParser (g y) (# arr, b, c #) s1 721 | ) 722 | 723 | bindFromCharToIntPair :: Parser s e Char# -> (Char# -> Parser s e (# Int#, Int# #)) -> Parser s e (# Int#, Int# #) 724 | {-# INLINE bindFromCharToIntPair #-} 725 | bindFromCharToIntPair (Parser f) g = 726 | Parser 727 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 728 | (# s1, r0 #) -> case r0 of 729 | (# e | #) -> (# s1, (# e | #) #) 730 | (# | (# y, b, c #) #) -> 731 | runParser (g y) (# arr, b, c #) s1 732 | ) 733 | 734 | bindFromLiftedToInt :: Parser s e a -> (a -> Parser s e Int#) -> Parser s e Int# 735 | {-# INLINE bindFromLiftedToInt #-} 736 | bindFromLiftedToInt (Parser f) g = 737 | Parser 738 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 739 | (# s1, r0 #) -> case r0 of 740 | (# e | #) -> (# s1, (# e | #) #) 741 | (# | (# y, b, c #) #) -> 742 | runParser (g y) (# arr, b, c #) s1 743 | ) 744 | 745 | bindFromByteArrayIntIntToLifted :: 746 | Parser s e (# ByteArray#, Int#, Int# #) 747 | -> ((# ByteArray#, Int#, Int# #) -> Parser s e a) 748 | -> Parser s e a 749 | {-# INLINE bindFromByteArrayIntIntToLifted #-} 750 | bindFromByteArrayIntIntToLifted (Parser f) g = 751 | Parser 752 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 753 | (# s1, r0 #) -> case r0 of 754 | (# e | #) -> (# s1, (# e | #) #) 755 | (# | (# y, b, c #) #) -> 756 | runParser (g y) (# arr, b, c #) s1 757 | ) 758 | 759 | bindFromLiftedToByteArrayIntInt :: 760 | Parser s e a 761 | -> (a -> Parser s e (# ByteArray#, Int#, Int# #)) 762 | -> Parser s e (# ByteArray#, Int#, Int# #) 763 | {-# INLINE bindFromLiftedToByteArrayIntInt #-} 764 | bindFromLiftedToByteArrayIntInt (Parser f) g = 765 | Parser 766 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 767 | (# s1, r0 #) -> case r0 of 768 | (# e | #) -> (# s1, (# e | #) #) 769 | (# | (# y, b, c #) #) -> 770 | runParser (g y) (# arr, b, c #) s1 771 | ) 772 | 773 | bindFromLiftedToIntPair :: Parser s e a -> (a -> Parser s e (# Int#, Int# #)) -> Parser s e (# Int#, Int# #) 774 | {-# INLINE bindFromLiftedToIntPair #-} 775 | bindFromLiftedToIntPair (Parser f) g = 776 | Parser 777 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 778 | (# s1, r0 #) -> case r0 of 779 | (# e | #) -> (# s1, (# e | #) #) 780 | (# | (# y, b, c #) #) -> 781 | runParser (g y) (# arr, b, c #) s1 782 | ) 783 | 784 | bindFromIntToIntPair :: Parser s e Int# -> (Int# -> Parser s e (# Int#, Int# #)) -> Parser s e (# Int#, Int# #) 785 | {-# INLINE bindFromIntToIntPair #-} 786 | bindFromIntToIntPair (Parser f) g = 787 | Parser 788 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 789 | (# s1, r0 #) -> case r0 of 790 | (# e | #) -> (# s1, (# e | #) #) 791 | (# | (# y, b, c #) #) -> 792 | runParser (g y) (# arr, b, c #) s1 793 | ) 794 | 795 | bindFromMaybeCharToIntPair :: 796 | Parser s e (# (# #) | Char# #) -> 797 | ((# (# #) | Char# #) -> Parser s e (# Int#, Int# #)) -> 798 | Parser s e (# Int#, Int# #) 799 | {-# INLINE bindFromMaybeCharToIntPair #-} 800 | bindFromMaybeCharToIntPair (Parser f) g = 801 | Parser 802 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 803 | (# s1, r0 #) -> case r0 of 804 | (# e | #) -> (# s1, (# e | #) #) 805 | (# | (# y, b, c #) #) -> 806 | runParser (g y) (# arr, b, c #) s1 807 | ) 808 | 809 | bindFromMaybeCharToLifted :: 810 | Parser s e (# (# #) | Char# #) -> 811 | ((# (# #) | Char# #) -> Parser s e a) -> 812 | Parser s e a 813 | {-# INLINE bindFromMaybeCharToLifted #-} 814 | bindFromMaybeCharToLifted (Parser f) g = 815 | Parser 816 | ( \x@(# arr, _, _ #) s0 -> case f x s0 of 817 | (# s1, r0 #) -> case r0 of 818 | (# e | #) -> (# s1, (# e | #) #) 819 | (# | (# y, b, c #) #) -> 820 | runParser (g y) (# arr, b, c #) s1 821 | ) 822 | 823 | pureIntPair :: 824 | (# Int#, Int# #) -> 825 | Parser s e (# Int#, Int# #) 826 | {-# INLINE pureIntPair #-} 827 | pureIntPair a = 828 | Parser 829 | (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) 830 | 831 | pureByteArrayIntInt :: 832 | (# ByteArray#, Int#, Int# #) -> 833 | Parser s e (# ByteArray#, Int#, Int# #) 834 | {-# INLINE pureByteArrayIntInt #-} 835 | pureByteArrayIntInt a = 836 | Parser 837 | (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) 838 | 839 | failIntPair :: e -> Parser e s (# Int#, Int# #) 840 | {-# INLINE failIntPair #-} 841 | failIntPair e = 842 | Parser 843 | (\(# _, _, _ #) s -> (# s, (# e | #) #)) 844 | 845 | {- | Augment a parser with the number of bytes that were consume while 846 | it executed. 847 | -} 848 | measure :: Parser e s a -> Parser e s (Int, a) 849 | {-# INLINE measure #-} 850 | measure (Parser f) = 851 | Parser 852 | ( \x@(# _, pre, _ #) s0 -> case f x s0 of 853 | (# s1, r #) -> case r of 854 | (# e | #) -> (# s1, (# e | #) #) 855 | (# | (# y, post, c #) #) -> (# s1, (# | (# (I# (post -# pre), y), post, c #) #) #) 856 | ) 857 | 858 | {- | Run a parser and discard the result, returning instead the number 859 | of bytes that the parser consumed. 860 | -} 861 | measure_ :: Parser e s a -> Parser e s Int 862 | {-# INLINE measure_ #-} 863 | measure_ p = boxInt (measure_# p) 864 | 865 | -- | Variant of 'measure_' with an unboxed result. 866 | measure_# :: Parser e s a -> Parser e s Int# 867 | {-# INLINE measure_# #-} 868 | measure_# (Parser f) = 869 | Parser 870 | ( \x@(# _, pre, _ #) s0 -> case f x s0 of 871 | (# s1, r #) -> case r of 872 | (# e | #) -> (# s1, (# e | #) #) 873 | (# | (# _, post, c #) #) -> (# s1, (# | (# post -# pre, post, c #) #) #) 874 | ) 875 | 876 | {- | Run a parser in a delimited context, failing if the requested number 877 | of bytes are not available or if the delimited parser does not 878 | consume all input. This combinator can be understood as a composition 879 | of 'take', 'effect', 'parseBytesEffectfully', and 'endOfInput'. It is 880 | provided as a single combinator because for convenience and because it is 881 | easy to make mistakes when manually assembling the aforementioned parsers. 882 | The pattern of prefixing an encoding with its length is common. 883 | This is discussed more in 884 | . 885 | 886 | > delimit e1 e2 n remaining === take e1 n 887 | -} 888 | delimit :: 889 | -- | Error message when not enough bytes are present 890 | e -> 891 | -- | Error message when delimited parser does not consume all input 892 | e -> 893 | -- | Exact number of bytes delimited parser is expected to consume 894 | Int -> 895 | -- | Parser to execute in delimited context 896 | Parser e s a -> 897 | Parser e s a 898 | {-# INLINE delimit #-} 899 | delimit esz eleftovers (I# n) (Parser f) = 900 | Parser 901 | ( \(# arr, off, len #) s0 -> case len >=# n of 902 | 1# -> case f (# arr, off, n #) s0 of 903 | (# s1, r #) -> case r of 904 | (# e | #) -> (# s1, (# e | #) #) 905 | (# | (# a, newOff, leftovers #) #) -> case leftovers of 906 | 0# -> (# s1, (# | (# a, newOff, len -# n #) #) #) 907 | _ -> (# s1, (# eleftovers | #) #) 908 | _ -> (# s0, (# esz | #) #) 909 | ) 910 | 911 | {- | Apply the parser repeatedly until there is no more input left 912 | to consume. Collects the results into a list. 913 | -} 914 | listUntilEoi :: 915 | Parser e s a -- ^ Parser to repeatedly apply until input is exhausted 916 | -> Parser e s [a] 917 | listUntilEoi p = go [] 918 | where 919 | go !acc = isEndOfInput >>= \case 920 | True -> pure $! List.reverse acc 921 | False -> do 922 | a <- p 923 | go (a : acc) 924 | 925 | {- | Replicate a parser @n@ times, writing the results into 926 | an array of length @n@. For @Array@ and @SmallArray@, this 927 | is lazy in the elements, so be sure the they result of the 928 | parser is evaluated appropriately to avoid unwanted thunks. 929 | -} 930 | replicate :: 931 | forall arr e s a. 932 | (Contiguous arr, Element arr a) => 933 | -- | Number of times to run the parser 934 | Int -> 935 | -- | Parser 936 | Parser e s a -> 937 | Parser e s (arr a) 938 | {-# INLINE replicate #-} 939 | replicate !len p = do 940 | marr <- effect (C.new len) 941 | let go :: Int -> Parser e s (arr a) 942 | go !ix = 943 | if ix < len 944 | then do 945 | a <- p 946 | effect (C.write marr ix a) 947 | go (ix + 1) 948 | else effect (C.unsafeFreeze marr) 949 | go 0 950 | 951 | unI :: Int -> Int# 952 | {-# INLINE unI #-} 953 | unI (I# w) = w 954 | -------------------------------------------------------------------------------- /src/Data/Bytes/Parser/Latin.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE BinaryLiterals #-} 3 | {-# LANGUAGE CPP #-} 4 | {-# LANGUAGE DataKinds #-} 5 | {-# LANGUAGE DeriveFunctor #-} 6 | {-# LANGUAGE DerivingStrategies #-} 7 | {-# LANGUAGE GADTSyntax #-} 8 | {-# LANGUAGE KindSignatures #-} 9 | {-# LANGUAGE LambdaCase #-} 10 | {-# LANGUAGE MagicHash #-} 11 | {-# LANGUAGE MultiWayIf #-} 12 | {-# LANGUAGE PolyKinds #-} 13 | {-# LANGUAGE RankNTypes #-} 14 | {-# LANGUAGE ScopedTypeVariables #-} 15 | {-# LANGUAGE StandaloneDeriving #-} 16 | {-# LANGUAGE TypeApplications #-} 17 | {-# LANGUAGE UnboxedSums #-} 18 | {-# LANGUAGE UnboxedTuples #-} 19 | 20 | {- | Parse input as though it were text encoded by 21 | ISO 8859-1 (Latin-1). All byte sequences are valid 22 | text under ISO 8859-1. 23 | -} 24 | module Data.Bytes.Parser.Latin 25 | ( -- * Matching 26 | 27 | -- ** Required 28 | char 29 | , char2 30 | , char3 31 | , char4 32 | , char5 33 | , char6 34 | , char7 35 | , char8 36 | , char9 37 | , char10 38 | , char11 39 | , char12 40 | 41 | -- ** Try 42 | , trySatisfy 43 | , trySatisfyThen 44 | 45 | -- * One Character 46 | , any 47 | , opt 48 | , opt# 49 | 50 | -- * Many Characters 51 | , takeTrailedBy 52 | 53 | -- * Lookahead 54 | , peek 55 | , peek' 56 | 57 | -- * Skip 58 | , skipDigits 59 | , skipDigits1 60 | , skipChar 61 | , skipChar1 62 | , skipTrailedBy 63 | , skipUntil 64 | , skipWhile 65 | 66 | -- * End of Input 67 | , endOfInput 68 | , isEndOfInput 69 | 70 | -- * Numbers 71 | 72 | -- ** Decimal 73 | 74 | -- *** Unsigned 75 | , decWord 76 | , decWord8 77 | , decWord16 78 | , decWord32 79 | , decWord64 80 | 81 | -- *** Signed 82 | , decUnsignedInt 83 | , decUnsignedInt# 84 | , decSignedInt 85 | , decStandardInt 86 | , decTrailingInt 87 | , decTrailingInt# 88 | , decSignedInteger 89 | , decUnsignedInteger 90 | , decTrailingInteger 91 | 92 | -- ** Hexadecimal 93 | 94 | -- *** Variable Length 95 | , hexWord8 96 | , hexWord16 97 | , hexWord32 98 | 99 | -- *** Fixed Length 100 | , hexFixedWord8 101 | , hexFixedWord16 102 | , hexFixedWord16# 103 | , hexFixedWord32 104 | , hexFixedWord64 105 | , hexFixedWord128 106 | , hexFixedWord256 107 | 108 | -- *** Digit 109 | , hexNibbleLower 110 | , tryHexNibbleLower 111 | , hexNibble 112 | , tryHexNibble 113 | ) where 114 | 115 | import Prelude hiding (any, fail, length, takeWhile) 116 | 117 | import Data.Bits ((.|.)) 118 | import Data.Bytes.Parser (bindFromLiftedToInt, endOfInput, isEndOfInput) 119 | import Data.Bytes.Parser.Internal (InternalStep (..), Parser (..), Result (..), Result#, ST#, boxBytes, indexLatinCharArray, uneffectful, uneffectful#, unfailing, upcastUnitSuccess) 120 | import Data.Bytes.Parser.Unsafe (cursor, expose, unconsume) 121 | import Data.Bytes.Types (Bytes (..)) 122 | import Data.Char (ord) 123 | import Data.Kind (Type) 124 | import Data.WideWord (Word128 (Word128), Word256 (Word256)) 125 | import Data.Word (Word8) 126 | import GHC.Exts (Char (C#), Char#, Int (I#), Int#, RuntimeRep, TYPE, Word#, gtWord#, indexCharArray#, int2Word#, ltWord#, notI#, or#, (+#), (-#)) 127 | import GHC.Word (Word (W#), Word16 (W16#), Word32 (W32#), Word64 (W64#), Word8 (W8#)) 128 | 129 | import qualified Data.Bytes as Bytes 130 | import qualified Data.Primitive as PM 131 | import qualified GHC.Exts as Exts 132 | 133 | {- | Runs the predicate on the next character in the input. If the 134 | predicate is matched, this consumes the character. Otherwise, 135 | the character is not consumed. This returns @False@ if the end 136 | of the input has been reached. This never fails. 137 | -} 138 | trySatisfy :: (Char -> Bool) -> Parser e s Bool 139 | trySatisfy f = uneffectful $ \chunk -> case length chunk of 140 | 0 -> Success False (offset chunk) (length chunk) 141 | _ -> case f (indexLatinCharArray (array chunk) (offset chunk)) of 142 | True -> Success True (offset chunk + 1) (length chunk - 1) 143 | False -> Success False (offset chunk) (length chunk) 144 | 145 | {- | Runs the function on the next character in the input. If the 146 | function returns @Just@, this consumes the character and then 147 | runs the parser on the remaining input. If the function returns 148 | @Nothing@, this does not consume the tested character, and it 149 | runs the default parser on the input (which includes the tested 150 | character). If there is no input remaining, this also runs the 151 | default parser. This combinator never fails. 152 | -} 153 | trySatisfyThen :: 154 | forall (r :: RuntimeRep) (e :: Type) (s :: Type) (a :: TYPE r). 155 | -- | Default parser. Runs on @Nothing@ or end of input. 156 | Parser e s a -> 157 | -- | Parser-selecting predicate 158 | (Char -> Maybe (Parser e s a)) -> 159 | Parser e s a 160 | {-# INLINE trySatisfyThen #-} 161 | trySatisfyThen (Parser g) f = 162 | Parser 163 | ( \input@(# arr, off0, len0 #) s0 -> case len0 of 164 | 0# -> g input s0 165 | _ -> case f (C# (indexCharArray# arr off0)) of 166 | Nothing -> g input s0 167 | Just (Parser p) -> p (# arr, off0 +# 1#, len0 -# 1# #) s0 168 | ) 169 | 170 | {- | Consume the next character, failing if it does not 171 | match the expected value or if there is no more input. 172 | -} 173 | char :: e -> Char -> Parser e s () 174 | {-# INLINE char #-} 175 | char e !c = uneffectful $ \chunk -> 176 | if length chunk > 0 177 | then 178 | if indexLatinCharArray (array chunk) (offset chunk) == c 179 | then Success () (offset chunk + 1) (length chunk - 1) 180 | else Failure e 181 | else Failure e 182 | 183 | {- | Consume the next two characters, failing if they do 184 | not match the expected values. 185 | 186 | > char2 e a b === char e a *> char e b 187 | -} 188 | char2 :: e -> Char -> Char -> Parser e s () 189 | {-# INLINE char2 #-} 190 | char2 e !c0 !c1 = uneffectful $ \chunk -> 191 | if 192 | | length chunk > 1 193 | , indexLatinCharArray (array chunk) (offset chunk) == c0 194 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 -> 195 | Success () (offset chunk + 2) (length chunk - 2) 196 | | otherwise -> Failure e 197 | 198 | {- | Consume three characters, failing if they do 199 | not match the expected values. 200 | 201 | > char3 e a b c === char e a *> char e b *> char e c 202 | -} 203 | char3 :: e -> Char -> Char -> Char -> Parser e s () 204 | {-# INLINE char3 #-} 205 | char3 e !c0 !c1 !c2 = uneffectful $ \chunk -> 206 | if 207 | | length chunk > 2 208 | , indexLatinCharArray (array chunk) (offset chunk) == c0 209 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 210 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 -> 211 | Success () (offset chunk + 3) (length chunk - 3) 212 | | otherwise -> Failure e 213 | 214 | {- | Consume four characters, failing if they do 215 | not match the expected values. 216 | 217 | > char4 e a b c d === char e a *> char e b *> char e c *> char e d 218 | -} 219 | char4 :: e -> Char -> Char -> Char -> Char -> Parser e s () 220 | {-# INLINE char4 #-} 221 | char4 e !c0 !c1 !c2 !c3 = uneffectful $ \chunk -> 222 | if 223 | | length chunk > 3 224 | , indexLatinCharArray (array chunk) (offset chunk) == c0 225 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 226 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 227 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 -> 228 | Success () (offset chunk + 4) (length chunk - 4) 229 | | otherwise -> Failure e 230 | 231 | {- | Consume five characters, failing if they do 232 | not match the expected values. 233 | -} 234 | char5 :: e -> Char -> Char -> Char -> Char -> Char -> Parser e s () 235 | {-# INLINE char5 #-} 236 | char5 e !c0 !c1 !c2 !c3 !c4 = uneffectful $ \chunk -> 237 | if 238 | | length chunk > 4 239 | , indexLatinCharArray (array chunk) (offset chunk) == c0 240 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 241 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 242 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 243 | , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 -> 244 | Success () (offset chunk + 5) (length chunk - 5) 245 | | otherwise -> Failure e 246 | 247 | {- | Consume six characters, failing if they do 248 | not match the expected values. 249 | -} 250 | char6 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () 251 | {-# INLINE char6 #-} 252 | char6 e !c0 !c1 !c2 !c3 !c4 !c5 = uneffectful $ \chunk -> 253 | if 254 | | length chunk > 5 255 | , indexLatinCharArray (array chunk) (offset chunk) == c0 256 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 257 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 258 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 259 | , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 260 | , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 -> 261 | Success () (offset chunk + 6) (length chunk - 6) 262 | | otherwise -> Failure e 263 | 264 | {- | Consume seven characters, failing if they do 265 | not match the expected values. 266 | -} 267 | char7 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () 268 | {-# INLINE char7 #-} 269 | char7 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 = uneffectful $ \chunk -> 270 | if 271 | | length chunk > 6 272 | , indexLatinCharArray (array chunk) (offset chunk) == c0 273 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 274 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 275 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 276 | , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 277 | , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 278 | , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 -> 279 | Success () (offset chunk + 7) (length chunk - 7) 280 | | otherwise -> Failure e 281 | 282 | {- | Consume eight characters, failing if they do 283 | not match the expected values. 284 | -} 285 | char8 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () 286 | {-# INLINE char8 #-} 287 | char8 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 = uneffectful $ \chunk -> 288 | if 289 | | length chunk > 7 290 | , indexLatinCharArray (array chunk) (offset chunk) == c0 291 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 292 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 293 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 294 | , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 295 | , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 296 | , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 297 | , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 -> 298 | Success () (offset chunk + 8) (length chunk - 8) 299 | | otherwise -> Failure e 300 | 301 | {- | Consume nine characters, failing if they do 302 | not match the expected values. 303 | -} 304 | char9 :: 305 | e -> 306 | Char -> 307 | Char -> 308 | Char -> 309 | Char -> 310 | Char -> 311 | Char -> 312 | Char -> 313 | Char -> 314 | Char -> 315 | Parser e s () 316 | {-# INLINE char9 #-} 317 | char9 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 = uneffectful $ \chunk -> 318 | if 319 | | length chunk > 8 320 | , indexLatinCharArray (array chunk) (offset chunk) == c0 321 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 322 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 323 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 324 | , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 325 | , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 326 | , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 327 | , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 328 | , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 -> 329 | Success () (offset chunk + 9) (length chunk - 9) 330 | | otherwise -> Failure e 331 | 332 | {- | Consume ten characters, failing if they do 333 | not match the expected values. 334 | -} 335 | char10 :: 336 | e -> 337 | Char -> 338 | Char -> 339 | Char -> 340 | Char -> 341 | Char -> 342 | Char -> 343 | Char -> 344 | Char -> 345 | Char -> 346 | Char -> 347 | Parser e s () 348 | {-# INLINE char10 #-} 349 | char10 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 = uneffectful $ \chunk -> 350 | if 351 | | length chunk > 9 352 | , indexLatinCharArray (array chunk) (offset chunk) == c0 353 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 354 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 355 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 356 | , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 357 | , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 358 | , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 359 | , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 360 | , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 361 | , indexLatinCharArray (array chunk) (offset chunk + 9) == c9 -> 362 | Success () (offset chunk + 10) (length chunk - 10) 363 | | otherwise -> Failure e 364 | 365 | {- | Consume eleven characters, failing if they do 366 | not match the expected values. 367 | -} 368 | char11 :: 369 | e -> 370 | Char -> 371 | Char -> 372 | Char -> 373 | Char -> 374 | Char -> 375 | Char -> 376 | Char -> 377 | Char -> 378 | Char -> 379 | Char -> 380 | Char -> 381 | Parser e s () 382 | {-# INLINE char11 #-} 383 | char11 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 = uneffectful $ \chunk -> 384 | if 385 | | length chunk > 10 386 | , indexLatinCharArray (array chunk) (offset chunk) == c0 387 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 388 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 389 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 390 | , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 391 | , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 392 | , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 393 | , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 394 | , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 395 | , indexLatinCharArray (array chunk) (offset chunk + 9) == c9 396 | , indexLatinCharArray (array chunk) (offset chunk + 10) == c10 -> 397 | Success () (offset chunk + 11) (length chunk - 11) 398 | | otherwise -> Failure e 399 | 400 | {- | Consume twelve characters, failing if they do 401 | not match the expected values. 402 | -} 403 | char12 :: 404 | e -> 405 | Char -> 406 | Char -> 407 | Char -> 408 | Char -> 409 | Char -> 410 | Char -> 411 | Char -> 412 | Char -> 413 | Char -> 414 | Char -> 415 | Char -> 416 | Char -> 417 | Parser e s () 418 | {-# INLINE char12 #-} 419 | char12 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 = uneffectful $ \chunk -> 420 | if 421 | | length chunk > 11 422 | , indexLatinCharArray (array chunk) (offset chunk) == c0 423 | , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 424 | , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 425 | , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 426 | , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 427 | , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 428 | , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 429 | , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 430 | , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 431 | , indexLatinCharArray (array chunk) (offset chunk + 9) == c9 432 | , indexLatinCharArray (array chunk) (offset chunk + 10) == c10 433 | , indexLatinCharArray (array chunk) (offset chunk + 11) == c11 -> 434 | Success () (offset chunk + 12) (length chunk - 12) 435 | | otherwise -> Failure e 436 | 437 | -- | Consumes and returns the next character in the input. 438 | any :: e -> Parser e s Char 439 | {-# INLINE any #-} 440 | any e = uneffectful $ \chunk -> 441 | if length chunk > 0 442 | then 443 | let c = indexLatinCharArray (array chunk) (offset chunk) 444 | in Success c (offset chunk + 1) (length chunk - 1) 445 | else Failure e 446 | 447 | {- | Consume a character from the input or return @Nothing@ if 448 | end of the stream has been reached. Since ISO 8859-1 maps every 449 | bytes to a character, this parser never fails. 450 | -} 451 | opt :: Parser e s (Maybe Char) 452 | {-# INLINE opt #-} 453 | opt = uneffectful $ \chunk -> case length chunk of 454 | 0 -> Success Nothing (offset chunk) (length chunk) 455 | _ -> 456 | Success 457 | (Just (indexLatinCharArray (array chunk) (offset chunk))) 458 | (offset chunk + 1) 459 | (length chunk - 1) 460 | 461 | -- | Variant of @opt@ with unboxed result. 462 | opt# :: Parser e s (# (# #) | Char# #) 463 | {-# INLINE opt# #-} 464 | opt# = 465 | Parser 466 | ( \(# arr, off, len #) s0 -> case len of 467 | 0# -> (# s0, (# | (# (# (# #) | #), off, len #) #) #) 468 | _ -> (# s0, (# | (# (# | indexCharArray# arr off #), off +# 1#, len -# 1# #) #) #) 469 | ) 470 | 471 | skipDigitsAsciiLoop :: 472 | Bytes -> -- Chunk 473 | (# Int#, Int# #) 474 | skipDigitsAsciiLoop !c = 475 | if length c > 0 476 | then 477 | let w = indexLatinCharArray (array c) (offset c) 478 | in if w >= '0' && w <= '9' 479 | then skipDigitsAsciiLoop (Bytes.unsafeDrop 1 c) 480 | else (# unI (offset c), unI (length c) #) 481 | else (# unI (offset c), unI (length c) #) 482 | 483 | skipDigitsAscii1LoopStart :: 484 | e -> 485 | Bytes -> -- chunk 486 | Result# e () 487 | skipDigitsAscii1LoopStart e !c = 488 | if length c > 0 489 | then 490 | let w = indexLatinCharArray (array c) (offset c) 491 | in if w >= '0' && w <= '9' 492 | then upcastUnitSuccess (skipDigitsAsciiLoop (Bytes.unsafeDrop 1 c)) 493 | else (# e | #) 494 | else (# e | #) 495 | 496 | {- | Variant of 'skipDigits' that requires at least one digit 497 | to be present. 498 | -} 499 | skipDigits1 :: e -> Parser e s () 500 | {-# INLINE skipDigits1 #-} 501 | skipDigits1 e = uneffectful# $ \c -> 502 | skipDigitsAscii1LoopStart e c 503 | 504 | {- | Skip the characters @0-9@ until a non-digit is encountered. 505 | This parser does not fail. 506 | -} 507 | skipDigits :: Parser e s () 508 | skipDigits = uneffectful# $ \c -> 509 | upcastUnitSuccess (skipDigitsAsciiLoop c) 510 | 511 | unI :: Int -> Int# 512 | {-# INLINE unI #-} 513 | unI (I# w) = w 514 | 515 | {- | Skip the character any number of times. This succeeds 516 | even if the character was not present. 517 | -} 518 | skipChar :: Char -> Parser e s () 519 | {-# INLINE skipChar #-} 520 | skipChar !w = uneffectful# $ \c -> 521 | upcastUnitSuccess (skipLoop w c) 522 | 523 | {- | Skip the character any number of times. It must occur 524 | at least once or else this will fail. 525 | -} 526 | skipChar1 :: e -> Char -> Parser e s () 527 | {-# INLINE skipChar1 #-} 528 | skipChar1 e !w = uneffectful# $ \c -> 529 | skipLoop1Start e w c 530 | 531 | skipLoop :: 532 | Char -> -- byte to match 533 | Bytes -> -- Chunk 534 | (# Int#, Int# #) 535 | skipLoop !w !c = 536 | if length c > 0 537 | then 538 | if indexLatinCharArray (array c) (offset c) == w 539 | then skipLoop w (Bytes.unsafeDrop 1 c) 540 | else (# unI (offset c), unI (length c) #) 541 | else (# unI (offset c), unI (length c) #) 542 | 543 | skipLoop1Start :: 544 | e -> 545 | Char -> -- byte to match 546 | Bytes -> -- chunk 547 | Result# e () 548 | skipLoop1Start e !w !chunk0 = 549 | if length chunk0 > 0 550 | then 551 | if indexLatinCharArray (array chunk0) (offset chunk0) == w 552 | then upcastUnitSuccess (skipLoop w (Bytes.unsafeDrop 1 chunk0)) 553 | else (# e | #) 554 | else (# e | #) 555 | 556 | {- | Parse a decimal-encoded 8-bit word. If the number is larger 557 | than 255, this parser fails. 558 | -} 559 | decWord8 :: e -> Parser e s Word8 560 | decWord8 e = 561 | Parser 562 | ( \chunk0 s0 -> case decSmallWordStart e 256 (boxBytes chunk0) s0 of 563 | (# s1, r #) -> (# s1, upcastWord8Result r #) 564 | ) 565 | 566 | {- | Parse a hexadecimal-encoded 8-bit word. If the number is larger 567 | than 255, this parser fails. This allows leading zeroes and is 568 | insensitive to case. For example, @00A@, @0a@ and @A@ would all 569 | be accepted as the same number. 570 | -} 571 | hexWord8 :: e -> Parser e s Word8 572 | hexWord8 e = 573 | Parser 574 | ( \chunk0 s0 -> case hexSmallWordStart e 256 (boxBytes chunk0) s0 of 575 | (# s1, r #) -> (# s1, upcastWord8Result r #) 576 | ) 577 | 578 | {- | Parse a hexadecimal-encoded 16-bit word. If the number is larger 579 | than 65535, this parser fails. This allows leading zeroes and is 580 | insensitive to case. For example, @0100a@ and @100A@ would both 581 | be accepted as the same number. 582 | -} 583 | hexWord16 :: e -> Parser e s Word16 584 | hexWord16 e = 585 | Parser 586 | ( \chunk0 s0 -> case hexSmallWordStart e 65536 (boxBytes chunk0) s0 of 587 | (# s1, r #) -> (# s1, upcastWord16Result r #) 588 | ) 589 | 590 | hexWord32 :: e -> Parser e s Word32 591 | hexWord32 e = 592 | Parser 593 | ( \chunk0 s0 -> case hexSmallWordStart e 4294967296 (boxBytes chunk0) s0 of 594 | (# s1, r #) -> (# s1, upcastWord32Result r #) 595 | ) 596 | 597 | {- | Parse a decimal-encoded 16-bit word. If the number is larger 598 | than 65535, this parser fails. 599 | -} 600 | decWord16 :: e -> Parser e s Word16 601 | decWord16 e = 602 | Parser 603 | ( \chunk0 s0 -> case decSmallWordStart e 65536 (boxBytes chunk0) s0 of 604 | (# s1, r #) -> (# s1, upcastWord16Result r #) 605 | ) 606 | 607 | {- | Parse a decimal-encoded 32-bit word. If the number is larger 608 | than 4294967295, this parser fails. 609 | -} 610 | decWord32 :: e -> Parser e s Word32 611 | -- This will not work on 32-bit platforms. 612 | decWord32 e = 613 | Parser 614 | ( \chunk0 s0 -> case decSmallWordStart e 4294967296 (boxBytes chunk0) s0 of 615 | (# s1, r #) -> (# s1, upcastWord32Result r #) 616 | ) 617 | 618 | {- | Parse a decimal-encoded number. If the number is too large to be 619 | represented by a machine word, this fails with the provided 620 | error message. This accepts any number of leading zeroes. 621 | -} 622 | decWord :: e -> Parser e s Word 623 | decWord e = 624 | Parser 625 | ( \chunk0 s0 -> case decWordStart e (boxBytes chunk0) s0 of 626 | (# s1, r #) -> (# s1, upcastWordResult r #) 627 | ) 628 | 629 | {- | Parse a decimal-encoded unsigned number. If the number is 630 | too large to be represented by a 64-bit word, this fails with 631 | the provided error message. This accepts any number of leading 632 | zeroes. 633 | -} 634 | decWord64 :: e -> Parser e s Word64 635 | decWord64 e = 636 | Parser 637 | ( \chunk0 s0 -> case decWordStart e (boxBytes chunk0) s0 of 638 | (# s1, r #) -> (# s1, upcastWord64Result r #) 639 | ) 640 | 641 | hexSmallWordStart :: 642 | e -> -- Error message 643 | Word -> -- Upper Bound 644 | Bytes -> -- Chunk 645 | ST# s (Result# e Word#) 646 | hexSmallWordStart e !limit !chunk0 s0 = 647 | if length chunk0 > 0 648 | then case oneHexMaybe (PM.indexByteArray (array chunk0) (offset chunk0)) of 649 | Nothing -> (# s0, (# e | #) #) 650 | Just w -> (# s0, hexSmallWordMore e w limit (Bytes.unsafeDrop 1 chunk0) #) 651 | else (# s0, (# e | #) #) 652 | 653 | decSmallWordStart :: 654 | e -> -- Error message 655 | Word -> -- Upper Bound 656 | Bytes -> -- Chunk 657 | ST# s (Result# e Word#) 658 | decSmallWordStart e !limit !chunk0 s0 = 659 | if length chunk0 > 0 660 | then 661 | let !w = 662 | fromIntegral @Word8 @Word 663 | (PM.indexByteArray (array chunk0) (offset chunk0)) 664 | - 48 665 | in if w < 10 666 | then (# s0, decSmallWordMore e w limit (Bytes.unsafeDrop 1 chunk0) #) 667 | else (# s0, (# e | #) #) 668 | else (# s0, (# e | #) #) 669 | 670 | -- This will not inline since it is recursive, but worker 671 | -- wrapper will still happen. 672 | decWordMore :: 673 | e -> -- Error message 674 | Word -> -- Accumulator 675 | Bytes -> -- Chunk 676 | Result# e Word# 677 | decWordMore e !acc !chunk0 = case len of 678 | 0 -> (# | (# unW (fromIntegral acc), unI (offset chunk0), 0# #) #) 679 | _ -> 680 | let !w = 681 | fromIntegral @Word8 @Word 682 | (PM.indexByteArray (array chunk0) (offset chunk0)) 683 | - 48 684 | in if w < 10 685 | then 686 | let (overflow, acc') = unsignedPushBase10 acc w 687 | in if overflow 688 | then (# e | #) 689 | else decWordMore e acc' (Bytes.unsafeDrop 1 chunk0) 690 | else (# | (# unW (fromIntegral acc), unI (offset chunk0), len# #) #) 691 | where 692 | !len@(I# len#) = length chunk0 693 | 694 | upcastWordResult :: Result# e Word# -> Result# e Word 695 | {-# INLINE upcastWordResult #-} 696 | upcastWordResult (# e | #) = (# e | #) 697 | upcastWordResult (# | (# a, b, c #) #) = (# | (# W# a, b, c #) #) 698 | 699 | {- FOURMOLU_DISABLE -} 700 | -- This only works on 64-bit platforms. 701 | upcastWord64Result :: Result# e Word# -> Result# e Word64 702 | {-# inline upcastWord64Result #-} 703 | upcastWord64Result (# e | #) = (# e | #) 704 | upcastWord64Result (# | (# a, b, c #) #) = (# | (# W64# ( 705 | Exts.wordToWord64# a 706 | ), b, c #) #) 707 | {- FOURMOLU_ENABLE -} 708 | 709 | hexSmallWordMore :: 710 | e -> -- Error message 711 | Word -> -- Accumulator 712 | Word -> -- Upper Bound 713 | Bytes -> -- Chunk 714 | Result# e Word# 715 | hexSmallWordMore e !acc !limit !chunk0 = 716 | if length chunk0 > 0 717 | then case oneHexMaybe (PM.indexByteArray (array chunk0) (offset chunk0)) of 718 | Nothing -> (# | (# unW acc, unI (offset chunk0), unI (length chunk0) #) #) 719 | Just w -> 720 | let w' = acc * 16 + w 721 | in if w' < limit 722 | then hexSmallWordMore e w' limit (Bytes.unsafeDrop 1 chunk0) 723 | else (# e | #) 724 | else (# | (# unW acc, unI (offset chunk0), 0# #) #) 725 | 726 | decSmallWordMore :: 727 | e -> -- Error message 728 | Word -> -- Accumulator 729 | Word -> -- Upper Bound 730 | Bytes -> -- Chunk 731 | Result# e Word# 732 | decSmallWordMore e !acc !limit !chunk0 = 733 | if length chunk0 > 0 734 | then 735 | let !w = 736 | fromIntegral @Word8 @Word 737 | (PM.indexByteArray (array chunk0) (offset chunk0)) 738 | - 48 739 | in if w < 10 740 | then 741 | let w' = acc * 10 + w 742 | in if w' < limit 743 | then decSmallWordMore e w' limit (Bytes.unsafeDrop 1 chunk0) 744 | else (# e | #) 745 | else (# | (# unW acc, unI (offset chunk0), unI (length chunk0) #) #) 746 | else (# | (# unW acc, unI (offset chunk0), 0# #) #) 747 | 748 | unW :: Word -> Word# 749 | {-# INLINE unW #-} 750 | unW (W# w) = w 751 | 752 | decWordStart :: 753 | e -> -- Error message 754 | Bytes -> -- Chunk 755 | ST# s (Result# e Word#) 756 | decWordStart e !chunk0 s0 = 757 | if length chunk0 > 0 758 | then 759 | let !w = 760 | fromIntegral @Word8 @Word 761 | (PM.indexByteArray (array chunk0) (offset chunk0)) 762 | - 48 763 | in if w < 10 764 | then (# s0, decWordMore e w (Bytes.unsafeDrop 1 chunk0) #) 765 | else (# s0, (# e | #) #) 766 | else (# s0, (# e | #) #) 767 | 768 | {- FOURMOLU_DISABLE -} 769 | -- Precondition: the word is small enough 770 | upcastWord16Result :: Result# e Word# -> Result# e Word16 771 | {-# inline upcastWord16Result #-} 772 | upcastWord16Result (# e | #) = (# e | #) 773 | upcastWord16Result (# | (# a, b, c #) #) = (# | (# W16# ( 774 | Exts.wordToWord16# 775 | a), b, c #) #) 776 | 777 | -- Precondition: the word is small enough 778 | upcastWord32Result :: Result# e Word# -> Result# e Word32 779 | {-# inline upcastWord32Result #-} 780 | upcastWord32Result (# e | #) = (# e | #) 781 | upcastWord32Result (# | (# a, b, c #) #) = (# | (# W32# ( 782 | Exts.wordToWord32# 783 | a), b, c #) #) 784 | 785 | -- Precondition: the word is small enough 786 | upcastWord8Result :: Result# e Word# -> Result# e Word8 787 | {-# inline upcastWord8Result #-} 788 | upcastWord8Result (# e | #) = (# e | #) 789 | upcastWord8Result (# | (# a, b, c #) #) = (# | (# W8# ( 790 | Exts.wordToWord8# 791 | a), b, c #) #) 792 | {- FOURMOLU_ENABLE -} 793 | 794 | {- | Parse a decimal-encoded number. If the number is too large to be 795 | represented by a machine integer, this fails with the provided 796 | error message. This rejects input with that is preceeded by plus 797 | or minus. Consequently, it does not parse negative numbers. Use 798 | 'decStandardInt' or 'decSignedInt' for that purpose. On a 64-bit 799 | platform 'decWord' will successfully parse 9223372036854775808 800 | (i.e. @2 ^ 63@), but 'decUnsignedInt' will fail. This parser allows 801 | leading zeroes. 802 | -} 803 | decUnsignedInt :: e -> Parser e s Int 804 | decUnsignedInt e = 805 | Parser 806 | ( \chunk0 s0 -> case decPosIntStart e (boxBytes chunk0) s0 of 807 | (# s1, r #) -> (# s1, upcastIntResult r #) 808 | ) 809 | 810 | -- | Variant of 'decUnsignedInt' with an unboxed result. 811 | decUnsignedInt# :: e -> Parser e s Int# 812 | decUnsignedInt# e = 813 | Parser 814 | (\chunk0 s0 -> decPosIntStart e (boxBytes chunk0) s0) 815 | 816 | {- | Parse a decimal-encoded number. If the number is too large to be 817 | represented by a machine integer, this fails with the provided 818 | error message. This allows the number to optionally be prefixed 819 | by plus or minus. If the sign prefix is not present, the number 820 | is interpreted as positive. This allows leading zeroes. 821 | -} 822 | decSignedInt :: e -> Parser e s Int 823 | decSignedInt e = 824 | Parser 825 | ( \chunk0 s0 -> case runParser (decSignedInt# e) chunk0 s0 of 826 | (# s1, r #) -> (# s1, upcastIntResult r #) 827 | ) 828 | 829 | {- | Variant of 'decUnsignedInt' that lets the caller supply a leading 830 | digit. This is useful when parsing formats like JSON where integers with 831 | leading zeroes are considered invalid. The calling context must 832 | consume the first digit before calling this parser. Results are 833 | always positive numbers. 834 | -} 835 | decTrailingInt :: 836 | -- | Error message 837 | e -> 838 | -- | Leading digit, should be between @0@ and @9@. 839 | Int -> 840 | Parser e s Int 841 | decTrailingInt e (I# w) = 842 | Parser 843 | ( \chunk0 s0 -> case runParser (decTrailingInt# e w) chunk0 s0 of 844 | (# s1, r #) -> (# s1, upcastIntResult r #) 845 | ) 846 | 847 | decTrailingInt# :: 848 | e -> -- Error message 849 | Int# -> -- Leading digit, should be between @0@ and @9@. 850 | Parser e s Int# 851 | decTrailingInt# e !w = 852 | Parser (\chunk0 s0 -> (# s0, decPosIntMore e (W# (int2Word# w)) maxIntAsWord (boxBytes chunk0) #)) 853 | 854 | maxIntAsWord :: Word 855 | maxIntAsWord = fromIntegral (maxBound :: Int) 856 | 857 | {- | Parse a decimal-encoded number. If the number is too large to be 858 | represented by a machine integer, this fails with the provided 859 | error message. This allows the number to optionally be prefixed 860 | by minus. If the minus prefix is not present, the number 861 | is interpreted as positive. The disallows a leading plus sign. 862 | For example, 'decStandardInt' rejects @+42@, but 'decSignedInt' 863 | allows it. 864 | -} 865 | decStandardInt :: e -> Parser e s Int 866 | decStandardInt e = 867 | Parser 868 | ( \chunk0 s0 -> case runParser (decStandardInt# e) chunk0 s0 of 869 | (# s1, r #) -> (# s1, upcastIntResult r #) 870 | ) 871 | 872 | decSignedInt# :: e -> Parser e s Int# 873 | {-# NOINLINE decSignedInt# #-} 874 | decSignedInt# e = 875 | any e `bindFromLiftedToInt` \c -> case c of 876 | '+' -> 877 | Parser -- plus sign 878 | (\chunk0 s0 -> decPosIntStart e (boxBytes chunk0) s0) 879 | '-' -> 880 | Parser -- minus sign 881 | (\chunk0 s0 -> decNegIntStart e (boxBytes chunk0) s0) 882 | _ -> 883 | Parser -- no sign, there should be a digit here 884 | ( \chunk0 s0 -> 885 | let !w = char2Word c - 48 886 | in if w < 10 887 | then (# s0, decPosIntMore e w maxIntAsWord (boxBytes chunk0) #) 888 | else (# s0, (# e | #) #) 889 | ) 890 | 891 | -- This is the same as decSignedInt except that we disallow 892 | -- a leading plus sign. 893 | decStandardInt# :: e -> Parser e s Int# 894 | {-# NOINLINE decStandardInt# #-} 895 | decStandardInt# e = 896 | any e `bindFromLiftedToInt` \c -> case c of 897 | '-' -> 898 | Parser -- minus sign 899 | (\chunk0 s0 -> decNegIntStart e (boxBytes chunk0) s0) 900 | _ -> 901 | Parser -- no sign, there should be a digit here 902 | ( \chunk0 s0 -> 903 | let !w = char2Word c - 48 904 | in if w < 10 905 | then (# s0, decPosIntMore e w maxIntAsWord (boxBytes chunk0) #) 906 | else (# s0, (# e | #) #) 907 | ) 908 | 909 | {- | Variant of 'decUnsignedInteger' that lets the caller supply a leading 910 | digit. This is useful when parsing formats like JSON where integers with 911 | leading zeroes are considered invalid. The calling context must 912 | consume the first digit before calling this parser. Results are 913 | always positive numbers. 914 | -} 915 | decTrailingInteger :: 916 | -- | Leading digit, should be between @0@ and @9@. 917 | Int -> 918 | Parser e s Integer 919 | decTrailingInteger (I# w) = 920 | Parser (\chunk0 s0 -> (# s0, (# | decIntegerChunks (I# w) 10 0 (boxBytes chunk0) #) #)) 921 | 922 | {- | Parse a decimal-encoded positive integer of arbitrary 923 | size. This rejects input that begins with a plus or minus 924 | sign. 925 | -} 926 | decUnsignedInteger :: e -> Parser e s Integer 927 | decUnsignedInteger e = 928 | Parser 929 | (\chunk0 s0 -> decUnsignedIntegerStart e (boxBytes chunk0) s0) 930 | 931 | {- | Parse a decimal-encoded integer of arbitrary size. 932 | This accepts input that begins with a plus or minus sign. 933 | Input without a sign prefix is interpreted as positive. 934 | -} 935 | decSignedInteger :: e -> Parser e s Integer 936 | {-# NOINLINE decSignedInteger #-} 937 | decSignedInteger e = 938 | any e >>= \c -> case c of 939 | '+' -> do 940 | decUnsignedInteger e 941 | '-' -> do 942 | x <- decUnsignedInteger e 943 | pure $! negate x 944 | _ -> 945 | Parser -- no sign, there should be a digit here 946 | ( \chunk0 s0 -> 947 | let !w = char2Word c - 48 948 | in if w < 10 949 | then 950 | let !r = 951 | decIntegerChunks 952 | (fromIntegral @Word @Int w) 953 | 10 954 | 0 955 | (boxBytes chunk0) 956 | in (# s0, (# | r #) #) 957 | else (# s0, (# e | #) #) 958 | ) 959 | 960 | decPosIntStart :: 961 | e -> -- Error message 962 | Bytes -> -- Chunk 963 | ST# s (Result# e Int#) 964 | decPosIntStart e !chunk0 s0 = 965 | if length chunk0 > 0 966 | then 967 | let !w = 968 | fromIntegral @Word8 @Word 969 | (PM.indexByteArray (array chunk0) (offset chunk0)) 970 | - 48 971 | in if w < 10 972 | then (# s0, decPosIntMore e w maxIntAsWord (Bytes.unsafeDrop 1 chunk0) #) 973 | else (# s0, (# e | #) #) 974 | else (# s0, (# e | #) #) 975 | 976 | decNegIntStart :: 977 | e -> -- Error message 978 | Bytes -> -- Chunk 979 | ST# s (Result# e Int#) 980 | decNegIntStart e !chunk0 s0 = 981 | if length chunk0 > 0 982 | then 983 | let !w = 984 | fromIntegral @Word8 @Word 985 | (PM.indexByteArray (array chunk0) (offset chunk0)) 986 | - 48 987 | in if w < 10 988 | then case decPosIntMore e w (maxIntAsWord + 1) (Bytes.unsafeDrop 1 chunk0) of 989 | (# | (# x, y, z #) #) -> 990 | (# s0, (# | (# (notI# x +# 1#), y, z #) #) #) 991 | (# err | #) -> 992 | (# s0, (# err | #) #) 993 | else (# s0, (# e | #) #) 994 | else (# s0, (# e | #) #) 995 | 996 | decUnsignedIntegerStart :: 997 | e -> 998 | Bytes -> 999 | ST# s (Result# e Integer) 1000 | decUnsignedIntegerStart e !chunk0 s0 = 1001 | if length chunk0 > 0 1002 | then 1003 | let !w = (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 1004 | in if w < (10 :: Word8) 1005 | then 1006 | let !r = 1007 | decIntegerChunks 1008 | (fromIntegral @Word8 @Int w) 1009 | 10 1010 | 0 1011 | (Bytes.unsafeDrop 1 chunk0) 1012 | in (# s0, (# | r #) #) 1013 | else (# s0, (# e | #) #) 1014 | else (# s0, (# e | #) #) 1015 | 1016 | -- This will not inline since it is recursive, but worker 1017 | -- wrapper will still happen. Fails if the accumulator 1018 | -- exceeds the upper bound. 1019 | decPosIntMore :: 1020 | e -> -- Error message 1021 | Word -> -- Accumulator, precondition: less than or equal to bound 1022 | Word -> -- Inclusive Upper Bound, either (2^63 - 1) or 2^63 1023 | Bytes -> -- Chunk 1024 | Result# e Int# 1025 | decPosIntMore e !acc !upper !chunk0 = 1026 | if len > 0 1027 | then 1028 | let !w = 1029 | fromIntegral @Word8 @Word 1030 | (PM.indexByteArray (array chunk0) (offset chunk0)) 1031 | - 48 1032 | in if w < 10 1033 | then 1034 | let (overflow, acc') = positivePushBase10 acc w upper 1035 | in if overflow 1036 | then (# e | #) 1037 | else decPosIntMore e acc' upper (Bytes.unsafeDrop 1 chunk0) 1038 | else (# | (# unI (fromIntegral acc), unI (offset chunk0), len# #) #) 1039 | else (# | (# unI (fromIntegral acc), unI (offset chunk0), 0# #) #) 1040 | where 1041 | !len@(I# len#) = length chunk0 1042 | 1043 | -- This will not inline since it is recursive, but worker 1044 | -- wrapper will still happen. When the accumulator 1045 | -- exceeds the size of a machine integer, this pushes the 1046 | -- accumulated machine int and the shift amount onto the 1047 | -- stack. 1048 | -- We are intentionally lazy in the accumulator. There is 1049 | -- no need to force this on every iteration. We do however, 1050 | -- force it preemptively every time it changes. 1051 | -- Because of how we track overflow, we are able to use the 1052 | -- same function for both positive and negative numbers. 1053 | decIntegerChunks :: 1054 | Int -> -- Chunk accumulator (e.g. 236) 1055 | Int -> -- Chunk base-ten bound (e.g. 1000) 1056 | Integer -> -- Accumulator 1057 | Bytes -> -- Chunk 1058 | (# Integer, Int#, Int# #) 1059 | decIntegerChunks !nAcc !eAcc acc !chunk0 = 1060 | if len > 0 1061 | then 1062 | let !w = 1063 | fromIntegral @Word8 @Word 1064 | (PM.indexByteArray (array chunk0) (offset chunk0)) 1065 | - 48 1066 | in if w < 10 1067 | then 1068 | let !eAcc' = eAcc * 10 1069 | in if eAcc' >= eAcc 1070 | then 1071 | decIntegerChunks 1072 | (nAcc * 10 + fromIntegral @Word @Int w) 1073 | eAcc' 1074 | acc 1075 | (Bytes.unsafeDrop 1 chunk0) 1076 | else -- In this case, notice that we deliberately 1077 | -- unconsume the digit that would have caused 1078 | -- an overflow. 1079 | 1080 | let !r = 1081 | (acc * fromIntegral @Int @Integer eAcc) 1082 | + (fromIntegral @Int @Integer nAcc) 1083 | in decIntegerChunks 0 1 r chunk0 1084 | else 1085 | let !r = 1086 | (acc * fromIntegral @Int @Integer eAcc) 1087 | + (fromIntegral @Int @Integer nAcc) 1088 | in (# r, unI (offset chunk0), len# #) 1089 | else 1090 | let !r = 1091 | (acc * fromIntegral @Int @Integer eAcc) 1092 | + (fromIntegral @Int @Integer nAcc) 1093 | in (# r, unI (offset chunk0), 0# #) 1094 | where 1095 | !len@(I# len#) = length chunk0 1096 | 1097 | upcastIntResult :: Result# e Int# -> Result# e Int 1098 | upcastIntResult (# e | #) = (# e | #) 1099 | upcastIntResult (# | (# a, b, c #) #) = (# | (# I# a, b, c #) #) 1100 | 1101 | char2Word :: Char -> Word 1102 | char2Word = fromIntegral . ord 1103 | 1104 | {- | Take characters until the specified character is encountered. 1105 | Consumes the matched character as well. Fails if the character 1106 | is not present. Visually, the cursor advancement and resulting 1107 | @Bytes@ for @takeTrailedBy \'D\'@ look like this: 1108 | 1109 | > A B C D E F | input 1110 | > |->->->-| | cursor 1111 | > {\-*-*-\} | result bytes 1112 | -} 1113 | takeTrailedBy :: e -> Char -> Parser e s Bytes 1114 | takeTrailedBy e !w = do 1115 | !start <- cursor 1116 | skipTrailedBy e w 1117 | !end <- cursor 1118 | !arr <- expose 1119 | pure (Bytes arr start (end - (start + 1))) 1120 | 1121 | {- | Skip all characters until the terminator is encountered 1122 | and then consume the matching character as well. Visually, 1123 | @skipTrailedBy \'C\'@ advances the cursor like this: 1124 | 1125 | > A Z B Y C X C W 1126 | > |->->->->-| 1127 | 1128 | This fails if it reaches the end of input without encountering 1129 | the character. 1130 | -} 1131 | skipTrailedBy :: e -> Char -> Parser e s () 1132 | skipTrailedBy e !w = uneffectful# $ \c -> 1133 | skipUntilConsumeLoop e w c 1134 | 1135 | {- | Skip all characters until the terminator is encountered. 1136 | This does not consume the terminator. Visually, @skipUntil \'C\'@ 1137 | advances the cursor like this: 1138 | 1139 | > A Z B Y C X C W 1140 | > |->->->-| 1141 | 1142 | This succeeds if it reaches the end of the input without 1143 | encountering the terminator. It never fails. 1144 | -} 1145 | skipUntil :: Char -> Parser e s () 1146 | skipUntil !w = uneffectful# $ \c -> skipUntilLoop w c 1147 | 1148 | skipUntilLoop :: 1149 | Char -> -- byte to match 1150 | Bytes -> -- Chunk 1151 | Result# e () 1152 | skipUntilLoop !w !c = case length c of 1153 | 0 -> (# | (# (), unI (offset c), 0# #) #) 1154 | _ -> 1155 | if indexLatinCharArray (array c) (offset c) /= w 1156 | then skipUntilLoop w (Bytes.unsafeDrop 1 c) 1157 | else (# | (# (), unI (offset c), unI (length c) #) #) 1158 | 1159 | skipUntilConsumeLoop :: 1160 | e -> -- Error message 1161 | Char -> -- byte to match 1162 | Bytes -> -- Chunk 1163 | Result# e () 1164 | skipUntilConsumeLoop e !w !c = case length c of 1165 | 0 -> (# e | #) 1166 | _ -> 1167 | if indexLatinCharArray (array c) (offset c) /= w 1168 | then skipUntilConsumeLoop e w (Bytes.unsafeDrop 1 c) 1169 | else (# | (# (), unI (offset c + 1), unI (length c - 1) #) #) 1170 | 1171 | {- FOURMOLU_DISABLE -} 1172 | -- | Parse exactly eight ASCII-encoded characters, interpreting them as the 1173 | -- hexadecimal encoding of a 32-bit number. Note that this rejects a sequence 1174 | -- such as @BC5A9@, requiring @000BC5A9@ instead. This is insensitive to case. 1175 | hexFixedWord32 :: e -> Parser e s Word32 1176 | {-# inline hexFixedWord32 #-} 1177 | hexFixedWord32 e = Parser 1178 | (\x s0 -> case runParser (hexFixedWord32# e) x s0 of 1179 | (# s1, r #) -> case r of 1180 | (# err | #) -> (# s1, (# err | #) #) 1181 | (# | (# a, b, c #) #) -> (# s1, (# | (# W32# ( 1182 | Exts.wordToWord32# 1183 | a), b, c #) #) #) 1184 | ) 1185 | {- FOURMOLU_ENABLE -} 1186 | 1187 | hexFixedWord32# :: e -> Parser e s Word# 1188 | {-# NOINLINE hexFixedWord32# #-} 1189 | hexFixedWord32# e = uneffectfulWord# $ \chunk -> 1190 | if length chunk >= 8 1191 | then 1192 | let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk) 1193 | !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1) 1194 | !w2@(W# n2) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 2) 1195 | !w3@(W# n3) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 3) 1196 | !w4@(W# n4) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 4) 1197 | !w5@(W# n5) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 5) 1198 | !w6@(W# n6) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 6) 1199 | !w7@(W# n7) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 7) 1200 | in if 1201 | | w0 .|. w1 .|. w2 .|. w3 .|. w4 .|. w5 .|. w6 .|. w7 /= maxBound -> 1202 | (# 1203 | | (# 1204 | (n0 `Exts.timesWord#` 268435456##) 1205 | `Exts.plusWord#` (n1 `Exts.timesWord#` 16777216##) 1206 | `Exts.plusWord#` (n2 `Exts.timesWord#` 1048576##) 1207 | `Exts.plusWord#` (n3 `Exts.timesWord#` 65536##) 1208 | `Exts.plusWord#` (n4 `Exts.timesWord#` 4096##) 1209 | `Exts.plusWord#` (n5 `Exts.timesWord#` 256##) 1210 | `Exts.plusWord#` (n6 `Exts.timesWord#` 16##) 1211 | `Exts.plusWord#` n7 1212 | , unI (offset chunk) +# 8# 1213 | , unI (length chunk) -# 8# 1214 | #) 1215 | #) 1216 | | otherwise -> (# e | #) 1217 | else (# e | #) 1218 | 1219 | {- FOURMOLU_DISABLE -} 1220 | -- | Parse exactly 16 ASCII-encoded characters, interpreting them as the 1221 | -- hexadecimal encoding of a 64-bit number. Note that this rejects a sequence 1222 | -- such as @BC5A9@, requiring @00000000000BC5A9@ instead. This is insensitive 1223 | -- to case. 1224 | hexFixedWord64 :: e -> Parser e s Word64 1225 | {-# inline hexFixedWord64 #-} 1226 | hexFixedWord64 e = Parser 1227 | (\x s0 -> case runParser (hexFixedWord64# e) x s0 of 1228 | (# s1, r #) -> case r of 1229 | (# err | #) -> (# s1, (# err | #) #) 1230 | (# | (# a, b, c #) #) -> (# s1, (# | (# W64# ( 1231 | Exts.wordToWord64# a 1232 | ), b, c #) #) #) 1233 | ) 1234 | {- FOURMOLU_ENABLE -} 1235 | 1236 | hexFixedWord128 :: e -> Parser e s Word128 1237 | hexFixedWord128 e = 1238 | Word128 1239 | <$> hexFixedWord64 e 1240 | <*> hexFixedWord64 e 1241 | 1242 | hexFixedWord256 :: e -> Parser e s Word256 1243 | hexFixedWord256 e = 1244 | Word256 1245 | <$> hexFixedWord64 e 1246 | <*> hexFixedWord64 e 1247 | <*> hexFixedWord64 e 1248 | <*> hexFixedWord64 e 1249 | 1250 | hexFixedWord64# :: e -> Parser e s Word# 1251 | {-# NOINLINE hexFixedWord64# #-} 1252 | hexFixedWord64# e = uneffectfulWord# $ \chunk -> 1253 | if length chunk >= 16 1254 | then 1255 | let go !off !len !acc = case len of 1256 | 0 -> case acc of 1257 | W# r -> 1258 | (# 1259 | | (# 1260 | r 1261 | , unI off 1262 | , unI (length chunk) -# 16# 1263 | #) 1264 | #) 1265 | _ -> case oneHexMaybe (PM.indexByteArray (array chunk) off) of 1266 | Nothing -> (# e | #) 1267 | Just w -> go (off + 1) (len - 1) ((acc * 16) + w) 1268 | in go (offset chunk) (16 :: Int) (0 :: Word) 1269 | else (# e | #) 1270 | 1271 | {- FOURMOLU_DISABLE -} 1272 | -- | Parse exactly four ASCII-encoded characters, interpreting 1273 | -- them as the hexadecimal encoding of a 16-bit number. Note that 1274 | -- this rejects a sequence such as @5A9@, requiring @05A9@ instead. 1275 | -- This is insensitive to case. This is particularly useful when 1276 | -- parsing escape sequences in C or JSON, which allow encoding 1277 | -- characters in the Basic Multilingual Plane as @\\uhhhh@. 1278 | hexFixedWord16 :: e -> Parser e s Word16 1279 | {-# inline hexFixedWord16 #-} 1280 | hexFixedWord16 e = Parser 1281 | (\x s0 -> case runParser (hexFixedWord16# e) x s0 of 1282 | (# s1, r #) -> case r of 1283 | (# err | #) -> (# s1, (# err | #) #) 1284 | (# | (# a, b, c #) #) -> (# s1, (# | (# W16# ( 1285 | Exts.wordToWord16# 1286 | a), b, c #) #) #) 1287 | ) 1288 | 1289 | -- | Variant of hexFixedWord16 that returns an unboxed result. The result is 1290 | -- a machine-sized word instead of a 16-bit word, but this function guarantees 1291 | -- that only the low 16 bits may be non-zero. This is helpful because the 1292 | -- result needs to be constructed as a machine-sized word. GHC's primitives 1293 | -- for working with 16-bit words lower to most ISAs poorly. 1294 | hexFixedWord16# :: e -> Parser e s Word# 1295 | {-# noinline hexFixedWord16# #-} 1296 | hexFixedWord16# e = uneffectfulWord# $ \chunk -> if length chunk >= 4 1297 | then 1298 | let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk) 1299 | !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1) 1300 | !w2@(W# n2) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 2) 1301 | !w3@(W# n3) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 3) 1302 | in if | w0 .|. w1 .|. w2 .|. w3 /= maxBound -> 1303 | (# | 1304 | (# (n0 `Exts.shiftL#` 12#) `Exts.plusWord#` 1305 | (n1 `Exts.shiftL#` 8#) `Exts.plusWord#` 1306 | (n2 `Exts.shiftL#` 4#) `Exts.plusWord#` 1307 | n3 1308 | , unI (offset chunk) +# 4# 1309 | , unI (length chunk) -# 4# #) #) 1310 | | otherwise -> (# e | #) 1311 | else (# e | #) 1312 | 1313 | -- | Parse exactly two ASCII-encoded characters, interpretting 1314 | -- them as the hexadecimal encoding of a 8-bit number. Note that 1315 | -- this rejects a sequence such as @A@, requiring @0A@ instead. 1316 | -- This is insensitive to case. 1317 | hexFixedWord8 :: e -> Parser e s Word8 1318 | {-# inline hexFixedWord8 #-} 1319 | hexFixedWord8 e = Parser 1320 | (\x s0 -> case runParser (hexFixedWord8# e) x s0 of 1321 | (# s1, r #) -> case r of 1322 | (# err | #) -> (# s1, (# err | #) #) 1323 | (# | (# a, b, c #) #) -> (# s1, (# | (# W8# ( 1324 | Exts.wordToWord8# 1325 | a), b, c #) #) #) 1326 | ) 1327 | {- FOURMOLU_ENABLE -} 1328 | 1329 | hexFixedWord8# :: e -> Parser e s Word# 1330 | {-# NOINLINE hexFixedWord8# #-} 1331 | hexFixedWord8# e = uneffectfulWord# $ \chunk -> 1332 | if length chunk >= 2 1333 | then 1334 | let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk) 1335 | !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1) 1336 | in if 1337 | | w0 .|. w1 /= maxBound -> 1338 | (# 1339 | | (# 1340 | (n0 `Exts.timesWord#` 16##) 1341 | `Exts.plusWord#` n1 1342 | , unI (offset chunk) +# 2# 1343 | , unI (length chunk) -# 2# 1344 | #) 1345 | #) 1346 | | otherwise -> (# e | #) 1347 | else (# e | #) 1348 | 1349 | {- | Consume a single character that is the lowercase hexadecimal 1350 | encoding of a 4-bit word. Fails if the character is not in the class 1351 | @[a-f0-9]@. 1352 | -} 1353 | hexNibbleLower :: e -> Parser e s Word 1354 | hexNibbleLower e = uneffectful $ \chunk -> case length chunk of 1355 | 0 -> Failure e 1356 | _ -> 1357 | let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 1358 | in if 1359 | | w >= 48 && w < 58 -> Success (fromIntegral w - 48) (offset chunk + 1) (length chunk - 1) 1360 | | w >= 97 && w < 103 -> Success (fromIntegral w - 87) (offset chunk + 1) (length chunk - 1) 1361 | | otherwise -> Failure e 1362 | 1363 | {- | Consume a single character that is the case-insensitive hexadecimal 1364 | encoding of a 4-bit word. Fails if the character is not in the class 1365 | @[a-fA-F0-9]@. 1366 | -} 1367 | hexNibble :: e -> Parser e s Word 1368 | hexNibble e = uneffectful $ \chunk -> case length chunk of 1369 | 0 -> Failure e 1370 | _ -> 1371 | let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 1372 | in if 1373 | | w >= 48 && w < 58 -> Success (fromIntegral w - 48) (offset chunk + 1) (length chunk - 1) 1374 | | w >= 65 && w < 71 -> Success (fromIntegral w - 55) (offset chunk + 1) (length chunk - 1) 1375 | | w >= 97 && w < 103 -> Success (fromIntegral w - 87) (offset chunk + 1) (length chunk - 1) 1376 | | otherwise -> Failure e 1377 | 1378 | {- | Consume a single character that is the lowercase hexadecimal 1379 | encoding of a 4-bit word. Returns @Nothing@ without consuming 1380 | the character if it is not in the class @[a-f0-9]@. The parser 1381 | never fails. 1382 | -} 1383 | tryHexNibbleLower :: Parser e s (Maybe Word) 1384 | tryHexNibbleLower = unfailing $ \chunk -> case length chunk of 1385 | 0 -> InternalStep Nothing (offset chunk) (length chunk) 1386 | _ -> 1387 | let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 1388 | in if 1389 | | w >= 48 && w < 58 -> InternalStep (Just (fromIntegral w - 48)) (offset chunk + 1) (length chunk - 1) 1390 | | w >= 97 && w < 103 -> InternalStep (Just (fromIntegral w - 87)) (offset chunk + 1) (length chunk - 1) 1391 | | otherwise -> InternalStep Nothing (offset chunk) (length chunk) 1392 | 1393 | {- | Consume a single character that is the case-insensitive hexadecimal 1394 | encoding of a 4-bit word. Returns @Nothing@ without consuming 1395 | the character if it is not in the class @[a-fA-F0-9]@. This parser 1396 | never fails. 1397 | -} 1398 | tryHexNibble :: Parser e s (Maybe Word) 1399 | tryHexNibble = unfailing $ \chunk -> case length chunk of 1400 | 0 -> InternalStep Nothing (offset chunk) (length chunk) 1401 | _ -> 1402 | let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 1403 | in if 1404 | | w >= 48 && w < 58 -> InternalStep (Just (fromIntegral w - 48)) (offset chunk + 1) (length chunk - 1) 1405 | | w >= 65 && w < 71 -> InternalStep (Just (fromIntegral w - 55)) (offset chunk + 1) (length chunk - 1) 1406 | | w >= 97 && w < 103 -> InternalStep (Just (fromIntegral w - 87)) (offset chunk + 1) (length chunk - 1) 1407 | | otherwise -> InternalStep Nothing (offset chunk) (length chunk) 1408 | 1409 | -- Returns the maximum machine word if the argument is not 1410 | -- the ASCII encoding of a hexadecimal digit. 1411 | oneHex :: Word8 -> Word 1412 | {-# INLINE oneHex #-} 1413 | oneHex w 1414 | | w >= 48 && w < 58 = (fromIntegral w - 48) 1415 | | w >= 65 && w < 71 = (fromIntegral w - 55) 1416 | | w >= 97 && w < 103 = (fromIntegral w - 87) 1417 | | otherwise = maxBound 1418 | 1419 | oneHexMaybe :: Word8 -> Maybe Word 1420 | {-# INLINE oneHexMaybe #-} 1421 | oneHexMaybe w 1422 | | w >= 48 && w < 58 = Just (fromIntegral w - 48) 1423 | | w >= 65 && w < 71 = Just (fromIntegral w - 55) 1424 | | w >= 97 && w < 103 = Just (fromIntegral w - 87) 1425 | | otherwise = Nothing 1426 | 1427 | uneffectfulWord# :: (Bytes -> Result# e Word#) -> Parser e s Word# 1428 | {-# INLINE uneffectfulWord# #-} 1429 | uneffectfulWord# f = 1430 | Parser 1431 | (\b s0 -> (# s0, (f (boxBytes b)) #)) 1432 | 1433 | -- Precondition: the arguments are non-negative. Boolean is 1434 | -- true when overflow happens. Performs: a * 10 + b 1435 | -- Postcondition: when overflow is false, the resulting 1436 | -- word is less than or equal to the upper bound 1437 | positivePushBase10 :: Word -> Word -> Word -> (Bool, Word) 1438 | {-# INLINE positivePushBase10 #-} 1439 | positivePushBase10 (W# a) (W# b) (W# upper) = 1440 | let !(# ca, r0 #) = Exts.timesWord2# a 10## 1441 | !r1 = Exts.plusWord# r0 b 1442 | !cb = int2Word# (gtWord# r1 upper) 1443 | !cc = int2Word# (ltWord# r1 0##) 1444 | !c = ca `or#` cb `or#` cc 1445 | in (case c of 0## -> False; _ -> True, W# r1) 1446 | 1447 | unsignedPushBase10 :: Word -> Word -> (Bool, Word) 1448 | {-# INLINE unsignedPushBase10 #-} 1449 | unsignedPushBase10 (W# a) (W# b) = 1450 | let !(# ca, r0 #) = Exts.timesWord2# a 10## 1451 | !r1 = Exts.plusWord# r0 b 1452 | !cb = int2Word# (ltWord# r1 r0) 1453 | !c = ca `or#` cb 1454 | in (case c of 0## -> False; _ -> True, W# r1) 1455 | 1456 | -- | Skip while the predicate is matched. This is always inlined. 1457 | skipWhile :: (Char -> Bool) -> Parser e s () 1458 | {-# INLINE skipWhile #-} 1459 | skipWhile f = go 1460 | where 1461 | go = 1462 | isEndOfInput >>= \case 1463 | True -> pure () 1464 | False -> do 1465 | w <- anyUnsafe 1466 | if f w 1467 | then go 1468 | else unconsume 1 1469 | 1470 | -- Interpret the next byte as an Latin1-encoded character. 1471 | -- Does not check to see if any characters are left. This 1472 | -- is not exported. 1473 | anyUnsafe :: Parser e s Char 1474 | {-# INLINE anyUnsafe #-} 1475 | anyUnsafe = uneffectful $ \chunk -> 1476 | let w = indexCharArray (array chunk) (offset chunk) :: Char 1477 | in Success w (offset chunk + 1) (length chunk - 1) 1478 | 1479 | -- Reads one byte and interprets it as Latin1-encoded character. 1480 | indexCharArray :: PM.ByteArray -> Int -> Char 1481 | {-# INLINE indexCharArray #-} 1482 | indexCharArray (PM.ByteArray x) (I# i) = C# (indexCharArray# x i) 1483 | 1484 | {- | Match any character, to perform lookahead. Returns 'Nothing' if 1485 | end of input has been reached. Does not consume any input. 1486 | 1487 | /Note/: Because this parser does not fail, do not use it 1488 | with combinators such as 'many', because such as 'many', 1489 | because such parsers loop until a failure occurs. Careless 1490 | use will thus result in an infinite loop. 1491 | -} 1492 | peek :: Parser e s (Maybe Char) 1493 | {-# INLINE peek #-} 1494 | peek = uneffectful $ \(Bytes arr off len) -> 1495 | let v = 1496 | if len > 0 1497 | then Just (indexCharArray arr off) 1498 | else Nothing 1499 | in Success v off len 1500 | 1501 | {- | Match any byte, to perform lookahead. Does not consume any 1502 | input, but will fail if end of input has been reached. 1503 | -} 1504 | peek' :: e -> Parser e s Char 1505 | {-# INLINE peek' #-} 1506 | peek' e = uneffectful $ \(Bytes arr off len) -> 1507 | if len > 0 1508 | then Success (indexCharArray arr off) off len 1509 | else Failure e 1510 | --------------------------------------------------------------------------------