├── .github
    ├── CODEOWNERS
    └── workflows
    │   ├── release.yaml
    │   └── build.yaml
├── .gitignore
├── sample
    └── TakeLetter.hs
├── bench
    └── Main.hs
├── src
    └── Data
    │   └── Bytes
    │       ├── Parser
    │           ├── Base128.hs
    │           ├── Types.hs
    │           ├── Unsafe.hs
    │           ├── Leb128.hs
    │           ├── Utf8.hs
    │           ├── BigEndian.hs
    │           ├── LittleEndian.hs
    │           ├── Ascii.hs
    │           ├── Internal.hs
    │           ├── Rebindable.hs
    │           └── Latin.hs
    │       └── Parser.hs
├── LICENSE
├── fourmolu.yaml
├── bytesmith.cabal
├── CHANGELOG.md
└── test
    └── Main.hs


/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | @byteverse/l3c
2 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
 1 | name: release
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - "*"
 6 | 
 7 | jobs:
 8 |   call-workflow:
 9 |     uses: byteverse/.github/.github/workflows/release.yaml@main
10 |     secrets: inherit
11 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | on:
 3 |   pull_request:
 4 |     branches:
 5 |       - "*"
 6 | 
 7 | jobs:
 8 |   call-workflow:
 9 |     uses: byteverse/.github/.github/workflows/build-matrix.yaml@main
10 |     with:
11 |       cabal-file: bytesmith.cabal
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .vscode/
 2 | dist
 3 | dist-*
 4 | cabal-dev
 5 | *.o
 6 | *.hi
 7 | *.chi
 8 | *.chs.h
 9 | *.dyn_o
10 | *.dyn_hi
11 | .hpc
12 | .hsenv
13 | .cabal-sandbox/
14 | cabal.sandbox.config
15 | *.prof
16 | *.aux
17 | *.hp
18 | *.eventlog
19 | .stack-work/
20 | cabal.project.local
21 | cabal.project.local~
22 | .HTF/
23 | .ghc.environment.*
24 | stack.yaml
25 | *.swm
26 | *.swo
27 | *.swp
28 | test_results/**
29 | 


--------------------------------------------------------------------------------
/sample/TakeLetter.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE MagicHash #-}
 2 | 
 3 | -- Build with:
 4 | --   cabal build --write-ghc-environment-files=always
 5 | --   ghc -fforce-recomp -O2 -ddump-simpl -dsuppress-all -ddump-to-file sample/TakeLetter.hs
 6 | -- to examine GHC optimizations.
 7 | module TakeLetter
 8 |   ( takeLetter
 9 |   ) where
10 | 
11 | import Data.Bytes.Parser (Parser)
12 | import Data.Bytes.Parser.Ascii (takeShortWhile)
13 | import Data.Text.Short (ShortText)
14 | import GHC.Exts
15 | 
16 | takeLetter :: Parser e s ShortText
17 | {-# NOINLINE takeLetter #-}
18 | takeLetter = takeShortWhile (== 'A')
19 | 


--------------------------------------------------------------------------------
/bench/Main.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | {-# LANGUAGE TypeApplications #-}
 3 | 
 4 | import Data.Char (ord)
 5 | import Data.Primitive (ByteArray)
 6 | import Data.Word (Word8)
 7 | import Gauge.Main (bench, defaultMain, whnf)
 8 | 
 9 | import qualified Data.Bytes.Parser as P
10 | import qualified Data.Bytes.Parser.Latin as Latin
11 | import qualified GHC.Exts as Exts
12 | 
13 | main :: IO ()
14 | main =
15 |   defaultMain
16 |     [ bench "decPositiveInteger" $
17 |         whnf
18 |           (\x -> P.parseByteArray (Latin.decUnsignedInteger ()) x)
19 |           encodedBigNumber
20 |     ]
21 | 
22 | encodedBigNumber :: ByteArray
23 | encodedBigNumber =
24 |   stringToByteArray $
25 |     show $
26 |       id @Integer $
27 |         246246357264327645234627753190240202405243024304504230544
28 |           * 732345623640035232405249305932503920593209520932095234651
29 | 
30 | stringToByteArray :: String -> ByteArray
31 | stringToByteArray =
32 |   Exts.fromList . map (fromIntegral @Int @Word8 . ord)
33 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Base128.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | {-# LANGUAGE TypeApplications #-}
 3 | 
 4 | module Data.Bytes.Parser.Base128
 5 |   ( -- * Unsigned
 6 |     word16
 7 |   , word32
 8 |   , word64
 9 |   ) where
10 | 
11 | import Control.Monad (when)
12 | import Data.Bits (bit, clearBit, testBit, unsafeShiftL, (.|.))
13 | import Data.Bytes.Parser (Parser)
14 | import Data.Word (Word16, Word32, Word64, Word8)
15 | 
16 | import qualified Data.Bytes.Parser as P
17 | 
18 | word16 :: e -> Parser e s Word16
19 | word16 e = fromIntegral @Word64 @Word16 <$> stepBoundedWord e 16 0
20 | 
21 | word32 :: e -> Parser e s Word32
22 | word32 e = fromIntegral @Word64 @Word32 <$> stepBoundedWord e 32 0
23 | 
24 | word64 :: e -> Parser e s Word64
25 | word64 e = fromIntegral @Word64 @Word64 <$> stepBoundedWord e 64 0
26 | 
27 | stepBoundedWord :: e -> Int -> Word64 -> Parser e s Word64
28 | stepBoundedWord e !bitLimit !acc = do
29 |   when (acc >= bit (bitLimit - 7)) $ P.fail e
30 |   raw <- P.any e
31 |   let content = clearBit raw 7
32 |       acc' = unsafeShiftL acc 7 .|. fromIntegral @Word8 @Word64 content
33 |   if testBit raw 7
34 |     then stepBoundedWord e bitLimit acc'
35 |     else pure acc'
36 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Types.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveFoldable #-}
 2 | {-# LANGUAGE DeriveFunctor #-}
 3 | {-# LANGUAGE DerivingStrategies #-}
 4 | 
 5 | module Data.Bytes.Parser.Types
 6 |   ( Parser
 7 |   , Result (..)
 8 |   , Slice (..)
 9 |   ) where
10 | 
11 | import Data.Bytes.Parser.Internal (Parser (..))
12 | 
13 | -- | The result of running a parser.
14 | data Result e a
15 |   = -- | An error message indicating what went wrong.
16 |     Failure e
17 |   | -- | The parsed value and the number of bytes
18 |     -- remaining in parsed slice.
19 |     Success {-# UNPACK #-} !(Slice a)
20 |   deriving stock (Eq, Show, Foldable, Functor)
21 | 
22 | {- | Slicing metadata (an offset and a length) accompanied
23 | by a value. This does not represent a slice into the
24 | value. This type is intended to be used as the result
25 | of an executed parser. In this context the slicing metadata
26 | describe a slice into to the array (or byte array) that
27 | from which the value was parsed.
28 | 
29 | It is often useful to check the @length@ when a parser
30 | succeeds since a non-zero length indicates that there
31 | was additional unconsumed input. The @offset@ is only
32 | ever needed to construct a new slice (via @Bytes@ or
33 | @SmallVector@) from the remaining input.
34 | -}
35 | data Slice a = Slice
36 |   { offset :: {-# UNPACK #-} !Int
37 |   -- ^ Offset into the array.
38 |   , length :: {-# UNPACK #-} !Int
39 |   -- ^ Length of the slice.
40 |   , value :: a
41 |   -- ^ The structured data that was successfully parsed.
42 |   }
43 |   deriving stock (Eq, Show, Foldable, Functor)
44 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019, Andrew Martin
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 | 
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 | 
16 |     * Neither the name of Andrew Martin nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/fourmolu.yaml:
--------------------------------------------------------------------------------
 1 | # Number of spaces per indentation step
 2 | indentation: 2
 3 | 
 4 | # Max line length for automatic line breaking
 5 | column-limit: 200
 6 | 
 7 | # Styling of arrows in type signatures (choices: trailing, leading, or leading-args)
 8 | function-arrows: trailing
 9 | 
10 | # How to place commas in multi-line lists, records, etc. (choices: leading or trailing)
11 | comma-style: leading
12 | 
13 | # Styling of import/export lists (choices: leading, trailing, or diff-friendly)
14 | import-export-style: leading
15 | 
16 | # Whether to full-indent or half-indent 'where' bindings past the preceding body
17 | indent-wheres: false
18 | 
19 | # Whether to leave a space before an opening record brace
20 | record-brace-space: true
21 | 
22 | # Number of spaces between top-level declarations
23 | newlines-between-decls: 1
24 | 
25 | # How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact)
26 | haddock-style: multi-line
27 | 
28 | # How to print module docstring
29 | haddock-style-module: null
30 | 
31 | # Styling of let blocks (choices: auto, inline, newline, or mixed)
32 | let-style: auto
33 | 
34 | # How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space)
35 | in-style: right-align
36 | 
37 | # Whether to put parentheses around a single constraint (choices: auto, always, or never)
38 | single-constraint-parens: always
39 | 
40 | # Output Unicode syntax (choices: detect, always, or never)
41 | unicode: never
42 | 
43 | # Give the programmer more choice on where to insert blank lines
44 | respectful: true
45 | 
46 | # Fixity information for operators
47 | fixities: []
48 | 
49 | # Module reexports Fourmolu should know about
50 | reexports: []
51 | 
52 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Unsafe.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BinaryLiterals #-}
 2 | {-# LANGUAGE DataKinds #-}
 3 | {-# LANGUAGE DerivingStrategies #-}
 4 | {-# LANGUAGE DuplicateRecordFields #-}
 5 | {-# LANGUAGE GADTSyntax #-}
 6 | {-# LANGUAGE MagicHash #-}
 7 | {-# LANGUAGE NamedFieldPuns #-}
 8 | {-# LANGUAGE PolyKinds #-}
 9 | {-# LANGUAGE RankNTypes #-}
10 | {-# LANGUAGE ScopedTypeVariables #-}
11 | {-# LANGUAGE UnboxedTuples #-}
12 | 
13 | {- | Everything in this module is unsafe and can lead to
14 | nondeterministic output or segfaults if used incorrectly.
15 | -}
16 | module Data.Bytes.Parser.Unsafe
17 |   ( -- * Types
18 |     Parser (..)
19 | 
20 |     -- * Functions
21 |   , cursor
22 |   , cursor#
23 |   , expose
24 |   , unconsume
25 |   , jump
26 |   , uneffectful
27 |   ) where
28 | 
29 | import Prelude hiding (length)
30 | 
31 | import Data.Bytes.Parser.Internal (Parser (..), Result (..), uneffectful, uneffectfulInt#)
32 | import Data.Bytes.Types (Bytes (..))
33 | import Data.Primitive (ByteArray)
34 | import GHC.Exts (Int (I#), Int#)
35 | 
36 | {- | Get the current offset into the chunk. Using this makes
37 | it possible to observe the internal difference between 'Bytes'
38 | that refer to equivalent slices. Be careful.
39 | -}
40 | cursor :: Parser e s Int
41 | cursor = uneffectful $ \Bytes {offset, length} ->
42 |   Success offset offset length
43 | 
44 | -- | Variant of 'cursor' with unboxed result.
45 | cursor# :: Parser e s Int#
46 | cursor# = uneffectfulInt# $ \Bytes {offset = I# off, length = I# len} -> (# | (# off, off, len #) #)
47 | 
48 | {- | Return the byte array being parsed. This includes bytes
49 | that preceed the current offset and may include bytes that
50 | go beyond the length. This is somewhat dangerous, so only
51 | use this is you know what you're doing.
52 | -}
53 | expose :: Parser e s ByteArray
54 | expose = uneffectful $ \Bytes {length, offset, array} ->
55 |   Success array offset length
56 | 
57 | {- | Move the cursor back by @n@ bytes. Precondition: you
58 | must have previously consumed at least @n@ bytes.
59 | -}
60 | unconsume :: Int -> Parser e s ()
61 | unconsume n = uneffectful $ \Bytes {length, offset} ->
62 |   Success () (offset - n) (length + n)
63 | 
64 | {- | Set the position to the given index. Precondition: the index
65 | must be valid. It should be the result of an earlier call to
66 | 'cursor'.
67 | -}
68 | jump :: Int -> Parser e s ()
69 | jump ix = uneffectful $ \(Bytes {length, offset}) ->
70 |   Success () ix (length + (offset - ix))
71 | 


--------------------------------------------------------------------------------
/bytesmith.cabal:
--------------------------------------------------------------------------------
 1 | cabal-version:   3.0
 2 | name:            bytesmith
 3 | version:         0.3.14.0
 4 | synopsis:        Nonresumable byte parser
 5 | description:
 6 |   Parse bytes as fast as possible. This is a nonresumable parser
 7 |   that aggresively uses `UnboxedSums` to avoid performing any
 8 |   allocations.
 9 | 
10 | homepage:        https://github.com/byteverse/bytesmith
11 | bug-reports:     https://github.com/byteverse/bytesmith/issues
12 | license:         BSD-3-Clause
13 | license-file:    LICENSE
14 | author:          Andrew Martin
15 | maintainer:      amartin@layer3com.com
16 | copyright:       2019 Andrew Martin
17 | category:        Data
18 | extra-doc-files: CHANGELOG.md
19 | tested-with:     GHC ==9.6.3 || ==9.8.1
20 | 
21 | common build-settings
22 |   default-language: Haskell2010
23 |   ghc-options:      -Wall -Wunused-packages
24 | 
25 | library
26 |   import:          build-settings
27 |   exposed-modules:
28 |     Data.Bytes.Parser
29 |     Data.Bytes.Parser.Ascii
30 |     Data.Bytes.Parser.Base128
31 |     Data.Bytes.Parser.BigEndian
32 |     Data.Bytes.Parser.Latin
33 |     Data.Bytes.Parser.Leb128
34 |     Data.Bytes.Parser.LittleEndian
35 |     Data.Bytes.Parser.Rebindable
36 |     Data.Bytes.Parser.Unsafe
37 |     Data.Bytes.Parser.Utf8
38 | 
39 |   other-modules:
40 |     Data.Bytes.Parser.Internal
41 |     Data.Bytes.Parser.Types
42 | 
43 |   build-depends:
44 |     , base                >=4.18    && <5
45 |     , byteslice           >=0.2.6   && <0.3
46 |     , bytestring          >=0.10.8  && <0.13
47 |     , contiguous          >=0.6     && <0.7
48 |     , natural-arithmetic  >=0.1.3
49 |     , primitive           >=0.7     && <0.10
50 |     , text-short          >=0.1.3   && <0.2
51 |     , text                >=0.2.1
52 |     , wide-word           >=0.1.0.9 && <0.2
53 | 
54 |   hs-source-dirs:  src
55 |   ghc-options:     -O2
56 | 
57 | test-suite test
58 |   import:         build-settings
59 |   type:           exitcode-stdio-1.0
60 |   hs-source-dirs: test
61 |   main-is:        Main.hs
62 |   build-depends:
63 |     , base              >=4.12.0.0 && <5
64 |     , byte-order
65 |     , byteslice
66 |     , bytesmith
67 |     , primitive
68 |     , tasty
69 |     , tasty-hunit
70 |     , tasty-quickcheck
71 |     , text-short
72 |     , wide-word
73 | 
74 | benchmark bench
75 |   import:         build-settings
76 |   type:           exitcode-stdio-1.0
77 |   build-depends:
78 |     , base
79 |     , bytesmith
80 |     , gauge
81 |     , primitive
82 | 
83 |   ghc-options:    -O2
84 |   hs-source-dirs: bench
85 |   main-is:        Main.hs
86 | 
87 | source-repository head
88 |   type:     git
89 |   location: git://github.com/byteverse/bytesmith.git
90 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Revision history for bytesmith
  2 | 
  3 | ## 0.3.14.0 -- 2025-11-17
  4 | 
  5 | * Add `take` for ASCII parser.
  6 | 
  7 | ## 0.3.13.0 -- 2025-07-11
  8 | 
  9 | * Add `hexFixedWord16#`
 10 | * Drop support for base < 4.18, clean up CPP
 11 | * Add more levity-monomorphized variants of bind, pure, and fail
 12 | 
 13 | ## 0.3.11.1 -- 2024-02-28
 14 | 
 15 | * Update package metadata.
 16 | * Remove dependency on `run-st` library.
 17 | 
 18 | ## 0.3.11.0 -- 2024-01-05
 19 | 
 20 | * Add `Data.Bytes.Parser.Latin.hexWord32`.
 21 | * Add `Data.Bytes.Parser.Latin.hexFixedWord(128|256)`.
 22 | * Add `Data.Bytes.Parser.takeN`.
 23 | 
 24 | ## 0.3.10.0 -- 2023-07-25
 25 | 
 26 | * Add `mapErrorEffectfully`.
 27 | * Correct the implementation of `satisfy`.
 28 | * Add `takeUpTo`.
 29 | 
 30 | ## 0.3.9.1 -- 2022-12-06
 31 | 
 32 | * Build with GHC 9.4.
 33 | 
 34 | ## 0.3.9.0 -- 2022-07-14
 35 | 
 36 | * Build with GHC 9.2.3.
 37 | 
 38 | ## 0.3.8.0 -- 2021-10-11
 39 | 
 40 | * Add `peek` and `peek'` to `Data.Bytes.Parser.Latin`.
 41 | * Add inline pragmas to most functions to prevent cost centers.
 42 | * Add support for WordRep-to-LiftedRep in Rebindable module.
 43 | * Allow building with newer Contiguous.
 44 | * Export `uneffectful`.
 45 | 
 46 | ## 0.3.7.0 -- 2020-07-27
 47 | 
 48 | * Add `Data.Bytes.Parser.Base128` module for Base-128 encoding.
 49 | * Add `Data.Bytes.Parser.Leb128` module for LEB-128 encoding.
 50 |   Supports signed integers with zig-zag encoding.
 51 | * Add `skipWhile` to `Data.Bytes.Parser.Latin`.
 52 | * Reexport `endOfInput` and `isEndOfInput` from `Latin`.
 53 | * Add `charInsensitive` to ASCII module.
 54 | * Correct implementation of `peek` and `peek'`.
 55 | 
 56 | ## 0.3.6.0 -- 2020-03-04
 57 | 
 58 | * Add `char12`
 59 | * Add `skipTrailedBy2`, `skipTrailedBy3`, and variants
 60 |   with an unboxed result.
 61 | * Add `cstring`
 62 | * Add `peekRemaining`
 63 | * Add `measure_` and `measure_#`, variants of `measure`
 64 |   that only give the byte count.
 65 | * Add `Data.Bytes.Parser.Rebindable`, the ultimate hack.
 66 | * Add `Data.Bytes.Latin.takeTrailedBy`
 67 | 
 68 | ## 0.3.5.0 -- 2020-02-10
 69 | 
 70 | * Add big-endian and little-endian `word256` and `word256Array` parsers.
 71 | * Add `hexFixedWord64`.
 72 | 
 73 | ## 0.3.4.0 -- 2020-02-03
 74 | 
 75 | * Add `hexFixedWord32`.
 76 | 
 77 | ## 0.3.3.0 -- 2020-01-22
 78 | 
 79 | * Add `hexWord8`, `hexWord16`, and `hexFixedWord8`.
 80 | 
 81 | ## 0.3.2.0 -- 2019-12-27
 82 | 
 83 | * Add `parseBytesEither` and `parseBytesMaybe`.
 84 | * Add common idioms from other parser libaries. This includes: `satisfy`,
 85 |   `satisfyWith`, `scan`, `peek`, and `peek'`.
 86 | 
 87 | ## 0.3.1.0 -- 2019-11-12
 88 | 
 89 | * Add big-endian and little-endian parsers for `Word128`.
 90 | * Add a module for little-endian word parsers. This compliments the
 91 |   existing big-endian module.
 92 | * Add functions for parsing arrays of big/little endian words of
 93 |   various sizes.
 94 | * Add `skipUntil` to `Latin`.
 95 | * Add `char5`, `char6`, `char7`, `char8`, `char9`, `char10`, and
 96 |   `char11` to `Latin`.
 97 | * Correct the implementation of `takeTrailedBy`.
 98 | 
 99 | ## 0.3.0.0 -- 2019-09-30
100 | 
101 | * Include the offset into the byte sequence in `Result`. Breaking change.
102 | * Rename `hexWord16` to `hexFixedWord16`. Breaking change.
103 | * Rename `parseBytesST` to `parseBytesEffectfully`. Breaking change.
104 | * Add `hexNibbleLower` and `tryHexNibbleLower`.
105 | * Add `hexNibble` and `tryHexNibble`.
106 | 
107 | ## 0.2.0.1 -- 2019-09-24
108 | 
109 | * Correct an overflow-detection mistake in the implementation
110 |   of machine-word parsers.
111 | 
112 | ## 0.2.0.0 -- 2019-09-24
113 | 
114 | * Add big-endian word parsers.
115 | * Redo module structure so that encoding-specific functions each
116 |   live in their own module.
117 | * Add a lot more functions and attempt to make naming somewhat
118 |   consistent.
119 | * Add `delimit`.
120 | * Add `replicate`.
121 | * Add `annotate` and its infix synonym `<?>`.
122 | 
123 | ## 0.1.0.0 -- 2019-08-22
124 | 
125 | * First version.
126 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Leb128.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE BinaryLiterals #-}
  3 | {-# LANGUAGE TypeApplications #-}
  4 | 
  5 | {- | Parse numbers that have been encoded with <https://en.wikipedia.org/wiki/LEB128 LEB-128>.
  6 | LEB-128 allows arbitrarily large numbers to be encoded. Parsers in this
  7 | module will fail if the number they attempt to parse is outside the
  8 | range of what their target type can handle. The parsers for signed
  9 | numbers assume that the numbers have been
 10 | <https://developers.google.com/protocol-buffers/docs/encoding zigzig encoded>.
 11 | -}
 12 | module Data.Bytes.Parser.Leb128
 13 |   ( -- * Unsigned
 14 |     word16
 15 |   , word32
 16 |   , word64
 17 | 
 18 |     -- * Signed (Zig-zag)
 19 |   , int16
 20 |   , int32
 21 |   , int64
 22 |   ) where
 23 | 
 24 | import Data.Bits (complement, testBit, unsafeShiftL, unsafeShiftR, xor, (.&.), (.|.))
 25 | import Data.Bytes.Parser (Parser)
 26 | import Data.Int (Int16, Int32, Int64)
 27 | import Data.Word (Word16, Word32, Word64, Word8)
 28 | 
 29 | import qualified Data.Bytes.Parser as P
 30 | 
 31 | {- | Parse a LEB-128-encoded number. If the number is larger
 32 | than @0xFFFF@, fails with the provided error.
 33 | -}
 34 | word16 :: e -> Parser e s Word16
 35 | word16 e = do
 36 |   w <- stepBoundedWord e 16 0 0
 37 |   pure (fromIntegral @Word64 @Word16 w)
 38 | 
 39 | {- | Parse a LEB-128-encoded number. If the number is larger
 40 | than @0xFFFFFFFF@, fails with the provided error.
 41 | -}
 42 | word32 :: e -> Parser e s Word32
 43 | word32 e = do
 44 |   w <- stepBoundedWord e 32 0 0
 45 |   pure (fromIntegral @Word64 @Word32 w)
 46 | 
 47 | {- | Parse a LEB-128-encoded number. If the number is larger
 48 | than @0xFFFFFFFFFFFFFFFF@, fails with the provided error.
 49 | -}
 50 | word64 :: e -> Parser e s Word64
 51 | word64 e = stepBoundedWord e 64 0 0
 52 | 
 53 | {- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
 54 | number is outside the range @[-32768,32767]@, this fails with
 55 | the provided error.
 56 | -}
 57 | int16 :: e -> Parser e s Int16
 58 | int16 = fmap zigzagDecode16 . word16
 59 | 
 60 | {- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
 61 | number is outside the range @[-2147483648,2147483647]@, this
 62 | fails with the provided error.
 63 | -}
 64 | int32 :: e -> Parser e s Int32
 65 | int32 = fmap zigzagDecode32 . word32
 66 | 
 67 | {- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
 68 | number is outside the range @[-9223372036854775808,9223372036854775807]@,
 69 | this fails with the provided error.
 70 | -}
 71 | int64 :: e -> Parser e s Int64
 72 | int64 = fmap zigzagDecode64 . word64
 73 | 
 74 | -- What these parameters are:
 75 | --
 76 | -- bitLimit: number of bits in the target word size
 77 | -- accShift: shift amount, increases by 7 at a time
 78 | stepBoundedWord :: e -> Int -> Word64 -> Int -> Parser e s Word64
 79 | stepBoundedWord e !bitLimit !acc0 !accShift = do
 80 |   raw <- P.any e
 81 |   let number = raw .&. 0x7F
 82 |       acc1 =
 83 |         acc0
 84 |           .|. unsafeShiftL (fromIntegral @Word8 @Word64 number) accShift
 85 |       accShift' = accShift + 7
 86 |   if accShift' <= bitLimit
 87 |     then
 88 |       if testBit raw 7
 89 |         then stepBoundedWord e bitLimit acc1 accShift'
 90 |         else pure acc1
 91 |     else
 92 |       if fromIntegral @Word8 @Word raw < twoExp (bitLimit - accShift)
 93 |         then pure acc1 -- TODO: no need to mask upper bit in number
 94 |         else P.fail e
 95 | 
 96 | twoExp :: Int -> Word
 97 | twoExp x = unsafeShiftL 1 x
 98 | 
 99 | -- Zigzag decode strategy taken from https://stackoverflow.com/a/2211086/1405768
100 | -- The accepted answer is a little bit, so an answer further down was used:
101 | --
102 | -- > zigzag_decode(value) = ( value >> 1 ) ^ ( ~( value & 1 ) + 1 )
103 | zigzagDecode16 :: Word16 -> Int16
104 | zigzagDecode16 n =
105 |   fromIntegral ((unsafeShiftR n 1) `xor` (complement (n .&. 1) + 1))
106 | 
107 | zigzagDecode32 :: Word32 -> Int32
108 | zigzagDecode32 n =
109 |   fromIntegral ((unsafeShiftR n 1) `xor` (complement (n .&. 1) + 1))
110 | 
111 | zigzagDecode64 :: Word64 -> Int64
112 | zigzagDecode64 n =
113 |   fromIntegral ((unsafeShiftR n 1) `xor` (complement (n .&. 1) + 1))
114 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Utf8.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE BinaryLiterals #-}
  3 | {-# LANGUAGE CPP #-}
  4 | {-# LANGUAGE DataKinds #-}
  5 | {-# LANGUAGE DerivingStrategies #-}
  6 | {-# LANGUAGE GADTSyntax #-}
  7 | {-# LANGUAGE MagicHash #-}
  8 | {-# LANGUAGE MultiWayIf #-}
  9 | {-# LANGUAGE PolyKinds #-}
 10 | {-# LANGUAGE RankNTypes #-}
 11 | {-# LANGUAGE ScopedTypeVariables #-}
 12 | {-# LANGUAGE TypeApplications #-}
 13 | {-# LANGUAGE UnboxedTuples #-}
 14 | 
 15 | {- | Parse input as UTF-8-encoded text. Parsers in this module will
 16 | fail if they encounter a byte above @0x7F@.
 17 | -}
 18 | module Data.Bytes.Parser.Utf8
 19 |   ( -- * Get Character
 20 |     any#
 21 |   , shortText
 22 |   ) where
 23 | 
 24 | import Prelude hiding (any, fail, length, takeWhile)
 25 | 
 26 | import Data.Bits (unsafeShiftL, xor, (.&.), (.|.))
 27 | import Data.Bytes.Parser.Internal (Parser (..))
 28 | import Data.Text.Short (ShortText)
 29 | import GHC.Exts (Char (C#), Char#, Int (I#), Int#, chr#, (+#), (-#), (>#))
 30 | import GHC.Word (Word8 (W8#))
 31 | 
 32 | import qualified Data.ByteString.Short.Internal as BSS
 33 | import qualified Data.Bytes.Parser as Parser
 34 | import qualified Data.Primitive as PM
 35 | import qualified Data.Text.Short as TS
 36 | import qualified GHC.Exts as Exts
 37 | 
 38 | {- FOURMOLU_DISABLE -}
 39 | -- | Interpret the next one to four bytes as a UTF-8-encoded character.
 40 | -- Fails if the decoded codepoint is in the range U+D800 through U+DFFF.
 41 | any# :: e -> Parser e s Char#
 42 | {-# noinline any# #-}
 43 | any# e = Parser
 44 |   (\(# arr, off, len #) s0 -> case len ># 0# of
 45 |     1# ->
 46 |       let !w0 = Exts.indexWord8Array# arr off
 47 |        in if | oneByteChar (W8# w0) ->
 48 |                  (# s0, (# | (# chr# (Exts.word2Int# (
 49 |                  Exts.word8ToWord#
 50 |                  w0)), off +# 1#, len -# 1# #) #) #)
 51 |              | twoByteChar (W8# w0) ->
 52 |                  if | I# len > 1
 53 |                     , w1 <- Exts.indexWord8Array# arr (off +# 1#)
 54 |                     , followingByte (W8# w1)
 55 |                     , C# c <- codepointFromTwoBytes (W8# w0) (W8# w1)
 56 |                       -> (# s0, (# | (# c, off +# 2#, len -# 2# #) #) #)
 57 |                     | otherwise -> (# s0, (# e | #) #)
 58 |              | threeByteChar (W8# w0) ->
 59 |                  if | I# len > 2
 60 |                     , w1 <- Exts.indexWord8Array# arr (off +# 1# )
 61 |                     , w2 <- Exts.indexWord8Array# arr (off +# 2# )
 62 |                     , followingByte (W8# w1)
 63 |                     , !c@(C# c#) <- codepointFromThreeBytes (W8# w0) (W8# w1) (W8# w2)
 64 |                     , c < '\xD800' || c > '\xDFFF'
 65 |                       -> (# s0, (# | (# c#, off +# 3#, len -# 3# #) #) #)
 66 |                     | otherwise -> (# s0, (# e | #) #)
 67 |              | fourByteChar (W8# w0) ->
 68 |                  if | I# len > 3
 69 |                     , w1 <- Exts.indexWord8Array# arr (off +# 1# )
 70 |                     , w2 <- Exts.indexWord8Array# arr (off +# 2# )
 71 |                     , w3 <- Exts.indexWord8Array# arr (off +# 3# )
 72 |                     , followingByte (W8# w1)
 73 |                     , !(C# c#) <- codepointFromFourBytes (W8# w0) (W8# w1) (W8# w2) (W8# w3)
 74 |                       -> (# s0, (# | (# c#, off +# 4#, len -# 4# #) #) #)
 75 |                     | otherwise -> (# s0, (# e | #) #)
 76 |              | otherwise -> (# s0, (# e | #) #)
 77 |     _ -> (# s0, (# e | #) #)
 78 |   )
 79 | {- FOURMOLU_ENABLE -}
 80 | 
 81 | codepointFromFourBytes :: Word8 -> Word8 -> Word8 -> Word8 -> Char
 82 | codepointFromFourBytes w1 w2 w3 w4 =
 83 |   C#
 84 |     ( chr#
 85 |         ( unI $
 86 |             fromIntegral
 87 |               ( unsafeShiftL (word8ToWord w1 .&. 0b00001111) 18
 88 |                   .|. unsafeShiftL (word8ToWord w2 .&. 0b00111111) 12
 89 |                   .|. unsafeShiftL (word8ToWord w3 .&. 0b00111111) 6
 90 |                   .|. (word8ToWord w4 .&. 0b00111111)
 91 |               )
 92 |         )
 93 |     )
 94 | 
 95 | codepointFromThreeBytes :: Word8 -> Word8 -> Word8 -> Char
 96 | codepointFromThreeBytes w1 w2 w3 =
 97 |   C#
 98 |     ( chr#
 99 |         ( unI $
100 |             fromIntegral
101 |               ( unsafeShiftL (word8ToWord w1 .&. 0b00001111) 12
102 |                   .|. unsafeShiftL (word8ToWord w2 .&. 0b00111111) 6
103 |                   .|. (word8ToWord w3 .&. 0b00111111)
104 |               )
105 |         )
106 |     )
107 | 
108 | codepointFromTwoBytes :: Word8 -> Word8 -> Char
109 | codepointFromTwoBytes w1 w2 =
110 |   C#
111 |     ( chr#
112 |         ( unI $
113 |             fromIntegral @Word @Int
114 |               ( unsafeShiftL (word8ToWord w1 .&. 0b00011111) 6
115 |                   .|. (word8ToWord w2 .&. 0b00111111)
116 |               )
117 |         )
118 |     )
119 | 
120 | oneByteChar :: Word8 -> Bool
121 | oneByteChar !w = w .&. 0b10000000 == 0
122 | 
123 | twoByteChar :: Word8 -> Bool
124 | twoByteChar !w = w .&. 0b11100000 == 0b11000000
125 | 
126 | threeByteChar :: Word8 -> Bool
127 | threeByteChar !w = w .&. 0b11110000 == 0b11100000
128 | 
129 | fourByteChar :: Word8 -> Bool
130 | fourByteChar !w = w .&. 0b11111000 == 0b11110000
131 | 
132 | followingByte :: Word8 -> Bool
133 | followingByte !w = xor w 0b01000000 .&. 0b11000000 == 0b11000000
134 | 
135 | word8ToWord :: Word8 -> Word
136 | word8ToWord = fromIntegral
137 | 
138 | unI :: Int -> Int#
139 | unI (I# w) = w
140 | 
141 | {- | Consume input that matches the argument. Fails if the
142 | input does not match.
143 | -}
144 | shortText :: e -> ShortText -> Parser e s ()
145 | shortText e !t =
146 |   Parser.byteArray
147 |     e
148 |     (shortByteStringToByteArray (TS.toShortByteString t))
149 | 
150 | shortByteStringToByteArray ::
151 |   BSS.ShortByteString ->
152 |   PM.ByteArray
153 | shortByteStringToByteArray (BSS.SBS x) = PM.ByteArray x
154 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/BigEndian.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE BinaryLiterals #-}
  3 | {-# LANGUAGE CPP #-}
  4 | {-# LANGUAGE DataKinds #-}
  5 | {-# LANGUAGE DerivingStrategies #-}
  6 | {-# LANGUAGE GADTSyntax #-}
  7 | {-# LANGUAGE PolyKinds #-}
  8 | {-# LANGUAGE RankNTypes #-}
  9 | {-# LANGUAGE ScopedTypeVariables #-}
 10 | {-# LANGUAGE TypeApplications #-}
 11 | {-# LANGUAGE UnboxedSums #-}
 12 | 
 13 | -- | Big-endian fixed-width numbers.
 14 | module Data.Bytes.Parser.BigEndian
 15 |   ( -- * Unsigned
 16 |     word8
 17 |   , word16
 18 |   , word32
 19 |   , word64
 20 |   , word128
 21 |   , word256
 22 | 
 23 |     -- * Signed
 24 |   , int8
 25 |   , int16
 26 |   , int32
 27 |   , int64
 28 | 
 29 |     -- * Many
 30 | 
 31 |     -- ** Unsigned
 32 |   , word16Array
 33 |   , word32Array
 34 |   , word64Array
 35 |   , word128Array
 36 |   , word256Array
 37 |   ) where
 38 | 
 39 | import Prelude hiding (any, fail, length, takeWhile)
 40 | 
 41 | import Data.Bits (unsafeShiftL, (.|.))
 42 | import Data.Bytes.Parser.Internal (Parser, Result (..), swapArray128, swapArray16, swapArray256, swapArray32, swapArray64, uneffectful)
 43 | import Data.Bytes.Types (Bytes (..))
 44 | import Data.Int (Int16, Int32, Int64, Int8)
 45 | import Data.Primitive (ByteArray (..), PrimArray (..))
 46 | import Data.WideWord (Word128 (Word128), Word256 (Word256))
 47 | import Data.Word (Word16, Word32, Word64, Word8)
 48 | import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder)
 49 | 
 50 | import qualified Data.Bytes as Bytes
 51 | import qualified Data.Bytes.Parser as P
 52 | import qualified Data.Primitive as PM
 53 | 
 54 | -- | Unsigned 8-bit word.
 55 | word8 :: e -> Parser e s Word8
 56 | word8 = P.any
 57 | 
 58 | {- | Parse an array of big-endian unsigned 16-bit words. If the host is
 59 | big-endian, the implementation is optimized to simply @memcpy@ bytes
 60 | into the result array. The result array always has elements in
 61 | native-endian byte order.
 62 | -}
 63 | word16Array ::
 64 |   -- | Error message if not enough bytes are present
 65 |   e ->
 66 |   -- | Number of big-endian 16-bit words to expect
 67 |   Int ->
 68 |   -- | Native-endian elements
 69 |   Parser e s (PrimArray Word16)
 70 | word16Array e !n = case targetByteOrder of
 71 |   BigEndian -> fmap (asWord16s . Bytes.toByteArrayClone) (P.take e (n * 2))
 72 |   LittleEndian -> do
 73 |     bs <- P.take e (n * 2)
 74 |     let r = swapArray16 bs
 75 |     pure (asWord16s r)
 76 | 
 77 | -- | Parse an array of big-endian unsigned 32-bit words.
 78 | word32Array ::
 79 |   -- | Error message if not enough bytes are present
 80 |   e ->
 81 |   -- | Number of big-endian 32-bit words to expect
 82 |   Int ->
 83 |   -- | Native-endian elements
 84 |   Parser e s (PrimArray Word32)
 85 | word32Array e !n = case targetByteOrder of
 86 |   BigEndian -> fmap (asWord32s . Bytes.toByteArrayClone) (P.take e (n * 4))
 87 |   LittleEndian -> do
 88 |     bs <- P.take e (n * 4)
 89 |     let r = swapArray32 bs
 90 |     pure (asWord32s r)
 91 | 
 92 | -- | Parse an array of big-endian unsigned 64-bit words.
 93 | word64Array ::
 94 |   -- | Error message if not enough bytes are present
 95 |   e ->
 96 |   -- | Number of big-endian 64-bit words to consume
 97 |   Int ->
 98 |   -- | Native-endian elements
 99 |   Parser e s (PrimArray Word64)
100 | word64Array e !n = case targetByteOrder of
101 |   BigEndian -> fmap (asWord64s . Bytes.toByteArrayClone) (P.take e (n * 8))
102 |   LittleEndian -> do
103 |     bs <- P.take e (n * 8)
104 |     let r = swapArray64 bs
105 |     pure (asWord64s r)
106 | 
107 | -- | Parse an array of big-endian unsigned 256-bit words.
108 | word256Array ::
109 |   -- | Error message if not enough bytes are present
110 |   e ->
111 |   -- | Number of big-endian 256-bit words to consume
112 |   Int ->
113 |   -- | Native-endian elements
114 |   Parser e s (PrimArray Word256)
115 | word256Array e !n = case targetByteOrder of
116 |   BigEndian -> fmap (asWord256s . Bytes.toByteArrayClone) (P.take e (n * 32))
117 |   LittleEndian -> do
118 |     bs <- P.take e (n * 32)
119 |     let r = swapArray256 bs
120 |     pure (asWord256s r)
121 | 
122 | -- | Parse an array of big-endian unsigned 128-bit words.
123 | word128Array ::
124 |   -- | Error message if not enough bytes are present
125 |   e ->
126 |   -- | Number of big-endian 128-bit words to consume
127 |   Int ->
128 |   -- | Native-endian elements
129 |   Parser e s (PrimArray Word128)
130 | word128Array e !n = case targetByteOrder of
131 |   BigEndian -> fmap (asWord128s . Bytes.toByteArrayClone) (P.take e (n * 16))
132 |   LittleEndian -> do
133 |     bs <- P.take e (n * 16)
134 |     let r = swapArray128 bs
135 |     pure (asWord128s r)
136 | 
137 | asWord16s :: ByteArray -> PrimArray Word16
138 | asWord16s (ByteArray x) = PrimArray x
139 | 
140 | asWord32s :: ByteArray -> PrimArray Word32
141 | asWord32s (ByteArray x) = PrimArray x
142 | 
143 | asWord64s :: ByteArray -> PrimArray Word64
144 | asWord64s (ByteArray x) = PrimArray x
145 | 
146 | asWord128s :: ByteArray -> PrimArray Word128
147 | asWord128s (ByteArray x) = PrimArray x
148 | 
149 | asWord256s :: ByteArray -> PrimArray Word256
150 | asWord256s (ByteArray x) = PrimArray x
151 | 
152 | -- | Unsigned 16-bit word.
153 | word16 :: e -> Parser e s Word16
154 | word16 e = uneffectful $ \chunk ->
155 |   if length chunk >= 2
156 |     then
157 |       let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8
158 |           wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8
159 |        in Success
160 |             (fromIntegral @Word @Word16 (unsafeShiftL (fromIntegral wa) 8 .|. fromIntegral wb))
161 |             (offset chunk + 2)
162 |             (length chunk - 2)
163 |     else Failure e
164 | 
165 | -- | Unsigned 32-bit word.
166 | word32 :: e -> Parser e s Word32
167 | word32 e = uneffectful $ \chunk ->
168 |   if length chunk >= 4
169 |     then
170 |       let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8
171 |           wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8
172 |           wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8
173 |           wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8
174 |        in Success
175 |             ( fromIntegral @Word @Word32
176 |                 ( unsafeShiftL (fromIntegral wa) 24
177 |                     .|. unsafeShiftL (fromIntegral wb) 16
178 |                     .|. unsafeShiftL (fromIntegral wc) 8
179 |                     .|. fromIntegral wd
180 |                 )
181 |             )
182 |             (offset chunk + 4)
183 |             (length chunk - 4)
184 |     else Failure e
185 | 
186 | -- | Unsigned 64-bit word.
187 | word64 :: e -> Parser e s Word64
188 | word64 e = uneffectful $ \chunk ->
189 |   if length chunk >= 8
190 |     then
191 |       let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8
192 |           wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8
193 |           wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8
194 |           wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8
195 |           we = PM.indexByteArray (array chunk) (offset chunk + 4) :: Word8
196 |           wf = PM.indexByteArray (array chunk) (offset chunk + 5) :: Word8
197 |           wg = PM.indexByteArray (array chunk) (offset chunk + 6) :: Word8
198 |           wh = PM.indexByteArray (array chunk) (offset chunk + 7) :: Word8
199 |        in Success
200 |             ( unsafeShiftL (fromIntegral wa) 56
201 |                 .|. unsafeShiftL (fromIntegral wb) 48
202 |                 .|. unsafeShiftL (fromIntegral wc) 40
203 |                 .|. unsafeShiftL (fromIntegral wd) 32
204 |                 .|. unsafeShiftL (fromIntegral we) 24
205 |                 .|. unsafeShiftL (fromIntegral wf) 16
206 |                 .|. unsafeShiftL (fromIntegral wg) 8
207 |                 .|. fromIntegral wh
208 |             )
209 |             (offset chunk + 8)
210 |             (length chunk - 8)
211 |     else Failure e
212 | 
213 | -- | Unsigned 128-bit word.
214 | word128 :: e -> Parser e s Word128
215 | word128 e = liftA2 Word128 (word64 e) (word64 e)
216 | 
217 | -- | Unsigned 256-bit word.
218 | word256 :: e -> Parser e s Word256
219 | word256 e = (\a b c d -> Word256 a b c d) <$> word64 e <*> word64 e <*> word64 e <*> word64 e
220 | 
221 | -- | Signed 8-bit integer.
222 | int8 :: e -> Parser e s Int8
223 | int8 = fmap fromIntegral . word8
224 | 
225 | -- | Signed 16-bit integer.
226 | int16 :: e -> Parser e s Int16
227 | int16 = fmap fromIntegral . word16
228 | 
229 | -- | Signed 32-bit integer.
230 | int32 :: e -> Parser e s Int32
231 | int32 = fmap fromIntegral . word32
232 | 
233 | -- | Signed 64-bit integer.
234 | int64 :: e -> Parser e s Int64
235 | int64 = fmap fromIntegral . word64
236 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/LittleEndian.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE BinaryLiterals #-}
  3 | {-# LANGUAGE CPP #-}
  4 | {-# LANGUAGE DataKinds #-}
  5 | {-# LANGUAGE DerivingStrategies #-}
  6 | {-# LANGUAGE GADTSyntax #-}
  7 | {-# LANGUAGE PolyKinds #-}
  8 | {-# LANGUAGE RankNTypes #-}
  9 | {-# LANGUAGE ScopedTypeVariables #-}
 10 | {-# LANGUAGE TypeApplications #-}
 11 | {-# LANGUAGE UnboxedSums #-}
 12 | 
 13 | -- | Little-endian fixed-width numbers.
 14 | module Data.Bytes.Parser.LittleEndian
 15 |   ( -- * One
 16 | 
 17 |     -- ** Unsigned
 18 |     word8
 19 |   , word16
 20 |   , word32
 21 |   , word64
 22 |   , word128
 23 |   , word256
 24 | 
 25 |     -- ** Signed
 26 |   , int8
 27 |   , int16
 28 |   , int32
 29 |   , int64
 30 | 
 31 |     -- * Many
 32 | 
 33 |     -- ** Unsigned
 34 |   , word16Array
 35 |   , word32Array
 36 |   , word64Array
 37 |   , word128Array
 38 |   , word256Array
 39 | 
 40 |     -- ** Unsigned
 41 |   , int64Array
 42 |   ) where
 43 | 
 44 | import Prelude hiding (any, fail, length, takeWhile)
 45 | 
 46 | import Control.Applicative (liftA2)
 47 | import Data.Bits (unsafeShiftL, (.|.))
 48 | import Data.Bytes.Parser.Internal (Parser, Result (..), swapArray128, swapArray16, swapArray256, swapArray32, swapArray64, uneffectful)
 49 | import Data.Bytes.Types (Bytes (..))
 50 | import Data.Int (Int16, Int32, Int64, Int8)
 51 | import Data.Primitive (ByteArray (..), PrimArray (..))
 52 | import Data.WideWord (Word128 (Word128), Word256 (Word256))
 53 | import Data.Word (Word16, Word32, Word64, Word8)
 54 | import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder)
 55 | 
 56 | import qualified Data.Bytes as Bytes
 57 | import qualified Data.Bytes.Parser as P
 58 | import qualified Data.Primitive as PM
 59 | 
 60 | -- | Unsigned 8-bit word.
 61 | word8 :: e -> Parser e s Word8
 62 | word8 = P.any
 63 | 
 64 | {- | Array of little-endian unsigned 16-bit words. If the host is
 65 | little-endian, the implementation is optimized to simply @memcpy@
 66 | bytes into the result array. The result array always has elements
 67 | in native-endian byte order.
 68 | -}
 69 | word16Array ::
 70 |   -- | Error message if not enough bytes are present
 71 |   e ->
 72 |   -- | Number of little-endian 16-bit words to expect
 73 |   Int ->
 74 |   -- | Native-endian elements
 75 |   Parser e s (PrimArray Word16)
 76 | word16Array e !n = case targetByteOrder of
 77 |   LittleEndian -> fmap (asWord16s . Bytes.toByteArrayClone) (P.take e (n * 2))
 78 |   BigEndian -> do
 79 |     bs <- P.take e (n * 2)
 80 |     let r = swapArray16 bs
 81 |     pure (asWord16s r)
 82 | 
 83 | -- | Parse an array of little-endian unsigned 32-bit words.
 84 | word32Array ::
 85 |   -- | Error message if not enough bytes are present
 86 |   e ->
 87 |   -- | Number of little-endian 32-bit words to consume
 88 |   Int ->
 89 |   -- | Native-endian elements
 90 |   Parser e s (PrimArray Word32)
 91 | word32Array e !n = case targetByteOrder of
 92 |   LittleEndian -> fmap (asWord32s . Bytes.toByteArrayClone) (P.take e (n * 4))
 93 |   BigEndian -> do
 94 |     bs <- P.take e (n * 4)
 95 |     let r = swapArray32 bs
 96 |     pure (asWord32s r)
 97 | 
 98 | -- | Parse an array of little-endian unsigned 64-bit words.
 99 | word64Array ::
100 |   -- | Error message if not enough bytes are present
101 |   e ->
102 |   -- | Number of little-endian 64-bit words to consume
103 |   Int ->
104 |   -- | Native-endian elements
105 |   Parser e s (PrimArray Word64)
106 | word64Array e !n = case targetByteOrder of
107 |   LittleEndian -> fmap (asWord64s . Bytes.toByteArrayClone) (P.take e (n * 8))
108 |   BigEndian -> do
109 |     bs <- P.take e (n * 8)
110 |     let r = swapArray64 bs
111 |     pure (asWord64s r)
112 | 
113 | -- | Parse an array of little-endian unsigned 128-bit words.
114 | word128Array ::
115 |   -- | Error message if not enough bytes are present
116 |   e ->
117 |   -- | Number of little-endian 128-bit words to consume
118 |   Int ->
119 |   -- | Native-endian elements
120 |   Parser e s (PrimArray Word128)
121 | word128Array e !n = case targetByteOrder of
122 |   LittleEndian -> fmap (asWord128s . Bytes.toByteArrayClone) (P.take e (n * 16))
123 |   BigEndian -> do
124 |     bs <- P.take e (n * 16)
125 |     let r = swapArray128 bs
126 |     pure (asWord128s r)
127 | 
128 | -- | Parse an array of little-endian unsigned 256-bit words.
129 | word256Array ::
130 |   -- | Error message if not enough bytes are present
131 |   e ->
132 |   -- | Number of little-endian 256-bit words to consume
133 |   Int ->
134 |   -- | Native-endian elements
135 |   Parser e s (PrimArray Word256)
136 | word256Array e !n = case targetByteOrder of
137 |   LittleEndian -> fmap (asWord256s . Bytes.toByteArrayClone) (P.take e (n * 32))
138 |   BigEndian -> do
139 |     bs <- P.take e (n * 32)
140 |     let r = swapArray256 bs
141 |     pure (asWord256s r)
142 | 
143 | -- | Parse an array of little-endian signed 64-bit words.
144 | int64Array ::
145 |   -- | Error message if not enough bytes are present
146 |   e ->
147 |   -- | Number of little-endian 64-bit words to expect
148 |   Int ->
149 |   -- | Native-endian elements
150 |   Parser e s (PrimArray Int64)
151 | int64Array e !n = do
152 |   PrimArray x <- word64Array e n
153 |   pure (PrimArray x)
154 | 
155 | asWord16s :: ByteArray -> PrimArray Word16
156 | asWord16s (ByteArray x) = PrimArray x
157 | 
158 | asWord32s :: ByteArray -> PrimArray Word32
159 | asWord32s (ByteArray x) = PrimArray x
160 | 
161 | asWord64s :: ByteArray -> PrimArray Word64
162 | asWord64s (ByteArray x) = PrimArray x
163 | 
164 | asWord128s :: ByteArray -> PrimArray Word128
165 | asWord128s (ByteArray x) = PrimArray x
166 | 
167 | asWord256s :: ByteArray -> PrimArray Word256
168 | asWord256s (ByteArray x) = PrimArray x
169 | 
170 | -- | Unsigned 16-bit word.
171 | word16 :: e -> Parser e s Word16
172 | word16 e = uneffectful $ \chunk ->
173 |   if length chunk >= 2
174 |     then
175 |       let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8
176 |           wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8
177 |        in Success
178 |             (fromIntegral @Word @Word16 (unsafeShiftL (fromIntegral wb) 8 .|. fromIntegral wa))
179 |             (offset chunk + 2)
180 |             (length chunk - 2)
181 |     else Failure e
182 | 
183 | -- | Unsigned 32-bit word.
184 | word32 :: e -> Parser e s Word32
185 | word32 e = uneffectful $ \chunk ->
186 |   if length chunk >= 4
187 |     then
188 |       let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8
189 |           wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8
190 |           wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8
191 |           wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8
192 |        in Success
193 |             ( fromIntegral @Word @Word32
194 |                 ( unsafeShiftL (fromIntegral wd) 24
195 |                     .|. unsafeShiftL (fromIntegral wc) 16
196 |                     .|. unsafeShiftL (fromIntegral wb) 8
197 |                     .|. fromIntegral wa
198 |                 )
199 |             )
200 |             (offset chunk + 4)
201 |             (length chunk - 4)
202 |     else Failure e
203 | 
204 | -- | Unsigned 64-bit word.
205 | word64 :: e -> Parser e s Word64
206 | word64 e = uneffectful $ \chunk ->
207 |   if length chunk >= 8
208 |     then
209 |       let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8
210 |           wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8
211 |           wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8
212 |           wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8
213 |           we = PM.indexByteArray (array chunk) (offset chunk + 4) :: Word8
214 |           wf = PM.indexByteArray (array chunk) (offset chunk + 5) :: Word8
215 |           wg = PM.indexByteArray (array chunk) (offset chunk + 6) :: Word8
216 |           wh = PM.indexByteArray (array chunk) (offset chunk + 7) :: Word8
217 |        in Success
218 |             ( unsafeShiftL (fromIntegral wh) 56
219 |                 .|. unsafeShiftL (fromIntegral wg) 48
220 |                 .|. unsafeShiftL (fromIntegral wf) 40
221 |                 .|. unsafeShiftL (fromIntegral we) 32
222 |                 .|. unsafeShiftL (fromIntegral wd) 24
223 |                 .|. unsafeShiftL (fromIntegral wc) 16
224 |                 .|. unsafeShiftL (fromIntegral wb) 8
225 |                 .|. fromIntegral wa
226 |             )
227 |             (offset chunk + 8)
228 |             (length chunk - 8)
229 |     else Failure e
230 | 
231 | -- | Unsigned 256-bit word.
232 | word256 :: e -> Parser e s Word256
233 | word256 e = (\d c b a -> Word256 a b c d) <$> word64 e <*> word64 e <*> word64 e <*> word64 e
234 | 
235 | -- | Unsigned 128-bit word.
236 | word128 :: e -> Parser e s Word128
237 | word128 e = liftA2 (flip Word128) (word64 e) (word64 e)
238 | 
239 | -- | Signed 8-bit integer.
240 | int8 :: e -> Parser e s Int8
241 | int8 = fmap fromIntegral . word8
242 | 
243 | -- | Signed 16-bit integer.
244 | int16 :: e -> Parser e s Int16
245 | int16 = fmap fromIntegral . word16
246 | 
247 | -- | Signed 32-bit integer.
248 | int32 :: e -> Parser e s Int32
249 | int32 = fmap fromIntegral . word32
250 | 
251 | -- | Signed 64-bit integer.
252 | int64 :: e -> Parser e s Int64
253 | int64 = fmap fromIntegral . word64
254 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Ascii.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE BinaryLiterals #-}
  3 | {-# LANGUAGE DataKinds #-}
  4 | {-# LANGUAGE DerivingStrategies #-}
  5 | {-# LANGUAGE GADTSyntax #-}
  6 | {-# LANGUAGE MagicHash #-}
  7 | {-# LANGUAGE PolyKinds #-}
  8 | {-# LANGUAGE RankNTypes #-}
  9 | {-# LANGUAGE ScopedTypeVariables #-}
 10 | {-# LANGUAGE TypeApplications #-}
 11 | {-# LANGUAGE UnboxedTuples #-}
 12 | 
 13 | {- | Parse input as ASCII-encoded text. Some parsers in this module,
 14 | like 'any' and 'peek', fail if they encounter a byte above @0x7F@.
 15 | Others, like numeric parsers and skipping parsers, leave the cursor
 16 | at the position of the offending byte without failing.
 17 | -}
 18 | module Data.Bytes.Parser.Ascii
 19 |   ( -- * Matching
 20 |     Latin.char
 21 |   , Latin.char2
 22 |   , Latin.char3
 23 |   , Latin.char4
 24 | 
 25 |     -- * Case-Insensitive Matching
 26 |   , charInsensitive
 27 | 
 28 |     -- * Get Character
 29 |   , any
 30 |   , any#
 31 |   , peek
 32 |   , opt
 33 | 
 34 |     -- * Match Many
 35 |   , take
 36 |   , shortTrailedBy
 37 |   , takeShortWhile
 38 | 
 39 |     -- * Skip
 40 |   , Latin.skipDigits
 41 |   , Latin.skipDigits1
 42 |   , Latin.skipChar
 43 |   , Latin.skipChar1
 44 |   , skipAlpha
 45 |   , skipAlpha1
 46 |   , skipTrailedBy
 47 |   , skipWhile
 48 | 
 49 |     -- * Numbers
 50 |   , Latin.decWord
 51 |   , Latin.decWord8
 52 |   , Latin.decWord16
 53 |   , Latin.decWord32
 54 |   ) where
 55 | 
 56 | import Prelude hiding (any, fail, length, takeWhile, take)
 57 | 
 58 | import Control.Monad.ST (runST)
 59 | import Data.Bits (clearBit)
 60 | import Data.Bytes.Parser.Internal (Parser (..), Result (..), Result#, indexLatinCharArray, uneffectful, uneffectful#, upcastUnitSuccess)
 61 | import Data.Bytes.Types (Bytes (..))
 62 | import Data.Char (ord)
 63 | import Data.Text.Short (ShortText)
 64 | import Data.Word (Word8)
 65 | import GHC.Exts (Char (C#), Char#, Int (I#), Int#, chr#, gtChar#, indexCharArray#, ord#, (+#), (-#), (<#))
 66 | import Data.Text.Internal (Text(Text))
 67 | 
 68 | import qualified Data.ByteString.Short.Internal as BSS
 69 | import qualified Data.Bytes as Bytes
 70 | import qualified Data.Bytes.Parser as Parser
 71 | import qualified Data.Bytes.Parser.Latin as Latin
 72 | import qualified Data.Bytes.Parser.Unsafe as Unsafe
 73 | import qualified Data.Primitive as PM
 74 | import qualified Data.Text.Short.Unsafe as TS
 75 | 
 76 | {- | Consume the next character, failing if it does not match the expected
 77 | value or if there is no more input. This check for equality is case
 78 | insensitive.
 79 | 
 80 | Precondition: The argument must be a letter (@[a-zA-Z]@). Behavior is
 81 | undefined if it is not.
 82 | -}
 83 | charInsensitive :: e -> Char -> Parser e s ()
 84 | {-# INLINE charInsensitive #-}
 85 | charInsensitive e !c = uneffectful $ \chunk ->
 86 |   if length chunk > 0
 87 |     then
 88 |       if clearBit (PM.indexByteArray (array chunk) (offset chunk) :: Word8) 5 == w
 89 |         then Success () (offset chunk + 1) (length chunk - 1)
 90 |         else Failure e
 91 |     else Failure e
 92 |  where
 93 |   w = clearBit (fromIntegral @Int @Word8 (ord c)) 5
 94 | 
 95 | {- | Consume input until the trailer is found. Then, consume
 96 | the trailer as well. This fails if the trailer is not
 97 | found or if any non-ASCII characters are encountered.
 98 | -}
 99 | skipTrailedBy :: e -> Char -> Parser e s ()
100 | {-# INLINE skipTrailedBy #-}
101 | skipTrailedBy e !c = do
102 |   let go = do
103 |         !d <- any e
104 |         if d == c
105 |           then pure ()
106 |           else go
107 |   go
108 | 
109 | -- | Consume a fixed number of ASCII characters (all less than codepoint 128).
110 | take :: e -> Int -> Parser e s Text
111 | {-# INLINE take #-}
112 | take e !n = do
113 |   bs@(Bytes arr off len) <- Parser.take e n
114 |   if Bytes.all (\w -> w < 128) bs
115 |     then pure (Text arr off len)
116 |     else Parser.fail e
117 | 
118 | {- | Consume characters matching the predicate. The stops when it
119 | encounters a non-matching character or when it encounters a byte
120 | above @0x7F@. This never fails.
121 | -}
122 | takeShortWhile :: (Char -> Bool) -> Parser e s ShortText
123 | {-# INLINE takeShortWhile #-}
124 | takeShortWhile p = do
125 |   !start <- Unsafe.cursor
126 |   skipWhile p
127 |   end <- Unsafe.cursor
128 |   src <- Unsafe.expose
129 |   let len = end - start
130 |       !r = runST $ do
131 |         marr <- PM.newByteArray len
132 |         PM.copyByteArray marr 0 src start len
133 |         PM.unsafeFreezeByteArray marr
134 |   pure $
135 |     TS.fromShortByteStringUnsafe $
136 |       byteArrayToShortByteString $
137 |         r
138 | 
139 | {- | Consume input through the next occurrence of the target
140 | character and return the consumed input, excluding the
141 | target character, as a 'ShortText'. This fails if it
142 | encounters any bytes above @0x7F@.
143 | -}
144 | shortTrailedBy :: e -> Char -> Parser e s ShortText
145 | shortTrailedBy e !c = do
146 |   !start <- Unsafe.cursor
147 |   skipTrailedBy e c
148 |   end <- Unsafe.cursor
149 |   src <- Unsafe.expose
150 |   let len = end - start - 1
151 |       !r = runST $ do
152 |         marr <- PM.newByteArray len
153 |         PM.copyByteArray marr 0 src start len
154 |         PM.unsafeFreezeByteArray marr
155 |   pure $
156 |     TS.fromShortByteStringUnsafe $
157 |       byteArrayToShortByteString $
158 |         r
159 | 
160 | -- | Consumes and returns the next character in the input.
161 | any :: e -> Parser e s Char
162 | {-# INLINE any #-}
163 | any e = uneffectful $ \chunk ->
164 |   if length chunk > 0
165 |     then
166 |       let c = indexLatinCharArray (array chunk) (offset chunk)
167 |        in if c < '\128'
168 |             then Success c (offset chunk + 1) (length chunk - 1)
169 |             else Failure e
170 |     else Failure e
171 | 
172 | -- | Variant of 'any' with unboxed result.
173 | any# :: e -> Parser e s Char#
174 | {-# INLINE any# #-}
175 | any# e =
176 |   Parser
177 |     ( \(# arr, off, len #) s0 -> case len of
178 |         0# -> (# s0, (# e | #) #)
179 |         _ ->
180 |           let !w = indexCharArray# arr off
181 |            in case ord# w <# 128# of
182 |                 1# -> (# s0, (# | (# w, off +# 1#, len -# 1# #) #) #)
183 |                 _ -> (# s0, (# e | #) #)
184 |     )
185 | 
186 | unI :: Int -> Int#
187 | {-# INLINE unI #-}
188 | unI (I# w) = w
189 | 
190 | {- | Examine the next byte without consuming it, interpret it as an
191 | ASCII-encoded character. This fails if the byte is above @0x7F@ or
192 | if the end of input has been reached.
193 | -}
194 | peek :: e -> Parser e s Char
195 | {-# INLINE peek #-}
196 | peek e = uneffectful $ \chunk ->
197 |   if length chunk > 0
198 |     then
199 |       let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
200 |        in if w < 128
201 |             then
202 |               Success
203 |                 (C# (chr# (unI (fromIntegral w))))
204 |                 (offset chunk)
205 |                 (length chunk)
206 |             else Failure e
207 |     else Failure e
208 | 
209 | {- | Consume the next byte, interpreting it as an ASCII-encoded character.
210 | Fails if the byte is above @0x7F@. Returns @Nothing@ if the
211 | end of the input has been reached.
212 | -}
213 | opt :: e -> Parser e s (Maybe Char)
214 | {-# INLINE opt #-}
215 | opt e = uneffectful $ \chunk ->
216 |   if length chunk > 0
217 |     then
218 |       let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
219 |        in if w < 128
220 |             then
221 |               Success
222 |                 (Just (C# (chr# (unI (fromIntegral w)))))
223 |                 (offset chunk + 1)
224 |                 (length chunk - 1)
225 |             else Failure e
226 |     else Success Nothing (offset chunk) (length chunk)
227 | 
228 | {- | Consume characters matching the predicate. The stops when it
229 | encounters a non-matching character or when it encounters a byte
230 | above @0x7F@. This never fails.
231 | -}
232 | skipWhile :: (Char -> Bool) -> Parser e s ()
233 | {-# INLINE skipWhile #-}
234 | skipWhile p =
235 |   Parser
236 |     ( \(# arr, off0, len0 #) s0 ->
237 |         let go off len = case len of
238 |               0# -> (# (), off, 0# #)
239 |               _ ->
240 |                 let c = indexCharArray# arr off
241 |                  in case p (C# c) of
242 |                       True -> case gtChar# c '\x7F'# of
243 |                         1# -> (# (), off, len #)
244 |                         _ -> go (off +# 1#) (len -# 1#)
245 |                       False -> (# (), off, len #)
246 |          in (# s0, (# | go off0 len0 #) #)
247 |     )
248 | 
249 | {- | Skip uppercase and lowercase letters until a non-alpha
250 | character is encountered.
251 | -}
252 | skipAlpha :: Parser e s ()
253 | {-# INLINE skipAlpha #-}
254 | skipAlpha = uneffectful# $ \c ->
255 |   upcastUnitSuccess (skipAlphaAsciiLoop c)
256 | 
257 | {- | Skip uppercase and lowercase letters until a non-alpha
258 | character is encountered.
259 | -}
260 | skipAlpha1 :: e -> Parser e s ()
261 | {-# INLINE skipAlpha1 #-}
262 | skipAlpha1 e = uneffectful# $ \c ->
263 |   skipAlphaAsciiLoop1Start e c
264 | 
265 | skipAlphaAsciiLoop ::
266 |   Bytes -> -- Chunk
267 |   (# Int#, Int# #)
268 | {-# INLINE skipAlphaAsciiLoop #-}
269 | skipAlphaAsciiLoop !c =
270 |   if length c > 0
271 |     then
272 |       let w = indexLatinCharArray (array c) (offset c)
273 |        in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z')
274 |             then skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c)
275 |             else (# unI (offset c), unI (length c) #)
276 |     else (# unI (offset c), unI (length c) #)
277 | 
278 | skipAlphaAsciiLoop1Start ::
279 |   e ->
280 |   Bytes -> -- chunk
281 |   Result# e ()
282 | {-# INLINE skipAlphaAsciiLoop1Start #-}
283 | skipAlphaAsciiLoop1Start e !c =
284 |   if length c > 0
285 |     then
286 |       let w = indexLatinCharArray (array c) (offset c)
287 |        in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z')
288 |             then upcastUnitSuccess (skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c))
289 |             else (# e | #)
290 |     else (# e | #)
291 | 
292 | byteArrayToShortByteString :: PM.ByteArray -> BSS.ShortByteString
293 | {-# INLINE byteArrayToShortByteString #-}
294 | byteArrayToShortByteString (PM.ByteArray x) = BSS.SBS x
295 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Internal.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE BinaryLiterals #-}
  3 | {-# LANGUAGE DataKinds #-}
  4 | {-# LANGUAGE DerivingStrategies #-}
  5 | {-# LANGUAGE GADTSyntax #-}
  6 | {-# LANGUAGE MagicHash #-}
  7 | {-# LANGUAGE NamedFieldPuns #-}
  8 | {-# LANGUAGE PolyKinds #-}
  9 | {-# LANGUAGE RankNTypes #-}
 10 | {-# LANGUAGE ScopedTypeVariables #-}
 11 | {-# LANGUAGE UnboxedTuples #-}
 12 | 
 13 | module Data.Bytes.Parser.Internal
 14 |   ( Parser (..)
 15 |   , Result (..)
 16 |   , InternalStep (..)
 17 |   , Bytes#
 18 |   , ST#
 19 |   , Result#
 20 |   , unfailing
 21 |   , uneffectful
 22 |   , uneffectful#
 23 |   , uneffectfulInt#
 24 |   , boxBytes
 25 |   , unboxBytes
 26 |   , unboxResult
 27 |   , fail
 28 |   , failByteArrayIntInt
 29 |   , indexLatinCharArray
 30 |   , upcastUnitSuccess
 31 |   -- Swapping
 32 |   , swapArray16
 33 |   , swapArray32
 34 |   , swapArray64
 35 |   , swapArray128
 36 |   , swapArray256
 37 |   ) where
 38 | 
 39 | import Prelude hiding (any, fail, length, takeWhile)
 40 | 
 41 | import Control.Applicative (Alternative)
 42 | import Control.Monad.ST (runST)
 43 | import Data.Bytes.Types (Bytes (..))
 44 | import Data.Kind (Type)
 45 | import Data.Primitive (ByteArray (ByteArray))
 46 | import Data.Word (Word8)
 47 | import GHC.Exts (ByteArray#, Char (C#), Int (I#), Int#, RuntimeRep, State#, TYPE)
 48 | import GHC.Exts (RuntimeRep(IntRep, BoxedRep, TupleRep), Levity(Unlifted))
 49 | 
 50 | import qualified Control.Applicative
 51 | import qualified Control.Monad
 52 | import qualified Data.Primitive as PM
 53 | import qualified GHC.Exts as Exts
 54 | 
 55 | -- | A non-resumable parser.
 56 | newtype Parser :: forall (r :: RuntimeRep). Type -> Type -> TYPE r -> Type where
 57 |   Parser ::
 58 |     forall (r :: RuntimeRep) (e :: Type) (s :: Type) (a :: TYPE r).
 59 |     {runParser :: (# ByteArray#, Int#, Int# #) -> ST# s (Result# e a)} ->
 60 |     Parser e s a
 61 | 
 62 | -- The result of running a parser. Used internally.
 63 | data Result e a
 64 |   = Failure e
 65 |   | -- An error message indicating what went wrong.
 66 |     Success !a !Int !Int
 67 | 
 68 | -- The parsed value, the offset after the last consumed byte, and the
 69 | -- number of bytes remaining in parsed slice.
 70 | 
 71 | data InternalStep a = InternalStep !a !Int !Int
 72 | 
 73 | uneffectful :: (Bytes -> Result e a) -> Parser e s a
 74 | {-# INLINE uneffectful #-}
 75 | uneffectful f =
 76 |   Parser
 77 |     (\b s0 -> (# s0, unboxResult (f (boxBytes b)) #))
 78 | 
 79 | -- This is like uneffectful but for parsers that always succeed.
 80 | -- These combinators typically have names that begin with @try@.
 81 | unfailing :: (Bytes -> InternalStep a) -> Parser e s a
 82 | {-# INLINE unfailing #-}
 83 | unfailing f =
 84 |   Parser
 85 |     (\b s0 -> (# s0, case f (boxBytes b) of InternalStep a (I# off) (I# len) -> (# | (# a, off, len #) #) #))
 86 | 
 87 | boxBytes :: Bytes# -> Bytes
 88 | {-# INLINE boxBytes #-}
 89 | boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c)
 90 | 
 91 | unboxBytes :: Bytes -> Bytes#
 92 | {-# INLINE unboxBytes #-}
 93 | unboxBytes (Bytes (ByteArray a) (I# b) (I# c)) = (# a, b, c #)
 94 | 
 95 | type Bytes# = (# ByteArray#, Int#, Int# #)
 96 | type ST# s (a :: TYPE r) = State# s -> (# State# s, a #)
 97 | type Result# e (a :: TYPE r) =
 98 |   (#
 99 |     e |
100 |     (# a, Int#, Int# #) -- ints are offset and length
101 |   #)
102 | 
103 | unboxResult :: Result e a -> Result# e a
104 | {-# INLINE unboxResult #-}
105 | unboxResult (Success a (I# b) (I# c)) = (# | (# a, b, c #) #)
106 | unboxResult (Failure e) = (# e | #)
107 | 
108 | {- | Combines the error messages using '<>' when both
109 | parsers fail.
110 | -}
111 | instance (Monoid e) => Alternative (Parser e s) where
112 |   {-# INLINE empty #-}
113 |   {-# INLINE (<|>) #-}
114 |   empty = fail mempty
115 |   Parser f <|> Parser g =
116 |     Parser
117 |       ( \x s0 -> case f x s0 of
118 |           (# s1, r0 #) -> case r0 of
119 |             (# eRight | #) -> case g x s1 of
120 |               (# s2, r1 #) -> case r1 of
121 |                 (# eLeft | #) -> (# s2, (# eRight <> eLeft | #) #)
122 |                 (# | r #) -> (# s2, (# | r #) #)
123 |             (# | r #) -> (# s1, (# | r #) #)
124 |       )
125 | 
126 | -- | Fail with the provided error message.
127 | fail ::
128 |   -- | Error message
129 |   e ->
130 |   Parser e s a
131 | {-# INLINE fail #-}
132 | fail e = uneffectful $ \_ -> Failure e
133 | 
134 | failByteArrayIntInt :: forall e s (a :: TYPE ('TupleRep '[ 'BoxedRep 'Unlifted, 'IntRep, 'IntRep ])).
135 |   -- | Error message
136 |   e ->
137 |   Parser e s a
138 | {-# INLINE failByteArrayIntInt #-}
139 | failByteArrayIntInt e = Parser (\_ s0 -> (# s0, (# e | #)  #))
140 | 
141 | instance Applicative (Parser e s) where
142 |   pure = pureParser
143 |   (<*>) = Control.Monad.ap
144 | 
145 | instance Monad (Parser e s) where
146 |   {-# INLINE (>>=) #-}
147 |   (>>=) = bindParser
148 | 
149 | instance Functor (Parser e s) where
150 |   {-# INLINE fmap #-}
151 |   fmap f (Parser g) =
152 |     Parser
153 |       ( \x s0 -> case g x s0 of
154 |           (# s1, r #) -> case r of
155 |             (# e | #) -> (# s1, (# e | #) #)
156 |             (# | (# a, b, c #) #) -> (# s1, (# | (# f a, b, c #) #) #)
157 |       )
158 | 
159 | indexLatinCharArray :: ByteArray -> Int -> Char
160 | {-# INLINE indexLatinCharArray #-}
161 | indexLatinCharArray (ByteArray arr) (I# off) =
162 |   C# (Exts.indexCharArray# arr off)
163 | 
164 | uneffectful# :: (Bytes -> Result# e a) -> Parser e s a
165 | {-# INLINE uneffectful# #-}
166 | uneffectful# f =
167 |   Parser
168 |     (\b s0 -> (# s0, (f (boxBytes b)) #))
169 | 
170 | uneffectfulInt# :: (Bytes -> Result# e Int#) -> Parser e s Int#
171 | {-# INLINE uneffectfulInt# #-}
172 | uneffectfulInt# f =
173 |   Parser
174 |     (\b s0 -> (# s0, (f (boxBytes b)) #))
175 | 
176 | upcastUnitSuccess :: (# Int#, Int# #) -> Result# e ()
177 | {-# INLINE upcastUnitSuccess #-}
178 | upcastUnitSuccess (# b, c #) = (# | (# (), b, c #) #)
179 | 
180 | swapArray16 :: Bytes -> ByteArray
181 | swapArray16 (Bytes {array, offset, length}) = runST $ do
182 |   dst <- PM.newByteArray length
183 |   let go !ixSrc !ixDst !len =
184 |         if len > 0
185 |           then do
186 |             let v0 = PM.indexByteArray array ixSrc :: Word8
187 |                 v1 = PM.indexByteArray array (ixSrc + 1) :: Word8
188 |             PM.writeByteArray dst ixDst v1
189 |             PM.writeByteArray dst (ixDst + 1) v0
190 |             go (ixSrc + 2) (ixDst + 2) (len - 2)
191 |           else pure ()
192 |   go offset 0 length
193 |   PM.unsafeFreezeByteArray dst
194 | 
195 | swapArray32 :: Bytes -> ByteArray
196 | swapArray32 (Bytes {array, offset, length}) = runST $ do
197 |   dst <- PM.newByteArray length
198 |   let go !ixSrc !ixDst !len =
199 |         if len > 0
200 |           then do
201 |             let v0 = PM.indexByteArray array ixSrc :: Word8
202 |                 v1 = PM.indexByteArray array (ixSrc + 1) :: Word8
203 |                 v2 = PM.indexByteArray array (ixSrc + 2) :: Word8
204 |                 v3 = PM.indexByteArray array (ixSrc + 3) :: Word8
205 |             PM.writeByteArray dst ixDst v3
206 |             PM.writeByteArray dst (ixDst + 1) v2
207 |             PM.writeByteArray dst (ixDst + 2) v1
208 |             PM.writeByteArray dst (ixDst + 3) v0
209 |             go (ixSrc + 4) (ixDst + 4) (len - 4)
210 |           else pure ()
211 |   go offset 0 length
212 |   PM.unsafeFreezeByteArray dst
213 | 
214 | swapArray64 :: Bytes -> ByteArray
215 | swapArray64 (Bytes {array, offset, length}) = runST $ do
216 |   dst <- PM.newByteArray length
217 |   let go !ixSrc !ixDst !len =
218 |         if len > 0
219 |           then do
220 |             let v0 = PM.indexByteArray array ixSrc :: Word8
221 |                 v1 = PM.indexByteArray array (ixSrc + 1) :: Word8
222 |                 v2 = PM.indexByteArray array (ixSrc + 2) :: Word8
223 |                 v3 = PM.indexByteArray array (ixSrc + 3) :: Word8
224 |                 v4 = PM.indexByteArray array (ixSrc + 4) :: Word8
225 |                 v5 = PM.indexByteArray array (ixSrc + 5) :: Word8
226 |                 v6 = PM.indexByteArray array (ixSrc + 6) :: Word8
227 |                 v7 = PM.indexByteArray array (ixSrc + 7) :: Word8
228 |             PM.writeByteArray dst ixDst v7
229 |             PM.writeByteArray dst (ixDst + 1) v6
230 |             PM.writeByteArray dst (ixDst + 2) v5
231 |             PM.writeByteArray dst (ixDst + 3) v4
232 |             PM.writeByteArray dst (ixDst + 4) v3
233 |             PM.writeByteArray dst (ixDst + 5) v2
234 |             PM.writeByteArray dst (ixDst + 6) v1
235 |             PM.writeByteArray dst (ixDst + 7) v0
236 |             go (ixSrc + 8) (ixDst + 8) (len - 8)
237 |           else pure ()
238 |   go offset 0 length
239 |   PM.unsafeFreezeByteArray dst
240 | 
241 | swapArray128 :: Bytes -> ByteArray
242 | swapArray128 (Bytes {array, offset, length}) = runST $ do
243 |   dst <- PM.newByteArray length
244 |   let go !ixSrc !ixDst !len =
245 |         if len > 0
246 |           then do
247 |             let v0 = PM.indexByteArray array ixSrc :: Word8
248 |                 v1 = PM.indexByteArray array (ixSrc + 1) :: Word8
249 |                 v2 = PM.indexByteArray array (ixSrc + 2) :: Word8
250 |                 v3 = PM.indexByteArray array (ixSrc + 3) :: Word8
251 |                 v4 = PM.indexByteArray array (ixSrc + 4) :: Word8
252 |                 v5 = PM.indexByteArray array (ixSrc + 5) :: Word8
253 |                 v6 = PM.indexByteArray array (ixSrc + 6) :: Word8
254 |                 v7 = PM.indexByteArray array (ixSrc + 7) :: Word8
255 |                 v8 = PM.indexByteArray array (ixSrc + 8) :: Word8
256 |                 v9 = PM.indexByteArray array (ixSrc + 9) :: Word8
257 |                 v10 = PM.indexByteArray array (ixSrc + 10) :: Word8
258 |                 v11 = PM.indexByteArray array (ixSrc + 11) :: Word8
259 |                 v12 = PM.indexByteArray array (ixSrc + 12) :: Word8
260 |                 v13 = PM.indexByteArray array (ixSrc + 13) :: Word8
261 |                 v14 = PM.indexByteArray array (ixSrc + 14) :: Word8
262 |                 v15 = PM.indexByteArray array (ixSrc + 15) :: Word8
263 |             PM.writeByteArray dst ixDst v15
264 |             PM.writeByteArray dst (ixDst + 1) v14
265 |             PM.writeByteArray dst (ixDst + 2) v13
266 |             PM.writeByteArray dst (ixDst + 3) v12
267 |             PM.writeByteArray dst (ixDst + 4) v11
268 |             PM.writeByteArray dst (ixDst + 5) v10
269 |             PM.writeByteArray dst (ixDst + 6) v9
270 |             PM.writeByteArray dst (ixDst + 7) v8
271 |             PM.writeByteArray dst (ixDst + 8) v7
272 |             PM.writeByteArray dst (ixDst + 9) v6
273 |             PM.writeByteArray dst (ixDst + 10) v5
274 |             PM.writeByteArray dst (ixDst + 11) v4
275 |             PM.writeByteArray dst (ixDst + 12) v3
276 |             PM.writeByteArray dst (ixDst + 13) v2
277 |             PM.writeByteArray dst (ixDst + 14) v1
278 |             PM.writeByteArray dst (ixDst + 15) v0
279 |             go (ixSrc + 16) (ixDst + 16) (len - 16)
280 |           else pure ()
281 |   go offset 0 length
282 |   PM.unsafeFreezeByteArray dst
283 | 
284 | swapArray256 :: Bytes -> ByteArray
285 | swapArray256 (Bytes {array, offset, length}) = runST $ do
286 |   dst <- PM.newByteArray length
287 |   let go !ixSrc !ixDst !len =
288 |         if len > 0
289 |           then do
290 |             let loop !i
291 |                   | i < 32 = do
292 |                       let v = PM.indexByteArray array (ixSrc + i) :: Word8
293 |                       PM.writeByteArray dst (ixDst + (31 - i)) v
294 |                       loop (i + 1)
295 |                   | otherwise = pure ()
296 |             loop 0
297 |             go (ixSrc + 32) (ixDst + 32) (len - 32)
298 |           else pure ()
299 |   go offset 0 length
300 |   PM.unsafeFreezeByteArray dst
301 | 
302 | pureParser :: a -> Parser e s a
303 | {-# INLINE pureParser #-}
304 | pureParser a =
305 |   Parser
306 |     (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #))
307 | 
308 | bindParser :: Parser e s a -> (a -> Parser e s b) -> Parser e s b
309 | {-# INLINE bindParser #-}
310 | bindParser (Parser f) g =
311 |   Parser
312 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
313 |         (# s1, r0 #) -> case r0 of
314 |           (# e | #) -> (# s1, (# e | #) #)
315 |           (# | (# y, b, c #) #) ->
316 |             runParser (g y) (# arr, b, c #) s1
317 |     )
318 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Rebindable.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | {-# LANGUAGE DataKinds #-}
  3 | {-# LANGUAGE FlexibleInstances #-}
  4 | {-# LANGUAGE MultiParamTypeClasses #-}
  5 | {-# LANGUAGE PolyKinds #-}
  6 | {-# LANGUAGE RankNTypes #-}
  7 | {-# LANGUAGE ScopedTypeVariables #-}
  8 | {-# LANGUAGE UnboxedTuples #-}
  9 | 
 10 | {- | Provides levity-polymorphic variants of @>>=@, @>>@, and @pure@
 11 | used to assemble parsers whose result types are unlifted. This
 12 | cannot be used with the @RebindableSyntax@ extension because that
 13 | extension disallows representations other than @LiftedRep@. Consequently,
 14 | users of this module must manually desugar do notation. See the
 15 | @url-bytes@ library for an example of this module in action.
 16 | 
 17 | Only resort to the functions in this module after checking that
 18 | GHC is unable to optimize away @I#@ and friends in your code.
 19 | -}
 20 | module Data.Bytes.Parser.Rebindable
 21 |   ( Bind (..)
 22 |   , Pure (..)
 23 |   ) where
 24 | 
 25 | import Data.Bytes.Parser.Internal (Parser (..))
 26 | import GHC.Exts (RuntimeRep (..), TYPE)
 27 | import Prelude ()
 28 | import GHC.Exts (LiftedRep)
 29 | 
 30 | class Bind (ra :: RuntimeRep) (rb :: RuntimeRep) where
 31 |   (>>=) ::
 32 |     forall e s (a :: TYPE ra) (b :: TYPE rb).
 33 |     Parser e s a ->
 34 |     (a -> Parser e s b) ->
 35 |     Parser e s b
 36 |   (>>) ::
 37 |     forall e s (a :: TYPE ra) (b :: TYPE rb).
 38 |     Parser e s a ->
 39 |     Parser e s b ->
 40 |     Parser e s b
 41 | 
 42 | class Pure (ra :: RuntimeRep) where
 43 |   pure :: forall e s (a :: TYPE ra). a -> Parser e s a
 44 | 
 45 | pureParser :: a -> Parser e s a
 46 | {-# INLINE pureParser #-}
 47 | pureParser a =
 48 |   Parser
 49 |     (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #))
 50 | 
 51 | bindParser :: Parser e s a -> (a -> Parser e s b) -> Parser e s b
 52 | {-# INLINE bindParser #-}
 53 | bindParser (Parser f) g =
 54 |   Parser
 55 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
 56 |         (# s1, r0 #) -> case r0 of
 57 |           (# e | #) -> (# s1, (# e | #) #)
 58 |           (# | (# y, b, c #) #) ->
 59 |             runParser (g y) (# arr, b, c #) s1
 60 |     )
 61 | 
 62 | sequenceParser :: Parser e s a -> Parser e s b -> Parser e s b
 63 | {-# INLINE sequenceParser #-}
 64 | sequenceParser (Parser f) (Parser g) =
 65 |   Parser
 66 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
 67 |         (# s1, r0 #) -> case r0 of
 68 |           (# e | #) -> (# s1, (# e | #) #)
 69 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
 70 |     )
 71 | 
 72 | pureIntParser ::
 73 |   forall (a :: TYPE 'IntRep) e s.
 74 |   a ->
 75 |   Parser e s a
 76 | {-# INLINE pureIntParser #-}
 77 | pureIntParser a =
 78 |   Parser
 79 |     (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #))
 80 | 
 81 | bindIntParser ::
 82 |   forall (a :: TYPE 'IntRep) e s b.
 83 |   Parser e s a ->
 84 |   (a -> Parser e s b) ->
 85 |   Parser e s b
 86 | {-# INLINE bindIntParser #-}
 87 | bindIntParser (Parser f) g =
 88 |   Parser
 89 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
 90 |         (# s1, r0 #) -> case r0 of
 91 |           (# e | #) -> (# s1, (# e | #) #)
 92 |           (# | (# y, b, c #) #) ->
 93 |             runParser (g y) (# arr, b, c #) s1
 94 |     )
 95 | 
 96 | bindWordParser ::
 97 |   forall (a :: TYPE 'WordRep) e s b.
 98 |   Parser e s a ->
 99 |   (a -> Parser e s b) ->
100 |   Parser e s b
101 | {-# INLINE bindWordParser #-}
102 | bindWordParser (Parser f) g =
103 |   Parser
104 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
105 |         (# s1, r0 #) -> case r0 of
106 |           (# e | #) -> (# s1, (# e | #) #)
107 |           (# | (# y, b, c #) #) ->
108 |             runParser (g y) (# arr, b, c #) s1
109 |     )
110 | 
111 | sequenceIntParser ::
112 |   forall (a :: TYPE 'IntRep) e s b.
113 |   Parser e s a ->
114 |   Parser e s b ->
115 |   Parser e s b
116 | {-# INLINE sequenceIntParser #-}
117 | sequenceIntParser (Parser f) (Parser g) =
118 |   Parser
119 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
120 |         (# s1, r0 #) -> case r0 of
121 |           (# e | #) -> (# s1, (# e | #) #)
122 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
123 |     )
124 | 
125 | sequenceWordParser ::
126 |   forall (a :: TYPE 'WordRep) e s b.
127 |   Parser e s a ->
128 |   Parser e s b ->
129 |   Parser e s b
130 | {-# INLINE sequenceWordParser #-}
131 | sequenceWordParser (Parser f) (Parser g) =
132 |   Parser
133 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
134 |         (# s1, r0 #) -> case r0 of
135 |           (# e | #) -> (# s1, (# e | #) #)
136 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
137 |     )
138 | 
139 | pureIntPairParser ::
140 |   forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s.
141 |   a ->
142 |   Parser e s a
143 | {-# INLINE pureIntPairParser #-}
144 | pureIntPairParser a =
145 |   Parser
146 |     (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #))
147 | 
148 | bindIntPairParser ::
149 |   forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s b.
150 |   Parser e s a ->
151 |   (a -> Parser e s b) ->
152 |   Parser e s b
153 | {-# INLINE bindIntPairParser #-}
154 | bindIntPairParser (Parser f) g =
155 |   Parser
156 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
157 |         (# s1, r0 #) -> case r0 of
158 |           (# e | #) -> (# s1, (# e | #) #)
159 |           (# | (# y, b, c #) #) ->
160 |             runParser (g y) (# arr, b, c #) s1
161 |     )
162 | 
163 | pureInt5Parser ::
164 |   forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s.
165 |   a ->
166 |   Parser e s a
167 | {-# INLINE pureInt5Parser #-}
168 | pureInt5Parser a =
169 |   Parser
170 |     (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #))
171 | 
172 | bindInt5Parser ::
173 |   forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s b.
174 |   Parser e s a ->
175 |   (a -> Parser e s b) ->
176 |   Parser e s b
177 | {-# INLINE bindInt5Parser #-}
178 | bindInt5Parser (Parser f) g =
179 |   Parser
180 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
181 |         (# s1, r0 #) -> case r0 of
182 |           (# e | #) -> (# s1, (# e | #) #)
183 |           (# | (# y, b, c #) #) ->
184 |             runParser (g y) (# arr, b, c #) s1
185 |     )
186 | 
187 | sequenceInt5Parser ::
188 |   forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s b.
189 |   Parser e s a ->
190 |   Parser e s b ->
191 |   Parser e s b
192 | {-# INLINE sequenceInt5Parser #-}
193 | sequenceInt5Parser (Parser f) (Parser g) =
194 |   Parser
195 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
196 |         (# s1, r0 #) -> case r0 of
197 |           (# e | #) -> (# s1, (# e | #) #)
198 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
199 |     )
200 | 
201 | sequenceIntPairParser ::
202 |   forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s b.
203 |   Parser e s a ->
204 |   Parser e s b ->
205 |   Parser e s b
206 | {-# INLINE sequenceIntPairParser #-}
207 | sequenceIntPairParser (Parser f) (Parser g) =
208 |   Parser
209 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
210 |         (# s1, r0 #) -> case r0 of
211 |           (# e | #) -> (# s1, (# e | #) #)
212 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
213 |     )
214 | 
215 | bindInt2to5Parser ::
216 |   forall
217 |     (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep]))
218 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]))
219 |     e
220 |     s.
221 |   Parser e s a ->
222 |   (a -> Parser e s b) ->
223 |   Parser e s b
224 | {-# INLINE bindInt2to5Parser #-}
225 | bindInt2to5Parser (Parser f) g =
226 |   Parser
227 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
228 |         (# s1, r0 #) -> case r0 of
229 |           (# e | #) -> (# s1, (# e | #) #)
230 |           (# | (# y, b, c #) #) ->
231 |             runParser (g y) (# arr, b, c #) s1
232 |     )
233 | 
234 | sequenceInt2to5Parser ::
235 |   forall
236 |     (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep]))
237 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]))
238 |     e
239 |     s.
240 |   Parser e s a ->
241 |   Parser e s b ->
242 |   Parser e s b
243 | {-# INLINE sequenceInt2to5Parser #-}
244 | sequenceInt2to5Parser (Parser f) (Parser g) =
245 |   Parser
246 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
247 |         (# s1, r0 #) -> case r0 of
248 |           (# e | #) -> (# s1, (# e | #) #)
249 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
250 |     )
251 | 
252 | instance Bind LiftedRep LiftedRep where
253 |   {-# INLINE (>>=) #-}
254 |   {-# INLINE (>>) #-}
255 |   (>>=) = bindParser
256 |   (>>) = sequenceParser
257 | 
258 | instance Bind 'WordRep LiftedRep where
259 |   {-# INLINE (>>=) #-}
260 |   {-# INLINE (>>) #-}
261 |   (>>=) = bindWordParser
262 |   (>>) = sequenceWordParser
263 | 
264 | instance Bind 'IntRep LiftedRep where
265 |   {-# INLINE (>>=) #-}
266 |   {-# INLINE (>>) #-}
267 |   (>>=) = bindIntParser
268 |   (>>) = sequenceIntParser
269 | 
270 | instance Bind ('TupleRep '[ 'IntRep, 'IntRep]) LiftedRep where
271 |   {-# INLINE (>>=) #-}
272 |   {-# INLINE (>>) #-}
273 |   (>>=) = bindIntPairParser
274 |   (>>) = sequenceIntPairParser
275 | 
276 | instance
277 |   Bind
278 |     ('TupleRep '[ 'IntRep, 'IntRep])
279 |     ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])
280 |   where
281 |   {-# INLINE (>>=) #-}
282 |   {-# INLINE (>>) #-}
283 |   (>>=) = bindInt2to5Parser
284 |   (>>) = sequenceInt2to5Parser
285 | 
286 | instance
287 |   Bind
288 |     ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])
289 |     LiftedRep
290 |   where
291 |   {-# INLINE (>>=) #-}
292 |   {-# INLINE (>>) #-}
293 |   (>>=) = bindInt5Parser
294 |   (>>) = sequenceInt5Parser
295 | 
296 | instance
297 |   Bind
298 |     'IntRep
299 |     ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])
300 |   where
301 |   {-# INLINE (>>=) #-}
302 |   {-# INLINE (>>) #-}
303 |   (>>=) = bindFromIntToInt5
304 |   (>>) = sequenceIntToInt5
305 | 
306 | instance Bind LiftedRep ('TupleRep '[ 'IntRep, 'IntRep]) where
307 |   {-# INLINE (>>=) #-}
308 |   {-# INLINE (>>) #-}
309 |   (>>=) = bindFromLiftedToIntPair
310 |   (>>) = sequenceLiftedToIntPair
311 | 
312 | instance
313 |   Bind
314 |     LiftedRep
315 |     ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])
316 |   where
317 |   {-# INLINE (>>=) #-}
318 |   {-# INLINE (>>) #-}
319 |   (>>=) = bindFromLiftedToInt5
320 |   (>>) = sequenceLiftedToInt5
321 | 
322 | instance Bind 'IntRep ('TupleRep '[ 'IntRep, 'IntRep]) where
323 |   {-# INLINE (>>=) #-}
324 |   {-# INLINE (>>) #-}
325 |   (>>=) = bindFromIntToIntPair
326 |   (>>) = sequenceIntToIntPair
327 | 
328 | instance Bind LiftedRep 'IntRep where
329 |   {-# INLINE (>>=) #-}
330 |   {-# INLINE (>>) #-}
331 |   (>>=) = bindFromLiftedToInt
332 |   (>>) = sequenceLiftedToInt
333 | 
334 | instance Pure LiftedRep where
335 |   {-# INLINE pure #-}
336 |   pure = pureParser
337 | 
338 | instance Pure 'IntRep where
339 |   {-# INLINE pure #-}
340 |   pure = pureIntParser
341 | 
342 | instance Pure ('TupleRep '[ 'IntRep, 'IntRep]) where
343 |   {-# INLINE pure #-}
344 |   pure = pureIntPairParser
345 | 
346 | instance Pure ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) where
347 |   {-# INLINE pure #-}
348 |   pure = pureInt5Parser
349 | 
350 | bindFromIntToIntPair ::
351 |   forall
352 |     s
353 |     e
354 |     (a :: TYPE 'IntRep)
355 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])).
356 |   Parser s e a ->
357 |   (a -> Parser s e b) ->
358 |   Parser s e b
359 | {-# INLINE bindFromIntToIntPair #-}
360 | bindFromIntToIntPair (Parser f) g =
361 |   Parser
362 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
363 |         (# s1, r0 #) -> case r0 of
364 |           (# e | #) -> (# s1, (# e | #) #)
365 |           (# | (# y, b, c #) #) ->
366 |             runParser (g y) (# arr, b, c #) s1
367 |     )
368 | 
369 | sequenceIntToIntPair ::
370 |   forall
371 |     s
372 |     e
373 |     (a :: TYPE 'IntRep)
374 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])).
375 |   Parser s e a ->
376 |   Parser s e b ->
377 |   Parser s e b
378 | {-# INLINE sequenceIntToIntPair #-}
379 | sequenceIntToIntPair (Parser f) (Parser g) =
380 |   Parser
381 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
382 |         (# s1, r0 #) -> case r0 of
383 |           (# e | #) -> (# s1, (# e | #) #)
384 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
385 |     )
386 | 
387 | bindFromIntToInt5 ::
388 |   forall
389 |     s
390 |     e
391 |     (a :: TYPE 'IntRep)
392 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])).
393 |   Parser s e a ->
394 |   (a -> Parser s e b) ->
395 |   Parser s e b
396 | {-# INLINE bindFromIntToInt5 #-}
397 | bindFromIntToInt5 (Parser f) g =
398 |   Parser
399 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
400 |         (# s1, r0 #) -> case r0 of
401 |           (# e | #) -> (# s1, (# e | #) #)
402 |           (# | (# y, b, c #) #) ->
403 |             runParser (g y) (# arr, b, c #) s1
404 |     )
405 | 
406 | sequenceIntToInt5 ::
407 |   forall
408 |     s
409 |     e
410 |     (a :: TYPE 'IntRep)
411 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])).
412 |   Parser s e a ->
413 |   Parser s e b ->
414 |   Parser s e b
415 | {-# INLINE sequenceIntToInt5 #-}
416 | sequenceIntToInt5 (Parser f) (Parser g) =
417 |   Parser
418 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
419 |         (# s1, r0 #) -> case r0 of
420 |           (# e | #) -> (# s1, (# e | #) #)
421 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
422 |     )
423 | 
424 | bindFromLiftedToIntPair ::
425 |   forall
426 |     s
427 |     e
428 |     (a :: TYPE LiftedRep)
429 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])).
430 |   Parser s e a ->
431 |   (a -> Parser s e b) ->
432 |   Parser s e b
433 | {-# INLINE bindFromLiftedToIntPair #-}
434 | bindFromLiftedToIntPair (Parser f) g =
435 |   Parser
436 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
437 |         (# s1, r0 #) -> case r0 of
438 |           (# e | #) -> (# s1, (# e | #) #)
439 |           (# | (# y, b, c #) #) ->
440 |             runParser (g y) (# arr, b, c #) s1
441 |     )
442 | 
443 | sequenceLiftedToIntPair ::
444 |   forall
445 |     s
446 |     e
447 |     (a :: TYPE LiftedRep)
448 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])).
449 |   Parser s e a ->
450 |   Parser s e b ->
451 |   Parser s e b
452 | {-# INLINE sequenceLiftedToIntPair #-}
453 | sequenceLiftedToIntPair (Parser f) (Parser g) =
454 |   Parser
455 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
456 |         (# s1, r0 #) -> case r0 of
457 |           (# e | #) -> (# s1, (# e | #) #)
458 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
459 |     )
460 | 
461 | bindFromLiftedToInt5 ::
462 |   forall
463 |     s
464 |     e
465 |     (a :: TYPE LiftedRep)
466 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])).
467 |   Parser s e a ->
468 |   (a -> Parser s e b) ->
469 |   Parser s e b
470 | {-# INLINE bindFromLiftedToInt5 #-}
471 | bindFromLiftedToInt5 (Parser f) g =
472 |   Parser
473 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
474 |         (# s1, r0 #) -> case r0 of
475 |           (# e | #) -> (# s1, (# e | #) #)
476 |           (# | (# y, b, c #) #) ->
477 |             runParser (g y) (# arr, b, c #) s1
478 |     )
479 | 
480 | sequenceLiftedToInt5 ::
481 |   forall
482 |     s
483 |     e
484 |     (a :: TYPE LiftedRep)
485 |     (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])).
486 |   Parser s e a ->
487 |   Parser s e b ->
488 |   Parser s e b
489 | {-# INLINE sequenceLiftedToInt5 #-}
490 | sequenceLiftedToInt5 (Parser f) (Parser g) =
491 |   Parser
492 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
493 |         (# s1, r0 #) -> case r0 of
494 |           (# e | #) -> (# s1, (# e | #) #)
495 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
496 |     )
497 | 
498 | bindFromLiftedToInt ::
499 |   forall
500 |     s
501 |     e
502 |     (a :: TYPE LiftedRep)
503 |     (b :: TYPE 'IntRep).
504 |   Parser s e a ->
505 |   (a -> Parser s e b) ->
506 |   Parser s e b
507 | {-# INLINE bindFromLiftedToInt #-}
508 | bindFromLiftedToInt (Parser f) g =
509 |   Parser
510 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
511 |         (# s1, r0 #) -> case r0 of
512 |           (# e | #) -> (# s1, (# e | #) #)
513 |           (# | (# y, b, c #) #) ->
514 |             runParser (g y) (# arr, b, c #) s1
515 |     )
516 | 
517 | sequenceLiftedToInt ::
518 |   forall
519 |     s
520 |     e
521 |     (a :: TYPE LiftedRep)
522 |     (b :: TYPE 'IntRep).
523 |   Parser s e a ->
524 |   Parser s e b ->
525 |   Parser s e b
526 | {-# INLINE sequenceLiftedToInt #-}
527 | sequenceLiftedToInt (Parser f) (Parser g) =
528 |   Parser
529 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
530 |         (# s1, r0 #) -> case r0 of
531 |           (# e | #) -> (# s1, (# e | #) #)
532 |           (# | (# _, b, c #) #) -> g (# arr, b, c #) s1
533 |     )
534 | 


--------------------------------------------------------------------------------
/test/Main.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE CPP #-}
  3 | {-# LANGUAGE DataKinds #-}
  4 | {-# LANGUAGE MagicHash #-}
  5 | {-# LANGUAGE OverloadedStrings #-}
  6 | {-# LANGUAGE ScopedTypeVariables #-}
  7 | {-# LANGUAGE TypeApplications #-}
  8 | {-# OPTIONS_GHC -fno-warn-orphans #-}
  9 | 
 10 | import Control.Monad (replicateM)
 11 | import Control.Monad.ST (runST)
 12 | import Data.Bytes.Parser (Slice (Slice))
 13 | import Data.Bytes.Types (Bytes (Bytes))
 14 | import Data.Char (ord)
 15 | import Data.Coerce (coerce)
 16 | import Data.Int (Int16, Int32)
 17 | import Data.Primitive (ByteArray (..), PrimArray (..))
 18 | import Data.Text.Short (ShortText)
 19 | import Data.WideWord (Word128 (Word128))
 20 | import Data.Word (Word16, Word32, Word64, Word8)
 21 | import Numeric.Natural (Natural)
 22 | import System.ByteOrder (ByteOrder (BigEndian, LittleEndian), Fixed (..))
 23 | import Test.Tasty (TestTree, defaultMain, testGroup)
 24 | import Test.Tasty.HUnit (testCase, (@=?))
 25 | import Test.Tasty.QuickCheck (testProperty, (===))
 26 | 
 27 | import qualified Data.Bits as Bits
 28 | import qualified Data.Bytes as Bytes
 29 | import qualified Data.Bytes.Parser as P
 30 | import qualified Data.Bytes.Parser.Ascii as Ascii
 31 | import qualified Data.Bytes.Parser.Base128 as Base128
 32 | import qualified Data.Bytes.Parser.BigEndian as BigEndian
 33 | import qualified Data.Bytes.Parser.Latin as Latin
 34 | import qualified Data.Bytes.Parser.Leb128 as Leb128
 35 | import qualified Data.Bytes.Parser.LittleEndian as LittleEndian
 36 | import qualified Data.List as List
 37 | import qualified Data.Primitive as PM
 38 | import qualified GHC.Exts as Exts
 39 | import qualified Test.Tasty.QuickCheck as QC
 40 | 
 41 | main :: IO ()
 42 | main = defaultMain tests
 43 | 
 44 | tests :: TestTree
 45 | tests =
 46 |   testGroup
 47 |     "Parser"
 48 |     [ testProperty "decStandardInt" $ \i ->
 49 |         withSz (show i) $ \str len ->
 50 |           P.parseBytes (Latin.decStandardInt ()) str
 51 |             === P.Success (Slice len 0 i)
 52 |     , testProperty "big-endian-word16-array" $ \(xs :: [Word16]) ->
 53 |         let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word16])
 54 |             res = Exts.fromList xs :: PrimArray Word16
 55 |             sz = length xs * 2
 56 |             bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz
 57 |          in P.Success (Slice (sz + 1) 0 res)
 58 |               === P.parseBytes (BigEndian.word16Array () (length xs)) bs
 59 |     , testProperty "big-endian-word32-array" $ \(xs :: [Word32]) ->
 60 |         let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word32])
 61 |             res = Exts.fromList xs :: PrimArray Word32
 62 |             sz = length xs * 4
 63 |             bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz
 64 |          in P.Success (Slice (sz + 1) 0 res)
 65 |               === P.parseBytes (BigEndian.word32Array () (length xs)) bs
 66 |     , testProperty "little-endian-word32-array" $ \(xs :: [Word32]) ->
 67 |         let src = Exts.fromList (coerce xs :: [Fixed 'LittleEndian Word32])
 68 |             res = Exts.fromList xs :: PrimArray Word32
 69 |             sz = length xs * 4
 70 |             bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz
 71 |          in P.Success (Slice (sz + 1) 0 res)
 72 |               === P.parseBytes (LittleEndian.word32Array () (length xs)) bs
 73 |     , testProperty "big-endian-word64-array" $ \(xs :: [Word64]) ->
 74 |         let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word64])
 75 |             res = Exts.fromList xs :: PrimArray Word64
 76 |             sz = length xs * 8
 77 |             bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz
 78 |          in P.Success (Slice (sz + 1) 0 res)
 79 |               === P.parseBytes (BigEndian.word64Array () (length xs)) bs
 80 |     , testProperty "little-endian-word64-array" $ \(xs :: [Word64]) ->
 81 |         let src = Exts.fromList (coerce xs :: [Fixed 'LittleEndian Word64])
 82 |             res = Exts.fromList xs :: PrimArray Word64
 83 |             sz = length xs * 8
 84 |             bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz
 85 |          in P.Success (Slice (sz + 1) 0 res)
 86 |               === P.parseBytes (LittleEndian.word64Array () (length xs)) bs
 87 |     , testProperty "little-endian-word128-array" $ \(xs :: [Word128]) ->
 88 |         let src = Exts.fromList xs
 89 |             sz = length xs * 16
 90 |             bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz
 91 |          in P.parseBytes (replicateM (length xs) (LittleEndian.word128 ())) bs
 92 |               === P.parseBytes (fmap Exts.toList (LittleEndian.word128Array () (length xs))) bs
 93 |     , testProperty "big-endian-word128-array" $ \(xs :: [Word128]) ->
 94 |         let src = Exts.fromList xs
 95 |             sz = length xs * 16
 96 |             bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz
 97 |          in P.parseBytes (replicateM (length xs) (BigEndian.word128 ())) bs
 98 |               === P.parseBytes (fmap Exts.toList (BigEndian.word128Array () (length xs))) bs
 99 |     , testProperty "cstring" $ \(xs :: [Word8]) ->
100 |         let ys = Exts.fromList xs
101 |             bs = Bytes.singleton 0x31 <> ys
102 |          in P.parseBytes (P.cstring () (Exts.Ptr "1"#) *> P.bytes () ys *> pure 42) bs
103 |               === (P.Success (Slice (Bytes.length ys + 1) 0 42) :: P.Result () Integer)
104 |     , testCase "big-endian-word256" $
105 |         P.parseBytesMaybe
106 |           (BigEndian.word256Array () 1)
107 |           ( Exts.fromList
108 |               [ 0x12
109 |               , 0x34
110 |               , 0x56
111 |               , 0x78
112 |               , 0x90
113 |               , 0x12
114 |               , 0x34
115 |               , 0x56
116 |               , 0x78
117 |               , 0x90
118 |               , 0x12
119 |               , 0x34
120 |               , 0x56
121 |               , 0x78
122 |               , 0x90
123 |               , 0x12
124 |               , 0x12
125 |               , 0x34
126 |               , 0x56
127 |               , 0x78
128 |               , 0x90
129 |               , 0x12
130 |               , 0x34
131 |               , 0x56
132 |               , 0x78
133 |               , 0x90
134 |               , 0x12
135 |               , 0x34
136 |               , 0x56
137 |               , 0x78
138 |               , 0x90
139 |               , 0x12
140 |               ]
141 |           )
142 |           @=? Just (Exts.fromList [0x1234567890123456789012345678901212345678901234567890123456789012])
143 |     , testProperty "big-endian-word64" bigEndianWord64
144 |     , testProperty "big-endian-word32" bigEndianWord32
145 |     , testProperty "little-endian-word32" littleEndianWord32
146 |     , testCase "delimit" $
147 |         P.Success (Slice 13 0 (167, 14625))
148 |           @=? P.parseBytes
149 |             ( do
150 |                 len <- Latin.decUnsignedInt ()
151 |                 Latin.char () ','
152 |                 r <-
153 |                   P.delimit () () len $
154 |                     (,)
155 |                       <$> Latin.decUnsignedInt ()
156 |                       <* Latin.char () '*'
157 |                       <*> Latin.decUnsignedInt ()
158 |                 Latin.char () '0'
159 |                 pure r
160 |             )
161 |             (bytes "9,167*146250")
162 |     , testGroup
163 |         "decUnsignedInt"
164 |         [ testCase "A" $
165 |             P.Failure ()
166 |               @=? P.parseBytes
167 |                 (Latin.decUnsignedInt ())
168 |                 (bytes "742493495120739103935542")
169 |         , testCase "B" $
170 |             P.Success (Slice 8 3 4654667)
171 |               @=? P.parseBytes
172 |                 (Latin.decUnsignedInt ())
173 |                 (bytes "4654667,55")
174 |         , testCase "C" $
175 |             P.Failure ()
176 |               @=? P.parseBytes
177 |                 (Latin.decUnsignedInt ())
178 |                 (bytes ('1' : show (maxBound :: Int)))
179 |         , testCase "D" $
180 |             P.Failure ()
181 |               @=? P.parseBytes
182 |                 (Latin.decUnsignedInt ())
183 |                 (bytes "2481030337885070917891")
184 |         , testCase "E" $
185 |             P.Failure ()
186 |               @=? P.parseBytes
187 |                 (Latin.decUnsignedInt ())
188 |                 (bytes (show (fromIntegral @Int @Word maxBound + 1)))
189 |         , testCase "F" $ withSz (show (maxBound :: Int)) $ \str len ->
190 |             P.Success (Slice len 0 maxBound)
191 |               @=? P.parseBytes (Latin.decUnsignedInt ()) str
192 |         , testProperty "property" $ \(QC.NonNegative i) ->
193 |             withSz (show i) $ \str len ->
194 |               P.parseBytes (Latin.decUnsignedInt ()) str
195 |                 === P.Success (Slice len 0 i)
196 |         ]
197 |     , testGroup
198 |         "hexNibbleLower"
199 |         [ testCase "A" $
200 |             P.parseBytes (Latin.hexNibbleLower ()) (bytes "Ab") @=? P.Failure ()
201 |         , testCase "B" $
202 |             P.parseBytes (Latin.hexNibbleLower ()) (bytes "bA") @=? P.Success (Slice 2 1 0xb)
203 |         , testCase "C" $
204 |             P.parseBytes (Latin.hexNibbleLower ()) (bytes "") @=? P.Failure ()
205 |         ]
206 |     , testGroup
207 |         "tryHexNibbleLower"
208 |         [ testCase "A" $
209 |             P.Success @() (Slice 1 2 Nothing)
210 |               @=? P.parseBytes Latin.tryHexNibbleLower (bytes "Ab")
211 |         , testCase "B" $
212 |             P.Success @() (Slice 2 1 (Just 0xb))
213 |               @=? P.parseBytes Latin.tryHexNibbleLower (bytes "bA")
214 |         , testCase "C" $
215 |             P.Success @() (Slice 1 0 Nothing)
216 |               @=? P.parseBytes Latin.tryHexNibbleLower (bytes "")
217 |         ]
218 |     , testGroup
219 |         "decPositiveInteger"
220 |         [ testCase "A" $
221 |             P.parseBytes
222 |               (Latin.decUnsignedInteger ())
223 |               (bytes "5469999463123462573426452736423546373235260")
224 |               @=? P.Success
225 |                 (Slice 44 0 5469999463123462573426452736423546373235260)
226 |         , testProperty "property" $ \(LargeInteger i) ->
227 |             withSz (show i) $ \str len ->
228 |               i
229 |                 >= 0
230 |                   QC.==> P.parseBytes (Latin.decUnsignedInteger ()) str
231 |                   === P.Success (Slice len 0 i)
232 |         ]
233 |     , testGroup
234 |         "decTrailingInteger"
235 |         [ testProperty "property" $ \(LargeInteger i) ->
236 |             withSz (show i) $ \str sz ->
237 |               i
238 |                 >= 0
239 |                   QC.==> P.parseBytes (Latin.decTrailingInteger 2) str
240 |                   === (P.Success (Slice sz 0 (read ('2' : show i) :: Integer)) :: P.Result () Integer)
241 |         ]
242 |     , testGroup
243 |         "decSignedInteger"
244 |         [ testCase "A" $
245 |             P.parseBytes
246 |               (Latin.decSignedInteger ())
247 |               (bytes "-54699994631234625734264527364235463732352601")
248 |               @=? P.Success
249 |                 ( Slice
250 |                     46
251 |                     0
252 |                     (-54699994631234625734264527364235463732352601)
253 |                 )
254 |         , testCase "B" $
255 |             P.Success (Slice 25 0 (3, (-206173954435705292503)))
256 |               @=? P.parseBytes
257 |                 ( pure (,)
258 |                     <*> Latin.decSignedInteger ()
259 |                     <* Latin.char () 'e'
260 |                     <*> Latin.decSignedInteger ()
261 |                 )
262 |                 (bytes "3e-206173954435705292503")
263 |         , testProperty "property" $ \(LargeInteger i) ->
264 |             withSz (show i) $ \str len ->
265 |               P.parseBytes (Latin.decSignedInteger ()) str
266 |                 === P.Success (Slice len 0 i)
267 |         ]
268 |     , testGroup
269 |         "decSignedInt"
270 |         [ testProperty "A" $ \i -> withSz (show i) $ \str len ->
271 |             P.parseBytes (Latin.decSignedInt ()) str
272 |               === P.Success (Slice len 0 i)
273 |         , testProperty "B" $ \i ->
274 |             let s = (if i >= 0 then "+" else "") ++ show i
275 |              in withSz s $ \str len ->
276 |                   P.parseBytes (Latin.decSignedInt ()) str
277 |                     === P.Success (Slice len 0 i)
278 |         , testCase "C" $
279 |             P.Failure ()
280 |               @=? P.parseBytes
281 |                 (Latin.decSignedInt ())
282 |                 (bytes ('1' : show (maxBound :: Int)))
283 |         , testCase "D" $
284 |             P.Failure ()
285 |               @=? P.parseBytes
286 |                 (Latin.decSignedInt ())
287 |                 (bytes ('-' : '3' : show (maxBound :: Int)))
288 |         , testCase "E" $
289 |             P.Failure ()
290 |               @=? P.parseBytes
291 |                 (Latin.decSignedInt ())
292 |                 (bytes "2481030337885070917891")
293 |         , testCase "F" $
294 |             P.Failure ()
295 |               @=? P.parseBytes
296 |                 (Latin.decSignedInt ())
297 |                 (bytes "-4305030950553840988981")
298 |         , testCase "G" $ withSz (show (minBound :: Int)) $ \str len ->
299 |             P.Success (Slice len 0 minBound)
300 |               @=? P.parseBytes (Latin.decSignedInt ()) str
301 |         , testCase "H" $ withSz (show (maxBound :: Int)) $ \str len ->
302 |             P.Success (Slice len 0 maxBound)
303 |               @=? P.parseBytes (Latin.decSignedInt ()) str
304 |         , testCase "I" $
305 |             P.Failure ()
306 |               @=? P.parseBytes
307 |                 (Latin.decSignedInt ())
308 |                 (bytes (show (fromIntegral @Int @Word maxBound + 1)))
309 |         , testCase "J" $
310 |             -- This is one number lower than the minimum bound for
311 |             -- a signed 64-bit number, but this test will pass on
312 |             -- 32-bit architectures as well.
313 |             P.Failure ()
314 |               @=? P.parseBytes
315 |                 (Latin.decSignedInt ())
316 |                 (bytes "-9223372036854775809")
317 |         ]
318 |     , testGroup
319 |         "decWord64"
320 |         [ testCase "A" $
321 |             P.Failure ()
322 |               @=? P.parseBytes
323 |                 (Latin.decWord64 ())
324 |                 (bytes "2481030337885070917891")
325 |         ]
326 |     , testCase "decWord-composition" $
327 |         P.Success (Slice 6 0 (42, 8))
328 |           @=? P.parseBytes
329 |             ( pure (,)
330 |                 <*> Ascii.decWord ()
331 |                 <* Ascii.char () '.'
332 |                 <*> Ascii.decWord ()
333 |                 <* Ascii.char () '.'
334 |             )
335 |             (bytes "42.8.")
336 |     , testCase "decWord-replicate" $
337 |         P.Success (Slice 7 0 (Exts.fromList [42, 93] :: PrimArray Word))
338 |           @=? P.parseBytes
339 |             (P.replicate 2 (Ascii.decWord () <* Ascii.char () '.'))
340 |             (bytes "42.93.")
341 |     , testCase "ascii-takeShortWhile" $
342 |         P.Success (Slice 11 0 (Exts.fromList ["the", "world"] :: PM.Array ShortText))
343 |           @=? P.parseBytes
344 |             (P.replicate 2 (Ascii.takeShortWhile (/= ',') <* Ascii.char () ','))
345 |             (bytes "the,world,")
346 |     , testGroup
347 |         "hexFixedWord8"
348 |         [ testCase "A" $
349 |             P.parseBytes (Latin.hexFixedWord8 ()) (bytes "A") @=? P.Failure ()
350 |         , testCase "B" $
351 |             P.parseBytes (Latin.hexFixedWord8 ()) (bytes "0A") @=? P.Success (Slice 3 0 0x0A)
352 |         , testCase "C" $
353 |             P.parseBytes (Latin.hexFixedWord8 ()) (bytes "") @=? P.Failure ()
354 |         , testCase "D" $
355 |             P.parseBytes (Latin.hexFixedWord8 ()) (bytes "A!") @=? P.Failure ()
356 |         ]
357 |     , testGroup
358 |         "hexFixedWord16"
359 |         [ testCase "A" $
360 |             P.parseBytes (Latin.hexFixedWord16 ()) (bytes "A") @=? P.Failure ()
361 |         , testCase "B" $
362 |             P.parseBytes (Latin.hexFixedWord16 ()) (bytes "0A0A") @=? P.Success (Slice 5 0 0x0A0A)
363 |         , testCase "C" $
364 |             P.parseBytes (Latin.hexFixedWord16 ()) (bytes "") @=? P.Failure ()
365 |         , testCase "D" $
366 |             P.parseBytes (Latin.hexFixedWord16 ()) (bytes "A!A!") @=? P.Failure ()
367 |         ]
368 |     , testGroup
369 |         "hexFixedWord32"
370 |         [ testCase "A" $
371 |             P.parseBytes (Latin.hexFixedWord32 ()) (bytes "A") @=? P.Failure ()
372 |         , testCase "B" $
373 |             P.parseBytes (Latin.hexFixedWord32 ()) (bytes "0A0A0A0A") @=? P.Success (Slice 9 0 0x0A0A0A0A)
374 |         , testCase "C" $
375 |             P.parseBytes (Latin.hexFixedWord32 ()) (bytes "") @=? P.Failure ()
376 |         , testCase "D" $
377 |             P.parseBytes (Latin.hexFixedWord32 ()) (bytes "A!A0A0A0") @=? P.Failure ()
378 |         ]
379 |     , testGroup
380 |         "hexFixedWord64"
381 |         [ testCase "A" $
382 |             P.parseBytes (Latin.hexFixedWord64 ()) (bytes "ABCD01235678BCDE")
383 |               @=? P.Success
384 |                 (Slice 17 0 0xABCD01235678BCDE)
385 |         ]
386 |     , testGroup
387 |         "base128-w32"
388 |         [ testCase "A" $
389 |             P.Success (Slice 2 0 0x7E)
390 |               @=? P.parseBytes (Base128.word32 ()) (bytes "\x7E")
391 |         , testCase "B" $
392 |             P.Success (Slice 5 0 0x200000)
393 |               @=? P.parseBytes (Base128.word32 ()) (bytes "\x81\x80\x80\x00")
394 |         , testCase "C" $
395 |             P.Success (Slice 4 0 1656614)
396 |               @=? P.parseBytes (Base128.word32 ()) (bytes "\xE5\x8E\x26")
397 |               -- , testProperty "iso" $ \w -> -- TODO
398 |               --     P.parseBytesMaybe (Base.word32 ()) (encodeBase128 (fromIntegral w))
399 |               --     ===
400 |               --     Just w
401 |         ]
402 |     , testGroup
403 |         "leb128-w32"
404 |         [ testCase "A" $
405 |             P.Success (Slice 2 0 0x7E)
406 |               @=? P.parseBytes (Leb128.word32 ()) (bytes "\x7E")
407 |         , testCase "B" $
408 |             P.Success (Slice 5 0 0x200000)
409 |               @=? P.parseBytes (Leb128.word32 ()) (bytes "\x80\x80\x80\x01")
410 |         , testCase "C" $
411 |             P.Success (Slice 4 0 624485)
412 |               @=? P.parseBytes (Leb128.word32 ()) (bytes "\xE5\x8E\x26")
413 |         , testProperty "iso" $ \w ->
414 |             P.parseBytesMaybe (Leb128.word32 ()) (encodeLeb128 (fromIntegral w))
415 |               === Just w
416 |         ]
417 |     , testGroup
418 |         "leb128-w16"
419 |         [ testCase "A" $
420 |             P.Failure ()
421 |               @=? P.parseBytes (Leb128.word16 ()) (bytes "\x80\x80\x04")
422 |         , testCase "B" $
423 |             P.Success (Slice 4 0 0xFFFF)
424 |               @=? P.parseBytes (Leb128.word16 ()) (bytes "\xFF\xFF\x03")
425 |         , testProperty "iso" $ \w ->
426 |             P.parseBytesMaybe (Leb128.word16 ()) (encodeLeb128 (fromIntegral w))
427 |               === Just w
428 |         ]
429 |     , testGroup
430 |         "leb128-i16"
431 |         [ testProperty "iso" $ \(w :: Int16) ->
432 |             P.parseBytesMaybe
433 |               (Leb128.int16 ())
434 |               (encodeLeb128 (fromIntegral @Word16 @Natural (zigzag16 w)))
435 |               === Just w
436 |         ]
437 |     , testGroup
438 |         "leb128-i32"
439 |         [ testProperty "iso" $ \(w :: Int32) ->
440 |             P.parseBytesMaybe
441 |               (Leb128.int32 ())
442 |               (encodeLeb128 (fromIntegral @Word32 @Natural (zigzag32 w)))
443 |               === Just w
444 |         ]
445 |     , testGroup
446 |         "satisfy"
447 |         [ testCase "A" $
448 |             P.Success (Slice 2 0 0x20)
449 |               @=? P.parseBytes (P.satisfy () (== 0x20)) (bytes "\x20")
450 |         ]
451 |     ]
452 | 
453 | bytes :: String -> Bytes
454 | bytes s = let b = pack ('x' : s) in Bytes b 1 (PM.sizeofByteArray b - 1)
455 | 
456 | pack :: String -> ByteArray
457 | pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
458 | 
459 | bigEndianWord64 ::
460 |   Word8 ->
461 |   Word8 ->
462 |   Word8 ->
463 |   Word8 ->
464 |   Word8 ->
465 |   Word8 ->
466 |   Word8 ->
467 |   Word8 ->
468 |   QC.Property
469 | bigEndianWord64 a b c d e f g h =
470 |   let arr = runST $ do
471 |         m <- PM.newByteArray 11
472 |         PM.writeByteArray m 0 (0xFF :: Word8)
473 |         PM.writeByteArray m 1 (0xFF :: Word8)
474 |         PM.writeByteArray m 2 (a :: Word8)
475 |         PM.writeByteArray m 3 (b :: Word8)
476 |         PM.writeByteArray m 4 (c :: Word8)
477 |         PM.writeByteArray m 5 (d :: Word8)
478 |         PM.writeByteArray m 6 (e :: Word8)
479 |         PM.writeByteArray m 7 (f :: Word8)
480 |         PM.writeByteArray m 8 (g :: Word8)
481 |         PM.writeByteArray m 9 (h :: Word8)
482 |         PM.writeByteArray m 10 (0xEE :: Word8)
483 |         PM.unsafeFreezeByteArray m
484 |       expected =
485 |         (0 :: Word64)
486 |           + fromIntegral a * 256 ^ (7 :: Integer)
487 |           + fromIntegral b * 256 ^ (6 :: Integer)
488 |           + fromIntegral c * 256 ^ (5 :: Integer)
489 |           + fromIntegral d * 256 ^ (4 :: Integer)
490 |           + fromIntegral e * 256 ^ (3 :: Integer)
491 |           + fromIntegral f * 256 ^ (2 :: Integer)
492 |           + fromIntegral g * 256 ^ (1 :: Integer)
493 |           + fromIntegral h * 256 ^ (0 :: Integer)
494 |    in P.parseBytes (BigEndian.word64 ()) (Bytes arr 2 9)
495 |         === P.Success (Slice 10 1 expected)
496 | 
497 | bigEndianWord32 ::
498 |   Word8 ->
499 |   Word8 ->
500 |   Word8 ->
501 |   Word8 ->
502 |   QC.Property
503 | bigEndianWord32 a b c d =
504 |   let arr = runST $ do
505 |         m <- PM.newByteArray 7
506 |         PM.writeByteArray m 0 (0xFF :: Word8)
507 |         PM.writeByteArray m 1 (0xFF :: Word8)
508 |         PM.writeByteArray m 2 (a :: Word8)
509 |         PM.writeByteArray m 3 (b :: Word8)
510 |         PM.writeByteArray m 4 (c :: Word8)
511 |         PM.writeByteArray m 5 (d :: Word8)
512 |         PM.writeByteArray m 6 (0xEE :: Word8)
513 |         PM.unsafeFreezeByteArray m
514 |       expected =
515 |         (0 :: Word32)
516 |           + fromIntegral a * 256 ^ (3 :: Integer)
517 |           + fromIntegral b * 256 ^ (2 :: Integer)
518 |           + fromIntegral c * 256 ^ (1 :: Integer)
519 |           + fromIntegral d * 256 ^ (0 :: Integer)
520 |    in P.parseBytes (BigEndian.word32 ()) (Bytes arr 2 5)
521 |         === P.Success (Slice 6 1 expected)
522 | 
523 | littleEndianWord32 ::
524 |   Word8 ->
525 |   Word8 ->
526 |   Word8 ->
527 |   Word8 ->
528 |   QC.Property
529 | littleEndianWord32 a b c d =
530 |   let arr = runST $ do
531 |         m <- PM.newByteArray 7
532 |         PM.writeByteArray m 0 (0xFF :: Word8)
533 |         PM.writeByteArray m 1 (0xFF :: Word8)
534 |         PM.writeByteArray m 2 (a :: Word8)
535 |         PM.writeByteArray m 3 (b :: Word8)
536 |         PM.writeByteArray m 4 (c :: Word8)
537 |         PM.writeByteArray m 5 (d :: Word8)
538 |         PM.writeByteArray m 6 (0xEE :: Word8)
539 |         PM.unsafeFreezeByteArray m
540 |       expected =
541 |         (0 :: Word32)
542 |           + fromIntegral a * 256 ^ (0 :: Integer)
543 |           + fromIntegral b * 256 ^ (1 :: Integer)
544 |           + fromIntegral c * 256 ^ (2 :: Integer)
545 |           + fromIntegral d * 256 ^ (3 :: Integer)
546 |    in P.parseBytes (LittleEndian.word32 ()) (Bytes arr 2 5)
547 |         === P.Success (Slice 6 1 expected)
548 | 
549 | -- The Arbitrary instance for Integer that comes with
550 | -- QuickCheck only generates small numbers.
551 | newtype LargeInteger = LargeInteger Integer
552 |   deriving (Eq, Show)
553 | 
554 | instance QC.Arbitrary Word128 where
555 |   arbitrary = liftA2 Word128 QC.arbitrary QC.arbitrary
556 | 
557 | instance QC.Arbitrary LargeInteger where
558 |   arbitrary = do
559 |     n <- QC.choose (1, 27)
560 |     sign <- QC.arbitrary
561 |     r <-
562 |       (if sign then negate else id) . foldr f 0
563 |         <$> replicateM n QC.arbitrary
564 |     pure (LargeInteger r)
565 |    where
566 |     f :: Word8 -> Integer -> Integer
567 |     f w acc = (acc `Bits.shiftL` 8) + fromIntegral w
568 | 
569 | -- We add an extra 1 since bytes gives us a slice that
570 | -- starts at that offset.
571 | withSz :: String -> (Bytes -> Int -> a) -> a
572 | withSz str f = f (bytes str) (length str + 1)
573 | 
574 | untype :: PrimArray a -> ByteArray
575 | untype (PrimArray x) = ByteArray x
576 | 
577 | encodeLeb128 :: Natural -> Bytes
578 | encodeLeb128 x = Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x))
579 |  where
580 |   go !xs !n =
581 |     let (q, r) = quotRem n 128
582 |         r' = fromIntegral @Natural @Word8 r
583 |         w =
584 |           if q == 0
585 |             then r'
586 |             else Bits.setBit r' 7
587 |         xs' = w : xs
588 |      in if q == 0
589 |           then List.reverse xs'
590 |           else go xs' q
591 | 
592 | -- x zigzagInteger :: Integer -> Natural
593 | -- x zigzagInteger x
594 | -- x   | x>=0 = fromInteger (x `Bits.shiftL` 1)
595 | -- x   | otherwise = fromInteger (negate (x `Bits.shiftL` 1) - 1)
596 | 
597 | zigzag16 :: Int16 -> Word16
598 | zigzag16 x = fromIntegral ((x `Bits.shiftL` 1) `Bits.xor` (x `Bits.shiftR` 15))
599 | 
600 | zigzag32 :: Int32 -> Word32
601 | zigzag32 x = fromIntegral ((x `Bits.shiftL` 1) `Bits.xor` (x `Bits.shiftR` 31))
602 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE BinaryLiterals #-}
  3 | {-# LANGUAGE CPP #-}
  4 | {-# LANGUAGE DataKinds #-}
  5 | {-# LANGUAGE DerivingStrategies #-}
  6 | {-# LANGUAGE GADTSyntax #-}
  7 | {-# LANGUAGE LambdaCase #-}
  8 | {-# LANGUAGE MagicHash #-}
  9 | {-# LANGUAGE MultiWayIf #-}
 10 | {-# LANGUAGE PolyKinds #-}
 11 | {-# LANGUAGE RankNTypes #-}
 12 | {-# LANGUAGE ScopedTypeVariables #-}
 13 | {-# LANGUAGE TypeApplications #-}
 14 | {-# LANGUAGE UnboxedTuples #-}
 15 | 
 16 | {- | Parse non-resumable sequence of bytes. To parse a byte sequence
 17 | as text, use the @Ascii@, @Latin@, and @Utf8@ modules instead.
 18 | Functions for parsing decimal-encoded numbers are found in those
 19 | modules.
 20 | -}
 21 | module Data.Bytes.Parser
 22 |   ( -- * Types
 23 |     Parser
 24 |   , Result (..)
 25 |   , Slice (..)
 26 | 
 27 |     -- * Run Parsers
 28 | 
 29 |     -- ** Result
 30 |   , parseByteArray
 31 |   , parseBytes
 32 |   , parseBytesEffectfully
 33 |   , parseBytesEither
 34 |   , parseBytesMaybe
 35 | 
 36 |     -- * One Byte
 37 |   , any
 38 | 
 39 |     -- * Many Bytes
 40 |   , take
 41 |   , takeN
 42 |   , takeUpTo
 43 |   , takeWhile
 44 |   , takeTrailedBy
 45 | 
 46 |     -- * Skip
 47 |   , skipWhile
 48 |   , skipTrailedBy
 49 |   , skipTrailedBy2
 50 |   , skipTrailedBy2#
 51 |   , skipTrailedBy3#
 52 | 
 53 |     -- * Match
 54 |   , byteArray
 55 |   , bytes
 56 |   , satisfy
 57 |   , satisfyWith
 58 |   , cstring
 59 | 
 60 |     -- * End of Input
 61 |   , endOfInput
 62 |   , isEndOfInput
 63 |   , remaining
 64 |   , peekRemaining
 65 | 
 66 |     -- * Scanning
 67 |   , scan
 68 | 
 69 |     -- * Lookahead
 70 |   , peek
 71 |   , peek'
 72 | 
 73 |     -- * Control Flow
 74 |   , fail
 75 |   , orElse
 76 |   , annotate
 77 |   , (<?>)
 78 |   , mapErrorEffectfully
 79 | 
 80 |     -- * Repetition
 81 |   , replicate
 82 |   , listUntilEoi
 83 | 
 84 |     -- * Subparsing
 85 |   , delimit
 86 |   , measure
 87 |   , measure_
 88 |   , measure_#
 89 | 
 90 |     -- * Lift Effects
 91 |   , effect
 92 | 
 93 |     -- * Box Result
 94 |   , boxWord32
 95 |   , boxIntPair
 96 | 
 97 |     -- * Unbox Result
 98 |   , unboxWord32
 99 |   , unboxIntPair
100 | 
101 |     -- * Specialized Bind
102 | 
103 |     -- | Sometimes, GHC ends up building join points in a way that
104 |     -- boxes arguments unnecessarily. In this situation, special variants
105 |     -- of monadic @>>=@ can be helpful. If @C#@, @I#@, etc. never
106 |     -- get used in your original source code, GHC will not introduce them.
107 |   , bindFromCharToLifted
108 |   , bindFromCharToByteArrayIntInt
109 |   , bindFromWordToByteArrayIntInt
110 |   , bindFromLiftedToIntPair
111 |   , bindFromLiftedToInt
112 |   , bindFromIntToIntPair
113 |   , bindFromCharToIntPair
114 |   , bindFromLiftedToByteArrayIntInt
115 |   , bindFromByteArrayIntIntToLifted
116 |   , bindFromMaybeCharToIntPair
117 |   , bindFromMaybeCharToLifted
118 | 
119 |     -- * Specialized Pure
120 |   , pureIntPair
121 |   , pureByteArrayIntInt
122 | 
123 |     -- * Specialized Fail
124 |   , failIntPair
125 |   , failByteArrayIntInt
126 |   ) where
127 | 
128 | import Prelude hiding (any, fail, length, replicate, take, takeWhile)
129 | 
130 | import Data.Bytes.Parser.Internal (Parser (..), Result#, ST#, boxBytes, fail, unboxBytes, uneffectful, uneffectful#, uneffectfulInt#)
131 | import Data.Bytes.Parser.Internal (failByteArrayIntInt)
132 | import Data.Bytes.Parser.Types (Result (Failure, Success), Slice (Slice))
133 | import Data.Bytes.Parser.Unsafe (cursor, expose, unconsume)
134 | import Data.Bytes.Types (Bytes (..), BytesN (BytesN))
135 | import Data.Primitive (ByteArray (..))
136 | import Data.Primitive.Contiguous (Contiguous, Element)
137 | import Foreign.C.String (CString)
138 | import GHC.Exts (Char#, Int (I#), Int#, Word#, runRW#, (+#), (-#), (>=#))
139 | import GHC.Exts (ByteArray#)
140 | import GHC.ST (ST (..))
141 | import GHC.Word (Word32 (W32#), Word8)
142 | 
143 | import qualified Arithmetic.Nat as Nat
144 | import qualified Arithmetic.Types as Arithmetic
145 | import qualified Data.Bytes as B
146 | import qualified Data.Bytes.Parser.Internal as Internal
147 | import qualified Data.List as List
148 | import qualified Data.Primitive as PM
149 | import qualified Data.Primitive.Contiguous as C
150 | import qualified GHC.Exts as Exts
151 | 
152 | {- | Parse a byte sequence. This can succeed even if the
153 | entire slice was not consumed by the parser.
154 | -}
155 | parseBytes :: forall e a. (forall s. Parser e s a) -> Bytes -> Result e a
156 | {-# INLINE parseBytes #-}
157 | parseBytes p !b = runResultST action
158 |  where
159 |   action :: forall s. ST# s (Result# e a)
160 |   action s0 = case p @s of
161 |     Parser f -> f (unboxBytes b) s0
162 | 
163 | {- | Variant of 'parseBytesEither' that discards the error message on failure.
164 | Just like 'parseBytesEither', this does not impose any checks on the length
165 | of the remaining input.
166 | -}
167 | parseBytesMaybe :: forall e a. (forall s. Parser e s a) -> Bytes -> Maybe a
168 | {-# INLINE parseBytesMaybe #-}
169 | parseBytesMaybe p !b = runMaybeST action
170 |  where
171 |   action :: forall s. ST# s (Result# e a)
172 |   action s0 = case p @s of
173 |     Parser f -> f (unboxBytes b) s0
174 | 
175 | {- | Variant of 'parseBytes' that discards the new offset and the
176 | remaining length. This does not, however, require the remaining
177 | length to be zero. Use 'endOfInput' to accomplish that.
178 | -}
179 | parseBytesEither :: forall e a. (forall s. Parser e s a) -> Bytes -> Either e a
180 | {-# INLINE parseBytesEither #-}
181 | parseBytesEither p !b = runEitherST action
182 |  where
183 |   action :: forall s. ST# s (Result# e a)
184 |   action s0 = case p @s of
185 |     Parser f -> f (unboxBytes b) s0
186 | 
187 | -- Similar to runResultST
188 | runMaybeST :: (forall s. ST# s (Result# e x)) -> Maybe x
189 | {-# INLINE runMaybeST #-}
190 | runMaybeST f = case (runRW# (\s0 -> case f s0 of (# _, r #) -> r)) of
191 |   (# _ | #) -> Nothing
192 |   (# | (# x, _, _ #) #) -> Just x
193 | 
194 | -- Similar to runResultST
195 | runEitherST :: (forall s. ST# s (Result# e x)) -> Either e x
196 | {-# INLINE runEitherST #-}
197 | runEitherST f = case (runRW# (\s0 -> case f s0 of (# _, r #) -> r)) of
198 |   (# e | #) -> Left e
199 |   (# | (# x, _, _ #) #) -> Right x
200 | 
201 | -- This is used internally to help reduce boxing when a parser
202 | -- gets run. Due to the late inlining of runRW#, this variant
203 | -- of runST still cause the result value to be boxed. However,
204 | -- it avoids the additional boxing that the Success data
205 | -- constructor would normally cause.
206 | runResultST :: (forall s. ST# s (Result# e x)) -> Result e x
207 | {-# INLINE runResultST #-}
208 | runResultST f = case (runRW# (\s0 -> case f s0 of (# _, r #) -> r)) of
209 |   (# e | #) -> Failure e
210 |   (# | (# x, off, len #) #) -> Success (Slice (I# off) (I# len) x)
211 | 
212 | -- | Variant of 'parseBytes' that accepts an unsliced 'ByteArray'.
213 | parseByteArray :: (forall s. Parser e s a) -> ByteArray -> Result e a
214 | {-# INLINE parseByteArray #-}
215 | parseByteArray p b =
216 |   parseBytes p (Bytes b 0 (PM.sizeofByteArray b))
217 | 
218 | {- | Variant of 'parseBytes' that allows the parser to be run
219 | as part of an existing effectful context.
220 | -}
221 | parseBytesEffectfully :: Parser e s a -> Bytes -> ST s (Result e a)
222 | {-# INLINE parseBytesEffectfully #-}
223 | parseBytesEffectfully (Parser f) !b =
224 |   ST
225 |     ( \s0 -> case f (unboxBytes b) s0 of
226 |         (# s1, r #) -> (# s1, boxPublicResult r #)
227 |     )
228 | 
229 | -- | Lift an effectful computation into a parser.
230 | effect :: ST s a -> Parser e s a
231 | {-# INLINE effect #-}
232 | effect (ST f) =
233 |   Parser
234 |     ( \(# _, off, len #) s0 -> case f s0 of
235 |         (# s1, a #) -> (# s1, (# | (# a, off, len #) #) #)
236 |     )
237 | 
238 | byteArray :: e -> ByteArray -> Parser e s ()
239 | {-# INLINE byteArray #-}
240 | byteArray e !expected = bytes e (B.fromByteArray expected)
241 | 
242 | -- | Consume input matching the byte sequence.
243 | bytes :: e -> Bytes -> Parser e s ()
244 | bytes e !expected =
245 |   Parser
246 |     ( \actual@(# _, off, len #) s ->
247 |         let r =
248 |               if B.isPrefixOf expected (boxBytes actual)
249 |                 then
250 |                   let !(I# movement) = length expected
251 |                    in (# | (# (), off +# movement, len -# movement #) #)
252 |                 else (# e | #)
253 |          in (# s, r #)
254 |     )
255 | 
256 | {- FOURMOLU_DISABLE -}
257 | -- | Consume input matching the @NUL@-terminated C String.
258 | cstring :: e -> CString -> Parser e s ()
259 | cstring e (Exts.Ptr ptr0) = Parser
260 |   ( \(# arr, off0, len0 #) s ->
261 |     let go !ptr !off !len = case
262 |                  Exts.word8ToWord#
263 |             (Exts.indexWord8OffAddr# ptr 0#) of
264 |           0## -> (# s, (# | (# (), off, len #) #) #)
265 |           c -> case len of
266 |             0# -> (# s, (# e | #) #)
267 |             _ -> case Exts.eqWord# c (
268 |                  Exts.word8ToWord#
269 |               (Exts.indexWord8Array# arr off)) of
270 |               1# -> go (Exts.plusAddr# ptr 1# ) (off +# 1# ) (len -# 1# )
271 |               _ -> (# s, (# e | #) #)
272 |      in go ptr0 off0 len0
273 |   )
274 | {- FOURMOLU_ENABLE -}
275 | 
276 | infix 0 <?>
277 | 
278 | -- | Infix version of 'annotate'.
279 | (<?>) :: Parser x s a -> e -> Parser e s a
280 | (<?>) = annotate
281 | 
282 | {- | Annotate a parser. If the parser fails, the error will
283 |   be returned.
284 | -}
285 | annotate :: Parser x s a -> e -> Parser e s a
286 | annotate p e = p `orElse` fail e
287 | 
288 | {- | Consumes and returns the next byte in the input.
289 | Fails if no characters are left.
290 | -}
291 | any :: e -> Parser e s Word8
292 | {-# INLINE any #-}
293 | any e = uneffectful $ \chunk ->
294 |   if length chunk > 0
295 |     then
296 |       let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
297 |        in Internal.Success w (offset chunk + 1) (length chunk - 1)
298 |     else Internal.Failure e
299 | 
300 | {- | Match any byte, to perform lookahead. Returns 'Nothing' if
301 |   end of input has been reached. Does not consume any input.
302 | 
303 |   /Note/: Because this parser does not fail, do not use it
304 |   with combinators such as 'many', because such as 'many',
305 |   because such parsers loop until a failure occurs. Careless
306 |   use will thus result in an infinite loop.
307 | -}
308 | peek :: Parser e s (Maybe Word8)
309 | {-# INLINE peek #-}
310 | peek = uneffectful $ \chunk ->
311 |   let v =
312 |         if length chunk > 0
313 |           then Just (B.unsafeIndex chunk 0)
314 |           else Nothing
315 |    in Internal.Success v (offset chunk) (length chunk)
316 | 
317 | {- | Match any byte, to perform lookahead. Does not consume any
318 |   input, but will fail if end of input has been reached.
319 | -}
320 | peek' :: e -> Parser e s Word8
321 | {-# INLINE peek' #-}
322 | peek' e = uneffectful $ \chunk ->
323 |   if length chunk > 0
324 |     then Internal.Success (B.unsafeIndex chunk 0) (offset chunk) (length chunk)
325 |     else Internal.Failure e
326 | 
327 | {- | A stateful scanner. The predicate consumes and transforms a
328 |   state argument, and each transformed state is passed to
329 |   successive invocations of the predicate on each byte of the input
330 |   until one returns 'Nothing' or the input ends.
331 | 
332 |   This parser does not fail. It will return the initial state
333 |   if the predicate returns 'Nothing' on the first byte of input.
334 | 
335 |   /Note/: Because this parser does not fail, do not use it with
336 |   combinators such a 'many', because such parsers loop until a
337 |   failure occurs. Careless use will thus result in an infinite loop.
338 | -}
339 | scan :: state -> (state -> Word8 -> Maybe state) -> Parser e s state
340 | {-# INLINE scan #-}
341 | scan s0 t = do
342 |   let go s = do
343 |         mw <- peek
344 |         case mw of
345 |           Nothing -> pure s
346 |           Just w -> case t s w of
347 |             Just s' -> go s'
348 |             Nothing -> pure s
349 |   go s0
350 | 
351 | -- Interpret the next byte as an ASCII-encoded character.
352 | -- Does not check to see if any characters are left. This
353 | -- is not exported.
354 | anyUnsafe :: Parser e s Word8
355 | {-# INLINE anyUnsafe #-}
356 | anyUnsafe = uneffectful $ \chunk ->
357 |   let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
358 |    in Internal.Success w (offset chunk + 1) (length chunk - 1)
359 | 
360 | {- | Take while the predicate is matched. This is always inlined. This
361 | always succeeds.
362 | -}
363 | takeWhile :: (Word8 -> Bool) -> Parser e s Bytes
364 | {-# INLINE takeWhile #-}
365 | takeWhile f = uneffectful $ \chunk -> case B.takeWhile f chunk of
366 |   bs -> Internal.Success bs (offset chunk + length bs) (length chunk - length bs)
367 | 
368 | {- | Take bytes until the specified byte is encountered. Consumes
369 | the matched byte as well. Fails if the byte is not present.
370 | Visually, the cursor advancement and resulting @Bytes@ for
371 | @takeTrailedBy 0x19@ look like this:
372 | 
373 | >  0x10 0x13 0x08 0x15 0x19 0x23 0x17 | input
374 | > |---->---->---->---->----|          | cursor
375 | > {\----*----*----*----\}               | result bytes
376 | -}
377 | takeTrailedBy :: e -> Word8 -> Parser e s Bytes
378 | takeTrailedBy e !w = do
379 |   !start <- cursor
380 |   skipTrailedBy e w
381 |   !end <- cursor
382 |   !arr <- expose
383 |   pure (Bytes arr start (end - (start + 1)))
384 | 
385 | {- | Skip all characters until the character from the is encountered
386 | and then consume the matching byte as well.
387 | -}
388 | skipTrailedBy :: e -> Word8 -> Parser e s ()
389 | {-# INLINE skipTrailedBy #-}
390 | skipTrailedBy e !w = uneffectful# (\c -> skipUntilConsumeByteLoop e w c)
391 | 
392 | skipUntilConsumeByteLoop ::
393 |   e -> -- Error message
394 |   Word8 -> -- byte to match
395 |   Bytes -> -- Chunk
396 |   Result# e ()
397 | skipUntilConsumeByteLoop e !w !c =
398 |   if length c > 0
399 |     then
400 |       if PM.indexByteArray (array c) (offset c) /= (w :: Word8)
401 |         then skipUntilConsumeByteLoop e w (B.unsafeDrop 1 c)
402 |         else (# | (# (), unI (offset c + 1), unI (length c - 1) #) #)
403 |     else (# e | #)
404 | 
405 | {- | Skip all bytes until either of the bytes in encountered. Then,
406 | consume the matched byte. @True@ indicates that the first argument
407 | byte was encountered. @False@ indicates that the second argument
408 | byte was encountered.
409 | -}
410 | skipTrailedBy2 ::
411 |   -- | Error message
412 |   e ->
413 |   -- | First trailer, @False@ indicates that this was encountered
414 |   Word8 ->
415 |   -- | Second trailer, @True@ indicates that this was encountered
416 |   Word8 ->
417 |   Parser e s Bool
418 | {-# INLINE skipTrailedBy2 #-}
419 | skipTrailedBy2 e !wa !wb = boxBool (skipTrailedBy2# e wa wb)
420 | 
421 | skipTrailedBy2# ::
422 |   -- | Error message
423 |   e ->
424 |   -- | First trailer, 0 indicates that this was encountered
425 |   Word8 ->
426 |   -- | Second trailer, 1 indicates that this was encountered
427 |   Word8 ->
428 |   Parser e s Int#
429 | {-# INLINE skipTrailedBy2# #-}
430 | skipTrailedBy2# e !wa !wb =
431 |   uneffectfulInt# (\c -> skipUntilConsumeByteEitherLoop e wa wb c)
432 | 
433 | skipTrailedBy3# ::
434 |   -- | Error message
435 |   e ->
436 |   -- | First trailer, 0 indicates that this was encountered
437 |   Word8 ->
438 |   -- | Second trailer, 1 indicates that this was encountered
439 |   Word8 ->
440 |   -- | Third trailer, 2 indicates that this was encountered
441 |   Word8 ->
442 |   Parser e s Int#
443 | {-# INLINE skipTrailedBy3# #-}
444 | skipTrailedBy3# e !wa !wb !wc =
445 |   uneffectfulInt# (\c -> skipUntilConsumeByte3Loop e wa wb wc c)
446 | 
447 | skipUntilConsumeByteEitherLoop ::
448 |   e -> -- Error message
449 |   Word8 -> -- first trailer
450 |   Word8 -> -- second trailer
451 |   Bytes -> -- Chunk
452 |   Result# e Int#
453 | skipUntilConsumeByteEitherLoop e !wa !wb !c =
454 |   if length c > 0
455 |     then
456 |       let byte = PM.indexByteArray (array c) (offset c)
457 |        in if
458 |             | byte == wa -> (# | (# 0#, unI (offset c + 1), unI (length c - 1) #) #)
459 |             | byte == wb -> (# | (# 1#, unI (offset c + 1), unI (length c - 1) #) #)
460 |             | otherwise -> skipUntilConsumeByteEitherLoop e wa wb (B.unsafeDrop 1 c)
461 |     else (# e | #)
462 | 
463 | skipUntilConsumeByte3Loop ::
464 |   e -> -- Error message
465 |   Word8 -> -- first trailer
466 |   Word8 -> -- second trailer
467 |   Word8 -> -- third trailer
468 |   Bytes -> -- Chunk
469 |   Result# e Int#
470 | skipUntilConsumeByte3Loop e !wa !wb !wc !c =
471 |   if length c > 0
472 |     then
473 |       let byte = PM.indexByteArray (array c) (offset c)
474 |        in if
475 |             | byte == wa -> (# | (# 0#, unI (offset c + 1), unI (length c - 1) #) #)
476 |             | byte == wb -> (# | (# 1#, unI (offset c + 1), unI (length c - 1) #) #)
477 |             | byte == wc -> (# | (# 2#, unI (offset c + 1), unI (length c - 1) #) #)
478 |             | otherwise -> skipUntilConsumeByte3Loop e wa wb wc (B.unsafeDrop 1 c)
479 |     else (# e | #)
480 | 
481 | {- | Take the given number of bytes. Fails if there is not enough
482 |   remaining input.
483 | -}
484 | take :: e -> Int -> Parser e s Bytes
485 | {-# INLINE take #-}
486 | take e n = uneffectful $ \chunk ->
487 |   if n <= B.length chunk
488 |     then case B.unsafeTake n chunk of
489 |       bs -> Internal.Success bs (offset chunk + n) (length chunk - n)
490 |     else Internal.Failure e
491 | 
492 | -- | Variant of 'take' that tracks the length of the result in the result type.
493 | takeN :: e -> Arithmetic.Nat n -> Parser e s (BytesN n)
494 | takeN e n0 = uneffectful $ \chunk ->
495 |   if n <= B.length chunk
496 |     then case B.unsafeTake n chunk of
497 |       Bytes theChunk theOff _ -> Internal.Success (BytesN theChunk theOff) (offset chunk + n) (length chunk - n)
498 |     else Internal.Failure e
499 |  where
500 |   !n = Nat.demote n0
501 | 
502 | {- | Take at most the given number of bytes. This is greedy. It will
503 |   consume as many bytes as there are available until it has consumed
504 |   @n@ bytes. This never fails.
505 | -}
506 | takeUpTo :: Int -> Parser e s Bytes
507 | {-# INLINE takeUpTo #-}
508 | takeUpTo n = uneffectful $ \chunk ->
509 |   let m = min n (B.length chunk)
510 |    in case B.unsafeTake m chunk of
511 |         bs -> Internal.Success bs (offset chunk + m) (length chunk - m)
512 | 
513 | -- | Consume all remaining bytes in the input.
514 | remaining :: Parser e s Bytes
515 | {-# INLINE remaining #-}
516 | remaining = uneffectful $ \chunk ->
517 |   Internal.Success chunk (offset chunk + length chunk) 0
518 | 
519 | -- | Return all remaining bytes in the input without consuming them.
520 | peekRemaining :: Parser e s Bytes
521 | {-# INLINE peekRemaining #-}
522 | peekRemaining = uneffectful $ \b@(Bytes _ off len) ->
523 |   Internal.Success b off len
524 | 
525 | -- | Skip while the predicate is matched. This is always inlined.
526 | skipWhile :: (Word8 -> Bool) -> Parser e s ()
527 | {-# INLINE skipWhile #-}
528 | skipWhile f = goSkipWhile
529 |  where
530 |   goSkipWhile =
531 |     isEndOfInput >>= \case
532 |       True -> pure ()
533 |       False -> do
534 |         w <- anyUnsafe
535 |         if f w
536 |           then goSkipWhile
537 |           else unconsume 1
538 | 
539 | {- | The parser @satisfy p@ succeeds for any byte for which the
540 |   predicate @p@ returns 'True'. Returns the byte that is
541 |   actually parsed.
542 | -}
543 | satisfy :: e -> (Word8 -> Bool) -> Parser e s Word8
544 | satisfy e p = satisfyWith e id p
545 | {-# INLINE satisfy #-}
546 | 
547 | {- | The parser @satisfyWith f p@ transforms a byte, and succeeds
548 |   if the predicate @p@ returns 'True' on the transformed value.
549 |   The parser returns the transformed byte that was parsed.
550 | -}
551 | satisfyWith :: e -> (Word8 -> a) -> (a -> Bool) -> Parser e s a
552 | {-# INLINE satisfyWith #-}
553 | satisfyWith e f p = uneffectful $ \chunk ->
554 |   if length chunk > 0
555 |     then case B.unsafeIndex chunk 0 of
556 |       w ->
557 |         let v = f w
558 |          in if p v
559 |               then Internal.Success v (offset chunk + 1) (length chunk - 1)
560 |               else Internal.Failure e
561 |     else Internal.Failure e
562 | 
563 | -- | Fails if there is still more input remaining.
564 | endOfInput :: e -> Parser e s ()
565 | {-# INLINE endOfInput #-}
566 | endOfInput e = uneffectful $ \chunk ->
567 |   if length chunk == 0
568 |     then Internal.Success () (offset chunk) 0
569 |     else Internal.Failure e
570 | 
571 | {- | Returns true if there are no more bytes in the input. Returns
572 | false otherwise. Always succeeds.
573 | -}
574 | isEndOfInput :: Parser e s Bool
575 | {-# INLINE isEndOfInput #-}
576 | isEndOfInput = uneffectful $ \chunk ->
577 |   Internal.Success (length chunk == 0) (offset chunk) (length chunk)
578 | 
579 | boxPublicResult :: Result# e a -> Result e a
580 | {-# INLINE boxPublicResult #-}
581 | boxPublicResult (# | (# a, b, c #) #) = Success (Slice (I# b) (I# c) a)
582 | boxPublicResult (# e | #) = Failure e
583 | 
584 | {- FOURMOLU_DISABLE -}
585 | -- | Convert a 'Word32' parser to a 'Word#' parser.
586 | unboxWord32 :: Parser e s Word32 -> Parser e s Word#
587 | {-# inline unboxWord32 #-}
588 | unboxWord32 (Parser f) = Parser
589 |   (\x s0 -> case f x s0 of
590 |     (# s1, r #) -> case r of
591 |       (# e | #) -> (# s1, (# e | #) #)
592 |       (# | (# W32# a, b, c #) #) -> (# s1, (# | (#
593 |         Exts.word32ToWord#
594 |         a, b, c #) #) #)
595 |   )
596 | {- FOURMOLU_ENABLE -}
597 | 
598 | -- | Convert a @(Int,Int)@ parser to a @(# Int#, Int# #)@ parser.
599 | unboxIntPair :: Parser e s (Int, Int) -> Parser e s (# Int#, Int# #)
600 | {-# INLINE unboxIntPair #-}
601 | unboxIntPair (Parser f) =
602 |   Parser
603 |     ( \x s0 -> case f x s0 of
604 |         (# s1, r #) -> case r of
605 |           (# e | #) -> (# s1, (# e | #) #)
606 |           (# | (# (I# y, I# z), b, c #) #) -> (# s1, (# | (# (# y, z #), b, c #) #) #)
607 |     )
608 | 
609 | {- FOURMOLU_DISABLE -}
610 | -- | Convert a 'Word#' parser to a 'Word32' parser. Precondition:
611 | -- the argument parser only returns words less than 4294967296.
612 | boxWord32 :: Parser e s Word# -> Parser e s Word32
613 | {-# inline boxWord32 #-}
614 | boxWord32 (Parser f) = Parser
615 |   (\x s0 -> case f x s0 of
616 |     (# s1, r #) -> case r of
617 |       (# e | #) -> (# s1, (# e | #) #)
618 |       (# | (# a, b, c #) #) -> (# s1, (# | (# W32# (
619 |         Exts.wordToWord32#
620 |         a), b, c #) #) #)
621 |   )
622 | {- FOURMOLU_ENABLE -}
623 | 
624 | -- | Convert a @(# Int#, Int# #)@ parser to a @(Int,Int)@ parser.
625 | boxInt :: Parser e s Int# -> Parser e s Int
626 | {-# INLINE boxInt #-}
627 | boxInt (Parser f) =
628 |   Parser
629 |     ( \x s0 -> case f x s0 of
630 |         (# s1, r #) -> case r of
631 |           (# e | #) -> (# s1, (# e | #) #)
632 |           (# | (# y, b, c #) #) -> (# s1, (# | (# I# y, b, c #) #) #)
633 |     )
634 | 
635 | -- | Convert a @(# Int#, Int# #)@ parser to a @(Int,Int)@ parser.
636 | boxBool :: Parser e s Int# -> Parser e s Bool
637 | {-# INLINE boxBool #-}
638 | boxBool (Parser f) =
639 |   Parser
640 |     ( \x s0 -> case f x s0 of
641 |         (# s1, r #) -> case r of
642 |           (# e | #) -> (# s1, (# e | #) #)
643 |           (# | (# y, b, c #) #) -> (# s1, (# | (# case y of 1# -> True; _ -> False, b, c #) #) #)
644 |     )
645 | 
646 | -- | Convert a @(# Int#, Int# #)@ parser to a @(Int,Int)@ parser.
647 | boxIntPair :: Parser e s (# Int#, Int# #) -> Parser e s (Int, Int)
648 | {-# INLINE boxIntPair #-}
649 | boxIntPair (Parser f) =
650 |   Parser
651 |     ( \x s0 -> case f x s0 of
652 |         (# s1, r #) -> case r of
653 |           (# e | #) -> (# s1, (# e | #) #)
654 |           (# | (# (# y, z #), b, c #) #) -> (# s1, (# | (# (I# y, I# z), b, c #) #) #)
655 |     )
656 | 
657 | {- | There is a law-abiding instance of 'Alternative' for 'Parser'.
658 | However, it is not terribly useful since error messages seldom
659 | have a 'Monoid' instance. This function is a variant of @\<|\>@
660 | that is right-biased in its treatment of error messages.
661 | Consequently, @orElse@ lacks an identity.
662 | See <https://github.com/bos/attoparsec/issues/122 attoparsec issue #122>
663 | for more discussion of this topic.
664 | -}
665 | infixl 3 `orElse`
666 | 
667 | orElse :: Parser x s a -> Parser e s a -> Parser e s a
668 | {-# INLINE orElse #-}
669 | orElse (Parser f) (Parser g) =
670 |   Parser
671 |     ( \x s0 -> case f x s0 of
672 |         (# s1, r0 #) -> case r0 of
673 |           (# _ | #) -> g x s1
674 |           (# | r #) -> (# s1, (# | r #) #)
675 |     )
676 | 
677 | -- | Effectfully adjusts the error message if an error occurs.
678 | mapErrorEffectfully :: (e1 -> ST s e2) -> Parser e1 s a -> Parser e2 s a
679 | {-# INLINE mapErrorEffectfully #-}
680 | mapErrorEffectfully f (Parser g) =
681 |   Parser
682 |     ( \x s0 -> case g x s0 of
683 |         (# s1, r0 #) -> case r0 of
684 |           (# e | #) -> case f e of
685 |             ST h -> case h s1 of
686 |               (# s2, e' #) -> (# s2, (# e' | #) #)
687 |           (# | r #) -> (# s1, (# | r #) #)
688 |     )
689 | 
690 | bindFromCharToLifted :: Parser s e Char# -> (Char# -> Parser s e a) -> Parser s e a
691 | {-# INLINE bindFromCharToLifted #-}
692 | bindFromCharToLifted (Parser f) g =
693 |   Parser
694 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
695 |         (# s1, r0 #) -> case r0 of
696 |           (# e | #) -> (# s1, (# e | #) #)
697 |           (# | (# y, b, c #) #) ->
698 |             runParser (g y) (# arr, b, c #) s1
699 |     )
700 | 
701 | bindFromWordToByteArrayIntInt :: Parser s e Word# -> (Word# -> Parser s e (# ByteArray#, Int#, Int# #)) -> Parser s e (# ByteArray#, Int#, Int# #)
702 | {-# INLINE bindFromWordToByteArrayIntInt #-}
703 | bindFromWordToByteArrayIntInt (Parser f) g =
704 |   Parser
705 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
706 |         (# s1, r0 #) -> case r0 of
707 |           (# e | #) -> (# s1, (# e | #) #)
708 |           (# | (# y, b, c #) #) ->
709 |             runParser (g y) (# arr, b, c #) s1
710 |     )
711 | 
712 | bindFromCharToByteArrayIntInt :: Parser s e Char# -> (Char# -> Parser s e (# ByteArray#, Int#, Int# #)) -> Parser s e (# ByteArray#, Int#, Int# #)
713 | {-# INLINE bindFromCharToByteArrayIntInt #-}
714 | bindFromCharToByteArrayIntInt (Parser f) g =
715 |   Parser
716 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
717 |         (# s1, r0 #) -> case r0 of
718 |           (# e | #) -> (# s1, (# e | #) #)
719 |           (# | (# y, b, c #) #) ->
720 |             runParser (g y) (# arr, b, c #) s1
721 |     )
722 | 
723 | bindFromCharToIntPair :: Parser s e Char# -> (Char# -> Parser s e (# Int#, Int# #)) -> Parser s e (# Int#, Int# #)
724 | {-# INLINE bindFromCharToIntPair #-}
725 | bindFromCharToIntPair (Parser f) g =
726 |   Parser
727 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
728 |         (# s1, r0 #) -> case r0 of
729 |           (# e | #) -> (# s1, (# e | #) #)
730 |           (# | (# y, b, c #) #) ->
731 |             runParser (g y) (# arr, b, c #) s1
732 |     )
733 | 
734 | bindFromLiftedToInt :: Parser s e a -> (a -> Parser s e Int#) -> Parser s e Int#
735 | {-# INLINE bindFromLiftedToInt #-}
736 | bindFromLiftedToInt (Parser f) g =
737 |   Parser
738 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
739 |         (# s1, r0 #) -> case r0 of
740 |           (# e | #) -> (# s1, (# e | #) #)
741 |           (# | (# y, b, c #) #) ->
742 |             runParser (g y) (# arr, b, c #) s1
743 |     )
744 | 
745 | bindFromByteArrayIntIntToLifted ::
746 |      Parser s e (# ByteArray#, Int#, Int# #)
747 |   -> ((# ByteArray#, Int#, Int# #) -> Parser s e a)
748 |   -> Parser s e a
749 | {-# INLINE bindFromByteArrayIntIntToLifted #-}
750 | bindFromByteArrayIntIntToLifted (Parser f) g =
751 |   Parser
752 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
753 |         (# s1, r0 #) -> case r0 of
754 |           (# e | #) -> (# s1, (# e | #) #)
755 |           (# | (# y, b, c #) #) ->
756 |             runParser (g y) (# arr, b, c #) s1
757 |     )
758 | 
759 | bindFromLiftedToByteArrayIntInt ::
760 |      Parser s e a
761 |   -> (a -> Parser s e (# ByteArray#, Int#, Int# #))
762 |   -> Parser s e (# ByteArray#, Int#, Int# #)
763 | {-# INLINE bindFromLiftedToByteArrayIntInt #-}
764 | bindFromLiftedToByteArrayIntInt (Parser f) g =
765 |   Parser
766 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
767 |         (# s1, r0 #) -> case r0 of
768 |           (# e | #) -> (# s1, (# e | #) #)
769 |           (# | (# y, b, c #) #) ->
770 |             runParser (g y) (# arr, b, c #) s1
771 |     )
772 | 
773 | bindFromLiftedToIntPair :: Parser s e a -> (a -> Parser s e (# Int#, Int# #)) -> Parser s e (# Int#, Int# #)
774 | {-# INLINE bindFromLiftedToIntPair #-}
775 | bindFromLiftedToIntPair (Parser f) g =
776 |   Parser
777 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
778 |         (# s1, r0 #) -> case r0 of
779 |           (# e | #) -> (# s1, (# e | #) #)
780 |           (# | (# y, b, c #) #) ->
781 |             runParser (g y) (# arr, b, c #) s1
782 |     )
783 | 
784 | bindFromIntToIntPair :: Parser s e Int# -> (Int# -> Parser s e (# Int#, Int# #)) -> Parser s e (# Int#, Int# #)
785 | {-# INLINE bindFromIntToIntPair #-}
786 | bindFromIntToIntPair (Parser f) g =
787 |   Parser
788 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
789 |         (# s1, r0 #) -> case r0 of
790 |           (# e | #) -> (# s1, (# e | #) #)
791 |           (# | (# y, b, c #) #) ->
792 |             runParser (g y) (# arr, b, c #) s1
793 |     )
794 | 
795 | bindFromMaybeCharToIntPair ::
796 |   Parser s e (# (# #) | Char# #) ->
797 |   ((# (# #) | Char# #) -> Parser s e (# Int#, Int# #)) ->
798 |   Parser s e (# Int#, Int# #)
799 | {-# INLINE bindFromMaybeCharToIntPair #-}
800 | bindFromMaybeCharToIntPair (Parser f) g =
801 |   Parser
802 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
803 |         (# s1, r0 #) -> case r0 of
804 |           (# e | #) -> (# s1, (# e | #) #)
805 |           (# | (# y, b, c #) #) ->
806 |             runParser (g y) (# arr, b, c #) s1
807 |     )
808 | 
809 | bindFromMaybeCharToLifted ::
810 |   Parser s e (# (# #) | Char# #) ->
811 |   ((# (# #) | Char# #) -> Parser s e a) ->
812 |   Parser s e a
813 | {-# INLINE bindFromMaybeCharToLifted #-}
814 | bindFromMaybeCharToLifted (Parser f) g =
815 |   Parser
816 |     ( \x@(# arr, _, _ #) s0 -> case f x s0 of
817 |         (# s1, r0 #) -> case r0 of
818 |           (# e | #) -> (# s1, (# e | #) #)
819 |           (# | (# y, b, c #) #) ->
820 |             runParser (g y) (# arr, b, c #) s1
821 |     )
822 | 
823 | pureIntPair ::
824 |   (# Int#, Int# #) ->
825 |   Parser s e (# Int#, Int# #)
826 | {-# INLINE pureIntPair #-}
827 | pureIntPair a =
828 |   Parser
829 |     (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #))
830 | 
831 | pureByteArrayIntInt ::
832 |   (# ByteArray#, Int#, Int# #) ->
833 |   Parser s e (# ByteArray#, Int#, Int# #)
834 | {-# INLINE pureByteArrayIntInt #-}
835 | pureByteArrayIntInt a =
836 |   Parser
837 |     (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #))
838 | 
839 | failIntPair :: e -> Parser e s (# Int#, Int# #)
840 | {-# INLINE failIntPair #-}
841 | failIntPair e =
842 |   Parser
843 |     (\(# _, _, _ #) s -> (# s, (# e | #) #))
844 | 
845 | {- | Augment a parser with the number of bytes that were consume while
846 | it executed.
847 | -}
848 | measure :: Parser e s a -> Parser e s (Int, a)
849 | {-# INLINE measure #-}
850 | measure (Parser f) =
851 |   Parser
852 |     ( \x@(# _, pre, _ #) s0 -> case f x s0 of
853 |         (# s1, r #) -> case r of
854 |           (# e | #) -> (# s1, (# e | #) #)
855 |           (# | (# y, post, c #) #) -> (# s1, (# | (# (I# (post -# pre), y), post, c #) #) #)
856 |     )
857 | 
858 | {- | Run a parser and discard the result, returning instead the number
859 | of bytes that the parser consumed.
860 | -}
861 | measure_ :: Parser e s a -> Parser e s Int
862 | {-# INLINE measure_ #-}
863 | measure_ p = boxInt (measure_# p)
864 | 
865 | -- | Variant of 'measure_' with an unboxed result.
866 | measure_# :: Parser e s a -> Parser e s Int#
867 | {-# INLINE measure_# #-}
868 | measure_# (Parser f) =
869 |   Parser
870 |     ( \x@(# _, pre, _ #) s0 -> case f x s0 of
871 |         (# s1, r #) -> case r of
872 |           (# e | #) -> (# s1, (# e | #) #)
873 |           (# | (# _, post, c #) #) -> (# s1, (# | (# post -# pre, post, c #) #) #)
874 |     )
875 | 
876 | {- | Run a parser in a delimited context, failing if the requested number
877 | of bytes are not available or if the delimited parser does not
878 | consume all input. This combinator can be understood as a composition
879 | of 'take', 'effect', 'parseBytesEffectfully', and 'endOfInput'. It is
880 | provided as a single combinator because for convenience and because it is
881 | easy to make mistakes when manually assembling the aforementioned parsers.
882 | The pattern of prefixing an encoding with its length is common.
883 | This is discussed more in
884 | <https://github.com/bos/attoparsec/issues/129 attoparsec issue #129>.
885 | 
886 | > delimit e1 e2 n remaining === take e1 n
887 | -}
888 | delimit ::
889 |   -- | Error message when not enough bytes are present
890 |   e ->
891 |   -- | Error message when delimited parser does not consume all input
892 |   e ->
893 |   -- | Exact number of bytes delimited parser is expected to consume
894 |   Int ->
895 |   -- | Parser to execute in delimited context
896 |   Parser e s a ->
897 |   Parser e s a
898 | {-# INLINE delimit #-}
899 | delimit esz eleftovers (I# n) (Parser f) =
900 |   Parser
901 |     ( \(# arr, off, len #) s0 -> case len >=# n of
902 |         1# -> case f (# arr, off, n #) s0 of
903 |           (# s1, r #) -> case r of
904 |             (# e | #) -> (# s1, (# e | #) #)
905 |             (# | (# a, newOff, leftovers #) #) -> case leftovers of
906 |               0# -> (# s1, (# | (# a, newOff, len -# n #) #) #)
907 |               _ -> (# s1, (# eleftovers | #) #)
908 |         _ -> (# s0, (# esz | #) #)
909 |     )
910 | 
911 | {- | Apply the parser repeatedly until there is no more input left
912 | to consume. Collects the results into a list.
913 | -}
914 | listUntilEoi ::
915 |      Parser e s a -- ^ Parser to repeatedly apply until input is exhausted
916 |   -> Parser e s [a]
917 | listUntilEoi p = go []
918 |   where
919 |   go !acc = isEndOfInput >>= \case
920 |     True -> pure $! List.reverse acc
921 |     False -> do
922 |       a <- p
923 |       go (a : acc)
924 | 
925 | {- | Replicate a parser @n@ times, writing the results into
926 | an array of length @n@. For @Array@ and @SmallArray@, this
927 | is lazy in the elements, so be sure the they result of the
928 | parser is evaluated appropriately to avoid unwanted thunks.
929 | -}
930 | replicate ::
931 |   forall arr e s a.
932 |   (Contiguous arr, Element arr a) =>
933 |   -- | Number of times to run the parser
934 |   Int ->
935 |   -- | Parser
936 |   Parser e s a ->
937 |   Parser e s (arr a)
938 | {-# INLINE replicate #-}
939 | replicate !len p = do
940 |   marr <- effect (C.new len)
941 |   let go :: Int -> Parser e s (arr a)
942 |       go !ix =
943 |         if ix < len
944 |           then do
945 |             a <- p
946 |             effect (C.write marr ix a)
947 |             go (ix + 1)
948 |           else effect (C.unsafeFreeze marr)
949 |   go 0
950 | 
951 | unI :: Int -> Int#
952 | {-# INLINE unI #-}
953 | unI (I# w) = w
954 | 


--------------------------------------------------------------------------------
/src/Data/Bytes/Parser/Latin.hs:
--------------------------------------------------------------------------------
   1 | {-# LANGUAGE BangPatterns #-}
   2 | {-# LANGUAGE BinaryLiterals #-}
   3 | {-# LANGUAGE CPP #-}
   4 | {-# LANGUAGE DataKinds #-}
   5 | {-# LANGUAGE DeriveFunctor #-}
   6 | {-# LANGUAGE DerivingStrategies #-}
   7 | {-# LANGUAGE GADTSyntax #-}
   8 | {-# LANGUAGE KindSignatures #-}
   9 | {-# LANGUAGE LambdaCase #-}
  10 | {-# LANGUAGE MagicHash #-}
  11 | {-# LANGUAGE MultiWayIf #-}
  12 | {-# LANGUAGE PolyKinds #-}
  13 | {-# LANGUAGE RankNTypes #-}
  14 | {-# LANGUAGE ScopedTypeVariables #-}
  15 | {-# LANGUAGE StandaloneDeriving #-}
  16 | {-# LANGUAGE TypeApplications #-}
  17 | {-# LANGUAGE UnboxedSums #-}
  18 | {-# LANGUAGE UnboxedTuples #-}
  19 | 
  20 | {- | Parse input as though it were text encoded by
  21 | ISO 8859-1 (Latin-1). All byte sequences are valid
  22 | text under ISO 8859-1.
  23 | -}
  24 | module Data.Bytes.Parser.Latin
  25 |   ( -- * Matching
  26 | 
  27 |     -- ** Required
  28 |     char
  29 |   , char2
  30 |   , char3
  31 |   , char4
  32 |   , char5
  33 |   , char6
  34 |   , char7
  35 |   , char8
  36 |   , char9
  37 |   , char10
  38 |   , char11
  39 |   , char12
  40 | 
  41 |     -- ** Try
  42 |   , trySatisfy
  43 |   , trySatisfyThen
  44 | 
  45 |     -- * One Character
  46 |   , any
  47 |   , opt
  48 |   , opt#
  49 | 
  50 |     -- * Many Characters
  51 |   , takeTrailedBy
  52 | 
  53 |     -- * Lookahead
  54 |   , peek
  55 |   , peek'
  56 | 
  57 |     -- * Skip
  58 |   , skipDigits
  59 |   , skipDigits1
  60 |   , skipChar
  61 |   , skipChar1
  62 |   , skipTrailedBy
  63 |   , skipUntil
  64 |   , skipWhile
  65 | 
  66 |     -- * End of Input
  67 |   , endOfInput
  68 |   , isEndOfInput
  69 | 
  70 |     -- * Numbers
  71 | 
  72 |     -- ** Decimal
  73 | 
  74 |     -- *** Unsigned
  75 |   , decWord
  76 |   , decWord8
  77 |   , decWord16
  78 |   , decWord32
  79 |   , decWord64
  80 | 
  81 |     -- *** Signed
  82 |   , decUnsignedInt
  83 |   , decUnsignedInt#
  84 |   , decSignedInt
  85 |   , decStandardInt
  86 |   , decTrailingInt
  87 |   , decTrailingInt#
  88 |   , decSignedInteger
  89 |   , decUnsignedInteger
  90 |   , decTrailingInteger
  91 | 
  92 |     -- ** Hexadecimal
  93 | 
  94 |     -- *** Variable Length
  95 |   , hexWord8
  96 |   , hexWord16
  97 |   , hexWord32
  98 | 
  99 |     -- *** Fixed Length
 100 |   , hexFixedWord8
 101 |   , hexFixedWord16
 102 |   , hexFixedWord16#
 103 |   , hexFixedWord32
 104 |   , hexFixedWord64
 105 |   , hexFixedWord128
 106 |   , hexFixedWord256
 107 | 
 108 |     -- *** Digit
 109 |   , hexNibbleLower
 110 |   , tryHexNibbleLower
 111 |   , hexNibble
 112 |   , tryHexNibble
 113 |   ) where
 114 | 
 115 | import Prelude hiding (any, fail, length, takeWhile)
 116 | 
 117 | import Data.Bits ((.|.))
 118 | import Data.Bytes.Parser (bindFromLiftedToInt, endOfInput, isEndOfInput)
 119 | import Data.Bytes.Parser.Internal (InternalStep (..), Parser (..), Result (..), Result#, ST#, boxBytes, indexLatinCharArray, uneffectful, uneffectful#, unfailing, upcastUnitSuccess)
 120 | import Data.Bytes.Parser.Unsafe (cursor, expose, unconsume)
 121 | import Data.Bytes.Types (Bytes (..))
 122 | import Data.Char (ord)
 123 | import Data.Kind (Type)
 124 | import Data.WideWord (Word128 (Word128), Word256 (Word256))
 125 | import Data.Word (Word8)
 126 | import GHC.Exts (Char (C#), Char#, Int (I#), Int#, RuntimeRep, TYPE, Word#, gtWord#, indexCharArray#, int2Word#, ltWord#, notI#, or#, (+#), (-#))
 127 | import GHC.Word (Word (W#), Word16 (W16#), Word32 (W32#), Word64 (W64#), Word8 (W8#))
 128 | 
 129 | import qualified Data.Bytes as Bytes
 130 | import qualified Data.Primitive as PM
 131 | import qualified GHC.Exts as Exts
 132 | 
 133 | {- | Runs the predicate on the next character in the input. If the
 134 | predicate is matched, this consumes the character. Otherwise,
 135 | the character is not consumed. This returns @False@ if the end
 136 | of the input has been reached. This never fails.
 137 | -}
 138 | trySatisfy :: (Char -> Bool) -> Parser e s Bool
 139 | trySatisfy f = uneffectful $ \chunk -> case length chunk of
 140 |   0 -> Success False (offset chunk) (length chunk)
 141 |   _ -> case f (indexLatinCharArray (array chunk) (offset chunk)) of
 142 |     True -> Success True (offset chunk + 1) (length chunk - 1)
 143 |     False -> Success False (offset chunk) (length chunk)
 144 | 
 145 | {- | Runs the function on the next character in the input. If the
 146 | function returns @Just@, this consumes the character and then
 147 | runs the parser on the remaining input. If the function returns
 148 | @Nothing@, this does not consume the tested character, and it
 149 | runs the default parser on the input (which includes the tested
 150 | character). If there is no input remaining, this also runs the
 151 | default parser. This combinator never fails.
 152 | -}
 153 | trySatisfyThen ::
 154 |   forall (r :: RuntimeRep) (e :: Type) (s :: Type) (a :: TYPE r).
 155 |   -- | Default parser. Runs on @Nothing@ or end of input.
 156 |   Parser e s a ->
 157 |   -- | Parser-selecting predicate
 158 |   (Char -> Maybe (Parser e s a)) ->
 159 |   Parser e s a
 160 | {-# INLINE trySatisfyThen #-}
 161 | trySatisfyThen (Parser g) f =
 162 |   Parser
 163 |     ( \input@(# arr, off0, len0 #) s0 -> case len0 of
 164 |         0# -> g input s0
 165 |         _ -> case f (C# (indexCharArray# arr off0)) of
 166 |           Nothing -> g input s0
 167 |           Just (Parser p) -> p (# arr, off0 +# 1#, len0 -# 1# #) s0
 168 |     )
 169 | 
 170 | {- | Consume the next character, failing if it does not
 171 | match the expected value or if there is no more input.
 172 | -}
 173 | char :: e -> Char -> Parser e s ()
 174 | {-# INLINE char #-}
 175 | char e !c = uneffectful $ \chunk ->
 176 |   if length chunk > 0
 177 |     then
 178 |       if indexLatinCharArray (array chunk) (offset chunk) == c
 179 |         then Success () (offset chunk + 1) (length chunk - 1)
 180 |         else Failure e
 181 |     else Failure e
 182 | 
 183 | {- | Consume the next two characters, failing if they do
 184 | not match the expected values.
 185 | 
 186 | > char2 e a b === char e a *> char e b
 187 | -}
 188 | char2 :: e -> Char -> Char -> Parser e s ()
 189 | {-# INLINE char2 #-}
 190 | char2 e !c0 !c1 = uneffectful $ \chunk ->
 191 |   if
 192 |     | length chunk > 1
 193 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 194 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 ->
 195 |         Success () (offset chunk + 2) (length chunk - 2)
 196 |     | otherwise -> Failure e
 197 | 
 198 | {- | Consume three characters, failing if they do
 199 | not match the expected values.
 200 | 
 201 | > char3 e a b c === char e a *> char e b *> char e c
 202 | -}
 203 | char3 :: e -> Char -> Char -> Char -> Parser e s ()
 204 | {-# INLINE char3 #-}
 205 | char3 e !c0 !c1 !c2 = uneffectful $ \chunk ->
 206 |   if
 207 |     | length chunk > 2
 208 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 209 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 210 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 ->
 211 |         Success () (offset chunk + 3) (length chunk - 3)
 212 |     | otherwise -> Failure e
 213 | 
 214 | {- | Consume four characters, failing if they do
 215 | not match the expected values.
 216 | 
 217 | > char4 e a b c d === char e a *> char e b *> char e c *> char e d
 218 | -}
 219 | char4 :: e -> Char -> Char -> Char -> Char -> Parser e s ()
 220 | {-# INLINE char4 #-}
 221 | char4 e !c0 !c1 !c2 !c3 = uneffectful $ \chunk ->
 222 |   if
 223 |     | length chunk > 3
 224 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 225 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 226 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 227 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 ->
 228 |         Success () (offset chunk + 4) (length chunk - 4)
 229 |     | otherwise -> Failure e
 230 | 
 231 | {- | Consume five characters, failing if they do
 232 | not match the expected values.
 233 | -}
 234 | char5 :: e -> Char -> Char -> Char -> Char -> Char -> Parser e s ()
 235 | {-# INLINE char5 #-}
 236 | char5 e !c0 !c1 !c2 !c3 !c4 = uneffectful $ \chunk ->
 237 |   if
 238 |     | length chunk > 4
 239 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 240 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 241 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 242 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
 243 |     , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 ->
 244 |         Success () (offset chunk + 5) (length chunk - 5)
 245 |     | otherwise -> Failure e
 246 | 
 247 | {- | Consume six characters, failing if they do
 248 | not match the expected values.
 249 | -}
 250 | char6 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s ()
 251 | {-# INLINE char6 #-}
 252 | char6 e !c0 !c1 !c2 !c3 !c4 !c5 = uneffectful $ \chunk ->
 253 |   if
 254 |     | length chunk > 5
 255 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 256 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 257 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 258 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
 259 |     , indexLatinCharArray (array chunk) (offset chunk + 4) == c4
 260 |     , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 ->
 261 |         Success () (offset chunk + 6) (length chunk - 6)
 262 |     | otherwise -> Failure e
 263 | 
 264 | {- | Consume seven characters, failing if they do
 265 | not match the expected values.
 266 | -}
 267 | char7 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s ()
 268 | {-# INLINE char7 #-}
 269 | char7 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 = uneffectful $ \chunk ->
 270 |   if
 271 |     | length chunk > 6
 272 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 273 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 274 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 275 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
 276 |     , indexLatinCharArray (array chunk) (offset chunk + 4) == c4
 277 |     , indexLatinCharArray (array chunk) (offset chunk + 5) == c5
 278 |     , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 ->
 279 |         Success () (offset chunk + 7) (length chunk - 7)
 280 |     | otherwise -> Failure e
 281 | 
 282 | {- | Consume eight characters, failing if they do
 283 | not match the expected values.
 284 | -}
 285 | char8 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s ()
 286 | {-# INLINE char8 #-}
 287 | char8 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 = uneffectful $ \chunk ->
 288 |   if
 289 |     | length chunk > 7
 290 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 291 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 292 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 293 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
 294 |     , indexLatinCharArray (array chunk) (offset chunk + 4) == c4
 295 |     , indexLatinCharArray (array chunk) (offset chunk + 5) == c5
 296 |     , indexLatinCharArray (array chunk) (offset chunk + 6) == c6
 297 |     , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 ->
 298 |         Success () (offset chunk + 8) (length chunk - 8)
 299 |     | otherwise -> Failure e
 300 | 
 301 | {- | Consume nine characters, failing if they do
 302 | not match the expected values.
 303 | -}
 304 | char9 ::
 305 |   e ->
 306 |   Char ->
 307 |   Char ->
 308 |   Char ->
 309 |   Char ->
 310 |   Char ->
 311 |   Char ->
 312 |   Char ->
 313 |   Char ->
 314 |   Char ->
 315 |   Parser e s ()
 316 | {-# INLINE char9 #-}
 317 | char9 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 = uneffectful $ \chunk ->
 318 |   if
 319 |     | length chunk > 8
 320 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 321 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 322 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 323 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
 324 |     , indexLatinCharArray (array chunk) (offset chunk + 4) == c4
 325 |     , indexLatinCharArray (array chunk) (offset chunk + 5) == c5
 326 |     , indexLatinCharArray (array chunk) (offset chunk + 6) == c6
 327 |     , indexLatinCharArray (array chunk) (offset chunk + 7) == c7
 328 |     , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 ->
 329 |         Success () (offset chunk + 9) (length chunk - 9)
 330 |     | otherwise -> Failure e
 331 | 
 332 | {- | Consume ten characters, failing if they do
 333 | not match the expected values.
 334 | -}
 335 | char10 ::
 336 |   e ->
 337 |   Char ->
 338 |   Char ->
 339 |   Char ->
 340 |   Char ->
 341 |   Char ->
 342 |   Char ->
 343 |   Char ->
 344 |   Char ->
 345 |   Char ->
 346 |   Char ->
 347 |   Parser e s ()
 348 | {-# INLINE char10 #-}
 349 | char10 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 = uneffectful $ \chunk ->
 350 |   if
 351 |     | length chunk > 9
 352 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 353 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 354 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 355 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
 356 |     , indexLatinCharArray (array chunk) (offset chunk + 4) == c4
 357 |     , indexLatinCharArray (array chunk) (offset chunk + 5) == c5
 358 |     , indexLatinCharArray (array chunk) (offset chunk + 6) == c6
 359 |     , indexLatinCharArray (array chunk) (offset chunk + 7) == c7
 360 |     , indexLatinCharArray (array chunk) (offset chunk + 8) == c8
 361 |     , indexLatinCharArray (array chunk) (offset chunk + 9) == c9 ->
 362 |         Success () (offset chunk + 10) (length chunk - 10)
 363 |     | otherwise -> Failure e
 364 | 
 365 | {- | Consume eleven characters, failing if they do
 366 | not match the expected values.
 367 | -}
 368 | char11 ::
 369 |   e ->
 370 |   Char ->
 371 |   Char ->
 372 |   Char ->
 373 |   Char ->
 374 |   Char ->
 375 |   Char ->
 376 |   Char ->
 377 |   Char ->
 378 |   Char ->
 379 |   Char ->
 380 |   Char ->
 381 |   Parser e s ()
 382 | {-# INLINE char11 #-}
 383 | char11 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 = uneffectful $ \chunk ->
 384 |   if
 385 |     | length chunk > 10
 386 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 387 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 388 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 389 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
 390 |     , indexLatinCharArray (array chunk) (offset chunk + 4) == c4
 391 |     , indexLatinCharArray (array chunk) (offset chunk + 5) == c5
 392 |     , indexLatinCharArray (array chunk) (offset chunk + 6) == c6
 393 |     , indexLatinCharArray (array chunk) (offset chunk + 7) == c7
 394 |     , indexLatinCharArray (array chunk) (offset chunk + 8) == c8
 395 |     , indexLatinCharArray (array chunk) (offset chunk + 9) == c9
 396 |     , indexLatinCharArray (array chunk) (offset chunk + 10) == c10 ->
 397 |         Success () (offset chunk + 11) (length chunk - 11)
 398 |     | otherwise -> Failure e
 399 | 
 400 | {- | Consume twelve characters, failing if they do
 401 | not match the expected values.
 402 | -}
 403 | char12 ::
 404 |   e ->
 405 |   Char ->
 406 |   Char ->
 407 |   Char ->
 408 |   Char ->
 409 |   Char ->
 410 |   Char ->
 411 |   Char ->
 412 |   Char ->
 413 |   Char ->
 414 |   Char ->
 415 |   Char ->
 416 |   Char ->
 417 |   Parser e s ()
 418 | {-# INLINE char12 #-}
 419 | char12 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 = uneffectful $ \chunk ->
 420 |   if
 421 |     | length chunk > 11
 422 |     , indexLatinCharArray (array chunk) (offset chunk) == c0
 423 |     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
 424 |     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
 425 |     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
 426 |     , indexLatinCharArray (array chunk) (offset chunk + 4) == c4
 427 |     , indexLatinCharArray (array chunk) (offset chunk + 5) == c5
 428 |     , indexLatinCharArray (array chunk) (offset chunk + 6) == c6
 429 |     , indexLatinCharArray (array chunk) (offset chunk + 7) == c7
 430 |     , indexLatinCharArray (array chunk) (offset chunk + 8) == c8
 431 |     , indexLatinCharArray (array chunk) (offset chunk + 9) == c9
 432 |     , indexLatinCharArray (array chunk) (offset chunk + 10) == c10
 433 |     , indexLatinCharArray (array chunk) (offset chunk + 11) == c11 ->
 434 |         Success () (offset chunk + 12) (length chunk - 12)
 435 |     | otherwise -> Failure e
 436 | 
 437 | -- | Consumes and returns the next character in the input.
 438 | any :: e -> Parser e s Char
 439 | {-# INLINE any #-}
 440 | any e = uneffectful $ \chunk ->
 441 |   if length chunk > 0
 442 |     then
 443 |       let c = indexLatinCharArray (array chunk) (offset chunk)
 444 |        in Success c (offset chunk + 1) (length chunk - 1)
 445 |     else Failure e
 446 | 
 447 | {- | Consume a character from the input or return @Nothing@ if
 448 | end of the stream has been reached. Since ISO 8859-1 maps every
 449 | bytes to a character, this parser never fails.
 450 | -}
 451 | opt :: Parser e s (Maybe Char)
 452 | {-# INLINE opt #-}
 453 | opt = uneffectful $ \chunk -> case length chunk of
 454 |   0 -> Success Nothing (offset chunk) (length chunk)
 455 |   _ ->
 456 |     Success
 457 |       (Just (indexLatinCharArray (array chunk) (offset chunk)))
 458 |       (offset chunk + 1)
 459 |       (length chunk - 1)
 460 | 
 461 | -- | Variant of @opt@ with unboxed result.
 462 | opt# :: Parser e s (# (# #) | Char# #)
 463 | {-# INLINE opt# #-}
 464 | opt# =
 465 |   Parser
 466 |     ( \(# arr, off, len #) s0 -> case len of
 467 |         0# -> (# s0, (# | (# (# (# #) | #), off, len #) #) #)
 468 |         _ -> (# s0, (# | (# (# | indexCharArray# arr off #), off +# 1#, len -# 1# #) #) #)
 469 |     )
 470 | 
 471 | skipDigitsAsciiLoop ::
 472 |   Bytes -> -- Chunk
 473 |   (# Int#, Int# #)
 474 | skipDigitsAsciiLoop !c =
 475 |   if length c > 0
 476 |     then
 477 |       let w = indexLatinCharArray (array c) (offset c)
 478 |        in if w >= '0' && w <= '9'
 479 |             then skipDigitsAsciiLoop (Bytes.unsafeDrop 1 c)
 480 |             else (# unI (offset c), unI (length c) #)
 481 |     else (# unI (offset c), unI (length c) #)
 482 | 
 483 | skipDigitsAscii1LoopStart ::
 484 |   e ->
 485 |   Bytes -> -- chunk
 486 |   Result# e ()
 487 | skipDigitsAscii1LoopStart e !c =
 488 |   if length c > 0
 489 |     then
 490 |       let w = indexLatinCharArray (array c) (offset c)
 491 |        in if w >= '0' && w <= '9'
 492 |             then upcastUnitSuccess (skipDigitsAsciiLoop (Bytes.unsafeDrop 1 c))
 493 |             else (# e | #)
 494 |     else (# e | #)
 495 | 
 496 | {- | Variant of 'skipDigits' that requires at least one digit
 497 | to be present.
 498 | -}
 499 | skipDigits1 :: e -> Parser e s ()
 500 | {-# INLINE skipDigits1 #-}
 501 | skipDigits1 e = uneffectful# $ \c ->
 502 |   skipDigitsAscii1LoopStart e c
 503 | 
 504 | {- | Skip the characters @0-9@ until a non-digit is encountered.
 505 | This parser does not fail.
 506 | -}
 507 | skipDigits :: Parser e s ()
 508 | skipDigits = uneffectful# $ \c ->
 509 |   upcastUnitSuccess (skipDigitsAsciiLoop c)
 510 | 
 511 | unI :: Int -> Int#
 512 | {-# INLINE unI #-}
 513 | unI (I# w) = w
 514 | 
 515 | {- | Skip the character any number of times. This succeeds
 516 | even if the character was not present.
 517 | -}
 518 | skipChar :: Char -> Parser e s ()
 519 | {-# INLINE skipChar #-}
 520 | skipChar !w = uneffectful# $ \c ->
 521 |   upcastUnitSuccess (skipLoop w c)
 522 | 
 523 | {- | Skip the character any number of times. It must occur
 524 | at least once or else this will fail.
 525 | -}
 526 | skipChar1 :: e -> Char -> Parser e s ()
 527 | {-# INLINE skipChar1 #-}
 528 | skipChar1 e !w = uneffectful# $ \c ->
 529 |   skipLoop1Start e w c
 530 | 
 531 | skipLoop ::
 532 |   Char -> -- byte to match
 533 |   Bytes -> -- Chunk
 534 |   (# Int#, Int# #)
 535 | skipLoop !w !c =
 536 |   if length c > 0
 537 |     then
 538 |       if indexLatinCharArray (array c) (offset c) == w
 539 |         then skipLoop w (Bytes.unsafeDrop 1 c)
 540 |         else (# unI (offset c), unI (length c) #)
 541 |     else (# unI (offset c), unI (length c) #)
 542 | 
 543 | skipLoop1Start ::
 544 |   e ->
 545 |   Char -> -- byte to match
 546 |   Bytes -> -- chunk
 547 |   Result# e ()
 548 | skipLoop1Start e !w !chunk0 =
 549 |   if length chunk0 > 0
 550 |     then
 551 |       if indexLatinCharArray (array chunk0) (offset chunk0) == w
 552 |         then upcastUnitSuccess (skipLoop w (Bytes.unsafeDrop 1 chunk0))
 553 |         else (# e | #)
 554 |     else (# e | #)
 555 | 
 556 | {- | Parse a decimal-encoded 8-bit word. If the number is larger
 557 | than 255, this parser fails.
 558 | -}
 559 | decWord8 :: e -> Parser e s Word8
 560 | decWord8 e =
 561 |   Parser
 562 |     ( \chunk0 s0 -> case decSmallWordStart e 256 (boxBytes chunk0) s0 of
 563 |         (# s1, r #) -> (# s1, upcastWord8Result r #)
 564 |     )
 565 | 
 566 | {- | Parse a hexadecimal-encoded 8-bit word. If the number is larger
 567 | than 255, this parser fails. This allows leading zeroes and is
 568 | insensitive to case. For example, @00A@, @0a@ and @A@ would all
 569 | be accepted as the same number.
 570 | -}
 571 | hexWord8 :: e -> Parser e s Word8
 572 | hexWord8 e =
 573 |   Parser
 574 |     ( \chunk0 s0 -> case hexSmallWordStart e 256 (boxBytes chunk0) s0 of
 575 |         (# s1, r #) -> (# s1, upcastWord8Result r #)
 576 |     )
 577 | 
 578 | {- | Parse a hexadecimal-encoded 16-bit word. If the number is larger
 579 | than 65535, this parser fails. This allows leading zeroes and is
 580 | insensitive to case. For example, @0100a@ and @100A@ would both
 581 | be accepted as the same number.
 582 | -}
 583 | hexWord16 :: e -> Parser e s Word16
 584 | hexWord16 e =
 585 |   Parser
 586 |     ( \chunk0 s0 -> case hexSmallWordStart e 65536 (boxBytes chunk0) s0 of
 587 |         (# s1, r #) -> (# s1, upcastWord16Result r #)
 588 |     )
 589 | 
 590 | hexWord32 :: e -> Parser e s Word32
 591 | hexWord32 e =
 592 |   Parser
 593 |     ( \chunk0 s0 -> case hexSmallWordStart e 4294967296 (boxBytes chunk0) s0 of
 594 |         (# s1, r #) -> (# s1, upcastWord32Result r #)
 595 |     )
 596 | 
 597 | {- | Parse a decimal-encoded 16-bit word. If the number is larger
 598 | than 65535, this parser fails.
 599 | -}
 600 | decWord16 :: e -> Parser e s Word16
 601 | decWord16 e =
 602 |   Parser
 603 |     ( \chunk0 s0 -> case decSmallWordStart e 65536 (boxBytes chunk0) s0 of
 604 |         (# s1, r #) -> (# s1, upcastWord16Result r #)
 605 |     )
 606 | 
 607 | {- | Parse a decimal-encoded 32-bit word. If the number is larger
 608 | than 4294967295, this parser fails.
 609 | -}
 610 | decWord32 :: e -> Parser e s Word32
 611 | -- This will not work on 32-bit platforms.
 612 | decWord32 e =
 613 |   Parser
 614 |     ( \chunk0 s0 -> case decSmallWordStart e 4294967296 (boxBytes chunk0) s0 of
 615 |         (# s1, r #) -> (# s1, upcastWord32Result r #)
 616 |     )
 617 | 
 618 | {- | Parse a decimal-encoded number. If the number is too large to be
 619 | represented by a machine word, this fails with the provided
 620 | error message. This accepts any number of leading zeroes.
 621 | -}
 622 | decWord :: e -> Parser e s Word
 623 | decWord e =
 624 |   Parser
 625 |     ( \chunk0 s0 -> case decWordStart e (boxBytes chunk0) s0 of
 626 |         (# s1, r #) -> (# s1, upcastWordResult r #)
 627 |     )
 628 | 
 629 | {- | Parse a decimal-encoded unsigned number. If the number is
 630 | too large to be represented by a 64-bit word, this fails with
 631 | the provided error message. This accepts any number of leading
 632 | zeroes.
 633 | -}
 634 | decWord64 :: e -> Parser e s Word64
 635 | decWord64 e =
 636 |   Parser
 637 |     ( \chunk0 s0 -> case decWordStart e (boxBytes chunk0) s0 of
 638 |         (# s1, r #) -> (# s1, upcastWord64Result r #)
 639 |     )
 640 | 
 641 | hexSmallWordStart ::
 642 |   e -> -- Error message
 643 |   Word -> -- Upper Bound
 644 |   Bytes -> -- Chunk
 645 |   ST# s (Result# e Word#)
 646 | hexSmallWordStart e !limit !chunk0 s0 =
 647 |   if length chunk0 > 0
 648 |     then case oneHexMaybe (PM.indexByteArray (array chunk0) (offset chunk0)) of
 649 |       Nothing -> (# s0, (# e | #) #)
 650 |       Just w -> (# s0, hexSmallWordMore e w limit (Bytes.unsafeDrop 1 chunk0) #)
 651 |     else (# s0, (# e | #) #)
 652 | 
 653 | decSmallWordStart ::
 654 |   e -> -- Error message
 655 |   Word -> -- Upper Bound
 656 |   Bytes -> -- Chunk
 657 |   ST# s (Result# e Word#)
 658 | decSmallWordStart e !limit !chunk0 s0 =
 659 |   if length chunk0 > 0
 660 |     then
 661 |       let !w =
 662 |             fromIntegral @Word8 @Word
 663 |               (PM.indexByteArray (array chunk0) (offset chunk0))
 664 |               - 48
 665 |        in if w < 10
 666 |             then (# s0, decSmallWordMore e w limit (Bytes.unsafeDrop 1 chunk0) #)
 667 |             else (# s0, (# e | #) #)
 668 |     else (# s0, (# e | #) #)
 669 | 
 670 | -- This will not inline since it is recursive, but worker
 671 | -- wrapper will still happen.
 672 | decWordMore ::
 673 |   e -> -- Error message
 674 |   Word -> -- Accumulator
 675 |   Bytes -> -- Chunk
 676 |   Result# e Word#
 677 | decWordMore e !acc !chunk0 = case len of
 678 |   0 -> (# | (# unW (fromIntegral acc), unI (offset chunk0), 0# #) #)
 679 |   _ ->
 680 |     let !w =
 681 |           fromIntegral @Word8 @Word
 682 |             (PM.indexByteArray (array chunk0) (offset chunk0))
 683 |             - 48
 684 |      in if w < 10
 685 |           then
 686 |             let (overflow, acc') = unsignedPushBase10 acc w
 687 |              in if overflow
 688 |                   then (# e | #)
 689 |                   else decWordMore e acc' (Bytes.unsafeDrop 1 chunk0)
 690 |           else (# | (# unW (fromIntegral acc), unI (offset chunk0), len# #) #)
 691 |  where
 692 |   !len@(I# len#) = length chunk0
 693 | 
 694 | upcastWordResult :: Result# e Word# -> Result# e Word
 695 | {-# INLINE upcastWordResult #-}
 696 | upcastWordResult (# e | #) = (# e | #)
 697 | upcastWordResult (# | (# a, b, c #) #) = (# | (# W# a, b, c #) #)
 698 | 
 699 | {- FOURMOLU_DISABLE -}
 700 | -- This only works on 64-bit platforms.
 701 | upcastWord64Result :: Result# e Word# -> Result# e Word64
 702 | {-# inline upcastWord64Result #-}
 703 | upcastWord64Result (# e | #) = (# e | #)
 704 | upcastWord64Result (# | (# a, b, c #) #) = (# | (# W64# (
 705 |     Exts.wordToWord64# a
 706 |     ), b, c #) #)
 707 | {- FOURMOLU_ENABLE -}
 708 | 
 709 | hexSmallWordMore ::
 710 |   e -> -- Error message
 711 |   Word -> -- Accumulator
 712 |   Word -> -- Upper Bound
 713 |   Bytes -> -- Chunk
 714 |   Result# e Word#
 715 | hexSmallWordMore e !acc !limit !chunk0 =
 716 |   if length chunk0 > 0
 717 |     then case oneHexMaybe (PM.indexByteArray (array chunk0) (offset chunk0)) of
 718 |       Nothing -> (# | (# unW acc, unI (offset chunk0), unI (length chunk0) #) #)
 719 |       Just w ->
 720 |         let w' = acc * 16 + w
 721 |          in if w' < limit
 722 |               then hexSmallWordMore e w' limit (Bytes.unsafeDrop 1 chunk0)
 723 |               else (# e | #)
 724 |     else (# | (# unW acc, unI (offset chunk0), 0# #) #)
 725 | 
 726 | decSmallWordMore ::
 727 |   e -> -- Error message
 728 |   Word -> -- Accumulator
 729 |   Word -> -- Upper Bound
 730 |   Bytes -> -- Chunk
 731 |   Result# e Word#
 732 | decSmallWordMore e !acc !limit !chunk0 =
 733 |   if length chunk0 > 0
 734 |     then
 735 |       let !w =
 736 |             fromIntegral @Word8 @Word
 737 |               (PM.indexByteArray (array chunk0) (offset chunk0))
 738 |               - 48
 739 |        in if w < 10
 740 |             then
 741 |               let w' = acc * 10 + w
 742 |                in if w' < limit
 743 |                     then decSmallWordMore e w' limit (Bytes.unsafeDrop 1 chunk0)
 744 |                     else (# e | #)
 745 |             else (# | (# unW acc, unI (offset chunk0), unI (length chunk0) #) #)
 746 |     else (# | (# unW acc, unI (offset chunk0), 0# #) #)
 747 | 
 748 | unW :: Word -> Word#
 749 | {-# INLINE unW #-}
 750 | unW (W# w) = w
 751 | 
 752 | decWordStart ::
 753 |   e -> -- Error message
 754 |   Bytes -> -- Chunk
 755 |   ST# s (Result# e Word#)
 756 | decWordStart e !chunk0 s0 =
 757 |   if length chunk0 > 0
 758 |     then
 759 |       let !w =
 760 |             fromIntegral @Word8 @Word
 761 |               (PM.indexByteArray (array chunk0) (offset chunk0))
 762 |               - 48
 763 |        in if w < 10
 764 |             then (# s0, decWordMore e w (Bytes.unsafeDrop 1 chunk0) #)
 765 |             else (# s0, (# e | #) #)
 766 |     else (# s0, (# e | #) #)
 767 | 
 768 | {- FOURMOLU_DISABLE -}
 769 | -- Precondition: the word is small enough
 770 | upcastWord16Result :: Result# e Word# -> Result# e Word16
 771 | {-# inline upcastWord16Result #-}
 772 | upcastWord16Result (# e | #) = (# e | #)
 773 | upcastWord16Result (# | (# a, b, c #) #) = (# | (# W16# (
 774 |   Exts.wordToWord16#
 775 |   a), b, c #) #)
 776 | 
 777 | -- Precondition: the word is small enough
 778 | upcastWord32Result :: Result# e Word# -> Result# e Word32
 779 | {-# inline upcastWord32Result #-}
 780 | upcastWord32Result (# e | #) = (# e | #)
 781 | upcastWord32Result (# | (# a, b, c #) #) = (# | (# W32# (
 782 |   Exts.wordToWord32#
 783 |   a), b, c #) #)
 784 | 
 785 | -- Precondition: the word is small enough
 786 | upcastWord8Result :: Result# e Word# -> Result# e Word8
 787 | {-# inline upcastWord8Result #-}
 788 | upcastWord8Result (# e | #) = (# e | #)
 789 | upcastWord8Result (# | (# a, b, c #) #) = (# | (# W8# (
 790 |   Exts.wordToWord8#
 791 |   a), b, c #) #)
 792 | {- FOURMOLU_ENABLE -}
 793 | 
 794 | {- | Parse a decimal-encoded number. If the number is too large to be
 795 | represented by a machine integer, this fails with the provided
 796 | error message. This rejects input with that is preceeded by plus
 797 | or minus. Consequently, it does not parse negative numbers. Use
 798 | 'decStandardInt' or 'decSignedInt' for that purpose. On a 64-bit
 799 | platform 'decWord' will successfully parse 9223372036854775808
 800 | (i.e. @2 ^ 63@), but 'decUnsignedInt' will fail. This parser allows
 801 | leading zeroes.
 802 | -}
 803 | decUnsignedInt :: e -> Parser e s Int
 804 | decUnsignedInt e =
 805 |   Parser
 806 |     ( \chunk0 s0 -> case decPosIntStart e (boxBytes chunk0) s0 of
 807 |         (# s1, r #) -> (# s1, upcastIntResult r #)
 808 |     )
 809 | 
 810 | -- | Variant of 'decUnsignedInt' with an unboxed result.
 811 | decUnsignedInt# :: e -> Parser e s Int#
 812 | decUnsignedInt# e =
 813 |   Parser
 814 |     (\chunk0 s0 -> decPosIntStart e (boxBytes chunk0) s0)
 815 | 
 816 | {- | Parse a decimal-encoded number. If the number is too large to be
 817 | represented by a machine integer, this fails with the provided
 818 | error message. This allows the number to optionally be prefixed
 819 | by plus or minus. If the sign prefix is not present, the number
 820 | is interpreted as positive. This allows leading zeroes.
 821 | -}
 822 | decSignedInt :: e -> Parser e s Int
 823 | decSignedInt e =
 824 |   Parser
 825 |     ( \chunk0 s0 -> case runParser (decSignedInt# e) chunk0 s0 of
 826 |         (# s1, r #) -> (# s1, upcastIntResult r #)
 827 |     )
 828 | 
 829 | {- | Variant of 'decUnsignedInt' that lets the caller supply a leading
 830 | digit. This is useful when parsing formats like JSON where integers with
 831 | leading zeroes are considered invalid. The calling context must
 832 | consume the first digit before calling this parser. Results are
 833 | always positive numbers.
 834 | -}
 835 | decTrailingInt ::
 836 |   -- | Error message
 837 |   e ->
 838 |   -- | Leading digit, should be between @0@ and @9@.
 839 |   Int ->
 840 |   Parser e s Int
 841 | decTrailingInt e (I# w) =
 842 |   Parser
 843 |     ( \chunk0 s0 -> case runParser (decTrailingInt# e w) chunk0 s0 of
 844 |         (# s1, r #) -> (# s1, upcastIntResult r #)
 845 |     )
 846 | 
 847 | decTrailingInt# ::
 848 |   e -> -- Error message
 849 |   Int# -> -- Leading digit, should be between @0@ and @9@.
 850 |   Parser e s Int#
 851 | decTrailingInt# e !w =
 852 |   Parser (\chunk0 s0 -> (# s0, decPosIntMore e (W# (int2Word# w)) maxIntAsWord (boxBytes chunk0) #))
 853 | 
 854 | maxIntAsWord :: Word
 855 | maxIntAsWord = fromIntegral (maxBound :: Int)
 856 | 
 857 | {- | Parse a decimal-encoded number. If the number is too large to be
 858 | represented by a machine integer, this fails with the provided
 859 | error message. This allows the number to optionally be prefixed
 860 | by minus. If the minus prefix is not present, the number
 861 | is interpreted as positive. The disallows a leading plus sign.
 862 | For example, 'decStandardInt' rejects @+42@, but 'decSignedInt'
 863 | allows it.
 864 | -}
 865 | decStandardInt :: e -> Parser e s Int
 866 | decStandardInt e =
 867 |   Parser
 868 |     ( \chunk0 s0 -> case runParser (decStandardInt# e) chunk0 s0 of
 869 |         (# s1, r #) -> (# s1, upcastIntResult r #)
 870 |     )
 871 | 
 872 | decSignedInt# :: e -> Parser e s Int#
 873 | {-# NOINLINE decSignedInt# #-}
 874 | decSignedInt# e =
 875 |   any e `bindFromLiftedToInt` \c -> case c of
 876 |     '+' ->
 877 |       Parser -- plus sign
 878 |         (\chunk0 s0 -> decPosIntStart e (boxBytes chunk0) s0)
 879 |     '-' ->
 880 |       Parser -- minus sign
 881 |         (\chunk0 s0 -> decNegIntStart e (boxBytes chunk0) s0)
 882 |     _ ->
 883 |       Parser -- no sign, there should be a digit here
 884 |         ( \chunk0 s0 ->
 885 |             let !w = char2Word c - 48
 886 |              in if w < 10
 887 |                   then (# s0, decPosIntMore e w maxIntAsWord (boxBytes chunk0) #)
 888 |                   else (# s0, (# e | #) #)
 889 |         )
 890 | 
 891 | -- This is the same as decSignedInt except that we disallow
 892 | -- a leading plus sign.
 893 | decStandardInt# :: e -> Parser e s Int#
 894 | {-# NOINLINE decStandardInt# #-}
 895 | decStandardInt# e =
 896 |   any e `bindFromLiftedToInt` \c -> case c of
 897 |     '-' ->
 898 |       Parser -- minus sign
 899 |         (\chunk0 s0 -> decNegIntStart e (boxBytes chunk0) s0)
 900 |     _ ->
 901 |       Parser -- no sign, there should be a digit here
 902 |         ( \chunk0 s0 ->
 903 |             let !w = char2Word c - 48
 904 |              in if w < 10
 905 |                   then (# s0, decPosIntMore e w maxIntAsWord (boxBytes chunk0) #)
 906 |                   else (# s0, (# e | #) #)
 907 |         )
 908 | 
 909 | {- | Variant of 'decUnsignedInteger' that lets the caller supply a leading
 910 | digit. This is useful when parsing formats like JSON where integers with
 911 | leading zeroes are considered invalid. The calling context must
 912 | consume the first digit before calling this parser. Results are
 913 | always positive numbers.
 914 | -}
 915 | decTrailingInteger ::
 916 |   -- | Leading digit, should be between @0@ and @9@.
 917 |   Int ->
 918 |   Parser e s Integer
 919 | decTrailingInteger (I# w) =
 920 |   Parser (\chunk0 s0 -> (# s0, (# | decIntegerChunks (I# w) 10 0 (boxBytes chunk0) #) #))
 921 | 
 922 | {- | Parse a decimal-encoded positive integer of arbitrary
 923 | size. This rejects input that begins with a plus or minus
 924 | sign.
 925 | -}
 926 | decUnsignedInteger :: e -> Parser e s Integer
 927 | decUnsignedInteger e =
 928 |   Parser
 929 |     (\chunk0 s0 -> decUnsignedIntegerStart e (boxBytes chunk0) s0)
 930 | 
 931 | {- | Parse a decimal-encoded integer of arbitrary size.
 932 | This accepts input that begins with a plus or minus sign.
 933 | Input without a sign prefix is interpreted as positive.
 934 | -}
 935 | decSignedInteger :: e -> Parser e s Integer
 936 | {-# NOINLINE decSignedInteger #-}
 937 | decSignedInteger e =
 938 |   any e >>= \c -> case c of
 939 |     '+' -> do
 940 |       decUnsignedInteger e
 941 |     '-' -> do
 942 |       x <- decUnsignedInteger e
 943 |       pure $! negate x
 944 |     _ ->
 945 |       Parser -- no sign, there should be a digit here
 946 |         ( \chunk0 s0 ->
 947 |             let !w = char2Word c - 48
 948 |              in if w < 10
 949 |                   then
 950 |                     let !r =
 951 |                           decIntegerChunks
 952 |                             (fromIntegral @Word @Int w)
 953 |                             10
 954 |                             0
 955 |                             (boxBytes chunk0)
 956 |                      in (# s0, (# | r #) #)
 957 |                   else (# s0, (# e | #) #)
 958 |         )
 959 | 
 960 | decPosIntStart ::
 961 |   e -> -- Error message
 962 |   Bytes -> -- Chunk
 963 |   ST# s (Result# e Int#)
 964 | decPosIntStart e !chunk0 s0 =
 965 |   if length chunk0 > 0
 966 |     then
 967 |       let !w =
 968 |             fromIntegral @Word8 @Word
 969 |               (PM.indexByteArray (array chunk0) (offset chunk0))
 970 |               - 48
 971 |        in if w < 10
 972 |             then (# s0, decPosIntMore e w maxIntAsWord (Bytes.unsafeDrop 1 chunk0) #)
 973 |             else (# s0, (# e | #) #)
 974 |     else (# s0, (# e | #) #)
 975 | 
 976 | decNegIntStart ::
 977 |   e -> -- Error message
 978 |   Bytes -> -- Chunk
 979 |   ST# s (Result# e Int#)
 980 | decNegIntStart e !chunk0 s0 =
 981 |   if length chunk0 > 0
 982 |     then
 983 |       let !w =
 984 |             fromIntegral @Word8 @Word
 985 |               (PM.indexByteArray (array chunk0) (offset chunk0))
 986 |               - 48
 987 |        in if w < 10
 988 |             then case decPosIntMore e w (maxIntAsWord + 1) (Bytes.unsafeDrop 1 chunk0) of
 989 |               (# | (# x, y, z #) #) ->
 990 |                 (# s0, (# | (# (notI# x +# 1#), y, z #) #) #)
 991 |               (# err | #) ->
 992 |                 (# s0, (# err | #) #)
 993 |             else (# s0, (# e | #) #)
 994 |     else (# s0, (# e | #) #)
 995 | 
 996 | decUnsignedIntegerStart ::
 997 |   e ->
 998 |   Bytes ->
 999 |   ST# s (Result# e Integer)
1000 | decUnsignedIntegerStart e !chunk0 s0 =
1001 |   if length chunk0 > 0
1002 |     then
1003 |       let !w = (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
1004 |        in if w < (10 :: Word8)
1005 |             then
1006 |               let !r =
1007 |                     decIntegerChunks
1008 |                       (fromIntegral @Word8 @Int w)
1009 |                       10
1010 |                       0
1011 |                       (Bytes.unsafeDrop 1 chunk0)
1012 |                in (# s0, (# | r #) #)
1013 |             else (# s0, (# e | #) #)
1014 |     else (# s0, (# e | #) #)
1015 | 
1016 | -- This will not inline since it is recursive, but worker
1017 | -- wrapper will still happen. Fails if the accumulator
1018 | -- exceeds the upper bound.
1019 | decPosIntMore ::
1020 |   e -> -- Error message
1021 |   Word -> -- Accumulator, precondition: less than or equal to bound
1022 |   Word -> -- Inclusive Upper Bound, either (2^63 - 1) or 2^63
1023 |   Bytes -> -- Chunk
1024 |   Result# e Int#
1025 | decPosIntMore e !acc !upper !chunk0 =
1026 |   if len > 0
1027 |     then
1028 |       let !w =
1029 |             fromIntegral @Word8 @Word
1030 |               (PM.indexByteArray (array chunk0) (offset chunk0))
1031 |               - 48
1032 |        in if w < 10
1033 |             then
1034 |               let (overflow, acc') = positivePushBase10 acc w upper
1035 |                in if overflow
1036 |                     then (# e | #)
1037 |                     else decPosIntMore e acc' upper (Bytes.unsafeDrop 1 chunk0)
1038 |             else (# | (# unI (fromIntegral acc), unI (offset chunk0), len# #) #)
1039 |     else (# | (# unI (fromIntegral acc), unI (offset chunk0), 0# #) #)
1040 |  where
1041 |   !len@(I# len#) = length chunk0
1042 | 
1043 | -- This will not inline since it is recursive, but worker
1044 | -- wrapper will still happen. When the accumulator
1045 | -- exceeds the size of a machine integer, this pushes the
1046 | -- accumulated machine int and the shift amount onto the
1047 | -- stack.
1048 | -- We are intentionally lazy in the accumulator. There is
1049 | -- no need to force this on every iteration. We do however,
1050 | -- force it preemptively every time it changes.
1051 | -- Because of how we track overflow, we are able to use the
1052 | -- same function for both positive and negative numbers.
1053 | decIntegerChunks ::
1054 |   Int -> -- Chunk accumulator (e.g. 236)
1055 |   Int -> -- Chunk base-ten bound (e.g. 1000)
1056 |   Integer -> -- Accumulator
1057 |   Bytes -> -- Chunk
1058 |   (# Integer, Int#, Int# #)
1059 | decIntegerChunks !nAcc !eAcc acc !chunk0 =
1060 |   if len > 0
1061 |     then
1062 |       let !w =
1063 |             fromIntegral @Word8 @Word
1064 |               (PM.indexByteArray (array chunk0) (offset chunk0))
1065 |               - 48
1066 |        in if w < 10
1067 |             then
1068 |               let !eAcc' = eAcc * 10
1069 |                in if eAcc' >= eAcc
1070 |                     then
1071 |                       decIntegerChunks
1072 |                         (nAcc * 10 + fromIntegral @Word @Int w)
1073 |                         eAcc'
1074 |                         acc
1075 |                         (Bytes.unsafeDrop 1 chunk0)
1076 |                     else -- In this case, notice that we deliberately
1077 |                     -- unconsume the digit that would have caused
1078 |                     -- an overflow.
1079 | 
1080 |                       let !r =
1081 |                             (acc * fromIntegral @Int @Integer eAcc)
1082 |                               + (fromIntegral @Int @Integer nAcc)
1083 |                        in decIntegerChunks 0 1 r chunk0
1084 |             else
1085 |               let !r =
1086 |                     (acc * fromIntegral @Int @Integer eAcc)
1087 |                       + (fromIntegral @Int @Integer nAcc)
1088 |                in (# r, unI (offset chunk0), len# #)
1089 |     else
1090 |       let !r =
1091 |             (acc * fromIntegral @Int @Integer eAcc)
1092 |               + (fromIntegral @Int @Integer nAcc)
1093 |        in (# r, unI (offset chunk0), 0# #)
1094 |  where
1095 |   !len@(I# len#) = length chunk0
1096 | 
1097 | upcastIntResult :: Result# e Int# -> Result# e Int
1098 | upcastIntResult (# e | #) = (# e | #)
1099 | upcastIntResult (# | (# a, b, c #) #) = (# | (# I# a, b, c #) #)
1100 | 
1101 | char2Word :: Char -> Word
1102 | char2Word = fromIntegral . ord
1103 | 
1104 | {- | Take characters until the specified character is encountered.
1105 | Consumes the matched character as well. Fails if the character
1106 | is not present.  Visually, the cursor advancement and resulting
1107 | @Bytes@ for @takeTrailedBy \'D\'@ look like this:
1108 | 
1109 | >  A B C D E F | input
1110 | > |->->->-|    | cursor
1111 | > {\-*-*-\}      | result bytes
1112 | -}
1113 | takeTrailedBy :: e -> Char -> Parser e s Bytes
1114 | takeTrailedBy e !w = do
1115 |   !start <- cursor
1116 |   skipTrailedBy e w
1117 |   !end <- cursor
1118 |   !arr <- expose
1119 |   pure (Bytes arr start (end - (start + 1)))
1120 | 
1121 | {- | Skip all characters until the terminator is encountered
1122 | and then consume the matching character as well. Visually,
1123 | @skipTrailedBy \'C\'@ advances the cursor like this:
1124 | 
1125 | >  A Z B Y C X C W
1126 | > |->->->->-|
1127 | 
1128 | This fails if it reaches the end of input without encountering
1129 | the character.
1130 | -}
1131 | skipTrailedBy :: e -> Char -> Parser e s ()
1132 | skipTrailedBy e !w = uneffectful# $ \c ->
1133 |   skipUntilConsumeLoop e w c
1134 | 
1135 | {- | Skip all characters until the terminator is encountered.
1136 | This does not consume the terminator. Visually, @skipUntil \'C\'@
1137 | advances the cursor like this:
1138 | 
1139 | >  A Z B Y C X C W
1140 | > |->->->-|
1141 | 
1142 | This succeeds if it reaches the end of the input without
1143 | encountering the terminator. It never fails.
1144 | -}
1145 | skipUntil :: Char -> Parser e s ()
1146 | skipUntil !w = uneffectful# $ \c -> skipUntilLoop w c
1147 | 
1148 | skipUntilLoop ::
1149 |   Char -> -- byte to match
1150 |   Bytes -> -- Chunk
1151 |   Result# e ()
1152 | skipUntilLoop !w !c = case length c of
1153 |   0 -> (# | (# (), unI (offset c), 0# #) #)
1154 |   _ ->
1155 |     if indexLatinCharArray (array c) (offset c) /= w
1156 |       then skipUntilLoop w (Bytes.unsafeDrop 1 c)
1157 |       else (# | (# (), unI (offset c), unI (length c) #) #)
1158 | 
1159 | skipUntilConsumeLoop ::
1160 |   e -> -- Error message
1161 |   Char -> -- byte to match
1162 |   Bytes -> -- Chunk
1163 |   Result# e ()
1164 | skipUntilConsumeLoop e !w !c = case length c of
1165 |   0 -> (# e | #)
1166 |   _ ->
1167 |     if indexLatinCharArray (array c) (offset c) /= w
1168 |       then skipUntilConsumeLoop e w (Bytes.unsafeDrop 1 c)
1169 |       else (# | (# (), unI (offset c + 1), unI (length c - 1) #) #)
1170 | 
1171 | {- FOURMOLU_DISABLE -}
1172 | -- | Parse exactly eight ASCII-encoded characters, interpreting them as the
1173 | -- hexadecimal encoding of a 32-bit number. Note that this rejects a sequence
1174 | -- such as @BC5A9@, requiring @000BC5A9@ instead. This is insensitive to case.
1175 | hexFixedWord32 :: e -> Parser e s Word32
1176 | {-# inline hexFixedWord32 #-}
1177 | hexFixedWord32 e = Parser
1178 |   (\x s0 -> case runParser (hexFixedWord32# e) x s0 of
1179 |     (# s1, r #) -> case r of
1180 |       (# err | #) -> (# s1, (# err | #) #)
1181 |       (# | (# a, b, c #) #) -> (# s1, (# | (# W32# (
1182 |         Exts.wordToWord32#
1183 |         a), b, c #) #) #)
1184 |   )
1185 | {- FOURMOLU_ENABLE -}
1186 | 
1187 | hexFixedWord32# :: e -> Parser e s Word#
1188 | {-# NOINLINE hexFixedWord32# #-}
1189 | hexFixedWord32# e = uneffectfulWord# $ \chunk ->
1190 |   if length chunk >= 8
1191 |     then
1192 |       let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk)
1193 |           !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1)
1194 |           !w2@(W# n2) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 2)
1195 |           !w3@(W# n3) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 3)
1196 |           !w4@(W# n4) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 4)
1197 |           !w5@(W# n5) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 5)
1198 |           !w6@(W# n6) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 6)
1199 |           !w7@(W# n7) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 7)
1200 |        in if
1201 |             | w0 .|. w1 .|. w2 .|. w3 .|. w4 .|. w5 .|. w6 .|. w7 /= maxBound ->
1202 |                 (#
1203 |                   | (#
1204 |                       (n0 `Exts.timesWord#` 268435456##)
1205 |                         `Exts.plusWord#` (n1 `Exts.timesWord#` 16777216##)
1206 |                         `Exts.plusWord#` (n2 `Exts.timesWord#` 1048576##)
1207 |                         `Exts.plusWord#` (n3 `Exts.timesWord#` 65536##)
1208 |                         `Exts.plusWord#` (n4 `Exts.timesWord#` 4096##)
1209 |                         `Exts.plusWord#` (n5 `Exts.timesWord#` 256##)
1210 |                         `Exts.plusWord#` (n6 `Exts.timesWord#` 16##)
1211 |                         `Exts.plusWord#` n7
1212 |                       , unI (offset chunk) +# 8#
1213 |                       , unI (length chunk) -# 8#
1214 |                     #)
1215 |                 #)
1216 |             | otherwise -> (# e | #)
1217 |     else (# e | #)
1218 | 
1219 | {- FOURMOLU_DISABLE -}
1220 | -- | Parse exactly 16 ASCII-encoded characters, interpreting them as the
1221 | -- hexadecimal encoding of a 64-bit number. Note that this rejects a sequence
1222 | -- such as @BC5A9@, requiring @00000000000BC5A9@ instead. This is insensitive
1223 | -- to case.
1224 | hexFixedWord64 :: e -> Parser e s Word64
1225 | {-# inline hexFixedWord64 #-}
1226 | hexFixedWord64 e = Parser
1227 |   (\x s0 -> case runParser (hexFixedWord64# e) x s0 of
1228 |     (# s1, r #) -> case r of
1229 |       (# err | #) -> (# s1, (# err | #) #)
1230 |       (# | (# a, b, c #) #) -> (# s1, (# | (# W64# (
1231 |           Exts.wordToWord64# a
1232 |           ), b, c #) #) #)
1233 |   )
1234 | {- FOURMOLU_ENABLE -}
1235 | 
1236 | hexFixedWord128 :: e -> Parser e s Word128
1237 | hexFixedWord128 e =
1238 |   Word128
1239 |     <$> hexFixedWord64 e
1240 |     <*> hexFixedWord64 e
1241 | 
1242 | hexFixedWord256 :: e -> Parser e s Word256
1243 | hexFixedWord256 e =
1244 |   Word256
1245 |     <$> hexFixedWord64 e
1246 |     <*> hexFixedWord64 e
1247 |     <*> hexFixedWord64 e
1248 |     <*> hexFixedWord64 e
1249 | 
1250 | hexFixedWord64# :: e -> Parser e s Word#
1251 | {-# NOINLINE hexFixedWord64# #-}
1252 | hexFixedWord64# e = uneffectfulWord# $ \chunk ->
1253 |   if length chunk >= 16
1254 |     then
1255 |       let go !off !len !acc = case len of
1256 |             0 -> case acc of
1257 |               W# r ->
1258 |                 (#
1259 |                   | (#
1260 |                       r
1261 |                       , unI off
1262 |                       , unI (length chunk) -# 16#
1263 |                     #)
1264 |                 #)
1265 |             _ -> case oneHexMaybe (PM.indexByteArray (array chunk) off) of
1266 |               Nothing -> (# e | #)
1267 |               Just w -> go (off + 1) (len - 1) ((acc * 16) + w)
1268 |        in go (offset chunk) (16 :: Int) (0 :: Word)
1269 |     else (# e | #)
1270 | 
1271 | {- FOURMOLU_DISABLE -}
1272 | -- | Parse exactly four ASCII-encoded characters, interpreting
1273 | -- them as the hexadecimal encoding of a 16-bit number. Note that
1274 | -- this rejects a sequence such as @5A9@, requiring @05A9@ instead.
1275 | -- This is insensitive to case. This is particularly useful when
1276 | -- parsing escape sequences in C or JSON, which allow encoding
1277 | -- characters in the Basic Multilingual Plane as @\\uhhhh@.
1278 | hexFixedWord16 :: e -> Parser e s Word16
1279 | {-# inline hexFixedWord16 #-}
1280 | hexFixedWord16 e = Parser
1281 |   (\x s0 -> case runParser (hexFixedWord16# e) x s0 of
1282 |     (# s1, r #) -> case r of
1283 |       (# err | #) -> (# s1, (# err | #) #)
1284 |       (# | (# a, b, c #) #) -> (# s1, (# | (# W16# (
1285 |         Exts.wordToWord16#
1286 |         a), b, c #) #) #)
1287 |   )
1288 | 
1289 | -- | Variant of hexFixedWord16 that returns an unboxed result. The result is
1290 | -- a machine-sized word instead of a 16-bit word, but this function guarantees
1291 | -- that only the low 16 bits may be non-zero. This is helpful because the
1292 | -- result needs to be constructed as a machine-sized word. GHC's primitives
1293 | -- for working with 16-bit words lower to most ISAs poorly.
1294 | hexFixedWord16# :: e -> Parser e s Word#
1295 | {-# noinline hexFixedWord16# #-}
1296 | hexFixedWord16# e = uneffectfulWord# $ \chunk -> if length chunk >= 4
1297 |   then
1298 |     let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk)
1299 |         !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1)
1300 |         !w2@(W# n2) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 2)
1301 |         !w3@(W# n3) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 3)
1302 |      in if | w0 .|. w1 .|. w2 .|. w3 /= maxBound ->
1303 |              (# |
1304 |                 (# (n0 `Exts.shiftL#` 12#) `Exts.plusWord#`
1305 |                    (n1 `Exts.shiftL#` 8#) `Exts.plusWord#`
1306 |                    (n2 `Exts.shiftL#` 4#) `Exts.plusWord#`
1307 |                    n3
1308 |                 ,  unI (offset chunk) +# 4#
1309 |                 ,  unI (length chunk) -# 4# #) #)
1310 |            | otherwise -> (# e | #)
1311 |   else (# e | #)
1312 | 
1313 | -- | Parse exactly two ASCII-encoded characters, interpretting
1314 | -- them as the hexadecimal encoding of a 8-bit number. Note that
1315 | -- this rejects a sequence such as @A@, requiring @0A@ instead.
1316 | -- This is insensitive to case.
1317 | hexFixedWord8 :: e -> Parser e s Word8
1318 | {-# inline hexFixedWord8 #-}
1319 | hexFixedWord8 e = Parser
1320 |   (\x s0 -> case runParser (hexFixedWord8# e) x s0 of
1321 |     (# s1, r #) -> case r of
1322 |       (# err | #) -> (# s1, (# err | #) #)
1323 |       (# | (# a, b, c #) #) -> (# s1, (# | (# W8# (
1324 |         Exts.wordToWord8#
1325 |         a), b, c #) #) #)
1326 |   )
1327 | {- FOURMOLU_ENABLE -}
1328 | 
1329 | hexFixedWord8# :: e -> Parser e s Word#
1330 | {-# NOINLINE hexFixedWord8# #-}
1331 | hexFixedWord8# e = uneffectfulWord# $ \chunk ->
1332 |   if length chunk >= 2
1333 |     then
1334 |       let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk)
1335 |           !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1)
1336 |        in if
1337 |             | w0 .|. w1 /= maxBound ->
1338 |                 (#
1339 |                   | (#
1340 |                       (n0 `Exts.timesWord#` 16##)
1341 |                         `Exts.plusWord#` n1
1342 |                       , unI (offset chunk) +# 2#
1343 |                       , unI (length chunk) -# 2#
1344 |                     #)
1345 |                 #)
1346 |             | otherwise -> (# e | #)
1347 |     else (# e | #)
1348 | 
1349 | {- | Consume a single character that is the lowercase hexadecimal
1350 | encoding of a 4-bit word. Fails if the character is not in the class
1351 | @[a-f0-9]@.
1352 | -}
1353 | hexNibbleLower :: e -> Parser e s Word
1354 | hexNibbleLower e = uneffectful $ \chunk -> case length chunk of
1355 |   0 -> Failure e
1356 |   _ ->
1357 |     let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
1358 |      in if
1359 |           | w >= 48 && w < 58 -> Success (fromIntegral w - 48) (offset chunk + 1) (length chunk - 1)
1360 |           | w >= 97 && w < 103 -> Success (fromIntegral w - 87) (offset chunk + 1) (length chunk - 1)
1361 |           | otherwise -> Failure e
1362 | 
1363 | {- | Consume a single character that is the case-insensitive hexadecimal
1364 | encoding of a 4-bit word. Fails if the character is not in the class
1365 | @[a-fA-F0-9]@.
1366 | -}
1367 | hexNibble :: e -> Parser e s Word
1368 | hexNibble e = uneffectful $ \chunk -> case length chunk of
1369 |   0 -> Failure e
1370 |   _ ->
1371 |     let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
1372 |      in if
1373 |           | w >= 48 && w < 58 -> Success (fromIntegral w - 48) (offset chunk + 1) (length chunk - 1)
1374 |           | w >= 65 && w < 71 -> Success (fromIntegral w - 55) (offset chunk + 1) (length chunk - 1)
1375 |           | w >= 97 && w < 103 -> Success (fromIntegral w - 87) (offset chunk + 1) (length chunk - 1)
1376 |           | otherwise -> Failure e
1377 | 
1378 | {- | Consume a single character that is the lowercase hexadecimal
1379 | encoding of a 4-bit word. Returns @Nothing@ without consuming
1380 | the character if it is not in the class @[a-f0-9]@. The parser
1381 | never fails.
1382 | -}
1383 | tryHexNibbleLower :: Parser e s (Maybe Word)
1384 | tryHexNibbleLower = unfailing $ \chunk -> case length chunk of
1385 |   0 -> InternalStep Nothing (offset chunk) (length chunk)
1386 |   _ ->
1387 |     let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
1388 |      in if
1389 |           | w >= 48 && w < 58 -> InternalStep (Just (fromIntegral w - 48)) (offset chunk + 1) (length chunk - 1)
1390 |           | w >= 97 && w < 103 -> InternalStep (Just (fromIntegral w - 87)) (offset chunk + 1) (length chunk - 1)
1391 |           | otherwise -> InternalStep Nothing (offset chunk) (length chunk)
1392 | 
1393 | {- | Consume a single character that is the case-insensitive hexadecimal
1394 | encoding of a 4-bit word. Returns @Nothing@ without consuming
1395 | the character if it is not in the class @[a-fA-F0-9]@. This parser
1396 | never fails.
1397 | -}
1398 | tryHexNibble :: Parser e s (Maybe Word)
1399 | tryHexNibble = unfailing $ \chunk -> case length chunk of
1400 |   0 -> InternalStep Nothing (offset chunk) (length chunk)
1401 |   _ ->
1402 |     let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
1403 |      in if
1404 |           | w >= 48 && w < 58 -> InternalStep (Just (fromIntegral w - 48)) (offset chunk + 1) (length chunk - 1)
1405 |           | w >= 65 && w < 71 -> InternalStep (Just (fromIntegral w - 55)) (offset chunk + 1) (length chunk - 1)
1406 |           | w >= 97 && w < 103 -> InternalStep (Just (fromIntegral w - 87)) (offset chunk + 1) (length chunk - 1)
1407 |           | otherwise -> InternalStep Nothing (offset chunk) (length chunk)
1408 | 
1409 | -- Returns the maximum machine word if the argument is not
1410 | -- the ASCII encoding of a hexadecimal digit.
1411 | oneHex :: Word8 -> Word
1412 | {-# INLINE oneHex #-}
1413 | oneHex w
1414 |   | w >= 48 && w < 58 = (fromIntegral w - 48)
1415 |   | w >= 65 && w < 71 = (fromIntegral w - 55)
1416 |   | w >= 97 && w < 103 = (fromIntegral w - 87)
1417 |   | otherwise = maxBound
1418 | 
1419 | oneHexMaybe :: Word8 -> Maybe Word
1420 | {-# INLINE oneHexMaybe #-}
1421 | oneHexMaybe w
1422 |   | w >= 48 && w < 58 = Just (fromIntegral w - 48)
1423 |   | w >= 65 && w < 71 = Just (fromIntegral w - 55)
1424 |   | w >= 97 && w < 103 = Just (fromIntegral w - 87)
1425 |   | otherwise = Nothing
1426 | 
1427 | uneffectfulWord# :: (Bytes -> Result# e Word#) -> Parser e s Word#
1428 | {-# INLINE uneffectfulWord# #-}
1429 | uneffectfulWord# f =
1430 |   Parser
1431 |     (\b s0 -> (# s0, (f (boxBytes b)) #))
1432 | 
1433 | -- Precondition: the arguments are non-negative. Boolean is
1434 | -- true when overflow happens. Performs: a * 10 + b
1435 | -- Postcondition: when overflow is false, the resulting
1436 | -- word is less than or equal to the upper bound
1437 | positivePushBase10 :: Word -> Word -> Word -> (Bool, Word)
1438 | {-# INLINE positivePushBase10 #-}
1439 | positivePushBase10 (W# a) (W# b) (W# upper) =
1440 |   let !(# ca, r0 #) = Exts.timesWord2# a 10##
1441 |       !r1 = Exts.plusWord# r0 b
1442 |       !cb = int2Word# (gtWord# r1 upper)
1443 |       !cc = int2Word# (ltWord# r1 0##)
1444 |       !c = ca `or#` cb `or#` cc
1445 |    in (case c of 0## -> False; _ -> True, W# r1)
1446 | 
1447 | unsignedPushBase10 :: Word -> Word -> (Bool, Word)
1448 | {-# INLINE unsignedPushBase10 #-}
1449 | unsignedPushBase10 (W# a) (W# b) =
1450 |   let !(# ca, r0 #) = Exts.timesWord2# a 10##
1451 |       !r1 = Exts.plusWord# r0 b
1452 |       !cb = int2Word# (ltWord# r1 r0)
1453 |       !c = ca `or#` cb
1454 |    in (case c of 0## -> False; _ -> True, W# r1)
1455 | 
1456 | -- | Skip while the predicate is matched. This is always inlined.
1457 | skipWhile :: (Char -> Bool) -> Parser e s ()
1458 | {-# INLINE skipWhile #-}
1459 | skipWhile f = go
1460 |  where
1461 |   go =
1462 |     isEndOfInput >>= \case
1463 |       True -> pure ()
1464 |       False -> do
1465 |         w <- anyUnsafe
1466 |         if f w
1467 |           then go
1468 |           else unconsume 1
1469 | 
1470 | -- Interpret the next byte as an Latin1-encoded character.
1471 | -- Does not check to see if any characters are left. This
1472 | -- is not exported.
1473 | anyUnsafe :: Parser e s Char
1474 | {-# INLINE anyUnsafe #-}
1475 | anyUnsafe = uneffectful $ \chunk ->
1476 |   let w = indexCharArray (array chunk) (offset chunk) :: Char
1477 |    in Success w (offset chunk + 1) (length chunk - 1)
1478 | 
1479 | -- Reads one byte and interprets it as Latin1-encoded character.
1480 | indexCharArray :: PM.ByteArray -> Int -> Char
1481 | {-# INLINE indexCharArray #-}
1482 | indexCharArray (PM.ByteArray x) (I# i) = C# (indexCharArray# x i)
1483 | 
1484 | {- | Match any character, to perform lookahead. Returns 'Nothing' if
1485 |   end of input has been reached. Does not consume any input.
1486 | 
1487 |   /Note/: Because this parser does not fail, do not use it
1488 |   with combinators such as 'many', because such as 'many',
1489 |   because such parsers loop until a failure occurs. Careless
1490 |   use will thus result in an infinite loop.
1491 | -}
1492 | peek :: Parser e s (Maybe Char)
1493 | {-# INLINE peek #-}
1494 | peek = uneffectful $ \(Bytes arr off len) ->
1495 |   let v =
1496 |         if len > 0
1497 |           then Just (indexCharArray arr off)
1498 |           else Nothing
1499 |    in Success v off len
1500 | 
1501 | {- | Match any byte, to perform lookahead. Does not consume any
1502 |   input, but will fail if end of input has been reached.
1503 | -}
1504 | peek' :: e -> Parser e s Char
1505 | {-# INLINE peek' #-}
1506 | peek' e = uneffectful $ \(Bytes arr off len) ->
1507 |   if len > 0
1508 |     then Success (indexCharArray arr off) off len
1509 |     else Failure e
1510 | 


--------------------------------------------------------------------------------